Ticket #1163: 0003-ffprobe-implement-string-validation-policy-setting.patch
| File 0003-ffprobe-implement-string-validation-policy-setting.patch, 10.0 KB (added by , 13 years ago) |
|---|
-
doc/ffprobe.texi
From 8c9d5f214578547b0ff261258c391da94d516a28 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini <stefasab@gmail.com> Date: Wed, 2 Oct 2013 16:22:17 +0200 Subject: [PATCH] ffprobe: implement string validation policy setting This should fix trac tickets #1163, #2502, #2955. --- doc/ffprobe.texi | 25 ++++++++++ ffprobe.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 164 insertions(+), 7 deletions(-) diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi index 777dbe7..7f84c2b 100644
a b Show information related to program and library versions. This is the 317 317 equivalent of setting both @option{-show_program_version} and 318 318 @option{-show_library_versions} options. 319 319 320 @item -string_validation_policy @var{policy} 321 Set string validation policy. 322 323 @var{policy} must be "fail" or in the form 324 "replace[=@var{replacement}]". 325 326 If it is set to "fail", the program will fail immediately in case an 327 invalid UTF-8 sequence is found in the input. This is 328 especially useful to validate input metadata. 329 330 In case the "replace" form is chosen, the program will substitute the 331 invalid UTF-8 sequences with the string specified in 332 @var{replacement}, which is typically a simple character such as "?", 333 but any valid UTF-8 sequence is accepted. 334 335 In case the replacement string is not specified, the program will 336 assume the empty string, that is, it will remove the invalid sequences 337 from the input strings. 338 339 This is especially useful to create valid metadata output from 340 invalid sources. 341 342 By default the program will apply the replace policy with an empty 343 replacement. 344 320 345 @item -bitexact 321 346 Force bitexact output, useful to produce output which is not dependent 322 347 on the specific build. -
ffprobe.c
diff --git a/ffprobe.c b/ffprobe.c index 5cc11a2..01fab90 100644
a b 26 26 #include "config.h" 27 27 #include "version.h" 28 28 29 #include <ctype.h> /* isprint */ 29 30 #include <string.h> 30 31 31 32 #include "libavformat/avformat.h" … … static int show_private_data = 1; 75 76 static char *print_format; 76 77 static char *stream_specifier; 77 78 79 typedef enum { 80 STRING_VALIDATION_POLICY_FAIL, 81 STRING_VALIDATION_POLICY_REPLACE, 82 } StringValidationPolicy; 83 84 StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE; 85 static char *string_validation_replace; 86 78 87 typedef struct { 79 88 int id; ///< identifier 80 89 int64_t start, end; ///< start, end in second/AV_TIME_BASE units … … static inline void writer_print_integer(WriterContext *wctx, 428 437 } 429 438 } 430 439 440 static inline int validate_string(char **dstp, const char *src, void *log_ctx) 441 { 442 const uint8_t *p; 443 AVBPrint dstbuf; 444 int invalid_chars_nb = 0, ret = 0; 445 446 av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED); 447 448 for (p = (uint8_t *)src; *p;) { 449 uint32_t code; 450 uint8_t tmp; 451 int invalid = 0; 452 453 if (av_get_utf8(&code, &p) < 0) 454 invalid = 1; 455 456 /* non-printable ASCII */ 457 if (code < 128 && !isprint(code)) 458 invalid = 1; 459 460 if (invalid) { 461 invalid_chars_nb++; 462 463 switch (string_validation_policy) { 464 case STRING_VALIDATION_POLICY_FAIL: 465 av_log(log_ctx, AV_LOG_ERROR, 466 "Invalid UTF-8 sequence found in sequence '%s'\n", src); 467 ret = AVERROR_INVALIDDATA; 468 goto end; 469 break; 470 471 case STRING_VALIDATION_POLICY_REPLACE: 472 if (string_validation_replace) { 473 const uint8_t *s = string_validation_replace; 474 while (*s) { 475 if (av_get_utf8(&code, &s) < 0) 476 break; 477 PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1);); 478 } 479 } 480 break; 481 } 482 } else { 483 PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1);); 484 } 485 } 486 487 if (invalid_chars_nb) { 488 av_log(log_ctx, AV_LOG_WARNING, 489 "%d invalid UTF-8 sequence found in 
sequence '%s', replaced with '%s'\n", 490 invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, "")); 491 } 492 493 end: 494 av_bprint_finalize(&dstbuf, dstp); 495 return ret; 496 } 497 498 #define PRINT_STRING_OPT 1 499 #define PRINT_STRING_VALIDATE 2 500 431 501 static inline int writer_print_string(WriterContext *wctx, 432 const char *key, const char *val, int opt)502 const char *key, const char *val, int flags) 433 503 { 434 504 const struct section *section = wctx->section[wctx->level]; 435 505 int ret = 0; 436 506 437 if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) 507 if ((flags & PRINT_STRING_OPT) 508 && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) 438 509 return 0; 439 510 440 511 if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) { 441 wctx->writer->print_string(wctx, key, val); 512 if (flags & PRINT_STRING_VALIDATE) { 513 char *key1 = NULL, *val1 = NULL; 514 ret = validate_string(&key1, key, wctx); 515 if (ret < 0) goto end; 516 ret = validate_string(&val1, val, wctx); 517 if (ret < 0) goto end; 518 wctx->writer->print_string(wctx, key1, val1); 519 end: 520 if (ret < 0) { 521 av_log(wctx, AV_LOG_ERROR, 522 "Invalid key=value string combination %s=%s in section %s\n", 523 key, val, section->unique_name); 524 } 525 av_free(key1); 526 av_free(val1); 527 } else { 528 wctx->writer->print_string(wctx, key, val); 529 } 530 442 531 wctx->nb_item[wctx->level]++; 443 532 } 444 533 … … static void writer_print_time(WriterContext *wctx, const char *key, 460 549 char buf[128]; 461 550 462 551 if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) { 463 writer_print_string(wctx, key, "N/A", 1);552 writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT); 464 553 } else { 465 554 double d = ts * av_q2d(*time_base); 466 555 struct unit_value uv; … … static void writer_print_time(WriterContext *wctx, const char *key, 474 563 static void 
writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration) 475 564 { 476 565 if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) { 477 writer_print_string(wctx, key, "N/A", 1);566 writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT); 478 567 } else { 479 568 writer_print_integer(wctx, key, ts); 480 569 } … … static void writer_register_all(void) 1443 1532 #define print_int(k, v) writer_print_integer(w, k, v) 1444 1533 #define print_q(k, v, s) writer_print_rational(w, k, v, s) 1445 1534 #define print_str(k, v) writer_print_string(w, k, v, 0) 1446 #define print_str_opt(k, v) writer_print_string(w, k, v, 1)1535 #define print_str_opt(k, v) writer_print_string(w, k, v, PRINT_STRING_OPT) 1447 1536 #define print_time(k, v, tb) writer_print_time(w, k, v, tb, 0) 1448 1537 #define print_ts(k, v) writer_print_ts(w, k, v, 0) 1449 1538 #define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1) … … static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section 1468 1557 writer_print_section_header(wctx, section_id); 1469 1558 1470 1559 while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) { 1471 ret = writer_print_string(wctx, tag->key, tag->value, 0);1560 ret = writer_print_string(wctx, tag->key, tag->value, PRINT_STRING_VALIDATE); 1472 1561 if (ret < 0) 1473 1562 break; 1474 1563 } … … static int opt_read_intervals(void *optctx, const char *opt, const char *arg) 2536 2625 return parse_read_intervals(arg); 2537 2626 } 2538 2627 2628 static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg) 2629 { 2630 char *mode = av_strdup(arg); 2631 char *next; 2632 int ret = 0; 2633 2634 if (!mode) return AVERROR(ENOMEM); 2635 2636 next = strchr(mode, '='); 2637 if (next) 2638 *next++ = 0; 2639 2640 if (!strcmp(mode, "fail")) { 2641 string_validation_policy = STRING_VALIDATION_POLICY_FAIL; 2642 if (next) { 2643 av_log(NULL, AV_LOG_ERROR, 2644 "No argument must be specified 
for option '%s' with mode 'fail'\n", opt); 2645 ret = AVERROR(EINVAL); 2646 goto end; 2647 } 2648 } else if (!strcmp(mode, "replace")) { 2649 string_validation_policy = STRING_VALIDATION_POLICY_REPLACE; 2650 string_validation_replace = av_strdup(next); 2651 2652 if (next && !string_validation_replace) { 2653 ret = AVERROR(ENOMEM); 2654 goto end; 2655 } 2656 } else { 2657 av_log(NULL, AV_LOG_ERROR, 2658 "Invalid argument '%s' for option '%s', " 2659 "choose between fail, or replace[=REPLACEMENT]\n", arg, opt); 2660 ret = AVERROR(EINVAL); 2661 goto end; 2662 } 2663 2664 end: 2665 av_free(mode); 2666 return ret; 2667 } 2668 2539 2669 static int opt_pretty(void *optctx, const char *opt, const char *arg) 2540 2670 { 2541 2671 show_value_unit = 1; … … static const OptionDef real_options[] = { 2635 2765 { "private", OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" }, 2636 2766 { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" }, 2637 2767 { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" }, 2768 { "string_validation_policy", HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" }, 2638 2769 { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" }, 2639 2770 { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"}, 2640 2771 { NULL, }, … … int main(int argc, char **argv) 2747 2878 2748 2879 end: 2749 2880 av_freep(&print_format); 2881 av_freep(&string_validation_replace); 2750 2882 av_freep(&read_intervals); 2751 2883 2752 2884 uninit_opts();
