Ticket #1163: 0003-ffprobe-implement-string-validation-policy-setting.2.patch
| File 0003-ffprobe-implement-string-validation-policy-setting.2.patch, 12.4 KB (added by , 12 years ago) |
|---|
-
doc/ffprobe.texi
From bbb2bae1ef7c17dd2bc5b24dfc0d65d757fab4a9 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini <stefasab@gmail.com> Date: Wed, 2 Oct 2013 16:22:17 +0200 Subject: [PATCH] ffprobe: implement string validation policy setting This should fix trac tickets #1163, #2502. --- doc/ffprobe.texi | 29 +++++++++ ffprobe.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 207 insertions(+), 12 deletions(-) diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi index 777dbe7..f23f3dc 100644
a b Show information related to program and library versions. This is the 317 317 equivalent of setting both @option{-show_program_version} and 318 318 @option{-show_library_versions} options. 319 319 320 @item -string_validation_policy @var{policy} 321 Set string validation policy. 322 323 @var{policy} must be "fail", "ignore", or in the form 324 "replace[=@var{replacement}]". 325 326 If it is set to "fail", the program will fail immediately in case an 327 invalid UTF-8 sequence is found in the input. This is 328 especially useful to validate input metadata. 329 330 If it is set to "ignore", any validation error will be ignored. This 331 will result in possibly broken output, especially with the json or xml 332 writer. 333 334 In case the "replace" form is chosen, the program will substitute the 335 invalid UTF-8 sequences with the string specified in 336 @var{replacement}, which is typically a simple character such as "?", 337 but any valid UTF-8 sequence is accepted. 338 339 In case the replacement string is not specified, the program will 340 assume the empty string, that is, it will remove the invalid sequences 341 from the input strings. 342 343 This is especially useful to create valid metadata output from 344 invalid sources. 345 346 By default the program will apply the replace policy with an empty 347 replacement. 348 320 349 @item -bitexact 321 350 Force bitexact output, useful to produce output which is not dependent 322 351 on the specific build. -
ffprobe.c
diff --git a/ffprobe.c b/ffprobe.c index 581bce9..c8858f5 100644
a b static int show_private_data = 1; 75 75 static char *print_format; 76 76 static char *stream_specifier; 77 77 78 /* How an invalid UTF-8 sequence met by validate_string() is handled. */ typedef enum { 79 STRING_VALIDATION_POLICY_FAIL, 80 STRING_VALIDATION_POLICY_REPLACE, 81 STRING_VALIDATION_POLICY_IGNORE, 82 } StringValidationPolicy; 83 84 StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE; 85 static char *string_validation_replace; 86 78 87 typedef struct { 79 88 int id; ///< identifier 80 89 int64_t start, end; ///< start, end in second/AV_TIME_BASE units … … static inline void writer_print_integer(WriterContext *wctx, 428 437 } 429 438 } 430 439 440 /* Write "0X" followed by two uppercase hex digits for each of the ubuf_size bytes at ubuf into buf (capacity buf_size), and return buf. */ static char *get_utf8_sequence_string(char *buf, size_t buf_size, 441 const uint8_t *ubuf, size_t ubuf_size) 442 { 443 AVBPrint bp; 444 int i; 445 446 av_bprint_init_for_buffer(&bp, buf, buf_size); 447 av_bprintf(&bp, "0X"); 448 for (i = 0; i < ubuf_size; i++) 449 av_bprintf(&bp, "%02X", ubuf[i]); 450 return buf; 451 } 452 453 /* Decode src one UTF-8 sequence at a time and build the result string stored in *dstp, applying string_validation_policy to each sequence that fails to decode. Returns 0, or AVERROR_INVALIDDATA when the policy is FAIL and an invalid sequence is found. */ static inline int validate_string(char **dstp, const char *src, void *log_ctx) 454 { 455 const uint8_t *p; 456 AVBPrint dstbuf; 457 int invalid_chars_nb = 0, ret = 0; 458 size_t len = strlen(src); 459 460 av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED); 461 462 for (p = (uint8_t *)src; *p;) { 463 uint32_t code; 464 uint8_t tmp; 465 int invalid = 0; 466 const uint8_t *p0 = p; 467 468 if (av_utf8_decode(&code, &p, len, AV_UTF8_DECODE_FLAG_CHECK_RANGE) < 0) { 469 char buf[32]; 470 av_log(log_ctx, AV_LOG_WARNING, 471 "Invalid UTF-8 sequence %s found in string '%s'\n", 472 get_utf8_sequence_string(buf, sizeof(buf), p0, p-p0), src); 473 invalid = 1; 474 } 475 len -= p-p0; 476 477 if (invalid) { 478 invalid_chars_nb++; 479 480 switch (string_validation_policy) { 481 case STRING_VALIDATION_POLICY_FAIL: 482 av_log(log_ctx, AV_LOG_ERROR, 483 "Invalid UTF-8 sequence found in string '%s'\n", src); 484 ret = AVERROR_INVALIDDATA; 485 goto end; 486 break; 487 488 case STRING_VALIDATION_POLICY_REPLACE: 489 if (string_validation_replace) 490 
av_bprintf(&dstbuf, "%s", string_validation_replace); 491 break; 492 } 493 } 494 495 /* valid sequences are always re-encoded; with the IGNORE policy the value left in code is emitted even for an invalid sequence */ if (!invalid || string_validation_policy == STRING_VALIDATION_POLICY_IGNORE) { 496 PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1);); 497 } 498 } 499 500 if (invalid_chars_nb && string_validation_policy == STRING_VALIDATION_POLICY_REPLACE) { 501 av_log(log_ctx, AV_LOG_WARNING, 502 "%d invalid UTF-8 sequence(s) found in string '%s', replaced with '%s'\n", 503 invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, "")); 504 } 505 506 end: 507 av_bprint_finalize(&dstbuf, dstp); 508 return ret; 509 } 510 511 /* Flag bits accepted in the flags argument of writer_print_string(). */ #define PRINT_STRING_OPT 1 512 #define PRINT_STRING_VALIDATE 2 513 431 514 static inline int writer_print_string(WriterContext *wctx, 432 const char *key, const char *val, int opt)515 const char *key, const char *val, int flags) 433 516 { 434 517 const struct section *section = wctx->section[wctx->level]; 435 518 int ret = 0; 436 519 437 if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) 520 if ((flags & PRINT_STRING_OPT) 521 && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) 438 522 return 0; 439 523 440 524 if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) { 441 wctx->writer->print_string(wctx, key, val); 525 /* when validation is requested, print validated copies of key and value and free them afterwards */ if (flags & PRINT_STRING_VALIDATE) { 526 char *key1 = NULL, *val1 = NULL; 527 ret = validate_string(&key1, key, wctx); 528 if (ret < 0) goto end; 529 ret = validate_string(&val1, val, wctx); 530 if (ret < 0) goto end; 531 wctx->writer->print_string(wctx, key1, val1); 532 end: 533 if (ret < 0) { 534 av_log(wctx, AV_LOG_ERROR, 535 "Invalid key=value string combination %s=%s in section %s\n", 536 key, val, section->unique_name); 537 } 538 av_free(key1); 539 av_free(val1); 540 } else { 541 wctx->writer->print_string(wctx, key, val); 542 } 543 442 544 wctx->nb_item[wctx->level]++; 443 545 } 444 546 … … static void writer_print_time(WriterContext *wctx, const char *key, 460 562 char 
buf[128]; 461 563 462 564 if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) { 463 writer_print_string(wctx, key, "N/A", 1);565 writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT); 464 566 } else { 465 567 double d = ts * av_q2d(*time_base); 466 568 struct unit_value uv; … … static void writer_print_time(WriterContext *wctx, const char *key, 474 576 static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration) 475 577 { 476 578 if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) { 477 writer_print_string(wctx, key, "N/A", 1);579 writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT); 478 580 } else { 479 581 writer_print_integer(wctx, key, ts); 480 582 } … … static void writer_register_all(void) 1443 1545 #define print_int(k, v) writer_print_integer(w, k, v) 1444 1546 #define print_q(k, v, s) writer_print_rational(w, k, v, s) 1445 1547 #define print_str(k, v) writer_print_string(w, k, v, 0) 1446 #define print_str_opt(k, v) writer_print_string(w, k, v, 1) 1548 #define print_str_opt(k, v) writer_print_string(w, k, v, PRINT_STRING_OPT) 1549 /* print a string through writer_print_string() with the PRINT_STRING_VALIDATE flag set */ #define print_str_validate(k, v) writer_print_string(w, k, v, PRINT_STRING_VALIDATE) 1447 1550 #define print_time(k, v, tb) writer_print_time(w, k, v, tb, 0) 1448 1551 #define print_ts(k, v) writer_print_ts(w, k, v, 0) 1449 1552 #define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1) … … static void writer_register_all(void) 1458 1561 #define print_section_header(s) writer_print_section_header(w, s) 1459 1562 #define print_section_footer(s) writer_print_section_footer(w, s) 1460 1563 1461 static inline int show_tags(WriterContext *w ctx, AVDictionary *tags, int section_id)1564 static inline int show_tags(WriterContext *w, AVDictionary *tags, int section_id) 1462 1565 { 1463 1566 AVDictionaryEntry *tag = NULL; 1464 1567 int ret = 0; 1465 1568 1466 1569 if (!tags) 1467 1570 return 0; 1468 writer_print_section_header(w ctx, section_id);1571 
writer_print_section_header(w, section_id); 1469 1572 1470 1573 while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) { 1471 ret = writer_print_string(wctx, tag->key, tag->value, 0); 1472 if (ret < 0) 1574 if ((ret = print_str_validate(tag->key, tag->value)) < 0) 1473 1575 break; 1474 1576 } 1475 writer_print_section_footer(w ctx);1577 writer_print_section_footer(w); 1476 1578 1477 1579 return ret; 1478 1580 } … … static int show_format(WriterContext *w, AVFormatContext *fmt_ctx) 2021 2123 int ret = 0; 2022 2124 2023 2125 writer_print_section_header(w, SECTION_ID_FORMAT); 2024 print_str ("filename",fmt_ctx->filename);2126 print_str_validate("filename", fmt_ctx->filename); 2025 2127 print_int("nb_streams", fmt_ctx->nb_streams); 2026 2128 print_int("nb_programs", fmt_ctx->nb_programs); 2027 2129 print_str("format_name", fmt_ctx->iformat->name); … … static int opt_read_intervals(void *optctx, const char *opt, const char *arg) 2537 2639 return parse_read_intervals(arg); 2538 2640 } 2539 2641 2642 /* Option handler for -string_validation_policy. arg is "fail", "ignore" or "replace[=REPLACEMENT]"; it sets string_validation_policy and, for the replace form, string_validation_replace (left NULL when no replacement is given). Returns a negative AVERROR code on an unknown policy, an unexpected argument, an invalid replacement string or allocation failure. */ static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg) 2643 { 2644 char *mode = av_strdup(arg); 2645 char *next; 2646 int ret = 0; 2647 2648 if (!mode) return AVERROR(ENOMEM); 2649 2650 next = strchr(mode, '='); 2651 if (next) 2652 *next++ = 0; 2653 2654 if (!strcmp(mode, "fail")) { 2655 string_validation_policy = STRING_VALIDATION_POLICY_FAIL; 2656 } else if (!strcmp(mode, "ignore")) { 2657 string_validation_policy = STRING_VALIDATION_POLICY_IGNORE; 2658 } else if (!strcmp(mode, "replace")) { 2659 string_validation_policy = STRING_VALIDATION_POLICY_REPLACE; 2660 /* drop any replacement set by an earlier occurrence of the option so it is not leaked */ av_freep(&string_validation_replace); string_validation_replace = av_strdup(next); 2661 2662 if (next && !string_validation_replace) { 2663 ret = AVERROR(ENOMEM); 2664 goto end; 2665 } 2666 2667 if (string_validation_replace) { 2668 /* validate replace string; skipped for plain "replace", where the pointer is NULL and must not be dereferenced */ 2669 const uint8_t *p = string_validation_replace; 2670 while (*p) { 2671 char buf[32]; 2672 const uint8_t *p0 = p; 2673 int32_t code; 2674 ret = av_utf8_decode(&code, &p, strlen(p), 
AV_UTF8_DECODE_FLAG_CHECK_RANGE); 2675 if (ret < 0) { 2676 av_log(NULL, AV_LOG_ERROR, 2677 "Invalid UTF8 sequence %s found in string validation replace '%s'\n", 2678 get_utf8_sequence_string(buf, sizeof(buf), p0, p-p0), 2679 string_validation_replace); 2680 goto end; 2681 } 2682 } 2683 } 2684 } else { 2685 av_log(NULL, AV_LOG_ERROR, 2686 "Invalid argument '%s' for option '%s', " 2687 "choose between fail, ignore, or replace[=REPLACEMENT]\n", arg, opt); 2688 ret = AVERROR(EINVAL); 2689 goto end; 2690 } 2691 2692 if (next && string_validation_policy != STRING_VALIDATION_POLICY_REPLACE) { 2693 av_log(NULL, AV_LOG_ERROR, 2694 "No argument must be specified for option '%s' with fail or ignore policy\n", opt); 2695 ret = AVERROR(EINVAL); 2696 goto end; 2697 } 2698 2699 end: 2700 av_free(mode); 2701 return ret; 2702 } 2703 2540 2704 static int opt_pretty(void *optctx, const char *opt, const char *arg) 2541 2705 { 2542 2706 show_value_unit = 1; … … static const OptionDef real_options[] = { 2636 2800 { "private", OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" }, 2637 2801 { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" }, 2638 2802 { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" }, 2803 { "string_validation_policy", HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" }, 2639 2804 { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" }, 2640 2805 { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"}, 2641 2806 { NULL, }, … … int main(int argc, char **argv) 2748 2913 2749 2914 end: 2750 2915 av_freep(&print_format); 2916 av_freep(&string_validation_replace); 2751 2917 av_freep(&read_intervals); 2752 2918 2753 2919 uninit_opts();
