Ticket #1163: 0003-ffprobe-implement-string-validation-policy-setting.2.patch

File 0003-ffprobe-implement-string-validation-policy-setting.2.patch, 12.4 KB (added by saste, 6 years ago)
  • doc/ffprobe.texi

    From bbb2bae1ef7c17dd2bc5b24dfc0d65d757fab4a9 Mon Sep 17 00:00:00 2001
    From: Stefano Sabatini <stefasab@gmail.com>
    Date: Wed, 2 Oct 2013 16:22:17 +0200
    Subject: [PATCH] ffprobe: implement string validation policy setting
    
    This should fix trac tickets #1163, #2502.
    ---
     doc/ffprobe.texi |  29 +++++++++
     ffprobe.c        | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++----
     2 files changed, 207 insertions(+), 12 deletions(-)
    
    diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
    index 777dbe7..f23f3dc 100644
    a b Show information related to program and library versions. This is the 
    317317equivalent of setting both @option{-show_program_version} and
    318318@option{-show_library_versions} options.
    319319
     320@item -string_validation_policy @var{policy}
     321Set string validation policy.
     322
     323@var{policy} must be "fail", "ignore", or in the form
     324"replace[=@var{replacement}]".
     325
     326If it is set to "fail", the program will fail immediately in case an
     327invalid UTF-8 sequence is found in an input string. This is
     328especially useful to validate input metadata.
     329
     330If it is set to "ignore", any validation error will be ignored. This
     331will result in possibly broken output, especially with the json or xml
     332writer.
     333
     334In case the "replace" form is chosen, the program will substitute the
     335invalid UTF-8 sequences with the string specified in
     336@var{replacement}, which is typically a simple character such as "?",
     337but any valid UTF-8 sequence is accepted.
     338
     339In case the replacement string is not specified, the program will
     340assume the empty string, that is it will remove the invalid sequences
     341from the input strings.
     342
     343This is especially useful to create valid metadata output from
     344invalid sources.
     345
     346By default the program will apply the replace policy with an empty
     347replacement.
     348
    320349@item -bitexact
    321350Force bitexact output, useful to produce output which is not dependent
    322351on the specific build.
  • ffprobe.c

    diff --git a/ffprobe.c b/ffprobe.c
    index 581bce9..c8858f5 100644
    a b static int show_private_data = 1; 
    7575static char *print_format;
    7676static char *stream_specifier;
    7777
     78typedef enum {
     79    STRING_VALIDATION_POLICY_FAIL,
     80    STRING_VALIDATION_POLICY_REPLACE,
     81    STRING_VALIDATION_POLICY_IGNORE,
     82} StringValidationPolicy;
     83
     84StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
     85static char *string_validation_replace;
     86
    7887typedef struct {
    7988    int id;             ///< identifier
    8089    int64_t start, end; ///< start, end in second/AV_TIME_BASE units
    static inline void writer_print_integer(WriterContext *wctx, 
    428437    }
    429438}
    430439
     440static char *get_utf8_sequence_string(char *buf, size_t buf_size,
     441                                      const uint8_t *ubuf, size_t ubuf_size)
     442{
     443    AVBPrint bp;
     444    int i;
     445
     446    av_bprint_init_for_buffer(&bp, buf, buf_size);
     447    av_bprintf(&bp, "0X");
     448    for (i = 0; i < ubuf_size; i++)
     449        av_bprintf(&bp, "%02X", ubuf[i]);
     450    return buf;
     451}
     452
     453static inline int validate_string(char **dstp, const char *src, void *log_ctx)
     454{
     455    const uint8_t *p;
     456    AVBPrint dstbuf;
     457    int invalid_chars_nb = 0, ret = 0;
     458    size_t len = strlen(src);
     459
     460    av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
     461
     462    for (p = (uint8_t *)src; *p;) {
     463        uint32_t code;
     464        uint8_t tmp;
     465        int invalid = 0;
     466        const uint8_t *p0 = p;
     467
     468        if (av_utf8_decode(&code, &p, len, AV_UTF8_DECODE_FLAG_CHECK_RANGE) < 0) {
     469            char buf[32];
     470            av_log(log_ctx, AV_LOG_WARNING,
     471                   "Invalid UTF-8 sequence %s found in string '%s'\n",
     472                   get_utf8_sequence_string(buf, sizeof(buf), p0, p-p0), src);
     473            invalid = 1;
     474        }
     475        len -= p-p0;
     476
     477        if (invalid) {
     478            invalid_chars_nb++;
     479
     480            switch (string_validation_policy) {
     481            case STRING_VALIDATION_POLICY_FAIL:
     482                av_log(log_ctx, AV_LOG_ERROR,
     483                       "Invalid UTF-8 sequence found in string '%s'\n", src);
     484                ret = AVERROR_INVALIDDATA;
     485                goto end;
     486                break;
     487
     488            case STRING_VALIDATION_POLICY_REPLACE:
     489                if (string_validation_replace)
     490                    av_bprintf(&dstbuf, "%s", string_validation_replace);
     491                break;
     492            }
     493        }
     494
     495        if (!invalid || string_validation_policy == STRING_VALIDATION_POLICY_IGNORE) {
     496            PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
     497        }
     498    }
     499
     500    if (invalid_chars_nb && string_validation_policy == STRING_VALIDATION_POLICY_REPLACE) {
     501        av_log(log_ctx, AV_LOG_WARNING,
     502               "%d invalid UTF-8 sequence(s) found in string '%s', replaced with '%s'\n",
     503               invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, ""));
     504    }
     505
     506end:
     507    av_bprint_finalize(&dstbuf, dstp);
     508    return ret;
     509}
     510
     511#define PRINT_STRING_OPT      1
     512#define PRINT_STRING_VALIDATE 2
     513
    431514static inline int writer_print_string(WriterContext *wctx,
    432                                       const char *key, const char *val, int opt)
     515                                      const char *key, const char *val, int flags)
    433516{
    434517    const struct section *section = wctx->section[wctx->level];
    435518    int ret = 0;
    436519
    437     if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
     520    if ((flags & PRINT_STRING_OPT)
     521        && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
    438522        return 0;
    439523
    440524    if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) {
    441         wctx->writer->print_string(wctx, key, val);
     525        if (flags & PRINT_STRING_VALIDATE) {
     526            char *key1 = NULL, *val1 = NULL;
     527            ret = validate_string(&key1, key, wctx);
     528            if (ret < 0) goto end;
     529            ret = validate_string(&val1, val, wctx);
     530            if (ret < 0) goto end;
     531            wctx->writer->print_string(wctx, key1, val1);
     532        end:
     533            if (ret < 0) {
     534                av_log(wctx, AV_LOG_ERROR,
     535                       "Invalid key=value string combination %s=%s in section %s\n",
     536                       key, val, section->unique_name);
     537            }
     538            av_free(key1);
     539            av_free(val1);
     540        } else {
     541            wctx->writer->print_string(wctx, key, val);
     542        }
     543
    442544        wctx->nb_item[wctx->level]++;
    443545    }
    444546
    static void writer_print_time(WriterContext *wctx, const char *key, 
    460562    char buf[128];
    461563
    462564    if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
    463         writer_print_string(wctx, key, "N/A", 1);
     565        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
    464566    } else {
    465567        double d = ts * av_q2d(*time_base);
    466568        struct unit_value uv;
    static void writer_print_time(WriterContext *wctx, const char *key, 
    474576static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration)
    475577{
    476578    if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
    477         writer_print_string(wctx, key, "N/A", 1);
     579        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
    478580    } else {
    479581        writer_print_integer(wctx, key, ts);
    480582    }
    static void writer_register_all(void) 
    14431545#define print_int(k, v)         writer_print_integer(w, k, v)
    14441546#define print_q(k, v, s)        writer_print_rational(w, k, v, s)
    14451547#define print_str(k, v)         writer_print_string(w, k, v, 0)
    1446 #define print_str_opt(k, v)     writer_print_string(w, k, v, 1)
     1548#define print_str_opt(k, v)     writer_print_string(w, k, v, PRINT_STRING_OPT)
     1549#define print_str_validate(k, v) writer_print_string(w, k, v, PRINT_STRING_VALIDATE)
    14471550#define print_time(k, v, tb)    writer_print_time(w, k, v, tb, 0)
    14481551#define print_ts(k, v)          writer_print_ts(w, k, v, 0)
    14491552#define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1)
    static void writer_register_all(void) 
    14581561#define print_section_header(s) writer_print_section_header(w, s)
    14591562#define print_section_footer(s) writer_print_section_footer(w, s)
    14601563
    1461 static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section_id)
     1564static inline int show_tags(WriterContext *w, AVDictionary *tags, int section_id)
    14621565{
    14631566    AVDictionaryEntry *tag = NULL;
    14641567    int ret = 0;
    14651568
    14661569    if (!tags)
    14671570        return 0;
    1468     writer_print_section_header(wctx, section_id);
     1571    writer_print_section_header(w, section_id);
    14691572
    14701573    while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) {
    1471         ret = writer_print_string(wctx, tag->key, tag->value, 0);
    1472         if (ret < 0)
     1574        if ((ret = print_str_validate(tag->key, tag->value)) < 0)
    14731575            break;
    14741576    }
    1475     writer_print_section_footer(wctx);
     1577    writer_print_section_footer(w);
    14761578
    14771579    return ret;
    14781580}
    static int show_format(WriterContext *w, AVFormatContext *fmt_ctx) 
    20212123    int ret = 0;
    20222124
    20232125    writer_print_section_header(w, SECTION_ID_FORMAT);
    2024     print_str("filename",        fmt_ctx->filename);
     2126    print_str_validate("filename", fmt_ctx->filename);
    20252127    print_int("nb_streams",       fmt_ctx->nb_streams);
    20262128    print_int("nb_programs",      fmt_ctx->nb_programs);
    20272129    print_str("format_name",      fmt_ctx->iformat->name);
    static int opt_read_intervals(void *optctx, const char *opt, const char *arg) 
    25372639    return parse_read_intervals(arg);
    25382640}
    25392641
     2642static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg)
     2643{
     2644    char *mode = av_strdup(arg);
     2645    char *next;
     2646    int ret = 0;
     2647
     2648    if (!mode) return AVERROR(ENOMEM);
     2649
     2650    next = strchr(mode, '=');
     2651    if (next)
     2652        *next++ = 0;
     2653
     2654    if (!strcmp(mode, "fail")) {
     2655        string_validation_policy = STRING_VALIDATION_POLICY_FAIL;
     2656    } else if (!strcmp(mode, "ignore")) {
     2657        string_validation_policy = STRING_VALIDATION_POLICY_IGNORE;
     2658    } else if (!strcmp(mode, "replace")) {
     2659        string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
     2660        string_validation_replace = av_strdup(next);
     2661
     2662        if (next && !string_validation_replace) {
     2663            ret = AVERROR(ENOMEM);
     2664            goto end;
     2665        }
     2666
     2667        {
     2668            /* validate replace string */
     2669            const uint8_t *p = string_validation_replace;
     2670            while (*p) {
     2671                char buf[32];
     2672                const uint8_t *p0 = p;
     2673                int32_t code;
     2674                ret = av_utf8_decode(&code, &p, strlen(p), AV_UTF8_DECODE_FLAG_CHECK_RANGE);
     2675                if (ret < 0) {
     2676                    av_log(NULL, AV_LOG_ERROR,
     2677                           "Invalid UTF8 sequence %s found in string validation replace '%s'\n",
     2678                           get_utf8_sequence_string(buf, sizeof(buf), p0, p-p0),
     2679                           string_validation_replace);
     2680                    goto end;
     2681                }
     2682            }
     2683        }
     2684    } else {
     2685        av_log(NULL, AV_LOG_ERROR,
     2686               "Invalid argument '%s' for option '%s', "
     2687               "choose between fail, ignore, or replace[=REPLACEMENT]\n", arg, opt);
     2688        ret = AVERROR(EINVAL);
     2689        goto end;
     2690    }
     2691
     2692    if (next && string_validation_policy != STRING_VALIDATION_POLICY_REPLACE) {
     2693        av_log(NULL, AV_LOG_ERROR,
     2694               "No argument must be specified for option '%s' with fail or ignore policy\n", opt);
     2695        ret = AVERROR(EINVAL);
     2696        goto end;
     2697    }
     2698
     2699end:
     2700    av_free(mode);
     2701    return ret;
     2702}
     2703
    25402704static int opt_pretty(void *optctx, const char *opt, const char *arg)
    25412705{
    25422706    show_value_unit              = 1;
    static const OptionDef real_options[] = { 
    26362800    { "private",           OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
    26372801    { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" },
    26382802    { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" },
     2803    { "string_validation_policy",  HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" },
    26392804    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
    26402805    { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"},
    26412806    { NULL, },
    int main(int argc, char **argv) 
    27482913
    27492914end:
    27502915    av_freep(&print_format);
     2916    av_freep(&string_validation_replace);
    27512917    av_freep(&read_intervals);
    27522918
    27532919    uninit_opts();