Ticket #1163: 0003-ffprobe-implement-string-validation-policy-setting.patch

File 0003-ffprobe-implement-string-validation-policy-setting.patch, 10.0 KB (added by saste, 6 years ago)
  • doc/ffprobe.texi

    From 8c9d5f214578547b0ff261258c391da94d516a28 Mon Sep 17 00:00:00 2001
    From: Stefano Sabatini <stefasab@gmail.com>
    Date: Wed, 2 Oct 2013 16:22:17 +0200
    Subject: [PATCH] ffprobe: implement string validation policy setting
    
    This should fix trac tickets #1163, #2502, #2955.
    ---
     doc/ffprobe.texi |  25 ++++++++++
     ffprobe.c        | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
     2 files changed, 164 insertions(+), 7 deletions(-)
    
    diff --git a/doc/ffprobe.texi b/doc/ffprobe.texi
    index 777dbe7..7f84c2b 100644
    a b Show information related to program and library versions. This is the 
    317317equivalent of setting both @option{-show_program_version} and
    318318@option{-show_library_versions} options.
    319319
     320@item -string_validation_policy @var{policy}
     321Set string validation policy.
     322
     323@var{policy} must be "fail" or in the form
     324"replace[=@var{replacement}]".
     325
     326If it is set to "fail", the program will fail immediately in case an
     327invalid UTF-8 sequence is found in an input string. This is
     328especially useful to validate input metadata.
     329
     330In case the "replace" form is chosen, the program will substitute the
     331invalid UTF-8 sequences with the string specified in
     332@var{replacement}, which is typically a simple character such as "?",
     333but any valid UTF-8 sequence is accepted.
     334
     335In case the replacement string is not specified, the program will
     336assume the empty string, that is it will remove the invalid sequences
     337from the input strings.
     338
     339This is especially useful to create valid metadata output from
     340invalid sources.
     341
     342By default the program will apply the replace policy with an empty
     343replacement.
     344
    320345@item -bitexact
    321346Force bitexact output, useful to produce output which is not dependent
    322347on the specific build.
  • ffprobe.c

    diff --git a/ffprobe.c b/ffprobe.c
    index 5cc11a2..01fab90 100644
    a b  
    2626#include "config.h"
    2727#include "version.h"
    2828
     29#include <ctype.h>              /* isprint */
    2930#include <string.h>
    3031
    3132#include "libavformat/avformat.h"
    static int show_private_data = 1; 
    7576static char *print_format;
    7677static char *stream_specifier;
    7778
     79typedef enum {
     80    STRING_VALIDATION_POLICY_FAIL,
     81    STRING_VALIDATION_POLICY_REPLACE,
     82} StringValidationPolicy;
     83
     84StringValidationPolicy string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
     85static char *string_validation_replace;
     86
    7887typedef struct {
    7988    int id;             ///< identifier
    8089    int64_t start, end; ///< start, end in second/AV_TIME_BASE units
    static inline void writer_print_integer(WriterContext *wctx, 
    428437    }
    429438}
    430439
     440static inline int validate_string(char **dstp, const char *src, void *log_ctx)
     441{
     442    const uint8_t *p;
     443    AVBPrint dstbuf;
     444    int invalid_chars_nb = 0, ret = 0;
     445
     446    av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED);
     447
     448    for (p = (uint8_t *)src; *p;) {
     449        uint32_t code;
     450        uint8_t tmp;
     451        int invalid = 0;
     452
     453        if (av_get_utf8(&code, &p) < 0)
     454            invalid = 1;
     455
     456        /* non-printable ASCII */
     457        if (code < 128 && !isprint(code))
     458            invalid = 1;
     459
     460        if (invalid) {
     461            invalid_chars_nb++;
     462
     463            switch (string_validation_policy) {
     464            case STRING_VALIDATION_POLICY_FAIL:
     465                av_log(log_ctx, AV_LOG_ERROR,
     466                       "Invalid UTF-8 sequence found in sequence '%s'\n", src);
     467                ret = AVERROR_INVALIDDATA;
     468                goto end;
     469            break;
     470
     471            case STRING_VALIDATION_POLICY_REPLACE:
     472                if (string_validation_replace) {
     473                    const uint8_t *s = string_validation_replace;
     474                    while (*s) {
     475                        if (av_get_utf8(&code, &s) < 0)
     476                            break;
     477                        PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
     478                    }
     479                }
     480            break;
     481            }
     482        } else {
     483            PUT_UTF8(code, tmp, av_bprint_chars(&dstbuf, tmp, 1););
     484        }
     485    }
     486
     487    if (invalid_chars_nb) {
     488        av_log(log_ctx, AV_LOG_WARNING,
     489               "%d invalid UTF-8 sequence found in sequence '%s', replaced with '%s'\n",
     490               invalid_chars_nb, src, (char *)av_x_if_null(string_validation_replace, ""));
     491    }
     492
     493end:
     494    av_bprint_finalize(&dstbuf, dstp);
     495    return ret;
     496}
     497
     498#define PRINT_STRING_OPT      1 /* field is optional: skipped unless the writer has WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS */
     499#define PRINT_STRING_VALIDATE 2 /* pass key and value through validate_string() before printing */
     500
    431501static inline int writer_print_string(WriterContext *wctx,
    432                                       const char *key, const char *val, int opt)
     502                                      const char *key, const char *val, int flags)
    433503{
    434504    const struct section *section = wctx->section[wctx->level];
    435505    int ret = 0;
    436506
    437     if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
     507    if ((flags & PRINT_STRING_OPT)
     508        && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS))
    438509        return 0;
    439510
    440511    if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) {
    441         wctx->writer->print_string(wctx, key, val);
     512        if (flags & PRINT_STRING_VALIDATE) {
     513            char *key1 = NULL, *val1 = NULL;
     514            ret = validate_string(&key1, key, wctx);
     515            if (ret < 0) goto end;
     516            ret = validate_string(&val1, val, wctx);
     517            if (ret < 0) goto end;
     518            wctx->writer->print_string(wctx, key1, val1);
     519        end:
     520            if (ret < 0) {
     521                av_log(wctx, AV_LOG_ERROR,
     522                       "Invalid key=value string combination %s=%s in section %s\n",
     523                       key, val, section->unique_name);
     524            }
     525            av_free(key1);
     526            av_free(val1);
     527        } else {
     528            wctx->writer->print_string(wctx, key, val);
     529        }
     530
    442531        wctx->nb_item[wctx->level]++;
    443532    }
    444533
    static void writer_print_time(WriterContext *wctx, const char *key, 
    460549    char buf[128];
    461550
    462551    if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
    463         writer_print_string(wctx, key, "N/A", 1);
     552        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
    464553    } else {
    465554        double d = ts * av_q2d(*time_base);
    466555        struct unit_value uv;
    static void writer_print_time(WriterContext *wctx, const char *key, 
    474563static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration)
    475564{
    476565    if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) {
    477         writer_print_string(wctx, key, "N/A", 1);
     566        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
    478567    } else {
    479568        writer_print_integer(wctx, key, ts);
    480569    }
    static void writer_register_all(void) 
    14431532#define print_int(k, v)         writer_print_integer(w, k, v)
    14441533#define print_q(k, v, s)        writer_print_rational(w, k, v, s)
    14451534#define print_str(k, v)         writer_print_string(w, k, v, 0)
    1446 #define print_str_opt(k, v)     writer_print_string(w, k, v, 1)
     1535#define print_str_opt(k, v)     writer_print_string(w, k, v, PRINT_STRING_OPT)
    14471536#define print_time(k, v, tb)    writer_print_time(w, k, v, tb, 0)
    14481537#define print_ts(k, v)          writer_print_ts(w, k, v, 0)
    14491538#define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1)
    static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section 
    14681557    writer_print_section_header(wctx, section_id);
    14691558
    14701559    while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) {
    1471         ret = writer_print_string(wctx, tag->key, tag->value, 0);
     1560        ret = writer_print_string(wctx, tag->key, tag->value, PRINT_STRING_VALIDATE);
    14721561        if (ret < 0)
    14731562            break;
    14741563    }
    static int opt_read_intervals(void *optctx, const char *opt, const char *arg) 
    25362625    return parse_read_intervals(arg);
    25372626}
    25382627
     2628static int opt_string_validation_policy(void *optctx, const char *opt, const char *arg)
     2629{
     2630    char *mode = av_strdup(arg);
     2631    char *next;
     2632    int ret = 0;
     2633
     2634    if (!mode) return AVERROR(ENOMEM);
     2635
     2636    next = strchr(mode, '=');
     2637    if (next)
     2638        *next++ = 0;
     2639
     2640    if (!strcmp(mode, "fail")) {
     2641        string_validation_policy = STRING_VALIDATION_POLICY_FAIL;
     2642        if (next) {
     2643            av_log(NULL, AV_LOG_ERROR,
     2644                   "No argument must be specified for option '%s' with mode 'fail'\n", opt);
     2645            ret = AVERROR(EINVAL);
     2646            goto end;
     2647        }
     2648    } else if (!strcmp(mode, "replace")) {
     2649        string_validation_policy = STRING_VALIDATION_POLICY_REPLACE;
     2650        string_validation_replace = av_strdup(next);
     2651
     2652        if (next && !string_validation_replace) {
     2653            ret = AVERROR(ENOMEM);
     2654            goto end;
     2655        }
     2656    } else {
     2657        av_log(NULL, AV_LOG_ERROR,
     2658               "Invalid argument '%s' for option '%s', "
     2659               "choose between fail, or replace[=REPLACEMENT]\n", arg, opt);
     2660        ret = AVERROR(EINVAL);
     2661        goto end;
     2662    }
     2663
     2664end:
     2665    av_free(mode);
     2666    return ret;
     2667}
     2668
    25392669static int opt_pretty(void *optctx, const char *opt, const char *arg)
    25402670{
    25412671    show_value_unit              = 1;
    static const OptionDef real_options[] = { 
    26352765    { "private",           OPT_BOOL, {(void*)&show_private_data}, "same as show_private_data" },
    26362766    { "bitexact", OPT_BOOL, {&do_bitexact}, "force bitexact output" },
    26372767    { "read_intervals", HAS_ARG, {.func_arg = opt_read_intervals}, "set read intervals", "read_intervals" },
     2768    { "string_validation_policy",  HAS_ARG, {.func_arg = opt_string_validation_policy}, "select the string validation policy", "policy_specification" },
    26382769    { "default", HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {.func_arg = opt_default}, "generic catch all option", "" },
    26392770    { "i", HAS_ARG, {.func_arg = opt_input_file_i}, "read specified file", "input_file"},
    26402771    { NULL, },
    int main(int argc, char **argv) 
    27472878
    27482879end:
    27492880    av_freep(&print_format);
     2881    av_freep(&string_validation_replace);
    27502882    av_freep(&read_intervals);
    27512883
    27522884    uninit_opts();