Ticket #1163: 0001-lavu-avstring-add-av_utf8_decode-function.patch

File 0001-lavu-avstring-add-av_utf8_decode-function.patch, 7.2 KB (added by saste, 6 years ago)
  • doc/APIchanges

    From ea516d668152f7c5f59615c40748fc28e510c488 Mon Sep 17 00:00:00 2001
    From: Stefano Sabatini <stefasab@gmail.com>
    Date: Thu, 3 Oct 2013 01:21:40 +0200
    Subject: [PATCH] lavu/avstring: add av_utf8_decode() function
    
    ---
     doc/APIchanges       |  3 +++
     libavutil/Makefile   |  1 +
     libavutil/avstring.c | 53 ++++++++++++++++++++++++++++++++++++++++
     libavutil/avstring.h | 26 ++++++++++++++++++++
     libavutil/utf8.c     | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++
     libavutil/version.h  |  2 +-
     6 files changed, 153 insertions(+), 1 deletion(-)
     create mode 100644 libavutil/utf8.c
    
    diff --git a/doc/APIchanges b/doc/APIchanges
    index dfdc159..b292d19 100644
    a b libavutil: 2012-10-22 
    1515
    1616API changes, most recent first:
    1717
     182013-11-12 - xxxxxxx - lavu 52.53.100 - avstring.h
     19  Add av_utf8_decode() function.
     20
    18212013-11-xx - xxxxxxx - lavc 55.41.100 / 55.25.0 - avcodec.h
    1922                       lavu 52.51.100 - frame.h
    2023  Add ITU-R BT.2020 and other not yet included values to color primaries,
  • libavutil/Makefile

    diff --git a/libavutil/Makefile b/libavutil/Makefile
    index 7b3b439..19540e4 100644
    a b TESTPROGS = adler32 \ 
    155155            sha                                                         \
    156156            sha512                                                      \
    157157            tree                                                        \
     158            utf8                                                        \
    158159            xtea                                                        \
    159160
    160161TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
  • libavutil/avstring.c

    diff --git a/libavutil/avstring.c b/libavutil/avstring.c
    index eed58fa..69a0230 100644
    a b int av_isxdigit(int c) 
    307307    return av_isdigit(c) || (c >= 'a' && c <= 'f');
    308308}
    309309
     310int av_utf8_decode(int32_t *code, const uint8_t **buf, size_t left, int flags)
     311{
     312    const uint8_t *p = *buf;
     313    uint32_t top;
     314    int ret = 0;
     315
     316    if (!left)
     317        return 0;
     318
     319    *code = *p++;
     320
     321    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
     322       which is not admitted */
     323    if ((*code & 0xc0) == 0x80 || *code >= 0xFE) {
     324        ret = AVERROR(EILSEQ);
     325        goto end;
     326    }
     327    top = (*code & 128) >> 1;
     328
     329    while (*code & top) {
     330        int tmp;
     331        if (!--left) {
     332            ret = AVERROR(EILSEQ); /* incomplete sequence */
     333            goto end;
     334        }
     335
     336        /* we assume the byte to be in the form 10xx-xxxx */
     337        tmp = *p++ - 128;   /* strip leading 1 */
     338        if (tmp>>6) {
     339            ret = AVERROR(EILSEQ);
     340            goto end;
     341        }
     342        *code = (*code<<6) + tmp;
     343        top <<= 5;
     344    }
     345    *code &= (top << 1) - 1;
     346
     347    if (flags & AV_UTF8_DECODE_FLAG_CHECK_RANGE) {
     348        /* only accepts valid Unicode points:
     349           #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     350           that is any Unicode character, excluding the surrogate blocks, FFFE, and FFFF */
     351        if (*code != 0x9 && *code != 0xA && *code != 0xD &&
     352            (*code <    0x20 || *code >   0xD7FF) &&
     353            (*code <  0xE000 || *code >   0xFFFD) &&
     354            (*code < 0x10000 || *code > 0x10FFFF))
     355            ret = AVERROR(EILSEQ);
     356    }
     357
     358end:
     359    *buf = p;
     360    return ret;
     361}
     362
    310363#ifdef TEST
    311364
    312365int main(void)
  • libavutil/avstring.h

    diff --git a/libavutil/avstring.h b/libavutil/avstring.h
    index 438ef79..b4f3b07 100644
    a b  
    2222#define AVUTIL_AVSTRING_H
    2323
    2424#include <stddef.h>
     25#include <stdint.h>
    2526#include "attributes.h"
    2627
    2728/**
    int av_escape(char **dst, const char *src, const char *special_chars, 
    296297              enum AVEscapeMode mode, int flags);
    297298
    298299/**
     300 * Check Unicode range validity of the decoded code point.
     301 */
     302#define AV_UTF8_DECODE_FLAG_CHECK_RANGE 1
     303
     304/**
     305 * Read and decode a single UTF-8 character sequence from buffer in
     306 * *buf, and update *buf to point to the next byte after the parsed
     307 * sequence.
     308 *
     309 * In case of an invalid sequence, the pointer will be updated to the
     310 * next byte after the invalid sequence.
     311 *
     312 * @param code pointer whose pointed value is updated to keep the
     313 * parsed code in case of success
     314 * @param left bytes left to read in the buffer. By default it won't
     315 * read more than 6 bytes (maximum number of bytes in a UTF-8
     316 * sequence).
     317 * @param flags a collection of AV_UTF8_DECODE_FLAG_* flags
     318 * @return >= 0 in case a sequence was successfully read, a negative
     319 * value in case of invalid sequence
     320 * @see GET_UTF8()
     321 */
     322int av_utf8_decode(int32_t *code, const uint8_t **buf, size_t left, int flags);
     323
     324/**
    299325 * @}
    300326 */
    301327
  • new file libavutil/utf8.c

    diff --git a/libavutil/utf8.c b/libavutil/utf8.c
    new file mode 100644
    index 0000000..4be0e95
    - +  
     1/*
     2 * Copyright (c) 2013 Stefano Sabatini
     3 *
     4 * This file is part of FFmpeg.
     5 *
     6 * FFmpeg is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU Lesser General Public
     8 * License as published by the Free Software Foundation; either
     9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * FFmpeg is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with FFmpeg; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     19 */
     20
     21#include <stdio.h>
     22
     23#include "libavutil/avstring.h"
     24#include "libavutil/file.h"
     25
     26static void print_sequence(const char *p, int l, int indent)
     27{
     28    int i;
     29    for (i = 0; i < l; i++)
     30        printf("%02X", (uint8_t)p[i]);
     31    for (i = l*2; i < indent; i++)
     32        printf("%c", ' ');
     33}
     34
     35int main(int argc, char **argv)
     36{
     37    int ret;
     38    char *filename = argv[1];
     39    uint8_t *file_buf;
     40    size_t file_buf_size;
     41    uint32_t code;
     42    const uint8_t *p;
     43    int i = 0;
     44
     45    ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL);
     46    if (ret < 0)
     47        goto end;
     48
     49    p = file_buf;
     50    while (i < file_buf_size) {
     51        int l, r;
     52        const uint8_t *p0 = p;
     53        r = av_utf8_decode(&code, &p, file_buf_size - i, 0);
     54        l = (int)(p-p0);
     55        print_sequence(p0, l, 20);
     56        if (r >= 0) {
     57            printf("%-10d 0x%-10X %-5d ", code, code, l);
     58            if (*p0 == '\n') { printf("\\n\n"); }
     59            else             { printf ("%.*s\n", l, p0); }
     60        } else {
     61            printf("-> illegal sequence\n");
     62        }
     63        i += l;
     64    }
     65
     66end:
     67    av_file_unmap(file_buf, file_buf_size);
     68    return ret < 0;
     69}
  • libavutil/version.h

    diff --git a/libavutil/version.h b/libavutil/version.h
    index 2e2f571..c6ec6e0 100644
    a b  
    7575 */
    7676
    7777#define LIBAVUTIL_VERSION_MAJOR  52
    78 #define LIBAVUTIL_VERSION_MINOR  52
     78#define LIBAVUTIL_VERSION_MINOR  53
    7979#define LIBAVUTIL_VERSION_MICRO 100
    8080
    8181#define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \