Ticket #1163: 0002-lavu-avstring-add-av_get_utf8-function.patch

File 0002-lavu-avstring-add-av_get_utf8-function.patch, 5.1 KB (added by saste, 6 years ago)
  • libavutil/Makefile

    From 5fac9639ee1b7d1c359147611a372a56bd822976 Mon Sep 17 00:00:00 2001
    From: Stefano Sabatini <stefasab@gmail.com>
    Date: Thu, 3 Oct 2013 01:21:40 +0200
    Subject: [PATCH] lavu/avstring: add av_get_utf8() function
    
    TODO: minor bump, APIchanges entry
    ---
     libavutil/Makefile   |  1 +
     libavutil/avstring.c | 31 +++++++++++++++++++++++
     libavutil/avstring.h | 12 +++++++++
     libavutil/utf8.c     | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++
     4 files changed, 113 insertions(+)
     create mode 100644 libavutil/utf8.c
    
    diff --git a/libavutil/Makefile b/libavutil/Makefile
    index 7b3b439..19540e4 100644
    a b TESTPROGS = adler32 \ 
    155155            sha                                                         \
    156156            sha512                                                      \
    157157            tree                                                        \
     158            utf8                                                        \
    158159            xtea                                                        \
    159160
    160161TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
  • libavutil/avstring.c

    diff --git a/libavutil/avstring.c b/libavutil/avstring.c
    index eed58fa..ffa540a 100644
    a b int av_isxdigit(int c) 
    307307    return av_isdigit(c) || (c >= 'a' && c <= 'f');
    308308}
    309309
     310int av_get_utf8(int32_t *code, const uint8_t **buf)
     311{
     312    const uint8_t *p = *buf;
     313    uint32_t top;
     314
     315    *code = *p++;
     316
     317    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
     318       which is not admitted */
     319    if ((*code & 0xc0) == 0x80 || *code >= 0xFE) {
     320        *buf = p;
     321        return AVERROR(EINVAL);
     322    }
     323    top = (*code & 128) >> 1;
     324
     325    while (*code & top) {
     326        /* we assume the byte to be in the form 10xx-xxxx */
     327        int tmp = *p++ - 128;   /* strip leading 1 */
     328        if (tmp>>6) {
     329            *buf = p;
     330            return AVERROR(EINVAL);
     331        }
     332        *code = (*code<<6) + tmp;
     333        top <<= 5;
     334    }
     335    *code &= (top << 1) - 1;
     336
     337    *buf = p;
     338    return 0;
     339}
     340
    310341#ifdef TEST
    311342
    312343int main(void)
  • libavutil/avstring.h

    diff --git a/libavutil/avstring.h b/libavutil/avstring.h
    index 438ef79..8f4ba42 100644
    a b  
    2222#define AVUTIL_AVSTRING_H
    2323
    2424#include <stddef.h>
     25#include <stdint.h>
    2526#include "attributes.h"
    2627
    2728/**
    static inline int av_tolower(int c) 
    226227int av_isxdigit(int c);
    227228
    228229/**
     230 * Read a UTF-8 character from the buffer in *buf, and update *buf to point to the
     231 * next sequence after the parsed sequence.
     232 * In case of an invalid sequence, the pointer will be updated to the next
     233 * character after the invalid sequence.
     234 *
     235 * @return >= 0 in case a sequence was successfully read, a negative
     236 * value in case of invalid sequence
     237 */
     238int av_get_utf8(int32_t *code, const uint8_t **buf);
     239
     240/**
    229241 * Locale-independent case-insensitive compare.
    230242 * @note This means only ASCII-range characters are case-insensitive
    231243 */
  • new file libavutil/utf8.c

    diff --git a/libavutil/utf8.c b/libavutil/utf8.c
    new file mode 100644
    index 0000000..4c1e1c7
    - +  
     1/*
     2 * Copyright (c) 2013 Stefano Sabatini
     3 *
     4 * This file is part of FFmpeg.
     5 *
     6 * FFmpeg is free software; you can redistribute it and/or
     7 * modify it under the terms of the GNU Lesser General Public
     8 * License as published by the Free Software Foundation; either
     9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * FFmpeg is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with FFmpeg; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     19 */
     20
     21#include <stdio.h>
     22
     23#include "libavutil/avstring.h"
     24#include "libavutil/file.h"
     25
     26static void print_sequence(const char *p, int l, int indent)
     27{
     28    int i;
     29    for (i = 0; i < l; i++)
     30        printf("%02X", (uint8_t)p[i]);
     31    for (i=l*2; i < indent; i++)
     32        printf("%c", ' ');
     33}
     34
     35int main(int argc, char **argv)
     36{
     37    int ret;
     38    char *filename = argv[1];
     39    uint8_t *file_buf;
     40    size_t file_buf_size;
     41    uint32_t code;
     42    const uint8_t *p;
     43    int i = 0;
     44
     45    ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL);
     46    if (ret < 0)
     47        goto end;
     48
     49    p = file_buf;
     50    while (i < file_buf_size) {
     51        int l, r;
     52        const uint8_t *p0 = p;
     53        r = av_get_utf8(&code, &p, 0);
     54        l = (int)(p-p0);
     55        print_sequence(p0, l, 20);
     56        if (r >= 0) {
     57            printf("%-10d 0x%-10X %-5d ", code, code, l);
     58            if (*p0 == '\n') { printf("\\n\n"); }
     59            else { printf ("%.*s\n", l, p0); }
     60        } else {
     61            printf("-> illegal sequence\n");
     62        }
     63        i += l;
     64    }
     65
     66end:
     67    av_file_unmap(file_buf, file_buf_size);
     68    return ret < 0;
     69}