Ticket #1163: 0002-lavu-avstring-add-av_get_utf8-function.patch
| File 0002-lavu-avstring-add-av_get_utf8-function.patch, 5.1 KB (added by , 13 years ago) |
|---|
-
libavutil/Makefile
From 5fac9639ee1b7d1c359147611a372a56bd822976 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini <stefasab@gmail.com> Date: Thu, 3 Oct 2013 01:21:40 +0200 Subject: [PATCH] lavu/avstring: add av_get_utf8() function TODO: minor bump, APIchanges entry --- libavutil/Makefile | 1 + libavutil/avstring.c | 31 +++++++++++++++++++++++ libavutil/avstring.h | 12 +++++++++ libavutil/utf8.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 libavutil/utf8.c diff --git a/libavutil/Makefile b/libavutil/Makefile index 7b3b439..19540e4 100644
a b TESTPROGS = adler32 \ 155 155 sha \ 156 156 sha512 \ 157 157 tree \ 158 utf8 \ 158 159 xtea \ 159 160 160 161 TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo -
libavutil/avstring.c
diff --git a/libavutil/avstring.c b/libavutil/avstring.c index eed58fa..ffa540a 100644
/**
 * Decode one UTF-8 encoded value from *buf and advance *buf past it.
 *
 * The lead byte selects the sequence length (1 to 6 bytes, i.e. the
 * original 31-bit UTF-8 scheme); each following byte must have the form
 * 10xx-xxxx. Overlong encodings, surrogates and values above U+10FFFF are
 * not rejected by this function.
 *
 * No buffer length is passed in: the caller must guarantee that a complete
 * sequence can be read starting at *buf.
 *
 * @param code receives the decoded value on success; on failure it holds a
 *             partially decoded value that must not be used
 * @param buf  in: address of the first byte to decode; out: address of the
 *             byte following the last byte consumed. On failure the byte
 *             that was found to be invalid has been consumed as well.
 * @return 0 on success, AVERROR(EINVAL) if the lead byte is a continuation
 *         byte (10xx-xxxx), 0xFE or 0xFF, or if an expected continuation
 *         byte does not have the form 10xx-xxxx
 */
int av_get_utf8(int32_t *code, const uint8_t **buf)
{
    const uint8_t *p = *buf;
    /* Accumulate in an unsigned type: for 5- and 6-byte sequences the lead
     * bits are shifted beyond bit 31, which is undefined on a signed int. */
    uint32_t val = *p++;
    uint32_t top;
    int ret = 0;

    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
       which is not admitted */
    if ((val & 0xC0) == 0x80 || val >= 0xFE) {
        ret = AVERROR(EINVAL);
        goto end;
    }

    /* top tracks the lead-byte length bit that announces one more
     * continuation byte; it is zero for a plain ASCII byte */
    top = (val & 0x80) >> 1;
    while (val & top) {
        const uint32_t cont = *p++;

        /* continuation bytes must be in the form 10xx-xxxx */
        if ((cont & 0xC0) != 0x80) {
            ret = AVERROR(EINVAL);
            goto end;
        }
        val   = (val << 6) + (cont & 0x3F);
        top <<= 5;
    }
    /* strip the length marker bits of the lead byte */
    val &= (top << 1) - 1;

end:
    /* a successfully decoded value never exceeds 31 bits; the mask only
     * keeps the conversion well defined on the failure paths */
    *code = (int32_t)(val & 0x7FFFFFFF);
    *buf  = p;
    return ret;
}
libavutil/avstring.h
diff --git a/libavutil/avstring.h b/libavutil/avstring.h index 438ef79..8f4ba42 100644
a b 22 22 #define AVUTIL_AVSTRING_H 23 23 24 24 #include <stddef.h> 25 #include <stdint.h> 25 26 #include "attributes.h" 26 27 27 28 /** … … static inline int av_tolower(int c) 226 227 int av_isxdigit(int c); 227 228 228 229 /** 230 * Read a UTF-8 character from the buffer in *buf, and update *buf to point to the 231 * next sequence after the parsed sequence. 232 * In case of an invalid sequence, the pointer will be updated to the next 233 * character after the invalid sequence. 234 * 235 * @return >= 0 in case a sequence was successfully read, a negative 236 * value in case of an invalid sequence 237 */ 238 int av_get_utf8(int32_t *code, const uint8_t **buf); 239 240 /** 229 241 * Locale-independent case-insensitive compare. 230 242 * @note This means only ASCII-range characters are case-insensitive 231 243 */ -
new file libavutil/utf8.c
diff --git a/libavutil/utf8.c b/libavutil/utf8.c new file mode 100644 index 0000000..4c1e1c7
- + 1 /* 2 * Copyright (c) 2013 Stefano Sabatini 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #include <stdio.h> 22 23 #include "libavutil/avstring.h" 24 #include "libavutil/file.h" 25 26 static void print_sequence(const char *p, int l, int indent) 27 { 28 int i; 29 for (i = 0; i < l; i++) 30 printf("%02X", (uint8_t)p[i]); 31 for (i=l*2; i < indent; i++) 32 printf("%c", ' '); 33 } 34 35 int main(int argc, char **argv) 36 { 37 int ret; 38 char *filename = argv[1]; 39 uint8_t *file_buf; 40 size_t file_buf_size; 41 uint32_t code; 42 const uint8_t *p; 43 int i = 0; 44 45 ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL); 46 if (ret < 0) 47 goto end; 48 49 p = file_buf; 50 while (i < file_buf_size) { 51 int l, r; 52 const uint8_t *p0 = p; 53 r = av_get_utf8(&code, &p, 0); 54 l = (int)(p-p0); 55 print_sequence(p0, l, 20); 56 if (r >= 0) { 57 printf("%-10d 0x%-10X %-5d ", code, code, l); 58 if (*p0 == '\n') { printf("\\n\n"); } 59 else { printf ("%.*s\n", l, p0); } 60 } else { 61 printf("-> illegal sequence\n"); 62 } 63 i += l; 64 } 65 66 end: 67 av_file_unmap(file_buf, file_buf_size); 68 return ret < 0; 69 }
