Ticket #1163: 0001-lavu-avstring-add-av_utf8_decode-function.patch
| File 0001-lavu-avstring-add-av_utf8_decode-function.patch, 7.2 KB (added by , 13 years ago) |
|---|
-
doc/APIchanges
From ea516d668152f7c5f59615c40748fc28e510c488 Mon Sep 17 00:00:00 2001 From: Stefano Sabatini <stefasab@gmail.com> Date: Thu, 3 Oct 2013 01:21:40 +0200 Subject: [PATCH] lavu/avstring: add av_utf8_decode() function --- doc/APIchanges | 3 +++ libavutil/Makefile | 1 + libavutil/avstring.c | 53 ++++++++++++++++++++++++++++++++++++++++ libavutil/avstring.h | 26 ++++++++++++++++++++ libavutil/utf8.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ libavutil/version.h | 2 +- 6 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 libavutil/utf8.c diff --git a/doc/APIchanges b/doc/APIchanges index dfdc159..b292d19 100644
a b libavutil: 2012-10-22 15 15 16 16 API changes, most recent first: 17 17 18 2013-11-12 - xxxxxxx - lavu 52.53.100 - avstring.h 19 Add av_utf8_decode() function. 20 18 21 2013-11-xx - xxxxxxx - lavc 55.41.100 / 55.25.0 - avcodec.h 19 22 lavu 52.51.100 - frame.h 20 23 Add ITU-R BT.2020 and other not yet included values to color primaries, -
libavutil/Makefile
diff --git a/libavutil/Makefile b/libavutil/Makefile index 7b3b439..19540e4 100644
a b TESTPROGS = adler32 \ 155 155 sha \ 156 156 sha512 \ 157 157 tree \ 158 utf8 \ 158 159 xtea \ 159 160 160 161 TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo -
libavutil/avstring.c
diff --git a/libavutil/avstring.c b/libavutil/avstring.c index eed58fa..69a0230 100644
a b int av_isxdigit(int c) 307 307 return av_isdigit(c) || (c >= 'a' && c <= 'f'); 308 308 } 309 309 310 int av_utf8_decode(int32_t *code, const uint8_t **buf, size_t left, int flags) 311 { 312 const uint8_t *p = *buf; 313 uint32_t top; 314 int ret = 0; 315 316 if (!left) 317 return 0; 318 319 *code = *p++; 320 321 /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111, 322 which is not admitted */ 323 if ((*code & 0xc0) == 0x80 || *code >= 0xFE) { 324 ret = AVERROR(EILSEQ); 325 goto end; 326 } 327 top = (*code & 128) >> 1; 328 329 while (*code & top) { 330 int tmp; 331 if (!--left) { 332 ret = AVERROR(EILSEQ); /* incomplete sequence */ 333 goto end; 334 } 335 336 /* we assume the byte to be in the form 10xx-xxxx */ 337 tmp = *p++ - 128; /* strip leading 1 */ 338 if (tmp>>6) { 339 ret = AVERROR(EILSEQ); 340 goto end; 341 } 342 *code = (*code<<6) + tmp; 343 top <<= 5; 344 } 345 *code &= (top << 1) - 1; 346 347 if (flags & AV_UTF8_DECODE_FLAG_CHECK_RANGE) { 348 /* only accepts valid Unicode points: 349 #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] 350 that is any Unicode character, excluding the surrogate blocks, FFFE, and FFFF */ 351 if (*code != 0x9 && *code != 0xA && *code != 0xD && 352 (*code < 0x20 || *code > 0xD7FF) && 353 (*code < 0xE000 || *code > 0xFFFD) && 354 (*code < 0x10000 || *code > 0x10FFFF)) 355 ret = AVERROR(EILSEQ); 356 } 357 358 end: 359 *buf = p; 360 return ret; 361 } 362 310 363 #ifdef TEST 311 364 312 365 int main(void) -
libavutil/avstring.h
diff --git a/libavutil/avstring.h b/libavutil/avstring.h index 438ef79..b4f3b07 100644
/*
 * Patch hunks for libavutil/avstring.h.
 *
 * Hunk 1 (include section, right after the AVUTIL_AVSTRING_H guard):
 * <stdint.h> is added between <stddef.h> and "attributes.h"; it provides the
 * int32_t and uint8_t types used by av_utf8_decode().
 *
 * Hunk 2: the flag and the declaration below are inserted after the
 * av_escape() declaration, inside the documentation group that is closed by
 * the trailing "@}" marker.
 */
#include <stddef.h>
#include <stdint.h>

/**
 * Check Unicode range validity of the decoded code.
 *
 * With this flag set, av_utf8_decode() only accepts the code points
 * 0x9, 0xA, 0xD, 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF, that is
 * any Unicode character excluding the surrogate blocks, FFFE and FFFF.
 */
#define AV_UTF8_DECODE_FLAG_CHECK_RANGE 1

/**
 * Read and decode a single UTF-8 character sequence from the buffer in
 * *buf, and update *buf to point to the next byte after the parsed
 * sequence.
 *
 * In case of an invalid sequence, the pointer is updated to the next
 * byte after the bytes consumed while detecting the invalid sequence.
 *
 * @param code  pointer whose pointed value is updated to keep the
 *              parsed code in case of success; its content should not be
 *              relied upon when the function fails
 * @param buf   pointer to the current read position; it is advanced past
 *              the bytes consumed, and left untouched when left is 0
 * @param left  bytes left to read in the buffer. No more than 6 bytes
 *              (the maximum number of bytes in a UTF-8 sequence accepted
 *              by this function) are read per call.
 * @param flags a collection of AV_UTF8_DECODE_FLAG_* flags
 * @return >= 0 in case a sequence was successfully read (0 is also
 *         returned, with nothing read, when left is 0), a negative
 *         value in case of invalid sequence
 * @see GET_UTF8()
 */
int av_utf8_decode(int32_t *code, const uint8_t **buf, size_t left, int flags);

/**
 * @}
 */
new file libavutil/utf8.c
diff --git a/libavutil/utf8.c b/libavutil/utf8.c new file mode 100644 index 0000000..4be0e95
- + 1 /* 2 * Copyright (c) 2013 Stefano Sabatini 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #include <stdio.h> 22 23 #include "libavutil/avstring.h" 24 #include "libavutil/file.h" 25 26 static void print_sequence(const char *p, int l, int indent) 27 { 28 int i; 29 for (i = 0; i < l; i++) 30 printf("%02X", (uint8_t)p[i]); 31 for (i = l*2; i < indent; i++) 32 printf("%c", ' '); 33 } 34 35 int main(int argc, char **argv) 36 { 37 int ret; 38 char *filename = argv[1]; 39 uint8_t *file_buf; 40 size_t file_buf_size; 41 uint32_t code; 42 const uint8_t *p; 43 int i = 0; 44 45 ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL); 46 if (ret < 0) 47 goto end; 48 49 p = file_buf; 50 while (i < file_buf_size) { 51 int l, r; 52 const uint8_t *p0 = p; 53 r = av_utf8_decode(&code, &p, file_buf_size - i, 0); 54 l = (int)(p-p0); 55 print_sequence(p0, l, 20); 56 if (r >= 0) { 57 printf("%-10d 0x%-10X %-5d ", code, code, l); 58 if (*p0 == '\n') { printf("\\n\n"); } 59 else { printf ("%.*s\n", l, p0); } 60 } else { 61 printf("-> illegal sequence\n"); 62 } 63 i += l; 64 } 65 66 end: 67 av_file_unmap(file_buf, file_buf_size); 68 return ret < 0; 69 } -
libavutil/version.h
diff --git a/libavutil/version.h b/libavutil/version.h index 2e2f571..c6ec6e0 100644
a b 75 75 */ 76 76 77 77 #define LIBAVUTIL_VERSION_MAJOR 52 78 #define LIBAVUTIL_VERSION_MINOR 52 78 #define LIBAVUTIL_VERSION_MINOR 53 79 79 #define LIBAVUTIL_VERSION_MICRO 100 80 80 81 81 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
