24
|
1 #include <stdlib.h>
|
|
2 #include <string.h>
|
|
3
|
|
4 #include "utf8.h"
|
|
5
|
|
6
|
|
/* Desc: convert UTF8 -> single-byte (ISO-8859-1 / Latin-1) string
 *
 * In  : NUL-terminated UTF8 string (NULL is accepted)
 * Out : newly allocated single-byte string, or NULL when src is NULL
 *       or the allocation fails
 *
 * Note: it is up to the caller to free the returned string
 * Note: only two-byte sequences (lead byte 110xxxxx) are decoded; any
 *       other byte is copied through unchanged. A decoded value above
 *       0xFF keeps only its low 8 bits.
 */
char *utf8_decode(const char *src)
{
    char *ret;
    char *aux;

    if (src == NULL) {
        return NULL;
    }

    /* The output is never longer than the input, and calloc() zero-fills
     * the buffer, so the result is always NUL-terminated. */
    ret = calloc(1, strlen(src) + 1);
    if (ret == NULL) {
        return NULL;
    }
    aux = ret;

    while (*src) {
        unsigned char lead = (unsigned char)*src++;

        /* Only consume a second byte when one is actually present;
         * otherwise a truncated sequence at the end of the input would
         * step over the terminator and read out of bounds. */
        if ((lead & 0xe0) == 0xc0 && *src != '\0') {
            unsigned char ch2 = (unsigned char)*src++;

            *aux = (char)(((lead & 0x1f) << 6) | (ch2 & 0x3f));
        } else {
            *aux = (char)lead;
        }
        aux++;
    }

    return ret;
}
|
|
32
|
|
33
|
|
/* Desc: convert single-byte (ISO-8859-1 / Latin-1) string -> UTF8
 *
 * In  : NUL-terminated single-byte string (NULL is accepted)
 * Out : newly allocated UTF8 string, or NULL when src is NULL, the
 *       required size would overflow, or the allocation fails
 *
 * Note: it is up to the caller to free the returned string
 * Note: bytes below 0x80 are copied as-is; bytes 0x80..0xFF become a
 *       two-byte sequence (110000xx 10xxxxxx)
 */
char *utf8_encode(const char *src)
{
    size_t len;
    char *ret;
    char *aux;

    if (src == NULL) {
        return NULL;
    }

    len = strlen(src);
    /* Worst case is two output bytes per input byte plus the terminator;
     * refuse lengths for which that computation would wrap. */
    if (len > ((size_t)-1 - 1) / 2) {
        return NULL;
    }

    /* calloc() zero-fills, so the result is always NUL-terminated. */
    ret = calloc(1, (len * 2) + 1);
    if (ret == NULL) {
        return NULL;
    }
    aux = ret;

    while (*src) {
        unsigned char ch = (unsigned char)*src++;

        if (ch < 0x80) {
            *aux++ = (char)ch;
        } else {
            /* Lead byte carries the top two bits of ch. */
            *aux++ = (char)(0xc0 | (ch >> 6));
            /* Continuation byte must be 10xxxxxx, i.e. 0x80 | low six
             * bits; OR-ing in 0xc0 here would yield invalid UTF8. */
            *aux++ = (char)(0x80 | (ch & 0x3f));
        }
    }

    return ret;
}
|