annotate mcabber/libjabber/xmltok.c @ 1197:6f602d3270a4

Add /pgp [-]force With this command it becomes possible to enforce PGP encryption without checking if the remote client has PGP support. It can be used to send encrypted offline messages too.
author Mikael Berthe <mikael@lilotux.net>
date Fri, 27 Apr 2007 00:37:57 +0200
parents 0aa9015f06df
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1 /*
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
2 The contents of this file are subject to the Mozilla Public License
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
3 Version 1.1 (the "License"); you may not use this file except in
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
4 compliance with the License. You may obtain a copy of the License at
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
5 http://www.mozilla.org/MPL/
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
6
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
7 Software distributed under the License is distributed on an "AS IS"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
9 License for the specific language governing rights and limitations
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
10 under the License.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
11
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
12 The Original Code is expat.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
13
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
14 The Initial Developer of the Original Code is James Clark.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
15 Portions created by James Clark are Copyright (C) 1998, 1999
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
16 James Clark. All Rights Reserved.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
17
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
18 Contributor(s):
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
19
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
20 Alternatively, the contents of this file may be used under the terms
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
21 of the GNU General Public License (the "GPL"), in which case the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
22 provisions of the GPL are applicable instead of those above. If you
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
23 wish to allow use of your version of this file only under the terms of
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
24 the GPL and not to allow others to use your version of this file under
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
25 the MPL, indicate your decision by deleting the provisions above and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
26 replace them with the notice and other provisions required by the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
27 GPL. If you do not delete the provisions above, a recipient may use
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
28 your version of this file under either the MPL or the GPL.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
29 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
30
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
31 #include "xmldef.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
32 #include "xmltok.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
33 #include "nametab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
34
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
35 #define VTABLE1 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
36 { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
37 { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
38 PREFIX(sameName), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
39 PREFIX(nameMatchesAscii), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
40 PREFIX(nameLength), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
41 PREFIX(skipS), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
42 PREFIX(getAtts), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
43 PREFIX(charRefNumber), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
44 PREFIX(predefinedEntityName), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
45 PREFIX(updatePosition), \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
46 PREFIX(isPublicId)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
47
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
48 #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
49
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
50 #define UCS2_GET_NAMING(pages, hi, lo) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
51 (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
52
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
53 /* A 2 byte UTF-8 representation splits the characters 11 bits
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
54 between the bottom 5 and 6 bits of the bytes.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
55 We need 8 bits to index into pages, 3 bits to add to that index and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
56 5 bits to generate the mask. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
57 #define UTF8_GET_NAMING2(pages, byte) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
58 (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
59 + ((((byte)[0]) & 3) << 1) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
60 + ((((byte)[1]) >> 5) & 1)] \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
61 & (1 << (((byte)[1]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
62
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
63 /* A 3 byte UTF-8 representation splits the characters 16 bits
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
64 between the bottom 4, 6 and 6 bits of the bytes.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
65 We need 8 bits to index into pages, 3 bits to add to that index and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
66 5 bits to generate the mask. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
67 #define UTF8_GET_NAMING3(pages, byte) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
68 (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
69 + ((((byte)[1]) >> 2) & 0xF)] \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
70 << 3) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
71 + ((((byte)[1]) & 3) << 1) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
72 + ((((byte)[2]) >> 5) & 1)] \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
73 & (1 << (((byte)[2]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
74
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
75 #define UTF8_GET_NAMING(pages, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
76 ((n) == 2 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
77 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
78 : ((n) == 3 \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
79 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
80 : 0))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
81
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
82 #define UTF8_INVALID3(p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
83 ((*p) == 0xED \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
84 ? (((p)[1] & 0x20) != 0) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
85 : ((*p) == 0xEF \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
86 ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
87 : 0))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
88
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
89 #define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
90
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
91 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
92 int isNever(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
93 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
94 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
95 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
96
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
97 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
98 int utf8_isName2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
99 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
100 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
101 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
102
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
103 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
104 int utf8_isName3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
105 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
106 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
107 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
108
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
109 #define utf8_isName4 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
110
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
111 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
112 int utf8_isNmstrt2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
113 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
114 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
115 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
116
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
117 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
118 int utf8_isNmstrt3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
119 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
120 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
121 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
122
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
123 #define utf8_isNmstrt4 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
124
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
125 #define utf8_isInvalid2 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
126
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
127 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
128 int utf8_isInvalid3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
129 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
130 return UTF8_INVALID3((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
131 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
132
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
133 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
134 int utf8_isInvalid4(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
135 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
136 return UTF8_INVALID4((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
137 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
138
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
139 struct normal_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
140 ENCODING enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
141 unsigned char type[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
142 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
143 int (*byteType)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
144 int (*isNameMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
145 int (*isNmstrtMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
146 int (*byteToAscii)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
147 int (*charMatches)(const ENCODING *, const char *, int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
148 #endif /* XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
149 int (*isName2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
150 int (*isName3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
151 int (*isName4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
152 int (*isNmstrt2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
153 int (*isNmstrt3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
154 int (*isNmstrt4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
155 int (*isInvalid2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
156 int (*isInvalid3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
157 int (*isInvalid4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
158 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
159
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
160 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
161
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
162 #define STANDARD_VTABLE(E) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
163 E ## byteType, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
164 E ## isNameMin, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
165 E ## isNmstrtMin, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
166 E ## byteToAscii, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
167 E ## charMatches,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
168
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
169 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
170
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
171 #define STANDARD_VTABLE(E) /* as nothing */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
172
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
173 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
174
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
175 #define NORMAL_VTABLE(E) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
176 E ## isName2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
177 E ## isName3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
178 E ## isName4, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
179 E ## isNmstrt2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
180 E ## isNmstrt3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
181 E ## isNmstrt4, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
182 E ## isInvalid2, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
183 E ## isInvalid3, \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
184 E ## isInvalid4
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
186 static int checkCharRefNumber(int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
187
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
188 #include "xmltok_impl.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
189
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
190 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
191 #define sb_isNameMin isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
192 #define sb_isNmstrtMin isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
193 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
194
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
195 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
196 #define MINBPC(enc) ((enc)->minBytesPerChar)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
197 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
198 /* minimum bytes per character */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
199 #define MINBPC(enc) 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
200 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
201
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
202 #define SB_BYTE_TYPE(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
203 (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
204
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
205 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
206 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
207 int sb_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
208 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
209 return SB_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
210 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
211 #define BYTE_TYPE(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
212 (((const struct normal_encoding *)(enc))->byteType(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
213 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
214 #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
215 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
216
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
217 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
218 #define BYTE_TO_ASCII(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
219 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
220 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
221 int sb_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
222 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
223 return *p;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
224 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
225 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
226 #define BYTE_TO_ASCII(enc, p) (*p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
227 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
228
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
229 #define IS_NAME_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
230 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
231 #define IS_NMSTRT_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
232 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
233 #define IS_INVALID_CHAR(enc, p, n) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
234 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
235
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
236 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
237 #define IS_NAME_CHAR_MINBPC(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
238 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
239 #define IS_NMSTRT_CHAR_MINBPC(enc, p) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
240 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
241 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
242 #define IS_NAME_CHAR_MINBPC(enc, p) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
243 #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
244 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
245
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
246 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
247 #define CHAR_MATCHES(enc, p, c) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
248 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
249 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
250 int sb_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
251 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
252 return *p == c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
253 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
254 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
255 /* c is an ASCII character */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
256 #define CHAR_MATCHES(enc, p, c) (*(p) == c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
257 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
258
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
259 #define PREFIX(ident) normal_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
260 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
261
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
262 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
263 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
264 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
265 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
266 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
267 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
268 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
269 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
270 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
271
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
272 enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
273 UTF8_cval1 = 0x00,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
274 UTF8_cval2 = 0xc0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
275 UTF8_cval3 = 0xe0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
276 UTF8_cval4 = 0xf0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
277 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
278
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
279 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
280 void utf8_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
281 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
282 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
283 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
284 char *to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
285 const char *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
286 if (fromLim - *fromP > toLim - *toP) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
287 /* Avoid copying partial characters. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
288 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
289 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
290 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
291 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
292 for (to = *toP, from = *fromP; from != fromLim; from++, to++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
293 *to = *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
294 *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
295 *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
296 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
297
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
298 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
299 void utf8_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
300 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
301 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
302 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
303 unsigned short *to = *toP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
304 const char *from = *fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
305 while (from != fromLim && to != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
306 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
307 case BT_LEAD2:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
308 *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
309 from += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
310 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
311 case BT_LEAD3:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
312 *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
313 from += 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
314 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
315 case BT_LEAD4:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
316 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
317 unsigned long n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
318 if (to + 1 == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
319 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
320 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
321 n -= 0x10000;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
322 to[0] = (unsigned short)((n >> 10) | 0xD800);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
323 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
324 to += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
325 from += 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
326 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
327 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
328 default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
329 *to++ = *from++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
330 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
331 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
332 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
333 *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
334 *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
335 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* UTF-8 encoding descriptor, built only when XML_NS is defined.  Its
   converters are utf8_toUtf8 and utf8_toUtf16; the braced table is filled
   from asciitab.h followed by utf8tab.h (presumably the per-byte `type`
   table that utf8_toUtf16 indexes -- confirm against the struct). */
337 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
338 static const struct normal_encoding utf8_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
339 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
340 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
341 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
342 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
343 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
344 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
345 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
346 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
347
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* UTF-8 encoding descriptor built unconditionally.  BT_COLON is defined as
   BT_NMSTRT only while asciitab.h is included, so any table entry that
   header spells BT_COLON expands to BT_NMSTRT here; utf8tab.h is included
   after the #undef and is unaffected. */
348 static const struct normal_encoding utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
349 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
350 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
351 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
352 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
353 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
354 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
355 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
356 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
357 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
358
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* "Internal" UTF-8 encoding descriptor, built only when XML_NS is defined.
   Same converters as the other UTF-8 descriptors, but the table is filled
   from iasciitab.h (instead of asciitab.h) followed by utf8tab.h. */
359 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
360
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
361 static const struct normal_encoding internal_utf8_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
362 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
363 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
364 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
365 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
366 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
367 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
368 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
369
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
370 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
371
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* "Internal" UTF-8 encoding descriptor built unconditionally.  The table
   comes from iasciitab.h, included with BT_COLON temporarily defined as
   BT_NMSTRT, followed by utf8tab.h. */
372 static const struct normal_encoding internal_utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
373 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
374 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
375 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
376 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
377 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
378 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
379 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
380 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
381 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
383 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
384 void latin1_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
385 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
386 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
387 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
388 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
389 unsigned char c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
390 if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
391 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
392 c = (unsigned char)**fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
393 if (c & 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
394 if (toLim - *toP < 2)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
395 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
396 *(*toP)++ = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
397 *(*toP)++ = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
398 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
399 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
400 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
401 if (*toP == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
402 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
403 *(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
404 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
405 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
406 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
407
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
408 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
409 void latin1_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
410 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
411 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
412 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
413 while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
414 *(*toP)++ = (unsigned char)*(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
415 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
416
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Latin-1 encoding descriptor, built only when XML_NS is defined.  Its
   converters are latin1_toUtf8 and latin1_toUtf16; the table is filled
   from asciitab.h followed by latin1tab.h.  Only STANDARD_VTABLE(sb_)
   follows the table (no NORMAL_VTABLE entry, unlike the UTF-8 ones). */
417 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
418
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
419 static const struct normal_encoding latin1_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
420 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
421 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
422 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
423 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
424 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
425 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
426 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
427
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
428 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
429
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Latin-1 encoding descriptor built unconditionally.  asciitab.h is
   included with BT_COLON temporarily defined as BT_NMSTRT; latin1tab.h
   follows after the #undef. */
430 static const struct normal_encoding latin1_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
431 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
432 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
433 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
434 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
435 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
436 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
437 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
438 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
439 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
440
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
441 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
442 void ascii_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
443 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
444 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
445 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
446 while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
447 *(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
448 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
449
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* ASCII encoding descriptor, built only when XML_NS is defined.  It pairs
   ascii_toUtf8 with latin1_toUtf16.  Only asciitab.h is included, so the
   rest of the table is left to default (zero) initialization, which the
   inline note below equates with BT_NONXML. */
450 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
451
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
452 static const struct normal_encoding ascii_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
453 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
454 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
455 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
456 /* BT_NONXML == 0 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
457 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
458 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
459 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
460
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
461 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
462
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* ASCII encoding descriptor built unconditionally.  It pairs ascii_toUtf8
   with latin1_toUtf16.  asciitab.h is included with BT_COLON temporarily
   defined as BT_NMSTRT; the rest of the table is left to default (zero)
   initialization, which the inline note below equates with BT_NONXML. */
463 static const struct normal_encoding ascii_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
464 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
465 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
466 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
467 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
468 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
469 /* BT_NONXML == 0 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
470 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
471 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
472 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
473
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
474 static int unicode_byte_type(char hi, char lo)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
475 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
476 switch ((unsigned char)hi) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
477 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
478 return BT_LEAD4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
479 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
480 return BT_TRAIL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
481 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
482 switch ((unsigned char)lo) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
483 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
484 case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
485 return BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
486 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
487 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
488 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
489 return BT_NONASCII;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
490 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
491
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* DEFINE_UTF16_TO_UTF8(E) defines the static function E##toUtf8, which
   converts two-byte input units to UTF-8.  The byte order comes from the
   GET_LO/GET_HI macros in effect where the macro is expanded.

   Each unit is encoded according to its high byte:
     high == 0 and low < 0x80   one byte
     high < 0x08                two bytes
     high 0xD8..0xDB            four bytes, consuming the following unit too
     anything else              three bytes
   Conversion stops early, leaving the cursors on the unconverted unit, when
   the bytes needed do not fit before toLim.

   NOTE(review): in the four-byte case the following unit is read without
   being compared against fromLim. */
#define DEFINE_UTF16_TO_UTF8(E) \
static void E ## toUtf8(const ENCODING *enc, \
                        const char **fromP, const char *fromLim, \
                        char **toP, const char *toLim) \
{ \
  const char *src = *fromP; \
  char *dst = *toP; \
  while (src != fromLim) { \
    const unsigned char lo = GET_LO(src); \
    const unsigned char hi = GET_HI(src); \
    if (hi == 0 && lo < 0x80) { \
      if (dst == toLim) \
        break; \
      *dst++ = lo; \
    } \
    else if (hi < 0x8) { \
      if (toLim - dst < 2) \
        break; \
      *dst++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
      *dst++ = ((lo & 0x3f) | 0x80); \
    } \
    else if (hi >= 0xD8 && hi <= 0xDB) { \
      int plane; \
      unsigned char lo2; \
      if (toLim - dst < 4) \
        break; \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *dst++ = ((plane >> 2) | UTF8_cval4); \
      *dst++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      /* step onto the second unit of the pair */ \
      src += 2; \
      lo2 = GET_LO(src); \
      *dst++ = (((lo & 0x3) << 4) \
                | ((GET_HI(src) & 0x3) << 2) \
                | (lo2 >> 6) \
                | 0x80); \
      *dst++ = ((lo2 & 0x3f) | 0x80); \
    } \
    else { \
      if (toLim - dst < 3) \
        break; \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *dst++ = ((hi >> 4) | UTF8_cval3); \
      *dst++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *dst++ = ((lo & 0x3f) | 0x80); \
    } \
    src += 2; \
  } \
  *fromP = src; \
  *toP = dst; \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
554
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* DEFINE_UTF16_TO_UTF16(E) defines the static function E##toUtf16, which
   copies two-byte input units into unsigned short output units, assembling
   each from GET_HI/GET_LO as defined where the macro is expanded.

   If the input holds more units than the output can take and the high byte
   of the last input unit is in 0xD8..0xDF, the input limit is pulled back
   by one unit first (the original intent: do not copy only the first half
   of a surrogate pair).  Copying then runs until input or output is
   exhausted; both cursors are left past what was processed. */
#define DEFINE_UTF16_TO_UTF16(E) \
static void E ## toUtf16(const ENCODING *enc, \
                         const char **fromP, const char *fromLim, \
                         unsigned short **toP, const unsigned short *toLim) \
{ \
  const char *src = *fromP; \
  unsigned short *dst = *toP; \
  if (fromLim - src > ((toLim - dst) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  while (src != fromLim && dst != toLim) { \
    *dst++ = (GET_HI(src) << 8) | GET_LO(src); \
    src += 2; \
  } \
  *fromP = src; \
  *toP = dst; \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
568
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Little-endian byte accessors: byte [0] is the low byte and byte [1] the
   high byte of a two-byte unit.  They are defined just long enough to
   instantiate little2_toUtf8 and little2_toUtf16 from the DEFINE_UTF16_*
   generator macros, then undefined again. */
569 #define SET2(ptr, ch) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
570 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
571 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
572 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
573
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
574 DEFINE_UTF16_TO_UTF8(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
575 DEFINE_UTF16_TO_UTF16(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
576
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
577 #undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
578 #undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
579 #undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
580
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Big-endian byte accessors: byte [0] is the high byte and byte [1] the low
   byte of a two-byte unit.  They are defined just long enough to
   instantiate big2_toUtf8 and big2_toUtf16 from the DEFINE_UTF16_*
   generator macros, then undefined again. */
581 #define SET2(ptr, ch) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
582 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
583 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
584 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
585
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
586 DEFINE_UTF16_TO_UTF8(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
587 DEFINE_UTF16_TO_UTF16(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
588
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
589 #undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
590 #undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
591 #undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
592
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Per-character helpers for little-endian two-byte input.  At p, (p)[0] is
   the low byte and (p)[1] the high byte of the unit.  Every macro argument
   is parenthesized so that expression arguments (p + 2, a ? b : c, ...)
   expand with the intended precedence. */

/* Byte type: table lookup on the low byte when the high byte is zero,
   otherwise unicode_byte_type(high, low). */
#define LITTLE2_BYTE_TYPE(enc, p) \
  ((p)[1] == 0 \
   ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
   : unicode_byte_type((p)[1], (p)[0]))

/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)

/* Nonzero when the high byte is zero and the low byte equals c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))

/* Name / name-start tests via UCS2_GET_NAMING on (high, low). */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
603
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
604 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
605
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
606 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
607 int little2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
608 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
609 return LITTLE2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
610 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
611
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
612 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
613 int little2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
614 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
615 return LITTLE2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
616 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
617
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
618 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
619 int little2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
620 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
621 return LITTLE2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
622 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
623
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
624 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
625 int little2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
626 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
627 return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
628 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
629
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
630 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
631 int little2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
632 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
633 return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
634 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
635
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* In the XML_MIN_SIZE build the table entries are VTABLE1 followed by the
   two little2 converters. */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
638
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
639 #else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
640
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
641 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
642 #define PREFIX(ident) little2_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
643 #define MINBPC(enc) 2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
644 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
645 #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
414
ec86d759ed54 Trailing whitespace cleanup
Mikael Berthe <mikael@lilotux.net>
parents: 237
diff changeset
646 #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
647 #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
648 #define IS_NAME_CHAR(enc, p, n) 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
649 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
650 #define IS_NMSTRT_CHAR(enc, p, n) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
651 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
652
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
653 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
654
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
655 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
656 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
657 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
658 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
659 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
660 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
661 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
662 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
663 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
664
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
665 #endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
666
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
667 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
668
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
669 static const struct normal_encoding little2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
670 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
671 #if XML_BYTE_ORDER == 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
672 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
673 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
674 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
675 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
676 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
677 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
678 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
679 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
680 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
681 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
682 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
683
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
684 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
685
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
686 static const struct normal_encoding little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
687 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
688 #if XML_BYTE_ORDER == 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
689 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
690 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
691 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
692 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
693 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
694 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
695 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
696 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
697 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
698 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
699 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
700 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
701 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
702
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
703 #if XML_BYTE_ORDER != 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
704
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
705 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
706
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
707 static const struct normal_encoding internal_little2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
708 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
709 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
710 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
711 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
712 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
713 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
714 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
715
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
716 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
717
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
718 static const struct normal_encoding internal_little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
719 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
720 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
721 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
722 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
723 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
724 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
725 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
726 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
727 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
728
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
729 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
730
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
731
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Helpers for 2-byte units stored high byte first: (p)[0] is the high byte
   and (p)[1] the low byte.  Every argument is parenthesized in the
   expansion so that a compound expression (for instance a conditional
   passed as c) cannot re-associate with the surrounding operators. */

/* When the high byte is zero, look the low byte up in the type[] table of
   the normal_encoding that enc points at; otherwise defer to
   unicode_byte_type(high, low).  The table is only read, so the cast keeps
   the const qualifier. */
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 \
   ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
   : unicode_byte_type((p)[0], (p)[1]))
/* Yields the low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the high byte is zero and the low byte equals c. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Name-character test: UCS2_GET_NAMING on namePages with (high, low). */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
/* Name-start test: UCS2_GET_NAMING on nmstrtPages with (high, low). */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
742
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
743 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
744
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
745 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
746 int big2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
747 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
748 return BIG2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
749 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
750
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
751 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
752 int big2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
753 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
754 return BIG2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
755 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
756
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
757 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
758 int big2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
759 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
760 return BIG2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
761 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
762
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
763 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
764 int big2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
765 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
766 return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
767 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
768
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
769 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
770 int big2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
771 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
772 return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
773 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
774
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* In the XML_MIN_SIZE build the table entries are VTABLE1 followed by the
   two big2 converters. */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
777
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
778 #else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
779
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
780 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
781 #define PREFIX(ident) big2_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
782 #define MINBPC(enc) 2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
783 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
784 #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
414
ec86d759ed54 Trailing whitespace cleanup
Mikael Berthe <mikael@lilotux.net>
parents: 237
diff changeset
785 #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
786 #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
787 #define IS_NAME_CHAR(enc, p, n) 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
788 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
789 #define IS_NMSTRT_CHAR(enc, p, n) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
790 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
791
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
792 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
793
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
794 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
795 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
796 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
797 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
798 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
799 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
800 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
801 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
802 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
803
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
804 #endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
805
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
806 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
807
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
808 static const struct normal_encoding big2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
809 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
810 #if XML_BYTE_ORDER == 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
811 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
812 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
813 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
814 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
815 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
816 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
817 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
818 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
819 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
820 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
821 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
822
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
823 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
824
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
825 static const struct normal_encoding big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
826 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
827 #if XML_BYTE_ORDER == 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
828 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
829 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
830 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
831 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
832 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
833 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
834 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
835 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
836 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
837 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
838 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
839 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
840 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
841
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
842 #if XML_BYTE_ORDER != 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
843
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
844 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
845
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
846 static const struct normal_encoding internal_big2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
847 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
848 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
849 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
850 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
851 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
852 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
853 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
854
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
855 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
856
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
857 static const struct normal_encoding internal_big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
858 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
859 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
860 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
861 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
862 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
863 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
864 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
865 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
866 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
867
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
868 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
869
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
870 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
871
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Compare two NUL-terminated strings, treating 'a'..'z' as equal to
   'A'..'Z'.  Returns 1 when the strings match over their whole length
   (terminators included), 0 at the first difference. */
static
int streqci(const char *s1, const char *s2)
{
  char a;
  char b;

  do {
    a = *s1++;
    b = *s2++;
    /* Fold lower-case letters to upper case before comparing. */
    if (a >= 'a' && a <= 'z') {
      a = (char)(a + ('A' - 'a'));
    }
    if (b >= 'a' && b <= 'z') {
      b = (char)(b + ('A' - 'a'));
    }
    if (a != b) {
      return 0;
    }
  } while (a != '\0');

  return 1;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
889
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
890 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
891 void initUpdatePosition(const ENCODING *enc, const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
892 const char *end, POSITION *pos)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
893 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
894 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
895 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
896
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
897 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
898 int toAscii(const ENCODING *enc, const char *ptr, const char *end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
899 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
900 char buf[1];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
901 char *p = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
902 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
903 if (p == buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
904 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
905 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
906 return buf[0];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
907 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
908
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Returns 1 when c is one of 0x20, 0xD, 0xA or 0x9, and 0 for every other
   value. */
static
int isSpace(int c)
{
  return c == 0x20 || c == 0xD || c == 0xA || c == 0x9;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
921
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
922 /* Return 1 if there's just optional white space
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
923 or there's an S followed by name=val. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
924 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
925 int parsePseudoAttribute(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
926 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
927 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
928 const char **namePtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
929 const char **valPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
930 const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
931 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
932 int c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
933 char open;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
934 if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
935 *namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
936 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
937 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
938 if (!isSpace(toAscii(enc, ptr, end))) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
939 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
940 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
941 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
942 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
943 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
944 } while (isSpace(toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
945 if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
946 *namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
947 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
948 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
949 *namePtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
950 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
951 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
952 if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
953 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
954 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
955 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
956 if (c == '=')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
957 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
958 if (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
959 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
960 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
961 } while (isSpace(c = toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
962 if (c != '=') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
963 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
964 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
965 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
966 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
967 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
968 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
969 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
970 if (ptr == *namePtr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
971 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
972 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
973 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
974 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
975 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
976 while (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
977 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
978 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
979 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
980 if (c != '"' && c != '\'') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
981 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
982 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
983 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
984 open = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
985 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
986 *valPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
987 for (;; ptr += enc->minBytesPerChar) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
988 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
989 if (c == open)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
990 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
991 if (!('a' <= c && c <= 'z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
992 && !('A' <= c && c <= 'Z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
993 && !('0' <= c && c <= '9')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
994 && c != '.'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
995 && c != '-'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
996 && c != '_') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
997 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
998 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
999 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1000 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1001 *nextTokPtr = ptr + enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1002 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1003 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1004
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1005 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1006 int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1007 const char *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1008 const char *),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1009 int isGeneralTextEntity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1010 const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1011 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1012 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1013 const char **badPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1014 const char **versionPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1015 const char **encodingName,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1016 const ENCODING **encoding,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1017 int *standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1018 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1019 const char *val = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1020 const char *name = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1021 ptr += 5 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1022 end -= 2 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1023 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1024 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1025 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1026 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1027 if (!XmlNameMatchesAscii(enc, name, "version")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1028 if (!isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1029 *badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1030 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1031 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1032 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1033 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1034 if (versionPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1035 *versionPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1036 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1037 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1038 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1039 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1040 if (!name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1041 if (isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1042 /* a TextDecl must have an EncodingDecl */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1043 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1044 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1045 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1046 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1047 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1048 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1049 if (XmlNameMatchesAscii(enc, name, "encoding")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1050 int c = toAscii(enc, val, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1051 if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1052 *badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1053 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1054 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1055 if (encodingName)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1056 *encodingName = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1057 if (encoding)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1058 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1059 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1060 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1061 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1062 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1063 if (!name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1064 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1065 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1066 if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1067 *badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1068 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1069 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1070 if (XmlNameMatchesAscii(enc, val, "yes")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1071 if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1072 *standalone = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1073 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1074 else if (XmlNameMatchesAscii(enc, val, "no")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1075 if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1076 *standalone = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1077 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1078 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1079 *badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1080 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1081 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1082 while (isSpace(toAscii(enc, ptr, end)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1083 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1084 if (ptr != end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1085 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1086 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1087 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1088 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1089 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1090
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1091 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1092 int checkCharRefNumber(int result)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1093 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1094 switch (result >> 8) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1095 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1096 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1097 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1098 case 0:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1099 if (latin1_encoding.type[result] == BT_NONXML)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1100 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1101 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1102 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1103 if (result == 0xFFFE || result == 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1104 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1105 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1106 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1107 return result;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1108 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1109
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1110 int XmlUtf8Encode(int c, char *buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1111 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1112 enum {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1113 /* minN is minimum legal resulting value for N byte sequence */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1114 min2 = 0x80,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1115 min3 = 0x800,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1116 min4 = 0x10000
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1117 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1118
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1119 if (c < 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1120 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1121 if (c < min2) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1122 buf[0] = (c | UTF8_cval1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1123 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1124 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1125 if (c < min3) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1126 buf[0] = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1127 buf[1] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1128 return 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1129 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1130 if (c < min4) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1131 buf[0] = ((c >> 12) | UTF8_cval3);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1132 buf[1] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1133 buf[2] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1134 return 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1135 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1136 if (c < 0x110000) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1137 buf[0] = ((c >> 18) | UTF8_cval4);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1138 buf[1] = (((c >> 12) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1139 buf[2] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1140 buf[3] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1141 return 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1142 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1143 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1144 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1145
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Encode code point `charNum` as UTF-16 code units into `buf`.

   Returns 1 for values below 0x10000 (stored as-is), 2 for values in
   0x10000..0x10FFFF (stored as a high/low surrogate pair), and 0 when
   `charNum` is negative or not below 0x110000, in which case `buf` is
   left untouched. */
int XmlUtf16Encode(int charNum, unsigned short *buf)
{
  int offset;

  if (charNum < 0 || charNum >= 0x110000)
    return 0;

  if (charNum < 0x10000) {
    buf[0] = (unsigned short)charNum;
    return 1;
  }

  /* Split the 20-bit offset into two 10-bit halves. */
  offset = charNum - 0x10000;
  buf[0] = (unsigned short)(0xD800 + (offset >> 10));
  buf[1] = (unsigned short)(0xDC00 + (offset & 0x3FF));
  return 2;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1162
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1163 struct unknown_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1164 struct normal_encoding normal;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1165 int (*convert)(void *userData, const char *p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1166 void *userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1167 unsigned short utf16[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1168 char utf8[256][4];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1169 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1170
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1171 int XmlSizeOfUnknownEncoding()
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1172 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1173 return sizeof(struct unknown_encoding);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1174 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1175
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1176 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1177 int unknown_isName(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1178 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1179 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1180 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1181 if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1182 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1183 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1184 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1186 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1187 int unknown_isNmstrt(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1188 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1189 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1190 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1191 if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1192 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1193 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1194 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1195
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1196 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1197 int unknown_isInvalid(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1198 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1199 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1200 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1201 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1202 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1203
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1204 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1205 void unknown_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1206 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1207 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1208 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1209 char buf[XML_UTF8_ENCODE_MAX];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1210 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1211 const char *utf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1212 int n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1213 if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1214 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1215 utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1216 n = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1217 if (n == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1218 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1219 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1220 n = XmlUtf8Encode(c, buf);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1221 if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1222 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1223 utf8 = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1224 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1225 - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1226 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1227 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1228 if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1229 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1230 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1231 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1232 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1233 *(*toP)++ = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1234 } while (--n != 0);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1235 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1236 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1237
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1238 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1239 void unknown_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1240 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1241 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1242 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1243 while (*fromP != fromLim && *toP != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1244 unsigned short c
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1245 = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1246 if (c == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1247 c = (unsigned short)((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1248 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1249 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1250 - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1251 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1252 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1253 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1254 *(*toP)++ = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1255 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1256 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1257
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1258 ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1259 XmlInitUnknownEncoding(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1260 int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1261 int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1262 void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1263 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1264 int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1265 struct unknown_encoding *e = mem;
883
0aa9015f06df Remove some more libjabber warnings
Mikael Berthe <mikael@lilotux.net>
parents: 414
diff changeset
1266 for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1267 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1268 for (i = 0; i < 128; i++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1269 if (latin1_encoding.type[i] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1270 && latin1_encoding.type[i] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1271 && table[i] != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1272 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1273 for (i = 0; i < 256; i++) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1274 int c = table[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1275 if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1276 e->normal.type[i] = BT_MALFORM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1277 /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1278 e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1279 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1280 e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1281 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1282 else if (c < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1283 if (c < -4)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1284 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1285 e->normal.type[i] = BT_LEAD2 - (c + 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1286 e->utf8[i][0] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1287 e->utf16[i] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1288 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1289 else if (c < 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1290 if (latin1_encoding.type[c] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1291 && latin1_encoding.type[c] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1292 && c != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1293 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1294 e->normal.type[i] = latin1_encoding.type[c];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1295 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1296 e->utf8[i][1] = (char)c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1297 e->utf16[i] = c == 0 ? 0xFFFF : c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1298 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1299 else if (checkCharRefNumber(c) < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1300 e->normal.type[i] = BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1301 /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1302 e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1303 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1304 e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1305 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1306 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1307 if (c > 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1308 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1309 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1310 e->normal.type[i] = BT_NMSTRT;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1311 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1312 e->normal.type[i] = BT_NAME;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1313 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1314 e->normal.type[i] = BT_OTHER;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1315 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1316 e->utf16[i] = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1317 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1318 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1319 e->userData = userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1320 e->convert = convert;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1321 if (convert) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1322 e->normal.isName2 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1323 e->normal.isName3 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1324 e->normal.isName4 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1325 e->normal.isNmstrt2 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1326 e->normal.isNmstrt3 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1327 e->normal.isNmstrt4 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1328 e->normal.isInvalid2 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1329 e->normal.isInvalid3 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1330 e->normal.isInvalid4 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1331 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1332 e->normal.enc.utf8Convert = unknown_toUtf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1333 e->normal.enc.utf16Convert = unknown_toUtf16;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1334 return &(e->normal.enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1335 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Indices of the built-in encodings.

   Keep this list in step with getEncodingIndex and with the encodings
   tables: the non-negative values below NO_ENC are positions in
   encodingNames. */
enum {
  UNKNOWN_ENC    = -1,  /* a name was given but is not a built-in encoding */
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC   = 1,
  UTF_8_ENC      = 2,
  UTF_16_ENC     = 3,
  UTF_16BE_ENC   = 4,
  UTF_16LE_ENC   = 5,
  /* must match encodingNames up to here */
  NO_ENC         = 6    /* no encoding name was given at all */
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1350
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1351 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1352 int getEncodingIndex(const char *name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1353 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1354 static const char *encodingNames[] = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1355 "ISO-8859-1",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1356 "US-ASCII",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1357 "UTF-8",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1358 "UTF-16",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1359 "UTF-16BE"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1360 "UTF-16LE",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1361 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1362 int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1363 if (name == 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1364 return NO_ENC;
883
0aa9015f06df Remove some more libjabber warnings
Mikael Berthe <mikael@lilotux.net>
parents: 414
diff changeset
1365 for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1366 if (streqci(name, encodingNames[i]))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1367 return i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1368 return UNKNOWN_ENC;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1369 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1370
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* The index of the encoding named at initialization time is kept in the
   isUtf16 member of the embedded initEnc structure; reusing that existing
   member preserves binary compatibility.  The macro expands to the member
   itself, so it can be both read and assigned. */

#define INIT_ENC_INDEX(e) ((e)->initEnc.isUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1375
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1376 /* This is what detects the encoding.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1377 encodingTable maps from encoding indices to encodings;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1378 INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1379 state is XML_CONTENT_STATE if we're parsing an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1380 and XML_PROLOG_STATE otherwise.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1381 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1383
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1384 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1385 int initScan(const ENCODING **encodingTable,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1386 const INIT_ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1387 int state,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1388 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1389 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1390 const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1391 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1392 const ENCODING **encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1393
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1394 if (ptr == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1395 return XML_TOK_NONE;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1396 encPtr = enc->encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1397 if (ptr + 1 == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1398 /* only a single byte available for auto-detection */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1399 /* a well-formed document entity must have more than one byte */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1400 if (state != XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1401 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1402 /* so we're parsing an external text entity... */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1403 /* if UTF-16 was externally specified, then we need at least 2 bytes */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1404 switch (INIT_ENC_INDEX(enc)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1405 case UTF_16_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1406 case UTF_16LE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1407 case UTF_16BE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1408 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1409 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1410 switch ((unsigned char)*ptr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1411 case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1412 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1413 case 0xEF: /* possibly first byte of UTF-8 BOM */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1414 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1415 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1416 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1417 /* fall through */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1418 case 0x00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1419 case 0x3C:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1420 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1421 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1422 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1423 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1424 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1425 case 0xFEFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1426 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1427 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1428 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1429 *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1430 *encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1431 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1432 /* 00 3C is handled in the default case */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1433 case 0x3C00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1434 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1435 || INIT_ENC_INDEX(enc) == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1436 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1437 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1438 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1439 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1440 case 0xFFFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1441 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1442 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1443 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1444 *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1445 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1446 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1447 case 0xEFBB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1448 /* Maybe a UTF-8 BOM (EF BB BF) */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1449 /* If there's an explicitly specified (external) encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1450 of ISO-8859-1 or some flavour of UTF-16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1451 and this is an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1452 don't look for the BOM,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1453 because it might be a legal data. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1454 if (state == XML_CONTENT_STATE) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1455 int e = INIT_ENC_INDEX(enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1456 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1457 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1458 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1459 if (ptr + 2 == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1460 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1461 if ((unsigned char)ptr[2] == 0xBF) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1462 *encPtr = encodingTable[UTF_8_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1463 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1464 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1465 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1466 default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1467 if (ptr[0] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1468 /* 0 isn't a legal data character. Furthermore a document entity can only
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1469 start with ASCII characters. So the only way this can fail to be big-endian
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1470 UTF-16 if it it's an external parsed general entity that's labelled as
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1471 UTF-16LE. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1472 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1473 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1474 *encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1475 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1476 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1477 else if (ptr[1] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1478 /* We could recover here in the case:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1479 - parsing an external entity
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1480 - second byte is 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1481 - no externally specified encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1482 - no encoding declaration
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1483 by assuming UTF-16LE. But we don't, because this would mean when
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1484 presented just with a single byte, we couldn't reliably determine
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1485 whether we needed further bytes. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1486 if (state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1487 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1488 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1489 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1490 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1491 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1492 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1493 }
237
c8df64f43625 [/trunk] Changeset 250 by mikael
mikael
parents: 25
diff changeset
1494 *encPtr = encodingTable[(int)INIT_ENC_INDEX(enc)];
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1495 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1496 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1497
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1498
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1499 #define NS(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1500 #define ns(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1501 #include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1502 #undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1503 #undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1504
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1505 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1506
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1507 #define NS(x) x ## NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1508 #define ns(x) x ## _ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1509
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1510 #include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1511
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1512 #undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1513 #undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1514
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1515 ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1516 XmlInitUnknownEncodingNS(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1517 int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1518 int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1519 void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1520 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1521 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1522 if (enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1523 ((struct normal_encoding *)enc)->type[':'] = BT_COLON;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1524 return enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1525 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1526
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1527 #endif /* XML_NS */