annotate cos/python/Objects/unicodeobject.c @ 39:600f48b74799

Move ide
author windel
date Fri, 03 Feb 2012 18:40:43 +0100
parents 7f74363f4c82
children
rev   line source
27
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1 /*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3 Unicode implementation based on original code by Fredrik Lundh,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4 modified by Marc-Andre Lemburg <mal@lemburg.com>.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6 Major speed upgrades to the method implementations at the Reykjavik
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9 Copyright (c) Corporation for National Research Initiatives.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11 --------------------------------------------------------------------
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12 The original string type implementation is:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14 Copyright (c) 1999 by Secret Labs AB
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
15 Copyright (c) 1999 by Fredrik Lundh
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
16
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
17 By obtaining, using, and/or copying this software and/or its
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
18 associated documentation, you agree that you have read, understood,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
19 and will comply with the following terms and conditions:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
20
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
21 Permission to use, copy, modify, and distribute this software and its
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
22 associated documentation for any purpose and without fee is hereby
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
23 granted, provided that the above copyright notice appears in all
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
24 copies, and that both that copyright notice and this permission notice
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
25 appear in supporting documentation, and that the name of Secret Labs
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
26 AB or the author not be used in advertising or publicity pertaining to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
27 distribution of the software without specific, written prior
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
28 permission.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
29
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
30 SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
31 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
32 FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
33 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
34 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
35 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
36 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
37 --------------------------------------------------------------------
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
38
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
39 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
40
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
41 #define PY_SSIZE_T_CLEAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
42 #include "Python.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
43 #include "ucnhash.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
44
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Byte-order selection.  Little endian is the default; big endian is
   selected only when the build configuration defines WORDS_BIGENDIAN.
   Exactly one of the two BYTEORDER_IS_* macros ends up defined. */

#ifndef WORDS_BIGENDIAN
#  define BYTEORDER_IS_LITTLE_ENDIAN
#else
#  define BYTEORDER_IS_BIG_ENDIAN
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
52
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
53 /* --- Globals ------------------------------------------------------------
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
54
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
55 The globals are initialized by the _PyUnicode_Init() API and should
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
56 not be used before calling that API.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
57
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
58 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
59
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
60
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Highest code point defined by Unicode 6.0: U+10FFFF (decimal 1,114,111). */
#define MAX_UNICODE 0x10FFFF
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
63
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Object check used inside the assertions of the macros below.
   Without Py_DEBUG it is the plain PyUnicode_Check() type test; with
   Py_DEBUG it calls _PyUnicode_CheckConsistency() with check_content
   set to 0. */
#ifndef Py_DEBUG
#  define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#else
#  define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0)
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
69
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Unchecked access to the utf8 member of a PyCompactUnicodeObject. */
#define _PyUnicode_UTF8(op)                             \
    (((PyCompactUnicodeObject*)(op))->utf8)

/* Checked access to the UTF-8 bytes of a string.  Asserts that op passes
   _PyUnicode_CHECK and is ready.  For a compact ASCII string the result
   is the address directly following the PyASCIIObject header; for any
   other string it is the utf8 member. */
#define PyUnicode_UTF8(op)                              \
    (assert(_PyUnicode_CHECK(op)),                      \
     assert(PyUnicode_IS_READY(op)),                    \
     PyUnicode_IS_COMPACT_ASCII(op) ?                   \
         ((char*)((PyASCIIObject*)(op) + 1)) :          \
         _PyUnicode_UTF8(op))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Unchecked access to the utf8_length member of a
   PyCompactUnicodeObject. */
#define _PyUnicode_UTF8_LENGTH(op)                      \
    (((PyCompactUnicodeObject*)(op))->utf8_length)

/* Checked access to the UTF-8 length.  Asserts that op passes
   _PyUnicode_CHECK and is ready.  For a compact ASCII string the result
   is the length member of the PyASCIIObject; for any other string it is
   the utf8_length member. */
#define PyUnicode_UTF8_LENGTH(op)                       \
    (assert(_PyUnicode_CHECK(op)),                      \
     assert(PyUnicode_IS_READY(op)),                    \
     PyUnicode_IS_COMPACT_ASCII(op) ?                   \
         ((PyASCIIObject*)(op))->length :               \
         _PyUnicode_UTF8_LENGTH(op))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Direct field accessors.  Each one casts op to the structure that
   declares the member and expands to an lvalue; only _PyUnicode_KIND and
   _PyUnicode_GET_LENGTH assert _PyUnicode_CHECK(op) first. */

/* wstr member of the PyASCIIObject header. */
#define _PyUnicode_WSTR(op)                             \
    (((PyASCIIObject*)(op))->wstr)

/* wstr_length member of the PyCompactUnicodeObject. */
#define _PyUnicode_WSTR_LENGTH(op)                      \
    (((PyCompactUnicodeObject*)(op))->wstr_length)

/* length member of the PyASCIIObject header (unchecked). */
#define _PyUnicode_LENGTH(op)                           \
    (((PyASCIIObject *)(op))->length)

/* state member of the PyASCIIObject header. */
#define _PyUnicode_STATE(op)                            \
    (((PyASCIIObject *)(op))->state)

/* hash member of the PyASCIIObject header. */
#define _PyUnicode_HASH(op)                             \
    (((PyASCIIObject *)(op))->hash)

/* state.kind, after asserting the object check. */
#define _PyUnicode_KIND(op)                             \
    (assert(_PyUnicode_CHECK(op)),                      \
     ((PyASCIIObject *)(op))->state.kind)

/* length member, after asserting the object check. */
#define _PyUnicode_GET_LENGTH(op)                       \
    (assert(_PyUnicode_CHECK(op)),                      \
     ((PyASCIIObject *)(op))->length)

/* data.any member of the PyUnicodeObject. */
#define _PyUnicode_DATA_ANY(op)                         \
    (((PyUnicodeObject*)(op))->data.any)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
104
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* File-local replacement for PyUnicode_READY.  Asserts the object check,
   then evaluates to 0 when the string is already ready and otherwise to
   the result of _PyUnicode_Ready(op). */
#undef PyUnicode_READY
#define PyUnicode_READY(op)                             \
    (assert(_PyUnicode_CHECK(op)),                      \
     (PyUnicode_IS_READY(op) ? 0 : _PyUnicode_Ready(op)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
111
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Non-zero when the utf8 pointer is the very same address as
   PyUnicode_DATA(op).  Asserts the object check and that op is not a
   compact ASCII string. */
#define _PyUnicode_SHARE_UTF8(op)                       \
    (assert(_PyUnicode_CHECK(op)),                      \
     assert(!PyUnicode_IS_COMPACT_ASCII(op)),           \
     (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Non-zero when the wstr pointer is the very same address as
   PyUnicode_DATA(op).  Asserts the object check first.

   The expansion refers only to the macro parameter; it previously named
   a variable called "unicode", which tied every use to the caller's
   local naming. */
#define _PyUnicode_SHARE_WSTR(op)                       \
    (assert(_PyUnicode_CHECK(op)),                      \
     (_PyUnicode_WSTR(op) == PyUnicode_DATA(op)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
119
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Non-zero when the string has its own UTF-8 buffer, i.e. all of:
     - it is not a compact ASCII string,
     - its utf8 pointer is non-NULL,
     - that pointer differs from PyUnicode_DATA(op) (not shared).
   Asserts the object check first. */
#define _PyUnicode_HAS_UTF8_MEMORY(op)                  \
    (assert(_PyUnicode_CHECK(op)),                      \
     (!PyUnicode_IS_COMPACT_ASCII(op)                   \
      && _PyUnicode_UTF8(op)                            \
      && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
127
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Non-zero when the string has its own wstr buffer: the wstr pointer is
   non-NULL and either the string is not ready yet or the pointer differs
   from PyUnicode_DATA(op) (not shared).  Asserts the object check
   first. */
#define _PyUnicode_HAS_WSTR_MEMORY(op)                  \
    (assert(_PyUnicode_CHECK(op)),                      \
     (_PyUnicode_WSTR(op) &&                            \
      (!PyUnicode_IS_READY(op) ||                       \
       _PyUnicode_WSTR(op) != PyUnicode_DATA(op))))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
135
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Generic helper macro to convert characters of different types.

   from_type, to_type: valid type names of the source and destination
                       character types.
   begin, end:         pointers of type "from_type *" delimiting the
                       source characters (end is one past the last).
   to:                 destination buffer; any pointer expression, it is
                       converted to "to_type *" as a whole.

   Each source element is converted with a plain cast to to_type.  The
   destination must have room for (end - begin) elements; no bounds check
   is done here.  Every argument is evaluated exactly once.  The main
   loop copies four elements per iteration and a tail loop handles the
   remaining 0-3 elements. */
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
    do {                                                \
        /* Parenthesize `to` so the cast applies to the whole argument, */ \
        /* not just its first operand (e.g. for `buf + offset`).        */ \
        to_type *_to = (to_type *)(to);                 \
        const from_type *_iter = (begin);               \
        const from_type *_end = (end);                  \
        Py_ssize_t _n = (_end) - (_iter);               \
        /* Largest multiple of four that fits in the input length. */ \
        const from_type *_unrolled_end =                \
            _iter + (_n & ~ (Py_ssize_t) 3);            \
        while (_iter < (_unrolled_end)) {               \
            _to[0] = (to_type) _iter[0];                \
            _to[1] = (to_type) _iter[1];                \
            _to[2] = (to_type) _iter[2];                \
            _to[3] = (to_type) _iter[3];                \
            _iter += 4; _to += 4;                       \
        }                                               \
        while (_iter < (_end))                          \
            *_to++ = (to_type) *_iter++;                \
    } while (0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
159
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Mark a modified Unicode string as having no cached hash by storing -1
   in its hash field. */
#define _PyUnicode_DIRTY(op)                            \
    do {                                                \
        _PyUnicode_HASH(op) = -1;                       \
    } while (0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
162
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
163 /* This dictionary holds all interned unicode strings. Note that references
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
164 to strings in this dictionary are *not* counted in the string's ob_refcnt.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
165 When the interned string reaches a refcnt of 0 the string deallocation
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
166 function will delete the reference from this dictionary.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
167
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
168 Another way to look at this is that to say that the actual reference
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
169 count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
170 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
171 static PyObject *interned;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
172
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
173 /* The empty Unicode object is shared to improve performance. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
174 static PyObject *unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
175
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
176 /* List of static strings. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
177 static _Py_Identifier *static_strings;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
178
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
179 /* Single character Unicode strings in the Latin-1 range are being
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
180 shared as well. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
181 static PyObject *unicode_latin1[256];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
182
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Lookup table for fast detection of the most frequent whitespace
   characters.  Indexed by code point 0x00-0x7F; an entry is 1 for
   whitespace and 0 otherwise. */
const unsigned char _Py_ascii_whitespace[] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F
       0x09 CHARACTER TABULATION, 0x0A LINE FEED, 0x0B LINE TABULATION,
       0x0C FORM FEED, 0x0D CARRIAGE RETURN */
    0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F
       0x1C FILE SEPARATOR, 0x1D GROUP SEPARATOR,
       0x1E RECORD SEPARATOR, 0x1F UNIT SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 1,
    /* 0x20 - 0x27
       0x20 SPACE */
    1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x28 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
213
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
214 /* forward */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
215 static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
216 static PyObject* get_latin1_char(unsigned char ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
217 static void copy_characters(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
218 PyObject *to, Py_ssize_t to_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
219 PyObject *from, Py_ssize_t from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
220 Py_ssize_t how_many);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
221
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
222 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
223 unicode_fromascii(const unsigned char *s, Py_ssize_t size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
224 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
225 _PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
226 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
227 _PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
228 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
229 _PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
230
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
231 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
232 unicode_encode_call_errorhandler(const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
233 PyObject **errorHandler,const char *encoding, const char *reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
234 PyObject *unicode, PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
235 Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
236
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
237 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
238 raise_encode_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
239 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
240 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
241 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
242 const char *reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
243
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Lookup table for fast detection of line break characters.  Indexed by
   code point 0x00-0x7F; an entry is 1 for a line break and 0 otherwise.
   The table is read-only, hence const. */
static const unsigned char ascii_linebreak[] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F
       0x0A LINE FEED, 0x0B LINE TABULATION,
       0x0C FORM FEED, 0x0D CARRIAGE RETURN */
    0, 0, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F
       0x1C FILE SEPARATOR, 0x1D GROUP SEPARATOR, 0x1E RECORD SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 0,
    /* 0x20 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
271
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
272 /* The max unicode value is always 0x10FFFF while using the PEP-393 API.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
273 This function is kept for backward compatibility with the old API. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
274 Py_UNICODE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
275 PyUnicode_GetMax(void)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
276 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
277 #ifdef Py_UNICODE_WIDE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
278 return 0x10FFFF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
279 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
280 /* This is actually an illegal character, so it should
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
281 not be passed to unichr. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
282 return 0xFFFF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
283 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
284 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
285
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
#ifdef Py_DEBUG
/* Debug-only sanity check of the internal state of a unicode object.

   op:            object to inspect; must satisfy PyUnicode_Check().
   check_content: when non-zero, and the object is not of
                  PyUnicode_WCHAR_KIND, every character is read to verify
                  that the largest one actually requires the declared kind.

   Each violated invariant trips an assert(); if none does, the function
   returns 1, which allows callers to wrap the call itself in assert(). */
int
_PyUnicode_CheckConsistency(PyObject *op, int check_content)
{
    PyASCIIObject *ascii;
    unsigned int kind;

    assert(PyUnicode_Check(op));

    ascii = (PyASCIIObject *)op;
    kind = ascii->state.kind;

    if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
        /* compact ASCII object */
        assert(kind == PyUnicode_1BYTE_KIND);
        assert(ascii->state.ready == 1);
    }
    else {
        PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
        void *data;

        if (ascii->state.compact == 1) {
            /* compact, non-ASCII: the data pointer checked below is the
               address immediately following the compact header */
            data = compact + 1;
            assert(kind == PyUnicode_1BYTE_KIND
                   || kind == PyUnicode_2BYTE_KIND
                   || kind == PyUnicode_4BYTE_KIND);
            assert(ascii->state.ascii == 0);
            assert(ascii->state.ready == 1);
            assert (compact->utf8 != data);
        }
        else {
            PyUnicodeObject *unicode = (PyUnicodeObject *)op;

            data = unicode->data.any;
            if (kind == PyUnicode_WCHAR_KIND) {
                /* not ready yet: only the wchar_t buffer may be set */
                assert(ascii->length == 0);
                assert(ascii->hash == -1);
                assert(ascii->state.compact == 0);
                assert(ascii->state.ascii == 0);
                assert(ascii->state.ready == 0);
                assert(ascii->state.interned == SSTATE_NOT_INTERNED);
                assert(ascii->wstr != NULL);
                assert(data == NULL);
                assert(compact->utf8 == NULL);
            }
            else {
                assert(kind == PyUnicode_1BYTE_KIND
                       || kind == PyUnicode_2BYTE_KIND
                       || kind == PyUnicode_4BYTE_KIND);
                assert(ascii->state.compact == 0);
                assert(ascii->state.ready == 1);
                assert(data != NULL);
                if (ascii->state.ascii) {
                    assert (compact->utf8 == data);
                    assert (compact->utf8_length == ascii->length);
                }
                else {
                    assert (compact->utf8 != data);
                }
            }
        }
        if (kind != PyUnicode_WCHAR_KIND) {
            /* wstr is expected to share the data buffer only for the kind
               whose character width equals sizeof(wchar_t); derive that
               kind from the platform instead of assuming a 4-byte
               wchar_t. */
            const unsigned int wchar_kind = (sizeof(wchar_t) == 2)
                ? PyUnicode_2BYTE_KIND
                : PyUnicode_4BYTE_KIND;

            if (kind == wchar_kind) {
                assert(ascii->wstr == data);
                assert(compact->wstr_length == ascii->length);
            }
            else {
                assert(ascii->wstr != data);
            }
        }

        if (compact->utf8 == NULL)
            assert(compact->utf8_length == 0);
        if (ascii->wstr == NULL)
            assert(compact->wstr_length == 0);
    }
    /* check that the best kind is used */
    if (check_content && kind != PyUnicode_WCHAR_KIND)
    {
        Py_ssize_t i;
        Py_UCS4 maxchar = 0;
        void *data = PyUnicode_DATA(ascii);
        for (i=0; i < ascii->length; i++)
        {
            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
            if (ch > maxchar)
                maxchar = ch;
        }
        if (kind == PyUnicode_1BYTE_KIND) {
            if (ascii->state.ascii == 0) {
                assert(maxchar >= 128);
                assert(maxchar <= 255);
            }
            else
                assert(maxchar < 128);
        }
        else if (kind == PyUnicode_2BYTE_KIND) {
            assert(maxchar >= 0x100);
            assert(maxchar <= 0xFFFF);
        }
        else {
            assert(maxchar >= 0x10000);
            assert(maxchar <= MAX_UNICODE);
        }
    }
    return 1;
}
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
393
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
394 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
395 unicode_result_wchar(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
396 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
397 #ifndef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
398 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
399
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
400 assert(Py_REFCNT(unicode) == 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
401
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
402 len = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
403 if (len == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
404 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
405 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
406 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
407 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
408
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
409 if (len == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
410 wchar_t ch = _PyUnicode_WSTR(unicode)[0];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
411 if (ch < 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
412 PyObject *latin1_char = get_latin1_char((unsigned char)ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
413 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
414 return latin1_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
415 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
416 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
417
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
418 if (_PyUnicode_Ready(unicode) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
419 Py_XDECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
420 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
421 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
422 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
423 /* don't make the result ready in debug mode to ensure that the caller
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
424 makes the string ready before using it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
425 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
426 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
427 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
428 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
429
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
430 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
431 unicode_result_ready(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
432 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
433 Py_ssize_t length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
434
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
435 length = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
436 if (length == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
437 if (unicode != unicode_empty) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
438 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
439 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
440 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
441 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
442 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
443
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
444 if (length == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
445 Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
446 if (ch < 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
447 PyObject *latin1_char = unicode_latin1[ch];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
448 if (latin1_char != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
449 if (unicode != latin1_char) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
450 Py_INCREF(latin1_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
451 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
452 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
453 return latin1_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
454 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
455 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
456 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
457 Py_INCREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
458 unicode_latin1[ch] = unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
459 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
460 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
461 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
462 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
463
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
464 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
465 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
466 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
467
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
468 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
469 unicode_result(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
470 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
471 assert(_PyUnicode_CHECK(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
472 if (PyUnicode_IS_READY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
473 return unicode_result_ready(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
474 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
475 return unicode_result_wchar(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
476 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
477
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
#ifdef HAVE_MBCS
/* File-local OSVERSIONINFOEX record, only compiled when HAVE_MBCS is
   defined.  NOTE(review): where it is filled in and read is not visible in
   this part of the file -- confirm before relying on its contents. */
static OSVERSIONINFOEX winver;
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
481
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
482 /* --- Bloom Filters ----------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
483
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Stuff to implement simple "bloom filters" for Unicode characters.
   To keep things simple, we use a single bitmask, using the lowest
   log2(BLOOM_WIDTH) bits (5, 6 or 7, depending on the width of a long)
   of each Unicode character as the bit index. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
487
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
488 /* the linebreak mask is set up by Unicode_Init below */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
489
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Number of bits used in a bloom mask: the largest of 128, 64 or 32 that
   does not exceed LONG_BIT. */
#if LONG_BIT >= 128
#define BLOOM_WIDTH 128
#elif LONG_BIT >= 64
#define BLOOM_WIDTH 64
#elif LONG_BIT >= 32
#define BLOOM_WIDTH 32
#else
#error "LONG_BIT is smaller than 32"
#endif

/* Integer type that holds one bloom mask. */
#define BLOOM_MASK unsigned long

static BLOOM_MASK bloom_linebreak;

/* BLOOM_ADD sets, and BLOOM tests, the bit selected by the low bits of ch.
   Every macro parameter is parenthesized so that an expression argument
   such as "a | b" is combined as a whole instead of being split by operator
   precedence.  BLOOM_ADD requires mask to be a modifiable lvalue. */
#define BLOOM_ADD(mask, ch) (((mask) |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch)     (((mask) & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))

/* Line break test: characters below 128 use the ascii_linebreak table; for
   the rest the bloom mask is consulted first, and Py_UNICODE_ISLINEBREAK is
   evaluated only when the mask bit is set.  ch is evaluated several times. */
#define BLOOM_LINEBREAK(ch)                                             \
    ((ch) < 128U ? ascii_linebreak[(ch)] :                              \
     (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
510
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
511 Py_LOCAL_INLINE(BLOOM_MASK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
512 make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
513 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
514 /* calculate simple bloom-style bitmask for a given unicode string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
515
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
516 BLOOM_MASK mask;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
517 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
518
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
519 mask = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
520 for (i = 0; i < len; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
521 BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
522
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
523 return mask;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
524 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
525
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Membership test for chr in the unicode object str: the bloom mask is
   checked first, and PyUnicode_FindChar (a forward search over the whole
   string) runs only when the mask bit is set.  Note that chr and str each
   appear twice in the expansion, so arguments with side effects may be
   evaluated more than once. */
#define BLOOM_MEMBER(mask, chr, str)                                    \
    (BLOOM(mask, chr)                                                   \
     && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
529
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
530 /* Compilation of templated routines */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
531
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
532 #include "stringlib/asciilib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
533 #include "stringlib/fastsearch.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
534 #include "stringlib/partition.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
535 #include "stringlib/split.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
536 #include "stringlib/count.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
537 #include "stringlib/find.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
538 #include "stringlib/find_max_char.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
539 #include "stringlib/localeutil.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
540 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
541
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
542 #include "stringlib/ucs1lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
543 #include "stringlib/fastsearch.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
544 #include "stringlib/partition.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
545 #include "stringlib/split.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
546 #include "stringlib/count.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
547 #include "stringlib/find.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
548 #include "stringlib/find_max_char.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
549 #include "stringlib/localeutil.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
550 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
551
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
552 #include "stringlib/ucs2lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
553 #include "stringlib/fastsearch.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
554 #include "stringlib/partition.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
555 #include "stringlib/split.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
556 #include "stringlib/count.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
557 #include "stringlib/find.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
558 #include "stringlib/find_max_char.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
559 #include "stringlib/localeutil.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
560 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
561
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
562 #include "stringlib/ucs4lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
563 #include "stringlib/fastsearch.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
564 #include "stringlib/partition.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
565 #include "stringlib/split.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
566 #include "stringlib/count.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
567 #include "stringlib/find.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
568 #include "stringlib/find_max_char.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
569 #include "stringlib/localeutil.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
570 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
571
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
572 #include "stringlib/unicodedefs.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
573 #include "stringlib/fastsearch.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
574 #include "stringlib/count.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
575 #include "stringlib/find.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
576 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
577
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
578 /* --- Unicode Object ----------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
579
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
580 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
581 fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
582
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
583 Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
584 Py_ssize_t size, Py_UCS4 ch,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
585 int direction)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
586 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
587 int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
588
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
589 switch (kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
590 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
591 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
592 Py_UCS1 ch1 = (Py_UCS1) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
593 if (ch1 == ch)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
594 return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
595 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
596 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
597 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
598 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
599 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
600 Py_UCS2 ch2 = (Py_UCS2) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
601 if (ch2 == ch)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
602 return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
603 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
604 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
605 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
606 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
607 return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
608 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
609 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
610 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
611 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
612 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
613
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
614 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
615 resize_compact(PyObject *unicode, Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
616 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
617 Py_ssize_t char_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
618 Py_ssize_t struct_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
619 Py_ssize_t new_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
620 int share_wstr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
621
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
622 assert(PyUnicode_IS_READY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
623 char_size = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
624 if (PyUnicode_IS_COMPACT_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
625 struct_size = sizeof(PyASCIIObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
626 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
627 struct_size = sizeof(PyCompactUnicodeObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
628 share_wstr = _PyUnicode_SHARE_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
629
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
630 _Py_DEC_REFTOTAL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
631 _Py_ForgetReference(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
632
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
633 if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
634 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
635 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
636 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
637 new_size = (struct_size + (length + 1) * char_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
638
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
639 unicode = (PyObject *)PyObject_REALLOC((char *)unicode, new_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
640 if (unicode == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
641 PyObject_Del(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
642 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
643 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
644 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
645 _Py_NewReference(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
646 _PyUnicode_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
647 if (share_wstr) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
648 _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
649 if (!PyUnicode_IS_COMPACT_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
650 _PyUnicode_WSTR_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
651 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
652 PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
653 length, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
654 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
655 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
656
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
657 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
658 resize_inplace(PyObject *unicode, Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
659 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
660 wchar_t *wstr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
661 assert(!PyUnicode_IS_COMPACT(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
662 assert(Py_REFCNT(unicode) == 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
663
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
664 _PyUnicode_DIRTY(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
665
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
666 if (PyUnicode_IS_READY(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
667 Py_ssize_t char_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
668 Py_ssize_t new_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
669 int share_wstr, share_utf8;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
670 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
671
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
672 data = _PyUnicode_DATA_ANY(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
673 assert(data != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
674 char_size = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
675 share_wstr = _PyUnicode_SHARE_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
676 share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
677 if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
678 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
679 PyObject_DEL(_PyUnicode_UTF8(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
680 _PyUnicode_UTF8(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
681 _PyUnicode_UTF8_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
682 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
683
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
684 if (length > (PY_SSIZE_T_MAX / char_size - 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
685 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
686 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
687 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
688 new_size = (length + 1) * char_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
689
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
690 data = (PyObject *)PyObject_REALLOC(data, new_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
691 if (data == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
692 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
693 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
694 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
695 _PyUnicode_DATA_ANY(unicode) = data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
696 if (share_wstr) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
697 _PyUnicode_WSTR(unicode) = data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
698 _PyUnicode_WSTR_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
699 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
700 if (share_utf8) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
701 _PyUnicode_UTF8(unicode) = data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
702 _PyUnicode_UTF8_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
703 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
704 _PyUnicode_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
705 PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
706 if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
707 assert(_PyUnicode_CheckConsistency(unicode, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
708 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
709 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
710 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
711 assert(_PyUnicode_WSTR(unicode) != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
712
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
713 /* check for integer overflow */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
714 if (length > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
715 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
716 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
717 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
718 wstr = _PyUnicode_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
719 wstr = PyObject_REALLOC(wstr, sizeof(wchar_t) * (length + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
720 if (!wstr) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
721 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
722 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
723 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
724 _PyUnicode_WSTR(unicode) = wstr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
725 _PyUnicode_WSTR(unicode)[length] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
726 _PyUnicode_WSTR_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
727 assert(_PyUnicode_CheckConsistency(unicode, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
728 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
729 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
730
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
731 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
732 resize_copy(PyObject *unicode, Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
733 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
734 Py_ssize_t copy_length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
735 if (PyUnicode_IS_COMPACT(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
736 PyObject *copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
737 assert(PyUnicode_IS_READY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
738
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
739 copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
740 if (copy == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
741 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
742
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
743 copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
744 copy_characters(copy, 0, unicode, 0, copy_length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
745 return copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
746 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
747 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
748 PyObject *w;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
749 assert(_PyUnicode_WSTR(unicode) != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
750 assert(_PyUnicode_DATA_ANY(unicode) == NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
751 w = (PyObject*)_PyUnicode_New(length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
752 if (w == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
753 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
754 copy_length = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
755 copy_length = Py_MIN(copy_length, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
756 Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
757 copy_length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
758 return w;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
759 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
760 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
761
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
762 /* We allocate one more byte to make sure the string is
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
763 Ux0000 terminated; some code (e.g. new_identifier)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
764 relies on that.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
765
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
766 XXX This allocator could further be enhanced by assuring that the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
767 free list never reduces its size below 1.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
768
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
769 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
770
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
771 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
772 static int unicode_old_new_calls = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
773 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
774
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
775 static PyUnicodeObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
776 _PyUnicode_New(Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
777 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
778 register PyUnicodeObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
779 size_t new_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
780
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
781 /* Optimization for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
782 if (length == 0 && unicode_empty != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
783 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
784 return (PyUnicodeObject*)unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
785 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
786
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
787 /* Ensure we won't overflow the size. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
788 if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
789 return (PyUnicodeObject *)PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
790 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
791 if (length < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
792 PyErr_SetString(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
793 "Negative size passed to _PyUnicode_New");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
794 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
795 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
796
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
797 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
798 ++unicode_old_new_calls;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
799 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
800
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
801 unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
802 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
803 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
804 new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
805 _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
806 if (!_PyUnicode_WSTR(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
807 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
808 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
809 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
810
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
811 /* Initialize the first element to guard against cases where
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
812 * the caller fails before initializing str -- unicode_resize()
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
813 * reads str[0], and the Keep-Alive optimization can keep memory
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
814 * allocated for str alive across a call to unicode_dealloc(unicode).
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
815 * We don't want unicode_resize to read uninitialized memory in
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
816 * that case.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
817 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
818 _PyUnicode_WSTR(unicode)[0] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
819 _PyUnicode_WSTR(unicode)[length] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
820 _PyUnicode_WSTR_LENGTH(unicode) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
821 _PyUnicode_HASH(unicode) = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
822 _PyUnicode_STATE(unicode).interned = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
823 _PyUnicode_STATE(unicode).kind = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
824 _PyUnicode_STATE(unicode).compact = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
825 _PyUnicode_STATE(unicode).ready = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
826 _PyUnicode_STATE(unicode).ascii = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
827 _PyUnicode_DATA_ANY(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
828 _PyUnicode_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
829 _PyUnicode_UTF8(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
830 _PyUnicode_UTF8_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
831 assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
832 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
833
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
834 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
835 /* XXX UNREF/NEWREF interface should be more symmetrical */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
836 _Py_DEC_REFTOTAL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
837 _Py_ForgetReference((PyObject *)unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
838 PyObject_Del(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
839 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
840 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
841
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
842 static const char*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
843 unicode_kind_name(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
844 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
845 /* don't check consistency: unicode_kind_name() is called from
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
846 _PyUnicode_Dump() */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
847 if (!PyUnicode_IS_COMPACT(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
848 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
849 if (!PyUnicode_IS_READY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
850 return "wstr";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
851 switch(PyUnicode_KIND(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
852 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
853 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
854 if (PyUnicode_IS_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
855 return "legacy ascii";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
856 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
857 return "legacy latin1";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
858 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
859 return "legacy UCS2";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
860 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
861 return "legacy UCS4";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
862 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
863 return "<legacy invalid kind>";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
864 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
865 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
866 assert(PyUnicode_IS_READY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
867 switch(PyUnicode_KIND(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
868 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
869 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
870 if (PyUnicode_IS_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
871 return "ascii";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
872 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
873 return "latin1";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
874 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
875 return "UCS2";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
876 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
877 return "UCS4";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
878 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
879 return "<invalid compact kind>";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
880 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
881 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
882
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
883 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
884 static int unicode_new_new_calls = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
885
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
886 /* Functions wrapping macros for use in debugger */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Debugger helper: callable function form of the PyUnicode_UTF8() macro.
   Returns whatever the macro yields for `unicode`. */
char *
_PyUnicode_utf8(void *unicode)
{
    char *utf8 = PyUnicode_UTF8(unicode);

    return utf8;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
890
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Debugger helper: callable function form of the _PyUnicode_COMPACT_DATA()
   macro.  Returns whatever the macro yields for `unicode`. */
void *
_PyUnicode_compact_data(void *unicode)
{
    void *data = _PyUnicode_COMPACT_DATA(unicode);

    return data;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
894 void *_PyUnicode_data(void *unicode){
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
895 printf("obj %p\n", unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
896 printf("compact %d\n", PyUnicode_IS_COMPACT(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
897 printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
898 printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
899 printf("compact op %p\n", ((void*)((PyCompactUnicodeObject*)(unicode) + 1)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
900 printf("compact data %p\n", _PyUnicode_COMPACT_DATA(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
901 return PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
902 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
903
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
904 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
905 _PyUnicode_Dump(PyObject *op)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
906 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
907 PyASCIIObject *ascii = (PyASCIIObject *)op;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
908 PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
909 PyUnicodeObject *unicode = (PyUnicodeObject *)op;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
910 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
911
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
912 if (ascii->state.compact)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
913 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
914 if (ascii->state.ascii)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
915 data = (ascii + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
916 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
917 data = (compact + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
918 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
919 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
920 data = unicode->data.any;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
921 printf("%s: len=%zu, ",unicode_kind_name(op), ascii->length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
922
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
923 if (ascii->wstr == data)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
924 printf("shared ");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
925 printf("wstr=%p", ascii->wstr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
926
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
927 if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
928 printf(" (%zu), ", compact->wstr_length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
929 if (!ascii->state.compact && compact->utf8 == unicode->data.any)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
930 printf("shared ");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
931 printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
932 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
933 printf(", data=%p\n", data);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
934 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
935 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
936
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
937 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
938 PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
939 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
940 PyObject *obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
941 PyCompactUnicodeObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
942 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
943 int kind_state;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
944 int is_sharing, is_ascii;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
945 Py_ssize_t char_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
946 Py_ssize_t struct_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
947
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
948 /* Optimization for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
949 if (size == 0 && unicode_empty != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
950 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
951 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
952 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
953
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
954 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
955 ++unicode_new_new_calls;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
956 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
957
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
958 is_ascii = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
959 is_sharing = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
960 struct_size = sizeof(PyCompactUnicodeObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
961 if (maxchar < 128) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
962 kind_state = PyUnicode_1BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
963 char_size = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
964 is_ascii = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
965 struct_size = sizeof(PyASCIIObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
966 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
967 else if (maxchar < 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
968 kind_state = PyUnicode_1BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
969 char_size = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
970 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
971 else if (maxchar < 65536) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
972 kind_state = PyUnicode_2BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
973 char_size = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
974 if (sizeof(wchar_t) == 2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
975 is_sharing = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
976 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
977 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
978 kind_state = PyUnicode_4BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
979 char_size = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
980 if (sizeof(wchar_t) == 4)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
981 is_sharing = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
982 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
983
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
984 /* Ensure we won't overflow the size. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
985 if (size < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
986 PyErr_SetString(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
987 "Negative size passed to PyUnicode_New");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
988 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
989 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
990 if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
991 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
992
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
993 /* Duplicated allocation code from _PyObject_New() instead of a call to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
994 * PyObject_New() so we are able to allocate space for the object and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
995 * it's data buffer.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
996 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
997 obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
998 if (obj == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
999 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1000 obj = PyObject_INIT(obj, &PyUnicode_Type);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1001 if (obj == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1002 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1003
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1004 unicode = (PyCompactUnicodeObject *)obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1005 if (is_ascii)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1006 data = ((PyASCIIObject*)obj) + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1007 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1008 data = unicode + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1009 _PyUnicode_LENGTH(unicode) = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1010 _PyUnicode_HASH(unicode) = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1011 _PyUnicode_STATE(unicode).interned = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1012 _PyUnicode_STATE(unicode).kind = kind_state;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1013 _PyUnicode_STATE(unicode).compact = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1014 _PyUnicode_STATE(unicode).ready = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1015 _PyUnicode_STATE(unicode).ascii = is_ascii;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1016 if (is_ascii) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1017 ((char*)data)[size] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1018 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1019 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1020 else if (kind_state == PyUnicode_1BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1021 ((char*)data)[size] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1022 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1023 _PyUnicode_WSTR_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1024 unicode->utf8 = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1025 unicode->utf8_length = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1026 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1027 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1028 unicode->utf8 = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1029 unicode->utf8_length = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1030 if (kind_state == PyUnicode_2BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1031 ((Py_UCS2*)data)[size] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1032 else /* kind_state == PyUnicode_4BYTE_KIND */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1033 ((Py_UCS4*)data)[size] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1034 if (is_sharing) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1035 _PyUnicode_WSTR_LENGTH(unicode) = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1036 _PyUnicode_WSTR(unicode) = (wchar_t *)data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1037 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1038 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1039 _PyUnicode_WSTR_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1040 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1041 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1042 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1043 assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1044 return obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1045 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1046
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1047 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1048 _PyUnicode_Dirty(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1049 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1050 assert(_PyUnicode_CHECK(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1051 if (Py_REFCNT(unicode) != 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1052 PyErr_SetString(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1053 "Cannot modify a string having more than 1 reference");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1054 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1055 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1056 _PyUnicode_DIRTY(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1057 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1058 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1059
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1060 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1061 _copy_characters(PyObject *to, Py_ssize_t to_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1062 PyObject *from, Py_ssize_t from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1063 Py_ssize_t how_many, int check_maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1064 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1065 unsigned int from_kind, to_kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1066 void *from_data, *to_data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1067 int fast;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1068
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1069 assert(PyUnicode_Check(from));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1070 assert(PyUnicode_Check(to));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1071 assert(PyUnicode_IS_READY(from));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1072 assert(PyUnicode_IS_READY(to));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1073
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1074 assert(PyUnicode_GET_LENGTH(from) >= how_many);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1075 assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1076 assert(0 <= how_many);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1077
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1078 if (how_many == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1079 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1080
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1081 from_kind = PyUnicode_KIND(from);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1082 from_data = PyUnicode_DATA(from);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1083 to_kind = PyUnicode_KIND(to);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1084 to_data = PyUnicode_DATA(to);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1085
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1086 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1087 if (!check_maxchar
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1088 && (from_kind > to_kind
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1089 || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1090 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1091 const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1092 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1093 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1094 for (i=0; i < how_many; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1095 ch = PyUnicode_READ(from_kind, from_data, from_start + i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1096 assert(ch <= to_maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1097 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1098 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1099 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1100 fast = (from_kind == to_kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1101 if (check_maxchar
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1102 && (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1103 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1104 /* deny latin1 => ascii */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1105 fast = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1106 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1107
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1108 if (fast) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1109 Py_MEMCPY((char*)to_data + to_kind * to_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1110 (char*)from_data + from_kind * from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1111 to_kind * how_many);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1112 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1113 else if (from_kind == PyUnicode_1BYTE_KIND
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1114 && to_kind == PyUnicode_2BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1115 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1116 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1117 Py_UCS1, Py_UCS2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1118 PyUnicode_1BYTE_DATA(from) + from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1119 PyUnicode_1BYTE_DATA(from) + from_start + how_many,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1120 PyUnicode_2BYTE_DATA(to) + to_start
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1121 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1122 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1123 else if (from_kind == PyUnicode_1BYTE_KIND
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1124 && to_kind == PyUnicode_4BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1125 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1126 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1127 Py_UCS1, Py_UCS4,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1128 PyUnicode_1BYTE_DATA(from) + from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1129 PyUnicode_1BYTE_DATA(from) + from_start + how_many,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1130 PyUnicode_4BYTE_DATA(to) + to_start
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1131 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1132 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1133 else if (from_kind == PyUnicode_2BYTE_KIND
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1134 && to_kind == PyUnicode_4BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1135 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1136 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1137 Py_UCS2, Py_UCS4,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1138 PyUnicode_2BYTE_DATA(from) + from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1139 PyUnicode_2BYTE_DATA(from) + from_start + how_many,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1140 PyUnicode_4BYTE_DATA(to) + to_start
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1141 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1142 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1143 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1144 /* check if max_char(from substring) <= max_char(to) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1145 if (from_kind > to_kind
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1146 /* latin1 => ascii */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1147 || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1148 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1149 /* slow path to check for character overflow */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1150 const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1151 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1152 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1153
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1154 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1155 for (i=0; i < how_many; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1156 ch = PyUnicode_READ(from_kind, from_data, from_start + i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1157 assert(ch <= to_maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1158 PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1159 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1160 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1161 if (!check_maxchar) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1162 for (i=0; i < how_many; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1163 ch = PyUnicode_READ(from_kind, from_data, from_start + i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1164 PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1165 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1166 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1167 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1168 for (i=0; i < how_many; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1169 ch = PyUnicode_READ(from_kind, from_data, from_start + i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1170 if (ch > to_maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1171 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1172 PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1173 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1174 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1175 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1176 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1177 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1178 assert(0 && "inconsistent state");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1179 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1180 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1181 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1182 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1183 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1184
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1185 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1186 copy_characters(PyObject *to, Py_ssize_t to_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1187 PyObject *from, Py_ssize_t from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1188 Py_ssize_t how_many)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1189 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1190 (void)_copy_characters(to, to_start, from, from_start, how_many, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1191 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1192
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1193 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1194 PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1195 PyObject *from, Py_ssize_t from_start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1196 Py_ssize_t how_many)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1197 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1198 int err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1199
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1200 if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1201 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1202 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1203 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1204
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1205 if (PyUnicode_READY(from))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1206 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1207 if (PyUnicode_READY(to))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1208 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1209
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1210 how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1211 if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1212 PyErr_Format(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1213 "Cannot write %zi characters at %zi "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1214 "in a string of %zi characters",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1215 how_many, to_start, PyUnicode_GET_LENGTH(to));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1216 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1217 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1218
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1219 if (how_many == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1220 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1221
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1222 if (_PyUnicode_Dirty(to))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1223 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1224
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1225 err = _copy_characters(to, to_start, from, from_start, how_many, 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1226 if (err) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1227 PyErr_Format(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1228 "Cannot copy %s characters "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1229 "into a string of %s characters",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1230 unicode_kind_name(from),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1231 unicode_kind_name(to));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1232 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1233 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1234 return how_many;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1235 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1236
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1237 /* Find the maximum code point and count the number of surrogate pairs so a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1238 correct string length can be computed before converting a string to UCS4.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1239 This function counts single surrogates as a character and not as a pair.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1240
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1241 Return 0 on success, or -1 on error. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1242 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1243 find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1244 Py_UCS4 *maxchar, Py_ssize_t *num_surrogates)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1245 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1246 const wchar_t *iter;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1247 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1248
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1249 assert(num_surrogates != NULL && maxchar != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1250 *num_surrogates = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1251 *maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1252
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1253 for (iter = begin; iter < end; ) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1254 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1255 ch = *iter;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1256 iter++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1257 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1258 if (ch > *maxchar) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1259 *maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1260 if (*maxchar > MAX_UNICODE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1261 PyErr_Format(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1262 "character U+%x is not in range [U+0000; U+10ffff]",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1263 ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1264 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1265 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1266 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1267 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1268 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1269 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1270
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1271 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1272 _PyUnicode_Ready(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1273 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1274 wchar_t *end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1275 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1276 Py_ssize_t num_surrogates;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1277
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1278 /* _PyUnicode_Ready() is only intended for old-style API usage where
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1279 strings were created using _PyObject_New() and where no canonical
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1280 representation (the str field) has been set yet aka strings
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1281 which are not yet ready. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1282 assert(_PyUnicode_CHECK(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1283 assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1284 assert(_PyUnicode_WSTR(unicode) != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1285 assert(_PyUnicode_DATA_ANY(unicode) == NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1286 assert(_PyUnicode_UTF8(unicode) == NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1287 /* Actually, it should neither be interned nor be anything else: */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1288 assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1289
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1290 end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1291 if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1292 &maxchar, &num_surrogates) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1293 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1294
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1295 if (maxchar < 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1296 _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1297 if (!_PyUnicode_DATA_ANY(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1298 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1299 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1300 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1301 _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1302 _PyUnicode_WSTR(unicode), end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1303 PyUnicode_1BYTE_DATA(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1304 PyUnicode_1BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1305 _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1306 _PyUnicode_STATE(unicode).kind = PyUnicode_1BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1307 if (maxchar < 128) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1308 _PyUnicode_STATE(unicode).ascii = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1309 _PyUnicode_UTF8(unicode) = _PyUnicode_DATA_ANY(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1310 _PyUnicode_UTF8_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1311 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1312 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1313 _PyUnicode_STATE(unicode).ascii = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1314 _PyUnicode_UTF8(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1315 _PyUnicode_UTF8_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1316 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1317 PyObject_FREE(_PyUnicode_WSTR(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1318 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1319 _PyUnicode_WSTR_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1320 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1321 /* In this case we might have to convert down from 4-byte native
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1322 wchar_t to 2-byte unicode. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1323 else if (maxchar < 65536) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1324 assert(num_surrogates == 0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1325 "FindMaxCharAndNumSurrogatePairs() messed up");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1326
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1327 /* sizeof(wchar_t) == 4 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1328 _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1329 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1330 if (!_PyUnicode_DATA_ANY(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1331 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1332 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1333 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1334 _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1335 _PyUnicode_WSTR(unicode), end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1336 PyUnicode_2BYTE_DATA(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1337 PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1338 _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1339 _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1340 _PyUnicode_UTF8(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1341 _PyUnicode_UTF8_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1342 PyObject_FREE(_PyUnicode_WSTR(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1343 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1344 _PyUnicode_WSTR_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1345 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1346 /* maxchar exeeds 16 bit, wee need 4 bytes for unicode characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1347 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1348 assert(num_surrogates == 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1349
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1350 _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1351 _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1352 _PyUnicode_UTF8(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1353 _PyUnicode_UTF8_LENGTH(unicode) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1354 _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1355 PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1356 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1357 _PyUnicode_STATE(unicode).ready = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1358 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1359 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1360 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1361
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1362 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1363 unicode_dealloc(register PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1364 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1365 switch (PyUnicode_CHECK_INTERNED(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1366 case SSTATE_NOT_INTERNED:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1367 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1368
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1369 case SSTATE_INTERNED_MORTAL:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1370 /* revive dead object temporarily for DelItem */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1371 Py_REFCNT(unicode) = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1372 if (PyDict_DelItem(interned, unicode) != 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1373 Py_FatalError(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1374 "deletion of interned string failed");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1375 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1376
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1377 case SSTATE_INTERNED_IMMORTAL:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1378 Py_FatalError("Immortal interned string died.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1379
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1380 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1381 Py_FatalError("Inconsistent interned string state.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1382 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1383
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1384 if (_PyUnicode_HAS_WSTR_MEMORY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1385 PyObject_DEL(_PyUnicode_WSTR(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1386 if (_PyUnicode_HAS_UTF8_MEMORY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1387 PyObject_DEL(_PyUnicode_UTF8(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1388
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1389 if (PyUnicode_IS_COMPACT(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1390 Py_TYPE(unicode)->tp_free(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1391 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1392 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1393 if (_PyUnicode_DATA_ANY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1394 PyObject_DEL(_PyUnicode_DATA_ANY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1395 Py_TYPE(unicode)->tp_free(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1396 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1397 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1398
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
#ifdef Py_DEBUG
/* Debug-only helper: return 1 if `unicode` is one of the shared singleton
   strings -- the empty string object, or the cached entry of the
   unicode_latin1 table for its single character -- and 0 otherwise. */
static int
unicode_is_singleton(PyObject *unicode)
{
    PyASCIIObject *ascii;
    Py_UCS4 ch;

    if (unicode == unicode_empty)
        return 1;

    /* Only a one-character string that is not in the legacy wchar_t kind
       can be a cached Latin-1 singleton. */
    ascii = (PyASCIIObject *)unicode;
    if (ascii->state.kind == PyUnicode_WCHAR_KIND || ascii->length != 1)
        return 0;

    ch = PyUnicode_READ_CHAR(unicode, 0);
    return (ch < 256 && unicode_latin1[ch] == unicode) ? 1 : 0;
}
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1415
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1416 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1417 unicode_resizable(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1418 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1419 if (Py_REFCNT(unicode) != 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1420 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1421 if (PyUnicode_CHECK_INTERNED(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1422 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1423 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1424 /* singleton refcount is greater than 1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1425 assert(!unicode_is_singleton(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1426 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1427 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1428 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1429
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1430 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1431 unicode_resize(PyObject **p_unicode, Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1432 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1433 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1434 Py_ssize_t old_length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1435
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1436 assert(p_unicode != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1437 unicode = *p_unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1438
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1439 assert(unicode != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1440 assert(PyUnicode_Check(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1441 assert(0 <= length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1442
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1443 if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1444 old_length = PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1445 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1446 old_length = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1447 if (old_length == length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1448 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1449
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1450 if (length == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1451 Py_DECREF(*p_unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1452 *p_unicode = unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1453 Py_INCREF(*p_unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1454 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1455 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1456
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1457 if (!unicode_resizable(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1458 PyObject *copy = resize_copy(unicode, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1459 if (copy == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1460 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1461 Py_DECREF(*p_unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1462 *p_unicode = copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1463 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1464 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1465
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1466 if (PyUnicode_IS_COMPACT(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1467 *p_unicode = resize_compact(unicode, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1468 if (*p_unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1469 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1470 assert(_PyUnicode_CheckConsistency(*p_unicode, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1471 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1472 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1473 return resize_inplace(unicode, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1474 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1475
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1476 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1477 PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1478 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1479 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1480 if (p_unicode == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1481 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1482 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1483 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1484 unicode = *p_unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1485 if (unicode == NULL || !PyUnicode_Check(unicode) || length < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1486 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1487 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1488 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1489 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1490 return unicode_resize(p_unicode, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1491 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1492
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1493 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1494 unicode_widen(PyObject **p_unicode, unsigned int maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1495 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1496 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1497 assert(PyUnicode_IS_READY(*p_unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1498 if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1499 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1500 result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1501 maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1502 if (result == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1503 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1504 PyUnicode_CopyCharacters(result, 0, *p_unicode, 0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1505 PyUnicode_GET_LENGTH(*p_unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1506 Py_DECREF(*p_unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1507 *p_unicode = result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1508 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1509 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1510
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1511 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1512 unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1513 Py_UCS4 ch)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1514 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1515 if (unicode_widen(p_unicode, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1516 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1517 PyUnicode_WRITE(PyUnicode_KIND(*p_unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1518 PyUnicode_DATA(*p_unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1519 (*pos)++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1520 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1521 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1522
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1523 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1524 get_latin1_char(unsigned char ch)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1525 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1526 PyObject *unicode = unicode_latin1[ch];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1527 if (!unicode) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1528 unicode = PyUnicode_New(1, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1529 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1530 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1531 PyUnicode_1BYTE_DATA(unicode)[0] = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1532 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1533 unicode_latin1[ch] = unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1534 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1535 Py_INCREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1536 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1537 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1538
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1539 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1540 PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1541 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1542 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1543 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1544 Py_ssize_t num_surrogates;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1545
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1546 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1547 return (PyObject*)_PyUnicode_New(size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1548
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1549 /* If the Unicode data is known at construction time, we can apply
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1550 some optimizations which share commonly used objects. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1551
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1552 /* Optimization for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1553 if (size == 0 && unicode_empty != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1554 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1555 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1556 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1557
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1558 /* Single character Unicode objects in the Latin-1 range are
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1559 shared when using this constructor */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1560 if (size == 1 && *u < 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1561 return get_latin1_char((unsigned char)*u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1562
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1563 /* If not empty and not single character, copy the Unicode data
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1564 into the new object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1565 if (find_maxchar_surrogates(u, u + size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1566 &maxchar, &num_surrogates) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1567 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1568
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1569 unicode = PyUnicode_New(size - num_surrogates, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1570 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1571 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1572
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1573 switch (PyUnicode_KIND(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1574 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1575 _PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1576 u, u + size, PyUnicode_1BYTE_DATA(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1577 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1578 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1579 #if Py_UNICODE_SIZE == 2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1580 Py_MEMCPY(PyUnicode_2BYTE_DATA(unicode), u, size * 2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1581 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1582 _PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1583 u, u + size, PyUnicode_2BYTE_DATA(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1584 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1585 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1586 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1587 #if SIZEOF_WCHAR_T == 2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1588 /* This is the only case which has to process surrogates, thus
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1589 a simple copy loop is not enough and we need a function. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1590 unicode_convert_wchar_to_ucs4(u, u + size, unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1591 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1592 assert(num_surrogates == 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1593 Py_MEMCPY(PyUnicode_4BYTE_DATA(unicode), u, size * 4);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1594 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1595 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1596 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1597 assert(0 && "Impossible state");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1598 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1599
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1600 return unicode_result(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1601 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1602
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1603 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1604 PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1605 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1606 if (size < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1607 PyErr_SetString(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1608 "Negative size passed to PyUnicode_FromStringAndSize");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1609 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1610 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1611
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1612 /* If the Unicode data is known at construction time, we can apply
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1613 some optimizations which share commonly used objects.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1614 Also, this means the input must be UTF-8, so fall back to the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1615 UTF-8 decoder at the end. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1616 if (u != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1617
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1618 /* Optimization for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1619 if (size == 0 && unicode_empty != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1620 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1621 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1622 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1623
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1624 /* Single characters are shared when using this constructor.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1625 Restrict to ASCII, since the input must be UTF-8. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1626 if (size == 1 && (unsigned char)*u < 128)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1627 return get_latin1_char((unsigned char)*u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1628
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1629 return PyUnicode_DecodeUTF8(u, size, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1630 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1631
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1632 return (PyObject *)_PyUnicode_New(size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1633 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1634
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1635 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1636 PyUnicode_FromString(const char *u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1637 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1638 size_t size = strlen(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1639 if (size > PY_SSIZE_T_MAX) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1640 PyErr_SetString(PyExc_OverflowError, "input too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1641 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1642 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1643
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1644 return PyUnicode_FromStringAndSize(u, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1645 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1646
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1647 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1648 _PyUnicode_FromId(_Py_Identifier *id)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1649 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1650 if (!id->object) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1651 id->object = PyUnicode_FromString(id->string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1652 if (!id->object)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1653 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1654 PyUnicode_InternInPlace(&id->object);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1655 assert(!id->next);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1656 id->next = static_strings;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1657 static_strings = id;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1658 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1659 return id->object;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1660 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1661
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1662 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1663 _PyUnicode_ClearStaticStrings()
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1664 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1665 _Py_Identifier *i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1666 for (i = static_strings; i; i = i->next) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1667 Py_DECREF(i->object);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1668 i->object = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1669 i->next = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1670 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1671 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1672
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1673 /* Internal function, don't check maximum character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1674
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1675 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1676 unicode_fromascii(const unsigned char* s, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1677 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1678 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1679 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1680 const unsigned char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1681 const unsigned char *end = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1682 for (p=s; p < end; p++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1683 assert(*p < 128);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1684 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1685 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1686 if (size == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1687 return get_latin1_char(s[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1688 res = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1689 if (!res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1690 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1691 memcpy(PyUnicode_1BYTE_DATA(res), s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1692 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1693 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1694
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1695 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1696 kind_maxchar_limit(unsigned int kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1697 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1698 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1699 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1700 return 0x80;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1701 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1702 return 0x100;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1703 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1704 return 0x10000;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1705 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1706 assert(0 && "invalid kind");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1707 return MAX_UNICODE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1708 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1709 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1710
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1711 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1712 _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1713 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1714 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1715 unsigned char max_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1716
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1717 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1718 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1719 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1720 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1721 assert(size > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1722 if (size == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1723 return get_latin1_char(u[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1724
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1725 max_char = ucs1lib_find_max_char(u, u + size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1726 res = PyUnicode_New(size, max_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1727 if (!res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1728 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1729 memcpy(PyUnicode_1BYTE_DATA(res), u, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1730 assert(_PyUnicode_CheckConsistency(res, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1731 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1732 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1733
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1734 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1735 _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1736 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1737 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1738 Py_UCS2 max_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1739
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1740 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1741 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1742 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1743 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1744 assert(size > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1745 if (size == 1 && u[0] < 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1746 return get_latin1_char((unsigned char)u[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1747
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1748 max_char = ucs2lib_find_max_char(u, u + size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1749 res = PyUnicode_New(size, max_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1750 if (!res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1751 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1752 if (max_char >= 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1753 memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2)*size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1754 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1755 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1756 Py_UCS2, Py_UCS1, u, u + size, PyUnicode_1BYTE_DATA(res));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1757 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1758 assert(_PyUnicode_CheckConsistency(res, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1759 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1760 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1761
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1762 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1763 _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1764 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1765 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1766 Py_UCS4 max_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1767
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1768 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1769 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1770 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1771 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1772 assert(size > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1773 if (size == 1 && u[0] < 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1774 return get_latin1_char((unsigned char)u[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1776 max_char = ucs4lib_find_max_char(u, u + size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1777 res = PyUnicode_New(size, max_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1778 if (!res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1779 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1780 if (max_char < 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1781 _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1, u, u + size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1782 PyUnicode_1BYTE_DATA(res));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1783 else if (max_char < 0x10000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1784 _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2, u, u + size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1785 PyUnicode_2BYTE_DATA(res));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1786 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1787 memcpy(PyUnicode_4BYTE_DATA(res), u, sizeof(Py_UCS4)*size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1788 assert(_PyUnicode_CheckConsistency(res, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1789 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1790 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1791
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1792 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1793 PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1794 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1795 if (size < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1796 PyErr_SetString(PyExc_ValueError, "size must be positive");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1797 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1798 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1799 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1800 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1801 return _PyUnicode_FromUCS1(buffer, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1802 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1803 return _PyUnicode_FromUCS2(buffer, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1804 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1805 return _PyUnicode_FromUCS4(buffer, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1806 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1807 PyErr_SetString(PyExc_SystemError, "invalid kind");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1808 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1809 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1810 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1811
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1812 /* Ensure that a string uses the most efficient storage, if it is not the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1813 case: create a new string with of the right kind. Write NULL into *p_unicode
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1814 on error. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1815 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1816 unicode_adjust_maxchar(PyObject **p_unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1817 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1818 PyObject *unicode, *copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1819 Py_UCS4 max_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1820 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1821 unsigned int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1822
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1823 assert(p_unicode != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1824 unicode = *p_unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1825 assert(PyUnicode_IS_READY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1826 if (PyUnicode_IS_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1827 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1828
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1829 len = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1830 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1831 if (kind == PyUnicode_1BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1832 const Py_UCS1 *u = PyUnicode_1BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1833 max_char = ucs1lib_find_max_char(u, u + len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1834 if (max_char >= 128)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1835 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1836 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1837 else if (kind == PyUnicode_2BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1838 const Py_UCS2 *u = PyUnicode_2BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1839 max_char = ucs2lib_find_max_char(u, u + len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1840 if (max_char >= 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1841 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1842 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1843 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1844 const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1845 assert(kind == PyUnicode_4BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1846 max_char = ucs4lib_find_max_char(u, u + len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1847 if (max_char >= 0x10000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1848 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1849 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1850 copy = PyUnicode_New(len, max_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1851 copy_characters(copy, 0, unicode, 0, len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1852 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1853 *p_unicode = copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1854 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1855
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1856 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1857 PyUnicode_Copy(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1858 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1859 Py_ssize_t length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1860 PyObject *copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1861
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1862 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1863 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1864 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1865 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1866 if (PyUnicode_READY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1867 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1868
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1869 length = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1870 copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1871 if (!copy)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1872 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1873 assert(PyUnicode_KIND(copy) == PyUnicode_KIND(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1874
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1875 Py_MEMCPY(PyUnicode_DATA(copy), PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1876 length * PyUnicode_KIND(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1877 assert(_PyUnicode_CheckConsistency(copy, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1878 return copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1879 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1880
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1881
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1882 /* Widen Unicode objects to larger buffers. Don't write terminating null
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1883 character. Return NULL on error. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1884
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1885 void*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1886 _PyUnicode_AsKind(PyObject *s, unsigned int kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1887 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1888 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1889 void *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1890 unsigned int skind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1891
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1892 if (PyUnicode_READY(s))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1893 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1894
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1895 len = PyUnicode_GET_LENGTH(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1896 skind = PyUnicode_KIND(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1897 if (skind >= kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1898 PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1899 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1900 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1901 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1902 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1903 result = PyMem_Malloc(len * sizeof(Py_UCS2));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1904 if (!result)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1905 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1906 assert(skind == PyUnicode_1BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1907 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1908 Py_UCS1, Py_UCS2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1909 PyUnicode_1BYTE_DATA(s),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1910 PyUnicode_1BYTE_DATA(s) + len,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1911 result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1912 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1913 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1914 result = PyMem_Malloc(len * sizeof(Py_UCS4));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1915 if (!result)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1916 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1917 if (skind == PyUnicode_2BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1918 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1919 Py_UCS2, Py_UCS4,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1920 PyUnicode_2BYTE_DATA(s),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1921 PyUnicode_2BYTE_DATA(s) + len,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1922 result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1923 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1924 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1925 assert(skind == PyUnicode_1BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1926 _PyUnicode_CONVERT_BYTES(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1927 Py_UCS1, Py_UCS4,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1928 PyUnicode_1BYTE_DATA(s),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1929 PyUnicode_1BYTE_DATA(s) + len,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1930 result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1931 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1932 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1933 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1934 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1935 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1936 PyErr_SetString(PyExc_SystemError, "invalid kind");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1937 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1938 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1939
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1940 static Py_UCS4*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1941 as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1942 int copy_null)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1943 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1944 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1945 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1946 Py_ssize_t len, targetlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1947 if (PyUnicode_READY(string) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1948 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1949 kind = PyUnicode_KIND(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1950 data = PyUnicode_DATA(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1951 len = PyUnicode_GET_LENGTH(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1952 targetlen = len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1953 if (copy_null)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1954 targetlen++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1955 if (!target) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1956 if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1957 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1958 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1959 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1960 target = PyMem_Malloc(targetlen * sizeof(Py_UCS4));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1961 if (!target) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1962 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1963 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1964 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1965 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1966 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1967 if (targetsize < targetlen) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1968 PyErr_Format(PyExc_SystemError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1969 "string is longer than the buffer");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1970 if (copy_null && 0 < targetsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1971 target[0] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1972 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1973 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1974 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1975 if (kind == PyUnicode_1BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1976 Py_UCS1 *start = (Py_UCS1 *) data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1977 _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1978 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1979 else if (kind == PyUnicode_2BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1980 Py_UCS2 *start = (Py_UCS2 *) data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1981 _PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1982 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1983 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1984 assert(kind == PyUnicode_4BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1985 Py_MEMCPY(target, data, len * sizeof(Py_UCS4));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1986 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1987 if (copy_null)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1988 target[len] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1989 return target;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1990 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1991
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1992 Py_UCS4*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1993 PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1994 int copy_null)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1995 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1996 if (target == NULL || targetsize < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1997 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1998 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
1999 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2000 return as_ucs4(string, target, targetsize, copy_null);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2001 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2002
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2003 Py_UCS4*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2004 PyUnicode_AsUCS4Copy(PyObject *string)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2005 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2006 return as_ucs4(string, NULL, 0, 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2007 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2008
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2009 #ifdef HAVE_WCHAR_H
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2010
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2011 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2012 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2013 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2014 if (w == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2015 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2016 return PyUnicode_New(0, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2017 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2018 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2019 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2020
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2021 if (size == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2022 size = wcslen(w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2023 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2024
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2025 return PyUnicode_FromUnicode(w, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2026 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2027
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2028 #endif /* HAVE_WCHAR_H */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2029
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2030 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2031 makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2032 int zeropad, int width, int precision, char c)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2033 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2034 *fmt++ = '%';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2035 if (width) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2036 if (zeropad)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2037 *fmt++ = '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2038 fmt += sprintf(fmt, "%d", width);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2039 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2040 if (precision)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2041 fmt += sprintf(fmt, ".%d", precision);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2042 if (longflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2043 *fmt++ = 'l';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2044 else if (longlongflag) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2045 /* longlongflag should only ever be nonzero on machines with
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2046 HAVE_LONG_LONG defined */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2047 #ifdef HAVE_LONG_LONG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2048 char *f = PY_FORMAT_LONG_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2049 while (*f)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2050 *fmt++ = *f++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2051 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2052 /* we shouldn't ever get here */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2053 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2054 *fmt++ = 'l';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2055 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2056 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2057 else if (size_tflag) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2058 char *f = PY_FORMAT_SIZE_T;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2059 while (*f)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2060 *fmt++ = *f++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2061 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2062 *fmt++ = c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2063 *fmt = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2064 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2065
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* helper for PyUnicode_FromFormatV()

   Parse the "width.precision" part and the length modifier of one conversion
   specification.  `f` is expected to point at the introducing '%'; the
   character at f[0] is skipped without being inspected.

   Each output pointer may be NULL, in which case that value is not stored:
     *p_width        decimal field width, 0 if absent
     *p_precision    decimal precision following '.', 0 if absent
     *p_longflag     1 for "l"  followed by d/u/i
     *p_longlongflag 1 for "ll" followed by d/u/i (only with HAVE_LONG_LONG)
     *p_size_tflag   1 for "z"  followed by d/u/i

   Returns a pointer to the character following the parsed part (normally the
   conversion character).  For a truncated specification such as "%.1" the
   pointer is moved back one character so it never rests on the terminating
   '\0'.

   NOTE(review): the digit accumulation below has no overflow guard, so a
   very long run of digits overflows `int`. */
static const char*
parse_format_flags(const char *f,
                   int *p_width, int *p_precision,
                   int *p_longflag, int *p_longlongflag, int *p_size_tflag)
{
    const char *cur = f + 1;        /* step over the leading '%' */
    int field_width = 0;
    int field_precision = 0;
    int is_long = 0;
    int is_longlong = 0;
    int is_size_t = 0;

    /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
    for (; Py_ISDIGIT((unsigned)*cur); cur++) {
        field_width = (field_width * 10) + *cur - '0';
    }
    if (*cur == '.') {
        for (cur++; Py_ISDIGIT((unsigned)*cur); cur++) {
            field_precision = (field_precision * 10) + *cur - '0';
        }
        if (*cur == '%') {
            /* "%.3%s" => step back so that cur points to "3" */
            cur--;
        }
    }
    if (*cur == '\0') {
        /* bogus format "%.1" => go backward, cur points to "1" */
        cur--;
    }

    if (p_width != NULL) {
        *p_width = field_width;
    }
    if (p_precision != NULL) {
        *p_precision = field_precision;
    }

    /* Length modifiers are honoured only in front of d, u or i. */
    if (*cur == 'l') {
        /* Handle %ld, %lu, %li and (with HAVE_LONG_LONG) %lld, %llu, %lli. */
        const char next = cur[1];

        if (next == 'd' || next == 'u' || next == 'i') {
            is_long = 1;
            cur += 1;
        }
#ifdef HAVE_LONG_LONG
        else if (next == 'l') {
            const char conv = cur[2];

            if (conv == 'd' || conv == 'u' || conv == 'i') {
                is_longlong = 1;
                cur += 2;
            }
        }
#endif
    }
    else if (*cur == 'z') {
        /* handle the size_t flag. */
        const char next = cur[1];

        if (next == 'd' || next == 'u' || next == 'i') {
            is_size_t = 1;
            cur += 1;
        }
    }

    if (p_longflag != NULL) {
        *p_longflag = is_long;
    }
    if (p_longlongflag != NULL) {
        *p_longlongflag = is_longlong;
    }
    if (p_size_tflag != NULL) {
        *p_size_tflag = is_size_t;
    }
    return cur;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2130
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2131 /* maximum number of characters required for output of %ld. 21 characters
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2132 allows for 64-bit integers (in decimal) and an optional sign. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2133 #define MAX_LONG_CHARS 21
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2134 /* maximum number of characters required for output of %lld.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2135 We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2136 plus 1 for the sign. 53/22 is an upper bound for log10(256). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2137 #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2138
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2139 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2140 PyUnicode_FromFormatV(const char *format, va_list vargs)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2141 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2142 va_list count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2143 Py_ssize_t callcount = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2144 PyObject **callresults = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2145 PyObject **callresult = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2146 Py_ssize_t n = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2147 int width = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2148 int precision = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2149 int zeropad;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2150 const char* f;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2151 PyObject *string;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2152 /* used by sprintf */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2153 char fmt[61]; /* should be enough for %0width.precisionlld */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2154 Py_UCS4 maxchar = 127; /* result is ASCII by default */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2155 Py_UCS4 argmaxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2156 Py_ssize_t numbersize = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2157 char *numberresults = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2158 char *numberresult = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2159 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2160 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2161 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2162
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2163 Py_VA_COPY(count, vargs);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2164 /* step 1: count the number of %S/%R/%A/%s format specifications
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2165 * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2166 * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2167 * result in an array)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2168 * also estimate an upper bound for all the number formats in the string,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2169 * numbers will be formatted in step 3 and be kept in a '\0'-separated
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2170 * buffer before putting everything together. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2171 for (f = format; *f; f++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2172 if (*f == '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2173 int longlongflag;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2174 /* skip width or width.precision (eg. "1.2" of "%1.2f") */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2175 f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2176 if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2177 ++callcount;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2178
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2179 else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2180 #ifdef HAVE_LONG_LONG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2181 if (longlongflag) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2182 if (width < MAX_LONG_LONG_CHARS)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2183 width = MAX_LONG_LONG_CHARS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2184 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2185 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2186 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2187 /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2188 including sign. Decimal takes the most space. This
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2189 isn't enough for octal. If a width is specified we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2190 need more (which we allocate later). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2191 if (width < MAX_LONG_CHARS)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2192 width = MAX_LONG_CHARS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2193
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2194 /* account for the size + '\0' to separate numbers
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2195 inside of the numberresults buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2196 numbersize += (width + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2197 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2198 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2199 else if ((unsigned char)*f > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2200 PyErr_Format(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2201 "PyUnicode_FromFormatV() expects an ASCII-encoded format "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2202 "string, got a non-ASCII byte: 0x%02x",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2203 (unsigned char)*f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2204 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2205 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2206 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2207 /* step 2: allocate memory for the results of
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2208 * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2209 if (callcount) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2210 callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2211 if (!callresults) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2212 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2213 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2214 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2215 callresult = callresults;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2216 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2217 /* step 2.5: allocate memory for the results of formatting numbers */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2218 if (numbersize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2219 numberresults = PyObject_Malloc(numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2220 if (!numberresults) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2221 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2222 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2223 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2224 numberresult = numberresults;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2225 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2226
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2227 /* step 3: format numbers and figure out how large a buffer we need */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2228 for (f = format; *f; f++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2229 if (*f == '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2230 const char* p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2231 int longflag;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2232 int longlongflag;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2233 int size_tflag;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2234 int numprinted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2235
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2236 p = f;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2237 zeropad = (f[1] == '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2238 f = parse_format_flags(f, &width, &precision,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2239 &longflag, &longlongflag, &size_tflag);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2240 switch (*f) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2241 case 'c':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2242 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2243 Py_UCS4 ordinal = va_arg(count, int);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2244 maxchar = Py_MAX(maxchar, ordinal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2245 n++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2246 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2247 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2248 case '%':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2249 n++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2250 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2251 case 'i':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2252 case 'd':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2253 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2254 width, precision, *f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2255 if (longflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2256 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2257 va_arg(count, long));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2258 #ifdef HAVE_LONG_LONG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2259 else if (longlongflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2260 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2261 va_arg(count, PY_LONG_LONG));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2262 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2263 else if (size_tflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2264 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2265 va_arg(count, Py_ssize_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2266 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2267 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2268 va_arg(count, int));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2269 n += numprinted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2270 /* advance by +1 to skip over the '\0' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2271 numberresult += (numprinted + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2272 assert(*(numberresult - 1) == '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2273 assert(*(numberresult - 2) != '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2274 assert(numprinted >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2275 assert(numberresult <= numberresults + numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2276 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2277 case 'u':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2278 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2279 width, precision, 'u');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2280 if (longflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2281 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2282 va_arg(count, unsigned long));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2283 #ifdef HAVE_LONG_LONG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2284 else if (longlongflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2285 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2286 va_arg(count, unsigned PY_LONG_LONG));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2287 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2288 else if (size_tflag)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2289 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2290 va_arg(count, size_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2291 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2292 numprinted = sprintf(numberresult, fmt,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2293 va_arg(count, unsigned int));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2294 n += numprinted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2295 numberresult += (numprinted + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2296 assert(*(numberresult - 1) == '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2297 assert(*(numberresult - 2) != '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2298 assert(numprinted >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2299 assert(numberresult <= numberresults + numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2300 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2301 case 'x':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2302 makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2303 numprinted = sprintf(numberresult, fmt, va_arg(count, int));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2304 n += numprinted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2305 numberresult += (numprinted + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2306 assert(*(numberresult - 1) == '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2307 assert(*(numberresult - 2) != '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2308 assert(numprinted >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2309 assert(numberresult <= numberresults + numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2310 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2311 case 'p':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2312 numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2313 /* %p is ill-defined: ensure leading 0x. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2314 if (numberresult[1] == 'X')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2315 numberresult[1] = 'x';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2316 else if (numberresult[1] != 'x') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2317 memmove(numberresult + 2, numberresult,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2318 strlen(numberresult) + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2319 numberresult[0] = '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2320 numberresult[1] = 'x';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2321 numprinted += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2322 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2323 n += numprinted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2324 numberresult += (numprinted + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2325 assert(*(numberresult - 1) == '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2326 assert(*(numberresult - 2) != '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2327 assert(numprinted >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2328 assert(numberresult <= numberresults + numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2329 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2330 case 's':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2331 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2332 /* UTF-8 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2333 const char *s = va_arg(count, const char*);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2334 PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2335 if (!str)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2336 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2337 /* since PyUnicode_DecodeUTF8 returns already flexible
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2338 unicode objects, there is no need to call ready on them */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2339 argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2340 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2341 n += PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2342 /* Remember the str and switch to the next slot */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2343 *callresult++ = str;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2344 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2345 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2346 case 'U':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2347 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2348 PyObject *obj = va_arg(count, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2349 assert(obj && _PyUnicode_CHECK(obj));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2350 if (PyUnicode_READY(obj) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2351 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2352 argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2353 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2354 n += PyUnicode_GET_LENGTH(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2355 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2356 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2357 case 'V':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2358 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2359 PyObject *obj = va_arg(count, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2360 const char *str = va_arg(count, const char *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2361 PyObject *str_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2362 assert(obj || str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2363 assert(!obj || _PyUnicode_CHECK(obj));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2364 if (obj) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2365 if (PyUnicode_READY(obj) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2366 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2367 argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2368 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2369 n += PyUnicode_GET_LENGTH(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2370 *callresult++ = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2371 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2372 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2373 str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2374 if (!str_obj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2375 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2376 if (PyUnicode_READY(str_obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2377 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2378 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2379 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2380 argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2381 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2382 n += PyUnicode_GET_LENGTH(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2383 *callresult++ = str_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2384 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2385 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2386 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2387 case 'S':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2388 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2389 PyObject *obj = va_arg(count, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2390 PyObject *str;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2391 assert(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2392 str = PyObject_Str(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2393 if (!str || PyUnicode_READY(str) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2394 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2395 argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2396 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2397 n += PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2398 /* Remember the str and switch to the next slot */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2399 *callresult++ = str;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2400 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2401 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2402 case 'R':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2403 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2404 PyObject *obj = va_arg(count, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2405 PyObject *repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2406 assert(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2407 repr = PyObject_Repr(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2408 if (!repr || PyUnicode_READY(repr) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2409 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2410 argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2411 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2412 n += PyUnicode_GET_LENGTH(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2413 /* Remember the repr and switch to the next slot */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2414 *callresult++ = repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2415 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2416 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2417 case 'A':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2418 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2419 PyObject *obj = va_arg(count, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2420 PyObject *ascii;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2421 assert(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2422 ascii = PyObject_ASCII(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2423 if (!ascii || PyUnicode_READY(ascii) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2424 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2425 argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2426 maxchar = Py_MAX(maxchar, argmaxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2427 n += PyUnicode_GET_LENGTH(ascii);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2428 /* Remember the repr and switch to the next slot */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2429 *callresult++ = ascii;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2430 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2431 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2432 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2433 /* if we stumble upon an unknown
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2434 formatting code, copy the rest of
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2435 the format string to the output
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2436 string. (we cannot just skip the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2437 code, since there's no way to know
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2438 what's in the argument list) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2439 n += strlen(p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2440 goto expand;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2441 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2442 } else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2443 n++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2444 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2445 expand:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2446 /* step 4: fill the buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2447 /* Since we've analyzed how much space we need,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2448 we don't have to resize the string.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2449 There can be no errors beyond this point. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2450 string = PyUnicode_New(n, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2451 if (!string)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2452 goto fail;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2453 kind = PyUnicode_KIND(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2454 data = PyUnicode_DATA(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2455 callresult = callresults;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2456 numberresult = numberresults;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2457
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2458 for (i = 0, f = format; *f; f++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2459 if (*f == '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2460 const char* p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2461
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2462 p = f;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2463 f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2464 /* checking for == because the last argument could be a empty
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2465 string, which causes i to point to end, the assert at the end of
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2466 the loop */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2467 assert(i <= PyUnicode_GET_LENGTH(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2468
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2469 switch (*f) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2470 case 'c':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2471 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2472 const int ordinal = va_arg(vargs, int);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2473 PyUnicode_WRITE(kind, data, i++, ordinal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2474 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2475 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2476 case 'i':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2477 case 'd':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2478 case 'u':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2479 case 'x':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2480 case 'p':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2481 /* unused, since we already have the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2482 if (*f == 'p')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2483 (void) va_arg(vargs, void *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2484 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2485 (void) va_arg(vargs, int);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2486 /* extract the result from numberresults and append. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2487 for (; *numberresult; ++i, ++numberresult)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2488 PyUnicode_WRITE(kind, data, i, *numberresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2489 /* skip over the separating '\0' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2490 assert(*numberresult == '\0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2491 numberresult++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2492 assert(numberresult <= numberresults + numbersize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2493 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2494 case 's':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2495 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2496 /* unused, since we already have the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2497 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2498 (void) va_arg(vargs, char *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2499 size = PyUnicode_GET_LENGTH(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2500 assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2501 copy_characters(string, i, *callresult, 0, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2502 i += size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2503 /* We're done with the unicode()/repr() => forget it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2504 Py_DECREF(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2505 /* switch to next unicode()/repr() result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2506 ++callresult;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2507 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2508 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2509 case 'U':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2510 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2511 PyObject *obj = va_arg(vargs, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2512 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2513 assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2514 size = PyUnicode_GET_LENGTH(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2515 copy_characters(string, i, obj, 0, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2516 i += size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2517 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2518 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2519 case 'V':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2520 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2521 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2522 PyObject *obj = va_arg(vargs, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2523 va_arg(vargs, const char *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2524 if (obj) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2525 size = PyUnicode_GET_LENGTH(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2526 assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2527 copy_characters(string, i, obj, 0, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2528 i += size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2529 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2530 size = PyUnicode_GET_LENGTH(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2531 assert(PyUnicode_KIND(*callresult) <=
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2532 PyUnicode_KIND(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2533 copy_characters(string, i, *callresult, 0, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2534 i += size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2535 Py_DECREF(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2536 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2537 ++callresult;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2538 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2539 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2540 case 'S':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2541 case 'R':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2542 case 'A':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2543 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2544 Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2545 /* unused, since we already have the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2546 (void) va_arg(vargs, PyObject *);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2547 assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2548 copy_characters(string, i, *callresult, 0, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2549 i += size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2550 /* We're done with the unicode()/repr() => forget it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2551 Py_DECREF(*callresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2552 /* switch to next unicode()/repr() result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2553 ++callresult;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2554 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2555 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2556 case '%':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2557 PyUnicode_WRITE(kind, data, i++, '%');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2558 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2559 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2560 for (; *p; ++p, ++i)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2561 PyUnicode_WRITE(kind, data, i, *p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2562 assert(i == PyUnicode_GET_LENGTH(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2563 goto end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2564 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2565 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2566 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2567 assert(i < PyUnicode_GET_LENGTH(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2568 PyUnicode_WRITE(kind, data, i++, *f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2569 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2570 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2571 assert(i == PyUnicode_GET_LENGTH(string));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2572
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2573 end:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2574 if (callresults)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2575 PyObject_Free(callresults);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2576 if (numberresults)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2577 PyObject_Free(numberresults);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2578 return unicode_result(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2579 fail:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2580 if (callresults) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2581 PyObject **callresult2 = callresults;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2582 while (callresult2 < callresult) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2583 Py_XDECREF(*callresult2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2584 ++callresult2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2585 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2586 PyObject_Free(callresults);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2587 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2588 if (numberresults)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2589 PyObject_Free(numberresults);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2590 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2591 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2592
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2593 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2594 PyUnicode_FromFormat(const char *format, ...)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2595 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2596 PyObject* ret;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2597 va_list vargs;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2598
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2599 #ifdef HAVE_STDARG_PROTOTYPES
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2600 va_start(vargs, format);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2601 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2602 va_start(vargs);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2603 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2604 ret = PyUnicode_FromFormatV(format, vargs);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2605 va_end(vargs);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2606 return ret;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2607 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2608
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2609 #ifdef HAVE_WCHAR_H
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2610
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2611 /* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString():
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2612 convert a Unicode object to a wide character string.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2613
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2614 - If w is NULL: return the number of wide characters (including the null
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2615 character) required to convert the unicode object. Ignore size argument.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2616
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2617 - Otherwise: return the number of wide characters (excluding the null
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2618 character) written into w. Write at most size wide characters (including
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2619 the null character). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2620 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2621 unicode_aswidechar(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2622 wchar_t *w,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2623 Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2624 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2625 Py_ssize_t res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2626 const wchar_t *wstr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2627
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2628 wstr = PyUnicode_AsUnicodeAndSize(unicode, &res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2629 if (wstr == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2630 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2631
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2632 if (w != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2633 if (size > res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2634 size = res + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2635 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2636 res = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2637 Py_MEMCPY(w, wstr, size * sizeof(wchar_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2638 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2639 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2640 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2641 return res + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2642 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2643
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2644 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2645 PyUnicode_AsWideChar(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2646 wchar_t *w,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2647 Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2648 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2649 if (unicode == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2650 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2651 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2652 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2653 return unicode_aswidechar(unicode, w, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2654 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2655
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2656 wchar_t*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2657 PyUnicode_AsWideCharString(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2658 Py_ssize_t *size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2659 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2660 wchar_t* buffer;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2661 Py_ssize_t buflen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2662
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2663 if (unicode == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2664 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2665 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2666 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2667
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2668 buflen = unicode_aswidechar(unicode, NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2669 if (buflen == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2670 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2671 if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2672 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2673 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2674 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2675
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2676 buffer = PyMem_MALLOC(buflen * sizeof(wchar_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2677 if (buffer == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2678 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2679 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2680 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2681 buflen = unicode_aswidechar(unicode, buffer, buflen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2682 if (buflen == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2683 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2684 if (size != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2685 *size = buflen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2686 return buffer;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2687 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2688
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2689 #endif /* HAVE_WCHAR_H */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2690
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2691 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2692 PyUnicode_FromOrdinal(int ordinal)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2693 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2694 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2695 if (ordinal < 0 || ordinal > MAX_UNICODE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2696 PyErr_SetString(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2697 "chr() arg not in range(0x110000)");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2698 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2699 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2700
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2701 if (ordinal < 256)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2702 return get_latin1_char(ordinal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2703
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2704 v = PyUnicode_New(1, ordinal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2705 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2706 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2707 PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2708 assert(_PyUnicode_CheckConsistency(v, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2709 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2710 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2711
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2712 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2713 PyUnicode_FromObject(register PyObject *obj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2714 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2715 /* XXX Perhaps we should make this API an alias of
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2716 PyObject_Str() instead ?! */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2717 if (PyUnicode_CheckExact(obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2718 if (PyUnicode_READY(obj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2719 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2720 Py_INCREF(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2721 return obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2722 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2723 if (PyUnicode_Check(obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2724 /* For a Unicode subtype that's not a Unicode object,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2725 return a true Unicode object with the same data. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2726 return PyUnicode_Copy(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2727 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2728 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2729 "Can't convert '%.100s' object to str implicitly",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2730 Py_TYPE(obj)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2731 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2732 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2733
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2734 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2735 PyUnicode_FromEncodedObject(register PyObject *obj,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2736 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2737 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2738 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2739 Py_buffer buffer;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2740 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2741
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2742 if (obj == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2743 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2744 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2745 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2746
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2747 /* Decoding bytes objects is the most common case and should be fast */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2748 if (PyBytes_Check(obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2749 if (PyBytes_GET_SIZE(obj) == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2750 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2751 v = unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2752 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2753 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2754 v = PyUnicode_Decode(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2755 PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2756 encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2757 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2758 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2759 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2760
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2761 if (PyUnicode_Check(obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2762 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2763 "decoding str is not supported");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2764 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2765 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2766
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2767 /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2768 if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2769 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2770 "coercing to str: need bytes, bytearray "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2771 "or buffer-like object, %.80s found",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2772 Py_TYPE(obj)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2773 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2774 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2776 if (buffer.len == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2777 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2778 v = unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2779 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2780 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2781 v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2782
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2783 PyBuffer_Release(&buffer);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2784 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2785 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2786
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Normalize an encoding name into the caller's buffer: upper-case letters
   are lowered and '_' becomes '-', so that e.g. "UTF_8" is recognised as
   "utf-8".  A NULL encoding is normalized to "utf-8".

   encoding:  null-terminated name, or NULL.
   lower:     output buffer.
   lower_len: capacity of lower in bytes, including the terminator.

   Returns 1 on success and 0 on error, i.e. when the result (including its
   terminator) does not fit into lower_len bytes.  On error the contents of
   lower are unspecified and may be unterminated. */
static int
normalize_encoding(const char *encoding,
                   char *lower,
                   size_t lower_len)
{
    const char *e;
    char *l;
    char *l_end;

    if (encoding == NULL) {
        /* The default name must obey the capacity too: "utf-8" plus its
           terminator needs sizeof("utf-8") bytes. */
        if (lower_len < sizeof("utf-8"))
            return 0;
        strcpy(lower, "utf-8");
        return 1;
    }

    /* Without room for even the terminator, lower_len - 1 below would wrap
       around and defeat the bounds check. */
    if (lower_len == 0)
        return 0;

    e = encoding;
    l = lower;
    l_end = &lower[lower_len - 1];   /* slot reserved for the terminator */
    while (*e) {
        if (l == l_end)
            return 0;
        if (Py_ISUPPER(*e)) {
            *l++ = Py_TOLOWER(*e++);
        }
        else if (*e == '_') {
            *l++ = '-';
            e++;
        }
        else {
            *l++ = *e++;
        }
    }
    *l = '\0';
    return 1;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2823
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2824 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2825 PyUnicode_Decode(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2826 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2827 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2828 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2829 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2830 PyObject *buffer = NULL, *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2831 Py_buffer info;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2832 char lower[11]; /* Enough for any encoding shortcut */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2833
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2834 /* Shortcuts for common default encodings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2835 if (normalize_encoding(encoding, lower, sizeof(lower))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2836 if ((strcmp(lower, "utf-8") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2837 (strcmp(lower, "utf8") == 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2838 return PyUnicode_DecodeUTF8(s, size, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2839 else if ((strcmp(lower, "latin-1") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2840 (strcmp(lower, "latin1") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2841 (strcmp(lower, "iso-8859-1") == 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2842 return PyUnicode_DecodeLatin1(s, size, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2843 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2844 else if (strcmp(lower, "mbcs") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2845 return PyUnicode_DecodeMBCS(s, size, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2846 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2847 else if (strcmp(lower, "ascii") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2848 return PyUnicode_DecodeASCII(s, size, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2849 else if (strcmp(lower, "utf-16") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2850 return PyUnicode_DecodeUTF16(s, size, errors, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2851 else if (strcmp(lower, "utf-32") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2852 return PyUnicode_DecodeUTF32(s, size, errors, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2853 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2854
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2855 /* Decode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2856 buffer = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2857 if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2858 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2859 buffer = PyMemoryView_FromBuffer(&info);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2860 if (buffer == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2861 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2862 unicode = PyCodec_Decode(buffer, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2863 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2864 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2865 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2866 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2867 "decoder did not return a str object (type=%.400s)",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2868 Py_TYPE(unicode)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2869 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2870 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2871 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2872 Py_DECREF(buffer);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2873 return unicode_result(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2874
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2875 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2876 Py_XDECREF(buffer);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2877 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2878 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2879
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2880 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2881 PyUnicode_AsDecodedObject(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2882 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2883 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2884 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2885 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2886
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2887 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2888 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2889 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2890 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2891
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2892 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2893 encoding = PyUnicode_GetDefaultEncoding();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2894
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2895 /* Decode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2896 v = PyCodec_Decode(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2897 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2898 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2899 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2900
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2901 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2902 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2903 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2904
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2905 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2906 PyUnicode_AsDecodedUnicode(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2907 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2908 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2909 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2910 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2911
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2912 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2913 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2914 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2915 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2916
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2917 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2918 encoding = PyUnicode_GetDefaultEncoding();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2919
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2920 /* Decode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2921 v = PyCodec_Decode(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2922 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2923 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2924 if (!PyUnicode_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2925 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2926 "decoder did not return a str object (type=%.400s)",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2927 Py_TYPE(v)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2928 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2929 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2930 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2931 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2932
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2933 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2934 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2935 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2936
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2937 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2938 PyUnicode_Encode(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2939 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2940 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2941 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2942 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2943 PyObject *v, *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2944
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2945 unicode = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2946 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2947 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2948 v = PyUnicode_AsEncodedString(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2949 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2950 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2951 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2952
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2953 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2954 PyUnicode_AsEncodedObject(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2955 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2956 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2957 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2958 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2959
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2960 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2961 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2962 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2963 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2964
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2965 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2966 encoding = PyUnicode_GetDefaultEncoding();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2967
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2968 /* Encode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2969 v = PyCodec_Encode(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2970 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2971 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2972 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2973
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2974 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2975 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2976 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2977
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2978 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2979 PyUnicode_EncodeFSDefault(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2980 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2981 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2982 return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2983 #elif defined(__APPLE__)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2984 return _PyUnicode_AsUTF8String(unicode, "surrogateescape");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2985 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2986 PyInterpreterState *interp = PyThreadState_GET()->interp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2987 /* Bootstrap check: if the filesystem codec is implemented in Python, we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2988 cannot use it to encode and decode filenames before it is loaded. Load
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2989 the Python codec requires to encode at least its own filename. Use the C
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2990 version of the locale codec until the codec registry is initialized and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2991 the Python codec is loaded.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2992
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2993 Py_FileSystemDefaultEncoding is shared between all interpreters, we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2994 cannot only rely on it: check also interp->fscodec_initialized for
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2995 subinterpreters. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2996 if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2997 return PyUnicode_AsEncodedString(unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2998 Py_FileSystemDefaultEncoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
2999 "surrogateescape");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3000 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3001 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3002 /* locale encoding with surrogateescape */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3003 wchar_t *wchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3004 char *bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3005 PyObject *bytes_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3006 size_t error_pos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3007
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3008 wchar = PyUnicode_AsWideCharString(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3009 if (wchar == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3010 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3011 bytes = _Py_wchar2char(wchar, &error_pos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3012 if (bytes == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3013 if (error_pos != (size_t)-1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3014 char *errmsg = strerror(errno);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3015 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3016 if (errmsg == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3017 errmsg = "Py_wchar2char() failed";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3018 raise_encode_exception(&exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3019 "filesystemencoding", unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3020 error_pos, error_pos+1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3021 errmsg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3022 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3023 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3024 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3025 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3026 PyMem_Free(wchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3027 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3028 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3029 PyMem_Free(wchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3030
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3031 bytes_obj = PyBytes_FromString(bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3032 PyMem_Free(bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3033 return bytes_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3034 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3035 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3036 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3037
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3038 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3039 PyUnicode_AsEncodedString(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3040 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3041 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3042 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3043 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3044 char lower[11]; /* Enough for any encoding shortcut */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3045
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3046 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3047 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3048 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3049 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3050
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3051 /* Shortcuts for common default encodings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3052 if (normalize_encoding(encoding, lower, sizeof(lower))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3053 if ((strcmp(lower, "utf-8") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3054 (strcmp(lower, "utf8") == 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3055 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3056 if (errors == NULL || strcmp(errors, "strict") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3057 return _PyUnicode_AsUTF8String(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3058 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3059 return _PyUnicode_AsUTF8String(unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3060 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3061 else if ((strcmp(lower, "latin-1") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3062 (strcmp(lower, "latin1") == 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3063 (strcmp(lower, "iso-8859-1") == 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3064 return _PyUnicode_AsLatin1String(unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3065 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3066 else if (strcmp(lower, "mbcs") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3067 return PyUnicode_EncodeCodePage(CP_ACP, unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3068 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3069 else if (strcmp(lower, "ascii") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3070 return _PyUnicode_AsASCIIString(unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3071 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3072
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3073 /* Encode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3074 v = PyCodec_Encode(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3075 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3076 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3077
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3078 /* The normal path */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3079 if (PyBytes_Check(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3080 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3081
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3082 /* If the codec returns a buffer, raise a warning and convert to bytes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3083 if (PyByteArray_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3084 int error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3085 PyObject *b;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3086
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3087 error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3088 "encoder %s returned bytearray instead of bytes",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3089 encoding);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3090 if (error) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3091 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3092 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3093 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3094
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3095 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3096 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3097 return b;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3098 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3099
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3100 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3101 "encoder did not return a bytes object (type=%.400s)",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3102 Py_TYPE(v)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3103 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3104 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3105 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3106
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3107 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3108 PyUnicode_AsEncodedUnicode(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3109 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3110 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3111 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3112 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3113
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3114 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3115 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3116 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3117 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3118
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3119 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3120 encoding = PyUnicode_GetDefaultEncoding();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3121
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3122 /* Encode via the codec registry */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3123 v = PyCodec_Encode(unicode, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3124 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3125 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3126 if (!PyUnicode_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3127 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3128 "encoder did not return an str object (type=%.400s)",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3129 Py_TYPE(v)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3130 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3131 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3132 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3133 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3134
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3135 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3136 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3137 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3138
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3139 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3140 PyUnicode_DecodeFSDefault(const char *s) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3141 Py_ssize_t size = (Py_ssize_t)strlen(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3142 return PyUnicode_DecodeFSDefaultAndSize(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3143 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3144
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3145 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3146 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3147 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3148 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3149 return PyUnicode_DecodeMBCS(s, size, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3150 #elif defined(__APPLE__)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3151 return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3152 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3153 PyInterpreterState *interp = PyThreadState_GET()->interp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3154 /* Bootstrap check: if the filesystem codec is implemented in Python, we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3155 cannot use it to encode and decode filenames before it is loaded. Load
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3156 the Python codec requires to encode at least its own filename. Use the C
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3157 version of the locale codec until the codec registry is initialized and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3158 the Python codec is loaded.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3159
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3160 Py_FileSystemDefaultEncoding is shared between all interpreters, we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3161 cannot only rely on it: check also interp->fscodec_initialized for
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3162 subinterpreters. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3163 if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3164 return PyUnicode_Decode(s, size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3165 Py_FileSystemDefaultEncoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3166 "surrogateescape");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3167 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3168 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3169 /* locale encoding with surrogateescape */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3170 wchar_t *wchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3171 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3172 size_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3173
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3174 if (s[size] != '\0' || size != strlen(s)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3175 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3176 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3177 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3178
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3179 wchar = _Py_char2wchar(s, &len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3180 if (wchar == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3181 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3182
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3183 unicode = PyUnicode_FromWideChar(wchar, len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3184 PyMem_Free(wchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3185 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3186 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3187 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3188 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3189
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3190
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3191 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3192 PyUnicode_FSConverter(PyObject* arg, void* addr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3193 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3194 PyObject *output = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3195 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3196 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3197 if (arg == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3198 Py_DECREF(*(PyObject**)addr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3199 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3200 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3201 if (PyBytes_Check(arg)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3202 output = arg;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3203 Py_INCREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3204 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3205 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3206 arg = PyUnicode_FromObject(arg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3207 if (!arg)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3208 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3209 output = PyUnicode_EncodeFSDefault(arg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3210 Py_DECREF(arg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3211 if (!output)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3212 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3213 if (!PyBytes_Check(output)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3214 Py_DECREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3215 PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3216 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3217 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3218 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3219 size = PyBytes_GET_SIZE(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3220 data = PyBytes_AS_STRING(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3221 if (size != strlen(data)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3222 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3223 Py_DECREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3224 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3225 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3226 *(PyObject**)addr = output;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3227 return Py_CLEANUP_SUPPORTED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3228 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3229
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3230
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3231 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3232 PyUnicode_FSDecoder(PyObject* arg, void* addr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3233 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3234 PyObject *output = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3235 if (arg == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3236 Py_DECREF(*(PyObject**)addr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3237 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3238 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3239 if (PyUnicode_Check(arg)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3240 if (PyUnicode_READY(arg))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3241 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3242 output = arg;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3243 Py_INCREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3244 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3245 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3246 arg = PyBytes_FromObject(arg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3247 if (!arg)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3248 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3249 output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3250 PyBytes_GET_SIZE(arg));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3251 Py_DECREF(arg);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3252 if (!output)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3253 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3254 if (!PyUnicode_Check(output)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3255 Py_DECREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3256 PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3257 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3258 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3259 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3260 if (PyUnicode_READY(output) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3261 Py_DECREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3262 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3263 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3264 if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3265 PyUnicode_GET_LENGTH(output), 0, 1) >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3266 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3267 Py_DECREF(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3268 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3269 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3270 *(PyObject**)addr = output;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3271 return Py_CLEANUP_SUPPORTED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3272 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3273
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3274
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3275 char*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3276 PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3277 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3278 PyObject *bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3279
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3280 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3281 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3282 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3283 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3284 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3285 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3286
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3287 if (PyUnicode_UTF8(unicode) == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3288 assert(!PyUnicode_IS_COMPACT_ASCII(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3289 bytes = _PyUnicode_AsUTF8String(unicode, "strict");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3290 if (bytes == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3291 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3292 _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3293 if (_PyUnicode_UTF8(unicode) == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3294 Py_DECREF(bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3295 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3296 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3297 _PyUnicode_UTF8_LENGTH(unicode) = PyBytes_GET_SIZE(bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3298 Py_MEMCPY(_PyUnicode_UTF8(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3299 PyBytes_AS_STRING(bytes),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3300 _PyUnicode_UTF8_LENGTH(unicode) + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3301 Py_DECREF(bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3302 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3303
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3304 if (psize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3305 *psize = PyUnicode_UTF8_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3306 return PyUnicode_UTF8(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3307 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3308
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3309 char*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3310 PyUnicode_AsUTF8(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3311 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3312 return PyUnicode_AsUTF8AndSize(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3313 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3314
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3315 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3316 static int unicode_as_unicode_calls = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3317 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3318
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3319
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3320 Py_UNICODE *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3321 PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3322 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3323 const unsigned char *one_byte;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3324 #if SIZEOF_WCHAR_T == 4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3325 const Py_UCS2 *two_bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3326 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3327 const Py_UCS4 *four_bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3328 const Py_UCS4 *ucs4_end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3329 Py_ssize_t num_surrogates;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3330 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3331 wchar_t *w;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3332 wchar_t *wchar_end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3333
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3334 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3335 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3336 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3337 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3338 if (_PyUnicode_WSTR(unicode) == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3339 /* Non-ASCII compact unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3340 assert(_PyUnicode_KIND(unicode) != 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3341 assert(PyUnicode_IS_READY(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3342
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3343 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3344 ++unicode_as_unicode_calls;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3345 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3346
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3347 if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3348 #if SIZEOF_WCHAR_T == 2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3349 four_bytes = PyUnicode_4BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3350 ucs4_end = four_bytes + _PyUnicode_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3351 num_surrogates = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3352
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3353 for (; four_bytes < ucs4_end; ++four_bytes) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3354 if (*four_bytes > 0xFFFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3355 ++num_surrogates;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3356 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3357
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3358 _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3359 sizeof(wchar_t) * (_PyUnicode_LENGTH(unicode) + 1 + num_surrogates));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3360 if (!_PyUnicode_WSTR(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3361 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3362 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3363 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3364 _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode) + num_surrogates;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3365
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3366 w = _PyUnicode_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3367 wchar_end = w + _PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3368 four_bytes = PyUnicode_4BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3369 for (; four_bytes < ucs4_end; ++four_bytes, ++w) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3370 if (*four_bytes > 0xFFFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3371 assert(*four_bytes <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3372 /* encode surrogate pair in this case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3373 *w++ = Py_UNICODE_HIGH_SURROGATE(*four_bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3374 *w = Py_UNICODE_LOW_SURROGATE(*four_bytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3375 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3376 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3377 *w = *four_bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3378
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3379 if (w > wchar_end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3380 assert(0 && "Miscalculated string end");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3381 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3382 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3383 *w = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3384 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3385 /* sizeof(wchar_t) == 4 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3386 Py_FatalError("Impossible unicode object state, wstr and str "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3387 "should share memory already.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3388 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3389 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3390 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3391 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3392 _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3393 (_PyUnicode_LENGTH(unicode) + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3394 if (!_PyUnicode_WSTR(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3395 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3396 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3397 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3398 if (!PyUnicode_IS_COMPACT_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3399 _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3400 w = _PyUnicode_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3401 wchar_end = w + _PyUnicode_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3402
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3403 if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3404 one_byte = PyUnicode_1BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3405 for (; w < wchar_end; ++one_byte, ++w)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3406 *w = *one_byte;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3407 /* null-terminate the wstr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3408 *w = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3409 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3410 else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3411 #if SIZEOF_WCHAR_T == 4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3412 two_bytes = PyUnicode_2BYTE_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3413 for (; w < wchar_end; ++two_bytes, ++w)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3414 *w = *two_bytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3415 /* null-terminate the wstr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3416 *w = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3417 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3418 /* sizeof(wchar_t) == 2 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3419 PyObject_FREE(_PyUnicode_WSTR(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3420 _PyUnicode_WSTR(unicode) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3421 Py_FatalError("Impossible unicode object state, wstr "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3422 "and str should share memory already.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3423 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3424 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3425 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3426 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3427 assert(0 && "This should never happen.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3428 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3429 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3430 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3431 if (size != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3432 *size = PyUnicode_WSTR_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3433 return _PyUnicode_WSTR(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3434 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3435
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3436 Py_UNICODE *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3437 PyUnicode_AsUnicode(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3438 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3439 return PyUnicode_AsUnicodeAndSize(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3440 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3441
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3442
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3443 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3444 PyUnicode_GetSize(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3445 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3446 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3447 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3448 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3449 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3450 return PyUnicode_GET_SIZE(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3451
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3452 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3453 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3454 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3455
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3456 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3457 PyUnicode_GetLength(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3458 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3459 if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3460 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3461 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3462 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3463
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3464 return PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3465 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3466
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3467 Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3468 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3469 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3470 if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3471 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3472 return (Py_UCS4)-1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3473 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3474 if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3475 PyErr_SetString(PyExc_IndexError, "string index out of range");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3476 return (Py_UCS4)-1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3477 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3478 return PyUnicode_READ_CHAR(unicode, index);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3479 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3480
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3481 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3482 PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3483 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3484 if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3485 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3486 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3487 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3488 if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3489 PyErr_SetString(PyExc_IndexError, "string index out of range");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3490 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3491 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3492 if (_PyUnicode_Dirty(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3493 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3494 PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3495 index, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3496 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3497 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3498
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Return the name of the default encoding.  The value is the fixed string
   "utf-8"; the returned pointer refers to static storage and must not be
   freed or modified by the caller. */
const char *
PyUnicode_GetDefaultEncoding(void)
{
    static const char default_encoding[] = "utf-8";

    return default_encoding;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3504
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3505 /* create or adjust a UnicodeDecodeError */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3506 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3507 make_decode_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3508 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3509 const char *input, Py_ssize_t length,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3510 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3511 const char *reason)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3512 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3513 if (*exceptionObject == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3514 *exceptionObject = PyUnicodeDecodeError_Create(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3515 encoding, input, length, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3516 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3517 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3518 if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3519 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3520 if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3521 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3522 if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3523 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3524 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3525 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3526
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3527 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3528 Py_DECREF(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3529 *exceptionObject = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3530 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3531
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3532 /* error handling callback helper:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3533 build arguments, call the callback and check the arguments,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3534 if no exception occurred, copy the replacement to the output
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3535 and adjust various state variables.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3536 return 0 on success, -1 on error
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3537 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3538
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3539 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3540 unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3541 const char *encoding, const char *reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3542 const char **input, const char **inend, Py_ssize_t *startinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3543 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3544 PyObject **output, Py_ssize_t *outpos)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3545 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3546 static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3547
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3548 PyObject *restuple = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3549 PyObject *repunicode = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3550 Py_ssize_t outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3551 Py_ssize_t insize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3552 Py_ssize_t requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3553 Py_ssize_t newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3554 PyObject *inputobj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3555 int res = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3556
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3557 if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3558 outsize = PyUnicode_GET_LENGTH(*output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3559 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3560 outsize = _PyUnicode_WSTR_LENGTH(*output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3561
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3562 if (*errorHandler == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3563 *errorHandler = PyCodec_LookupError(errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3564 if (*errorHandler == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3565 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3566 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3567
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3568 make_decode_exception(exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3569 encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3570 *input, *inend - *input,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3571 *startinpos, *endinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3572 reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3573 if (*exceptionObject == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3574 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3575
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3576 restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3577 if (restuple == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3578 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3579 if (!PyTuple_Check(restuple)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3580 PyErr_SetString(PyExc_TypeError, &argparse[4]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3581 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3582 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3583 if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3584 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3585 if (PyUnicode_READY(repunicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3586 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3587
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3588 /* Copy back the bytes variables, which might have been modified by the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3589 callback */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3590 inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3591 if (!inputobj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3592 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3593 if (!PyBytes_Check(inputobj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3594 PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3595 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3596 *input = PyBytes_AS_STRING(inputobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3597 insize = PyBytes_GET_SIZE(inputobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3598 *inend = *input + insize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3599 /* we can DECREF safely, as the exception has another reference,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3600 so the object won't go away. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3601 Py_DECREF(inputobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3602
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3603 if (newpos<0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3604 newpos = insize+newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3605 if (newpos<0 || newpos>insize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3606 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3607 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3608 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3609
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3610 if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3611 /* need more space? (at least enough for what we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3612 have+the replacement+the rest of the string (starting
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3613 at the new input position), so we won't have to check space
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3614 when there are no errors in the rest of the string) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3615 Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3616 requiredsize = *outpos + replen + insize-newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3617 if (requiredsize > outsize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3618 if (requiredsize<2*outsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3619 requiredsize = 2*outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3620 if (unicode_resize(output, requiredsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3621 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3622 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3623 if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3624 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3625 copy_characters(*output, *outpos, repunicode, 0, replen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3626 *outpos += replen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3627 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3628 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3629 wchar_t *repwstr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3630 Py_ssize_t repwlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3631 repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3632 if (repwstr == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3633 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3634 /* need more space? (at least enough for what we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3635 have+the replacement+the rest of the string (starting
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3636 at the new input position), so we won't have to check space
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3637 when there are no errors in the rest of the string) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3638 requiredsize = *outpos + repwlen + insize-newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3639 if (requiredsize > outsize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3640 if (requiredsize < 2*outsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3641 requiredsize = 2*outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3642 if (unicode_resize(output, requiredsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3643 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3644 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3645 wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3646 *outpos += repwlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3647 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3648 *endinpos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3649 *inptr = *input + newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3650
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3651 /* we made it! */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3652 res = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3653
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3654 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3655 Py_XDECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3656 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3657 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3658
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3659 /* --- UTF-7 Codec -------------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3660
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3661 /* See RFC2152 for details. We encode conservatively and decode liberally. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3662
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3663 /* Three simple macros defining base-64. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3664
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Non-zero when c belongs to the base-64 alphabet: A-Z, a-z, 0-9,
   '+' or '/'.  The argument is evaluated more than once. */

#define IS_BASE64(c)                    \
    (('A' <= (c) && (c) <= 'Z') ||      \
     ('a' <= (c) && (c) <= 'z') ||      \
     ('0' <= (c) && (c) <= '9') ||      \
     '+' == (c) || '/' == (c))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3672
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Map a base-64 character to its 6-bit value: 'A'-'Z' -> 0-25,
   'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62 and anything else -> 63.
   The caller is expected to have checked the character with IS_BASE64
   first.  The argument is evaluated more than once. */

#define FROM_BASE64(c)                                  \
    (('A' <= (c) && (c) <= 'Z') ? (c) - 'A' :           \
     ('a' <= (c) && (c) <= 'z') ? (c) - 'a' + 26 :      \
     ('0' <= (c) && (c) <= '9') ? (c) - '0' + 52 :      \
     '+' == (c) ? 62 : 63)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3680
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Base-64 character that encodes the low 6 bits of n; any higher bits
   of n are masked off. */

#define TO_BASE64(n)                        \
    (("ABCDEFGHIJKLMNOPQRSTUVWXYZ"          \
      "abcdefghijklmnopqrstuvwxyz"          \
      "0123456789+/")[(n) & 0x3f])
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3685
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* DECODE_DIRECT: non-zero when a byte found in a UTF-7 string decodes
 * to itself.  Decoding is permissive: every value up to 127 qualifies
 * except '+', which opens a base64 section.  The argument is evaluated
 * more than once. */

#define DECODE_DIRECT(c) \
    (!((c) > 127 || (c) == '+'))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3693
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* The UTF-7 encoder treats ASCII characters differently according to
 * whether they are Set D, Set O, Whitespace, or special (i.e. none of
 * the above).  See RFC2152.  This array identifies these different
 * sets:
 * 0 : "Set D"
 *     alphanumeric and '(),-./:?
 * 1 : "Set O"
 *     !"#$%&*;<=>@[]^_`{|}
 * 2 : "whitespace"
 *     ht nl cr sp
 * 3 : special (must be base64 encoded)
 *     everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127)
 *
 * The table is indexed by the ASCII code (0..127) and is only ever read,
 * hence const.
 */

static
const char utf7_category[128] = {
/* nul soh stx etx eot enq ack bel bs  ht  nl  vt  np  cr  so  si  */
    3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  3,  3,  2,  3,  3,
/* dle dc1 dc2 dc3 dc4 nak syn etb can em  sub esc fs  gs  rs  us  */
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
/* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  3,  0,  0,  0,  0,
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?  */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0,
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O  */
    1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _  */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  3,  1,  1,  1,
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o  */
    1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  del */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  3,  3,
};

/* ENCODE_DIRECT: this character should be encoded as itself.  The
 * answer depends on whether we are encoding set O as itself, and also
 * on whether we are encoding whitespace as itself.  RFC2152 makes it
 * clear that the answers to these questions vary between
 * applications, so this code needs to be flexible.
 *
 * c must be in 1..127 for the table to be consulted at all; Set D
 * characters are always direct, whitespace only when directWS is true
 * and Set O only when directO is true.  All three parameters are
 * parenthesized so that an expression argument such as `a || b` cannot
 * re-associate with the surrounding `&&`.  c is evaluated more than
 * once. */

#define ENCODE_DIRECT(c, directO, directWS)                 \
    ((c) < 128 && (c) > 0 &&                                \
     ((utf7_category[(c)] == 0) ||                          \
      ((directWS) && (utf7_category[(c)] == 2)) ||          \
      ((directO) && (utf7_category[(c)] == 1))))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3739
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3740 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3741 PyUnicode_DecodeUTF7(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3742 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3743 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3744 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3745 return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3746 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3747
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3748 /* The decoder. The only state we preserve is our read position,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3749 * i.e. how many characters we have consumed. So if we end in the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3750 * middle of a shift sequence we have to back off the read position
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3751 * and the output to the beginning of the sequence, otherwise we lose
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3752 * all the shift state (seen bits, number of bits seen, high
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3753 * surrogate). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3754
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3755 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3756 PyUnicode_DecodeUTF7Stateful(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3757 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3758 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3759 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3760 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3761 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3762 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3763 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3764 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3765 const char *e;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3766 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3767 const char *errmsg = "";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3768 int inShift = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3769 Py_ssize_t shiftOutStart;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3770 unsigned int base64bits = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3771 unsigned long base64buffer = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3772 Py_UCS4 surrogate = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3773 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3774 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3776 /* Start off assuming it's all ASCII. Widen later as necessary. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3777 unicode = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3778 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3779 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3780 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3781 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3782 *consumed = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3783 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3784 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3785
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3786 shiftOutStart = outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3787 e = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3788
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3789 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3790 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3791 restart:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3792 ch = (unsigned char) *s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3793
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3794 if (inShift) { /* in a base-64 section */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3795 if (IS_BASE64(ch)) { /* consume a base-64 character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3796 base64buffer = (base64buffer << 6) | FROM_BASE64(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3797 base64bits += 6;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3798 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3799 if (base64bits >= 16) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3800 /* we have enough bits for a UTF-16 value */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3801 Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3802 base64bits -= 16;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3803 base64buffer &= (1 << base64bits) - 1; /* clear high bits */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3804 if (surrogate) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3805 /* expecting a second surrogate */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3806 if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3807 Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3808 if (unicode_putchar(&unicode, &outpos, ch2) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3809 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3810 surrogate = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3811 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3812 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3813 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3814 if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3815 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3816 surrogate = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3817 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3818 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3819 if (Py_UNICODE_IS_HIGH_SURROGATE(outCh)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3820 /* first surrogate */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3821 surrogate = outCh;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3822 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3823 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3824 if (unicode_putchar(&unicode, &outpos, outCh) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3825 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3826 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3827 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3828 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3829 else { /* now leaving a base-64 section */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3830 inShift = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3831 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3832 if (surrogate) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3833 if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3834 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3835 surrogate = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3836 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3837 if (base64bits > 0) { /* left-over bits */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3838 if (base64bits >= 6) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3839 /* We've seen at least one base-64 character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3840 errmsg = "partial character in shift sequence";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3841 goto utf7Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3842 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3843 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3844 /* Some bits remain; they should be zero */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3845 if (base64buffer != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3846 errmsg = "non-zero padding bits in shift sequence";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3847 goto utf7Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3848 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3849 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3850 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3851 if (ch != '-') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3852 /* '-' is absorbed; other terminating
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3853 characters are preserved */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3854 if (unicode_putchar(&unicode, &outpos, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3855 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3856 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3857 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3858 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3859 else if ( ch == '+' ) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3860 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3861 s++; /* consume '+' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3862 if (s < e && *s == '-') { /* '+-' encodes '+' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3863 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3864 if (unicode_putchar(&unicode, &outpos, '+') < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3865 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3866 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3867 else { /* begin base64-encoded section */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3868 inShift = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3869 shiftOutStart = outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3870 base64bits = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3871 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3872 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3873 else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3874 if (unicode_putchar(&unicode, &outpos, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3875 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3876 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3877 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3878 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3879 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3880 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3881 errmsg = "unexpected special character";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3882 goto utf7Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3883 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3884 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3885 utf7Error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3886 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3887 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3888 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3889 "utf7", errmsg,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3890 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3891 &unicode, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3892 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3893 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3894
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3895 /* end of string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3896
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3897 if (inShift && !consumed) { /* in shift sequence, no more to follow */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3898 /* if we're in an inconsistent state, that's an error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3899 if (surrogate ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3900 (base64bits >= 6) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3901 (base64bits > 0 && base64buffer != 0)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3902 endinpos = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3903 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3904 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3905 "utf7", "unterminated shift sequence",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3906 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3907 &unicode, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3908 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3909 if (s < e)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3910 goto restart;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3911 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3912 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3913
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3914 /* return state */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3915 if (consumed) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3916 if (inShift) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3917 outpos = shiftOutStart; /* back off output */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3918 *consumed = startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3919 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3920 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3921 *consumed = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3922 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3923 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3924
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3925 if (unicode_resize(&unicode, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3926 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3927
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3928 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3929 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3930 return unicode_result(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3931
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3932 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3933 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3934 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3935 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3936 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3937 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3938
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3939
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3940 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3941 _PyUnicode_EncodeUTF7(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3942 int base64SetO,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3943 int base64WhiteSpace,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3944 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3945 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3946 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3947 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3948 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3949 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3950 Py_ssize_t allocated;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3951 int inShift = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3952 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3953 unsigned int base64bits = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3954 unsigned long base64buffer = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3955 char * out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3956 char * start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3957
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3958 if (PyUnicode_READY(str) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3959 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3960 kind = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3961 data = PyUnicode_DATA(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3962 len = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3963
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3964 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3965 return PyBytes_FromStringAndSize(NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3966
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3967 /* It might be possible to tighten this worst case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3968 allocated = 8 * len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3969 if (allocated / 8 != len)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3970 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3971
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3972 v = PyBytes_FromStringAndSize(NULL, allocated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3973 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3974 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3975
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3976 start = out = PyBytes_AS_STRING(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3977 for (i = 0; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3978 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3979
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3980 if (inShift) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3981 if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3982 /* shifting out */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3983 if (base64bits) { /* output remaining bits */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3984 *out++ = TO_BASE64(base64buffer << (6-base64bits));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3985 base64buffer = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3986 base64bits = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3987 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3988 inShift = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3989 /* Characters not in the BASE64 set implicitly unshift the sequence
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3990 so no '-' is required, except if the character is itself a '-' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3991 if (IS_BASE64(ch) || ch == '-') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3992 *out++ = '-';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3993 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3994 *out++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3995 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3996 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3997 goto encode_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3998 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
3999 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4000 else { /* not in a shift sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4001 if (ch == '+') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4002 *out++ = '+';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4003 *out++ = '-';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4004 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4005 else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4006 *out++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4007 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4008 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4009 *out++ = '+';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4010 inShift = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4011 goto encode_char;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4012 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4013 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4014 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4015 encode_char:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4016 if (ch >= 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4017 assert(ch <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4018
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4019 /* code first surrogate */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4020 base64bits += 16;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4021 base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4022 while (base64bits >= 6) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4023 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4024 base64bits -= 6;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4025 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4026 /* prepare second surrogate */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4027 ch = Py_UNICODE_LOW_SURROGATE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4028 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4029 base64bits += 16;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4030 base64buffer = (base64buffer << 16) | ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4031 while (base64bits >= 6) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4032 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4033 base64bits -= 6;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4034 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4035 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4036 if (base64bits)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4037 *out++= TO_BASE64(base64buffer << (6-base64bits) );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4038 if (inShift)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4039 *out++ = '-';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4040 if (_PyBytes_Resize(&v, out - start) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4041 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4042 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4043 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4044 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4045 PyUnicode_EncodeUTF7(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4046 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4047 int base64SetO,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4048 int base64WhiteSpace,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4049 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4050 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4051 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4052 PyObject *tmp = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4053 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4054 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4055 result = _PyUnicode_EncodeUTF7(tmp, base64SetO,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4056 base64WhiteSpace, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4057 Py_DECREF(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4058 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4059 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4060
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4061 #undef IS_BASE64
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4062 #undef FROM_BASE64
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4063 #undef TO_BASE64
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4064 #undef DECODE_DIRECT
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4065 #undef ENCODE_DIRECT
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4066
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4067 /* --- UTF-8 Codec -------------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4068
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Lookup table indexed by the first byte of a UTF-8 sequence; the entry is
   the total length in bytes of the sequence that byte introduces.  A zero
   entry marks a byte that is not a legal prefix.  See RFC 3629 for
   details. */
static char utf8_code_length[256] = {
    /* 00-7F: single-byte sequences */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10-1F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20-2F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30-3F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40-4F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50-5F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60-6F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */
    /* 80-BF: never a legal prefix */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0-AF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */
    /* C0-C1 are illegal; C2-DF start two-byte sequences */
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */
    /* E0-EF start three-byte sequences */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */
    /* F0-F4 start four-byte sequences; F5-FF are illegal */
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* F0-F4 + F5-FF */
};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4090
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4091 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4092 PyUnicode_DecodeUTF8(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4093 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4094 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4095 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4096 return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4097 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4098
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4099 #include "stringlib/ucs1lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4100 #include "stringlib/codecs.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4101 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4102
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4103 #include "stringlib/ucs2lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4104 #include "stringlib/codecs.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4105 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4106
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4107 #include "stringlib/ucs4lib.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4108 #include "stringlib/codecs.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4109 #include "stringlib/undef.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4110
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Mask to check or force alignment of a pointer to C 'long' boundaries.
   `(size_t) p & LONG_PTR_MASK` is zero when p is long-aligned, and
   `(size_t) p & ~LONG_PTR_MASK` rounds p down to a long boundary.
   The expansion is fully parenthesized so it behaves as a single operand
   in any expression context (e.g. as the operand of sizeof). */
#define LONG_PTR_MASK ((size_t) (SIZEOF_LONG - 1))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4113
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4114 /* Mask to quickly check whether a C 'long' contains a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4115 non-ASCII, UTF8-encoded char. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4116 #if (SIZEOF_LONG == 8)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4117 # define ASCII_CHAR_MASK 0x8080808080808080L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4118 #elif (SIZEOF_LONG == 4)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4119 # define ASCII_CHAR_MASK 0x80808080L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4120 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4121 # error C 'long' size should be either 4 or 8!
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4122 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4123
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4124 /* Scans a UTF-8 string and returns the maximum character to be expected
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4125 and the size of the decoded unicode string.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4126
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4127 This function doesn't check for errors, these checks are performed in
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4128 PyUnicode_DecodeUTF8Stateful.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4129 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4130 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4131 utf8_max_char_size_and_char_count(const char *s, Py_ssize_t string_size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4132 Py_ssize_t *unicode_size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4133 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4134 Py_ssize_t char_count = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4135 const unsigned char *p = (const unsigned char *)s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4136 const unsigned char *end = p + string_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4137 const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4138
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4139 assert(unicode_size != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4140
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4141 /* By having a cascade of independent loops which fallback onto each
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4142 other, we minimize the amount of work done in the average loop
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4143 iteration, and we also maximize the CPU's ability to predict
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4144 branches correctly (because a given condition will have always the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4145 same boolean outcome except perhaps in the last iteration of the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4146 corresponding loop).
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4147 In the general case this brings us rather close to decoding
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4148 performance pre-PEP 393, despite the two-pass decoding.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4149
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4150 Note that the pure ASCII loop is not duplicated once a non-ASCII
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4151 character has been encountered. It is actually a pessimization (by
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4152 a significant factor) to use this loop on text with many non-ASCII
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4153 characters, and it is important to avoid bad performance on valid
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4154 utf-8 data (invalid utf-8 being a different can of worms).
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4155 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4156
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4157 /* ASCII */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4158 for (; p < end; ++p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4159 /* Only check value if it's not a ASCII char... */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4160 if (*p < 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4161 /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4162 an explanation. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4163 if (!((size_t) p & LONG_PTR_MASK)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4164 /* Help register allocation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4165 register const unsigned char *_p = p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4166 while (_p < aligned_end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4167 unsigned long value = *(unsigned long *) _p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4168 if (value & ASCII_CHAR_MASK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4169 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4170 _p += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4171 char_count += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4172 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4173 p = _p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4174 if (p == end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4175 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4176 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4177 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4178 if (*p < 0x80)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4179 ++char_count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4180 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4181 goto _ucs1loop;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4182 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4183 *unicode_size = char_count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4184 return 127;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4185
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4186 _ucs1loop:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4187 for (; p < end; ++p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4188 if (*p < 0xc4)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4189 char_count += ((*p & 0xc0) != 0x80);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4190 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4191 goto _ucs2loop;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4192 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4193 *unicode_size = char_count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4194 return 255;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4195
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4196 _ucs2loop:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4197 for (; p < end; ++p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4198 if (*p < 0xf0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4199 char_count += ((*p & 0xc0) != 0x80);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4200 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4201 goto _ucs4loop;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4202 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4203 *unicode_size = char_count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4204 return 65535;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4205
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4206 _ucs4loop:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4207 for (; p < end; ++p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4208 char_count += ((*p & 0xc0) != 0x80);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4209 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4210 *unicode_size = char_count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4211 return 65537;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4212 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4213
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4214 /* Called when we encountered some error that wasn't detected in the original
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4215 scan, e.g. an encoded surrogate character. The original maxchar computation
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4216 may have been incorrect, so redo it. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4217 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4218 refit_partial_string(PyObject **unicode, int kind, void *data, Py_ssize_t n)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4219 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4220 PyObject *tmp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4221 Py_ssize_t k;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4222 Py_UCS4 maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4223 for (k = 0, maxchar = 0; k < n; k++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4224 maxchar = Py_MAX(maxchar, PyUnicode_READ(kind, data, k));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4225 tmp = PyUnicode_New(PyUnicode_GET_LENGTH(*unicode), maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4226 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4227 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4228 PyUnicode_CopyCharacters(tmp, 0, *unicode, 0, n);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4229 Py_DECREF(*unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4230 *unicode = tmp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4231 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4232 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4233
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Similar to PyUnicode_WRITE but may attempt to widen and resize the string
   in case of errors. Implicit parameters: unicode, kind, data, has_errors,
   onError. Potential resizing overallocates, so the result needs to shrink
   at the end.

   When has_errors is false this is a plain PyUnicode_WRITE(kind, data, ...).
   When has_errors is true the write goes through unicode_putchar(), after
   growing the string with unicode_resize() to index + index/8 if index is
   greater than the current length; any failure jumps to onError.

   `index` is expanded exactly once on whichever path is taken, so an
   argument such as `i++` is incremented once per use of the macro.  Both
   arguments are parenthesized in the expansion, and the scratch variable
   has a macro-specific name so it cannot capture a caller variable that
   appears in `index` or `value`.
*/
#define WRITE_MAYBE_FAIL(index, value)                                      \
    do {                                                                    \
        if (has_errors) {                                                   \
            Py_ssize_t wmf_pos = (index);                                   \
            if (wmf_pos > PyUnicode_GET_LENGTH(unicode) &&                  \
                unicode_resize(&unicode, wmf_pos + wmf_pos/8) < 0)          \
                goto onError;                                               \
            if (unicode_putchar(&unicode, &wmf_pos, (value)) < 0)           \
                goto onError;                                               \
        }                                                                   \
        else                                                                \
            PyUnicode_WRITE(kind, data, (index), (value));                  \
    } while (0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4252
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4253 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4254 PyUnicode_DecodeUTF8Stateful(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4255 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4256 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4257 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4258 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4259 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4260 int n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4261 int k;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4262 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4263 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4264 const char *e, *aligned_end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4265 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4266 const char *errmsg = "";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4267 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4268 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4269 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4270 Py_ssize_t unicode_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4271 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4272 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4273 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4274 int has_errors = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4275
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4276 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4277 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4278 *consumed = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4279 return (PyObject *)PyUnicode_New(0, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4280 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4281 maxchar = utf8_max_char_size_and_char_count(s, size, &unicode_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4282 /* When the string is ASCII only, just use memcpy and return.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4283 unicode_size may be != size if there is an incomplete UTF-8
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4284 sequence at the end of the ASCII block. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4285 if (maxchar < 128 && size == unicode_size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4286 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4287 *consumed = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4288
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4289 if (size == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4290 return get_latin1_char((unsigned char)s[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4291
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4292 unicode = PyUnicode_New(unicode_size, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4293 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4294 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4295 Py_MEMCPY(PyUnicode_1BYTE_DATA(unicode), s, unicode_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4296 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4297 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4298 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4299
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4300 /* In case of errors, maxchar and size computation might be incorrect;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4301 code below refits and resizes as necessary. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4302 unicode = PyUnicode_New(unicode_size, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4303 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4304 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4305 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4306 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4307
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4308 /* Unpack UTF-8 encoded data */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4309 i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4310 e = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4311 switch (kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4312 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4313 has_errors = ucs1lib_utf8_try_decode(s, e, (Py_UCS1 *) data, &s, &i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4314 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4315 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4316 has_errors = ucs2lib_utf8_try_decode(s, e, (Py_UCS2 *) data, &s, &i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4317 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4318 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4319 has_errors = ucs4lib_utf8_try_decode(s, e, (Py_UCS4 *) data, &s, &i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4320 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4321 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4322 if (!has_errors) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4323 /* Ensure the unicode size calculation was correct */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4324 assert(i == unicode_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4325 assert(s == e);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4326 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4327 *consumed = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4328 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4329 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4330 /* Fall through to the generic decoding loop for the rest of
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4331 the string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4332 if (refit_partial_string(&unicode, kind, data, i) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4333 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4334
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4335 aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4336
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4337 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4338 Py_UCS4 ch = (unsigned char)*s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4339
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4340 if (ch < 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4341 /* Fast path for runs of ASCII characters. Given that common UTF-8
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4342 input will consist of an overwhelming majority of ASCII
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4343 characters, we try to optimize for this case by checking
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4344 as many characters as a C 'long' can contain.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4345 First, check if we can do an aligned read, as most CPUs have
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4346 a penalty for unaligned reads.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4347 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4348 if (!((size_t) s & LONG_PTR_MASK)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4349 /* Help register allocation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4350 register const char *_s = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4351 register Py_ssize_t _i = i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4352 while (_s < aligned_end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4353 /* Read a whole long at a time (either 4 or 8 bytes),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4354 and do a fast unrolled copy if it only contains ASCII
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4355 characters. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4356 unsigned long value = *(unsigned long *) _s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4357 if (value & ASCII_CHAR_MASK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4358 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4359 WRITE_MAYBE_FAIL(_i+0, _s[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4360 WRITE_MAYBE_FAIL(_i+1, _s[1]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4361 WRITE_MAYBE_FAIL(_i+2, _s[2]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4362 WRITE_MAYBE_FAIL(_i+3, _s[3]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4363 #if (SIZEOF_LONG == 8)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4364 WRITE_MAYBE_FAIL(_i+4, _s[4]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4365 WRITE_MAYBE_FAIL(_i+5, _s[5]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4366 WRITE_MAYBE_FAIL(_i+6, _s[6]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4367 WRITE_MAYBE_FAIL(_i+7, _s[7]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4368 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4369 _s += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4370 _i += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4371 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4372 s = _s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4373 i = _i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4374 if (s == e)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4375 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4376 ch = (unsigned char)*s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4377 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4378 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4379
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4380 if (ch < 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4381 WRITE_MAYBE_FAIL(i++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4382 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4383 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4384 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4385
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4386 n = utf8_code_length[ch];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4387
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4388 if (s + n > e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4389 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4390 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4391 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4392 errmsg = "unexpected end of data";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4393 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4394 endinpos = startinpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4395 for (k=1; (k < size-startinpos) && ((s[k]&0xC0) == 0x80); k++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4396 endinpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4397 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4398 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4399 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4400
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4401 switch (n) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4402
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4403 case 0:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4404 errmsg = "invalid start byte";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4405 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4406 endinpos = startinpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4407 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4408
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4409 case 1:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4410 errmsg = "internal error";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4411 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4412 endinpos = startinpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4413 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4414
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4415 case 2:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4416 if ((s[1] & 0xc0) != 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4417 errmsg = "invalid continuation byte";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4418 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4419 endinpos = startinpos + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4420 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4421 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4422 ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4423 assert ((ch > 0x007F) && (ch <= 0x07FF));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4424 WRITE_MAYBE_FAIL(i++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4425 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4426
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4427 case 3:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4428 /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4429 will result in surrogates in range d800-dfff. Surrogates are
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4430 not valid UTF-8 so they are rejected.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4431 See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4432 (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4433 if ((s[1] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4434 (s[2] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4435 ((unsigned char)s[0] == 0xE0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4436 (unsigned char)s[1] < 0xA0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4437 ((unsigned char)s[0] == 0xED &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4438 (unsigned char)s[1] > 0x9F)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4439 errmsg = "invalid continuation byte";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4440 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4441 endinpos = startinpos + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4442
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4443 /* if s[1] first two bits are 1 and 0, then the invalid
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4444 continuation byte is s[2], so increment endinpos by 1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4445 if not, s[1] is invalid and endinpos doesn't need to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4446 be incremented. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4447 if ((s[1] & 0xC0) == 0x80)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4448 endinpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4449 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4450 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4451 ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4452 assert ((ch > 0x07FF) && (ch <= 0xFFFF));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4453 WRITE_MAYBE_FAIL(i++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4454 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4455
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4456 case 4:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4457 if ((s[1] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4458 (s[2] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4459 (s[3] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4460 ((unsigned char)s[0] == 0xF0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4461 (unsigned char)s[1] < 0x90) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4462 ((unsigned char)s[0] == 0xF4 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4463 (unsigned char)s[1] > 0x8F)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4464 errmsg = "invalid continuation byte";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4465 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4466 endinpos = startinpos + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4467 if ((s[1] & 0xC0) == 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4468 endinpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4469 if ((s[2] & 0xC0) == 0x80)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4470 endinpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4471 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4472 goto utf8Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4473 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4474 ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4475 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4476 assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4477
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4478 WRITE_MAYBE_FAIL(i++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4479 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4480 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4481 s += n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4482 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4483
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4484 utf8Error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4485 if (!has_errors) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4486 if (refit_partial_string(&unicode, kind, data, i) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4487 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4488 has_errors = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4489 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4490 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4491 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4492 "utf8", errmsg,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4493 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4494 &unicode, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4495 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4496 /* Update data because unicode_decode_call_errorhandler might have
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4497 re-created or resized the unicode object. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4498 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4499 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4500 aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4501 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4502 /* Ensure the unicode_size calculation above was correct: */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4503 assert(has_errors || i == unicode_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4504
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4505 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4506 *consumed = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4507
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4508 /* Adjust length and ready string when it contained errors and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4509 is of the old resizable kind. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4510 if (has_errors) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4511 if (PyUnicode_Resize(&unicode, i) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4512 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4513 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4514
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4515 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4516 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4517 assert(_PyUnicode_CheckConsistency(unicode, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4518 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4519
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4520 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4521 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4522 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4523 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4524 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4525 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4526
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4527 #undef WRITE_MAYBE_FAIL
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4528
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4529 #ifdef __APPLE__
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4530
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4531 /* Simplified UTF-8 decoder using surrogateescape error handler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4532 used to decode the command line arguments on Mac OS X. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4533
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4534 wchar_t*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4535 _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4536 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4537 int n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4538 const char *e;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4539 wchar_t *unicode, *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4540
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4541 /* Note: size will always be longer than the resulting Unicode
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4542 character count */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4543 if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4544 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4545 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4546 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4547 unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4548 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4549 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4550
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4551 /* Unpack UTF-8 encoded data */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4552 p = unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4553 e = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4554 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4555 Py_UCS4 ch = (unsigned char)*s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4556
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4557 if (ch < 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4558 *p++ = (wchar_t)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4559 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4560 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4561 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4562
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4563 n = utf8_code_length[ch];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4564 if (s + n > e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4565 goto surrogateescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4566 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4567
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4568 switch (n) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4569 case 0:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4570 case 1:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4571 goto surrogateescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4572
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4573 case 2:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4574 if ((s[1] & 0xc0) != 0x80)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4575 goto surrogateescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4576 ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4577 assert ((ch > 0x007F) && (ch <= 0x07FF));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4578 *p++ = (wchar_t)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4579 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4580
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4581 case 3:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4582 /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4583 will result in surrogates in range d800-dfff. Surrogates are
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4584 not valid UTF-8 so they are rejected.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4585 See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4586 (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4587 if ((s[1] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4588 (s[2] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4589 ((unsigned char)s[0] == 0xE0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4590 (unsigned char)s[1] < 0xA0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4591 ((unsigned char)s[0] == 0xED &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4592 (unsigned char)s[1] > 0x9F)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4593
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4594 goto surrogateescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4595 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4596 ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4597 assert ((ch > 0x07FF) && (ch <= 0xFFFF));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4598 *p++ = (wchar_t)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4599 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4600
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4601 case 4:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4602 if ((s[1] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4603 (s[2] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4604 (s[3] & 0xc0) != 0x80 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4605 ((unsigned char)s[0] == 0xF0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4606 (unsigned char)s[1] < 0x90) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4607 ((unsigned char)s[0] == 0xF4 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4608 (unsigned char)s[1] > 0x8F)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4609 goto surrogateescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4610 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4611 ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4612 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4613 assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4614
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4615 #if SIZEOF_WCHAR_T == 4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4616 *p++ = (wchar_t)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4617 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4618 /* compute and append the two surrogates: */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4619 *p++ = (wchar_t)Py_UNICODE_HIGH_SURROGATE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4620 *p++ = (wchar_t)Py_UNICODE_LOW_SURROGATE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4621 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4622 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4623 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4624 s += n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4625 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4626
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4627 surrogateescape:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4628 *p++ = 0xDC00 + ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4629 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4630 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4631 *p = L'\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4632 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4633 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4634
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4635 #endif /* __APPLE__ */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4636
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4637 /* Primary internal function which creates utf8 encoded bytes objects.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4638
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4639 Allocation strategy: if the string is short, convert into a stack buffer
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4640 and allocate exactly as much space needed at the end. Else allocate the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4641 maximum possible needed (4 result bytes per Unicode character), and return
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4642 the excess memory at the end.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4643 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4644 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4645 _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4646 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4647 #define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4648
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4649 Py_ssize_t i; /* index into s of next input byte */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4650 PyObject *result; /* result string object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4651 char *p; /* next free byte in output buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4652 Py_ssize_t nallocated; /* number of result bytes allocated */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4653 Py_ssize_t nneeded; /* number of result bytes needed */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4654 char stackbuf[MAX_SHORT_UNICHARS * 4];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4655 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4656 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4657 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4658 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4659 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4660 PyObject *rep = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4661
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4662 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4663 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4664 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4665 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4666
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4667 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4668 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4669
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4670 if (PyUnicode_UTF8(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4671 return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4672 PyUnicode_UTF8_LENGTH(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4673
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4674 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4675 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4676 size = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4677
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4678 assert(size >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4679
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4680 if (size <= MAX_SHORT_UNICHARS) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4681 /* Write into the stack buffer; nallocated can't overflow.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4682 * At the end, we'll allocate exactly as much heap space as it
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4683 * turns out we need.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4684 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4685 nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4686 result = NULL; /* will allocate after we're done */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4687 p = stackbuf;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4688 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4689 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4690 /* Overallocate on the heap, and give the excess back at the end. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4691 nallocated = size * 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4692 if (nallocated / 4 != size) /* overflow! */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4693 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4694 result = PyBytes_FromStringAndSize(NULL, nallocated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4695 if (result == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4696 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4697 p = PyBytes_AS_STRING(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4698 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4699
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4700 for (i = 0; i < size;) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4701 Py_UCS4 ch = PyUnicode_READ(kind, data, i++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4702
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4703 if (ch < 0x80)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4704 /* Encode ASCII */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4705 *p++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4706
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4707 else if (ch < 0x0800) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4708 /* Encode Latin-1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4709 *p++ = (char)(0xc0 | (ch >> 6));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4710 *p++ = (char)(0x80 | (ch & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4711 } else if (Py_UNICODE_IS_SURROGATE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4712 Py_ssize_t newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4713 Py_ssize_t repsize, k, startpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4714 startpos = i-1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4715 rep = unicode_encode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4716 errors, &errorHandler, "utf-8", "surrogates not allowed",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4717 unicode, &exc, startpos, startpos+1, &newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4718 if (!rep)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4719 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4720
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4721 if (PyBytes_Check(rep))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4722 repsize = PyBytes_GET_SIZE(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4723 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4724 repsize = PyUnicode_GET_LENGTH(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4725
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4726 if (repsize > 4) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4727 Py_ssize_t offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4728
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4729 if (result == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4730 offset = p - stackbuf;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4731 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4732 offset = p - PyBytes_AS_STRING(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4733
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4734 if (nallocated > PY_SSIZE_T_MAX - repsize + 4) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4735 /* integer overflow */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4736 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4737 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4738 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4739 nallocated += repsize - 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4740 if (result != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4741 if (_PyBytes_Resize(&result, nallocated) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4742 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4743 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4744 result = PyBytes_FromStringAndSize(NULL, nallocated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4745 if (result == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4746 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4747 Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4748 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4749 p = PyBytes_AS_STRING(result) + offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4750 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4751
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4752 if (PyBytes_Check(rep)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4753 char *prep = PyBytes_AS_STRING(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4754 for(k = repsize; k > 0; k--)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4755 *p++ = *prep++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4756 } else /* rep is unicode */ {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4757 enum PyUnicode_Kind repkind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4758 void *repdata;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4759
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4760 if (PyUnicode_READY(rep) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4761 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4762 repkind = PyUnicode_KIND(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4763 repdata = PyUnicode_DATA(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4764
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4765 for(k=0; k<repsize; k++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4766 Py_UCS4 c = PyUnicode_READ(repkind, repdata, k);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4767 if (0x80 <= c) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4768 raise_encode_exception(&exc, "utf-8",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4769 unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4770 i-1, i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4771 "surrogates not allowed");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4772 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4773 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4774 *p++ = (char)c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4775 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4776 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4777 Py_CLEAR(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4778 } else if (ch < 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4779 *p++ = (char)(0xe0 | (ch >> 12));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4780 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4781 *p++ = (char)(0x80 | (ch & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4782 } else /* ch >= 0x10000 */ {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4783 assert(ch <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4784 /* Encode UCS4 Unicode ordinals */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4785 *p++ = (char)(0xf0 | (ch >> 18));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4786 *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4787 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4788 *p++ = (char)(0x80 | (ch & 0x3f));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4789 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4790 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4791
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4792 if (result == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4793 /* This was stack allocated. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4794 nneeded = p - stackbuf;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4795 assert(nneeded <= nallocated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4796 result = PyBytes_FromStringAndSize(stackbuf, nneeded);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4797 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4798 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4799 /* Cut back to size actually needed. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4800 nneeded = p - PyBytes_AS_STRING(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4801 assert(nneeded <= nallocated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4802 _PyBytes_Resize(&result, nneeded);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4803 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4804
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4805 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4806 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4807 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4808 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4809 Py_XDECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4810 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4811 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4812 Py_XDECREF(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4813 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4814
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4815 #undef MAX_SHORT_UNICHARS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4816 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4817
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4818 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4819 PyUnicode_EncodeUTF8(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4820 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4821 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4822 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4823 PyObject *v, *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4824
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4825 unicode = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4826 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4827 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4828 v = _PyUnicode_AsUTF8String(unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4829 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4830 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4831 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4832
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4833 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4834 PyUnicode_AsUTF8String(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4835 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4836 return _PyUnicode_AsUTF8String(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4837 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4838
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4839 /* --- UTF-32 Codec ------------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4840
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4841 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4842 PyUnicode_DecodeUTF32(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4843 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4844 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4845 int *byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4846 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4847 return PyUnicode_DecodeUTF32Stateful(s, size, errors, byteorder, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4848 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4849
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4850 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4851 PyUnicode_DecodeUTF32Stateful(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4852 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4853 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4854 int *byteorder,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4855 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4856 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4857 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4858 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4859 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4860 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4861 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4862 const unsigned char *q, *e;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4863 int bo = 0; /* assume native ordering by default */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4864 const char *errmsg = "";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4865 /* Offsets from q for retrieving bytes in the right order. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4866 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4867 int iorder[] = {0, 1, 2, 3};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4868 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4869 int iorder[] = {3, 2, 1, 0};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4870 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4871 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4872 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4873
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4874 q = (unsigned char *)s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4875 e = q + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4876
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4877 if (byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4878 bo = *byteorder;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4879
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4880 /* Check for BOM marks (U+FEFF) in the input and adjust current
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4881 byte order setting accordingly. In native mode, the leading BOM
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4882 mark is skipped, in all other modes, it is copied to the output
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4883 stream as-is (giving a ZWNBSP character). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4884 if (bo == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4885 if (size >= 4) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4886 const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4887 (q[iorder[1]] << 8) | q[iorder[0]];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4888 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4889 if (bom == 0x0000FEFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4890 q += 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4891 bo = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4892 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4893 else if (bom == 0xFFFE0000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4894 q += 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4895 bo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4896 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4897 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4898 if (bom == 0x0000FEFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4899 q += 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4900 bo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4901 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4902 else if (bom == 0xFFFE0000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4903 q += 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4904 bo = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4905 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4906 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4907 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4908 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4909
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4910 if (bo == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4911 /* force LE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4912 iorder[0] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4913 iorder[1] = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4914 iorder[2] = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4915 iorder[3] = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4916 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4917 else if (bo == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4918 /* force BE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4919 iorder[0] = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4920 iorder[1] = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4921 iorder[2] = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4922 iorder[3] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4923 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4924
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4925 /* This might be one to much, because of a BOM */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4926 unicode = PyUnicode_New((size+3)/4, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4927 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4928 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4929 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4930 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4931 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4932
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4933 while (q < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4934 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4935 /* remaining bytes at the end? (size should be divisible by 4) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4936 if (e-q<4) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4937 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4938 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4939 errmsg = "truncated data";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4940 startinpos = ((const char *)q)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4941 endinpos = ((const char *)e)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4942 goto utf32Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4943 /* The remaining input chars are ignored if the callback
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4944 chooses to skip the input */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4945 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4946 ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4947 (q[iorder[1]] << 8) | q[iorder[0]];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4948
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4949 if (ch >= 0x110000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4950 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4951 errmsg = "codepoint not in range(0x110000)";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4952 startinpos = ((const char *)q)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4953 endinpos = startinpos+4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4954 goto utf32Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4955 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4956 if (unicode_putchar(&unicode, &outpos, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4957 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4958 q += 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4959 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4960 utf32Error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4961 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4962 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4963 "utf32", errmsg,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4964 &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4965 &unicode, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4966 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4967 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4968
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4969 if (byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4970 *byteorder = bo;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4971
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4972 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4973 *consumed = (const char *)q-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4974
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4975 /* Adjust length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4976 if (PyUnicode_Resize(&unicode, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4977 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4978
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4979 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4980 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4981 return unicode_result(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4982
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4983 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4984 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4985 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4986 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4987 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4988 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4989
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4990 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4991 _PyUnicode_EncodeUTF32(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4992 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4993 int byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4994 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4995 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4996 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4997 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4998 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
4999 unsigned char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5000 Py_ssize_t nsize, bytesize, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5001 /* Offsets from p for storing byte pairs in the right order. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5002 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5003 int iorder[] = {0, 1, 2, 3};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5004 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5005 int iorder[] = {3, 2, 1, 0};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5006 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5007
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5008 #define STORECHAR(CH) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5009 do { \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5010 p[iorder[3]] = ((CH) >> 24) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5011 p[iorder[2]] = ((CH) >> 16) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5012 p[iorder[1]] = ((CH) >> 8) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5013 p[iorder[0]] = (CH) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5014 p += 4; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5015 } while(0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5016
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5017 if (!PyUnicode_Check(str)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5018 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5019 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5020 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5021 if (PyUnicode_READY(str) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5022 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5023 kind = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5024 data = PyUnicode_DATA(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5025 len = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5026
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5027 nsize = len + (byteorder == 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5028 bytesize = nsize * 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5029 if (bytesize / 4 != nsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5030 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5031 v = PyBytes_FromStringAndSize(NULL, bytesize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5032 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5033 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5034
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5035 p = (unsigned char *)PyBytes_AS_STRING(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5036 if (byteorder == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5037 STORECHAR(0xFEFF);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5038 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5039 goto done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5040
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5041 if (byteorder == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5042 /* force LE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5043 iorder[0] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5044 iorder[1] = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5045 iorder[2] = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5046 iorder[3] = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5047 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5048 else if (byteorder == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5049 /* force BE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5050 iorder[0] = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5051 iorder[1] = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5052 iorder[2] = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5053 iorder[3] = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5054 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5055
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5056 for (i = 0; i < len; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5057 STORECHAR(PyUnicode_READ(kind, data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5058
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5059 done:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5060 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5061 #undef STORECHAR
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5062 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5063
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5064 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5065 PyUnicode_EncodeUTF32(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5066 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5067 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5068 int byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5069 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5070 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5071 PyObject *tmp = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5072 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5073 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5074 result = _PyUnicode_EncodeUTF32(tmp, errors, byteorder);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5075 Py_DECREF(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5076 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5077 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5078
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5079 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5080 PyUnicode_AsUTF32String(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5081 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5082 return _PyUnicode_EncodeUTF32(unicode, NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5083 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5084
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5085 /* --- UTF-16 Codec ------------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5086
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5087 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5088 PyUnicode_DecodeUTF16(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5089 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5090 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5091 int *byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5092 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5093 return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5094 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5095
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5096 /* Two masks for fast checking of whether a C 'long' may contain
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5097 UTF16-encoded surrogate characters. This is an efficient heuristic,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5098 assuming that non-surrogate characters with a code point >= 0x8000 are
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5099 rare in most input.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5100 FAST_CHAR_MASK is used when the input is in native byte ordering,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5101 SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5102 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5103 #if (SIZEOF_LONG == 8)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5104 # define FAST_CHAR_MASK 0x8000800080008000L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5105 # define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5106 #elif (SIZEOF_LONG == 4)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5107 # define FAST_CHAR_MASK 0x80008000L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5108 # define SWAPPED_FAST_CHAR_MASK 0x00800080L
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5109 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5110 # error C 'long' size should be either 4 or 8!
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5111 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5112
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5113 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5114 PyUnicode_DecodeUTF16Stateful(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5115 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5116 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5117 int *byteorder,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5118 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5119 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5120 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5121 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5122 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5123 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5124 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5125 const unsigned char *q, *e, *aligned_end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5126 int bo = 0; /* assume native ordering by default */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5127 int native_ordering = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5128 const char *errmsg = "";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5129 /* Offsets from q for retrieving byte pairs in the right order. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5130 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5131 int ihi = 1, ilo = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5132 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5133 int ihi = 0, ilo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5134 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5135 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5136 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5137
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5138 /* Note: size will always be longer than the resulting Unicode
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5139 character count */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5140 unicode = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5141 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5142 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5143 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5144 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5145 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5146
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5147 q = (unsigned char *)s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5148 e = q + size - 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5149
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5150 if (byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5151 bo = *byteorder;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5152
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5153 /* Check for BOM marks (U+FEFF) in the input and adjust current
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5154 byte order setting accordingly. In native mode, the leading BOM
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5155 mark is skipped, in all other modes, it is copied to the output
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5156 stream as-is (giving a ZWNBSP character). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5157 if (bo == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5158 if (size >= 2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5159 const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5160 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5161 if (bom == 0xFEFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5162 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5163 bo = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5164 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5165 else if (bom == 0xFFFE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5166 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5167 bo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5168 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5169 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5170 if (bom == 0xFEFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5171 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5172 bo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5173 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5174 else if (bom == 0xFFFE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5175 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5176 bo = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5177 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5178 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5179 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5180 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5181
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5182 if (bo == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5183 /* force LE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5184 ihi = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5185 ilo = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5186 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5187 else if (bo == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5188 /* force BE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5189 ihi = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5190 ilo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5191 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5192 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5193 native_ordering = ilo < ihi;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5194 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5195 native_ordering = ilo > ihi;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5196 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5197
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5198 aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5199 while (q < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5200 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5201 /* First check for possible aligned read of a C 'long'. Unaligned
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5202 reads are more expensive, better to defer to another iteration. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5203 if (!((size_t) q & LONG_PTR_MASK)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5204 /* Fast path for runs of non-surrogate chars. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5205 register const unsigned char *_q = q;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5206 int kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5207 void *data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5208 while (_q < aligned_end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5209 unsigned long block = * (unsigned long *) _q;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5210 unsigned short *pblock = (unsigned short*)&block;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5211 Py_UCS4 maxch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5212 if (native_ordering) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5213 /* Can use buffer directly */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5214 if (block & FAST_CHAR_MASK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5215 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5216 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5217 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5218 /* Need to byte-swap */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5219 unsigned char *_p = (unsigned char*)pblock;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5220 if (block & SWAPPED_FAST_CHAR_MASK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5221 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5222 _p[0] = _q[1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5223 _p[1] = _q[0];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5224 _p[2] = _q[3];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5225 _p[3] = _q[2];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5226 #if (SIZEOF_LONG == 8)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5227 _p[4] = _q[5];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5228 _p[5] = _q[4];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5229 _p[6] = _q[7];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5230 _p[7] = _q[6];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5231 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5232 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5233 maxch = Py_MAX(pblock[0], pblock[1]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5234 #if SIZEOF_LONG == 8
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5235 maxch = Py_MAX(maxch, Py_MAX(pblock[2], pblock[3]));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5236 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5237 if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5238 if (unicode_widen(&unicode, maxch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5239 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5240 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5241 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5242 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5243 PyUnicode_WRITE(kind, data, outpos++, pblock[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5244 PyUnicode_WRITE(kind, data, outpos++, pblock[1]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5245 #if SIZEOF_LONG == 8
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5246 PyUnicode_WRITE(kind, data, outpos++, pblock[2]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5247 PyUnicode_WRITE(kind, data, outpos++, pblock[3]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5248 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5249 _q += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5250 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5251 q = _q;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5252 if (q >= e)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5253 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5254 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5255 ch = (q[ihi] << 8) | q[ilo];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5256
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5257 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5258
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5259 if (!Py_UNICODE_IS_SURROGATE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5260 if (unicode_putchar(&unicode, &outpos, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5261 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5262 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5263 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5264
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5265 /* UTF-16 code pair: */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5266 if (q > e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5267 errmsg = "unexpected end of data";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5268 startinpos = (((const char *)q) - 2) - starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5269 endinpos = ((const char *)e) + 1 - starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5270 goto utf16Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5271 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5272 if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5273 Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5274 q += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5275 if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5276 if (unicode_putchar(&unicode, &outpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5277 Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5278 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5279 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5280 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5281 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5282 errmsg = "illegal UTF-16 surrogate";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5283 startinpos = (((const char *)q)-4)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5284 endinpos = startinpos+2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5285 goto utf16Error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5286 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5287
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5288 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5289 errmsg = "illegal encoding";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5290 startinpos = (((const char *)q)-2)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5291 endinpos = startinpos+2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5292 /* Fall through to report the error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5293
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5294 utf16Error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5295 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5296 errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5297 &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5298 "utf16", errmsg,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5299 &starts,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5300 (const char **)&e,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5301 &startinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5302 &endinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5303 &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5304 (const char **)&q,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5305 &unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5306 &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5307 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5308 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5309 /* remaining byte at the end? (size should be even) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5310 if (e == q) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5311 if (!consumed) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5312 errmsg = "truncated data";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5313 startinpos = ((const char *)q) - starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5314 endinpos = ((const char *)e) + 1 - starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5315 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5316 errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5317 &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5318 "utf16", errmsg,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5319 &starts,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5320 (const char **)&e,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5321 &startinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5322 &endinpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5323 &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5324 (const char **)&q,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5325 &unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5326 &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5327 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5328 /* The remaining input chars are ignored if the callback
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5329 chooses to skip the input */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5330 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5331 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5332
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5333 if (byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5334 *byteorder = bo;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5335
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5336 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5337 *consumed = (const char *)q-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5338
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5339 /* Adjust length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5340 if (PyUnicode_Resize(&unicode, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5341 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5342
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5343 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5344 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5345 return unicode_result(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5346
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5347 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5348 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5349 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5350 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5351 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5352 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5353
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5354 #undef FAST_CHAR_MASK
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5355 #undef SWAPPED_FAST_CHAR_MASK
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5356
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5357 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5358 _PyUnicode_EncodeUTF16(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5359 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5360 int byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5361 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5362 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5363 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5364 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5365 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5366 unsigned char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5367 Py_ssize_t nsize, bytesize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5368 Py_ssize_t i, pairs;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5369 /* Offsets from p for storing byte pairs in the right order. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5370 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5371 int ihi = 1, ilo = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5372 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5373 int ihi = 0, ilo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5374 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5375
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5376 #define STORECHAR(CH) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5377 do { \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5378 p[ihi] = ((CH) >> 8) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5379 p[ilo] = (CH) & 0xff; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5380 p += 2; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5381 } while(0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5382
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5383 if (!PyUnicode_Check(str)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5384 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5385 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5386 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5387 if (PyUnicode_READY(str) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5388 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5389 kind = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5390 data = PyUnicode_DATA(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5391 len = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5392
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5393 pairs = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5394 if (kind == PyUnicode_4BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5395 for (i = 0; i < len; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5396 if (PyUnicode_READ(kind, data, i) >= 0x10000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5397 pairs++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5398 /* 2 * (len + pairs + (byteorder == 0)) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5399 if (len > PY_SSIZE_T_MAX - pairs - (byteorder == 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5400 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5401 nsize = len + pairs + (byteorder == 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5402 bytesize = nsize * 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5403 if (bytesize / 2 != nsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5404 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5405 v = PyBytes_FromStringAndSize(NULL, bytesize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5406 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5407 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5408
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5409 p = (unsigned char *)PyBytes_AS_STRING(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5410 if (byteorder == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5411 STORECHAR(0xFEFF);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5412 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5413 goto done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5414
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5415 if (byteorder == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5416 /* force LE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5417 ihi = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5418 ilo = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5419 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5420 else if (byteorder == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5421 /* force BE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5422 ihi = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5423 ilo = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5424 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5425
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5426 for (i = 0; i < len; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5427 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5428 Py_UCS4 ch2 = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5429 if (ch >= 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5430 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5431 ch = Py_UNICODE_HIGH_SURROGATE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5432 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5433 STORECHAR(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5434 if (ch2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5435 STORECHAR(ch2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5436 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5437
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5438 done:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5439 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5440 #undef STORECHAR
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5441 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5442
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5443 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5444 PyUnicode_EncodeUTF16(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5445 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5446 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5447 int byteorder)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5448 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5449 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5450 PyObject *tmp = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5451 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5452 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5453 result = _PyUnicode_EncodeUTF16(tmp, errors, byteorder);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5454 Py_DECREF(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5455 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5456 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5457
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5458 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5459 PyUnicode_AsUTF16String(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5460 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5461 return _PyUnicode_EncodeUTF16(unicode, NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5462 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5463
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5464 /* --- Unicode Escape Codec ----------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5465
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5466 /* Helper function for PyUnicode_DecodeUnicodeEscape, determines
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5467 if all the escapes in the string make it still a valid ASCII string.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5468 Returns -1 if any escapes were found which cause the string to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5469 pop out of ASCII range. Otherwise returns the length of the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5470 required buffer to hold the string.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5471 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5472 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5473 length_of_escaped_ascii_string(const char *s, Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5474 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5475 const unsigned char *p = (const unsigned char *)s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5476 const unsigned char *end = p + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5477 Py_ssize_t length = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5478
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5479 if (size < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5480 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5481
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5482 for (; p < end; ++p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5483 if (*p > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5484 /* Non-ASCII */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5485 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5486 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5487 else if (*p != '\\') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5488 /* Normal character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5489 ++length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5490 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5491 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5492 /* Backslash-escape, check next char */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5493 ++p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5494 /* Escape sequence reaches till end of string or
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5495 non-ASCII follow-up. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5496 if (p >= end || *p > 127)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5497 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5498 switch (*p) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5499 case '\n':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5500 /* backslash + \n result in zero characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5501 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5502 case '\\': case '\'': case '\"':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5503 case 'b': case 'f': case 't':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5504 case 'n': case 'r': case 'v': case 'a':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5505 ++length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5506 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5507 case '0': case '1': case '2': case '3':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5508 case '4': case '5': case '6': case '7':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5509 case 'x': case 'u': case 'U': case 'N':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5510 /* these do not guarantee ASCII characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5511 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5512 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5513 /* count the backslash + the other character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5514 length += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5515 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5516 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5517 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5518 return length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5519 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5520
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* File-scope pointer to a _PyUnicode_Name_CAPI structure, initially NULL.
   NOTE(review): presumably filled in on demand by the escape decoder;
   the code that assigns it is outside this chunk -- confirm. */
5521 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5522
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5523 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5524 PyUnicode_DecodeUnicodeEscape(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5525 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5526 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5527 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5528 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5529 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5530 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5531 int j;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5532 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5533 const char *end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5534 char* message;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5535 Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5536 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5537 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5538 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5539 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5540
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5541 len = length_of_escaped_ascii_string(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5542
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5543 /* After length_of_escaped_ascii_string() there are two alternatives,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5544 either the string is pure ASCII with named escapes like \n, etc.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5545 and we determined its exact size (common case)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5546 or it contains \x, \u, ... escape sequences; then we create a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5547 legacy wchar string and resize it at the end of this function. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5548 if (len >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5549 v = PyUnicode_New(len, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5550 if (!v)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5551 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5552 assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5553 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5554 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5555 /* Escaped strings will always be longer than the resulting
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5556 Unicode string, so we start with size here and then reduce the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5557 length after conversion to the true value.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5558 (but if the error callback returns a long replacement string
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5559 we'll have to allocate more space) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5560 v = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5561 if (!v)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5562 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5563 len = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5564 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5565
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5566 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5567 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5568 i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5569 end = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5570
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5571 while (s < end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5572 unsigned char c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5573 Py_UCS4 x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5574 int digits;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5575
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5576 /* The only case in which i == len is a backslash
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5577 followed by a newline. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5578 assert(i <= len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5579
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5580 /* Non-escape characters are interpreted as Unicode ordinals */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5581 if (*s != '\\') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5582 if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5583 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5584 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5585 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5586
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5587 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5588 /* \ - Escapes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5589 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5590 c = *s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5591 if (s > end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5592 c = '\0'; /* Invalid after \ */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5593
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5594 /* The only case in which i == len is a backslash
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5595 followed by a newline. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5596 assert(i < len || (i == len && c == '\n'));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5597
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5598 switch (c) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5599
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5600 /* \x escapes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5601 #define WRITECHAR(ch) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5602 do { \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5603 if (unicode_putchar(&v, &i, ch) < 0) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5604 goto onError; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5605 }while(0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5606
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5607 case '\n': break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5608 case '\\': WRITECHAR('\\'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5609 case '\'': WRITECHAR('\''); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5610 case '\"': WRITECHAR('\"'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5611 case 'b': WRITECHAR('\b'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5612 /* FF */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5613 case 'f': WRITECHAR('\014'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5614 case 't': WRITECHAR('\t'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5615 case 'n': WRITECHAR('\n'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5616 case 'r': WRITECHAR('\r'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5617 /* VT */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5618 case 'v': WRITECHAR('\013'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5619 /* BEL, not classic C */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5620 case 'a': WRITECHAR('\007'); break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5621
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5622 /* \OOO (octal) escapes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5623 case '0': case '1': case '2': case '3':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5624 case '4': case '5': case '6': case '7':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5625 x = s[-1] - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5626 if (s < end && '0' <= *s && *s <= '7') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5627 x = (x<<3) + *s++ - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5628 if (s < end && '0' <= *s && *s <= '7')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5629 x = (x<<3) + *s++ - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5630 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5631 WRITECHAR(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5632 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5633
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5634 /* hex escapes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5635 /* \xXX */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5636 case 'x':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5637 digits = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5638 message = "truncated \\xXX escape";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5639 goto hexescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5640
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5641 /* \uXXXX */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5642 case 'u':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5643 digits = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5644 message = "truncated \\uXXXX escape";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5645 goto hexescape;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5646
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5647 /* \UXXXXXXXX */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5648 case 'U':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5649 digits = 8;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5650 message = "truncated \\UXXXXXXXX escape";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5651 hexescape:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5652 chr = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5653 if (s+digits>end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5654 endinpos = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5655 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5656 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5657 "unicodeescape", "end of string in escape sequence",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5658 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5659 &v, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5660 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5661 goto nextByte;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5662 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5663 for (j = 0; j < digits; ++j) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5664 c = (unsigned char) s[j];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5665 if (!Py_ISXDIGIT(c)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5666 endinpos = (s+j+1)-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5667 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5668 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5669 "unicodeescape", message,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5670 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5671 &v, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5672 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5673 len = PyUnicode_GET_LENGTH(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5674 goto nextByte;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5675 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5676 chr = (chr<<4) & ~0xF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5677 if (c >= '0' && c <= '9')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5678 chr += c - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5679 else if (c >= 'a' && c <= 'f')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5680 chr += 10 + c - 'a';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5681 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5682 chr += 10 + c - 'A';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5683 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5684 s += j;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5685 if (chr == 0xffffffff && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5686 /* _decoding_error will have already written into the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5687 target buffer. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5688 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5689 store:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5690 /* when we get here, chr is a 32-bit unicode character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5691 if (chr <= MAX_UNICODE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5692 WRITECHAR(chr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5693 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5694 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5695 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5696 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5697 "unicodeescape", "illegal Unicode character",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5698 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5699 &v, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5700 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5701 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5702 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5703
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5704 /* \N{name} */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5705 case 'N':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5706 message = "malformed \\N character escape";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5707 if (ucnhash_CAPI == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5708 /* load the unicode data module */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5709 ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5710 PyUnicodeData_CAPSULE_NAME, 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5711 if (ucnhash_CAPI == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5712 goto ucnhashError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5713 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5714 if (*s == '{') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5715 const char *start = s+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5716 /* look for the closing brace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5717 while (*s != '}' && s < end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5718 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5719 if (s > start && s < end && *s == '}') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5720 /* found a name. look it up in the unicode database */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5721 message = "unknown Unicode character name";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5722 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5723 if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5724 &chr, 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5725 goto store;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5726 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5727 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5728 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5729 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5730 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5731 "unicodeescape", message,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5732 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5733 &v, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5734 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5735 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5736
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5737 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5738 if (s > end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5739 message = "\\ at end of string";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5740 s--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5741 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5742 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5743 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5744 "unicodeescape", message,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5745 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5746 &v, &i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5747 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5748 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5749 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5750 WRITECHAR('\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5751 WRITECHAR(s[-1]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5752 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5753 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5754 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5755 nextByte:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5756 ;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5757 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5758 #undef WRITECHAR
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5759
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5760 if (PyUnicode_Resize(&v, i) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5761 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5762 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5763 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5764 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5765
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5766 ucnhashError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5767 PyErr_SetString(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5768 PyExc_UnicodeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5769 "\\N escapes not supported (can't load unicodedata module)"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5770 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5771 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5772 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5773 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5774 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5776 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5777 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5778 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5779 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5780 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5781 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5782
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5783 /* Return a Unicode-Escape string version of the Unicode object.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5784
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5785 NOTE(review): this comment used to say "If quotes is true, the string is
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5786 enclosed in u"" or u'' quotes"; the signature below has no quotes argument.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5787
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5788 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5789
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5790 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5791 PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5792 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5793 Py_ssize_t i, len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5794 PyObject *repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5795 char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5796 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5797 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5798 Py_ssize_t expandsize = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5799
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5800 /* Initial allocation is based on the longest-possible unichr
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5801 escape.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5802
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5803 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5804 unichr, so in this case it's the longest unichr escape. In
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5805 narrow (UTF-16) builds this is five chars per source unichr
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5806 since there are two unichrs in the surrogate pair, so in narrow
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5807 (UTF-16) builds it's not the longest unichr escape.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5808
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5809 In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5810 so in the narrow (UTF-16) build case it's the longest unichr
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5811 escape.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5812 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5813
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5814 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5815 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5816 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5817 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5818 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5819 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5820 len = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5821 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5822 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5823 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5824 case PyUnicode_1BYTE_KIND: expandsize = 4; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5825 case PyUnicode_2BYTE_KIND: expandsize = 6; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5826 case PyUnicode_4BYTE_KIND: expandsize = 10; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5827 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5828
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5829 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5830 return PyBytes_FromStringAndSize(NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5831
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5832 if (len > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5833 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5834
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5835 repr = PyBytes_FromStringAndSize(NULL,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5836 2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5837 + expandsize*len
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5838 + 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5839 if (repr == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5840 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5841
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5842 p = PyBytes_AS_STRING(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5843
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5844 for (i = 0; i < len; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5845 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5846
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5847 /* Escape backslashes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5848 if (ch == '\\') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5849 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5850 *p++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5851 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5852 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5853
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5854 /* Map 21-bit characters to '\U00xxxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5855 else if (ch >= 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5856 assert(ch <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5857 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5858 *p++ = 'U';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5859 *p++ = Py_hexdigits[(ch >> 28) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5860 *p++ = Py_hexdigits[(ch >> 24) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5861 *p++ = Py_hexdigits[(ch >> 20) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5862 *p++ = Py_hexdigits[(ch >> 16) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5863 *p++ = Py_hexdigits[(ch >> 12) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5864 *p++ = Py_hexdigits[(ch >> 8) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5865 *p++ = Py_hexdigits[(ch >> 4) & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5866 *p++ = Py_hexdigits[ch & 0x0000000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5867 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5868 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5869
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5870 /* Map 16-bit characters to '\uxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5871 if (ch >= 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5872 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5873 *p++ = 'u';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5874 *p++ = Py_hexdigits[(ch >> 12) & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5875 *p++ = Py_hexdigits[(ch >> 8) & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5876 *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5877 *p++ = Py_hexdigits[ch & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5878 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5879
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5880 /* Map special whitespace to '\t', \n', '\r' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5881 else if (ch == '\t') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5882 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5883 *p++ = 't';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5884 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5885 else if (ch == '\n') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5886 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5887 *p++ = 'n';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5888 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5889 else if (ch == '\r') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5890 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5891 *p++ = 'r';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5892 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5893
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5894 /* Map non-printable US ASCII to '\xhh' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5895 else if (ch < ' ' || ch >= 0x7F) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5896 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5897 *p++ = 'x';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5898 *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5899 *p++ = Py_hexdigits[ch & 0x000F];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5900 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5901
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5902 /* Copy everything else as-is */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5903 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5904 *p++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5905 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5906
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5907 assert(p - PyBytes_AS_STRING(repr) > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5908 if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5909 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5910 return repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5911 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5912
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5913 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5914 PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5915 Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5916 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5917 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5918 PyObject *tmp = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5919 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5920 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5921 result = PyUnicode_AsUnicodeEscapeString(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5922 Py_DECREF(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5923 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5924 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5925
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5926 /* --- Raw Unicode Escape Codec ------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5927
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5928 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5929 PyUnicode_DecodeRawUnicodeEscape(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5930 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5931 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5932 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5933 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5934 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5935 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5936 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5937 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5938 const char *end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5939 const char *bs;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5940 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5941 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5942
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5943 /* Escaped strings will always be longer than the resulting
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5944 Unicode string, so we start with size here and then reduce the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5945 length after conversion to the true value. (But decoding error
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5946 handler might have to resize the string) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5947 v = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5948 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5949 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5950 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5951 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5952 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5953 end = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5954 while (s < end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5955 unsigned char c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5956 Py_UCS4 x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5957 int i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5958 int count;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5959
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5960 /* Non-escape characters are interpreted as Unicode ordinals */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5961 if (*s != '\\') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5962 if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5963 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5964 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5965 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5966 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5967
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5968 /* \u-escapes are only interpreted iff the number of leading
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5969 backslashes if odd */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5970 bs = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5971 for (;s < end;) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5972 if (*s != '\\')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5973 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5974 if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5975 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5976 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5977 if (((s - bs) & 1) == 0 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5978 s >= end ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5979 (*s != 'u' && *s != 'U')) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5980 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5981 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5982 outpos--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5983 count = *s=='u' ? 4 : 8;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5984 s++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5985
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5986 /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5987 for (x = 0, i = 0; i < count; ++i, ++s) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5988 c = (unsigned char)*s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5989 if (!Py_ISXDIGIT(c)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5990 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5991 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5992 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5993 "rawunicodeescape", "truncated \\uXXXX",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5994 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5995 &v, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5996 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5997 goto nextByte;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5998 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
5999 x = (x<<4) & ~0xF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6000 if (c >= '0' && c <= '9')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6001 x += c - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6002 else if (c >= 'a' && c <= 'f')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6003 x += 10 + c - 'a';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6004 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6005 x += 10 + c - 'A';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6006 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6007 if (x <= MAX_UNICODE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6008 if (unicode_putchar(&v, &outpos, x) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6009 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6010 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6011 endinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6012 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6013 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6014 "rawunicodeescape", "\\Uxxxxxxxx out of range",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6015 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6016 &v, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6017 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6018 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6019 nextByte:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6020 ;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6021 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6022 if (PyUnicode_Resize(&v, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6023 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6024 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6025 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6026 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6027
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6028 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6029 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6030 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6031 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6032 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6033 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6034
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6035
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6036 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6037 PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6038 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6039 PyObject *repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6040 char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6041 char *q;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6042 Py_ssize_t expandsize, pos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6043 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6044 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6045 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6046
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6047 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6048 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6049 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6050 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6051 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6052 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6053 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6054 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6055 len = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6056 /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6057 bytes, and 1 byte characters 4. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6058 expandsize = kind * 2 + 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6059
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6060 if (len > PY_SSIZE_T_MAX / expandsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6061 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6062
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6063 repr = PyBytes_FromStringAndSize(NULL, expandsize * len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6064 if (repr == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6065 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6066 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6067 return repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6068
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6069 p = q = PyBytes_AS_STRING(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6070 for (pos = 0; pos < len; pos++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6071 Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6072 /* Map 32-bit characters to '\Uxxxxxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6073 if (ch >= 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6074 assert(ch <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6075 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6076 *p++ = 'U';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6077 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6078 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6079 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6080 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6081 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6082 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6083 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6084 *p++ = Py_hexdigits[ch & 15];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6085 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6086 /* Map 16-bit characters to '\uxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6087 else if (ch >= 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6088 *p++ = '\\';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6089 *p++ = 'u';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6090 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6091 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6092 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6093 *p++ = Py_hexdigits[ch & 15];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6094 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6095 /* Copy everything else as-is */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6096 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6097 *p++ = (char) ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6098 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6099
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6100 assert(p > q);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6101 if (_PyBytes_Resize(&repr, p - q) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6102 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6103 return repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6104 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6105
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6106 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6107 PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6108 Py_ssize_t size)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6109 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6110 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6111 PyObject *tmp = PyUnicode_FromUnicode(s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6112 if (tmp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6113 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6114 result = PyUnicode_AsRawUnicodeEscapeString(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6115 Py_DECREF(tmp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6116 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6117 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6118
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6119 /* --- Unicode Internal Codec ------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6120
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6121 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6122 _PyUnicode_DecodeUnicodeInternal(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6123 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6124 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6125 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6126 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6127 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6128 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6129 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6130 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6131 const char *end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6132 const char *reason;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6133 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6134 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6135
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6136 if (PyErr_WarnEx(PyExc_DeprecationWarning,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6137 "unicode_internal codec has been deprecated",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6138 1))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6139 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6140
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6141 /* XXX overflow detection missing */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6142 v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6143 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6144 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6145 if (PyUnicode_GET_LENGTH(v) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6146 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6147 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6148 end = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6149
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6150 while (s < end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6151 Py_UNICODE uch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6152 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6153 /* We copy the raw representation one byte at a time because the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6154 pointer may be unaligned (see test_codeccallbacks). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6155 ((char *) &uch)[0] = s[0];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6156 ((char *) &uch)[1] = s[1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6157 #ifdef Py_UNICODE_WIDE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6158 ((char *) &uch)[2] = s[2];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6159 ((char *) &uch)[3] = s[3];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6160 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6161 ch = uch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6162
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6163 /* We have to sanity check the raw data, otherwise doom looms for
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6164 some malformed UCS-4 data. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6165 if (
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6166 #ifdef Py_UNICODE_WIDE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6167 ch > 0x10ffff ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6168 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6169 end-s < Py_UNICODE_SIZE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6170 )
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6171 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6172 startinpos = s - starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6173 if (end-s < Py_UNICODE_SIZE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6174 endinpos = end-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6175 reason = "truncated input";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6176 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6177 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6178 endinpos = s - starts + Py_UNICODE_SIZE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6179 reason = "illegal code point (> 0x10FFFF)";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6180 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6181 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6182 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6183 "unicode_internal", reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6184 &starts, &end, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6185 &v, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6186 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6187 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6188 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6189
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6190 s += Py_UNICODE_SIZE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6191 #ifndef Py_UNICODE_WIDE
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6192 if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6193 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6194 Py_UNICODE uch2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6195 ((char *) &uch2)[0] = s[0];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6196 ((char *) &uch2)[1] = s[1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6197 if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6198 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6199 ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6200 s += Py_UNICODE_SIZE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6201 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6202 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6203 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6204
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6205 if (unicode_putchar(&v, &outpos, ch) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6206 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6207 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6208
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6209 if (PyUnicode_Resize(&v, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6210 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6211 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6212 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6213 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6214
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6215 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6216 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6217 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6218 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6219 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6220 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6221
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6222 /* --- Latin-1 Codec ------------------------------------------------------ */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6223
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6224 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6225 PyUnicode_DecodeLatin1(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6226 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6227 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6228 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6229 /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6230 return _PyUnicode_FromUCS1((unsigned char*)s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6231 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6232
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6233 /* create or adjust a UnicodeEncodeError */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6234 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6235 make_encode_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6236 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6237 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6238 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6239 const char *reason)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6240 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6241 if (*exceptionObject == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6242 *exceptionObject = PyObject_CallFunction(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6243 PyExc_UnicodeEncodeError, "sOnns",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6244 encoding, unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6245 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6246 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6247 if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6248 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6249 if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6250 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6251 if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6252 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6253 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6254 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6255 Py_DECREF(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6256 *exceptionObject = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6257 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6258 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6259
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6260 /* raises a UnicodeEncodeError */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6261 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6262 raise_encode_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6263 const char *encoding,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6264 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6265 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6266 const char *reason)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6267 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6268 make_encode_exception(exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6269 encoding, unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6270 if (*exceptionObject != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6271 PyCodec_StrictErrors(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6272 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6273
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6274 /* error handling callback helper:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6275 build arguments, call the callback and check the arguments,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6276 put the result into newpos and return the replacement string, which
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6277 has to be freed by the caller */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6278 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6279 unicode_encode_call_errorhandler(const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6280 PyObject **errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6281 const char *encoding, const char *reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6282 PyObject *unicode, PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6283 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6284 Py_ssize_t *newpos)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6285 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6286 static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6287 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6288 PyObject *restuple;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6289 PyObject *resunicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6290
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6291 if (*errorHandler == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6292 *errorHandler = PyCodec_LookupError(errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6293 if (*errorHandler == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6294 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6295 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6296
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6297 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6298 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6299 len = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6300
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6301 make_encode_exception(exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6302 encoding, unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6303 if (*exceptionObject == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6304 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6305
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6306 restuple = PyObject_CallFunctionObjArgs(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6307 *errorHandler, *exceptionObject, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6308 if (restuple == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6309 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6310 if (!PyTuple_Check(restuple)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6311 PyErr_SetString(PyExc_TypeError, &argparse[3]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6312 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6313 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6314 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6315 if (!PyArg_ParseTuple(restuple, argparse,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6316 &resunicode, newpos)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6317 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6318 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6319 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6320 if (!PyUnicode_Check(resunicode) && !PyBytes_Check(resunicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6321 PyErr_SetString(PyExc_TypeError, &argparse[3]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6322 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6323 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6324 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6325 if (*newpos<0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6326 *newpos = len + *newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6327 if (*newpos<0 || *newpos>len) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6328 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6329 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6330 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6331 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6332 Py_INCREF(resunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6333 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6334 return resunicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6335 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6336
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6337 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6338 unicode_encode_ucs1(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6339 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6340 unsigned int limit)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6341 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6342 /* input state */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6343 Py_ssize_t pos=0, size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6344 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6345 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6346 /* output object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6347 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6348 /* pointer into the output */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6349 char *str;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6350 /* current output position */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6351 Py_ssize_t ressize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6352 const char *encoding = (limit == 256) ? "latin-1" : "ascii";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6353 const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6354 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6355 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6356 /* the following variable is used for caching string comparisons
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6357 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6358 int known_errorHandler = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6359
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6360 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6361 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6362 size = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6363 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6364 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6365 /* allocate enough for a simple encoding without
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6366 replacements, if we need more, we'll resize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6367 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6368 return PyBytes_FromStringAndSize(NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6369 res = PyBytes_FromStringAndSize(NULL, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6370 if (res == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6371 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6372 str = PyBytes_AS_STRING(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6373 ressize = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6374
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6375 while (pos < size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6376 Py_UCS4 c = PyUnicode_READ(kind, data, pos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6377
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6378 /* can we encode this? */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6379 if (c<limit) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6380 /* no overflow check, because we know that the space is enough */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6381 *str++ = (char)c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6382 ++pos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6383 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6384 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6385 Py_ssize_t requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6386 PyObject *repunicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6387 Py_ssize_t repsize, newpos, respos, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6388 /* startpos for collecting unencodable chars */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6389 Py_ssize_t collstart = pos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6390 Py_ssize_t collend = pos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6391 /* find all unencodable characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6392 while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6393 ++collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6394 /* cache callback name lookup (if not done yet, i.e. it's the first error) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6395 if (known_errorHandler==-1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6396 if ((errors==NULL) || (!strcmp(errors, "strict")))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6397 known_errorHandler = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6398 else if (!strcmp(errors, "replace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6399 known_errorHandler = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6400 else if (!strcmp(errors, "ignore"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6401 known_errorHandler = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6402 else if (!strcmp(errors, "xmlcharrefreplace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6403 known_errorHandler = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6404 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6405 known_errorHandler = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6406 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6407 switch (known_errorHandler) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6408 case 1: /* strict */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6409 raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6410 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6411 case 2: /* replace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6412 while (collstart++<collend)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6413 *str++ = '?'; /* fall through */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6414 case 3: /* ignore */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6415 pos = collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6416 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6417 case 4: /* xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6418 respos = str - PyBytes_AS_STRING(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6419 /* determine replacement size */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6420 for (i = collstart, repsize = 0; i < collend; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6421 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6422 if (ch < 10)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6423 repsize += 2+1+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6424 else if (ch < 100)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6425 repsize += 2+2+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6426 else if (ch < 1000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6427 repsize += 2+3+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6428 else if (ch < 10000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6429 repsize += 2+4+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6430 else if (ch < 100000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6431 repsize += 2+5+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6432 else if (ch < 1000000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6433 repsize += 2+6+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6434 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6435 assert(ch <= MAX_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6436 repsize += 2+7+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6437 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6438 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6439 requiredsize = respos+repsize+(size-collend);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6440 if (requiredsize > ressize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6441 if (requiredsize<2*ressize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6442 requiredsize = 2*ressize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6443 if (_PyBytes_Resize(&res, requiredsize))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6444 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6445 str = PyBytes_AS_STRING(res) + respos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6446 ressize = requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6447 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6448 /* generate replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6449 for (i = collstart; i < collend; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6450 str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6451 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6452 pos = collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6453 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6454 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6455 repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6456 encoding, reason, unicode, &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6457 collstart, collend, &newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6458 if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6459 PyUnicode_READY(repunicode) < 0))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6460 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6461 if (PyBytes_Check(repunicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6462 /* Directly copy bytes result to output. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6463 repsize = PyBytes_Size(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6464 if (repsize > 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6465 /* Make room for all additional bytes. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6466 respos = str - PyBytes_AS_STRING(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6467 if (_PyBytes_Resize(&res, ressize+repsize-1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6468 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6469 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6470 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6471 str = PyBytes_AS_STRING(res) + respos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6472 ressize += repsize-1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6473 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6474 memcpy(str, PyBytes_AsString(repunicode), repsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6475 str += repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6476 pos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6477 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6478 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6479 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6480 /* need more space? (at least enough for what we
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6481 have+the replacement+the rest of the string, so
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6482 we won't have to check space for encodable characters) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6483 respos = str - PyBytes_AS_STRING(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6484 repsize = PyUnicode_GET_LENGTH(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6485 requiredsize = respos+repsize+(size-collend);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6486 if (requiredsize > ressize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6487 if (requiredsize<2*ressize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6488 requiredsize = 2*ressize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6489 if (_PyBytes_Resize(&res, requiredsize)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6490 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6491 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6492 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6493 str = PyBytes_AS_STRING(res) + respos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6494 ressize = requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6495 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6496 /* check if there is anything unencodable in the replacement
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6497 and copy it to the output */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6498 for (i = 0; repsize-->0; ++i, ++str) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6499 c = PyUnicode_READ_CHAR(repunicode, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6500 if (c >= limit) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6501 raise_encode_exception(&exc, encoding, unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6502 pos, pos+1, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6503 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6504 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6505 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6506 *str = (char)c;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6507 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6508 pos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6509 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6510 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6511 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6512 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6513 /* Resize if we allocated too much */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6514 size = str - PyBytes_AS_STRING(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6515 if (size < ressize) { /* If this fails res will be NULL */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6516 assert(size >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6517 if (_PyBytes_Resize(&res, size) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6518 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6519 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6520
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6521 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6522 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6523 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6524
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6525 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6526 Py_XDECREF(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6527 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6528 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6529 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6530 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6531
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6532 /* Deprecated */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6533 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6534 PyUnicode_EncodeLatin1(const Py_UNICODE *p,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6535 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6536 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6537 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6538 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6539 PyObject *unicode = PyUnicode_FromUnicode(p, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6540 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6541 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6542 result = unicode_encode_ucs1(unicode, errors, 256);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6543 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6544 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6545 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6546
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6547 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6548 _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6549 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6550 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6551 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6552 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6553 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6554 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6555 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6556 /* Fast path: if it is a one-byte string, construct
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6557 bytes object directly. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6558 if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6559 return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6560 PyUnicode_GET_LENGTH(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6561 /* Non-Latin-1 characters present. Defer to above function to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6562 raise the exception. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6563 return unicode_encode_ucs1(unicode, errors, 256);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6564 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6565
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6566 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6567 PyUnicode_AsLatin1String(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6568 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6569 return _PyUnicode_AsLatin1String(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6570 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6571
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6572 /* --- 7-bit ASCII Codec -------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6573
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6574 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6575 PyUnicode_DecodeASCII(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6576 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6577 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6578 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6579 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6580 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6581 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6582 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6583 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6584 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6585 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6586 const char *e;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6587 int has_error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6588 const unsigned char *p = (const unsigned char *)s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6589 const unsigned char *end = p + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6590 const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6591 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6592 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6593
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6594 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6595 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6596 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6597 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6598
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6599 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6600 if (size == 1 && (unsigned char)s[0] < 128)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6601 return get_latin1_char((unsigned char)s[0]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6602
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6603 has_error = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6604 while (p < end && !has_error) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6605 /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6606 an explanation. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6607 if (!((size_t) p & LONG_PTR_MASK)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6608 /* Help register allocation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6609 register const unsigned char *_p = p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6610 while (_p < aligned_end) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6611 unsigned long value = *(unsigned long *) _p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6612 if (value & ASCII_CHAR_MASK) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6613 has_error = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6614 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6615 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6616 _p += SIZEOF_LONG;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6617 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6618 if (_p == end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6619 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6620 if (has_error)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6621 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6622 p = _p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6623 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6624 if (*p & 0x80) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6625 has_error = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6626 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6627 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6628 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6629 ++p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6630 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6631 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6632 if (!has_error)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6633 return unicode_fromascii((const unsigned char *)s, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6634
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6635 v = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6636 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6637 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6638 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6639 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6640 kind = PyUnicode_KIND(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6641 data = PyUnicode_DATA(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6642 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6643 e = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6644 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6645 register unsigned char c = (unsigned char)*s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6646 if (c < 128) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6647 PyUnicode_WRITE(kind, data, outpos++, c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6648 ++s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6649 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6650 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6651 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6652 endinpos = startinpos + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6653 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6654 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6655 "ascii", "ordinal not in range(128)",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6656 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6657 &v, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6658 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6659 kind = PyUnicode_KIND(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6660 data = PyUnicode_DATA(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6661 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6662 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6663 if (PyUnicode_Resize(&v, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6664 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6665 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6666 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6667 assert(_PyUnicode_CheckConsistency(v, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6668 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6669
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6670 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6671 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6672 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6673 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6674 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6675 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6676
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6677 /* Deprecated */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6678 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6679 PyUnicode_EncodeASCII(const Py_UNICODE *p,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6680 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6681 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6682 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6683 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6684 PyObject *unicode = PyUnicode_FromUnicode(p, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6685 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6686 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6687 result = unicode_encode_ucs1(unicode, errors, 128);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6688 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6689 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6690 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6691
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6692 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6693 _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6694 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6695 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6696 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6697 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6698 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6699 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6700 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6701 /* Fast path: if it is an ASCII-only string, construct bytes object
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6702 directly. Else defer to above function to raise the exception. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6703 if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6704 return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6705 PyUnicode_GET_LENGTH(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6706 return unicode_encode_ucs1(unicode, errors, 128);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6707 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6708
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6709 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6710 PyUnicode_AsASCIIString(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6711 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6712 return _PyUnicode_AsASCIIString(unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6713 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6714
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6715 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6716
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6717 /* --- MBCS codecs for Windows -------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6718
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6719 #if SIZEOF_INT < SIZEOF_SIZE_T
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6720 #define NEED_RETRY
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6721 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6722
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6723 #ifndef WC_ERR_INVALID_CHARS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6724 # define WC_ERR_INVALID_CHARS 0x0080
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6725 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6726
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6727 static char*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6728 code_page_name(UINT code_page, PyObject **obj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6729 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6730 *obj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6731 if (code_page == CP_ACP)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6732 return "mbcs";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6733 if (code_page == CP_UTF7)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6734 return "CP_UTF7";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6735 if (code_page == CP_UTF8)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6736 return "CP_UTF8";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6737
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6738 *obj = PyBytes_FromFormat("cp%u", code_page);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6739 if (*obj == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6740 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6741 return PyBytes_AS_STRING(*obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6742 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6743
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6744 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6745 is_dbcs_lead_byte(UINT code_page, const char *s, int offset)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6746 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6747 const char *curr = s + offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6748 const char *prev;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6749
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6750 if (!IsDBCSLeadByteEx(code_page, *curr))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6751 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6752
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6753 prev = CharPrevExA(code_page, s, curr, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6754 if (prev == curr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6755 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6756 /* FIXME: This code is limited to "true" double-byte encodings,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6757 as it assumes an incomplete character consists of a single
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6758 byte. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6759 if (curr - prev == 2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6760 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6761 if (!IsDBCSLeadByteEx(code_page, *prev))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6762 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6763 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6764 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6765
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6766 static DWORD
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6767 decode_code_page_flags(UINT code_page)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6768 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6769 if (code_page == CP_UTF7) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6770 /* The CP_UTF7 decoder only supports flags=0 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6771 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6772 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6773 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6774 return MB_ERR_INVALID_CHARS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6775 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6776
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6777 /*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6778 * Decode a byte string from a Windows code page into unicode object in strict
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6779 * mode.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6780 *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6781 * Returns consumed size if succeed, returns -2 on decode error, or raise a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6782 * WindowsError and returns -1 on other error.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6783 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6784 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6785 decode_code_page_strict(UINT code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6786 PyObject **v,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6787 const char *in,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6788 int insize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6789 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6790 const DWORD flags = decode_code_page_flags(code_page);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6791 wchar_t *out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6792 DWORD outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6793
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6794 /* First get the size of the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6795 assert(insize > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6796 outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6797 if (outsize <= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6798 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6799
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6800 if (*v == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6801 /* Create unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6802 *v = (PyObject*)_PyUnicode_New(outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6803 if (*v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6804 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6805 out = PyUnicode_AS_UNICODE(*v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6806 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6807 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6808 /* Extend unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6809 Py_ssize_t n = PyUnicode_GET_SIZE(*v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6810 if (PyUnicode_Resize(v, n + outsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6811 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6812 out = PyUnicode_AS_UNICODE(*v) + n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6813 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6814
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6815 /* Do the conversion */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6816 outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6817 if (outsize <= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6818 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6819 return insize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6820
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6821 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6822 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6823 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6824 PyErr_SetFromWindowsErr(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6825 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6826 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6827
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6828 /*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6829 * Decode a byte string from a code page into unicode object with an error
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6830 * handler.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6831 *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6832 * Returns consumed size if succeed, or raise a WindowsError or
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6833 * UnicodeDecodeError exception and returns -1 on error.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6834 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6835 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6836 decode_code_page_errors(UINT code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6837 PyObject **v,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6838 const char *in, const int size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6839 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6840 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6841 const char *startin = in;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6842 const char *endin = in + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6843 const DWORD flags = decode_code_page_flags(code_page);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6844 /* Ideally, we should get reason from FormatMessage. This is the Windows
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6845 2000 English version of the message. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6846 const char *reason = "No mapping for the Unicode character exists "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6847 "in the target code page.";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6848 /* each step cannot decode more than 1 character, but a character can be
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6849 represented as a surrogate pair */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6850 wchar_t buffer[2], *startout, *out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6851 int insize, outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6852 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6853 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6854 PyObject *encoding_obj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6855 char *encoding;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6856 DWORD err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6857 int ret = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6858
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6859 assert(size > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6860
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6861 encoding = code_page_name(code_page, &encoding_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6862 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6863 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6864
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6865 if (errors == NULL || strcmp(errors, "strict") == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6866 /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6867 UnicodeDecodeError. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6868 make_decode_exception(&exc, encoding, in, size, 0, 0, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6869 if (exc != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6870 PyCodec_StrictErrors(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6871 Py_CLEAR(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6872 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6873 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6874 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6875
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6876 if (*v == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6877 /* Create unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6878 if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6879 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6880 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6881 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6882 *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6883 if (*v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6884 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6885 startout = PyUnicode_AS_UNICODE(*v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6886 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6887 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6888 /* Extend unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6889 Py_ssize_t n = PyUnicode_GET_SIZE(*v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6890 if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6891 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6892 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6893 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6894 if (PyUnicode_Resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6895 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6896 startout = PyUnicode_AS_UNICODE(*v) + n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6897 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6898
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6899 /* Decode the byte string character per character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6900 out = startout;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6901 while (in < endin)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6902 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6903 /* Decode a character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6904 insize = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6905 do
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6906 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6907 outsize = MultiByteToWideChar(code_page, flags,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6908 in, insize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6909 buffer, Py_ARRAY_LENGTH(buffer));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6910 if (outsize > 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6911 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6912 err = GetLastError();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6913 if (err != ERROR_NO_UNICODE_TRANSLATION
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6914 && err != ERROR_INSUFFICIENT_BUFFER)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6915 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6916 PyErr_SetFromWindowsErr(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6917 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6918 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6919 insize++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6920 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6921 /* 4=maximum length of a UTF-8 sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6922 while (insize <= 4 && (in + insize) <= endin);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6923
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6924 if (outsize <= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6925 Py_ssize_t startinpos, endinpos, outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6926
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6927 startinpos = in - startin;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6928 endinpos = startinpos + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6929 outpos = out - PyUnicode_AS_UNICODE(*v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6930 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6931 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6932 encoding, reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6933 &startin, &endin, &startinpos, &endinpos, &exc, &in,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6934 v, &outpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6935 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6936 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6937 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6938 out = PyUnicode_AS_UNICODE(*v) + outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6939 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6940 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6941 in += insize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6942 memcpy(out, buffer, outsize * sizeof(wchar_t));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6943 out += outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6944 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6945 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6946
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6947 /* write a NUL character at the end */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6948 *out = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6949
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6950 /* Extend unicode object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6951 outsize = out - startout;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6952 assert(outsize <= PyUnicode_WSTR_LENGTH(*v));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6953 if (PyUnicode_Resize(v, outsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6954 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6955 ret = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6956
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6957 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6958 Py_XDECREF(encoding_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6959 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6960 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6961 return ret;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6962 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6963
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6964 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6965 decode_code_page_stateful(int code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6966 const char *s, Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6967 const char *errors, Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6968 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6969 PyObject *v = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6970 int chunk_size, final, converted, done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6971
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6972 if (code_page < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6973 PyErr_SetString(PyExc_ValueError, "invalid code page number");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6974 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6975 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6976
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6977 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6978 *consumed = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6979
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6980 do
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6981 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6982 #ifdef NEED_RETRY
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6983 if (size > INT_MAX) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6984 chunk_size = INT_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6985 final = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6986 done = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6987 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6988 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6989 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6990 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6991 chunk_size = (int)size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6992 final = (consumed == NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6993 done = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6994 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6995
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6996 /* Skip trailing lead-byte unless 'final' is set */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6997 if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6998 --chunk_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
6999
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7000 if (chunk_size == 0 && done) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7001 if (v != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7002 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7003 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7004 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7005 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7006
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7007
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7008 converted = decode_code_page_strict(code_page, &v,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7009 s, chunk_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7010 if (converted == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7011 converted = decode_code_page_errors(code_page, &v,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7012 s, chunk_size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7013 errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7014 assert(converted != 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7015
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7016 if (converted < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7017 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7018 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7019 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7020
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7021 if (consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7022 *consumed += converted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7023
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7024 s += converted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7025 size -= converted;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7026 } while (!done);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7027
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7028 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7029 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7030
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7031 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7032 PyUnicode_DecodeCodePageStateful(int code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7033 const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7034 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7035 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7036 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7037 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7038 return decode_code_page_stateful(code_page, s, size, errors, consumed);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7039 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7040
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7041 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7042 PyUnicode_DecodeMBCSStateful(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7043 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7044 const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7045 Py_ssize_t *consumed)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7046 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7047 return decode_code_page_stateful(CP_ACP, s, size, errors, consumed);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7048 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7049
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7050 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7051 PyUnicode_DecodeMBCS(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7052 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7053 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7054 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7055 return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7056 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7057
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7058 static DWORD
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7059 encode_code_page_flags(UINT code_page, const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7060 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7061 if (code_page == CP_UTF8) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7062 if (winver.dwMajorVersion >= 6)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7063 /* CP_UTF8 supports WC_ERR_INVALID_CHARS on Windows Vista
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7064 and later */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7065 return WC_ERR_INVALID_CHARS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7066 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7067 /* CP_UTF8 only supports flags=0 on Windows older than Vista */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7068 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7069 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7070 else if (code_page == CP_UTF7) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7071 /* CP_UTF7 only supports flags=0 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7072 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7073 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7074 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7075 if (errors != NULL && strcmp(errors, "replace") == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7076 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7077 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7078 return WC_NO_BEST_FIT_CHARS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7079 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7080 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7081
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7082 /*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7083 * Encode a Unicode string to a Windows code page into a byte string in strict
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7084 * mode.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7085 *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7086 * Returns consumed characters if succeed, returns -2 on encode error, or raise
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7087 * a WindowsError and returns -1 on other error.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7088 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7089 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7090 encode_code_page_strict(UINT code_page, PyObject **outbytes,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7091 PyObject *unicode, Py_ssize_t offset, int len,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7092 const char* errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7093 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7094 BOOL usedDefaultChar = FALSE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7095 BOOL *pusedDefaultChar = &usedDefaultChar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7096 int outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7097 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7098 wchar_t *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7099 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7100 const DWORD flags = encode_code_page_flags(code_page, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7101 char *out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7102 /* Create a substring so that we can get the UTF-16 representation
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7103 of just the slice under consideration. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7104 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7105
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7106 assert(len > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7107
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7108 if (code_page != CP_UTF8 && code_page != CP_UTF7)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7109 pusedDefaultChar = &usedDefaultChar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7110 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7111 pusedDefaultChar = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7112
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7113 substring = PyUnicode_Substring(unicode, offset, offset+len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7114 if (substring == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7115 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7116 p = PyUnicode_AsUnicodeAndSize(substring, &size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7117 if (p == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7118 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7119 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7120 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7121
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7122 /* First get the size of the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7123 outsize = WideCharToMultiByte(code_page, flags,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7124 p, size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7125 NULL, 0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7126 NULL, pusedDefaultChar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7127 if (outsize <= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7128 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7129 /* If we used a default char, then we failed! */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7130 if (pusedDefaultChar && *pusedDefaultChar) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7131 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7132 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7133 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7134
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7135 if (*outbytes == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7136 /* Create string object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7137 *outbytes = PyBytes_FromStringAndSize(NULL, outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7138 if (*outbytes == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7139 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7140 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7141 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7142 out = PyBytes_AS_STRING(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7143 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7144 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7145 /* Extend string object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7146 const Py_ssize_t n = PyBytes_Size(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7147 if (outsize > PY_SSIZE_T_MAX - n) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7148 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7149 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7150 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7151 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7152 if (_PyBytes_Resize(outbytes, n + outsize) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7153 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7154 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7155 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7156 out = PyBytes_AS_STRING(*outbytes) + n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7157 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7158
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7159 /* Do the conversion */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7160 outsize = WideCharToMultiByte(code_page, flags,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7161 p, size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7162 out, outsize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7163 NULL, pusedDefaultChar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7164 Py_CLEAR(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7165 if (outsize <= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7166 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7167 if (pusedDefaultChar && *pusedDefaultChar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7168 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7169 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7170
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7171 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7172 Py_XDECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7173 if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7174 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7175 PyErr_SetFromWindowsErr(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7176 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7177 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7178
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7179 /*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7180 * Encode a Unicode string to a Windows code page into a byte string using a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7181 * error handler.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7182 *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7183 * Returns consumed characters if succeed, or raise a WindowsError and returns
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7184 * -1 on other error.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7185 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7186 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7187 encode_code_page_errors(UINT code_page, PyObject **outbytes,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7188 PyObject *unicode, Py_ssize_t unicode_offset,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7189 Py_ssize_t insize, const char* errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7190 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7191 const DWORD flags = encode_code_page_flags(code_page, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7192 Py_ssize_t pos = unicode_offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7193 Py_ssize_t endin = unicode_offset + insize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7194 /* Ideally, we should get reason from FormatMessage. This is the Windows
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7195 2000 English version of the message. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7196 const char *reason = "invalid character";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7197 /* 4=maximum length of a UTF-8 sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7198 char buffer[4];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7199 BOOL usedDefaultChar = FALSE, *pusedDefaultChar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7200 Py_ssize_t outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7201 char *out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7202 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7203 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7204 PyObject *encoding_obj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7205 char *encoding;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7206 Py_ssize_t newpos, newoutsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7207 PyObject *rep;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7208 int ret = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7209
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7210 assert(insize > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7211
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7212 encoding = code_page_name(code_page, &encoding_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7213 if (encoding == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7214 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7215
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7216 if (errors == NULL || strcmp(errors, "strict") == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7217 /* The last error was ERROR_NO_UNICODE_TRANSLATION,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7218 then we raise a UnicodeEncodeError. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7219 make_encode_exception(&exc, encoding, unicode, 0, 0, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7220 if (exc != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7221 PyCodec_StrictErrors(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7222 Py_DECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7223 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7224 Py_XDECREF(encoding_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7225 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7226 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7227
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7228 if (code_page != CP_UTF8 && code_page != CP_UTF7)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7229 pusedDefaultChar = &usedDefaultChar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7230 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7231 pusedDefaultChar = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7232
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7233 if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7234 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7235 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7236 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7237 outsize = insize * Py_ARRAY_LENGTH(buffer);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7238
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7239 if (*outbytes == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7240 /* Create string object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7241 *outbytes = PyBytes_FromStringAndSize(NULL, outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7242 if (*outbytes == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7243 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7244 out = PyBytes_AS_STRING(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7245 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7246 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7247 /* Extend string object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7248 Py_ssize_t n = PyBytes_Size(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7249 if (n > PY_SSIZE_T_MAX - outsize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7250 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7251 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7252 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7253 if (_PyBytes_Resize(outbytes, n + outsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7254 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7255 out = PyBytes_AS_STRING(*outbytes) + n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7256 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7257
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7258 /* Encode the string character per character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7259 while (pos < endin)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7260 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7261 Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, pos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7262 wchar_t chars[2];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7263 int charsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7264 if (ch < 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7265 chars[0] = (wchar_t)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7266 charsize = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7267 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7268 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7269 ch -= 0x10000;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7270 chars[0] = 0xd800 + (ch >> 10);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7271 chars[1] = 0xdc00 + (ch & 0x3ff);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7272 charsize = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7273 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7274
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7275 outsize = WideCharToMultiByte(code_page, flags,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7276 chars, charsize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7277 buffer, Py_ARRAY_LENGTH(buffer),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7278 NULL, pusedDefaultChar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7279 if (outsize > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7280 if (pusedDefaultChar == NULL || !(*pusedDefaultChar))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7281 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7282 pos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7283 memcpy(out, buffer, outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7284 out += outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7285 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7286 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7287 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7288 else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7289 PyErr_SetFromWindowsErr(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7290 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7291 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7292
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7293 rep = unicode_encode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7294 errors, &errorHandler, encoding, reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7295 unicode, &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7296 pos, pos + 1, &newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7297 if (rep == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7298 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7299 pos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7300
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7301 if (PyBytes_Check(rep)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7302 outsize = PyBytes_GET_SIZE(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7303 if (outsize != 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7304 Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7305 newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7306 if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7307 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7308 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7309 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7310 out = PyBytes_AS_STRING(*outbytes) + offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7311 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7312 memcpy(out, PyBytes_AS_STRING(rep), outsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7313 out += outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7314 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7315 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7316 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7317 enum PyUnicode_Kind kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7318 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7319
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7320 if (PyUnicode_READY(rep) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7321 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7322 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7323 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7324
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7325 outsize = PyUnicode_GET_LENGTH(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7326 if (outsize != 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7327 Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7328 newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7329 if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7330 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7331 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7332 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7333 out = PyBytes_AS_STRING(*outbytes) + offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7334 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7335 kind = PyUnicode_KIND(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7336 data = PyUnicode_DATA(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7337 for (i=0; i < outsize; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7338 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7339 if (ch > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7340 raise_encode_exception(&exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7341 encoding, unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7342 pos, pos + 1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7343 "unable to encode error handler result to ASCII");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7344 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7345 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7346 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7347 *out = (unsigned char)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7348 out++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7349 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7350 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7351 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7352 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7353 /* write a NUL byte */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7354 *out = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7355 outsize = out - PyBytes_AS_STRING(*outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7356 assert(outsize <= PyBytes_GET_SIZE(*outbytes));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7357 if (_PyBytes_Resize(outbytes, outsize) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7358 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7359 ret = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7360
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7361 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7362 Py_XDECREF(encoding_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7363 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7364 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7365 return ret;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7366 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7367
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7368 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7369 encode_code_page(int code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7370 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7371 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7372 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7373 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7374 PyObject *outbytes = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7375 Py_ssize_t offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7376 int chunk_len, ret, done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7377
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7378 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7379 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7380 len = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7381
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7382 if (code_page < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7383 PyErr_SetString(PyExc_ValueError, "invalid code page number");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7384 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7385 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7386
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7387 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7388 return PyBytes_FromStringAndSize(NULL, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7389
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7390 offset = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7391 do
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7392 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7393 #ifdef NEED_RETRY
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7394 /* UTF-16 encoding may double the size, so use only INT_MAX/2
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7395 chunks. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7396 if (len > INT_MAX/2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7397 chunk_len = INT_MAX/2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7398 done = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7399 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7400 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7401 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7402 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7403 chunk_len = (int)len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7404 done = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7405 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7406
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7407 ret = encode_code_page_strict(code_page, &outbytes,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7408 unicode, offset, chunk_len,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7409 errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7410 if (ret == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7411 ret = encode_code_page_errors(code_page, &outbytes,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7412 unicode, offset,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7413 chunk_len, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7414 if (ret < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7415 Py_XDECREF(outbytes);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7416 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7417 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7418
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7419 offset += chunk_len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7420 len -= chunk_len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7421 } while (!done);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7422
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7423 return outbytes;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7424 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7425
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7426 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7427 PyUnicode_EncodeMBCS(const Py_UNICODE *p,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7428 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7429 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7430 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7431 PyObject *unicode, *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7432 unicode = PyUnicode_FromUnicode(p, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7433 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7434 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7435 res = encode_code_page(CP_ACP, unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7436 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7437 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7438 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7439
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7440 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7441 PyUnicode_EncodeCodePage(int code_page,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7442 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7443 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7444 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7445 return encode_code_page(code_page, unicode, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7446 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7447
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7448 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7449 PyUnicode_AsMBCSString(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7450 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7451 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7452 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7453 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7454 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7455 return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7456 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7457
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7458 #undef NEED_RETRY
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7459
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7460 #endif /* HAVE_MBCS */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7461
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7462 /* --- Character Mapping Codec -------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7463
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7464 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7465 PyUnicode_DecodeCharmap(const char *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7466 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7467 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7468 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7469 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7470 const char *starts = s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7471 Py_ssize_t startinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7472 Py_ssize_t endinpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7473 Py_ssize_t outpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7474 const char *e;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7475 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7476 Py_ssize_t extrachars = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7477 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7478 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7479
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7480 /* Default to Latin-1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7481 if (mapping == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7482 return PyUnicode_DecodeLatin1(s, size, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7483
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7484 v = PyUnicode_New(size, 127);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7485 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7486 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7487 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7488 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7489 outpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7490 e = s + size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7491 if (PyUnicode_CheckExact(mapping)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7492 Py_ssize_t maplen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7493 enum PyUnicode_Kind kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7494 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7495 Py_UCS4 x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7496
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7497 if (PyUnicode_READY(mapping) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7498 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7499
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7500 maplen = PyUnicode_GET_LENGTH(mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7501 data = PyUnicode_DATA(mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7502 kind = PyUnicode_KIND(mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7503 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7504 unsigned char ch = *s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7505
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7506 if (ch < maplen)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7507 x = PyUnicode_READ(kind, data, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7508 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7509 x = 0xfffe; /* invalid value */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7510
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7511 if (x == 0xfffe)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7512 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7513 /* undefined mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7514 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7515 endinpos = startinpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7516 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7517 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7518 "charmap", "character maps to <undefined>",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7519 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7520 &v, &outpos)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7521 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7522 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7523 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7524 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7525
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7526 if (unicode_putchar(&v, &outpos, x) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7527 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7528 ++s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7529 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7530 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7531 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7532 while (s < e) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7533 unsigned char ch = *s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7534 PyObject *w, *x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7535
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7536 /* Get mapping (char ordinal -> integer, Unicode char or None) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7537 w = PyLong_FromLong((long)ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7538 if (w == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7539 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7540 x = PyObject_GetItem(mapping, w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7541 Py_DECREF(w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7542 if (x == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7543 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7544 /* No mapping found means: mapping is undefined. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7545 PyErr_Clear();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7546 x = Py_None;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7547 Py_INCREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7548 } else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7549 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7550 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7551
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7552 /* Apply mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7553 if (PyLong_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7554 long value = PyLong_AS_LONG(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7555 if (value < 0 || value > 65535) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7556 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7557 "character mapping must be in range(65536)");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7558 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7559 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7560 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7561 if (unicode_putchar(&v, &outpos, value) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7562 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7563 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7564 else if (x == Py_None) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7565 /* undefined mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7566 startinpos = s-starts;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7567 endinpos = startinpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7568 if (unicode_decode_call_errorhandler(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7569 errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7570 "charmap", "character maps to <undefined>",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7571 &starts, &e, &startinpos, &endinpos, &exc, &s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7572 &v, &outpos)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7573 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7574 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7575 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7576 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7577 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7578 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7579 else if (PyUnicode_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7580 Py_ssize_t targetsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7581
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7582 if (PyUnicode_READY(x) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7583 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7584 targetsize = PyUnicode_GET_LENGTH(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7585
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7586 if (targetsize == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7587 /* 1-1 mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7588 if (unicode_putchar(&v, &outpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7589 PyUnicode_READ_CHAR(x, 0)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7590 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7591 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7592 else if (targetsize > 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7593 /* 1-n mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7594 if (targetsize > extrachars) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7595 /* resize first */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7596 Py_ssize_t needed = (targetsize - extrachars) + \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7597 (targetsize << 2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7598 extrachars += needed;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7599 /* XXX overflow detection missing */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7600 if (PyUnicode_Resize(&v,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7601 PyUnicode_GET_LENGTH(v) + needed) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7602 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7603 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7604 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7605 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7606 if (unicode_widen(&v, PyUnicode_MAX_CHAR_VALUE(x)) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7607 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7608 PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7609 outpos += targetsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7610 extrachars -= targetsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7611 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7612 /* 1-0 mapping: skip the character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7613 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7614 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7615 /* wrong return value */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7616 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7617 "character mapping must return integer, None or str");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7618 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7619 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7620 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7621 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7622 ++s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7623 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7624 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7625 if (PyUnicode_Resize(&v, outpos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7626 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7627 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7628 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7629 return unicode_result(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7630
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7631 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7632 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7633 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7634 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7635 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7636 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7637
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7638 /* Charmap encoding: the lookup table */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7639
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7640 struct encoding_map {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7641 PyObject_HEAD
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7642 unsigned char level1[32];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7643 int count2, count3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7644 unsigned char level23[1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7645 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7646
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7647 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7648 encoding_map_size(PyObject *obj, PyObject* args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7649 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7650 struct encoding_map *map = (struct encoding_map*)obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7651 return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7652 128*map->count3);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7653 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7654
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7655 static PyMethodDef encoding_map_methods[] = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7656 {"size", encoding_map_size, METH_NOARGS,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7657 PyDoc_STR("Return the size (in bytes) of this object") },
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7658 { 0 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7659 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7660
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7661 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7662 encoding_map_dealloc(PyObject* o)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7663 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7664 PyObject_FREE(o);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7665 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7666
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7667 static PyTypeObject EncodingMapType = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7668 PyVarObject_HEAD_INIT(NULL, 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7669 "EncodingMap", /*tp_name*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7670 sizeof(struct encoding_map), /*tp_basicsize*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7671 0, /*tp_itemsize*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7672 /* methods */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7673 encoding_map_dealloc, /*tp_dealloc*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7674 0, /*tp_print*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7675 0, /*tp_getattr*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7676 0, /*tp_setattr*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7677 0, /*tp_reserved*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7678 0, /*tp_repr*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7679 0, /*tp_as_number*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7680 0, /*tp_as_sequence*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7681 0, /*tp_as_mapping*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7682 0, /*tp_hash*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7683 0, /*tp_call*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7684 0, /*tp_str*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7685 0, /*tp_getattro*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7686 0, /*tp_setattro*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7687 0, /*tp_as_buffer*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7688 Py_TPFLAGS_DEFAULT, /*tp_flags*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7689 0, /*tp_doc*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7690 0, /*tp_traverse*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7691 0, /*tp_clear*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7692 0, /*tp_richcompare*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7693 0, /*tp_weaklistoffset*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7694 0, /*tp_iter*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7695 0, /*tp_iternext*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7696 encoding_map_methods, /*tp_methods*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7697 0, /*tp_members*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7698 0, /*tp_getset*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7699 0, /*tp_base*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7700 0, /*tp_dict*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7701 0, /*tp_descr_get*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7702 0, /*tp_descr_set*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7703 0, /*tp_dictoffset*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7704 0, /*tp_init*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7705 0, /*tp_alloc*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7706 0, /*tp_new*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7707 0, /*tp_free*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7708 0, /*tp_is_gc*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7709 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7710
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7711 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7712 PyUnicode_BuildEncodingMap(PyObject* string)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7713 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7714 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7715 struct encoding_map *mresult;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7716 int i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7717 int need_dict = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7718 unsigned char level1[32];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7719 unsigned char level2[512];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7720 unsigned char *mlevel1, *mlevel2, *mlevel3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7721 int count2 = 0, count3 = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7722 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7723 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7724 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7725
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7726 if (!PyUnicode_Check(string) || PyUnicode_GET_LENGTH(string) != 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7727 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7728 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7729 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7730 kind = PyUnicode_KIND(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7731 data = PyUnicode_DATA(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7732 memset(level1, 0xFF, sizeof level1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7733 memset(level2, 0xFF, sizeof level2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7734
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7735 /* If there isn't a one-to-one mapping of NULL to \0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7736 or if there are non-BMP characters, we need to use
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7737 a mapping dictionary. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7738 if (PyUnicode_READ(kind, data, 0) != 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7739 need_dict = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7740 for (i = 1; i < 256; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7741 int l1, l2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7742 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7743 if (ch == 0 || ch > 0xFFFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7744 need_dict = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7745 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7746 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7747 if (ch == 0xFFFE)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7748 /* unmapped character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7749 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7750 l1 = ch >> 11;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7751 l2 = ch >> 7;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7752 if (level1[l1] == 0xFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7753 level1[l1] = count2++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7754 if (level2[l2] == 0xFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7755 level2[l2] = count3++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7756 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7757
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7758 if (count2 >= 0xFF || count3 >= 0xFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7759 need_dict = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7760
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7761 if (need_dict) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7762 PyObject *result = PyDict_New();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7763 PyObject *key, *value;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7764 if (!result)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7765 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7766 for (i = 0; i < 256; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7767 key = PyLong_FromLong(PyUnicode_READ(kind, data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7768 value = PyLong_FromLong(i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7769 if (!key || !value)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7770 goto failed1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7771 if (PyDict_SetItem(result, key, value) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7772 goto failed1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7773 Py_DECREF(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7774 Py_DECREF(value);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7775 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7776 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7777 failed1:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7778 Py_XDECREF(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7779 Py_XDECREF(value);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7780 Py_DECREF(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7781 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7782 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7783
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7784 /* Create a three-level trie */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7785 result = PyObject_MALLOC(sizeof(struct encoding_map) +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7786 16*count2 + 128*count3 - 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7787 if (!result)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7788 return PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7789 PyObject_Init(result, &EncodingMapType);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7790 mresult = (struct encoding_map*)result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7791 mresult->count2 = count2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7792 mresult->count3 = count3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7793 mlevel1 = mresult->level1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7794 mlevel2 = mresult->level23;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7795 mlevel3 = mresult->level23 + 16*count2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7796 memcpy(mlevel1, level1, 32);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7797 memset(mlevel2, 0xFF, 16*count2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7798 memset(mlevel3, 0, 128*count3);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7799 count3 = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7800 for (i = 1; i < 256; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7801 int o1, o2, o3, i2, i3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7802 if (PyUnicode_READ(kind, data, i) == 0xFFFE)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7803 /* unmapped character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7804 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7805 o1 = PyUnicode_READ(kind, data, i)>>11;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7806 o2 = (PyUnicode_READ(kind, data, i)>>7) & 0xF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7807 i2 = 16*mlevel1[o1] + o2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7808 if (mlevel2[i2] == 0xFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7809 mlevel2[i2] = count3++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7810 o3 = PyUnicode_READ(kind, data, i) & 0x7F;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7811 i3 = 128*mlevel2[i2] + o3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7812 mlevel3[i3] = i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7813 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7814 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7815 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7816
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7817 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7818 encoding_map_lookup(Py_UCS4 c, PyObject *mapping)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7819 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7820 struct encoding_map *map = (struct encoding_map*)mapping;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7821 int l1 = c>>11;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7822 int l2 = (c>>7) & 0xF;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7823 int l3 = c & 0x7F;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7824 int i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7825
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7826 if (c > 0xFFFF)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7827 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7828 if (c == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7829 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7830 /* level 1*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7831 i = map->level1[l1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7832 if (i == 0xFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7833 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7834 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7835 /* level 2*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7836 i = map->level23[16*i+l2];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7837 if (i == 0xFF) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7838 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7839 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7840 /* level 3 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7841 i = map->level23[16*map->count2 + 128*i + l3];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7842 if (i == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7843 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7844 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7845 return i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7846 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7847
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7848 /* Lookup the character ch in the mapping. If the character
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7849 can't be found, Py_None is returned (or NULL, if another
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7850 error occurred). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7851 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7852 charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7853 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7854 PyObject *w = PyLong_FromLong((long)c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7855 PyObject *x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7856
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7857 if (w == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7858 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7859 x = PyObject_GetItem(mapping, w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7860 Py_DECREF(w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7861 if (x == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7862 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7863 /* No mapping found means: mapping is undefined. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7864 PyErr_Clear();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7865 x = Py_None;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7866 Py_INCREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7867 return x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7868 } else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7869 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7870 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7871 else if (x == Py_None)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7872 return x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7873 else if (PyLong_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7874 long value = PyLong_AS_LONG(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7875 if (value < 0 || value > 255) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7876 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7877 "character mapping must be in range(256)");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7878 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7879 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7880 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7881 return x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7882 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7883 else if (PyBytes_Check(x))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7884 return x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7885 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7886 /* wrong return value */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7887 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7888 "character mapping must return integer, bytes or None, not %.400s",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7889 x->ob_type->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7890 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7891 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7892 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7893 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7894
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7895 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7896 charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7897 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7898 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7899 /* exponentially overallocate to minimize reallocations */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7900 if (requiredsize < 2*outsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7901 requiredsize = 2*outsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7902 if (_PyBytes_Resize(outobj, requiredsize))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7903 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7904 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7905 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7906
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Outcome of encoding a single character through a charmap. */
typedef enum charmapencode_result {
    enc_SUCCESS,    /* the character was written to the output */
    enc_FAILED,     /* the mapping has no entry for the character */
    enc_EXCEPTION   /* an error occurred (lookup or resize failed) */
} charmapencode_result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7910 /* lookup the character, put the result in the output string and adjust
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7911 various state variables. Resize the output bytes object if not enough
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7912 space is available. Return a new reference to the object that
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7913 was put in the output buffer, or Py_None, if the mapping was undefined
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7914 (in which case no character was written) or NULL, if a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7915 reallocation error occurred. The caller must decref the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7916 static charmapencode_result
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7917 charmapencode_output(Py_UCS4 c, PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7918 PyObject **outobj, Py_ssize_t *outpos)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7919 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7920 PyObject *rep;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7921 char *outstart;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7922 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7923
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7924 if (Py_TYPE(mapping) == &EncodingMapType) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7925 int res = encoding_map_lookup(c, mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7926 Py_ssize_t requiredsize = *outpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7927 if (res == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7928 return enc_FAILED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7929 if (outsize<requiredsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7930 if (charmapencode_resize(outobj, outpos, requiredsize))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7931 return enc_EXCEPTION;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7932 outstart = PyBytes_AS_STRING(*outobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7933 outstart[(*outpos)++] = (char)res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7934 return enc_SUCCESS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7935 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7936
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7937 rep = charmapencode_lookup(c, mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7938 if (rep==NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7939 return enc_EXCEPTION;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7940 else if (rep==Py_None) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7941 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7942 return enc_FAILED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7943 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7944 if (PyLong_Check(rep)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7945 Py_ssize_t requiredsize = *outpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7946 if (outsize<requiredsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7947 if (charmapencode_resize(outobj, outpos, requiredsize)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7948 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7949 return enc_EXCEPTION;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7950 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7951 outstart = PyBytes_AS_STRING(*outobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7952 outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7953 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7954 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7955 const char *repchars = PyBytes_AS_STRING(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7956 Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7957 Py_ssize_t requiredsize = *outpos+repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7958 if (outsize<requiredsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7959 if (charmapencode_resize(outobj, outpos, requiredsize)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7960 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7961 return enc_EXCEPTION;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7962 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7963 outstart = PyBytes_AS_STRING(*outobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7964 memcpy(outstart + *outpos, repchars, repsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7965 *outpos += repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7966 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7967 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7968 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7969 return enc_SUCCESS;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7970 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7971
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7972 /* handle an error in PyUnicode_EncodeCharmap
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7973 Return 0 on success, -1 on error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7974 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7975 charmap_encoding_error(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7976 PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7977 PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7978 int *known_errorHandler, PyObject **errorHandler, const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7979 PyObject **res, Py_ssize_t *respos)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7980 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7981 PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7982 Py_ssize_t size, repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7983 Py_ssize_t newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7984 enum PyUnicode_Kind kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7985 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7986 Py_ssize_t index;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7987 /* startpos for collecting unencodable chars */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7988 Py_ssize_t collstartpos = *inpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7989 Py_ssize_t collendpos = *inpos+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7990 Py_ssize_t collpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7991 char *encoding = "charmap";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7992 char *reason = "character maps to <undefined>";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7993 charmapencode_result x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7994 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7995 int val;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7996
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7997 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7998 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
7999 size = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8000 /* find all unencodable characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8001 while (collendpos < size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8002 PyObject *rep;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8003 if (Py_TYPE(mapping) == &EncodingMapType) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8004 ch = PyUnicode_READ_CHAR(unicode, collendpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8005 val = encoding_map_lookup(ch, mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8006 if (val != -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8007 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8008 ++collendpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8009 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8010 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8011
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8012 ch = PyUnicode_READ_CHAR(unicode, collendpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8013 rep = charmapencode_lookup(ch, mapping);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8014 if (rep==NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8015 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8016 else if (rep!=Py_None) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8017 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8018 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8019 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8020 Py_DECREF(rep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8021 ++collendpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8022 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8023 /* cache callback name lookup
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8024 * (if not done yet, i.e. it's the first error) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8025 if (*known_errorHandler==-1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8026 if ((errors==NULL) || (!strcmp(errors, "strict")))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8027 *known_errorHandler = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8028 else if (!strcmp(errors, "replace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8029 *known_errorHandler = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8030 else if (!strcmp(errors, "ignore"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8031 *known_errorHandler = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8032 else if (!strcmp(errors, "xmlcharrefreplace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8033 *known_errorHandler = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8034 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8035 *known_errorHandler = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8036 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8037 switch (*known_errorHandler) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8038 case 1: /* strict */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8039 raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8040 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8041 case 2: /* replace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8042 for (collpos = collstartpos; collpos<collendpos; ++collpos) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8043 x = charmapencode_output('?', mapping, res, respos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8044 if (x==enc_EXCEPTION) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8045 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8046 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8047 else if (x==enc_FAILED) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8048 raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8049 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8050 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8051 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8052 /* fall through */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8053 case 3: /* ignore */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8054 *inpos = collendpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8055 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8056 case 4: /* xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8057 /* generate replacement (temporarily (mis)uses p) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8058 for (collpos = collstartpos; collpos < collendpos; ++collpos) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8059 char buffer[2+29+1+1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8060 char *cp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8061 sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8062 for (cp = buffer; *cp; ++cp) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8063 x = charmapencode_output(*cp, mapping, res, respos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8064 if (x==enc_EXCEPTION)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8065 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8066 else if (x==enc_FAILED) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8067 raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8068 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8069 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8070 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8071 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8072 *inpos = collendpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8073 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8074 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8075 repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8076 encoding, reason, unicode, exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8077 collstartpos, collendpos, &newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8078 if (repunicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8079 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8080 if (PyBytes_Check(repunicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8081 /* Directly copy bytes result to output. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8082 Py_ssize_t outsize = PyBytes_Size(*res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8083 Py_ssize_t requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8084 repsize = PyBytes_Size(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8085 requiredsize = *respos + repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8086 if (requiredsize > outsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8087 /* Make room for all additional bytes. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8088 if (charmapencode_resize(res, respos, requiredsize)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8089 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8090 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8091 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8092 memcpy(PyBytes_AsString(*res) + *respos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8093 PyBytes_AsString(repunicode), repsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8094 *respos += repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8095 *inpos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8096 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8097 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8098 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8099 /* generate replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8100 if (PyUnicode_READY(repunicode) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8101 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8102 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8103 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8104 repsize = PyUnicode_GET_LENGTH(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8105 data = PyUnicode_DATA(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8106 kind = PyUnicode_KIND(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8107 for (index = 0; index < repsize; index++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8108 Py_UCS4 repch = PyUnicode_READ(kind, data, index);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8109 x = charmapencode_output(repch, mapping, res, respos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8110 if (x==enc_EXCEPTION) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8111 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8112 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8113 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8114 else if (x==enc_FAILED) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8115 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8116 raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8117 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8118 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8119 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8120 *inpos = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8121 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8122 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8123 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8124 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8125
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8126 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8127 _PyUnicode_EncodeCharmap(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8128 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8129 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8130 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8131 /* output object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8132 PyObject *res = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8133 /* current input position */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8134 Py_ssize_t inpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8135 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8136 /* current output position */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8137 Py_ssize_t respos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8138 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8139 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8140 /* the following variable is used for caching string comparisons
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8141 * -1=not initialized, 0=unknown, 1=strict, 2=replace,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8142 * 3=ignore, 4=xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8143 int known_errorHandler = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8144
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8145 if (PyUnicode_READY(unicode) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8146 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8147 size = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8148
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8149 /* Default to Latin-1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8150 if (mapping == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8151 return unicode_encode_ucs1(unicode, errors, 256);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8152
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8153 /* allocate enough for a simple encoding without
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8154 replacements, if we need more, we'll resize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8155 res = PyBytes_FromStringAndSize(NULL, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8156 if (res == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8157 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8158 if (size == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8159 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8160
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8161 while (inpos<size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8162 Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8163 /* try to encode it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8164 charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8165 if (x==enc_EXCEPTION) /* error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8166 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8167 if (x==enc_FAILED) { /* unencodable character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8168 if (charmap_encoding_error(unicode, &inpos, mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8169 &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8170 &known_errorHandler, &errorHandler, errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8171 &res, &respos)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8172 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8173 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8174 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8175 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8176 /* done with this character => adjust input position */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8177 ++inpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8178 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8179
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8180 /* Resize if we allocated to much */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8181 if (respos<PyBytes_GET_SIZE(res))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8182 if (_PyBytes_Resize(&res, respos) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8183 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8184
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8185 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8186 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8187 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8188
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8189 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8190 Py_XDECREF(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8191 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8192 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8193 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8194 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8195
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8196 /* Deprecated */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8197 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8198 PyUnicode_EncodeCharmap(const Py_UNICODE *p,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8199 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8200 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8201 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8202 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8203 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8204 PyObject *unicode = PyUnicode_FromUnicode(p, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8205 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8206 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8207 result = _PyUnicode_EncodeCharmap(unicode, mapping, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8208 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8209 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8210 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8211
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8212 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8213 PyUnicode_AsCharmapString(PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8214 PyObject *mapping)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8215 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8216 if (!PyUnicode_Check(unicode) || mapping == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8217 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8218 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8219 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8220 return _PyUnicode_EncodeCharmap(unicode, mapping, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8221 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8222
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8223 /* create or adjust a UnicodeTranslateError */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8224 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8225 make_translate_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8226 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8227 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8228 const char *reason)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8229 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8230 if (*exceptionObject == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8231 *exceptionObject = _PyUnicodeTranslateError_Create(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8232 unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8233 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8234 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8235 if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8236 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8237 if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8238 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8239 if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8240 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8241 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8242 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8243 Py_DECREF(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8244 *exceptionObject = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8245 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8246 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8247
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8248 /* raises a UnicodeTranslateError */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8249 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8250 raise_translate_exception(PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8251 PyObject *unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8252 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8253 const char *reason)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8254 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8255 make_translate_exception(exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8256 unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8257 if (*exceptionObject != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8258 PyCodec_StrictErrors(*exceptionObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8259 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8260
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8261 /* error handling callback helper:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8262 build arguments, call the callback and check the arguments,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8263 put the result into newpos and return the replacement string, which
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8264 has to be freed by the caller */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8265 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8266 unicode_translate_call_errorhandler(const char *errors,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8267 PyObject **errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8268 const char *reason,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8269 PyObject *unicode, PyObject **exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8270 Py_ssize_t startpos, Py_ssize_t endpos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8271 Py_ssize_t *newpos)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8272 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8273 static char *argparse = "O!n;translating error handler must return (str, int) tuple";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8274
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8275 Py_ssize_t i_newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8276 PyObject *restuple;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8277 PyObject *resunicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8278
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8279 if (*errorHandler == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8280 *errorHandler = PyCodec_LookupError(errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8281 if (*errorHandler == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8282 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8283 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8284
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8285 make_translate_exception(exceptionObject,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8286 unicode, startpos, endpos, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8287 if (*exceptionObject == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8288 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8289
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8290 restuple = PyObject_CallFunctionObjArgs(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8291 *errorHandler, *exceptionObject, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8292 if (restuple == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8293 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8294 if (!PyTuple_Check(restuple)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8295 PyErr_SetString(PyExc_TypeError, &argparse[4]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8296 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8297 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8298 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8299 if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8300 &resunicode, &i_newpos)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8301 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8302 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8303 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8304 if (i_newpos<0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8305 *newpos = PyUnicode_GET_LENGTH(unicode)+i_newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8306 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8307 *newpos = i_newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8308 if (*newpos<0 || *newpos>PyUnicode_GET_LENGTH(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8309 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8310 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8311 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8312 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8313 Py_INCREF(resunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8314 Py_DECREF(restuple);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8315 return resunicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8316 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8317
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8318 /* Lookup the character ch in the mapping and put the result in result,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8319 which must be decrefed by the caller.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8320 Return 0 on success, -1 on error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8321 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8322 charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8323 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8324 PyObject *w = PyLong_FromLong((long)c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8325 PyObject *x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8326
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8327 if (w == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8328 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8329 x = PyObject_GetItem(mapping, w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8330 Py_DECREF(w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8331 if (x == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8332 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8333 /* No mapping found means: use 1:1 mapping. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8334 PyErr_Clear();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8335 *result = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8336 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8337 } else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8338 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8339 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8340 else if (x == Py_None) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8341 *result = x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8342 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8343 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8344 else if (PyLong_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8345 long value = PyLong_AS_LONG(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8346 long max = PyUnicode_GetMax();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8347 if (value < 0 || value > max) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8348 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8349 "character mapping must be in range(0x%x)", max+1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8350 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8351 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8352 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8353 *result = x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8354 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8355 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8356 else if (PyUnicode_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8357 *result = x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8358 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8359 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8360 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8361 /* wrong return value */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8362 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8363 "character mapping must return integer, None or str");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8364 Py_DECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8365 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8366 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8367 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8368 /* ensure that *outobj is at least requiredsize characters long,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8369 if not reallocate and adjust various state variables.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8370 Return 0 on success, -1 on error */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8371 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8372 charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8373 Py_ssize_t requiredsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8374 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8375 Py_ssize_t oldsize = *psize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8376 if (requiredsize > oldsize) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8377 /* exponentially overallocate to minimize reallocations */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8378 if (requiredsize < 2 * oldsize)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8379 requiredsize = 2 * oldsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8380 *outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8381 if (*outobj == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8382 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8383 *psize = requiredsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8384 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8385 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8386 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8387 /* lookup the character, put the result in the output string and adjust
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8388 various state variables. Return a new reference to the object that
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8389 was put in the output buffer in *result, or Py_None, if the mapping was
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8390 undefined (in which case no character was written).
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8391 The called must decref result.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8392 Return 0 on success, -1 on error. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8393 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8394 charmaptranslate_output(PyObject *input, Py_ssize_t ipos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8395 PyObject *mapping, Py_UCS4 **output,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8396 Py_ssize_t *osize, Py_ssize_t *opos,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8397 PyObject **res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8398 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8399 Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8400 if (charmaptranslate_lookup(curinp, mapping, res))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8401 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8402 if (*res==NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8403 /* not found => default to 1:1 mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8404 (*output)[(*opos)++] = curinp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8405 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8406 else if (*res==Py_None)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8407 ;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8408 else if (PyLong_Check(*res)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8409 /* no overflow check, because we know that the space is enough */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8410 (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8411 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8412 else if (PyUnicode_Check(*res)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8413 Py_ssize_t repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8414 if (PyUnicode_READY(*res) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8415 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8416 repsize = PyUnicode_GET_LENGTH(*res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8417 if (repsize==1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8418 /* no overflow check, because we know that the space is enough */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8419 (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8420 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8421 else if (repsize!=0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8422 /* more than one character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8423 Py_ssize_t requiredsize = *opos +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8424 (PyUnicode_GET_LENGTH(input) - ipos) +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8425 repsize - 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8426 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8427 if (charmaptranslate_makespace(output, osize, requiredsize))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8428 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8429 for(i = 0; i < repsize; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8430 (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8431 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8432 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8433 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8434 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8435 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8436 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8437
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8438 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8439 _PyUnicode_TranslateCharmap(PyObject *input,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8440 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8441 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8442 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8443 /* input object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8444 char *idata;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8445 Py_ssize_t size, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8446 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8447 /* output buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8448 Py_UCS4 *output = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8449 Py_ssize_t osize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8450 PyObject *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8451 /* current output position */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8452 Py_ssize_t opos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8453 char *reason = "character maps to <undefined>";
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8454 PyObject *errorHandler = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8455 PyObject *exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8456 /* the following variable is used for caching string comparisons
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8457 * -1=not initialized, 0=unknown, 1=strict, 2=replace,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8458 * 3=ignore, 4=xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8459 int known_errorHandler = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8460
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8461 if (mapping == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8462 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8463 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8464 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8465
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8466 if (PyUnicode_READY(input) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8467 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8468 idata = (char*)PyUnicode_DATA(input);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8469 kind = PyUnicode_KIND(input);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8470 size = PyUnicode_GET_LENGTH(input);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8471 i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8472
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8473 if (size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8474 Py_INCREF(input);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8475 return input;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8476 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8477
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8478 /* allocate enough for a simple 1:1 translation without
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8479 replacements, if we need more, we'll resize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8480 osize = size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8481 output = PyMem_Malloc(osize * sizeof(Py_UCS4));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8482 opos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8483 if (output == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8484 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8485 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8486 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8487
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8488 while (i<size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8489 /* try to encode it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8490 PyObject *x = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8491 if (charmaptranslate_output(input, i, mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8492 &output, &osize, &opos, &x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8493 Py_XDECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8494 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8495 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8496 Py_XDECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8497 if (x!=Py_None) /* it worked => adjust input pointer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8498 ++i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8499 else { /* untranslatable character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8500 PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8501 Py_ssize_t repsize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8502 Py_ssize_t newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8503 Py_ssize_t uni2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8504 /* startpos for collecting untranslatable chars */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8505 Py_ssize_t collstart = i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8506 Py_ssize_t collend = i+1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8507 Py_ssize_t coll;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8508
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8509 /* find all untranslatable characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8510 while (collend < size) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8511 if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8512 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8513 Py_XDECREF(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8514 if (x!=Py_None)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8515 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8516 ++collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8517 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8518 /* cache callback name lookup
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8519 * (if not done yet, i.e. it's the first error) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8520 if (known_errorHandler==-1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8521 if ((errors==NULL) || (!strcmp(errors, "strict")))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8522 known_errorHandler = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8523 else if (!strcmp(errors, "replace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8524 known_errorHandler = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8525 else if (!strcmp(errors, "ignore"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8526 known_errorHandler = 3;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8527 else if (!strcmp(errors, "xmlcharrefreplace"))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8528 known_errorHandler = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8529 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8530 known_errorHandler = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8531 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8532 switch (known_errorHandler) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8533 case 1: /* strict */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8534 raise_translate_exception(&exc, input, collstart,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8535 collend, reason);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8536 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8537 case 2: /* replace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8538 /* No need to check for space, this is a 1:1 replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8539 for (coll = collstart; coll<collend; coll++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8540 output[opos++] = '?';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8541 /* fall through */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8542 case 3: /* ignore */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8543 i = collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8544 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8545 case 4: /* xmlcharrefreplace */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8546 /* generate replacement (temporarily (mis)uses i) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8547 for (i = collstart; i < collend; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8548 char buffer[2+29+1+1];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8549 char *cp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8550 sprintf(buffer, "&#%d;", PyUnicode_READ(kind, idata, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8551 if (charmaptranslate_makespace(&output, &osize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8552 opos+strlen(buffer)+(size-collend)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8553 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8554 for (cp = buffer; *cp; ++cp)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8555 output[opos++] = *cp;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8556 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8557 i = collend;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8558 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8559 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8560 repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8561 reason, input, &exc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8562 collstart, collend, &newpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8563 if (repunicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8564 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8565 if (PyUnicode_READY(repunicode) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8566 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8567 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8568 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8569 /* generate replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8570 repsize = PyUnicode_GET_LENGTH(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8571 if (charmaptranslate_makespace(&output, &osize,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8572 opos+repsize+(size-collend))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8573 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8574 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8575 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8576 for (uni2 = 0; repsize-->0; ++uni2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8577 output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8578 i = newpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8579 Py_DECREF(repunicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8580 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8581 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8582 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8583 res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8584 if (!res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8585 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8586 PyMem_Free(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8587 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8588 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8589 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8590
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8591 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8592 PyMem_Free(output);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8593 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8594 Py_XDECREF(errorHandler);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8595 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8596 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8597
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8598 /* Deprecated. Use PyUnicode_Translate instead. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8599 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8600 PyUnicode_TranslateCharmap(const Py_UNICODE *p,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8601 Py_ssize_t size,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8602 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8603 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8604 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8605 PyObject *unicode = PyUnicode_FromUnicode(p, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8606 if (!unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8607 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8608 return _PyUnicode_TranslateCharmap(unicode, mapping, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8609 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8610
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8611 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8612 PyUnicode_Translate(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8613 PyObject *mapping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8614 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8615 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8616 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8617
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8618 str = PyUnicode_FromObject(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8619 if (str == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8620 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8621 result = _PyUnicode_TranslateCharmap(str, mapping, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8622 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8623 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8624
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8625 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8626 Py_XDECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8627 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8628 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8629
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8630 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8631 fix_decimal_and_space_to_ascii(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8632 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8633 /* No need to call PyUnicode_READY(self) because this function is only
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8634 called as a callback from fixup() which does it already. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8635 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8636 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8637 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8638 Py_UCS4 maxchar = 0, ch, fixed;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8639 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8640
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8641 for (i = 0; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8642 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8643 fixed = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8644 if (ch > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8645 if (Py_UNICODE_ISSPACE(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8646 fixed = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8647 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8648 const int decimal = Py_UNICODE_TODECIMAL(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8649 if (decimal >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8650 fixed = '0' + decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8651 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8652 if (fixed != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8653 if (fixed > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8654 maxchar = fixed;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8655 PyUnicode_WRITE(kind, data, i, fixed);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8656 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8657 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8658 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8659 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8660 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8661 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8662 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8663
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8664 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8665 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8666
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8667 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8668 _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8669 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8670 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8671 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8672 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8673 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8674 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8675 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8676 if (PyUnicode_MAX_CHAR_VALUE(unicode) <= 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8677 /* If the string is already ASCII, just return the same string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8678 Py_INCREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8679 return unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8680 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8681 return fixup(unicode, fix_decimal_and_space_to_ascii);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8682 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8683
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8684 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8685 PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8686 Py_ssize_t length)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8687 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8688 PyObject *decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8689 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8690 Py_UCS4 maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8691 enum PyUnicode_Kind kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8692 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8693
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8694 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8695 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8696 Py_UNICODE ch = s[i];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8697 if (ch > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8698 int decimal = Py_UNICODE_TODECIMAL(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8699 if (decimal >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8700 ch = '0' + decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8701 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8702 maxchar = Py_MAX(maxchar, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8703 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8704
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8705 /* Copy to a new string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8706 decimal = PyUnicode_New(length, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8707 if (decimal == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8708 return decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8709 kind = PyUnicode_KIND(decimal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8710 data = PyUnicode_DATA(decimal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8711 /* Iterate over code points */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8712 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8713 Py_UNICODE ch = s[i];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8714 if (ch > 127) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8715 int decimal = Py_UNICODE_TODECIMAL(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8716 if (decimal >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8717 ch = '0' + decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8718 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8719 PyUnicode_WRITE(kind, data, i, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8720 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8721 return unicode_result(decimal);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8722 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8723 /* --- Decimal Encoder ---------------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8724
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8725 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8726 PyUnicode_EncodeDecimal(Py_UNICODE *s,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8727 Py_ssize_t length,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8728 char *output,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8729 const char *errors)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8730 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8731 PyObject *unicode;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8732 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8733 enum PyUnicode_Kind kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8734 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8735
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8736 if (output == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8737 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8738 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8739 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8740
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8741 unicode = PyUnicode_FromUnicode(s, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8742 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8743 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8744
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8745 if (PyUnicode_READY(unicode) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8746 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8747 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8748 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8749 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8750 data = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8751
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8752 for (i=0; i < length; ) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8753 PyObject *exc;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8754 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8755 int decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8756 Py_ssize_t startpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8757
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8758 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8759
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8760 if (Py_UNICODE_ISSPACE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8761 *output++ = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8762 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8763 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8764 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8765 decimal = Py_UNICODE_TODECIMAL(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8766 if (decimal >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8767 *output++ = '0' + decimal;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8768 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8769 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8770 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8771 if (0 < ch && ch < 256) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8772 *output++ = (char)ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8773 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8774 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8775 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8776
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8777 startpos = i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8778 exc = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8779 raise_encode_exception(&exc, "decimal", unicode,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8780 startpos, startpos+1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8781 "invalid decimal Unicode string");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8782 Py_XDECREF(exc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8783 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8784 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8785 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8786 /* 0-terminate the output string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8787 *output++ = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8788 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8789 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8790 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8791
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8792 /* --- Helpers ------------------------------------------------------------ */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8793
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8794 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8795 any_find_slice(int direction, PyObject* s1, PyObject* s2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8796 Py_ssize_t start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8797 Py_ssize_t end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8798 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8799 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8800 void *buf1, *buf2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8801 Py_ssize_t len1, len2, result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8802
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8803 kind1 = PyUnicode_KIND(s1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8804 kind2 = PyUnicode_KIND(s2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8805 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8806 buf1 = PyUnicode_DATA(s1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8807 buf2 = PyUnicode_DATA(s2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8808 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8809 buf1 = _PyUnicode_AsKind(s1, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8810 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8811 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8812 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8813 buf2 = _PyUnicode_AsKind(s2, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8814 if (!buf2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8815 if (kind1 != kind) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8816 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8817 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8818 len1 = PyUnicode_GET_LENGTH(s1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8819 len2 = PyUnicode_GET_LENGTH(s2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8820
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8821 if (direction > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8822 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8823 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8824 if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8825 result = asciilib_find_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8826 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8827 result = ucs1lib_find_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8828 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8829 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8830 result = ucs2lib_find_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8831 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8832 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8833 result = ucs4lib_find_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8834 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8835 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8836 assert(0); result = -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8837 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8838 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8839 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8840 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8841 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8842 if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8843 result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8844 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8845 result = ucs1lib_rfind_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8846 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8847 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8848 result = ucs2lib_rfind_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8849 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8850 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8851 result = ucs4lib_rfind_slice(buf1, len1, buf2, len2, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8852 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8853 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8854 assert(0); result = -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8855 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8856 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8857
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8858 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8859 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8860 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8861 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8862
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8863 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8864 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8865
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8866 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8867 _PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8868 Py_ssize_t n_buffer,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8869 void *digits, Py_ssize_t n_digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8870 Py_ssize_t min_width,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8871 const char *grouping,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8872 const char *thousands_sep)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8873 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8874 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8875 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8876 if (unicode != NULL && PyUnicode_IS_ASCII(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8877 return _PyUnicode_ascii_InsertThousandsGrouping(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8878 (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8879 min_width, grouping, thousands_sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8880 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8881 return _PyUnicode_ucs1_InsertThousandsGrouping(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8882 (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8883 min_width, grouping, thousands_sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8884 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8885 return _PyUnicode_ucs2_InsertThousandsGrouping(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8886 (Py_UCS2*)data, n_buffer, (Py_UCS2*)digits, n_digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8887 min_width, grouping, thousands_sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8888 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8889 return _PyUnicode_ucs4_InsertThousandsGrouping(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8890 (Py_UCS4*)data, n_buffer, (Py_UCS4*)digits, n_digits,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8891 min_width, grouping, thousands_sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8892 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8893 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8894 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8895 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8896
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8897
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Helper macro to fix up start/end slice values in place.

   - end greater than len is reduced to len;
   - a negative end has len added to it and is then floored at 0;
   - a negative start has len added to it and is then floored at 0.

   start is not clamped from above.  start and end must be modifiable
   lvalues; len may be evaluated more than once, so it must not have side
   effects.

   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement (safe as the body of an unbraced if/else); invoke it
   with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8912
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8913 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8914 PyUnicode_Count(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8915 PyObject *substr,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8916 Py_ssize_t start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8917 Py_ssize_t end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8918 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8919 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8920 PyObject* str_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8921 PyObject* sub_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8922 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8923 void *buf1 = NULL, *buf2 = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8924 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8925
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8926 str_obj = PyUnicode_FromObject(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8927 if (!str_obj || PyUnicode_READY(str_obj) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8928 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8929 sub_obj = PyUnicode_FromObject(substr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8930 if (!sub_obj || PyUnicode_READY(sub_obj) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8931 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8932 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8933 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8934
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8935 kind1 = PyUnicode_KIND(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8936 kind2 = PyUnicode_KIND(sub_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8937 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8938 buf1 = PyUnicode_DATA(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8939 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8940 buf1 = _PyUnicode_AsKind(str_obj, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8941 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8942 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8943 buf2 = PyUnicode_DATA(sub_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8944 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8945 buf2 = _PyUnicode_AsKind(sub_obj, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8946 if (!buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8947 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8948 len1 = PyUnicode_GET_LENGTH(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8949 len2 = PyUnicode_GET_LENGTH(sub_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8950
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8951 ADJUST_INDICES(start, end, len1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8952 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8953 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8954 if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8955 result = asciilib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8956 ((Py_UCS1*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8957 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8958 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8959 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8960 result = ucs1lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8961 ((Py_UCS1*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8962 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8963 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8964 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8965 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8966 result = ucs2lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8967 ((Py_UCS2*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8968 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8969 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8970 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8971 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8972 result = ucs4lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8973 ((Py_UCS4*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8974 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8975 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8976 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8977 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8978 assert(0); result = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8979 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8980
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8981 Py_DECREF(sub_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8982 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8983
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8984 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8985 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8986 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8987 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8988
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8989 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8990 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8991 Py_DECREF(sub_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8992 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8993 if (kind1 != kind && buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8994 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8995 if (kind2 != kind && buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8996 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8997 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8998 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
8999
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9000 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9001 PyUnicode_Find(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9002 PyObject *sub,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9003 Py_ssize_t start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9004 Py_ssize_t end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9005 int direction)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9006 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9007 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9008
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9009 str = PyUnicode_FromObject(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9010 if (!str || PyUnicode_READY(str) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9011 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9012 sub = PyUnicode_FromObject(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9013 if (!sub || PyUnicode_READY(sub) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9014 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9015 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9016 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9017
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9018 result = any_find_slice(direction,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9019 str, sub, start, end
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9020 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9021
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9022 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9023 Py_DECREF(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9024
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9025 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9026 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9027
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9028 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9029 PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9030 Py_ssize_t start, Py_ssize_t end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9031 int direction)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9032 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9033 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9034 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9035 if (PyUnicode_READY(str) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9036 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9037 if (start < 0 || end < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9038 PyErr_SetString(PyExc_IndexError, "string index out of range");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9039 return -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9040 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9041 if (end > PyUnicode_GET_LENGTH(str))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9042 end = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9043 kind = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9044 result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9045 kind, end-start, ch, direction);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9046 if (result == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9047 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9048 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9049 return start + result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9050 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9051
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9052 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9053 tailmatch(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9054 PyObject *substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9055 Py_ssize_t start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9056 Py_ssize_t end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9057 int direction)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9058 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9059 int kind_self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9060 int kind_sub;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9061 void *data_self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9062 void *data_sub;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9063 Py_ssize_t offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9064 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9065 Py_ssize_t end_sub;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9066
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9067 if (PyUnicode_READY(self) == -1 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9068 PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9069 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9070
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9071 if (PyUnicode_GET_LENGTH(substring) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9072 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9073
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9074 ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9075 end -= PyUnicode_GET_LENGTH(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9076 if (end < start)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9077 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9078
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9079 kind_self = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9080 data_self = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9081 kind_sub = PyUnicode_KIND(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9082 data_sub = PyUnicode_DATA(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9083 end_sub = PyUnicode_GET_LENGTH(substring) - 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9084
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9085 if (direction > 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9086 offset = end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9087 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9088 offset = start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9089
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9090 if (PyUnicode_READ(kind_self, data_self, offset) ==
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9091 PyUnicode_READ(kind_sub, data_sub, 0) &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9092 PyUnicode_READ(kind_self, data_self, offset + end_sub) ==
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9093 PyUnicode_READ(kind_sub, data_sub, end_sub)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9094 /* If both are of the same kind, memcmp is sufficient */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9095 if (kind_self == kind_sub) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9096 return ! memcmp((char *)data_self +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9097 (offset * PyUnicode_KIND(substring)),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9098 data_sub,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9099 PyUnicode_GET_LENGTH(substring) *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9100 PyUnicode_KIND(substring));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9101 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9102 /* otherwise we have to compare each character by first accesing it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9103 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9104 /* We do not need to compare 0 and len(substring)-1 because
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9105 the if statement above ensured already that they are equal
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9106 when we end up here. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9107 // TODO: honor direction and do a forward or backwards search
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9108 for (i = 1; i < end_sub; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9109 if (PyUnicode_READ(kind_self, data_self, offset + i) !=
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9110 PyUnicode_READ(kind_sub, data_sub, i))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9111 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9112 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9113 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9114 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9115 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9116
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9117 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9118 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9119
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9120 Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9121 PyUnicode_Tailmatch(PyObject *str,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9122 PyObject *substr,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9123 Py_ssize_t start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9124 Py_ssize_t end,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9125 int direction)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9126 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9127 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9128
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9129 str = PyUnicode_FromObject(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9130 if (str == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9131 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9132 substr = PyUnicode_FromObject(substr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9133 if (substr == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9134 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9135 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9136 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9137
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9138 result = tailmatch(str, substr,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9139 start, end, direction);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9140 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9141 Py_DECREF(substr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9142 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9143 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9144
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9145 /* Apply fixfct filter to the Unicode object self and return a
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9146 reference to the modified object */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9147
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9148 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9149 fixup(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9150 Py_UCS4 (*fixfct)(PyObject *s))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9151 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9152 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9153 Py_UCS4 maxchar_old, maxchar_new = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9154
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9155 u = PyUnicode_Copy(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9156 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9157 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9158 maxchar_old = PyUnicode_MAX_CHAR_VALUE(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9159
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9160 /* fix functions return the new maximum character in a string,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9161 if the kind of the resulting unicode object does not change,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9162 everything is fine. Otherwise we need to change the string kind
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9163 and re-run the fix function. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9164 maxchar_new = fixfct(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9165 if (maxchar_new == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9166 /* do nothing, keep maxchar_new at 0 which means no changes. */;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9167 else if (maxchar_new <= 127)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9168 maxchar_new = 127;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9169 else if (maxchar_new <= 255)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9170 maxchar_new = 255;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9171 else if (maxchar_new <= 65535)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9172 maxchar_new = 65535;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9173 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9174 maxchar_new = MAX_UNICODE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9175
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9176 if (!maxchar_new && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9177 /* fixfct should return TRUE if it modified the buffer. If
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9178 FALSE, return a reference to the original buffer instead
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9179 (to save space, not time) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9180 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9181 Py_DECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9182 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9183 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9184 else if (maxchar_new == maxchar_old) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9185 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9186 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9187 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9188 /* In case the maximum character changed, we need to
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9189 convert the string to the new category. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9190 PyObject *v = PyUnicode_New(PyUnicode_GET_LENGTH(self), maxchar_new);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9191 if (v == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9192 Py_DECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9193 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9194 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9195 if (maxchar_new > maxchar_old) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9196 /* If the maxchar increased so that the kind changed, not all
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9197 characters are representable anymore and we need to fix the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9198 string again. This only happens in very few cases. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9199 copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9200 maxchar_old = fixfct(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9201 assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9202 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9203 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9204 copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9205 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9206
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9207 Py_DECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9208 assert(_PyUnicode_CheckConsistency(v, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9209 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9210 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9211 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9212
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9213 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9214 fixupper(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9215 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9216 /* No need to call PyUnicode_READY(self) because this function is only
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9217 called as a callback from fixup() which does it already. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9218 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9219 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9220 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9221 int touched = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9222 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9223 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9224
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9225 for (i = 0; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9226 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9227 const Py_UCS4 up = Py_UNICODE_TOUPPER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9228 if (up != ch) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9229 if (up > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9230 maxchar = up;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9231 PyUnicode_WRITE(kind, data, i, up);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9232 touched = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9233 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9234 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9235 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9236 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9237
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9238 if (touched)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9239 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9240 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9241 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9242 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9243
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9244 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9245 fixlower(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9246 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9247 /* No need to call PyUnicode_READY(self) because fixup() which does it. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9248 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9249 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9250 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9251 int touched = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9252 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9253 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9254
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9255 for(i = 0; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9256 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9257 const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9258 if (lo != ch) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9259 if (lo > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9260 maxchar = lo;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9261 PyUnicode_WRITE(kind, data, i, lo);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9262 touched = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9263 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9264 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9265 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9266 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9267
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9268 if (touched)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9269 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9270 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9271 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9272 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9273
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9274 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9275 fixswapcase(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9276 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9277 /* No need to call PyUnicode_READY(self) because fixup() which does it. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9278 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9279 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9280 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9281 int touched = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9282 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9283 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9284
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9285 for(i = 0; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9286 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9287 Py_UCS4 nu = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9288
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9289 if (Py_UNICODE_ISUPPER(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9290 nu = Py_UNICODE_TOLOWER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9291 else if (Py_UNICODE_ISLOWER(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9292 nu = Py_UNICODE_TOUPPER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9293
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9294 if (nu != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9295 if (nu > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9296 maxchar = nu;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9297 PyUnicode_WRITE(kind, data, i, nu);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9298 touched = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9299 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9300 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9301 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9302 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9303
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9304 if (touched)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9305 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9306 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9307 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9308 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9309
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9310 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9311 fixcapitalize(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9312 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9313 /* No need to call PyUnicode_READY(self) because fixup() which does it. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9314 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9315 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9316 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9317 int touched = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9318 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9319 Py_ssize_t i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9320 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9321
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9322 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9323 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9324
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9325 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9326 if (!Py_UNICODE_ISUPPER(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9327 maxchar = Py_UNICODE_TOUPPER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9328 PyUnicode_WRITE(kind, data, i, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9329 touched = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9330 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9331 ++i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9332 for(; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9333 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9334 if (!Py_UNICODE_ISLOWER(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9335 const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9336 if (lo > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9337 maxchar = lo;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9338 PyUnicode_WRITE(kind, data, i, lo);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9339 touched = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9340 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9341 else if (ch > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9342 maxchar = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9343 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9344
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9345 if (touched)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9346 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9347 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9348 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9349 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9350
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9351 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9352 fixtitle(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9353 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9354 /* No need to call PyUnicode_READY(self) because fixup() which does it. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9355 const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9356 const int kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9357 void *data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9358 Py_UCS4 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9359 Py_ssize_t i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9360 int previous_is_cased;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9361
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9362 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9363 if (len == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9364 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9365 const Py_UCS4 ti = Py_UNICODE_TOTITLE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9366 if (ti != ch) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9367 PyUnicode_WRITE(kind, data, i, ti);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9368 return ti;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9369 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9370 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9371 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9372 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9373 previous_is_cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9374 for(; i < len; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9375 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9376 Py_UCS4 nu;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9377
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9378 if (previous_is_cased)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9379 nu = Py_UNICODE_TOLOWER(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9380 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9381 nu = Py_UNICODE_TOTITLE(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9382
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9383 if (nu > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9384 maxchar = nu;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9385 PyUnicode_WRITE(kind, data, i, nu);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9386
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9387 if (Py_UNICODE_ISLOWER(ch) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9388 Py_UNICODE_ISUPPER(ch) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9389 Py_UNICODE_ISTITLE(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9390 previous_is_cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9391 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9392 previous_is_cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9393 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9394 return maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9395 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9396
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9397 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9398 PyUnicode_Join(PyObject *separator, PyObject *seq)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9399 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9400 PyObject *sep = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9401 Py_ssize_t seplen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9402 PyObject *res = NULL; /* the result */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9403 PyObject *fseq; /* PySequence_Fast(seq) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9404 Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9405 PyObject **items;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9406 PyObject *item;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9407 Py_ssize_t sz, i, res_offset;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9408 Py_UCS4 maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9409 Py_UCS4 item_maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9410 int use_memcpy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9411 unsigned char *res_data = NULL, *sep_data = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9412 PyObject *last_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9413 unsigned int kind = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9414
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9415 fseq = PySequence_Fast(seq, "");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9416 if (fseq == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9417 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9418 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9419
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9420 /* NOTE: the following code can't call back into Python code,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9421 * so we are sure that fseq won't be mutated.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9422 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9423
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9424 seqlen = PySequence_Fast_GET_SIZE(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9425 /* If empty sequence, return u"". */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9426 if (seqlen == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9427 Py_DECREF(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9428 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9429 res = unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9430 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9431 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9432
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9433 /* If singleton sequence with an exact Unicode, return that. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9434 last_obj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9435 items = PySequence_Fast_ITEMS(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9436 if (seqlen == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9437 if (PyUnicode_CheckExact(items[0])) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9438 res = items[0];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9439 Py_INCREF(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9440 Py_DECREF(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9441 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9442 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9443 seplen = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9444 maxchar = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9445 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9446 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9447 /* Set up sep and seplen */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9448 if (separator == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9449 /* fall back to a blank space separator */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9450 sep = PyUnicode_FromOrdinal(' ');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9451 if (!sep)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9452 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9453 seplen = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9454 maxchar = 32;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9455 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9456 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9457 if (!PyUnicode_Check(separator)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9458 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9459 "separator: expected str instance,"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9460 " %.80s found",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9461 Py_TYPE(separator)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9462 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9463 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9464 if (PyUnicode_READY(separator))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9465 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9466 sep = separator;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9467 seplen = PyUnicode_GET_LENGTH(separator);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9468 maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9469 /* inc refcount to keep this code path symmetric with the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9470 above case of a blank separator */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9471 Py_INCREF(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9472 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9473 last_obj = sep;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9474 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9475
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9476 /* There are at least two things to join, or else we have a subclass
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9477 * of str in the sequence.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9478 * Do a pre-pass to figure out the total amount of space we'll
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9479 * need (sz), and see whether all argument are strings.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9480 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9481 sz = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9482 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9483 use_memcpy = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9484 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9485 use_memcpy = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9486 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9487 for (i = 0; i < seqlen; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9488 const Py_ssize_t old_sz = sz;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9489 item = items[i];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9490 if (!PyUnicode_Check(item)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9491 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9492 "sequence item %zd: expected str instance,"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9493 " %.80s found",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9494 i, Py_TYPE(item)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9495 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9496 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9497 if (PyUnicode_READY(item) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9498 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9499 sz += PyUnicode_GET_LENGTH(item);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9500 item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9501 maxchar = Py_MAX(maxchar, item_maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9502 if (i != 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9503 sz += seplen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9504 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9505 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9506 "join() result is too long for a Python string");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9507 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9508 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9509 if (use_memcpy && last_obj != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9510 if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9511 use_memcpy = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9512 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9513 last_obj = item;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9514 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9515
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9516 res = PyUnicode_New(sz, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9517 if (res == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9518 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9519
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9520 /* Catenate everything. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9521 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9522 use_memcpy = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9523 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9524 if (use_memcpy) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9525 res_data = PyUnicode_1BYTE_DATA(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9526 kind = PyUnicode_KIND(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9527 if (seplen != 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9528 sep_data = PyUnicode_1BYTE_DATA(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9529 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9530 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9531 for (i = 0, res_offset = 0; i < seqlen; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9532 Py_ssize_t itemlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9533 item = items[i];
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9534 /* Copy item, and maybe the separator. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9535 if (i && seplen != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9536 if (use_memcpy) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9537 Py_MEMCPY(res_data,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9538 sep_data,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9539 kind * seplen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9540 res_data += kind * seplen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9541 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9542 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9543 copy_characters(res, res_offset, sep, 0, seplen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9544 res_offset += seplen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9545 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9546 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9547 itemlen = PyUnicode_GET_LENGTH(item);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9548 if (itemlen != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9549 if (use_memcpy) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9550 Py_MEMCPY(res_data,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9551 PyUnicode_DATA(item),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9552 kind * itemlen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9553 res_data += kind * itemlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9554 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9555 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9556 copy_characters(res, res_offset, item, 0, itemlen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9557 res_offset += itemlen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9558 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9559 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9560 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9561 if (use_memcpy)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9562 assert(res_data == PyUnicode_1BYTE_DATA(res)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9563 + kind * PyUnicode_GET_LENGTH(res));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9564 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9565 assert(res_offset == PyUnicode_GET_LENGTH(res));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9566
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9567 Py_DECREF(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9568 Py_XDECREF(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9569 assert(_PyUnicode_CheckConsistency(res, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9570 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9571
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9572 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9573 Py_DECREF(fseq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9574 Py_XDECREF(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9575 Py_XDECREF(res);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9576 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9577 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9578
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* FILL(kind, data, value, start, length): store `value` into `length`
   consecutive code units of the buffer `data`, beginning at index `start`.
   `kind` selects the code-unit width: PyUnicode_1BYTE_KIND uses memset on
   unsigned char, PyUnicode_2BYTE_KIND writes Py_UCS2 units, and any other
   kind writes Py_UCS4 units.  `kind` must not be PyUnicode_WCHAR_KIND.
   Arguments may be evaluated more than once, so they must be free of side
   effects. */
#define FILL(kind, data, value, start, length) \
    do { \
        Py_ssize_t i_ = 0; \
        assert((kind) != PyUnicode_WCHAR_KIND); \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            unsigned char * to_ = (unsigned char *)((data)) + (start); \
            memset(to_, (unsigned char)(value), (length)); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \
            for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
            break; \
        } \
        default: { \
            Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
            for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
            break; \
        } \
        } \
    } while (0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9601
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9602 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9603 pad(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9604 Py_ssize_t left,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9605 Py_ssize_t right,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9606 Py_UCS4 fill)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9607 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9608 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9609 Py_UCS4 maxchar;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9610 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9611 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9612
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9613 if (left < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9614 left = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9615 if (right < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9616 right = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9617
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9618 if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9619 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9620 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9621 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9622
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9623 if (left > PY_SSIZE_T_MAX - _PyUnicode_LENGTH(self) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9624 right > PY_SSIZE_T_MAX - (left + _PyUnicode_LENGTH(self))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9625 PyErr_SetString(PyExc_OverflowError, "padded string is too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9626 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9627 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9628 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9629 if (fill > maxchar)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9630 maxchar = fill;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9631 u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9632 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9633 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9634
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9635 kind = PyUnicode_KIND(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9636 data = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9637 if (left)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9638 FILL(kind, data, fill, 0, left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9639 if (right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9640 FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9641 copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9642 assert(_PyUnicode_CheckConsistency(u, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9643 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9644 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9645 #undef FILL
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9646
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9647 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9648 PyUnicode_Splitlines(PyObject *string, int keepends)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9649 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9650 PyObject *list;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9651
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9652 string = PyUnicode_FromObject(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9653 if (string == NULL || PyUnicode_READY(string) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9654 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9655
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9656 switch(PyUnicode_KIND(string)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9657 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9658 if (PyUnicode_IS_ASCII(string))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9659 list = asciilib_splitlines(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9660 string, PyUnicode_1BYTE_DATA(string),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9661 PyUnicode_GET_LENGTH(string), keepends);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9662 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9663 list = ucs1lib_splitlines(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9664 string, PyUnicode_1BYTE_DATA(string),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9665 PyUnicode_GET_LENGTH(string), keepends);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9666 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9667 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9668 list = ucs2lib_splitlines(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9669 string, PyUnicode_2BYTE_DATA(string),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9670 PyUnicode_GET_LENGTH(string), keepends);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9671 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9672 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9673 list = ucs4lib_splitlines(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9674 string, PyUnicode_4BYTE_DATA(string),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9675 PyUnicode_GET_LENGTH(string), keepends);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9676 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9677 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9678 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9679 list = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9680 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9681 Py_DECREF(string);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9682 return list;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9683 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9684
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9685 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9686 split(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9687 PyObject *substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9688 Py_ssize_t maxcount)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9689 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9690 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9691 void *buf1, *buf2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9692 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9693 PyObject* out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9694
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9695 if (maxcount < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9696 maxcount = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9697
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9698 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9699 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9700
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9701 if (substring == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9702 switch(PyUnicode_KIND(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9703 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9704 if (PyUnicode_IS_ASCII(self))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9705 return asciilib_split_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9706 self, PyUnicode_1BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9707 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9708 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9709 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9710 return ucs1lib_split_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9711 self, PyUnicode_1BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9712 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9713 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9714 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9715 return ucs2lib_split_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9716 self, PyUnicode_2BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9717 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9718 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9719 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9720 return ucs4lib_split_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9721 self, PyUnicode_4BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9722 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9723 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9724 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9725 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9726 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9727 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9728
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9729 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9730 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9731
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9732 kind1 = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9733 kind2 = PyUnicode_KIND(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9734 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9735 buf1 = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9736 buf2 = PyUnicode_DATA(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9737 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9738 buf1 = _PyUnicode_AsKind(self, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9739 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9740 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9741 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9742 buf2 = _PyUnicode_AsKind(substring, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9743 if (!buf2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9744 if (kind1 != kind) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9745 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9746 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9747 len1 = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9748 len2 = PyUnicode_GET_LENGTH(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9749
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9750 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9751 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9752 if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9753 out = asciilib_split(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9754 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9755 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9756 out = ucs1lib_split(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9757 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9758 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9759 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9760 out = ucs2lib_split(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9761 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9762 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9763 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9764 out = ucs4lib_split(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9765 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9766 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9767 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9768 out = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9769 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9770 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9771 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9772 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9773 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9774 return out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9775 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9776
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9777 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9778 rsplit(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9779 PyObject *substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9780 Py_ssize_t maxcount)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9781 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9782 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9783 void *buf1, *buf2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9784 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9785 PyObject* out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9786
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9787 if (maxcount < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9788 maxcount = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9789
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9790 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9791 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9792
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9793 if (substring == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9794 switch(PyUnicode_KIND(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9795 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9796 if (PyUnicode_IS_ASCII(self))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9797 return asciilib_rsplit_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9798 self, PyUnicode_1BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9799 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9800 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9801 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9802 return ucs1lib_rsplit_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9803 self, PyUnicode_1BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9804 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9805 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9806 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9807 return ucs2lib_rsplit_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9808 self, PyUnicode_2BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9809 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9810 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9811 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9812 return ucs4lib_rsplit_whitespace(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9813 self, PyUnicode_4BYTE_DATA(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9814 PyUnicode_GET_LENGTH(self), maxcount
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9815 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9816 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9817 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9818 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9819 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9820
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9821 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9822 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9823
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9824 kind1 = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9825 kind2 = PyUnicode_KIND(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9826 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9827 buf1 = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9828 buf2 = PyUnicode_DATA(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9829 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9830 buf1 = _PyUnicode_AsKind(self, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9831 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9832 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9833 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9834 buf2 = _PyUnicode_AsKind(substring, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9835 if (!buf2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9836 if (kind1 != kind) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9837 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9838 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9839 len1 = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9840 len2 = PyUnicode_GET_LENGTH(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9841
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9842 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9843 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9844 if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9845 out = asciilib_rsplit(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9846 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9847 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9848 out = ucs1lib_rsplit(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9849 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9850 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9851 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9852 out = ucs2lib_rsplit(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9853 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9854 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9855 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9856 out = ucs4lib_rsplit(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9857 self, buf1, len1, buf2, len2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9858 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9859 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9860 out = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9861 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9862 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9863 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9864 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9865 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9866 return out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9867 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9868
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9869 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9870 anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9871 PyObject *str2, void *buf2, Py_ssize_t len2, Py_ssize_t offset)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9872 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9873 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9874 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9875 if (PyUnicode_IS_ASCII(str1) && PyUnicode_IS_ASCII(str2))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9876 return asciilib_find(buf1, len1, buf2, len2, offset);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9877 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9878 return ucs1lib_find(buf1, len1, buf2, len2, offset);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9879 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9880 return ucs2lib_find(buf1, len1, buf2, len2, offset);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9881 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9882 return ucs4lib_find(buf1, len1, buf2, len2, offset);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9883 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9884 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9885 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9886 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9887
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9888 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9889 anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9890 PyObject *str1, void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9891 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9892 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9893 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9894 if (PyUnicode_IS_ASCII(sstr) && PyUnicode_IS_ASCII(str1))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9895 return asciilib_count(sbuf, slen, buf1, len1, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9896 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9897 return ucs1lib_count(sbuf, slen, buf1, len1, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9898 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9899 return ucs2lib_count(sbuf, slen, buf1, len1, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9900 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9901 return ucs4lib_count(sbuf, slen, buf1, len1, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9902 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9903 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9904 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9905 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9906
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9907 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9908 replace(PyObject *self, PyObject *str1,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9909 PyObject *str2, Py_ssize_t maxcount)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9910 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9911 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9912 char *sbuf = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9913 char *buf1 = PyUnicode_DATA(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9914 char *buf2 = PyUnicode_DATA(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9915 int srelease = 0, release1 = 0, release2 = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9916 int skind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9917 int kind1 = PyUnicode_KIND(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9918 int kind2 = PyUnicode_KIND(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9919 Py_ssize_t slen = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9920 Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9921 Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9922 int mayshrink;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9923 Py_UCS4 maxchar, maxchar_str2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9924
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9925 if (maxcount < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9926 maxcount = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9927 else if (maxcount == 0 || slen == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9928 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9929
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9930 if (str1 == str2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9931 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9932 if (skind < kind1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9933 /* substring too wide to be present */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9934 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9935
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9936 maxchar = PyUnicode_MAX_CHAR_VALUE(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9937 maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9938 /* Replacing str1 with str2 may cause a maxchar reduction in the
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9939 result string. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9940 mayshrink = (maxchar_str2 < maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9941 maxchar = Py_MAX(maxchar, maxchar_str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9942
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9943 if (len1 == len2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9944 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9945 /* same length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9946 if (len1 == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9947 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9948 if (len1 == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9949 /* replace characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9950 Py_UCS4 u1, u2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9951 int rkind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9952 u1 = PyUnicode_READ_CHAR(str1, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9953 if (findchar(sbuf, PyUnicode_KIND(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9954 slen, u1, 1) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9955 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9956 u2 = PyUnicode_READ_CHAR(str2, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9957 u = PyUnicode_New(slen, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9958 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9959 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9960 copy_characters(u, 0, self, 0, slen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9961 rkind = PyUnicode_KIND(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9962 for (i = 0; i < PyUnicode_GET_LENGTH(u); i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9963 if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9964 if (--maxcount < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9965 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9966 PyUnicode_WRITE(rkind, PyUnicode_DATA(u), i, u2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9967 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9968 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9969 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9970 int rkind = skind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9971 char *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9972
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9973 if (kind1 < rkind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9974 /* widen substring */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9975 buf1 = _PyUnicode_AsKind(str1, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9976 if (!buf1) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9977 release1 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9978 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9979 i = anylib_find(rkind, self, sbuf, slen, str1, buf1, len1, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9980 if (i < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9981 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9982 if (rkind > kind2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9983 /* widen replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9984 buf2 = _PyUnicode_AsKind(str2, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9985 if (!buf2) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9986 release2 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9987 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9988 else if (rkind < kind2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9989 /* widen self and buf1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9990 rkind = kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9991 if (release1) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9992 sbuf = _PyUnicode_AsKind(self, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9993 if (!sbuf) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9994 srelease = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9995 buf1 = _PyUnicode_AsKind(str1, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9996 if (!buf1) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9997 release1 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9998 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
9999 u = PyUnicode_New(slen, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10000 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10001 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10002 assert(PyUnicode_KIND(u) == rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10003 res = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10004
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10005 memcpy(res, sbuf, rkind * slen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10006 /* change everything in-place, starting with this one */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10007 memcpy(res + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10008 buf2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10009 rkind * len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10010 i += len1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10011
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10012 while ( --maxcount > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10013 i = anylib_find(rkind, self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10014 sbuf+rkind*i, slen-i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10015 str1, buf1, len1, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10016 if (i == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10017 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10018 memcpy(res + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10019 buf2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10020 rkind * len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10021 i += len1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10022 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10023 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10024 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10025 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10026 Py_ssize_t n, i, j, ires;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10027 Py_ssize_t product, new_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10028 int rkind = skind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10029 char *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10030
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10031 if (kind1 < rkind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10032 /* widen substring */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10033 buf1 = _PyUnicode_AsKind(str1, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10034 if (!buf1) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10035 release1 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10036 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10037 n = anylib_count(rkind, self, sbuf, slen, str1, buf1, len1, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10038 if (n == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10039 goto nothing;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10040 if (kind2 < rkind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10041 /* widen replacement */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10042 buf2 = _PyUnicode_AsKind(str2, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10043 if (!buf2) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10044 release2 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10045 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10046 else if (kind2 > rkind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10047 /* widen self and buf1 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10048 rkind = kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10049 sbuf = _PyUnicode_AsKind(self, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10050 if (!sbuf) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10051 srelease = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10052 if (release1) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10053 buf1 = _PyUnicode_AsKind(str1, rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10054 if (!buf1) goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10055 release1 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10056 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10057 /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10058 PyUnicode_GET_LENGTH(str1))); */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10059 product = n * (len2-len1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10060 if ((product / (len2-len1)) != n) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10061 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10062 "replace string is too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10063 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10064 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10065 new_size = slen + product;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10066 if (new_size == 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10067 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10068 u = unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10069 goto done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10070 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10071 if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10072 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10073 "replace string is too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10074 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10075 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10076 u = PyUnicode_New(new_size, maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10077 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10078 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10079 assert(PyUnicode_KIND(u) == rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10080 res = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10081 ires = i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10082 if (len1 > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10083 while (n-- > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10084 /* look for next match */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10085 j = anylib_find(rkind, self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10086 sbuf + rkind * i, slen-i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10087 str1, buf1, len1, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10088 if (j == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10089 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10090 else if (j > i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10091 /* copy unchanged part [i:j] */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10092 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10093 sbuf + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10094 rkind * (j-i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10095 ires += j - i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10096 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10097 /* copy substitution string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10098 if (len2 > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10099 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10100 buf2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10101 rkind * len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10102 ires += len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10103 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10104 i = j + len1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10105 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10106 if (i < slen)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10107 /* copy tail [i:] */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10108 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10109 sbuf + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10110 rkind * (slen-i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10111 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10112 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10113 /* interleave */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10114 while (n > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10115 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10116 buf2,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10117 rkind * len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10118 ires += len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10119 if (--n <= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10120 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10121 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10122 sbuf + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10123 rkind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10124 ires++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10125 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10126 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10127 memcpy(res + rkind * ires,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10128 sbuf + rkind * i,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10129 rkind * (slen-i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10130 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10131 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10132
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10133 if (mayshrink) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10134 unicode_adjust_maxchar(&u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10135 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10136 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10137 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10138
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10139 done:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10140 if (srelease)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10141 PyMem_FREE(sbuf);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10142 if (release1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10143 PyMem_FREE(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10144 if (release2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10145 PyMem_FREE(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10146 assert(_PyUnicode_CheckConsistency(u, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10147 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10148
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10149 nothing:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10150 /* nothing to replace; return original string (when possible) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10151 if (srelease)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10152 PyMem_FREE(sbuf);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10153 if (release1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10154 PyMem_FREE(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10155 if (release2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10156 PyMem_FREE(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10157 if (PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10158 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10159 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10160 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10161 return PyUnicode_Copy(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10162 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10163 if (srelease && sbuf)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10164 PyMem_FREE(sbuf);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10165 if (release1 && buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10166 PyMem_FREE(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10167 if (release2 && buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10168 PyMem_FREE(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10169 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10170 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10171
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10172 /* --- Unicode Object Methods --------------------------------------------- */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10173
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10174 PyDoc_STRVAR(title__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10175 "S.title() -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10176 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10177 Return a titlecased version of S, i.e. words start with title case\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10178 characters, all remaining cased characters have lower case.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10179
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10180 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10181 unicode_title(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10182 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10183 return fixup(self, fixtitle);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10184 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10185
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10186 PyDoc_STRVAR(capitalize__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10187 "S.capitalize() -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10188 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10189 Return a capitalized version of S, i.e. make the first character\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10190 have upper case and the rest lower case.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10191
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10192 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10193 unicode_capitalize(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10194 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10195 return fixup(self, fixcapitalize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10196 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10197
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
#if 0
PyDoc_STRVAR(capwords__doc__,
             "S.capwords() -> str\n"
             "\n"
             "Apply .capitalize() to all words in S and return the result with\n"
             "normalized whitespace (all whitespace strings are replaced by ' ').");

/* Disabled str.capwords() implementation (compiled out by the #if 0).

   Splits `self` with split(self, NULL, -1), replaces every list entry by
   the result of fixup(entry, fixcapitalize), and joins the list again with
   PyUnicode_Join().  Returns the joined string, or NULL if the split, any
   per-word fixup, or the join fails. */
static PyObject*
unicode_capwords(PyObject *self)
{
    PyObject *words = split(self, NULL, -1);
    PyObject *joined;
    Py_ssize_t pos = 0;

    if (words == NULL)
        return NULL;

    /* Capitalize each word in place inside the list. */
    while (pos < PyList_GET_SIZE(words)) {
        PyObject *old_word = PyList_GET_ITEM(words, pos);
        PyObject *new_word = fixup(old_word, fixcapitalize);

        if (new_word == NULL) {
            Py_DECREF(words);
            return NULL;
        }
        /* PyList_SET_ITEM does not release the previous entry, so drop
           the old word's reference by hand before overwriting the slot. */
        Py_DECREF(old_word);
        PyList_SET_ITEM(words, pos, new_word);
        pos++;
    }

    /* Join the words to form a new string.
       NOTE(review): a NULL separator presumably means a single space,
       as the docstring suggests -- confirm against PyUnicode_Join. */
    joined = PyUnicode_Join(NULL, words);
    Py_DECREF(words);
    return joined;
}
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10235
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10236 /* Argument converter. Coerces to a single unicode character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10237
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10238 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10239 convert_uc(PyObject *obj, void *addr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10240 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10241 Py_UCS4 *fillcharloc = (Py_UCS4 *)addr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10242 PyObject *uniobj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10243
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10244 uniobj = PyUnicode_FromObject(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10245 if (uniobj == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10246 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10247 "The fill character cannot be converted to Unicode");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10248 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10249 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10250 if (PyUnicode_GET_LENGTH(uniobj) != 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10251 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10252 "The fill character must be exactly one character long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10253 Py_DECREF(uniobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10254 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10255 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10256 *fillcharloc = PyUnicode_READ_CHAR(uniobj, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10257 Py_DECREF(uniobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10258 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10259 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10260
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10261 PyDoc_STRVAR(center__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10262 "S.center(width[, fillchar]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10263 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10264 Return S centered in a string of length width. Padding is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10265 done using the specified fill character (default is a space)");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10266
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10267 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10268 unicode_center(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10269 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10270 Py_ssize_t marg, left;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10271 Py_ssize_t width;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10272 Py_UCS4 fillchar = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10273
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10274 if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10275 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10276
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10277 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10278 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10279
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10280 if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10281 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10282 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10283 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10284
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10285 marg = width - _PyUnicode_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10286 left = marg / 2 + (marg & width & 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10287
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10288 return pad(self, left, marg - left, fillchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10289 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10290
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10291 /* This function assumes that str1 and str2 are readied by the caller. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10292
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10293 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10294 unicode_compare(PyObject *str1, PyObject *str2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10295 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10296 int kind1, kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10297 void *data1, *data2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10298 Py_ssize_t len1, len2, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10299
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10300 kind1 = PyUnicode_KIND(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10301 kind2 = PyUnicode_KIND(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10302 data1 = PyUnicode_DATA(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10303 data2 = PyUnicode_DATA(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10304 len1 = PyUnicode_GET_LENGTH(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10305 len2 = PyUnicode_GET_LENGTH(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10306
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10307 for (i = 0; i < len1 && i < len2; ++i) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10308 Py_UCS4 c1, c2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10309 c1 = PyUnicode_READ(kind1, data1, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10310 c2 = PyUnicode_READ(kind2, data2, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10311
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10312 if (c1 != c2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10313 return (c1 < c2) ? -1 : 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10314 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10315
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10316 return (len1 < len2) ? -1 : (len1 != len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10317 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10318
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10319 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10320 PyUnicode_Compare(PyObject *left, PyObject *right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10321 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10322 if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10323 if (PyUnicode_READY(left) == -1 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10324 PyUnicode_READY(right) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10325 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10326 return unicode_compare(left, right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10327 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10328 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10329 "Can't compare %.100s and %.100s",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10330 left->ob_type->tp_name,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10331 right->ob_type->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10332 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10333 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10334
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10335 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10336 PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10337 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10338 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10339 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10340 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10341 Py_UCS4 chr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10342
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10343 assert(_PyUnicode_CHECK(uni));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10344 if (PyUnicode_READY(uni) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10345 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10346 kind = PyUnicode_KIND(uni);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10347 data = PyUnicode_DATA(uni);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10348 /* Compare Unicode string and source character set string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10349 for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10350 if (chr != str[i])
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10351 return (chr < (unsigned char)(str[i])) ? -1 : 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10352 /* This check keeps Python strings that end in '\0' from comparing equal
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10353 to C strings identical up to that point. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10354 if (PyUnicode_GET_LENGTH(uni) != i || chr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10355 return 1; /* uni is longer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10356 if (str[i])
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10357 return -1; /* str is longer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10358 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10359 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10360
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10361
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10362 #define TEST_COND(cond) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10363 ((cond) ? Py_True : Py_False)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10364
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10365 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10366 PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10367 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10368 int result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10369
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10370 if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10371 PyObject *v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10372 if (PyUnicode_READY(left) == -1 ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10373 PyUnicode_READY(right) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10374 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10375 if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10376 PyUnicode_KIND(left) != PyUnicode_KIND(right)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10377 if (op == Py_EQ) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10378 Py_INCREF(Py_False);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10379 return Py_False;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10380 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10381 if (op == Py_NE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10382 Py_INCREF(Py_True);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10383 return Py_True;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10384 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10385 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10386 if (left == right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10387 result = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10388 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10389 result = unicode_compare(left, right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10390
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10391 /* Convert the return value to a Boolean */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10392 switch (op) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10393 case Py_EQ:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10394 v = TEST_COND(result == 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10395 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10396 case Py_NE:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10397 v = TEST_COND(result != 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10398 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10399 case Py_LE:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10400 v = TEST_COND(result <= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10401 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10402 case Py_GE:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10403 v = TEST_COND(result >= 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10404 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10405 case Py_LT:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10406 v = TEST_COND(result == -1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10407 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10408 case Py_GT:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10409 v = TEST_COND(result == 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10410 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10411 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10412 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10413 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10414 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10415 Py_INCREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10416 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10417 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10418
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10419 Py_RETURN_NOTIMPLEMENTED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10420 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10421
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10422 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10423 PyUnicode_Contains(PyObject *container, PyObject *element)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10424 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10425 PyObject *str, *sub;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10426 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10427 void *buf1, *buf2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10428 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10429 int result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10430
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10431 /* Coerce the two arguments */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10432 sub = PyUnicode_FromObject(element);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10433 if (!sub) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10434 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10435 "'in <string>' requires string as left operand, not %s",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10436 element->ob_type->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10437 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10438 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10439 if (PyUnicode_READY(sub) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10440 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10441
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10442 str = PyUnicode_FromObject(container);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10443 if (!str || PyUnicode_READY(str) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10444 Py_DECREF(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10445 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10446 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10447
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10448 kind1 = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10449 kind2 = PyUnicode_KIND(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10450 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10451 buf1 = PyUnicode_DATA(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10452 buf2 = PyUnicode_DATA(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10453 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10454 buf1 = _PyUnicode_AsKind(str, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10455 if (!buf1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10456 Py_DECREF(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10457 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10458 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10459 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10460 buf2 = _PyUnicode_AsKind(sub, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10461 if (!buf2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10462 Py_DECREF(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10463 if (kind1 != kind) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10464 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10465 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10466 len1 = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10467 len2 = PyUnicode_GET_LENGTH(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10468
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10469 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10470 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10471 result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10472 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10473 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10474 result = ucs2lib_find(buf1, len1, buf2, len2, 0) != -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10475 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10476 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10477 result = ucs4lib_find(buf1, len1, buf2, len2, 0) != -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10478 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10479 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10480 result = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10481 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10482 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10483
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10484 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10485 Py_DECREF(sub);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10486
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10487 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10488 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10489 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10490 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10491
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10492 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10493 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10494
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10495 /* Concat to string or Unicode object giving a new Unicode object. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10496
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10497 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10498 PyUnicode_Concat(PyObject *left, PyObject *right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10499 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10500 PyObject *u = NULL, *v = NULL, *w;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10501 Py_UCS4 maxchar, maxchar2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10502
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10503 /* Coerce the two arguments */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10504 u = PyUnicode_FromObject(left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10505 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10506 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10507 v = PyUnicode_FromObject(right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10508 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10509 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10510
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10511 /* Shortcuts */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10512 if (v == unicode_empty) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10513 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10514 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10515 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10516 if (u == unicode_empty) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10517 Py_DECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10518 return v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10519 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10520
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10521 maxchar = PyUnicode_MAX_CHAR_VALUE(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10522 maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10523 maxchar = Py_MAX(maxchar, maxchar2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10524
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10525 /* Concat the two Unicode strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10526 w = PyUnicode_New(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10527 PyUnicode_GET_LENGTH(u) + PyUnicode_GET_LENGTH(v),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10528 maxchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10529 if (w == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10530 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10531 copy_characters(w, 0, u, 0, PyUnicode_GET_LENGTH(u));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10532 copy_characters(w, PyUnicode_GET_LENGTH(u), v, 0, PyUnicode_GET_LENGTH(v));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10533 Py_DECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10534 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10535 assert(_PyUnicode_CheckConsistency(w, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10536 return w;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10537
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10538 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10539 Py_XDECREF(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10540 Py_XDECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10541 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10542 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10543
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10544 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10545 unicode_append_inplace(PyObject **p_left, PyObject *right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10546 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10547 Py_ssize_t left_len, right_len, new_len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10548
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10549 assert(PyUnicode_IS_READY(*p_left));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10550 assert(PyUnicode_IS_READY(right));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10551
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10552 left_len = PyUnicode_GET_LENGTH(*p_left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10553 right_len = PyUnicode_GET_LENGTH(right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10554 if (left_len > PY_SSIZE_T_MAX - right_len) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10555 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10556 "strings are too large to concat");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10557 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10558 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10559 new_len = left_len + right_len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10560
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10561 /* Now we own the last reference to 'left', so we can resize it
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10562 * in-place.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10563 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10564 if (unicode_resize(p_left, new_len) != 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10565 /* XXX if _PyUnicode_Resize() fails, 'left' has been
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10566 * deallocated so it cannot be put back into
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10567 * 'variable'. The MemoryError is raised when there
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10568 * is no value in 'variable', which might (very
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10569 * remotely) be a cause of incompatibilities.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10570 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10571 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10572 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10573 /* copy 'right' into the newly allocated area of 'left' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10574 copy_characters(*p_left, left_len, right, 0, right_len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10575 _PyUnicode_DIRTY(*p_left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10576 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10577
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10578 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10579 Py_DECREF(*p_left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10580 *p_left = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10581 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10582
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10583 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10584 PyUnicode_Append(PyObject **p_left, PyObject *right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10585 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10586 PyObject *left, *res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10587
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10588 if (p_left == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10589 if (!PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10590 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10591 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10592 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10593 left = *p_left;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10594 if (right == NULL || !PyUnicode_Check(left)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10595 if (!PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10596 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10597 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10598 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10599
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10600 if (PyUnicode_READY(left))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10601 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10602 if (PyUnicode_READY(right))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10603 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10604
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10605 if (PyUnicode_CheckExact(left) && left != unicode_empty
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10606 && PyUnicode_CheckExact(right) && right != unicode_empty
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10607 && unicode_resizable(left)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10608 && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10609 || _PyUnicode_WSTR(left) != NULL))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10610 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10611 /* Don't resize for ascii += latin1. Convert ascii to latin1 requires
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10612 to change the structure size, but characters are stored just after
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10613 the structure, and so it requires to move all characters which is
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10614 not so different than duplicating the string. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10615 if (!(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10616 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10617 unicode_append_inplace(p_left, right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10618 assert(p_left == NULL || _PyUnicode_CheckConsistency(*p_left, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10619 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10620 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10621 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10622
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10623 res = PyUnicode_Concat(left, right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10624 if (res == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10625 goto error;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10626 Py_DECREF(left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10627 *p_left = res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10628 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10629
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10630 error:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10631 Py_DECREF(*p_left);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10632 *p_left = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10633 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10634
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10635 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10636 PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10637 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10638 PyUnicode_Append(pleft, right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10639 Py_XDECREF(right);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10640 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10641
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10642 PyDoc_STRVAR(count__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10643 "S.count(sub[, start[, end]]) -> int\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10644 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10645 Return the number of non-overlapping occurrences of substring sub in\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10646 string S[start:end]. Optional arguments start and end are\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10647 interpreted as in slice notation.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10648
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10649 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10650 unicode_count(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10651 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10652 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10653 Py_ssize_t start = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10654 Py_ssize_t end = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10655 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10656 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10657 void *buf1, *buf2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10658 Py_ssize_t len1, len2, iresult;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10659
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10660 if (!stringlib_parse_args_finds_unicode("count", args, &substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10661 &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10662 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10663
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10664 kind1 = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10665 kind2 = PyUnicode_KIND(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10666 kind = kind1 > kind2 ? kind1 : kind2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10667 buf1 = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10668 buf2 = PyUnicode_DATA(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10669 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10670 buf1 = _PyUnicode_AsKind(self, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10671 if (!buf1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10672 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10673 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10674 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10675 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10676 buf2 = _PyUnicode_AsKind(substring, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10677 if (!buf2) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10678 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10679 if (kind1 != kind) PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10680 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10681 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10682 len1 = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10683 len2 = PyUnicode_GET_LENGTH(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10684
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10685 ADJUST_INDICES(start, end, len1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10686 switch(kind) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10687 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10688 iresult = ucs1lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10689 ((Py_UCS1*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10690 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10691 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10692 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10693 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10694 iresult = ucs2lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10695 ((Py_UCS2*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10696 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10697 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10698 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10699 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10700 iresult = ucs4lib_count(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10701 ((Py_UCS4*)buf1) + start, end - start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10702 buf2, len2, PY_SSIZE_T_MAX
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10703 );
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10704 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10705 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10706 assert(0); iresult = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10707 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10708
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10709 result = PyLong_FromSsize_t(iresult);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10710
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10711 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10712 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10713 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10714 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10715
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10716 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10717
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10718 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10719 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10720
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10721 PyDoc_STRVAR(encode__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10722 "S.encode(encoding='utf-8', errors='strict') -> bytes\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10723 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10724 Encode S using the codec registered for encoding. Default encoding\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10725 is 'utf-8'. errors may be given to set a different error\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10726 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10727 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10728 'xmlcharrefreplace' as well as any other name registered with\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10729 codecs.register_error that can handle UnicodeEncodeErrors.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10730
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10731 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10732 unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10733 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10734 static char *kwlist[] = {"encoding", "errors", 0};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10735 char *encoding = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10736 char *errors = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10737
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10738 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10739 kwlist, &encoding, &errors))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10740 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10741 return PyUnicode_AsEncodedString(self, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10742 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10743
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10744 PyDoc_STRVAR(expandtabs__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10745 "S.expandtabs([tabsize]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10746 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10747 Return a copy of S where all tab characters are expanded using spaces.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10748 If tabsize is not given, a tab size of 8 characters is assumed.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10749
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10750 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10751 unicode_expandtabs(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10752 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10753 Py_ssize_t i, j, line_pos, src_len, incr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10754 Py_UCS4 ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10755 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10756 void *src_data, *dest_data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10757 int tabsize = 8;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10758 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10759 int found;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10760
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10761 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10762 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10763
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10764 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10765 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10766
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10767 /* First pass: determine size of output string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10768 src_len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10769 i = j = line_pos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10770 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10771 src_data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10772 found = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10773 for (; i < src_len; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10774 ch = PyUnicode_READ(kind, src_data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10775 if (ch == '\t') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10776 found = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10777 if (tabsize > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10778 incr = tabsize - (line_pos % tabsize); /* cannot overflow */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10779 if (j > PY_SSIZE_T_MAX - incr)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10780 goto overflow;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10781 line_pos += incr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10782 j += incr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10783 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10784 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10785 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10786 if (j > PY_SSIZE_T_MAX - 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10787 goto overflow;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10788 line_pos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10789 j++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10790 if (ch == '\n' || ch == '\r')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10791 line_pos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10792 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10793 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10794 if (!found && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10795 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10796 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10797 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10798
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10799 /* Second pass: create output string and fill it */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10800 u = PyUnicode_New(j, PyUnicode_MAX_CHAR_VALUE(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10801 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10802 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10803 dest_data = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10804
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10805 i = j = line_pos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10806
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10807 for (; i < src_len; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10808 ch = PyUnicode_READ(kind, src_data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10809 if (ch == '\t') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10810 if (tabsize > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10811 incr = tabsize - (line_pos % tabsize);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10812 line_pos += incr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10813 while (incr--) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10814 PyUnicode_WRITE(kind, dest_data, j, ' ');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10815 j++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10816 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10817 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10818 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10819 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10820 line_pos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10821 PyUnicode_WRITE(kind, dest_data, j, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10822 j++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10823 if (ch == '\n' || ch == '\r')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10824 line_pos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10825 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10826 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10827 assert (j == PyUnicode_GET_LENGTH(u));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10828 return unicode_result(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10829
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10830 overflow:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10831 PyErr_SetString(PyExc_OverflowError, "new string is too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10832 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10833 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10834
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10835 PyDoc_STRVAR(find__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10836 "S.find(sub[, start[, end]]) -> int\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10837 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10838 Return the lowest index in S where substring sub is found,\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10839 such that sub is contained within S[start:end]. Optional\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10840 arguments start and end are interpreted as in slice notation.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10841 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10842 Return -1 on failure.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10843
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10844 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10845 unicode_find(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10846 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10847 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10848 Py_ssize_t start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10849 Py_ssize_t end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10850 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10851
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10852 if (!stringlib_parse_args_finds_unicode("find", args, &substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10853 &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10854 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10855
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10856 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10857 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10858 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10859 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10860
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10861 result = any_find_slice(1, self, substring, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10862
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10863 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10864
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10865 if (result == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10866 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10867
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10868 return PyLong_FromSsize_t(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10869 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10870
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10871 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10872 unicode_getitem(PyObject *self, Py_ssize_t index)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10873 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10874 Py_UCS4 ch = PyUnicode_ReadChar(self, index);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10875 if (ch == (Py_UCS4)-1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10876 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10877 return PyUnicode_FromOrdinal(ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10878 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10879
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10880 /* Believe it or not, this produces the same value for ASCII strings
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10881 as bytes_hash(). */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10882 static Py_hash_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10883 unicode_hash(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10884 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10885 Py_ssize_t len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10886 Py_uhash_t x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10887
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10888 if (_PyUnicode_HASH(self) != -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10889 return _PyUnicode_HASH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10890 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10891 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10892 len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10893
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10894 /* The hash function as a macro, gets expanded three times below. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10895 #define HASH(P) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10896 x = (Py_uhash_t)*P << 7; \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10897 while (--len >= 0) \
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10898 x = (1000003*x) ^ (Py_uhash_t)*P++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10899
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10900 switch (PyUnicode_KIND(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10901 case PyUnicode_1BYTE_KIND: {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10902 const unsigned char *c = PyUnicode_1BYTE_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10903 HASH(c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10904 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10905 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10906 case PyUnicode_2BYTE_KIND: {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10907 const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10908 HASH(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10909 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10910 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10911 default: {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10912 Py_UCS4 *l;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10913 assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10914 "Impossible switch case in unicode_hash");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10915 l = PyUnicode_4BYTE_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10916 HASH(l);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10917 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10918 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10919 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10920 x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10921
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10922 if (x == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10923 x = -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10924 _PyUnicode_HASH(self) = x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10925 return x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10926 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10927 #undef HASH
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10928
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10929 PyDoc_STRVAR(index__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10930 "S.index(sub[, start[, end]]) -> int\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10931 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10932 Like S.find() but raise ValueError when the substring is not found.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10933
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10934 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10935 unicode_index(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10936 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10937 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10938 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10939 Py_ssize_t start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10940 Py_ssize_t end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10941
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10942 if (!stringlib_parse_args_finds_unicode("index", args, &substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10943 &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10944 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10945
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10946 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10947 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10948 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10949 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10950
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10951 result = any_find_slice(1, self, substring, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10952
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10953 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10954
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10955 if (result == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10956 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10957
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10958 if (result < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10959 PyErr_SetString(PyExc_ValueError, "substring not found");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10960 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10961 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10962
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10963 return PyLong_FromSsize_t(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10964 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10965
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10966 PyDoc_STRVAR(islower__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10967 "S.islower() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10968 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10969 Return True if all cased characters in S are lowercase and there is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10970 at least one cased character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10971
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10972 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10973 unicode_islower(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10974 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10975 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10976 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10977 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10978 int cased;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10979
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10980 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10981 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10982 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10983 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10984 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10985
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10986 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10987 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10988 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10989 Py_UNICODE_ISLOWER(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10990
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10991 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10992 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10993 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10994
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10995 cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10996 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10997 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10998
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
10999 if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11000 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11001 else if (!cased && Py_UNICODE_ISLOWER(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11002 cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11003 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11004 return PyBool_FromLong(cased);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11005 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11006
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11007 PyDoc_STRVAR(isupper__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11008 "S.isupper() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11009 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11010 Return True if all cased characters in S are uppercase and there is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11011 at least one cased character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11012
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11013 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11014 unicode_isupper(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11015 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11016 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11017 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11018 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11019 int cased;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11020
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11021 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11022 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11023 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11024 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11025 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11026
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11027 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11028 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11029 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11030 Py_UNICODE_ISUPPER(PyUnicode_READ(kind, data, 0)) != 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11031
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11032 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11033 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11034 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11035
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11036 cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11037 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11038 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11039
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11040 if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11041 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11042 else if (!cased && Py_UNICODE_ISUPPER(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11043 cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11044 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11045 return PyBool_FromLong(cased);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11046 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11047
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11048 PyDoc_STRVAR(istitle__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11049 "S.istitle() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11050 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11051 Return True if S is a titlecased string and there is at least one\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11052 character in S, i.e. upper- and titlecase characters may only\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11053 follow uncased characters and lowercase characters only cased ones.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11054 Return False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11055
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11056 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11057 unicode_istitle(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11058 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11059 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11060 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11061 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11062 int cased, previous_is_cased;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11063
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11064 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11065 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11066 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11067 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11068 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11069
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11070 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11071 if (length == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11072 Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11073 return PyBool_FromLong((Py_UNICODE_ISTITLE(ch) != 0) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11074 (Py_UNICODE_ISUPPER(ch) != 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11075 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11076
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11077 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11078 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11079 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11080
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11081 cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11082 previous_is_cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11083 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11084 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11085
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11086 if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11087 if (previous_is_cased)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11088 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11089 previous_is_cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11090 cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11091 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11092 else if (Py_UNICODE_ISLOWER(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11093 if (!previous_is_cased)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11094 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11095 previous_is_cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11096 cased = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11097 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11098 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11099 previous_is_cased = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11100 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11101 return PyBool_FromLong(cased);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11102 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11103
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11104 PyDoc_STRVAR(isspace__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11105 "S.isspace() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11106 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11107 Return True if all characters in S are whitespace\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11108 and there is at least one character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11109
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11110 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11111 unicode_isspace(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11112 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11113 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11114 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11115 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11116
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11117 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11118 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11119 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11120 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11121 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11122
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11123 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11124 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11125 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11126 Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11127
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11128 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11129 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11130 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11131
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11132 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11133 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11134 if (!Py_UNICODE_ISSPACE(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11135 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11136 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11137 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11138 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11139
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11140 PyDoc_STRVAR(isalpha__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11141 "S.isalpha() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11142 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11143 Return True if all characters in S are alphabetic\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11144 and there is at least one character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11145
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11146 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11147 unicode_isalpha(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11148 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11149 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11150 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11151 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11152
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11153 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11154 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11155 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11156 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11157 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11158
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11159 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11160 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11161 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11162 Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11163
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11164 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11165 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11166 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11167
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11168 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11169 if (!Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, i)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11170 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11171 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11172 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11173 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11174
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11175 PyDoc_STRVAR(isalnum__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11176 "S.isalnum() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11177 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11178 Return True if all characters in S are alphanumeric\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11179 and there is at least one character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11180
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11181 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11182 unicode_isalnum(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11183 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11184 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11185 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11186 Py_ssize_t len, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11187
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11188 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11189 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11190
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11191 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11192 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11193 len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11194
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11195 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11196 if (len == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11197 const Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11198 return PyBool_FromLong(Py_UNICODE_ISALNUM(ch));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11199 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11200
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11201 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11202 if (len == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11203 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11204
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11205 for (i = 0; i < len; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11206 const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11207 if (!Py_UNICODE_ISALNUM(ch))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11208 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11209 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11210 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11211 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11212
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11213 PyDoc_STRVAR(isdecimal__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11214 "S.isdecimal() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11215 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11216 Return True if there are only decimal characters in S,\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11217 False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11218
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11219 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11220 unicode_isdecimal(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11221 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11222 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11223 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11224 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11225
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11226 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11227 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11228 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11229 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11230 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11231
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11232 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11233 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11234 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11235 Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11236
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11237 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11238 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11239 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11240
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11241 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11242 if (!Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, i)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11243 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11244 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11245 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11246 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11247
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11248 PyDoc_STRVAR(isdigit__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11249 "S.isdigit() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11250 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11251 Return True if all characters in S are digits\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11252 and there is at least one character in S, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11253
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11254 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11255 unicode_isdigit(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11256 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11257 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11258 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11259 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11260
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11261 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11262 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11263 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11264 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11265 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11266
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11267 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11268 if (length == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11269 const Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11270 return PyBool_FromLong(Py_UNICODE_ISDIGIT(ch));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11271 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11272
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11273 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11274 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11275 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11276
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11277 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11278 if (!Py_UNICODE_ISDIGIT(PyUnicode_READ(kind, data, i)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11279 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11280 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11281 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11282 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11283
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11284 PyDoc_STRVAR(isnumeric__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11285 "S.isnumeric() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11286 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11287 Return True if there are only numeric characters in S,\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11288 False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11289
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11290 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11291 unicode_isnumeric(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11292 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11293 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11294 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11295 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11296
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11297 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11298 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11299 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11300 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11301 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11302
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11303 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11304 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11305 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11306 Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11307
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11308 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11309 if (length == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11310 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11311
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11312 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11313 if (!Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, i)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11314 return PyBool_FromLong(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11315 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11316 return PyBool_FromLong(1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11317 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11318
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11319 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11320 PyUnicode_IsIdentifier(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11321 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11322 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11323 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11324 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11325 Py_UCS4 first;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11326
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11327 if (PyUnicode_READY(self) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11328 Py_FatalError("identifier not ready");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11329 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11330 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11331
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11332 /* Special case for empty strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11333 if (PyUnicode_GET_LENGTH(self) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11334 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11335 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11336 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11337
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11338 /* PEP 3131 says that the first character must be in
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11339 XID_Start and subsequent characters in XID_Continue,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11340 and for the ASCII range, the 2.x rules apply (i.e
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11341 start with letters and underscore, continue with
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11342 letters, digits, underscore). However, given the current
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11343 definition of XID_Start and XID_Continue, it is sufficient
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11344 to check just for these, except that _ must be allowed
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11345 as starting an identifier. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11346 first = PyUnicode_READ(kind, data, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11347 if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11348 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11349
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11350 for (i = 1; i < PyUnicode_GET_LENGTH(self); i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11351 if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i)))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11352 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11353 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11354 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11355
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11356 PyDoc_STRVAR(isidentifier__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11357 "S.isidentifier() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11358 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11359 Return True if S is a valid identifier according\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11360 to the language definition.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11361
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11362 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11363 unicode_isidentifier(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11364 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11365 return PyBool_FromLong(PyUnicode_IsIdentifier(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11366 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11367
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11368 PyDoc_STRVAR(isprintable__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11369 "S.isprintable() -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11370 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11371 Return True if all characters in S are considered\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11372 printable in repr() or S is empty, False otherwise.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11373
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11374 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11375 unicode_isprintable(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11376 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11377 Py_ssize_t i, length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11378 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11379 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11380
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11381 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11382 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11383 length = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11384 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11385 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11386
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11387 /* Shortcut for single character strings */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11388 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11389 return PyBool_FromLong(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11390 Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, 0)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11391
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11392 for (i = 0; i < length; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11393 if (!Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, i))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11394 Py_RETURN_FALSE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11395 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11396 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11397 Py_RETURN_TRUE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11398 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11399
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11400 PyDoc_STRVAR(join__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11401 "S.join(iterable) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11402 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11403 Return a string which is the concatenation of the strings in the\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11404 iterable. The separator between elements is S.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11405
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11406 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11407 unicode_join(PyObject *self, PyObject *data)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11408 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11409 return PyUnicode_Join(self, data);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11410 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11411
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11412 static Py_ssize_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11413 unicode_length(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11414 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11415 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11416 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11417 return PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11418 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11419
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11420 PyDoc_STRVAR(ljust__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11421 "S.ljust(width[, fillchar]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11422 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11423 Return S left-justified in a Unicode string of length width. Padding is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11424 done using the specified fill character (default is a space).");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11425
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11426 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11427 unicode_ljust(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11428 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11429 Py_ssize_t width;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11430 Py_UCS4 fillchar = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11431
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11432 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11433 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11434
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11435 if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11436 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11437
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11438 if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11439 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11440 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11441 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11442
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11443 return pad(self, 0, width - _PyUnicode_LENGTH(self), fillchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11444 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11445
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11446 PyDoc_STRVAR(lower__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11447 "S.lower() -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11448 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11449 Return a copy of the string S converted to lowercase.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11450
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11451 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11452 unicode_lower(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11453 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11454 return fixup(self, fixlower);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11455 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11456
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Strip modes; the values double as indices into stripformat[] below. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Format strings, one per strip mode, indexed by the constants above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string with its first three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11465
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11466 /* externally visible for str.strip(unicode) */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11467 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11468 _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11469 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11470 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11471 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11472 Py_ssize_t i, j, len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11473 BLOOM_MASK sepmask;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11474
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11475 if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11476 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11477
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11478 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11479 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11480 len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11481 sepmask = make_bloom_mask(PyUnicode_KIND(sepobj),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11482 PyUnicode_DATA(sepobj),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11483 PyUnicode_GET_LENGTH(sepobj));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11484
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11485 i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11486 if (striptype != RIGHTSTRIP) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11487 while (i < len &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11488 BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11489 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11490 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11491 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11492
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11493 j = len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11494 if (striptype != LEFTSTRIP) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11495 do {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11496 j--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11497 } while (j >= i &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11498 BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11499 j++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11500 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11501
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11502 return PyUnicode_Substring(self, i, j);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11503 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11504
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11505 PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11506 PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11507 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11508 unsigned char *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11509 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11510 Py_ssize_t length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11511
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11512 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11513 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11514
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11515 end = Py_MIN(end, PyUnicode_GET_LENGTH(self));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11516
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11517 if (start == 0 && end == PyUnicode_GET_LENGTH(self))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11518 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11519 if (PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11520 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11521 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11522 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11523 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11524 return PyUnicode_Copy(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11525 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11526
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11527 length = end - start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11528 if (length == 1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11529 return unicode_getitem(self, start);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11530
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11531 if (start < 0 || end < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11532 PyErr_SetString(PyExc_IndexError, "string index out of range");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11533 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11534 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11535
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11536 if (PyUnicode_IS_ASCII(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11537 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11538 data = PyUnicode_1BYTE_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11539 return unicode_fromascii(data + start, length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11540 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11541 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11542 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11543 data = PyUnicode_1BYTE_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11544 return PyUnicode_FromKindAndData(kind,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11545 data + kind * start,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11546 length);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11547 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11548 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11549
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11550 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11551 do_strip(PyObject *self, int striptype)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11552 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11553 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11554 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11555 Py_ssize_t len, i, j;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11556
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11557 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11558 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11559
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11560 kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11561 data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11562 len = PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11563
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11564 i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11565 if (striptype != RIGHTSTRIP) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11566 while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11567 i++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11568 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11569 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11570
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11571 j = len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11572 if (striptype != LEFTSTRIP) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11573 do {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11574 j--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11575 } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j)));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11576 j++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11577 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11578
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11579 return PyUnicode_Substring(self, i, j);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11580 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11581
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11582
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11583 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11584 do_argstrip(PyObject *self, int striptype, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11585 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11586 PyObject *sep = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11587
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11588 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11589 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11590
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11591 if (sep != NULL && sep != Py_None) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11592 if (PyUnicode_Check(sep))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11593 return _PyUnicode_XStrip(self, striptype, sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11594 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11595 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11596 "%s arg must be None or str",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11597 STRIPNAME(striptype));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11598 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11599 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11600 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11601
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11602 return do_strip(self, striptype);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11603 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11604
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11605
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11606 PyDoc_STRVAR(strip__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11607 "S.strip([chars]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11608 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11609 Return a copy of the string S with leading and trailing\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11610 whitespace removed.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11611 If chars is given and not None, remove characters in chars instead.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11612
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11613 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11614 unicode_strip(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11615 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11616 if (PyTuple_GET_SIZE(args) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11617 return do_strip(self, BOTHSTRIP); /* Common case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11618 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11619 return do_argstrip(self, BOTHSTRIP, args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11620 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11621
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11622
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11623 PyDoc_STRVAR(lstrip__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11624 "S.lstrip([chars]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11625 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11626 Return a copy of the string S with leading whitespace removed.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11627 If chars is given and not None, remove characters in chars instead.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11628
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11629 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11630 unicode_lstrip(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11631 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11632 if (PyTuple_GET_SIZE(args) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11633 return do_strip(self, LEFTSTRIP); /* Common case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11634 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11635 return do_argstrip(self, LEFTSTRIP, args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11636 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11637
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11638
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11639 PyDoc_STRVAR(rstrip__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11640 "S.rstrip([chars]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11641 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11642 Return a copy of the string S with trailing whitespace removed.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11643 If chars is given and not None, remove characters in chars instead.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11644
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11645 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11646 unicode_rstrip(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11647 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11648 if (PyTuple_GET_SIZE(args) == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11649 return do_strip(self, RIGHTSTRIP); /* Common case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11650 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11651 return do_argstrip(self, RIGHTSTRIP, args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11652 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11653
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11654
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11655 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11656 unicode_repeat(PyObject *str, Py_ssize_t len)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11657 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11658 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11659 Py_ssize_t nchars, n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11660
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11661 if (len < 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11662 Py_INCREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11663 return unicode_empty;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11664 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11665
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11666 if (len == 1 && PyUnicode_CheckExact(str)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11667 /* no repeat, return original string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11668 Py_INCREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11669 return str;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11670 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11671
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11672 if (PyUnicode_READY(str) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11673 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11674
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11675 if (PyUnicode_GET_LENGTH(str) > PY_SSIZE_T_MAX / len) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11676 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11677 "repeated string is too long");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11678 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11679 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11680 nchars = len * PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11681
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11682 u = PyUnicode_New(nchars, PyUnicode_MAX_CHAR_VALUE(str));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11683 if (!u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11684 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11685 assert(PyUnicode_KIND(u) == PyUnicode_KIND(str));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11686
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11687 if (PyUnicode_GET_LENGTH(str) == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11688 const int kind = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11689 const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11690 void *to = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11691 if (kind == PyUnicode_1BYTE_KIND)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11692 memset(to, (unsigned char)fill_char, len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11693 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11694 for (n = 0; n < len; ++n)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11695 PyUnicode_WRITE(kind, to, n, fill_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11696 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11697 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11698 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11699 /* number of characters copied this far */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11700 Py_ssize_t done = PyUnicode_GET_LENGTH(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11701 const Py_ssize_t char_size = PyUnicode_KIND(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11702 char *to = (char *) PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11703 Py_MEMCPY(to, PyUnicode_DATA(str),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11704 PyUnicode_GET_LENGTH(str) * char_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11705 while (done < nchars) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11706 n = (done <= nchars-done) ? done : nchars-done;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11707 Py_MEMCPY(to + (done * char_size), to, n * char_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11708 done += n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11709 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11710 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11711
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11712 assert(_PyUnicode_CheckConsistency(u, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11713 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11714 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11715
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11716 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11717 PyUnicode_Replace(PyObject *obj,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11718 PyObject *subobj,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11719 PyObject *replobj,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11720 Py_ssize_t maxcount)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11721 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11722 PyObject *self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11723 PyObject *str1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11724 PyObject *str2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11725 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11726
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11727 self = PyUnicode_FromObject(obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11728 if (self == NULL || PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11729 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11730 str1 = PyUnicode_FromObject(subobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11731 if (str1 == NULL || PyUnicode_READY(str1) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11732 Py_DECREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11733 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11734 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11735 str2 = PyUnicode_FromObject(replobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11736 if (str2 == NULL || PyUnicode_READY(str2)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11737 Py_DECREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11738 Py_DECREF(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11739 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11740 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11741 result = replace(self, str1, str2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11742 Py_DECREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11743 Py_DECREF(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11744 Py_DECREF(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11745 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11746 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11747
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11748 PyDoc_STRVAR(replace__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11749 "S.replace(old, new[, count]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11750 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11751 Return a copy of S with all occurrences of substring\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11752 old replaced by new. If the optional argument count is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11753 given, only the first count occurrences are replaced.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11754
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11755 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11756 unicode_replace(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11757 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11758 PyObject *str1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11759 PyObject *str2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11760 Py_ssize_t maxcount = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11761 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11762
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11763 if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11764 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11765 if (!PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11766 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11767 str1 = PyUnicode_FromObject(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11768 if (str1 == NULL || PyUnicode_READY(str1) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11769 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11770 str2 = PyUnicode_FromObject(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11771 if (str2 == NULL || PyUnicode_READY(str2) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11772 Py_DECREF(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11773 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11774 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11776 result = replace(self, str1, str2, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11777
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11778 Py_DECREF(str1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11779 Py_DECREF(str2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11780 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11781 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11782
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11783 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11784 unicode_repr(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11785 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11786 PyObject *repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11787 Py_ssize_t isize;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11788 Py_ssize_t osize, squote, dquote, i, o;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11789 Py_UCS4 max, quote;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11790 int ikind, okind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11791 void *idata, *odata;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11792
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11793 if (PyUnicode_READY(unicode) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11794 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11795
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11796 isize = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11797 idata = PyUnicode_DATA(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11798
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11799 /* Compute length of output, quote characters, and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11800 maximum character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11801 osize = 2; /* quotes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11802 max = 127;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11803 squote = dquote = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11804 ikind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11805 for (i = 0; i < isize; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11806 Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11807 switch (ch) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11808 case '\'': squote++; osize++; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11809 case '"': dquote++; osize++; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11810 case '\\': case '\t': case '\r': case '\n':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11811 osize += 2; break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11812 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11813 /* Fast-path ASCII */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11814 if (ch < ' ' || ch == 0x7f)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11815 osize += 4; /* \xHH */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11816 else if (ch < 0x7f)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11817 osize++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11818 else if (Py_UNICODE_ISPRINTABLE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11819 osize++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11820 max = ch > max ? ch : max;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11821 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11822 else if (ch < 0x100)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11823 osize += 4; /* \xHH */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11824 else if (ch < 0x10000)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11825 osize += 6; /* \uHHHH */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11826 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11827 osize += 10; /* \uHHHHHHHH */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11828 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11829 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11830
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11831 quote = '\'';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11832 if (squote) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11833 if (dquote)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11834 /* Both squote and dquote present. Use squote,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11835 and escape them */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11836 osize += squote;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11837 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11838 quote = '"';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11839 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11840
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11841 repr = PyUnicode_New(osize, max);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11842 if (repr == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11843 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11844 okind = PyUnicode_KIND(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11845 odata = PyUnicode_DATA(repr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11846
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11847 PyUnicode_WRITE(okind, odata, 0, quote);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11848 PyUnicode_WRITE(okind, odata, osize-1, quote);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11849
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11850 for (i = 0, o = 1; i < isize; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11851 Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11852
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11853 /* Escape quotes and backslashes */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11854 if ((ch == quote) || (ch == '\\')) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11855 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11856 PyUnicode_WRITE(okind, odata, o++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11857 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11858 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11859
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11860 /* Map special whitespace to '\t', \n', '\r' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11861 if (ch == '\t') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11862 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11863 PyUnicode_WRITE(okind, odata, o++, 't');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11864 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11865 else if (ch == '\n') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11866 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11867 PyUnicode_WRITE(okind, odata, o++, 'n');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11868 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11869 else if (ch == '\r') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11870 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11871 PyUnicode_WRITE(okind, odata, o++, 'r');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11872 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11873
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11874 /* Map non-printable US ASCII to '\xhh' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11875 else if (ch < ' ' || ch == 0x7F) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11876 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11877 PyUnicode_WRITE(okind, odata, o++, 'x');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11878 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11879 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11880 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11881
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11882 /* Copy ASCII characters as-is */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11883 else if (ch < 0x7F) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11884 PyUnicode_WRITE(okind, odata, o++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11885 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11886
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11887 /* Non-ASCII characters */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11888 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11889 /* Map Unicode whitespace and control characters
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11890 (categories Z* and C* except ASCII space)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11891 */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11892 if (!Py_UNICODE_ISPRINTABLE(ch)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11893 /* Map 8-bit characters to '\xhh' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11894 if (ch <= 0xff) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11895 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11896 PyUnicode_WRITE(okind, odata, o++, 'x');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11897 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11898 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11899 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11900 /* Map 21-bit characters to '\U00xxxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11901 else if (ch >= 0x10000) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11902 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11903 PyUnicode_WRITE(okind, odata, o++, 'U');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11904 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11905 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11906 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11907 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11908 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11909 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11910 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11911 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11912 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11913 /* Map 16-bit characters to '\uxxxx' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11914 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11915 PyUnicode_WRITE(okind, odata, o++, '\\');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11916 PyUnicode_WRITE(okind, odata, o++, 'u');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11917 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11918 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11919 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11920 PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11921 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11922 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11923 /* Copy characters as-is */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11924 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11925 PyUnicode_WRITE(okind, odata, o++, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11926 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11927 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11928 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11929 /* Closing quote already added at the beginning */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11930 assert(_PyUnicode_CheckConsistency(repr, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11931 return repr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11932 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11933
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11934 PyDoc_STRVAR(rfind__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11935 "S.rfind(sub[, start[, end]]) -> int\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11936 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11937 Return the highest index in S where substring sub is found,\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11938 such that sub is contained within S[start:end]. Optional\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11939 arguments start and end are interpreted as in slice notation.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11940 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11941 Return -1 on failure.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11942
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11943 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11944 unicode_rfind(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11945 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11946 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11947 Py_ssize_t start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11948 Py_ssize_t end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11949 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11950
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11951 if (!stringlib_parse_args_finds_unicode("rfind", args, &substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11952 &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11953 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11954
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11955 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11956 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11957 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11958 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11959
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11960 result = any_find_slice(-1, self, substring, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11961
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11962 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11963
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11964 if (result == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11965 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11966
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11967 return PyLong_FromSsize_t(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11968 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11969
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11970 PyDoc_STRVAR(rindex__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11971 "S.rindex(sub[, start[, end]]) -> int\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11972 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11973 Like S.rfind() but raise ValueError when the substring is not found.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11974
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11975 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11976 unicode_rindex(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11977 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11978 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11979 Py_ssize_t start;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11980 Py_ssize_t end;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11981 Py_ssize_t result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11982
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11983 if (!stringlib_parse_args_finds_unicode("rindex", args, &substring,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11984 &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11985 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11986
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11987 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11988 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11989 if (PyUnicode_READY(substring) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11990 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11991
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11992 result = any_find_slice(-1, self, substring, start, end);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11993
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11994 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11995
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11996 if (result == -2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11997 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11998
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
11999 if (result < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12000 PyErr_SetString(PyExc_ValueError, "substring not found");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12001 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12002 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12003
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12004 return PyLong_FromSsize_t(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12005 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12006
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12007 PyDoc_STRVAR(rjust__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12008 "S.rjust(width[, fillchar]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12009 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12010 Return S right-justified in a string of length width. Padding is\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12011 done using the specified fill character (default is a space).");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12012
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12013 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12014 unicode_rjust(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12015 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12016 Py_ssize_t width;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12017 Py_UCS4 fillchar = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12018
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12019 if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12020 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12021
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12022 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12023 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12024
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12025 if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12026 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12027 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12028 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12029
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12030 return pad(self, width - _PyUnicode_LENGTH(self), 0, fillchar);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12031 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12032
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12033 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12034 PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12035 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12036 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12037
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12038 s = PyUnicode_FromObject(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12039 if (s == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12040 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12041 if (sep != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12042 sep = PyUnicode_FromObject(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12043 if (sep == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12044 Py_DECREF(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12045 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12046 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12047 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12048
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12049 result = split(s, sep, maxsplit);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12050
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12051 Py_DECREF(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12052 Py_XDECREF(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12053 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12054 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12055
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12056 PyDoc_STRVAR(split__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12057 "S.split([sep[, maxsplit]]) -> list of strings\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12058 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12059 Return a list of the words in S, using sep as the\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12060 delimiter string. If maxsplit is given, at most maxsplit\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12061 splits are done. If sep is not specified or is None, any\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12062 whitespace string is a separator and empty strings are\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12063 removed from the result.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12064
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12065 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12066 unicode_split(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12067 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12068 PyObject *substring = Py_None;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12069 Py_ssize_t maxcount = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12070
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12071 if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12072 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12073
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12074 if (substring == Py_None)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12075 return split(self, NULL, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12076 else if (PyUnicode_Check(substring))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12077 return split(self, substring, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12078 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12079 return PyUnicode_Split(self, substring, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12080 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12081
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12082 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12083 PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12084 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12085 PyObject* str_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12086 PyObject* sep_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12087 PyObject* out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12088 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12089 void *buf1 = NULL, *buf2 = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12090 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12091
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12092 str_obj = PyUnicode_FromObject(str_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12093 if (!str_obj || PyUnicode_READY(str_obj) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12094 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12095 sep_obj = PyUnicode_FromObject(sep_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12096 if (!sep_obj || PyUnicode_READY(sep_obj) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12097 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12098 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12099 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12100
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12101 kind1 = PyUnicode_KIND(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12102 kind2 = PyUnicode_KIND(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12103 kind = Py_MAX(kind1, kind2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12104 buf1 = PyUnicode_DATA(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12105 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12106 buf1 = _PyUnicode_AsKind(str_obj, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12107 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12108 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12109 buf2 = PyUnicode_DATA(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12110 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12111 buf2 = _PyUnicode_AsKind(sep_obj, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12112 if (!buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12113 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12114 len1 = PyUnicode_GET_LENGTH(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12115 len2 = PyUnicode_GET_LENGTH(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12116
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12117 switch(PyUnicode_KIND(str_obj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12118 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12119 if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12120 out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12121 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12122 out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12123 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12124 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12125 out = ucs2lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12126 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12127 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12128 out = ucs4lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12129 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12130 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12131 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12132 out = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12133 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12134
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12135 Py_DECREF(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12136 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12137 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12138 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12139 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12140 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12141
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12142 return out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12143 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12144 Py_DECREF(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12145 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12146 if (kind1 != kind && buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12147 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12148 if (kind2 != kind && buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12149 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12150 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12151 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12152
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12153
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12154 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12155 PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12156 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12157 PyObject* str_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12158 PyObject* sep_obj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12159 PyObject* out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12160 int kind1, kind2, kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12161 void *buf1 = NULL, *buf2 = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12162 Py_ssize_t len1, len2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12163
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12164 str_obj = PyUnicode_FromObject(str_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12165 if (!str_obj)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12166 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12167 sep_obj = PyUnicode_FromObject(sep_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12168 if (!sep_obj) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12169 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12170 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12171 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12172
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12173 kind1 = PyUnicode_KIND(str_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12174 kind2 = PyUnicode_KIND(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12175 kind = Py_MAX(kind1, kind2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12176 buf1 = PyUnicode_DATA(str_in);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12177 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12178 buf1 = _PyUnicode_AsKind(str_in, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12179 if (!buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12180 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12181 buf2 = PyUnicode_DATA(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12182 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12183 buf2 = _PyUnicode_AsKind(sep_obj, kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12184 if (!buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12185 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12186 len1 = PyUnicode_GET_LENGTH(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12187 len2 = PyUnicode_GET_LENGTH(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12188
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12189 switch(PyUnicode_KIND(str_in)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12190 case PyUnicode_1BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12191 if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12192 out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12193 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12194 out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12195 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12196 case PyUnicode_2BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12197 out = ucs2lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12198 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12199 case PyUnicode_4BYTE_KIND:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12200 out = ucs4lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12201 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12202 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12203 assert(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12204 out = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12205 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12206
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12207 Py_DECREF(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12208 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12209 if (kind1 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12210 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12211 if (kind2 != kind)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12212 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12213
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12214 return out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12215 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12216 Py_DECREF(sep_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12217 Py_DECREF(str_obj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12218 if (kind1 != kind && buf1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12219 PyMem_Free(buf1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12220 if (kind2 != kind && buf2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12221 PyMem_Free(buf2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12222 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12223 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12224
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12225 PyDoc_STRVAR(partition__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12226 "S.partition(sep) -> (head, sep, tail)\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12227 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12228 Search for the separator sep in S, and return the part before it,\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12229 the separator itself, and the part after it. If the separator is not\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12230 found, return S and two empty strings.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12231
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12232 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12233 unicode_partition(PyObject *self, PyObject *separator)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12234 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12235 return PyUnicode_Partition(self, separator);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12236 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12237
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12238 PyDoc_STRVAR(rpartition__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12239 "S.rpartition(sep) -> (head, sep, tail)\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12240 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12241 Search for the separator sep in S, starting at the end of S, and return\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12242 the part before it, the separator itself, and the part after it. If the\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12243 separator is not found, return two empty strings and S.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12244
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12245 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12246 unicode_rpartition(PyObject *self, PyObject *separator)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12247 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12248 return PyUnicode_RPartition(self, separator);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12249 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12250
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12251 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12252 PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12253 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12254 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12255
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12256 s = PyUnicode_FromObject(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12257 if (s == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12258 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12259 if (sep != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12260 sep = PyUnicode_FromObject(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12261 if (sep == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12262 Py_DECREF(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12263 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12264 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12265 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12266
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12267 result = rsplit(s, sep, maxsplit);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12268
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12269 Py_DECREF(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12270 Py_XDECREF(sep);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12271 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12272 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12273
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12274 PyDoc_STRVAR(rsplit__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12275 "S.rsplit([sep[, maxsplit]]) -> list of strings\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12276 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12277 Return a list of the words in S, using sep as the\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12278 delimiter string, starting at the end of the string and\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12279 working to the front. If maxsplit is given, at most maxsplit\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12280 splits are done. If sep is not specified, any whitespace string\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12281 is a separator.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12282
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12283 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12284 unicode_rsplit(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12285 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12286 PyObject *substring = Py_None;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12287 Py_ssize_t maxcount = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12288
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12289 if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12290 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12291
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12292 if (substring == Py_None)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12293 return rsplit(self, NULL, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12294 else if (PyUnicode_Check(substring))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12295 return rsplit(self, substring, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12296 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12297 return PyUnicode_RSplit(self, substring, maxcount);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12298 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12299
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12300 PyDoc_STRVAR(splitlines__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12301 "S.splitlines([keepends]) -> list of strings\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12302 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12303 Return a list of the lines in S, breaking at line boundaries.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12304 Line breaks are not included in the resulting list unless keepends\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12305 is given and true.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12306
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12307 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12308 unicode_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12309 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12310 static char *kwlist[] = {"keepends", 0};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12311 int keepends = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12312
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12314 kwlist, &keepends))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12315 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12316
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12317 return PyUnicode_Splitlines(self, keepends);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12318 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12319
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12320 static
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12321 PyObject *unicode_str(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12322 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12323 if (PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12324 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12325 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12326 } else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12327 /* Subtype -- return genuine unicode string with the same value. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12328 return PyUnicode_Copy(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12329 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12330
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12331 PyDoc_STRVAR(swapcase__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12332 "S.swapcase() -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12333 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12334 Return a copy of S with uppercase characters converted to lowercase\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12335 and vice versa.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12336
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12337 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12338 unicode_swapcase(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12339 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12340 return fixup(self, fixswapcase);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12341 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12342
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12343 PyDoc_STRVAR(maketrans__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12344 "str.maketrans(x[, y[, z]]) -> dict (static method)\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12345 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12346 Return a translation table usable for str.translate().\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12347 If there is only one argument, it must be a dictionary mapping Unicode\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12348 ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12349 Character keys will be then converted to ordinals.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12350 If there are two arguments, they must be strings of equal length, and\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12351 in the resulting dictionary, each character in x will be mapped to the\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12352 character at the same position in y. If there is a third argument, it\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12353 must be a string, whose characters will be mapped to None in the result.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12354
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12355 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12356 unicode_maketrans(PyObject *null, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12357 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12358 PyObject *x, *y = NULL, *z = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12359 PyObject *new = NULL, *key, *value;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12360 Py_ssize_t i = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12361 int res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12362
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12363 if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12364 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12365 new = PyDict_New();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12366 if (!new)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12367 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12368 if (y != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12369 int x_kind, y_kind, z_kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12370 void *x_data, *y_data, *z_data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12371
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12372 /* x must be a string too, of equal length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12373 if (!PyUnicode_Check(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12374 PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12375 "be a string if there is a second argument");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12376 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12377 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12378 if (PyUnicode_GET_LENGTH(x) != PyUnicode_GET_LENGTH(y)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12379 PyErr_SetString(PyExc_ValueError, "the first two maketrans "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12380 "arguments must have equal length");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12381 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12382 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12383 /* create entries for translating chars in x to those in y */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12384 x_kind = PyUnicode_KIND(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12385 y_kind = PyUnicode_KIND(y);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12386 x_data = PyUnicode_DATA(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12387 y_data = PyUnicode_DATA(y);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12388 for (i = 0; i < PyUnicode_GET_LENGTH(x); i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12389 key = PyLong_FromLong(PyUnicode_READ(x_kind, x_data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12390 value = PyLong_FromLong(PyUnicode_READ(y_kind, y_data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12391 if (!key || !value)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12392 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12393 res = PyDict_SetItem(new, key, value);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12394 Py_DECREF(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12395 Py_DECREF(value);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12396 if (res < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12397 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12398 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12399 /* create entries for deleting chars in z */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12400 if (z != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12401 z_kind = PyUnicode_KIND(z);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12402 z_data = PyUnicode_DATA(z);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12403 for (i = 0; i < PyUnicode_GET_LENGTH(z); i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12404 key = PyLong_FromLong(PyUnicode_READ(z_kind, z_data, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12405 if (!key)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12406 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12407 res = PyDict_SetItem(new, key, Py_None);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12408 Py_DECREF(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12409 if (res < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12410 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12411 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12412 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12413 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12414 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12415 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12416
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12417 /* x must be a dict */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12418 if (!PyDict_CheckExact(x)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12419 PyErr_SetString(PyExc_TypeError, "if you give only one argument "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12420 "to maketrans it must be a dict");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12421 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12422 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12423 /* copy entries into the new dict, converting string keys to int keys */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12424 while (PyDict_Next(x, &i, &key, &value)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12425 if (PyUnicode_Check(key)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12426 /* convert string keys to integer keys */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12427 PyObject *newkey;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12428 if (PyUnicode_GET_LENGTH(key) != 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12429 PyErr_SetString(PyExc_ValueError, "string keys in translate "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12430 "table must be of length 1");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12431 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12432 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12433 kind = PyUnicode_KIND(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12434 data = PyUnicode_DATA(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12435 newkey = PyLong_FromLong(PyUnicode_READ(kind, data, 0));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12436 if (!newkey)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12437 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12438 res = PyDict_SetItem(new, newkey, value);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12439 Py_DECREF(newkey);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12440 if (res < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12441 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12442 } else if (PyLong_Check(key)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12443 /* just keep integer keys */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12444 if (PyDict_SetItem(new, key, value) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12445 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12446 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12447 PyErr_SetString(PyExc_TypeError, "keys in translate table must "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12448 "be strings or integers");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12449 goto err;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12450 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12451 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12452 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12453 return new;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12454 err:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12455 Py_DECREF(new);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12456 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12457 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12458
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12459 PyDoc_STRVAR(translate__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12460 "S.translate(table) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12461 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12462 Return a copy of the string S, where all characters have been mapped\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12463 through the given translation table, which must be a mapping of\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12464 Unicode ordinals to Unicode ordinals, strings, or None.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12465 Unmapped characters are left untouched. Characters mapped to None\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12466 are deleted.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12467
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12468 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12469 unicode_translate(PyObject *self, PyObject *table)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12470 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12471 return _PyUnicode_TranslateCharmap(self, table, "ignore");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12472 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12473
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12474 PyDoc_STRVAR(upper__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12475 "S.upper() -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12476 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12477 Return a copy of S converted to uppercase.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12478
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12479 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12480 unicode_upper(PyObject *self)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12481 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12482 return fixup(self, fixupper);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12483 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12484
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12485 PyDoc_STRVAR(zfill__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12486 "S.zfill(width) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12487 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12488 Pad a numeric string S with zeros on the left, to fill a field\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12489 of the specified width. The string S is never truncated.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12490
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12491 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12492 unicode_zfill(PyObject *self, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12493 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12494 Py_ssize_t fill;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12495 PyObject *u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12496 Py_ssize_t width;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12497 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12498 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12499 Py_UCS4 chr;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12500
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12501 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12502 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12503
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12504 if (!PyArg_ParseTuple(args, "n:zfill", &width))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12505 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12506
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12507 if (PyUnicode_GET_LENGTH(self) >= width) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12508 if (PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12509 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12510 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12511 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12512 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12513 return PyUnicode_Copy(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12514 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12515
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12516 fill = width - _PyUnicode_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12517
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12518 u = pad(self, fill, 0, '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12519
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12520 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12521 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12522
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12523 kind = PyUnicode_KIND(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12524 data = PyUnicode_DATA(u);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12525 chr = PyUnicode_READ(kind, data, fill);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12526
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12527 if (chr == '+' || chr == '-') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12528 /* move sign to beginning of string */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12529 PyUnicode_WRITE(kind, data, 0, chr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12530 PyUnicode_WRITE(kind, data, fill, '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12531 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12532
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12533 assert(_PyUnicode_CheckConsistency(u, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12534 return u;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12535 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12536
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
#if 0
/* Compiled out by the surrounding "#if 0".  Would forward self to
   PyUnicode_TransformDecimalAndSpaceToASCII() and return its result. */
static PyObject *
unicode__decimal2ascii(PyObject *self)
{
    PyObject *ascii = PyUnicode_TransformDecimalAndSpaceToASCII(self);

    return ascii;
}
#endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12544
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12545 PyDoc_STRVAR(startswith__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12546 "S.startswith(prefix[, start[, end]]) -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12547 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12548 Return True if S starts with the specified prefix, False otherwise.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12549 With optional start, test S beginning at that position.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12550 With optional end, stop comparing S at that position.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12551 prefix can also be a tuple of strings to try.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12552
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12553 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12554 unicode_startswith(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12555 PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12556 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12557 PyObject *subobj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12558 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12559 Py_ssize_t start = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12560 Py_ssize_t end = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12561 int result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12562
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12563 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12564 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12565 if (PyTuple_Check(subobj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12566 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12567 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12568 substring = PyUnicode_FromObject(PyTuple_GET_ITEM(subobj, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12569 if (substring == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12570 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12571 result = tailmatch(self, substring, start, end, -1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12572 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12573 if (result) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12574 Py_RETURN_TRUE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12575 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12576 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12577 /* nothing matched */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12578 Py_RETURN_FALSE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12579 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12580 substring = PyUnicode_FromObject(subobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12581 if (substring == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12582 if (PyErr_ExceptionMatches(PyExc_TypeError))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12583 PyErr_Format(PyExc_TypeError, "startswith first arg must be str or "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12584 "a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12585 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12586 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12587 result = tailmatch(self, substring, start, end, -1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12588 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12589 return PyBool_FromLong(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12590 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12591
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12592
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12593 PyDoc_STRVAR(endswith__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12594 "S.endswith(suffix[, start[, end]]) -> bool\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12595 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12596 Return True if S ends with the specified suffix, False otherwise.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12597 With optional start, test S beginning at that position.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12598 With optional end, stop comparing S at that position.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12599 suffix can also be a tuple of strings to try.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12600
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12601 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12602 unicode_endswith(PyObject *self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12603 PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12604 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12605 PyObject *subobj;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12606 PyObject *substring;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12607 Py_ssize_t start = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12608 Py_ssize_t end = PY_SSIZE_T_MAX;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12609 int result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12610
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12611 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12612 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12613 if (PyTuple_Check(subobj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12614 Py_ssize_t i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12615 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12616 substring = PyUnicode_FromObject(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12617 PyTuple_GET_ITEM(subobj, i));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12618 if (substring == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12619 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12620 result = tailmatch(self, substring, start, end, +1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12621 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12622 if (result) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12623 Py_RETURN_TRUE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12624 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12625 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12626 Py_RETURN_FALSE;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12627 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12628 substring = PyUnicode_FromObject(subobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12629 if (substring == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12630 if (PyErr_ExceptionMatches(PyExc_TypeError))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12631 PyErr_Format(PyExc_TypeError, "endswith first arg must be str or "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12632 "a tuple of str, not %s", Py_TYPE(subobj)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12633 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12634 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12635 result = tailmatch(self, substring, start, end, +1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12636 Py_DECREF(substring);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12637 return PyBool_FromLong(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12638 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12639
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12640 #include "stringlib/unicode_format.h"
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12641
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12642 PyDoc_STRVAR(format__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12643 "S.format(*args, **kwargs) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12644 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12645 Return a formatted version of S, using substitutions from args and kwargs.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12646 The substitutions are identified by braces ('{' and '}').");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12647
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12648 PyDoc_STRVAR(format_map__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12649 "S.format_map(mapping) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12650 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12651 Return a formatted version of S, using substitutions from mapping.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12652 The substitutions are identified by braces ('{' and '}').");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12653
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12654 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12655 unicode__format__(PyObject* self, PyObject* args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12656 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12657 PyObject *format_spec, *out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12658
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12659 if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12660 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12661
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12662 out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12663 PyUnicode_GET_LENGTH(format_spec));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12664 return out;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12665 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12666
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12667 PyDoc_STRVAR(p_format__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12668 "S.__format__(format_spec) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12669 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12670 Return a formatted version of S as described by format_spec.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12671
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12672 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12673 unicode__sizeof__(PyObject *v)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12674 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12675 Py_ssize_t size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12676
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12677 /* If it's a compact object, account for base structure +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12678 character data. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12679 if (PyUnicode_IS_COMPACT_ASCII(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12680 size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12681 else if (PyUnicode_IS_COMPACT(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12682 size = sizeof(PyCompactUnicodeObject) +
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12683 (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12684 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12685 /* If it is a two-block object, account for base object, and
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12686 for character block if present. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12687 size = sizeof(PyUnicodeObject);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12688 if (_PyUnicode_DATA_ANY(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12689 size += (PyUnicode_GET_LENGTH(v) + 1) *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12690 PyUnicode_KIND(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12691 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12692 /* If the wstr pointer is present, account for it unless it is shared
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12693 with the data pointer. Check if the data is not shared. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12694 if (_PyUnicode_HAS_WSTR_MEMORY(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12695 size += (PyUnicode_WSTR_LENGTH(v) + 1) * sizeof(wchar_t);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12696 if (_PyUnicode_HAS_UTF8_MEMORY(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12697 size += PyUnicode_UTF8_LENGTH(v) + 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12698
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12699 return PyLong_FromSsize_t(size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12700 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12701
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12702 PyDoc_STRVAR(sizeof__doc__,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12703 "S.__sizeof__() -> size of S in memory, in bytes");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12704
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12705 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12706 unicode_getnewargs(PyObject *v)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12707 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12708 PyObject *copy = PyUnicode_Copy(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12709 if (!copy)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12710 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12711 return Py_BuildValue("(N)", copy);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12712 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12713
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12714 static PyMethodDef unicode_methods[] = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12715
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12716 /* Order is according to common usage: often used methods should
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12717 appear first, since lookup is done sequentially. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12718
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12719 {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12720 {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12721 {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12722 {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12723 {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12724 {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12725 {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12726 {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12727 {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12728 {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12729 {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12730 {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12731 {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12732 {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12733 {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12734 {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12735 {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12736 {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12737 {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12738 {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12739 {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12740 {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12741 {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12742 {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12743 {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12744 {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12745 {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12746 {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12747 {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12748 {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12749 {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12750 {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12751 {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12752 {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12753 {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12754 {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12755 {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12756 {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12757 {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12758 {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12759 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12760 {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12761 {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12762 {"maketrans", (PyCFunction) unicode_maketrans,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12763 METH_VARARGS | METH_STATIC, maketrans__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12764 {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12765 #if 0
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12766 {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12767 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12768
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12769 #if 0
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12770 /* These methods are just used for debugging the implementation. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12771 {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12772 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12773
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12774 {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12775 {NULL, NULL}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12776 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12777
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12778 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12779 unicode_mod(PyObject *v, PyObject *w)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12780 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12781 if (!PyUnicode_Check(v))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12782 Py_RETURN_NOTIMPLEMENTED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12783 return PyUnicode_Format(v, w);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12784 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12785
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12786 static PyNumberMethods unicode_as_number = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12787 0, /*nb_add*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12788 0, /*nb_subtract*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12789 0, /*nb_multiply*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12790 unicode_mod, /*nb_remainder*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12791 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12792
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12793 static PySequenceMethods unicode_as_sequence = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12794 (lenfunc) unicode_length, /* sq_length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12795 PyUnicode_Concat, /* sq_concat */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12796 (ssizeargfunc) unicode_repeat, /* sq_repeat */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12797 (ssizeargfunc) unicode_getitem, /* sq_item */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12798 0, /* sq_slice */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12799 0, /* sq_ass_item */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12800 0, /* sq_ass_slice */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12801 PyUnicode_Contains, /* sq_contains */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12802 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12803
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12804 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12805 unicode_subscript(PyObject* self, PyObject* item)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12806 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12807 if (PyUnicode_READY(self) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12808 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12809
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12810 if (PyIndex_Check(item)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12811 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12812 if (i == -1 && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12813 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12814 if (i < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12815 i += PyUnicode_GET_LENGTH(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12816 return unicode_getitem(self, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12817 } else if (PySlice_Check(item)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12818 Py_ssize_t start, stop, step, slicelength, cur, i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12819 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12820 void *src_data, *dest_data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12821 int src_kind, dest_kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12822 Py_UCS4 ch, max_char, kind_limit;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12823
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12824 if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12825 &start, &stop, &step, &slicelength) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12826 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12827 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12828
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12829 if (slicelength <= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12830 return PyUnicode_New(0, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12831 } else if (start == 0 && step == 1 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12832 slicelength == PyUnicode_GET_LENGTH(self) &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12833 PyUnicode_CheckExact(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12834 Py_INCREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12835 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12836 } else if (step == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12837 return PyUnicode_Substring(self,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12838 start, start + slicelength);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12839 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12840 /* General case */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12841 src_kind = PyUnicode_KIND(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12842 src_data = PyUnicode_DATA(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12843 if (!PyUnicode_IS_ASCII(self)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12844 kind_limit = kind_maxchar_limit(src_kind);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12845 max_char = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12846 for (cur = start, i = 0; i < slicelength; cur += step, i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12847 ch = PyUnicode_READ(src_kind, src_data, cur);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12848 if (ch > max_char) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12849 max_char = ch;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12850 if (max_char >= kind_limit)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12851 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12852 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12853 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12854 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12855 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12856 max_char = 127;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12857 result = PyUnicode_New(slicelength, max_char);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12858 if (result == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12859 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12860 dest_kind = PyUnicode_KIND(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12861 dest_data = PyUnicode_DATA(result);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12862
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12863 for (cur = start, i = 0; i < slicelength; cur += step, i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12864 Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12865 PyUnicode_WRITE(dest_kind, dest_data, i, ch);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12866 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12867 assert(_PyUnicode_CheckConsistency(result, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12868 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12869 } else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12870 PyErr_SetString(PyExc_TypeError, "string indices must be integers");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12871 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12872 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12873 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12874
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12875 static PyMappingMethods unicode_as_mapping = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12876 (lenfunc)unicode_length, /* mp_length */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12877 (binaryfunc)unicode_subscript, /* mp_subscript */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12878 (objobjargproc)0, /* mp_ass_subscript */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12879 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12880
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12881
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12882 /* Helpers for PyUnicode_Format() */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12883
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12884 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12885 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12886 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12887 Py_ssize_t argidx = *p_argidx;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12888 if (argidx < arglen) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12889 (*p_argidx)++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12890 if (arglen < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12891 return args;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12892 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12893 return PyTuple_GetItem(args, argidx);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12894 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12895 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12896 "not enough arguments for format string");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12897 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12898 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12899
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12900 /* Returns a new reference to a PyUnicode object, or NULL on failure. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12901
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12902 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12903 formatfloat(PyObject *v, int flags, int prec, int type)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12904 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12905 char *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12906 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12907 double x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12908
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12909 x = PyFloat_AsDouble(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12910 if (x == -1.0 && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12911 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12912
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12913 if (prec < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12914 prec = 6;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12915
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12916 p = PyOS_double_to_string(x, type, prec,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12917 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12918 if (p == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12919 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12920 result = PyUnicode_DecodeASCII(p, strlen(p), NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12921 PyMem_Free(p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12922 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12923 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12924
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12925 static PyObject*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12926 formatlong(PyObject *val, int flags, int prec, int type)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12927 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12928 char *buf;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12929 int len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12930 PyObject *str; /* temporary string object. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12931 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12932
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12933 str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12934 if (!str)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12935 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12936 result = PyUnicode_DecodeASCII(buf, len, NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12937 Py_DECREF(str);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12938 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12939 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12940
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12941 static Py_UCS4
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12942 formatchar(PyObject *v)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12943 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12944 /* presume that the buffer is at least 3 characters long */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12945 if (PyUnicode_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12946 if (PyUnicode_GET_LENGTH(v) == 1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12947 return PyUnicode_READ_CHAR(v, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12948 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12949 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12950 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12951 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12952 /* Integer input truncated to a character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12953 long x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12954 x = PyLong_AsLong(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12955 if (x == -1 && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12956 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12957
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12958 if (x < 0 || x > MAX_UNICODE) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12959 PyErr_SetString(PyExc_OverflowError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12960 "%c arg not in range(0x110000)");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12961 return (Py_UCS4) -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12962 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12963
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12964 return (Py_UCS4) x;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12965 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12966
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12967 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12968 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12969 "%c requires int or char");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12970 return (Py_UCS4) -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12971 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12972
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12973 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12974 repeat_accumulate(_PyAccu *acc, PyObject *obj, Py_ssize_t count)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12975 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12976 int r;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12977 assert(count > 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12978 assert(PyUnicode_Check(obj));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12979 if (count > 5) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12980 PyObject *repeated = unicode_repeat(obj, count);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12981 if (repeated == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12982 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12983 r = _PyAccu_Accumulate(acc, repeated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12984 Py_DECREF(repeated);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12985 return r;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12986 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12987 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12988 do {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12989 if (_PyAccu_Accumulate(acc, obj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12990 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12991 } while (--count);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12992 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12993 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12994 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12995
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12996 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12997 PyUnicode_Format(PyObject *format, PyObject *args)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12998 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
12999 void *fmt;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13000 int fmtkind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13001 PyObject *result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13002 int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13003 int r;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13004 Py_ssize_t fmtcnt, fmtpos, arglen, argidx;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13005 int args_owned = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13006 PyObject *dict = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13007 PyObject *temp = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13008 PyObject *second = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13009 PyObject *uformat;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13010 _PyAccu acc;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13011 static PyObject *plus, *minus, *blank, *zero, *percent;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13012
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13013 if (!plus && !(plus = get_latin1_char('+')))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13014 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13015 if (!minus && !(minus = get_latin1_char('-')))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13016 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13017 if (!blank && !(blank = get_latin1_char(' ')))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13018 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13019 if (!zero && !(zero = get_latin1_char('0')))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13020 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13021 if (!percent && !(percent = get_latin1_char('%')))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13022 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13023
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13024 if (format == NULL || args == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13025 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13026 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13027 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13028 uformat = PyUnicode_FromObject(format);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13029 if (uformat == NULL || PyUnicode_READY(uformat) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13030 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13031 if (_PyAccu_Init(&acc))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13032 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13033 fmt = PyUnicode_DATA(uformat);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13034 fmtkind = PyUnicode_KIND(uformat);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13035 fmtcnt = PyUnicode_GET_LENGTH(uformat);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13036 fmtpos = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13037
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13038 if (PyTuple_Check(args)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13039 arglen = PyTuple_Size(args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13040 argidx = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13041 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13042 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13043 arglen = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13044 argidx = -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13045 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13046 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13047 !PyUnicode_Check(args))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13048 dict = args;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13049
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13050 while (--fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13051 if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13052 PyObject *nonfmt;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13053 Py_ssize_t nonfmtpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13054 nonfmtpos = fmtpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13055 while (fmtcnt >= 0 &&
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13056 PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13057 fmtpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13058 fmtcnt--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13059 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13060 nonfmt = PyUnicode_Substring(uformat, nonfmtpos, fmtpos);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13061 if (nonfmt == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13062 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13063 r = _PyAccu_Accumulate(&acc, nonfmt);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13064 Py_DECREF(nonfmt);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13065 if (r)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13066 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13067 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13068 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13069 /* Got a format specifier */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13070 int flags = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13071 Py_ssize_t width = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13072 int prec = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13073 Py_UCS4 c = '\0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13074 Py_UCS4 fill, sign;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13075 int isnumok;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13076 PyObject *v = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13077 void *pbuf = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13078 Py_ssize_t pindex, len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13079 PyObject *signobj = NULL, *fillobj = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13080
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13081 fmtpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13082 if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13083 Py_ssize_t keystart;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13084 Py_ssize_t keylen;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13085 PyObject *key;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13086 int pcount = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13087
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13088 if (dict == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13089 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13090 "format requires a mapping");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13091 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13092 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13093 ++fmtpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13094 --fmtcnt;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13095 keystart = fmtpos;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13096 /* Skip over balanced parentheses */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13097 while (pcount > 0 && --fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13098 if (PyUnicode_READ(fmtkind, fmt, fmtpos) == ')')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13099 --pcount;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13100 else if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13101 ++pcount;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13102 fmtpos++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13103 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13104 keylen = fmtpos - keystart - 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13105 if (fmtcnt < 0 || pcount > 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13106 PyErr_SetString(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13107 "incomplete format key");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13108 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13109 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13110 key = PyUnicode_Substring(uformat,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13111 keystart, keystart + keylen);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13112 if (key == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13113 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13114 if (args_owned) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13115 Py_DECREF(args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13116 args_owned = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13117 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13118 args = PyObject_GetItem(dict, key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13119 Py_DECREF(key);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13120 if (args == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13121 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13122 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13123 args_owned = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13124 arglen = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13125 argidx = -2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13126 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13127 while (--fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13128 switch (c = PyUnicode_READ(fmtkind, fmt, fmtpos++)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13129 case '-': flags |= F_LJUST; continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13130 case '+': flags |= F_SIGN; continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13131 case ' ': flags |= F_BLANK; continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13132 case '#': flags |= F_ALT; continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13133 case '0': flags |= F_ZERO; continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13134 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13135 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13136 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13137 if (c == '*') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13138 v = getnextarg(args, arglen, &argidx);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13139 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13140 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13141 if (!PyLong_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13142 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13143 "* wants int");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13144 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13145 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13146 width = PyLong_AsLong(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13147 if (width == -1 && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13148 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13149 if (width < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13150 flags |= F_LJUST;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13151 width = -width;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13152 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13153 if (--fmtcnt >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13154 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13155 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13156 else if (c >= '0' && c <= '9') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13157 width = c - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13158 while (--fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13159 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13160 if (c < '0' || c > '9')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13161 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13162 if ((width*10) / 10 != width) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13163 PyErr_SetString(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13164 "width too big");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13165 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13166 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13167 width = width*10 + (c - '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13168 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13169 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13170 if (c == '.') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13171 prec = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13172 if (--fmtcnt >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13173 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13174 if (c == '*') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13175 v = getnextarg(args, arglen, &argidx);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13176 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13177 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13178 if (!PyLong_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13179 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13180 "* wants int");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13181 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13182 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13183 prec = PyLong_AsLong(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13184 if (prec == -1 && PyErr_Occurred())
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13185 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13186 if (prec < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13187 prec = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13188 if (--fmtcnt >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13189 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13190 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13191 else if (c >= '0' && c <= '9') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13192 prec = c - '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13193 while (--fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13194 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13195 if (c < '0' || c > '9')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13196 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13197 if ((prec*10) / 10 != prec) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13198 PyErr_SetString(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13199 "prec too big");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13200 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13201 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13202 prec = prec*10 + (c - '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13203 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13204 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13205 } /* prec */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13206 if (fmtcnt >= 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13207 if (c == 'h' || c == 'l' || c == 'L') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13208 if (--fmtcnt >= 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13209 c = PyUnicode_READ(fmtkind, fmt, fmtpos++);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13210 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13211 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13212 if (fmtcnt < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13213 PyErr_SetString(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13214 "incomplete format");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13215 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13216 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13217 if (c != '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13218 v = getnextarg(args, arglen, &argidx);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13219 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13220 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13221 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13222 sign = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13223 fill = ' ';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13224 fillobj = blank;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13225 switch (c) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13226
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13227 case '%':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13228 _PyAccu_Accumulate(&acc, percent);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13229 continue;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13230
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13231 case 's':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13232 case 'r':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13233 case 'a':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13234 if (PyUnicode_CheckExact(v) && c == 's') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13235 temp = v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13236 Py_INCREF(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13237 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13238 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13239 if (c == 's')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13240 temp = PyObject_Str(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13241 else if (c == 'r')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13242 temp = PyObject_Repr(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13243 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13244 temp = PyObject_ASCII(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13245 if (temp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13246 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13247 if (PyUnicode_Check(temp))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13248 /* nothing to do */;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13249 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13250 Py_DECREF(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13251 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13252 "%s argument has non-string str()");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13253 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13254 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13255 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13256 if (PyUnicode_READY(temp) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13257 Py_CLEAR(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13258 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13259 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13260 pbuf = PyUnicode_DATA(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13261 kind = PyUnicode_KIND(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13262 len = PyUnicode_GET_LENGTH(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13263 if (prec >= 0 && len > prec)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13264 len = prec;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13265 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13266
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13267 case 'i':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13268 case 'd':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13269 case 'u':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13270 case 'o':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13271 case 'x':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13272 case 'X':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13273 isnumok = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13274 if (PyNumber_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13275 PyObject *iobj=NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13276
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13277 if (PyLong_Check(v)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13278 iobj = v;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13279 Py_INCREF(iobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13280 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13281 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13282 iobj = PyNumber_Long(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13283 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13284 if (iobj!=NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13285 if (PyLong_Check(iobj)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13286 isnumok = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13287 temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13288 Py_DECREF(iobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13289 if (!temp)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13290 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13291 if (PyUnicode_READY(temp) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13292 Py_CLEAR(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13293 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13294 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13295 pbuf = PyUnicode_DATA(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13296 kind = PyUnicode_KIND(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13297 len = PyUnicode_GET_LENGTH(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13298 sign = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13299 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13300 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13301 Py_DECREF(iobj);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13302 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13303 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13304 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13305 if (!isnumok) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13306 PyErr_Format(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13307 "%%%c format: a number is required, "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13308 "not %.200s", (char)c, Py_TYPE(v)->tp_name);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13309 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13310 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13311 if (flags & F_ZERO) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13312 fill = '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13313 fillobj = zero;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13314 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13315 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13316
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13317 case 'e':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13318 case 'E':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13319 case 'f':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13320 case 'F':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13321 case 'g':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13322 case 'G':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13323 temp = formatfloat(v, flags, prec, c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13324 if (!temp)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13325 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13326 if (PyUnicode_READY(temp) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13327 Py_CLEAR(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13328 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13329 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13330 pbuf = PyUnicode_DATA(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13331 kind = PyUnicode_KIND(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13332 len = PyUnicode_GET_LENGTH(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13333 sign = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13334 if (flags & F_ZERO) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13335 fill = '0';
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13336 fillobj = zero;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13337 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13338 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13339
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13340 case 'c':
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13341 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13342 Py_UCS4 ch = formatchar(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13343 if (ch == (Py_UCS4) -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13344 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13345 temp = _PyUnicode_FromUCS4(&ch, 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13346 if (temp == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13347 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13348 pbuf = PyUnicode_DATA(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13349 kind = PyUnicode_KIND(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13350 len = PyUnicode_GET_LENGTH(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13351 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13352 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13353
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13354 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13355 PyErr_Format(PyExc_ValueError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13356 "unsupported format character '%c' (0x%x) "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13357 "at index %zd",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13358 (31<=c && c<=126) ? (char)c : '?',
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13359 (int)c,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13360 fmtpos - 1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13361 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13362 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13363 /* pbuf is initialized here. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13364 pindex = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13365 if (sign) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13366 if (PyUnicode_READ(kind, pbuf, pindex) == '-') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13367 signobj = minus;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13368 len--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13369 pindex++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13370 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13371 else if (PyUnicode_READ(kind, pbuf, pindex) == '+') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13372 signobj = plus;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13373 len--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13374 pindex++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13375 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13376 else if (flags & F_SIGN)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13377 signobj = plus;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13378 else if (flags & F_BLANK)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13379 signobj = blank;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13380 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13381 sign = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13382 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13383 if (width < len)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13384 width = len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13385 if (sign) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13386 if (fill != ' ') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13387 assert(signobj != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13388 if (_PyAccu_Accumulate(&acc, signobj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13389 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13390 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13391 if (width > len)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13392 width--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13393 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13394 if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13395 assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13396 assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13397 if (fill != ' ') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13398 second = get_latin1_char(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13399 PyUnicode_READ(kind, pbuf, pindex + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13400 pindex += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13401 if (second == NULL ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13402 _PyAccu_Accumulate(&acc, zero) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13403 _PyAccu_Accumulate(&acc, second))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13404 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13405 Py_CLEAR(second);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13406 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13407 width -= 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13408 if (width < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13409 width = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13410 len -= 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13411 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13412 if (width > len && !(flags & F_LJUST)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13413 assert(fillobj != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13414 if (repeat_accumulate(&acc, fillobj, width - len))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13415 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13416 width = len;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13417 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13418 if (fill == ' ') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13419 if (sign) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13420 assert(signobj != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13421 if (_PyAccu_Accumulate(&acc, signobj))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13422 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13423 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13424 if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13425 assert(PyUnicode_READ(kind, pbuf, pindex) == '0');
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13426 assert(PyUnicode_READ(kind, pbuf, pindex+1) == c);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13427 second = get_latin1_char(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13428 PyUnicode_READ(kind, pbuf, pindex + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13429 pindex += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13430 if (second == NULL ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13431 _PyAccu_Accumulate(&acc, zero) ||
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13432 _PyAccu_Accumulate(&acc, second))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13433 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13434 Py_CLEAR(second);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13435 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13436 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13437 /* Copy all characters, preserving len */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13438 if (temp != NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13439 assert(pbuf == PyUnicode_DATA(temp));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13440 v = PyUnicode_Substring(temp, pindex, pindex + len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13441 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13442 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13443 const char *p = (const char *) pbuf;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13444 assert(pbuf != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13445 p += kind * pindex;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13446 v = PyUnicode_FromKindAndData(kind, p, len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13447 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13448 if (v == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13449 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13450 r = _PyAccu_Accumulate(&acc, v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13451 Py_DECREF(v);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13452 if (r)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13453 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13454 if (width > len && repeat_accumulate(&acc, blank, width - len))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13455 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13456 if (dict && (argidx < arglen) && c != '%') {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13457 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13458 "not all arguments converted during string formatting");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13459 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13460 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13461 Py_CLEAR(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13462 } /* '%' */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13463 } /* until end */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13464 if (argidx < arglen && !dict) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13465 PyErr_SetString(PyExc_TypeError,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13466 "not all arguments converted during string formatting");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13467 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13468 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13469
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13470 result = _PyAccu_Finish(&acc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13471 if (args_owned) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13472 Py_DECREF(args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13473 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13474 Py_DECREF(uformat);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13475 Py_XDECREF(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13476 Py_XDECREF(second);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13477 return result;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13478
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13479 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13480 Py_DECREF(uformat);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13481 Py_XDECREF(temp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13482 Py_XDECREF(second);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13483 _PyAccu_Destroy(&acc);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13484 if (args_owned) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13485 Py_DECREF(args);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13486 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13487 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13488 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13489
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13490 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13491 unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13492
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13493 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13494 unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13495 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13496 PyObject *x = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13497 static char *kwlist[] = {"object", "encoding", "errors", 0};
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13498 char *encoding = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13499 char *errors = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13500
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13501 if (type != &PyUnicode_Type)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13502 return unicode_subtype_new(type, args, kwds);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13503 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13504 kwlist, &x, &encoding, &errors))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13505 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13506 if (x == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13507 return PyUnicode_New(0, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13508 if (encoding == NULL && errors == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13509 return PyObject_Str(x);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13510 else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13511 return PyUnicode_FromEncodedObject(x, encoding, errors);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13512 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13513
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13514 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13515 unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13516 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13517 PyObject *unicode, *self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13518 Py_ssize_t length, char_size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13519 int share_wstr, share_utf8;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13520 unsigned int kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13521 void *data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13522
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13523 assert(PyType_IsSubtype(type, &PyUnicode_Type));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13524
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13525 unicode = unicode_new(&PyUnicode_Type, args, kwds);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13526 if (unicode == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13527 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13528 assert(_PyUnicode_CHECK(unicode));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13529 if (PyUnicode_READY(unicode))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13530 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13531
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13532 self = type->tp_alloc(type, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13533 if (self == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13534 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13535 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13536 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13537 kind = PyUnicode_KIND(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13538 length = PyUnicode_GET_LENGTH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13539
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13540 _PyUnicode_LENGTH(self) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13541 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13542 _PyUnicode_HASH(self) = -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13543 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13544 _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13545 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13546 _PyUnicode_STATE(self).interned = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13547 _PyUnicode_STATE(self).kind = kind;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13548 _PyUnicode_STATE(self).compact = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13549 _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13550 _PyUnicode_STATE(self).ready = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13551 _PyUnicode_WSTR(self) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13552 _PyUnicode_UTF8_LENGTH(self) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13553 _PyUnicode_UTF8(self) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13554 _PyUnicode_WSTR_LENGTH(self) = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13555 _PyUnicode_DATA_ANY(self) = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13556
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13557 share_utf8 = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13558 share_wstr = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13559 if (kind == PyUnicode_1BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13560 char_size = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13561 if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13562 share_utf8 = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13563 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13564 else if (kind == PyUnicode_2BYTE_KIND) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13565 char_size = 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13566 if (sizeof(wchar_t) == 2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13567 share_wstr = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13568 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13569 else {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13570 assert(kind == PyUnicode_4BYTE_KIND);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13571 char_size = 4;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13572 if (sizeof(wchar_t) == 4)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13573 share_wstr = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13574 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13575
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13576 /* Ensure we won't overflow the length. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13577 if (length > (PY_SSIZE_T_MAX / char_size - 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13578 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13579 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13580 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13581 data = PyObject_MALLOC((length + 1) * char_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13582 if (data == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13583 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13584 goto onError;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13585 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13586
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13587 _PyUnicode_DATA_ANY(self) = data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13588 if (share_utf8) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13589 _PyUnicode_UTF8_LENGTH(self) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13590 _PyUnicode_UTF8(self) = data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13591 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13592 if (share_wstr) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13593 _PyUnicode_WSTR_LENGTH(self) = length;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13594 _PyUnicode_WSTR(self) = (wchar_t *)data;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13595 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13596
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13597 Py_MEMCPY(data, PyUnicode_DATA(unicode),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13598 kind * (length + 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13599 assert(_PyUnicode_CheckConsistency(self, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13600 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13601 _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13602 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13603 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13604 return self;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13605
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13606 onError:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13607 Py_DECREF(unicode);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13608 Py_DECREF(self);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13609 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13610 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13611
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13612 PyDoc_STRVAR(unicode_doc,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13613 "str(string[, encoding[, errors]]) -> str\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13614 \n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13615 Create a new string object from the given encoded string.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13616 encoding defaults to the current default string encoding.\n\
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13617 errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13618
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13619 static PyObject *unicode_iter(PyObject *seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13620
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13621 PyTypeObject PyUnicode_Type = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13622 PyVarObject_HEAD_INIT(&PyType_Type, 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13623 "str", /* tp_name */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13624 sizeof(PyUnicodeObject), /* tp_size */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13625 0, /* tp_itemsize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13626 /* Slots */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13627 (destructor)unicode_dealloc, /* tp_dealloc */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13628 0, /* tp_print */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13629 0, /* tp_getattr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13630 0, /* tp_setattr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13631 0, /* tp_reserved */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13632 unicode_repr, /* tp_repr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13633 &unicode_as_number, /* tp_as_number */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13634 &unicode_as_sequence, /* tp_as_sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13635 &unicode_as_mapping, /* tp_as_mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13636 (hashfunc) unicode_hash, /* tp_hash*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13637 0, /* tp_call*/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13638 (reprfunc) unicode_str, /* tp_str */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13639 PyObject_GenericGetAttr, /* tp_getattro */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13640 0, /* tp_setattro */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13641 0, /* tp_as_buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13642 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13643 Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13644 unicode_doc, /* tp_doc */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13645 0, /* tp_traverse */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13646 0, /* tp_clear */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13647 PyUnicode_RichCompare, /* tp_richcompare */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13648 0, /* tp_weaklistoffset */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13649 unicode_iter, /* tp_iter */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13650 0, /* tp_iternext */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13651 unicode_methods, /* tp_methods */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13652 0, /* tp_members */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13653 0, /* tp_getset */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13654 &PyBaseObject_Type, /* tp_base */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13655 0, /* tp_dict */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13656 0, /* tp_descr_get */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13657 0, /* tp_descr_set */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13658 0, /* tp_dictoffset */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13659 0, /* tp_init */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13660 0, /* tp_alloc */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13661 unicode_new, /* tp_new */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13662 PyObject_Del, /* tp_free */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13663 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13664
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13665 /* Initialize the Unicode implementation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13666
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13667 int _PyUnicode_Init(void)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13668 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13669 int i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13670
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13671 /* XXX - move this array to unicodectype.c ? */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13672 Py_UCS2 linebreak[] = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13673 0x000A, /* LINE FEED */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13674 0x000D, /* CARRIAGE RETURN */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13675 0x001C, /* FILE SEPARATOR */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13676 0x001D, /* GROUP SEPARATOR */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13677 0x001E, /* RECORD SEPARATOR */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13678 0x0085, /* NEXT LINE */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13679 0x2028, /* LINE SEPARATOR */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13680 0x2029, /* PARAGRAPH SEPARATOR */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13681 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13682
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13683 /* Init the implementation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13684 unicode_empty = PyUnicode_New(0, 0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13685 if (!unicode_empty)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13686 Py_FatalError("Can't create empty string");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13687 assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13688
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13689 for (i = 0; i < 256; i++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13690 unicode_latin1[i] = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13691 if (PyType_Ready(&PyUnicode_Type) < 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13692 Py_FatalError("Can't initialize 'unicode'");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13693
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13694 /* initialize the linebreak bloom filter */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13695 bloom_linebreak = make_bloom_mask(
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13696 PyUnicode_2BYTE_KIND, linebreak,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13697 Py_ARRAY_LENGTH(linebreak));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13698
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13699 PyType_Ready(&EncodingMapType);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13700
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13701 #ifdef HAVE_MBCS
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13702 winver.dwOSVersionInfoSize = sizeof(winver);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13703 if (!GetVersionEx((OSVERSIONINFO*)&winver)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13704 PyErr_SetFromWindowsErr(0);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13705 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13706 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13707 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13708 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13709 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13710
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13711 /* Finalize the Unicode implementation */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13712
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
/* Does no work in this implementation and always returns 0. */
int
PyUnicode_ClearFreeList(void)
{
    return 0;
}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13718
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13719 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13720 _PyUnicode_Fini(void)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13721 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13722 int i;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13723
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13724 Py_XDECREF(unicode_empty);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13725 unicode_empty = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13726
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13727 for (i = 0; i < 256; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13728 if (unicode_latin1[i]) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13729 Py_DECREF(unicode_latin1[i]);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13730 unicode_latin1[i] = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13731 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13732 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13733 _PyUnicode_ClearStaticStrings();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13734 (void)PyUnicode_ClearFreeList();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13735 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13736
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13737 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13738 PyUnicode_InternInPlace(PyObject **p)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13739 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13740 register PyObject *s = *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13741 PyObject *t;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13742 #ifdef Py_DEBUG
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13743 assert(s != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13744 assert(_PyUnicode_CHECK(s));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13745 #else
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13746 if (s == NULL || !PyUnicode_Check(s))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13747 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13748 #endif
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13749 /* If it's a subclass, we don't really know what putting
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13750 it in the interned dict might do. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13751 if (!PyUnicode_CheckExact(s))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13752 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13753 if (PyUnicode_CHECK_INTERNED(s))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13754 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13755 if (interned == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13756 interned = PyDict_New();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13757 if (interned == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13758 PyErr_Clear(); /* Don't leave an exception */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13759 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13760 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13761 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13762 /* It might be that the GetItem call fails even
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13763 though the key is present in the dictionary,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13764 namely when this happens during a stack overflow. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13765 Py_ALLOW_RECURSION
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13766 t = PyDict_GetItem(interned, s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13767 Py_END_ALLOW_RECURSION
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13768
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13769 if (t) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13770 Py_INCREF(t);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13771 Py_DECREF(*p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13772 *p = t;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13773 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13774 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13775
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13776 PyThreadState_GET()->recursion_critical = 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13777 if (PyDict_SetItem(interned, s, s) < 0) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13778 PyErr_Clear();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13779 PyThreadState_GET()->recursion_critical = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13780 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13781 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13782 PyThreadState_GET()->recursion_critical = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13783 /* The two references in interned are not counted by refcnt.
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13784 The deallocator will take care of this */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13785 Py_REFCNT(s) -= 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13786 _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13787 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13788
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13789 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13790 PyUnicode_InternImmortal(PyObject **p)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13791 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13792 PyUnicode_InternInPlace(p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13793 if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13794 _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13795 Py_INCREF(*p);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13796 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13797 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13798
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13799 PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13800 PyUnicode_InternFromString(const char *cp)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13801 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13802 PyObject *s = PyUnicode_FromString(cp);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13803 if (s == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13804 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13805 PyUnicode_InternInPlace(&s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13806 return s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13807 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13808
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13809 void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13810 _Py_ReleaseInternedUnicodeStrings(void)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13811 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13812 PyObject *keys;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13813 PyObject *s;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13814 Py_ssize_t i, n;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13815 Py_ssize_t immortal_size = 0, mortal_size = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13816
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13817 if (interned == NULL || !PyDict_Check(interned))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13818 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13819 keys = PyDict_Keys(interned);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13820 if (keys == NULL || !PyList_Check(keys)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13821 PyErr_Clear();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13822 return;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13823 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13824
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13825 /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13826 detector, interned unicode strings are not forcibly deallocated;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13827 rather, we give them their stolen references back, and then clear
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13828 and DECREF the interned dict. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13829
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13830 n = PyList_GET_SIZE(keys);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13831 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13832 n);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13833 for (i = 0; i < n; i++) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13834 s = PyList_GET_ITEM(keys, i);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13835 if (PyUnicode_READY(s) == -1) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13836 assert(0 && "could not ready string");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13837 fprintf(stderr, "could not ready string\n");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13838 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13839 switch (PyUnicode_CHECK_INTERNED(s)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13840 case SSTATE_NOT_INTERNED:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13841 /* XXX Shouldn't happen */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13842 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13843 case SSTATE_INTERNED_IMMORTAL:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13844 Py_REFCNT(s) += 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13845 immortal_size += PyUnicode_GET_LENGTH(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13846 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13847 case SSTATE_INTERNED_MORTAL:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13848 Py_REFCNT(s) += 2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13849 mortal_size += PyUnicode_GET_LENGTH(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13850 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13851 default:
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13852 Py_FatalError("Inconsistent interned string state.");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13853 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13854 _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13855 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13856 fprintf(stderr, "total size of all interned strings: "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13857 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13858 "mortal/immortal\n", mortal_size, immortal_size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13859 Py_DECREF(keys);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13860 PyDict_Clear(interned);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13861 Py_DECREF(interned);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13862 interned = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13863 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13864
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13865
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13866 /********************* Unicode Iterator **************************/
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13867
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13868 typedef struct {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13869 PyObject_HEAD
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13870 Py_ssize_t it_index;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13871 PyObject *it_seq; /* Set to NULL when iterator is exhausted */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13872 } unicodeiterobject;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13873
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13874 static void
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13875 unicodeiter_dealloc(unicodeiterobject *it)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13876 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13877 _PyObject_GC_UNTRACK(it);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13878 Py_XDECREF(it->it_seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13879 PyObject_GC_Del(it);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13880 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13881
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13882 static int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13883 unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13884 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13885 Py_VISIT(it->it_seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13886 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13887 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13888
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13889 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13890 unicodeiter_next(unicodeiterobject *it)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13891 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13892 PyObject *seq, *item;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13893
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13894 assert(it != NULL);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13895 seq = it->it_seq;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13896 if (seq == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13897 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13898 assert(_PyUnicode_CHECK(seq));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13899
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13900 if (it->it_index < PyUnicode_GET_LENGTH(seq)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13901 int kind = PyUnicode_KIND(seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13902 void *data = PyUnicode_DATA(seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13903 Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13904 item = PyUnicode_FromOrdinal(chr);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13905 if (item != NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13906 ++it->it_index;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13907 return item;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13908 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13909
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13910 Py_DECREF(seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13911 it->it_seq = NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13912 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13913 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13914
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13915 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13916 unicodeiter_len(unicodeiterobject *it)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13917 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13918 Py_ssize_t len = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13919 if (it->it_seq)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13920 len = PyUnicode_GET_LENGTH(it->it_seq) - it->it_index;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13921 return PyLong_FromSsize_t(len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13922 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13923
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13924 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13925
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13926 static PyMethodDef unicodeiter_methods[] = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13927 {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13928 length_hint_doc},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13929 {NULL, NULL} /* sentinel */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13930 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13931
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13932 PyTypeObject PyUnicodeIter_Type = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13933 PyVarObject_HEAD_INIT(&PyType_Type, 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13934 "str_iterator", /* tp_name */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13935 sizeof(unicodeiterobject), /* tp_basicsize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13936 0, /* tp_itemsize */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13937 /* methods */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13938 (destructor)unicodeiter_dealloc, /* tp_dealloc */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13939 0, /* tp_print */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13940 0, /* tp_getattr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13941 0, /* tp_setattr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13942 0, /* tp_reserved */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13943 0, /* tp_repr */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13944 0, /* tp_as_number */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13945 0, /* tp_as_sequence */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13946 0, /* tp_as_mapping */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13947 0, /* tp_hash */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13948 0, /* tp_call */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13949 0, /* tp_str */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13950 PyObject_GenericGetAttr, /* tp_getattro */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13951 0, /* tp_setattro */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13952 0, /* tp_as_buffer */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13953 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13954 0, /* tp_doc */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13955 (traverseproc)unicodeiter_traverse, /* tp_traverse */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13956 0, /* tp_clear */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13957 0, /* tp_richcompare */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13958 0, /* tp_weaklistoffset */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13959 PyObject_SelfIter, /* tp_iter */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13960 (iternextfunc)unicodeiter_next, /* tp_iternext */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13961 unicodeiter_methods, /* tp_methods */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13962 0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13963 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13964
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13965 static PyObject *
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13966 unicode_iter(PyObject *seq)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13967 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13968 unicodeiterobject *it;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13969
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13970 if (!PyUnicode_Check(seq)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13971 PyErr_BadInternalCall();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13972 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13973 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13974 if (PyUnicode_READY(seq) == -1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13975 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13976 it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13977 if (it == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13978 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13979 it->it_index = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13980 Py_INCREF(seq);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13981 it->it_seq = seq;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13982 _PyObject_GC_TRACK(it);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13983 return (PyObject *)it;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13984 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13985
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13986
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13987 size_t
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13988 Py_UNICODE_strlen(const Py_UNICODE *u)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13989 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13990 int res = 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13991 while(*u++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13992 res++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13993 return res;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13994 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13995
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13996 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13997 Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13998 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
13999 Py_UNICODE *u = s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14000 while ((*u++ = *s2++));
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14001 return s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14002 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14003
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14004 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14005 Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14006 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14007 Py_UNICODE *u = s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14008 while ((*u++ = *s2++))
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14009 if (n-- == 0)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14010 break;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14011 return s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14012 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14013
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14014 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14015 Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14016 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14017 Py_UNICODE *u1 = s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14018 u1 += Py_UNICODE_strlen(u1);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14019 Py_UNICODE_strcpy(u1, s2);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14020 return s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14021 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14022
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14023 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14024 Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14025 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14026 while (*s1 && *s2 && *s1 == *s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14027 s1++, s2++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14028 if (*s1 && *s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14029 return (*s1 < *s2) ? -1 : +1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14030 if (*s1)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14031 return 1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14032 if (*s2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14033 return -1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14034 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14035 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14036
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14037 int
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14038 Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14039 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14040 register Py_UNICODE u1, u2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14041 for (; n != 0; n--) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14042 u1 = *s1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14043 u2 = *s2;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14044 if (u1 != u2)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14045 return (u1 < u2) ? -1 : +1;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14046 if (u1 == '\0')
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14047 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14048 s1++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14049 s2++;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14050 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14051 return 0;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14052 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14053
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14054 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14055 Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14056 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14057 const Py_UNICODE *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14058 for (p = s; *p; p++)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14059 if (*p == c)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14060 return (Py_UNICODE*)p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14061 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14062 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14063
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14064 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14065 Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14066 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14067 const Py_UNICODE *p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14068 p = s + Py_UNICODE_strlen(s);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14069 while (p != s) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14070 p--;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14071 if (*p == c)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14072 return (Py_UNICODE*)p;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14073 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14074 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14075 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14076
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14077 Py_UNICODE*
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14078 PyUnicode_AsUnicodeCopy(PyObject *unicode)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14079 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14080 Py_UNICODE *u, *copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14081 Py_ssize_t len, size;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14082
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14083 if (!PyUnicode_Check(unicode)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14084 PyErr_BadArgument();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14085 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14086 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14087 u = PyUnicode_AsUnicodeAndSize(unicode, &len);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14088 if (u == NULL)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14089 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14090 /* Ensure we won't overflow the size. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14091 if (len > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14092 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14093 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14094 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14095 size = len + 1; /* copy the null character */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14096 size *= sizeof(Py_UNICODE);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14097 copy = PyMem_Malloc(size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14098 if (copy == NULL) {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14099 PyErr_NoMemory();
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14100 return NULL;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14101 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14102 memcpy(copy, u, size);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14103 return copy;
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14104 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14105
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14106 /* A _string module, to export formatter_parser and formatter_field_name_split
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14107 to the string.Formatter class implemented in Python. */
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14108
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14109 static PyMethodDef _string_methods[] = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14110 {"formatter_field_name_split", (PyCFunction) formatter_field_name_split,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14111 METH_O, PyDoc_STR("split the argument as a field name")},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14112 {"formatter_parser", (PyCFunction) formatter_parser,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14113 METH_O, PyDoc_STR("parse the argument as a format string")},
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14114 {NULL, NULL}
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14115 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14116
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14117 static struct PyModuleDef _string_module = {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14118 PyModuleDef_HEAD_INIT,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14119 "_string",
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14120 PyDoc_STR("string helper module"),
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14121 0,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14122 _string_methods,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14123 NULL,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14124 NULL,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14125 NULL,
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14126 NULL
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14127 };
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14128
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14129 PyMODINIT_FUNC
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14130 PyInit__string(void)
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14131 {
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14132 return PyModule_Create(&_string_module);
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14133 }
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14134
7f74363f4c82 Added some files for the python port
windel
parents:
diff changeset
14135