# HG changeset patch # User windel # Date 1325008742 -3600 # Node ID 7f74363f4c8214286f8288a4cadaa4c8069a64ed # Parent dcce92b1efbc9a7b32598479298fba686a0ce002 Added some files for the python port diff -r dcce92b1efbc -r 7f74363f4c82 .hgignore --- a/.hgignore Tue Dec 27 17:36:52 2011 +0100 +++ b/.hgignore Tue Dec 27 18:59:02 2011 +0100 @@ -5,6 +5,5 @@ *.o *.bin -cos/python cos/bootdisk.img diff -r dcce92b1efbc -r 7f74363f4c82 cos/Makefile --- a/cos/Makefile Tue Dec 27 17:36:52 2011 +0100 +++ b/cos/Makefile Tue Dec 27 18:59:02 2011 +0100 @@ -15,7 +15,8 @@ -nostartfiles \ -mno-red-zone \ -fno-builtin \ - -mcmodel=large + -mcmodel=large \ + -Ipython/Include OBJECTS = \ kernel/video.o \ diff -r dcce92b1efbc -r 7f74363f4c82 cos/kernel/kernel.c --- a/cos/kernel/kernel.c Tue Dec 27 17:36:52 2011 +0100 +++ b/cos/kernel/kernel.c Tue Dec 27 18:59:02 2011 +0100 @@ -1,11 +1,5 @@ #include "kernel.h" -void startPython() -{ - // TODO: connect to Py_Main - //PyRun_SimpleString("print('hello world')"); -} - void testMalloc() { char *a, *b; @@ -34,7 +28,7 @@ while (1==1) { char buffer[70]; - printf(">>>"); + printf(">"); getline(buffer, 70); // TODO: interpret this line with python :) printf("\n"); diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Include/Python.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Include/Python.h Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,76 @@ +#ifndef Py_PYTHON_H +#define Py_PYTHON_H + +/* Include nearly all Python header files */ + +// #include "pyatomic.h" + +//#include "pymath.h" +#include "pytime.h" +#include "pymem.h" + +#include "object.h" +#include "objimpl.h" +#include "typeslots.h" + +#include "pydebug.h" + +#include "bytearrayobject.h" +#include "bytesobject.h" +#include "unicodeobject.h" +#include "longobject.h" +#include "longintrepr.h" +#include "boolobject.h" +#include "floatobject.h" +#include "complexobject.h" +#include "rangeobject.h" +#include "memoryobject.h" +#include "tupleobject.h" +#include "listobject.h" +#include "dictobject.h" +#include "enumobject.h" +#include "setobject.h" +#include "methodobject.h" +#include "moduleobject.h" +#include "funcobject.h" +#include "classobject.h" +#include "fileobject.h" +#include "pycapsule.h" +#include "traceback.h" +#include "sliceobject.h" +#include "cellobject.h" +#include "iterobject.h" +#include "genobject.h" +#include "descrobject.h" +#include "warnings.h" +#include "weakrefobject.h" +#include "structseq.h" +#include "accu.h" + +#include "codecs.h" +#include "pyerrors.h" + +#include "pystate.h" + +#include "pyarena.h" +#include "modsupport.h" +#include "pythonrun.h" +#include "ceval.h" +#include "sysmodule.h" +#include "intrcheck.h" +#include "import.h" + +#include "abstract.h" +#include "bltinmodule.h" + +#include "compile.h" +#include "eval.h" + +#include "pyctype.h" +#include "pystrtod.h" +#include "pystrcmp.h" +#include "dtoa.h" +#include "fileutils.h" +#include "pyfpe.h" + +#endif /* !Py_PYTHON_H */ diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Include/boolobject.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Include/boolobject.h Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,27 @@ +/* Boolean object interface */ + +#ifndef Py_BOOLOBJECT_H +#define Py_BOOLOBJECT_H + +PyAPI_DATA(PyTypeObject) PyBool_Type; + +#define PyBool_Check(x) (Py_TYPE(x) == &PyBool_Type) + +/* Py_False and Py_True are the only two bools in existence. +Don't forget to apply Py_INCREF() when returning either!!! */ + +/* Don't use these directly */ +PyAPI_DATA(struct _longobject) _Py_FalseStruct, _Py_TrueStruct; + +/* Use these macros */ +#define Py_False ((PyObject *) &_Py_FalseStruct) +#define Py_True ((PyObject *) &_Py_TrueStruct) + +/* Macros for returning Py_True or Py_False, respectively */ +#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True +#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False + +/* Function to return a bool from a C long */ +PyAPI_FUNC(PyObject *) PyBool_FromLong(long); + +#endif /* !Py_BOOLOBJECT_H */ diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Include/ceval.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Include/ceval.h Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,180 @@ +#ifndef Py_CEVAL_H +#define Py_CEVAL_H + +/* Interface to random parts in ceval.c */ + +PyAPI_FUNC(PyObject *) PyEval_CallObjectWithKeywords( + PyObject *, PyObject *, PyObject *); + +/* Inline this */ +#define PyEval_CallObject(func,arg) \ + PyEval_CallObjectWithKeywords(func, arg, (PyObject *)NULL) + +PyAPI_FUNC(PyObject *) PyEval_CallFunction(PyObject *obj, + const char *format, ...); +PyAPI_FUNC(PyObject *) PyEval_CallMethod(PyObject *obj, + const char *methodname, + const char *format, ...); + +PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); +PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); + +struct _frame; /* Avoid including frameobject.h */ + +PyAPI_FUNC(PyObject *) PyEval_GetBuiltins(void); +PyAPI_FUNC(PyObject *) PyEval_GetGlobals(void); +PyAPI_FUNC(PyObject *) PyEval_GetLocals(void); +PyAPI_FUNC(struct _frame *) PyEval_GetFrame(void); + +/* Look at the current frame's (if any) code's co_flags, and turn on + the corresponding compiler flags in cf->cf_flags. Return 1 if any + flag was set, else return 0. */ +PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); + +PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); +PyAPI_FUNC(int) Py_MakePendingCalls(void); + +/* Protection against deeply nested recursive calls + + In Python 3.0, this protection has two levels: + * normal anti-recursion protection is triggered when the recursion level + exceeds the current recursion limit. It raises a RuntimeError, and sets + the "overflowed" flag in the thread state structure. This flag + temporarily *disables* the normal protection; this allows cleanup code + to potentially outgrow the recursion limit while processing the + RuntimeError. + * "last chance" anti-recursion protection is triggered when the recursion + level exceeds "current recursion limit + 50". By construction, this + protection can only be triggered when the "overflowed" flag is set. It + means the cleanup code has itself gone into an infinite loop, or the + RuntimeError has been mistakingly ignored. When this protection is + triggered, the interpreter aborts with a Fatal Error. + + In addition, the "overflowed" flag is automatically reset when the + recursion level drops below "current recursion limit - 50". This heuristic + is meant to ensure that the normal anti-recursion protection doesn't get + disabled too long. + + Please note: this scheme has its own limitations. See: + http://mail.python.org/pipermail/python-dev/2008-August/082106.html + for some observations. +*/ +PyAPI_FUNC(void) Py_SetRecursionLimit(int); +PyAPI_FUNC(int) Py_GetRecursionLimit(void); + +#define Py_EnterRecursiveCall(where) \ + (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \ + _Py_CheckRecursiveCall(where)) +#define Py_LeaveRecursiveCall() \ + do{ if(_Py_MakeEndRecCheck(PyThreadState_GET()->recursion_depth)) \ + PyThreadState_GET()->overflowed = 0; \ + } while(0) +PyAPI_FUNC(int) _Py_CheckRecursiveCall(char *where); +PyAPI_DATA(int) _Py_CheckRecursionLimit; + +# define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit) + +#define _Py_MakeEndRecCheck(x) \ + (--(x) < ((_Py_CheckRecursionLimit > 100) \ + ? (_Py_CheckRecursionLimit - 50) \ + : (3 * (_Py_CheckRecursionLimit >> 2)))) + +#define Py_ALLOW_RECURSION \ + do { unsigned char _old = PyThreadState_GET()->recursion_critical;\ + PyThreadState_GET()->recursion_critical = 1; + +#define Py_END_ALLOW_RECURSION \ + PyThreadState_GET()->recursion_critical = _old; \ + } while(0); + +PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *); +PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *); + +PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *); +PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *); +PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc); + +/* Interface for threads. + + A module that plans to do a blocking system call (or something else + that lasts a long time and doesn't touch Python data) can allow other + threads to run as follows: + + ...preparations here... + Py_BEGIN_ALLOW_THREADS + ...blocking system call here... + Py_END_ALLOW_THREADS + ...interpret result here... + + The Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS pair expands to a + {}-surrounded block. + To leave the block in the middle (e.g., with return), you must insert + a line containing Py_BLOCK_THREADS before the return, e.g. + + if (...premature_exit...) { + Py_BLOCK_THREADS + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + An alternative is: + + Py_BLOCK_THREADS + if (...premature_exit...) { + PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + Py_UNBLOCK_THREADS + + For convenience, that the value of 'errno' is restored across + Py_END_ALLOW_THREADS and Py_BLOCK_THREADS. + + WARNING: NEVER NEST CALLS TO Py_BEGIN_ALLOW_THREADS AND + Py_END_ALLOW_THREADS!!! + + The function PyEval_InitThreads() should be called only from + init_thread() in "_threadmodule.c". + + Note that not yet all candidates have been converted to use this + mechanism! +*/ + +PyAPI_FUNC(PyThreadState *) PyEval_SaveThread(void); +PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); + +#ifdef WITH_THREAD + +PyAPI_FUNC(int) PyEval_ThreadsInitialized(void); +PyAPI_FUNC(void) PyEval_InitThreads(void); +PyAPI_FUNC(void) _PyEval_FiniThreads(void); +PyAPI_FUNC(void) PyEval_AcquireLock(void); +PyAPI_FUNC(void) PyEval_ReleaseLock(void); +PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); +PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); +PyAPI_FUNC(void) PyEval_ReInitThreads(void); + +PyAPI_FUNC(void) _PyEval_SetSwitchInterval(unsigned long microseconds); +PyAPI_FUNC(unsigned long) _PyEval_GetSwitchInterval(void); + +#define Py_BEGIN_ALLOW_THREADS { \ + PyThreadState *_save; \ + _save = PyEval_SaveThread(); +#define Py_BLOCK_THREADS PyEval_RestoreThread(_save); +#define Py_UNBLOCK_THREADS _save = PyEval_SaveThread(); +#define Py_END_ALLOW_THREADS PyEval_RestoreThread(_save); \ + } + +#else /* !WITH_THREAD */ + +#define Py_BEGIN_ALLOW_THREADS { +#define Py_BLOCK_THREADS +#define Py_UNBLOCK_THREADS +#define Py_END_ALLOW_THREADS } + +#endif /* !WITH_THREAD */ + +PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); +PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void); + + +#endif /* !Py_CEVAL_H */ diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Include/longobject.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Include/longobject.h Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,166 @@ +#ifndef Py_LONGOBJECT_H +#define Py_LONGOBJECT_H + +/* Long (arbitrary precision) integer object interface */ + +typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */ + +PyAPI_DATA(PyTypeObject) PyLong_Type; + +#define PyLong_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS) +#define PyLong_CheckExact(op) (Py_TYPE(op) == &PyLong_Type) + +PyAPI_FUNC(PyObject *) PyLong_FromLong(long); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLong(unsigned long); +PyAPI_FUNC(PyObject *) PyLong_FromSize_t(size_t); +PyAPI_FUNC(PyObject *) PyLong_FromSsize_t(Py_ssize_t); +PyAPI_FUNC(PyObject *) PyLong_FromDouble(double); +PyAPI_FUNC(long) PyLong_AsLong(PyObject *); +PyAPI_FUNC(long) PyLong_AsLongAndOverflow(PyObject *, int *); +PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); +PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); + +/* It may be useful in the future. I've added it in the PyInt -> PyLong + cleanup to keep the extra information. [CH] */ +#define PyLong_AS_LONG(op) PyLong_AsLong(op) + +/* Issue #1983: pid_t can be longer than a C long on some systems */ +#if !defined(SIZEOF_PID_T) || SIZEOF_PID_T == SIZEOF_INT +#define _Py_PARSE_PID "i" +#define PyLong_FromPid PyLong_FromLong +#define PyLong_AsPid PyLong_AsLong +#elif SIZEOF_PID_T == SIZEOF_LONG +#define _Py_PARSE_PID "l" +#define PyLong_FromPid PyLong_FromLong +#define PyLong_AsPid PyLong_AsLong +#elif defined(SIZEOF_LONG_LONG) && SIZEOF_PID_T == SIZEOF_LONG_LONG +#define _Py_PARSE_PID "L" +#define PyLong_FromPid PyLong_FromLongLong +#define PyLong_AsPid PyLong_AsLongLong +#else +#error "sizeof(pid_t) is neither sizeof(int), sizeof(long) or sizeof(long long)" +#endif /* SIZEOF_PID_T */ + +/* Used by Python/mystrtoul.c. */ +#ifndef Py_LIMITED_API +PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; +#endif + +/* _PyLong_Frexp returns a double x and an exponent e such that the + true value is approximately equal to x * 2**e. e is >= 0. x is + 0.0 if and only if the input is 0 (in which case, e and x are both + zeroes); otherwise, 0.5 <= abs(x) < 1.0. On overflow, which is + possible if the number of bits doesn't fit into a Py_ssize_t, sets + OverflowError and returns -1.0 for x, 0 for e. */ +#ifndef Py_LIMITED_API +PyAPI_FUNC(double) _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e); +#endif + +PyAPI_FUNC(double) PyLong_AsDouble(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_FromVoidPtr(void *); +PyAPI_FUNC(void *) PyLong_AsVoidPtr(PyObject *); + +#ifdef HAVE_LONG_LONG +PyAPI_FUNC(PyObject *) PyLong_FromLongLong(PY_LONG_LONG); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG); +PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLong(PyObject *); +PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *); +PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLongMask(PyObject *); +PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *); +#endif /* HAVE_LONG_LONG */ + +PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); +PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base); +#endif + +#ifndef Py_LIMITED_API +/* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. + v must not be NULL, and must be a normalized long. + There are no error cases. +*/ +PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); + + +/* _PyLong_NumBits. Return the number of bits needed to represent the + absolute value of a long. For example, this returns 1 for 1 and -1, 2 + for 2 and -2, and 2 for 3 and -3. It returns 0 for 0. + v must not be NULL, and must be a normalized long. + (size_t)-1 is returned and OverflowError set if the true result doesn't + fit in a size_t. +*/ +PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v); + +/* _PyLong_DivmodNear. Given integers a and b, compute the nearest + integer q to the exact quotient a / b, rounding to the nearest even integer + in the case of a tie. Return (q, r), where r = a - q*b. The remainder r + will satisfy abs(r) <= abs(b)/2, with equality possible only if q is + even. +*/ +PyAPI_FUNC(PyObject *) _PyLong_DivmodNear(PyObject *, PyObject *); + +/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in + base 256, and return a Python long with the same numeric value. + If n is 0, the integer is 0. Else: + If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB; + else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the + LSB. + If is_signed is 0/false, view the bytes as a non-negative integer. + If is_signed is 1/true, view the bytes as a 2's-complement integer, + non-negative if bit 0x80 of the MSB is clear, negative if set. + Error returns: + + Return NULL with the appropriate exception set if there's not + enough memory to create the Python long. +*/ +PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( + const unsigned char* bytes, size_t n, + int little_endian, int is_signed); + +/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long + v to a base-256 integer, stored in array bytes. Normally return 0, + return -1 on error. + If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at + bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and + the LSB at bytes[n-1]. + If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes + are filled and there's nothing special about bit 0x80 of the MSB. + If is_signed is 1/true, bytes is filled with the 2's-complement + representation of v's value. Bit 0x80 of the MSB is the sign bit. + Error returns (-1): + + is_signed is 0 and v < 0. TypeError is set in this case, and bytes + isn't altered. + + n isn't big enough to hold the full mathematical value of v. For + example, if is_signed is 0 and there are more digits in the v than + fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of + being large enough to hold a sign bit. OverflowError is set in this + case, but bytes holds the least-signficant n bytes of the true value. +*/ +PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, + unsigned char* bytes, size_t n, + int little_endian, int is_signed); + + +/* _PyLong_Format: Convert the long to a string object with given base, + appending a base prefix of 0[box] if base is 2, 8 or 16. */ +PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); +#endif /* Py_LIMITED_API */ + +/* These aren't really part of the long object, but they're handy. The + functions are in Python/mystrtoul.c. + */ +PyAPI_FUNC(unsigned long) PyOS_strtoul(char *, char **, int); +PyAPI_FUNC(long) PyOS_strtol(char *, char **, int); + +#endif /* !Py_LONGOBJECT_H */ diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Include/object.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Include/object.h Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,838 @@ +#ifndef Py_OBJECT_H +#define Py_OBJECT_H + +/* Object and type object interface */ + +/* +Objects are structures allocated on the heap. Special rules apply to +the use of objects to ensure they are properly garbage-collected. +Objects are never allocated statically or on the stack; they must be +accessed through special macros and functions only. (Type objects are +exceptions to the first rule; the standard types are represented by +statically initialized type objects, although work on type/class unification +for Python 2.2 made it possible to have heap-allocated type objects too). + +An object has a 'reference count' that is increased or decreased when a +pointer to the object is copied or deleted; when the reference count +reaches zero there are no references to the object left and it can be +removed from the heap. + +An object has a 'type' that determines what it represents and what kind +of data it contains. An object's type is fixed when it is created. +Types themselves are represented as objects; an object contains a +pointer to the corresponding type object. The type itself has a type +pointer pointing to the object representing the type 'type', which +contains a pointer to itself!). + +Objects do not float around in memory; once allocated an object keeps +the same size and address. Objects that must hold variable-size data +can contain pointers to variable-size parts of the object. Not all +objects of the same type have the same size; but the size cannot change +after allocation. (These restrictions are made so a reference to an +object can be simply a pointer -- moving an object would require +updating all the pointers, and changing an object's size would require +moving it if there was another object right next to it.) + +Objects are always accessed through pointers of the type 'PyObject *'. +The type 'PyObject' is a structure that only contains the reference count +and the type pointer. The actual memory allocated for an object +contains other data that can only be accessed after casting the pointer +to a pointer to a longer structure type. This longer type must start +with the reference count and type fields; the macro PyObject_HEAD should be +used for this (to accommodate for future changes). The implementation +of a particular object type can cast the object pointer to the proper +type and back. + +A standard interface exists for objects that contain an array of items +whose size is determined when the object is allocated. +*/ + +/* PyObject_HEAD defines the initial segment of every PyObject. */ +#define PyObject_HEAD PyObject ob_base; + +#define PyObject_HEAD_INIT(type) { 1, type }, + +#define PyVarObject_HEAD_INIT(type, size) \ + { PyObject_HEAD_INIT(type) size }, + +/* PyObject_VAR_HEAD defines the initial segment of all variable-size + * container objects. These end with a declaration of an array with 1 + * element, but enough space is malloc'ed so that the array actually + * has room for ob_size elements. Note that ob_size is an element count, + * not necessarily a byte count. + */ +#define PyObject_VAR_HEAD PyVarObject ob_base; +#define Py_INVALID_SIZE (Py_ssize_t)-1 + +/* Nothing is actually declared to be a PyObject, but every pointer to + * a Python object can be cast to a PyObject*. This is inheritance built + * by hand. Similarly every pointer to a variable-size Python object can, + * in addition, be cast to PyVarObject*. + */ +typedef struct _object { + int ob_refcnt; + struct _typeobject *ob_type; +} PyObject; + +typedef struct { + PyObject ob_base; + int ob_size; /* Number of items in variable part */ +} PyVarObject; + +#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) + +/* +Type objects contain a string containing the type name (to help somewhat +in debugging), the allocation parameters (see PyObject_New() and +PyObject_NewVar()), +and methods for accessing objects of the type. Methods are optional, a +nil pointer meaning that particular kind of access is not available for +this type. The Py_DECREF() macro uses the tp_dealloc method without +checking for a nil pointer; it should always be implemented except if +the implementation can guarantee that the reference count will never +reach zero (e.g., for statically allocated type objects). + +NB: the methods for certain type groups are now contained in separate +method blocks. +*/ + +typedef PyObject * (*unaryfunc)(PyObject *); +typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); +typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +typedef int (*inquiry)(PyObject *); +typedef int (*lenfunc)(PyObject *); +typedef PyObject *(*ssizeargfunc)(PyObject *, int); +typedef PyObject *(*ssizessizeargfunc)(PyObject *, int, int); +typedef int(*ssizeobjargproc)(PyObject *, int, PyObject *); +typedef int(*ssizessizeobjargproc)(PyObject *, int, int, PyObject *); +typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); + +/* buffer interface */ +typedef struct bufferinfo { + void *buf; + PyObject *obj; /* owned reference */ + Py_ssize_t len; + Py_ssize_t itemsize; /* This is Py_ssize_t so it can be + pointed to by strides in simple case.*/ + int readonly; + int ndim; + char *format; + Py_ssize_t *shape; + Py_ssize_t *strides; + Py_ssize_t *suboffsets; + Py_ssize_t smalltable[2]; /* static store for shape and strides of + mono-dimensional buffers. */ + void *internal; +} Py_buffer; + +typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); +typedef void (*releasebufferproc)(PyObject *, Py_buffer *); + + /* Flags for getting buffers */ +#define PyBUF_SIMPLE 0 +#define PyBUF_WRITABLE 0x0001 +/* we used to include an E, backwards compatible alias */ +#define PyBUF_WRITEABLE PyBUF_WRITABLE +#define PyBUF_FORMAT 0x0004 +#define PyBUF_ND 0x0008 +#define PyBUF_STRIDES (0x0010 | PyBUF_ND) +#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) +#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) +#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) +#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) + +#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) +#define PyBUF_CONTIG_RO (PyBUF_ND) + +#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) +#define PyBUF_STRIDED_RO (PyBUF_STRIDES) + +#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) + +#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) + + +#define PyBUF_READ 0x100 +#define PyBUF_WRITE 0x200 + +/* End buffer interface */ + +typedef int (*objobjproc)(PyObject *, PyObject *); +typedef int (*visitproc)(PyObject *, void *); +typedef int (*traverseproc)(PyObject *, visitproc, void *); + +typedef struct { + /* Number implementations must check *both* + arguments for proper type and implement the necessary conversions + in the slot functions themselves. */ + + binaryfunc nb_add; + binaryfunc nb_subtract; + binaryfunc nb_multiply; + binaryfunc nb_remainder; + binaryfunc nb_divmod; + ternaryfunc nb_power; + unaryfunc nb_negative; + unaryfunc nb_positive; + unaryfunc nb_absolute; + inquiry nb_bool; + unaryfunc nb_invert; + binaryfunc nb_lshift; + binaryfunc nb_rshift; + binaryfunc nb_and; + binaryfunc nb_xor; + binaryfunc nb_or; + unaryfunc nb_int; + void *nb_reserved; /* the slot formerly known as nb_long */ + unaryfunc nb_float; + + binaryfunc nb_inplace_add; + binaryfunc nb_inplace_subtract; + binaryfunc nb_inplace_multiply; + binaryfunc nb_inplace_remainder; + ternaryfunc nb_inplace_power; + binaryfunc nb_inplace_lshift; + binaryfunc nb_inplace_rshift; + binaryfunc nb_inplace_and; + binaryfunc nb_inplace_xor; + binaryfunc nb_inplace_or; + + binaryfunc nb_floor_divide; + binaryfunc nb_true_divide; + binaryfunc nb_inplace_floor_divide; + binaryfunc nb_inplace_true_divide; + + unaryfunc nb_index; +} PyNumberMethods; + +typedef struct { + lenfunc sq_length; + binaryfunc sq_concat; + ssizeargfunc sq_repeat; + ssizeargfunc sq_item; + void *was_sq_slice; + ssizeobjargproc sq_ass_item; + void *was_sq_ass_slice; + objobjproc sq_contains; + + binaryfunc sq_inplace_concat; + ssizeargfunc sq_inplace_repeat; +} PySequenceMethods; + +typedef struct { + lenfunc mp_length; + binaryfunc mp_subscript; + objobjargproc mp_ass_subscript; +} PyMappingMethods; + + +typedef struct { + getbufferproc bf_getbuffer; + releasebufferproc bf_releasebuffer; +} PyBufferProcs; + +typedef void (*freefunc)(void *); +typedef void (*destructor)(PyObject *); +/* We can't provide a full compile-time check that limited-API + users won't implement tp_print. However, not defining printfunc + and making tp_print of a different function pointer type + should at least cause a warning in most cases. */ +typedef int (*printfunc)(PyObject *, FILE *, int); +typedef PyObject *(*getattrfunc)(PyObject *, char *); +typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); +typedef int (*setattrfunc)(PyObject *, char *, PyObject *); +typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); +typedef PyObject *(*reprfunc)(PyObject *); +typedef Py_hash_t (*hashfunc)(PyObject *); +typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +typedef PyObject *(*getiterfunc) (PyObject *); +typedef PyObject *(*iternextfunc) (PyObject *); +typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*initproc)(PyObject *, PyObject *, PyObject *); +typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); +typedef PyObject *(*allocfunc)(struct _typeobject *, int); + +typedef struct _typeobject PyTypeObject; /* opaque */ +typedef struct _typeobject { + PyObject_VAR_HEAD + const char *tp_name; /* For printing, in format "." */ + int tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + printfunc tp_print; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + void *tp_reserved; /* formerly known as tp_compare */ + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + long tp_flags; + + const char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + Py_ssize_t tp_weaklistoffset; + + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + Py_ssize_t tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; + PyObject *tp_subclasses; + PyObject *tp_weaklist; + destructor tp_del; + + /* Type attribute cache version tag. Added in version 2.6 */ + unsigned int tp_version_tag; + +} PyTypeObject; + +typedef struct{ + int slot; /* slot id, see below */ + void *pfunc; /* function pointer */ +} PyType_Slot; + +typedef struct{ + const char* name; + int basicsize; + int itemsize; + int flags; + PyType_Slot *slots; /* terminated by slot==0. */ +} PyType_Spec; + +PyAPI_FUNC(PyObject*) PyType_FromSpec(PyType_Spec*); + +#ifndef Py_LIMITED_API +/* The *real* layout of a type object when allocated on the heap */ +typedef struct _heaptypeobject { + /* Note: there's a dependency on the order of these members + in slotptr() in typeobject.c . */ + PyTypeObject ht_type; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + see add_operators() in typeobject.c . */ + PyBufferProcs as_buffer; + PyObject *ht_name, *ht_slots, *ht_qualname; + /* here are optional user slots, followed by the members. */ +} PyHeapTypeObject; + +/* access macro to the members which are floating "behind" the object */ +#define PyHeapType_GET_MEMBERS(etype) \ + ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) +#endif + +/* Generic type check */ +PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); +#define PyObject_TypeCheck(ob, tp) \ + (Py_TYPE(ob) == (tp) || PyType_IsSubtype(Py_TYPE(ob), (tp))) + +PyAPI_DATA(PyTypeObject) PyType_Type; /* built-in 'type' */ +PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */ +PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */ + +PyAPI_FUNC(long) PyType_GetFlags(PyTypeObject*); + +#define PyType_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TYPE_SUBCLASS) +#define PyType_CheckExact(op) (Py_TYPE(op) == &PyType_Type) + +PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); +PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, + PyObject *, PyObject *); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **); +PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *); +#endif +PyAPI_FUNC(unsigned int) PyType_ClearCache(void); +PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); + +/* Generic operations on objects */ +struct _Py_Identifier; +#ifndef Py_LIMITED_API +PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); +PyAPI_FUNC(void) _Py_BreakPoint(void); +PyAPI_FUNC(void) _PyObject_Dump(PyObject *); +#endif +PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_ASCII(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Bytes(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); +PyAPI_FUNC(PyObject *) PyObject_GetAttrString(PyObject *, const char *); +PyAPI_FUNC(int) PyObject_SetAttrString(PyObject *, const char *, PyObject *); +PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); +PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *); +PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *); +PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); +#endif +PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); +#endif +PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, + PyObject *, PyObject *); +PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *); +PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *); +PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); +PyAPI_FUNC(int) PyObject_Not(PyObject *); +PyAPI_FUNC(int) PyCallable_Check(PyObject *); + +PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); + +/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes + dict as the last parameter. */ +PyAPI_FUNC(PyObject *) +_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(int) +_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, + PyObject *, PyObject *); + + +/* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a + list of strings. PyObject_Dir(NULL) is like builtins.dir(), + returning the names of the current locals. In this case, if there are + no current locals, NULL is returned, and PyErr_Occurred() is false. +*/ +PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); + + +/* Helpers for printing recursive container types */ +PyAPI_FUNC(int) Py_ReprEnter(PyObject *); +PyAPI_FUNC(void) Py_ReprLeave(PyObject *); + +/* Helpers for hash functions */ +PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); +PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); + +/* Helper for passing objects to printf and the like */ +#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) + +/* Flag bits for printing: */ +#define Py_PRINT_RAW 1 /* No string quotes etc. */ + +/* +`Type flags (tp_flags) + +These flags are used to extend the type structure in a backwards-compatible +fashion. Extensions can use the flags to indicate (and test) when a given +type structure contains a new feature. The Python core will use these when +introducing new functionality between major revisions (to avoid mid-version +changes in the PYTHON_API_VERSION). + +Arbitration of the flag bit positions will need to be coordinated among +all extension writers who publically release their extensions (this will +be fewer than you might expect!).. + +Most flags were removed as of Python 3.0 to make room for new flags. (Some +flags are not for backwards compatibility but to indicate the presence of an +optional feature; these flags remain of course.) + +Type definitions should use Py_TPFLAGS_DEFAULT for their tp_flags value. + +Code can use PyType_HasFeature(type_ob, flag_value) to test whether the +given type object has a specified feature. +*/ + +/* Set if the type object is dynamically allocated */ +#define Py_TPFLAGS_HEAPTYPE (1L<<9) + +/* Set if the type allows subclassing */ +#define Py_TPFLAGS_BASETYPE (1L<<10) + +/* Set if the type is 'ready' -- fully initialized */ +#define Py_TPFLAGS_READY (1L<<12) + +/* Set while the type is being 'readied', to prevent recursive ready calls */ +#define Py_TPFLAGS_READYING (1L<<13) + +/* Objects support garbage collection (see objimp.h) */ +#define Py_TPFLAGS_HAVE_GC (1L<<14) + +/* These two bits are preserved for Stackless Python, next after this is 17 */ +#ifdef STACKLESS +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION (3L<<15) +#else +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION 0 +#endif + +/* Objects support type attribute cache */ +#define Py_TPFLAGS_HAVE_VERSION_TAG (1L<<18) +#define Py_TPFLAGS_VALID_VERSION_TAG (1L<<19) + +/* Type is abstract and cannot be instantiated */ +#define Py_TPFLAGS_IS_ABSTRACT (1L<<20) + +/* These flags are used to determine if a type is a subclass. */ +#define Py_TPFLAGS_INT_SUBCLASS (1L<<23) +#define Py_TPFLAGS_LONG_SUBCLASS (1L<<24) +#define Py_TPFLAGS_LIST_SUBCLASS (1L<<25) +#define Py_TPFLAGS_TUPLE_SUBCLASS (1L<<26) +#define Py_TPFLAGS_BYTES_SUBCLASS (1L<<27) +#define Py_TPFLAGS_UNICODE_SUBCLASS (1L<<28) +#define Py_TPFLAGS_DICT_SUBCLASS (1L<<29) +#define Py_TPFLAGS_BASE_EXC_SUBCLASS (1L<<30) +#define Py_TPFLAGS_TYPE_SUBCLASS (1L<<31) + +#define Py_TPFLAGS_DEFAULT ( \ + Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ + Py_TPFLAGS_HAVE_VERSION_TAG | \ + 0) + +#ifdef Py_LIMITED_API +#define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0) +#else +#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) +#endif +#define PyType_FastSubclass(t,f) PyType_HasFeature(t,f) + + +/* +The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement +reference counts. Py_DECREF calls the object's deallocator function when +the refcount falls to 0; for +objects that don't contain references to other objects or heap memory +this can be the standard function free(). Both macros can be used +wherever a void expression is allowed. The argument must not be a +NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. +The macro _Py_NewReference(op) initialize reference counts to 1, and +in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional +bookkeeping appropriate to the special build. + +We assume that the reference count field can never overflow; this can +be proven when the size of the field is the same as the pointer size, so +we ignore the possibility. Provided a C int is at least 32 bits (which +is implicitly assumed in many parts of this code), that's enough for +about 2**31 references to an object. + +XXX The following became out of date in Python 2.2, but I'm not sure +XXX what the full truth is now. Certainly, heap-allocated type objects +XXX can and should be deallocated. +Type objects should never be deallocated; the type pointer in an object +is not considered to be a reference to the type object, to save +complications in the deallocation function. (This is actually a +decision that's up to the implementer of each new type so if you want, +you can count such references to the type object.) + +*** WARNING*** The Py_DECREF macro must have a side-effect-free argument +since it may evaluate its argument multiple times. (The alternative +would be to mace it a proper function or assign it to a global temporary +variable first, both of which are slower; and in a multi-threaded +environment the global variable trick is not safe.) +*/ + +/* First define a pile of simple helper macros, one set per special + * build symbol. These either expand to the obvious things, or to + * nothing at all when the special mode isn't in effect. The main + * macros can later be defined just once then, yet expand to different + * things depending on which special build options are and aren't in effect. + * Trust me : while painful, this is 20x easier to understand than, + * e.g, defining _Py_NewReference five different times in a maze of nested + * #ifdefs (we used to do that -- it was impenetrable). + */ +#define _Py_INC_REFTOTAL +#define _Py_DEC_REFTOTAL +#define _Py_REF_DEBUG_COMMA +#define _Py_CHECK_REFCNT(OP) /* a semicolon */; + +#define _Py_INC_TPALLOCS(OP) +#define _Py_INC_TPFREES(OP) +#define _Py_DEC_TPFREES(OP) +#define _Py_COUNT_ALLOCS_COMMA + +/* Without Py_TRACE_REFS, there's little enough to do that we expand code + * inline. + */ +#define _Py_NewReference(op) ( \ + _Py_INC_TPALLOCS(op) \ + _Py_INC_REFTOTAL \ + Py_REFCNT(op) = 1) + +#define _Py_ForgetReference(op) _Py_INC_TPFREES(op) + +#define _Py_Dealloc(op) ( \ + _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \ + (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op))) + +#define Py_INCREF(op) ( \ + _Py_INC_REFTOTAL \ + ((PyObject*)(op))->ob_refcnt++) + +#define Py_DECREF(op) \ + do { \ + if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \ + --((PyObject*)(op))->ob_refcnt != 0) \ + _Py_CHECK_REFCNT(op) \ + else \ + _Py_Dealloc((PyObject *)(op)); \ + } while (0) + +/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear + * and tp_dealloc implementatons. + * + * Note that "the obvious" code can be deadly: + * + * Py_XDECREF(op); + * op = NULL; + * + * Typically, `op` is something like self->containee, and `self` is done + * using its `containee` member. In the code sequence above, suppose + * `containee` is non-NULL with a refcount of 1. Its refcount falls to + * 0 on the first line, which can trigger an arbitrary amount of code, + * possibly including finalizers (like __del__ methods or weakref callbacks) + * coded in Python, which in turn can release the GIL and allow other threads + * to run, etc. Such code may even invoke methods of `self` again, or cause + * cyclic gc to trigger, but-- oops! --self->containee still points to the + * object being torn down, and it may be in an insane state while being torn + * down. This has in fact been a rich historic source of miserable (rare & + * hard-to-diagnose) segfaulting (and other) bugs. + * + * The safe way is: + * + * Py_CLEAR(op); + * + * That arranges to set `op` to NULL _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * There are cases where it's safe to use the naive code, but they're brittle. + * For example, if `op` points to a Python integer, you know that destroying + * one of those can't cause problems -- but in part that relies on that + * Python integers aren't currently weakly referencable. Best practice is + * to use Py_CLEAR() even if you can't think of a reason for why you need to. + */ +#define Py_CLEAR(op) \ + do { \ + if (op) { \ + PyObject *_py_tmp = (PyObject *)(op); \ + (op) = NULL; \ + Py_DECREF(_py_tmp); \ + } \ + } while (0) + +/* Macros to use in case the object pointer may be NULL: */ +#define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) +#define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) + +/* +These are provided as conveniences to Python runtime embedders, so that +they can have object code that is not dependent on Python compilation flags. +*/ +PyAPI_FUNC(void) Py_IncRef(PyObject *); +PyAPI_FUNC(void) Py_DecRef(PyObject *); + +/* +_Py_NoneStruct is an object of undefined type which can be used in contexts +where NULL (nil) is not suitable (since NULL often means 'error'). + +Don't forget to apply Py_INCREF() when returning this value!!! +*/ +PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ +#define Py_None (&_Py_NoneStruct) + +/* Macro for returning Py_None from a function */ +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None + +/* +Py_NotImplemented is a singleton used to signal that an operation is +not implemented for a given type combination. +*/ +PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ +#define Py_NotImplemented (&_Py_NotImplementedStruct) + +/* Macro for returning Py_NotImplemented from a function */ +#define Py_RETURN_NOTIMPLEMENTED \ + return Py_INCREF(Py_NotImplemented), Py_NotImplemented + +/* Rich comparison opcodes */ +#define Py_LT 0 +#define Py_LE 1 +#define Py_EQ 2 +#define Py_NE 3 +#define Py_GT 4 +#define Py_GE 5 + +/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. + * Defined in object.c. + */ +PyAPI_DATA(int) _Py_SwappedOp[]; + + +/* +More conventions +================ + +Argument Checking +----------------- + +Functions that take objects as arguments normally don't check for nil +arguments, but they do check the type of the argument, and return an +error if the function doesn't apply to the type. + +Failure Modes +------------- + +Functions may fail for a variety of reasons, including running out of +memory. This is communicated to the caller in two ways: an error string +is set (see errors.h), and the function result differs: functions that +normally return a pointer return NULL for failure, functions returning +an integer return -1 (which could be a legal return value too!), and +other functions return 0 for success and -1 for failure. +Callers should always check for errors before using the result. If +an error was set, the caller must either explicitly clear it, or pass +the error on to its caller. + +Reference Counts +---------------- + +It takes a while to get used to the proper usage of reference counts. + +Functions that create an object set the reference count to 1; such new +objects must be stored somewhere or destroyed again with Py_DECREF(). +Some functions that 'store' objects, such as PyTuple_SetItem() and +PyList_SetItem(), +don't increment the reference count of the object, since the most +frequent use is to store a fresh object. Functions that 'retrieve' +objects, such as PyTuple_GetItem() and PyDict_GetItemString(), also +don't increment +the reference count, since most frequently the object is only looked at +quickly. Thus, to retrieve an object and store it again, the caller +must call Py_INCREF() explicitly. + +NOTE: functions that 'consume' a reference count, like +PyList_SetItem(), consume the reference even if the object wasn't +successfully stored, to simplify error handling. + +It seems attractive to make other functions that take an object as +argument consume a reference count; however, this may quickly get +confusing (even the current practice is already confusing). Consider +it carefully, it may save lots of calls to Py_INCREF() and Py_DECREF() at +times. +*/ + + +/* Trashcan mechanism, thanks to Christian Tismer. + +When deallocating a container object, it's possible to trigger an unbounded +chain of deallocations, as each Py_DECREF in turn drops the refcount on "the +next" object in the chain to 0. This can easily lead to stack faults, and +especially in threads (which typically have less stack space to work with). + +A container object that participates in cyclic gc can avoid this by +bracketing the body of its tp_dealloc function with a pair of macros: + +static void +mytype_dealloc(mytype *p) +{ + ... declarations go here ... + + PyObject_GC_UnTrack(p); // must untrack first + Py_TRASHCAN_SAFE_BEGIN(p) + ... The body of the deallocator goes here, including all calls ... + ... to Py_DECREF on contained objects. ... + Py_TRASHCAN_SAFE_END(p) +} + +CAUTION: Never return from the middle of the body! If the body needs to +"get out early", put a label immediately before the Py_TRASHCAN_SAFE_END +call, and goto it. Else the call-depth counter (see below) will stay +above 0 forever, and the trashcan will never get emptied. + +How it works: The BEGIN macro increments a call-depth counter. So long +as this counter is small, the body of the deallocator is run directly without +further ado. But if the counter gets large, it instead adds p to a list of +objects to be deallocated later, skips the body of the deallocator, and +resumes execution after the END macro. The tp_dealloc routine then returns +without deallocating anything (and so unbounded call-stack depth is avoided). + +When the call stack finishes unwinding again, code generated by the END macro +notices this, and calls another routine to deallocate all the objects that +may have been added to the list of deferred deallocations. In effect, a +chain of N deallocations is broken into N / PyTrash_UNWIND_LEVEL pieces, +with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. +*/ + +PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_destroy_chain(void); +PyAPI_DATA(int) _PyTrash_delete_nesting; +PyAPI_DATA(PyObject *) _PyTrash_delete_later; + +#define PyTrash_UNWIND_LEVEL 50 + +#define Py_TRASHCAN_SAFE_BEGIN(op) \ + if (_PyTrash_delete_nesting < PyTrash_UNWIND_LEVEL) { \ + ++_PyTrash_delete_nesting; + /* The body of the deallocator is here. */ +#define Py_TRASHCAN_SAFE_END(op) \ + --_PyTrash_delete_nesting; \ + if (_PyTrash_delete_later && _PyTrash_delete_nesting <= 0) \ + _PyTrash_destroy_chain(); \ + } \ + else \ + _PyTrash_deposit_object((PyObject*)op); + +#endif /* !Py_OBJECT_H */ diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Modules/gcmodule.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Modules/gcmodule.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,1513 @@ +/* + + Reference Cycle Garbage Collection + ================================== + + Neil Schemenauer + + Based on a post on the python-dev list. Ideas from Guido van Rossum, + Eric Tiedemann, and various others. + + http://www.arctrix.com/nas/python/gc/ + + The following mailing list threads provide a historical perspective on + the design of this module. Note that a fair amount of refinement has + occurred since those discussions. + + http://mail.python.org/pipermail/python-dev/2000-March/002385.html + http://mail.python.org/pipermail/python-dev/2000-March/002434.html + http://mail.python.org/pipermail/python-dev/2000-March/002497.html + + For a highlevel view of the collection process, read the collect + function. + +*/ + +#include "Python.h" +#include "frameobject.h" /* for PyFrame_ClearFreeList */ + +/* Get an object's GC head */ +#define AS_GC(o) ((PyGC_Head *)(o)-1) + +/* Get the object given the GC head */ +#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) + +/*** Global GC state ***/ + +struct gc_generation { + PyGC_Head head; + int threshold; /* collection threshold */ + int count; /* count of allocations or collections of younger + generations */ +}; + +#define NUM_GENERATIONS 3 +#define GEN_HEAD(n) (&generations[n].head) + +/* linked lists of container objects */ +static struct gc_generation generations[NUM_GENERATIONS] = { + /* PyGC_Head, threshold, count */ + {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, + {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, + {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, +}; + +PyGC_Head *_PyGC_generation0 = GEN_HEAD(0); + +static int enabled = 1; /* automatic collection enabled? */ + +/* true if we are currently running the collector */ +static int collecting = 0; + +/* list of uncollectable objects */ +static PyObject *garbage = NULL; + +/* Python string to use if unhandled exception occurs */ +static PyObject *gc_str = NULL; + +/* Python string used to look for __del__ attribute. */ +static PyObject *delstr = NULL; + +/* This is the number of objects who survived the last full collection. It + approximates the number of long lived objects tracked by the GC. + + (by "full collection", we mean a collection of the oldest generation). +*/ +static Py_ssize_t long_lived_total = 0; + +/* This is the number of objects who survived all "non-full" collections, + and are awaiting to undergo a full collection for the first time. + +*/ +static Py_ssize_t long_lived_pending = 0; + +/* + NOTE: about the counting of long-lived objects. + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + long_lived_pending / long_lived_total + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). + + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html +*/ + + +/* set for debugging information */ +#define DEBUG_STATS (1<<0) /* print collection statistics */ +#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ +#define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */ +#define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */ +#define DEBUG_LEAK DEBUG_COLLECTABLE | \ + DEBUG_UNCOLLECTABLE | \ + DEBUG_SAVEALL +static int debug; +static PyObject *tmod = NULL; + +/*-------------------------------------------------------------------------- +gc_refs values. + +Between collections, every gc'ed object has one of two gc_refs values: + +GC_UNTRACKED + The initial state; objects returned by PyObject_GC_Malloc are in this + state. The object doesn't live in any generation list, and its + tp_traverse slot must not be called. + +GC_REACHABLE + The object lives in some generation list, and its tp_traverse is safe to + call. An object transitions to GC_REACHABLE when PyObject_GC_Track + is called. + +During a collection, gc_refs can temporarily take on other states: + +>= 0 + At the start of a collection, update_refs() copies the true refcount + to gc_refs, for each object in the generation being collected. + subtract_refs() then adjusts gc_refs so that it equals the number of + times an object is referenced directly from outside the generation + being collected. + gc_refs remains >= 0 throughout these steps. + +GC_TENTATIVELY_UNREACHABLE + move_unreachable() then moves objects not reachable (whether directly or + indirectly) from outside the generation into an "unreachable" set. + Objects that are found to be reachable have gc_refs set to GC_REACHABLE + again. Objects that are found to be unreachable have gc_refs set to + GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing + this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may + transition back to GC_REACHABLE. + + Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates + for collection. If it's decided not to collect such an object (e.g., + it has a __del__ method), its gc_refs is restored to GC_REACHABLE again. +---------------------------------------------------------------------------- +*/ +#define GC_UNTRACKED _PyGC_REFS_UNTRACKED +#define GC_REACHABLE _PyGC_REFS_REACHABLE +#define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE + +#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED) +#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE) +#define IS_TENTATIVELY_UNREACHABLE(o) ( \ + (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE) + +/*** list functions ***/ + +static void +gc_list_init(PyGC_Head *list) +{ + list->gc.gc_prev = list; + list->gc.gc_next = list; +} + +static int +gc_list_is_empty(PyGC_Head *list) +{ + return (list->gc.gc_next == list); +} + +#if 0 +/* This became unused after gc_list_move() was introduced. */ +/* Append `node` to `list`. */ +static void +gc_list_append(PyGC_Head *node, PyGC_Head *list) +{ + node->gc.gc_next = list; + node->gc.gc_prev = list->gc.gc_prev; + node->gc.gc_prev->gc.gc_next = node; + list->gc.gc_prev = node; +} +#endif + +/* Remove `node` from the gc list it's currently in. */ +static void +gc_list_remove(PyGC_Head *node) +{ + node->gc.gc_prev->gc.gc_next = node->gc.gc_next; + node->gc.gc_next->gc.gc_prev = node->gc.gc_prev; + node->gc.gc_next = NULL; /* object is not currently tracked */ +} + +/* Move `node` from the gc list it's currently in (which is not explicitly + * named here) to the end of `list`. This is semantically the same as + * gc_list_remove(node) followed by gc_list_append(node, list). + */ +static void +gc_list_move(PyGC_Head *node, PyGC_Head *list) +{ + PyGC_Head *new_prev; + PyGC_Head *current_prev = node->gc.gc_prev; + PyGC_Head *current_next = node->gc.gc_next; + /* Unlink from current list. */ + current_prev->gc.gc_next = current_next; + current_next->gc.gc_prev = current_prev; + /* Relink at end of new list. */ + new_prev = node->gc.gc_prev = list->gc.gc_prev; + new_prev->gc.gc_next = list->gc.gc_prev = node; + node->gc.gc_next = list; +} + +/* append list `from` onto list `to`; `from` becomes an empty list */ +static void +gc_list_merge(PyGC_Head *from, PyGC_Head *to) +{ + PyGC_Head *tail; + assert(from != to); + if (!gc_list_is_empty(from)) { + tail = to->gc.gc_prev; + tail->gc.gc_next = from->gc.gc_next; + tail->gc.gc_next->gc.gc_prev = tail; + to->gc.gc_prev = from->gc.gc_prev; + to->gc.gc_prev->gc.gc_next = to; + } + gc_list_init(from); +} + +static Py_ssize_t +gc_list_size(PyGC_Head *list) +{ + PyGC_Head *gc; + Py_ssize_t n = 0; + for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) { + n++; + } + return n; +} + +/* Append objects in a GC list to a Python list. + * Return 0 if all OK, < 0 if error (out of memory for list). + */ +static int +append_objects(PyObject *py_list, PyGC_Head *gc_list) +{ + PyGC_Head *gc; + for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) { + PyObject *op = FROM_GC(gc); + if (op != py_list) { + if (PyList_Append(py_list, op)) { + return -1; /* exception */ + } + } + } + return 0; +} + +/*** end of list stuff ***/ + + +/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects + * in containers, and is GC_REACHABLE for all tracked gc objects not in + * containers. + */ +static void +update_refs(PyGC_Head *containers) +{ + PyGC_Head *gc = containers->gc.gc_next; + for (; gc != containers; gc = gc->gc.gc_next) { + assert(gc->gc.gc_refs == GC_REACHABLE); + gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc)); + /* Python's cyclic gc should never see an incoming refcount + * of 0: if something decref'ed to 0, it should have been + * deallocated immediately at that time. + * Possible cause (if the assert triggers): a tp_dealloc + * routine left a gc-aware object tracked during its teardown + * phase, and did something-- or allowed something to happen -- + * that called back into Python. gc can trigger then, and may + * see the still-tracked dying object. Before this assert + * was added, such mistakes went on to allow gc to try to + * delete the object again. In a debug build, that caused + * a mysterious segfault, when _Py_ForgetReference tried + * to remove the object from the doubly-linked list of all + * objects a second time. In a release build, an actual + * double deallocation occurred, which leads to corruption + * of the allocator's internal bookkeeping pointers. That's + * so serious that maybe this should be a release-build + * check instead of an assert? + */ + assert(gc->gc.gc_refs != 0); + } +} + +/* A traversal callback for subtract_refs. */ +static int +visit_decref(PyObject *op, void *data) +{ + assert(op != NULL); + if (PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + /* We're only interested in gc_refs for objects in the + * generation being collected, which can be recognized + * because only they have positive gc_refs. + */ + assert(gc->gc.gc_refs != 0); /* else refcount was too small */ + if (gc->gc.gc_refs > 0) + gc->gc.gc_refs--; + } + return 0; +} + +/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 + * for all objects in containers, and is GC_REACHABLE for all tracked gc + * objects not in containers. The ones with gc_refs > 0 are directly + * reachable from outside containers, and so can't be collected. + */ +static void +subtract_refs(PyGC_Head *containers) +{ + traverseproc traverse; + PyGC_Head *gc = containers->gc.gc_next; + for (; gc != containers; gc=gc->gc.gc_next) { + traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; + (void) traverse(FROM_GC(gc), + (visitproc)visit_decref, + NULL); + } +} + +/* A traversal callback for move_unreachable. */ +static int +visit_reachable(PyObject *op, PyGC_Head *reachable) +{ + if (PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + const Py_ssize_t gc_refs = gc->gc.gc_refs; + + if (gc_refs == 0) { + /* This is in move_unreachable's 'young' list, but + * the traversal hasn't yet gotten to it. All + * we need to do is tell move_unreachable that it's + * reachable. + */ + gc->gc.gc_refs = 1; + } + else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) { + /* This had gc_refs = 0 when move_unreachable got + * to it, but turns out it's reachable after all. + * Move it back to move_unreachable's 'young' list, + * and move_unreachable will eventually get to it + * again. + */ + gc_list_move(gc, reachable); + gc->gc.gc_refs = 1; + } + /* Else there's nothing to do. + * If gc_refs > 0, it must be in move_unreachable's 'young' + * list, and move_unreachable will eventually get to it. + * If gc_refs == GC_REACHABLE, it's either in some other + * generation so we don't care about it, or move_unreachable + * already dealt with it. + * If gc_refs == GC_UNTRACKED, it must be ignored. + */ + else { + assert(gc_refs > 0 + || gc_refs == GC_REACHABLE + || gc_refs == GC_UNTRACKED); + } + } + return 0; +} + +/* Move the unreachable objects from young to unreachable. After this, + * all objects in young have gc_refs = GC_REACHABLE, and all objects in + * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked + * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE. + * All objects in young after this are directly or indirectly reachable + * from outside the original young; and all objects in unreachable are + * not. + */ +static void +move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) +{ + PyGC_Head *gc = young->gc.gc_next; + + /* Invariants: all objects "to the left" of us in young have gc_refs + * = GC_REACHABLE, and are indeed reachable (directly or indirectly) + * from outside the young list as it was at entry. All other objects + * from the original young "to the left" of us are in unreachable now, + * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the + * left of us in 'young' now have been scanned, and no objects here + * or to the right have been scanned yet. + */ + + while (gc != young) { + PyGC_Head *next; + + if (gc->gc.gc_refs) { + /* gc is definitely reachable from outside the + * original 'young'. Mark it as such, and traverse + * its pointers to find any other objects that may + * be directly reachable from it. Note that the + * call to tp_traverse may append objects to young, + * so we have to wait until it returns to determine + * the next object to visit. + */ + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + assert(gc->gc.gc_refs > 0); + gc->gc.gc_refs = GC_REACHABLE; + (void) traverse(op, + (visitproc)visit_reachable, + (void *)young); + next = gc->gc.gc_next; + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); + } + else if (PyDict_CheckExact(op)) { + _PyDict_MaybeUntrack(op); + } + } + else { + /* This *may* be unreachable. To make progress, + * assume it is. gc isn't directly reachable from + * any object we've already traversed, but may be + * reachable from an object we haven't gotten to yet. + * visit_reachable will eventually move gc back into + * young if that's so, and we'll see it again. + */ + next = gc->gc.gc_next; + gc_list_move(gc, unreachable); + gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE; + } + gc = next; + } +} + +/* Return true if object has a finalization method. */ +static int +has_finalizer(PyObject *op) +{ + if (PyGen_CheckExact(op)) + return PyGen_NeedsFinalizing((PyGenObject *)op); + else + return op->ob_type->tp_del != NULL; +} + +/* Move the objects in unreachable with __del__ methods into `finalizers`. + * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the + * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE. + */ +static void +move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) +{ + PyGC_Head *gc; + PyGC_Head *next; + + /* March over unreachable. Move objects with finalizers into + * `finalizers`. + */ + for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { + PyObject *op = FROM_GC(gc); + + assert(IS_TENTATIVELY_UNREACHABLE(op)); + next = gc->gc.gc_next; + + if (has_finalizer(op)) { + gc_list_move(gc, finalizers); + gc->gc.gc_refs = GC_REACHABLE; + } + } +} + +/* A traversal callback for move_finalizer_reachable. */ +static int +visit_move(PyObject *op, PyGC_Head *tolist) +{ + if (PyObject_IS_GC(op)) { + if (IS_TENTATIVELY_UNREACHABLE(op)) { + PyGC_Head *gc = AS_GC(op); + gc_list_move(gc, tolist); + gc->gc.gc_refs = GC_REACHABLE; + } + } + return 0; +} + +/* Move objects that are reachable from finalizers, from the unreachable set + * into finalizers set. + */ +static void +move_finalizer_reachable(PyGC_Head *finalizers) +{ + traverseproc traverse; + PyGC_Head *gc = finalizers->gc.gc_next; + for (; gc != finalizers; gc = gc->gc.gc_next) { + /* Note that the finalizers list may grow during this. */ + traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; + (void) traverse(FROM_GC(gc), + (visitproc)visit_move, + (void *)finalizers); + } +} + +/* Clear all weakrefs to unreachable objects, and if such a weakref has a + * callback, invoke it if necessary. Note that it's possible for such + * weakrefs to be outside the unreachable set -- indeed, those are precisely + * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for + * overview & some details. Some weakrefs with callbacks may be reclaimed + * directly by this routine; the number reclaimed is the return value. Other + * weakrefs with callbacks may be moved into the `old` generation. Objects + * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in + * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, + * no object in `unreachable` is weakly referenced anymore. + */ +static int +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +{ + PyGC_Head *gc; + PyObject *op; /* generally FROM_GC(gc) */ + PyWeakReference *wr; /* generally a cast of op */ + PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ + PyGC_Head *next; + int num_freed = 0; + + gc_list_init(&wrcb_to_call); + + /* Clear all weakrefs to the objects in unreachable. If such a weakref + * also has a callback, move it into `wrcb_to_call` if the callback + * needs to be invoked. Note that we cannot invoke any callbacks until + * all weakrefs to unreachable objects are cleared, lest the callback + * resurrect an unreachable object via a still-active weakref. We + * make another pass over wrcb_to_call, invoking callbacks, after this + * pass completes. + */ + for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + op = FROM_GC(gc); + assert(IS_TENTATIVELY_UNREACHABLE(op)); + next = gc->gc.gc_next; + + if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) + continue; + + /* It supports weakrefs. Does it have any? */ + wrlist = (PyWeakReference **) + PyObject_GET_WEAKREFS_LISTPTR(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs, and move the weakrefs with callbacks + * that must be called into wrcb_to_call. + */ + for (wr = *wrlist; wr != NULL; wr = *wrlist) { + PyGC_Head *wrasgc; /* AS_GC(wr) */ + + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + assert(wr->wr_object == op); + _PyWeakref_ClearRef(wr); + assert(wr->wr_object == Py_None); + if (wr->wr_callback == NULL) + continue; /* no callback */ + + /* Headache time. `op` is going away, and is weakly referenced by + * `wr`, which has a callback. Should the callback be invoked? If wr + * is also trash, no: + * + * 1. There's no need to call it. The object and the weakref are + * both going away, so it's legitimate to pretend the weakref is + * going away first. The user has to ensure a weakref outlives its + * referent if they want a guarantee that the wr callback will get + * invoked. + * + * 2. It may be catastrophic to call it. If the callback is also in + * cyclic trash (CT), then although the CT is unreachable from + * outside the current generation, CT may be reachable from the + * callback. Then the callback could resurrect insane objects. + * + * Since the callback is never needed and may be unsafe in this case, + * wr is simply left in the unreachable set. Note that because we + * already called _PyWeakref_ClearRef(wr), its callback will never + * trigger. + * + * OTOH, if wr isn't part of CT, we should invoke the callback: the + * weakref outlived the trash. Note that since wr isn't CT in this + * case, its callback can't be CT either -- wr acted as an external + * root to this generation, and therefore its callback did too. So + * nothing in CT is reachable from the callback either, so it's hard + * to imagine how calling it later could create a problem for us. wr + * is moved to wrcb_to_call in this case. + */ + if (IS_TENTATIVELY_UNREACHABLE(wr)) + continue; + assert(IS_REACHABLE(wr)); + + /* Create a new reference so that wr can't go away + * before we can process it again. + */ + Py_INCREF(wr); + + /* Move wr to wrcb_to_call, for the next pass. */ + wrasgc = AS_GC(wr); + assert(wrasgc != next); /* wrasgc is reachable, but + next isn't, so they can't + be the same */ + gc_list_move(wrasgc, &wrcb_to_call); + } + } + + /* Invoke the callbacks we decided to honor. It's safe to invoke them + * because they can't reference unreachable objects. + */ + while (! gc_list_is_empty(&wrcb_to_call)) { + PyObject *temp; + PyObject *callback; + + gc = wrcb_to_call.gc.gc_next; + op = FROM_GC(gc); + assert(IS_REACHABLE(op)); + assert(PyWeakref_Check(op)); + wr = (PyWeakReference *)op; + callback = wr->wr_callback; + assert(callback != NULL); + + /* copy-paste of weakrefobject.c's handle_callback() */ + temp = PyObject_CallFunctionObjArgs(callback, wr, NULL); + if (temp == NULL) + PyErr_WriteUnraisable(callback); + else + Py_DECREF(temp); + + /* Give up the reference we created in the first pass. When + * op's refcount hits 0 (which it may or may not do right now), + * op's tp_dealloc will decref op->wr_callback too. Note + * that the refcount probably will hit 0 now, and because this + * weakref was reachable to begin with, gc didn't already + * add it to its count of freed objects. Example: a reachable + * weak value dict maps some key to this reachable weakref. + * The callback removes this key->weakref mapping from the + * dict, leaving no other references to the weakref (excepting + * ours). + */ + Py_DECREF(op); + if (wrcb_to_call.gc.gc_next == gc) { + /* object is still alive -- move it */ + gc_list_move(gc, old); + } + else + ++num_freed; + } + + return num_freed; +} + +static void +debug_cycle(char *msg, PyObject *op) +{ + PySys_WriteStderr("gc: %.100s <%.100s %p>\n", + msg, Py_TYPE(op)->tp_name, op); +} + +/* Handle uncollectable garbage (cycles with finalizers, and stuff reachable + * only from such cycles). + * If DEBUG_SAVEALL, all objects in finalizers are appended to the module + * garbage list (a Python list), else only the objects in finalizers with + * __del__ methods are appended to garbage. All objects in finalizers are + * merged into the old list regardless. + * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list). + * The finalizers list is made empty on a successful return. + */ +static int +handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old) +{ + PyGC_Head *gc = finalizers->gc.gc_next; + + if (garbage == NULL) { + garbage = PyList_New(0); + if (garbage == NULL) + Py_FatalError("gc couldn't create gc.garbage list"); + } + for (; gc != finalizers; gc = gc->gc.gc_next) { + PyObject *op = FROM_GC(gc); + + if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) { + if (PyList_Append(garbage, op) < 0) + return -1; + } + } + + gc_list_merge(finalizers, old); + return 0; +} + +/* Break reference cycles by clearing the containers involved. This is + * tricky business as the lists can be changing and we don't know which + * objects may be freed. It is possible I screwed something up here. + */ +static void +delete_garbage(PyGC_Head *collectable, PyGC_Head *old) +{ + inquiry clear; + + while (!gc_list_is_empty(collectable)) { + PyGC_Head *gc = collectable->gc.gc_next; + PyObject *op = FROM_GC(gc); + + assert(IS_TENTATIVELY_UNREACHABLE(op)); + if (debug & DEBUG_SAVEALL) { + PyList_Append(garbage, op); + } + else { + if ((clear = Py_TYPE(op)->tp_clear) != NULL) { + Py_INCREF(op); + clear(op); + Py_DECREF(op); + } + } + if (collectable->gc.gc_next == gc) { + /* object is still alive, move it, it may die later */ + gc_list_move(gc, old); + gc->gc.gc_refs = GC_REACHABLE; + } + } +} + +/* Clear all free lists + * All free lists are cleared during the collection of the highest generation. + * Allocated items in the free list may keep a pymalloc arena occupied. + * Clearing the free lists may give back memory to the OS earlier. + */ +static void +clear_freelists(void) +{ + (void)PyMethod_ClearFreeList(); + (void)PyFrame_ClearFreeList(); + (void)PyCFunction_ClearFreeList(); + (void)PyTuple_ClearFreeList(); + (void)PyUnicode_ClearFreeList(); + (void)PyFloat_ClearFreeList(); +} + +static double +get_time(void) +{ + double result = 0; + if (tmod != NULL) { + PyObject *f = PyObject_CallMethod(tmod, "time", NULL); + if (f == NULL) { + PyErr_Clear(); + } + else { + if (PyFloat_Check(f)) + result = PyFloat_AsDouble(f); + Py_DECREF(f); + } + } + return result; +} + +/* This is the main function. Read this to understand how the + * collection process works. */ +static Py_ssize_t +collect(int generation) +{ + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + PyGC_Head *young; /* the generation we are examining */ + PyGC_Head *old; /* next older generation */ + PyGC_Head unreachable; /* non-problematic unreachable trash */ + PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head *gc; + double t1 = 0.0; + + if (delstr == NULL) { + delstr = PyUnicode_InternFromString("__del__"); + if (delstr == NULL) + Py_FatalError("gc couldn't allocate \"__del__\""); + } + + if (debug & DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", + generation); + PySys_WriteStderr("gc: objects in each generation:"); + for (i = 0; i < NUM_GENERATIONS; i++) + PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d", + gc_list_size(GEN_HEAD(i))); + t1 = get_time(); + PySys_WriteStderr("\n"); + } + + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) + generations[generation+1].count += 1; + for (i = 0; i <= generation; i++) + generations[i].count = 0; + + /* merge younger generations with one we are currently collecting */ + for (i = 0; i < generation; i++) { + gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation)); + } + + /* handy references */ + young = GEN_HEAD(generation); + if (generation < NUM_GENERATIONS-1) + old = GEN_HEAD(generation+1); + else + old = young; + + /* Using ob_refcnt and gc_refs, calculate which objects in the + * container set are reachable from outside the set (i.e., have a + * refcount greater than 0 when all the references within the + * set are taken into account). + */ + update_refs(young); + subtract_refs(young); + + /* Leave everything reachable from outside young in young, and move + * everything else (in young) to unreachable. + * NOTE: This used to move the reachable objects into a reachable + * set instead. But most things usually turn out to be reachable, + * so it's more efficient to move the unreachable things. + */ + gc_list_init(&unreachable); + move_unreachable(young, &unreachable); + + /* Move reachable objects to next generation. */ + if (young != old) { + if (generation == NUM_GENERATIONS - 2) { + long_lived_pending += gc_list_size(young); + } + gc_list_merge(young, old); + } + else { + long_lived_pending = 0; + long_lived_total = gc_list_size(young); + } + + /* All objects in unreachable are trash, but objects reachable from + * finalizers can't safely be deleted. Python programmers should take + * care not to create such things. For Python, finalizers means + * instance objects with __del__ methods. Weakrefs with callbacks + * can also call arbitrary Python code but they will be dealt with by + * handle_weakrefs(). + */ + gc_list_init(&finalizers); + move_finalizers(&unreachable, &finalizers); + /* finalizers contains the unreachable objects with a finalizer; + * unreachable objects reachable *from* those are also uncollectable, + * and we move those into the finalizers list too. + */ + move_finalizer_reachable(&finalizers); + + /* Collect statistics on collectable objects found and print + * debugging information. + */ + for (gc = unreachable.gc.gc_next; gc != &unreachable; + gc = gc->gc.gc_next) { + m++; + if (debug & DEBUG_COLLECTABLE) { + debug_cycle("collectable", FROM_GC(gc)); + } + } + + /* Clear weakrefs and invoke callbacks as necessary. */ + m += handle_weakrefs(&unreachable, old); + + /* Call tp_clear on objects in the unreachable set. This will cause + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ + delete_garbage(&unreachable, old); + + /* Collect statistics on uncollectable objects found and print + * debugging information. */ + for (gc = finalizers.gc.gc_next; + gc != &finalizers; + gc = gc->gc.gc_next) { + n++; + if (debug & DEBUG_UNCOLLECTABLE) + debug_cycle("uncollectable", FROM_GC(gc)); + } + if (debug & DEBUG_STATS) { + double t2 = get_time(); + if (m == 0 && n == 0) + PySys_WriteStderr("gc: done"); + else + PySys_WriteStderr( + "gc: done, " + "%" PY_FORMAT_SIZE_T "d unreachable, " + "%" PY_FORMAT_SIZE_T "d uncollectable", + n+m, n); + if (t1 && t2) { + PySys_WriteStderr(", %.4fs elapsed", t2-t1); + } + PySys_WriteStderr(".\n"); + } + + /* Append instances in the uncollectable set to a Python + * reachable list of garbage. The programmer has to deal with + * this if they insist on creating this type of structure. + */ + (void)handle_finalizers(&finalizers, old); + + /* Clear free list only during the collection of the highest + * generation */ + if (generation == NUM_GENERATIONS-1) { + clear_freelists(); + } + + if (PyErr_Occurred()) { + if (gc_str == NULL) + gc_str = PyUnicode_FromString("garbage collection"); + PyErr_WriteUnraisable(gc_str); + Py_FatalError("unexpected exception during garbage collection"); + } + return n+m; +} + +static Py_ssize_t +collect_generations(void) +{ + int i; + Py_ssize_t n = 0; + + /* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in the that generation and + * generations younger than it will be collected. */ + for (i = NUM_GENERATIONS-1; i >= 0; i--) { + if (generations[i].count > generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects. See comments at the beginning + of this file, and issue #4074. + */ + if (i == NUM_GENERATIONS - 1 + && long_lived_pending < long_lived_total / 4) + continue; + n = collect(i); + break; + } + } + return n; +} + +PyDoc_STRVAR(gc_enable__doc__, +"enable() -> None\n" +"\n" +"Enable automatic garbage collection.\n"); + +static PyObject * +gc_enable(PyObject *self, PyObject *noargs) +{ + enabled = 1; + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(gc_disable__doc__, +"disable() -> None\n" +"\n" +"Disable automatic garbage collection.\n"); + +static PyObject * +gc_disable(PyObject *self, PyObject *noargs) +{ + enabled = 0; + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(gc_isenabled__doc__, +"isenabled() -> status\n" +"\n" +"Returns true if automatic garbage collection is enabled.\n"); + +static PyObject * +gc_isenabled(PyObject *self, PyObject *noargs) +{ + return PyBool_FromLong((long)enabled); +} + +PyDoc_STRVAR(gc_collect__doc__, +"collect([generation]) -> n\n" +"\n" +"With no arguments, run a full collection. The optional argument\n" +"may be an integer specifying which generation to collect. A ValueError\n" +"is raised if the generation number is invalid.\n\n" +"The number of unreachable objects is returned.\n"); + +static PyObject * +gc_collect(PyObject *self, PyObject *args, PyObject *kws) +{ + static char *keywords[] = {"generation", NULL}; + int genarg = NUM_GENERATIONS - 1; + Py_ssize_t n; + + if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg)) + return NULL; + + else if (genarg < 0 || genarg >= NUM_GENERATIONS) { + PyErr_SetString(PyExc_ValueError, "invalid generation"); + return NULL; + } + + if (collecting) + n = 0; /* already collecting, don't do anything */ + else { + collecting = 1; + n = collect(genarg); + collecting = 0; + } + + return PyLong_FromSsize_t(n); +} + +PyDoc_STRVAR(gc_set_debug__doc__, +"set_debug(flags) -> None\n" +"\n" +"Set the garbage collection debugging flags. Debugging information is\n" +"written to sys.stderr.\n" +"\n" +"flags is an integer and can have the following bits turned on:\n" +"\n" +" DEBUG_STATS - Print statistics during collection.\n" +" DEBUG_COLLECTABLE - Print collectable objects found.\n" +" DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n" +" DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n" +" DEBUG_LEAK - Debug leaking programs (everything but STATS).\n"); + +static PyObject * +gc_set_debug(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "i:set_debug", &debug)) + return NULL; + + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(gc_get_debug__doc__, +"get_debug() -> flags\n" +"\n" +"Get the garbage collection debugging flags.\n"); + +static PyObject * +gc_get_debug(PyObject *self, PyObject *noargs) +{ + return Py_BuildValue("i", debug); +} + +PyDoc_STRVAR(gc_set_thresh__doc__, +"set_threshold(threshold0, [threshold1, threshold2]) -> None\n" +"\n" +"Sets the collection thresholds. Setting threshold0 to zero disables\n" +"collection.\n"); + +static PyObject * +gc_set_thresh(PyObject *self, PyObject *args) +{ + int i; + if (!PyArg_ParseTuple(args, "i|ii:set_threshold", + &generations[0].threshold, + &generations[1].threshold, + &generations[2].threshold)) + return NULL; + for (i = 2; i < NUM_GENERATIONS; i++) { + /* generations higher than 2 get the same threshold */ + generations[i].threshold = generations[2].threshold; + } + + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(gc_get_thresh__doc__, +"get_threshold() -> (threshold0, threshold1, threshold2)\n" +"\n" +"Return the current collection thresholds\n"); + +static PyObject * +gc_get_thresh(PyObject *self, PyObject *noargs) +{ + return Py_BuildValue("(iii)", + generations[0].threshold, + generations[1].threshold, + generations[2].threshold); +} + +PyDoc_STRVAR(gc_get_count__doc__, +"get_count() -> (count0, count1, count2)\n" +"\n" +"Return the current collection counts\n"); + +static PyObject * +gc_get_count(PyObject *self, PyObject *noargs) +{ + return Py_BuildValue("(iii)", + generations[0].count, + generations[1].count, + generations[2].count); +} + +static int +referrersvisit(PyObject* obj, PyObject *objs) +{ + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(objs); i++) + if (PyTuple_GET_ITEM(objs, i) == obj) + return 1; + return 0; +} + +static int +gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) +{ + PyGC_Head *gc; + PyObject *obj; + traverseproc traverse; + for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) { + obj = FROM_GC(gc); + traverse = Py_TYPE(obj)->tp_traverse; + if (obj == objs || obj == resultlist) + continue; + if (traverse(obj, (visitproc)referrersvisit, objs)) { + if (PyList_Append(resultlist, obj) < 0) + return 0; /* error */ + } + } + return 1; /* no error */ +} + +PyDoc_STRVAR(gc_get_referrers__doc__, +"get_referrers(*objs) -> list\n\ +Return the list of objects that directly refer to any of objs."); + +static PyObject * +gc_get_referrers(PyObject *self, PyObject *args) +{ + int i; + PyObject *result = PyList_New(0); + if (!result) return NULL; + + for (i = 0; i < NUM_GENERATIONS; i++) { + if (!(gc_referrers_for(args, GEN_HEAD(i), result))) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + +/* Append obj to list; return true if error (out of memory), false if OK. */ +static int +referentsvisit(PyObject *obj, PyObject *list) +{ + return PyList_Append(list, obj) < 0; +} + +PyDoc_STRVAR(gc_get_referents__doc__, +"get_referents(*objs) -> list\n\ +Return the list of objects that are directly referred to by objs."); + +static PyObject * +gc_get_referents(PyObject *self, PyObject *args) +{ + Py_ssize_t i; + PyObject *result = PyList_New(0); + + if (result == NULL) + return NULL; + + for (i = 0; i < PyTuple_GET_SIZE(args); i++) { + traverseproc traverse; + PyObject *obj = PyTuple_GET_ITEM(args, i); + + if (! PyObject_IS_GC(obj)) + continue; + traverse = Py_TYPE(obj)->tp_traverse; + if (! traverse) + continue; + if (traverse(obj, (visitproc)referentsvisit, result)) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + +PyDoc_STRVAR(gc_get_objects__doc__, +"get_objects() -> [...]\n" +"\n" +"Return a list of objects tracked by the collector (excluding the list\n" +"returned).\n"); + +static PyObject * +gc_get_objects(PyObject *self, PyObject *noargs) +{ + int i; + PyObject* result; + + result = PyList_New(0); + if (result == NULL) + return NULL; + for (i = 0; i < NUM_GENERATIONS; i++) { + if (append_objects(result, GEN_HEAD(i))) { + Py_DECREF(result); + return NULL; + } + } + return result; +} + +PyDoc_STRVAR(gc_is_tracked__doc__, +"is_tracked(obj) -> bool\n" +"\n" +"Returns true if the object is tracked by the garbage collector.\n" +"Simple atomic objects will return false.\n" +); + +static PyObject * +gc_is_tracked(PyObject *self, PyObject *obj) +{ + PyObject *result; + + if (PyObject_IS_GC(obj) && IS_TRACKED(obj)) + result = Py_True; + else + result = Py_False; + Py_INCREF(result); + return result; +} + + +PyDoc_STRVAR(gc__doc__, +"This module provides access to the garbage collector for reference cycles.\n" +"\n" +"enable() -- Enable automatic garbage collection.\n" +"disable() -- Disable automatic garbage collection.\n" +"isenabled() -- Returns true if automatic collection is enabled.\n" +"collect() -- Do a full collection right now.\n" +"get_count() -- Return the current collection counts.\n" +"set_debug() -- Set debugging flags.\n" +"get_debug() -- Get debugging flags.\n" +"set_threshold() -- Set the collection thresholds.\n" +"get_threshold() -- Return the current the collection thresholds.\n" +"get_objects() -- Return a list of all objects tracked by the collector.\n" +"is_tracked() -- Returns true if a given object is tracked.\n" +"get_referrers() -- Return the list of objects that refer to an object.\n" +"get_referents() -- Return the list of objects that an object refers to.\n"); + +static PyMethodDef GcMethods[] = { + {"enable", gc_enable, METH_NOARGS, gc_enable__doc__}, + {"disable", gc_disable, METH_NOARGS, gc_disable__doc__}, + {"isenabled", gc_isenabled, METH_NOARGS, gc_isenabled__doc__}, + {"set_debug", gc_set_debug, METH_VARARGS, gc_set_debug__doc__}, + {"get_debug", gc_get_debug, METH_NOARGS, gc_get_debug__doc__}, + {"get_count", gc_get_count, METH_NOARGS, gc_get_count__doc__}, + {"set_threshold", gc_set_thresh, METH_VARARGS, gc_set_thresh__doc__}, + {"get_threshold", gc_get_thresh, METH_NOARGS, gc_get_thresh__doc__}, + {"collect", (PyCFunction)gc_collect, + METH_VARARGS | METH_KEYWORDS, gc_collect__doc__}, + {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__}, + {"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__}, + {"get_referrers", gc_get_referrers, METH_VARARGS, + gc_get_referrers__doc__}, + {"get_referents", gc_get_referents, METH_VARARGS, + gc_get_referents__doc__}, + {NULL, NULL} /* Sentinel */ +}; + +static struct PyModuleDef gcmodule = { + PyModuleDef_HEAD_INIT, + "gc", /* m_name */ + gc__doc__, /* m_doc */ + -1, /* m_size */ + GcMethods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; + +PyMODINIT_FUNC +PyInit_gc(void) +{ + PyObject *m; + + m = PyModule_Create(&gcmodule); + + if (m == NULL) + return NULL; + + if (garbage == NULL) { + garbage = PyList_New(0); + if (garbage == NULL) + return NULL; + } + Py_INCREF(garbage); + if (PyModule_AddObject(m, "garbage", garbage) < 0) + return NULL; + + /* Importing can't be done in collect() because collect() + * can be called via PyGC_Collect() in Py_Finalize(). + * This wouldn't be a problem, except that is + * reset to 0 before calling collect which trips up + * the import and triggers an assertion. + */ + if (tmod == NULL) { + tmod = PyImport_ImportModuleNoBlock("time"); + if (tmod == NULL) + PyErr_Clear(); + } + +#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL + ADD_INT(DEBUG_STATS); + ADD_INT(DEBUG_COLLECTABLE); + ADD_INT(DEBUG_UNCOLLECTABLE); + ADD_INT(DEBUG_SAVEALL); + ADD_INT(DEBUG_LEAK); +#undef ADD_INT + return m; +} + +/* API to invoke gc.collect() from C */ +Py_ssize_t +PyGC_Collect(void) +{ + Py_ssize_t n; + + if (collecting) + n = 0; /* already collecting, don't do anything */ + else { + collecting = 1; + n = collect(NUM_GENERATIONS - 1); + collecting = 0; + } + + return n; +} + +void +_PyGC_Fini(void) +{ + if (!(debug & DEBUG_SAVEALL) + && garbage != NULL && PyList_GET_SIZE(garbage) > 0) { + char *message; + if (debug & DEBUG_UNCOLLECTABLE) + message = "gc: %zd uncollectable objects at " \ + "shutdown"; + else + message = "gc: %zd uncollectable objects at " \ + "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; + if (PyErr_WarnFormat(PyExc_ResourceWarning, 0, message, + PyList_GET_SIZE(garbage)) < 0) + PyErr_WriteUnraisable(NULL); + if (debug & DEBUG_UNCOLLECTABLE) { + PyObject *repr = NULL, *bytes = NULL; + repr = PyObject_Repr(garbage); + if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) + PyErr_WriteUnraisable(garbage); + else { + PySys_WriteStderr( + " %s\n", + PyBytes_AS_STRING(bytes) + ); + } + Py_XDECREF(repr); + Py_XDECREF(bytes); + } + } +} + +/* for debugging */ +void +_PyGC_Dump(PyGC_Head *g) +{ + _PyObject_Dump(FROM_GC(g)); +} + +/* extension modules might be compiled with GC support so these + functions must always be available */ + +#undef PyObject_GC_Track +#undef PyObject_GC_UnTrack +#undef PyObject_GC_Del +#undef _PyObject_GC_Malloc + +void +PyObject_GC_Track(void *op) +{ + _PyObject_GC_TRACK(op); +} + +/* for binary compatibility with 2.2 */ +void +_PyObject_GC_Track(PyObject *op) +{ + PyObject_GC_Track(op); +} + +void +PyObject_GC_UnTrack(void *op) +{ + /* Obscure: the Py_TRASHCAN mechanism requires that we be able to + * call PyObject_GC_UnTrack twice on an object. + */ + if (IS_TRACKED(op)) + _PyObject_GC_UNTRACK(op); +} + +/* for binary compatibility with 2.2 */ +void +_PyObject_GC_UnTrack(PyObject *op) +{ + PyObject_GC_UnTrack(op); +} + +PyObject * +_PyObject_GC_Malloc(size_t basicsize) +{ + PyObject *op; + PyGC_Head *g; + if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) + return PyErr_NoMemory(); + g = (PyGC_Head *)PyObject_MALLOC( + sizeof(PyGC_Head) + basicsize); + if (g == NULL) + return PyErr_NoMemory(); + g->gc.gc_refs = GC_UNTRACKED; + generations[0].count++; /* number of allocated GC objects */ + if (generations[0].count > generations[0].threshold && + enabled && + generations[0].threshold && + !collecting && + !PyErr_Occurred()) { + collecting = 1; + collect_generations(); + collecting = 0; + } + op = FROM_GC(g); + return op; +} + +PyObject * +_PyObject_GC_New(PyTypeObject *tp) +{ + PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp)); + if (op != NULL) + op = PyObject_INIT(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + const size_t size = _PyObject_VAR_SIZE(tp, nitems); + PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size); + if (op != NULL) + op = PyObject_INIT_VAR(op, tp, nitems); + return op; +} + +PyVarObject * +_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) +{ + const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); + PyGC_Head *g = AS_GC(op); + if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) + return (PyVarObject *)PyErr_NoMemory(); + g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize); + if (g == NULL) + return (PyVarObject *)PyErr_NoMemory(); + op = (PyVarObject *) FROM_GC(g); + Py_SIZE(op) = nitems; + return op; +} + +void +PyObject_GC_Del(void *op) +{ + PyGC_Head *g = AS_GC(op); + if (IS_TRACKED(op)) + gc_list_remove(g); + if (generations[0].count > 0) { + generations[0].count--; + } + PyObject_FREE(g); +} diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Modules/main.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Modules/main.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,2 @@ + + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/boolobject.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/boolobject.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,184 @@ +/* Boolean type, a subtype of int */ + +#include "Python.h" +#include "longintrepr.h" + +/* We define bool_repr to return "False" or "True" */ + +static PyObject *false_str = NULL; +static PyObject *true_str = NULL; + +static PyObject * +bool_repr(PyObject *self) +{ + PyObject *s; + + if (self == Py_True) + s = true_str ? true_str : + (true_str = PyUnicode_InternFromString("True")); + else + s = false_str ? false_str : + (false_str = PyUnicode_InternFromString("False")); + Py_XINCREF(s); + return s; +} + +/* Function to return a bool from a C long */ + +PyObject *PyBool_FromLong(long ok) +{ + PyObject *result; + + if (ok) + result = Py_True; + else + result = Py_False; + Py_INCREF(result); + return result; +} + +/* We define bool_new to always return either Py_True or Py_False */ + +static PyObject * +bool_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"x", 0}; + PyObject *x = Py_False; + long ok; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bool", kwlist, &x)) + return NULL; + ok = PyObject_IsTrue(x); + if (ok < 0) + return NULL; + return PyBool_FromLong(ok); +} + +/* Arithmetic operations redefined to return bool if both args are bool. */ + +static PyObject * +bool_and(PyObject *a, PyObject *b) +{ + if (!PyBool_Check(a) || !PyBool_Check(b)) + return PyLong_Type.tp_as_number->nb_and(a, b); + return PyBool_FromLong((a == Py_True) & (b == Py_True)); +} + +static PyObject * +bool_or(PyObject *a, PyObject *b) +{ + if (!PyBool_Check(a) || !PyBool_Check(b)) + return PyLong_Type.tp_as_number->nb_or(a, b); + return PyBool_FromLong((a == Py_True) | (b == Py_True)); +} + +static PyObject * +bool_xor(PyObject *a, PyObject *b) +{ + if (!PyBool_Check(a) || !PyBool_Check(b)) + return PyLong_Type.tp_as_number->nb_xor(a, b); + return PyBool_FromLong((a == Py_True) ^ (b == Py_True)); +} + +/* Doc string */ + +PyDoc_STRVAR(bool_doc, +"bool(x) -> bool\n\ +\n\ +Returns True when the argument x is true, False otherwise.\n\ +The builtins True and False are the only two instances of the class bool.\n\ +The class bool is a subclass of the class int, and cannot be subclassed."); + +/* Arithmetic methods -- only so we can override &, |, ^. */ + +static PyNumberMethods bool_as_number = { + 0, /* nb_add */ + 0, /* nb_subtract */ + 0, /* nb_multiply */ + 0, /* nb_remainder */ + 0, /* nb_divmod */ + 0, /* nb_power */ + 0, /* nb_negative */ + 0, /* nb_positive */ + 0, /* nb_absolute */ + 0, /* nb_bool */ + 0, /* nb_invert */ + 0, /* nb_lshift */ + 0, /* nb_rshift */ + bool_and, /* nb_and */ + bool_xor, /* nb_xor */ + bool_or, /* nb_or */ + 0, /* nb_int */ + 0, /* nb_reserved */ + 0, /* nb_float */ + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + 0, /* nb_floor_divide */ + 0, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + 0, /* nb_index */ +}; + +/* The type object for bool. Note that this cannot be subclassed! */ + +PyTypeObject PyBool_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "bool", + sizeof(struct _longobject), + 0, + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + bool_repr, /* tp_repr */ + &bool_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + bool_repr, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + bool_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyLong_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + bool_new, /* tp_new */ +}; + +/* The objects representing bool values False and True */ + +struct _longobject _Py_FalseStruct = { + PyVarObject_HEAD_INIT(&PyBool_Type, 0) + { 0 } +}; + +struct _longobject _Py_TrueStruct = { + PyVarObject_HEAD_INIT(&PyBool_Type, 1) + { 1 } +}; diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/listobject.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/listobject.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,2849 @@ +/* List object implementation */ + +#include "Python.h" + +/* Ensure ob_item has room for at least newsize elements, and set + * ob_size to newsize. If newsize > ob_size on entry, the content + * of the new slots at exit is undefined heap trash; it's the caller's + * responsibility to overwrite them with sane values. + * The number of allocated elements may grow, shrink, or stay the same. + * Failure is impossible if newsize <= self.allocated on entry, although + * that partly relies on an assumption that the system realloc() never + * fails when passed a number of bytes <= the number of bytes last + * allocated (the C standard doesn't guarantee this, but it's hard to + * imagine a realloc implementation where it wouldn't be true). + * Note that self->ob_item may change, and even if newsize is less + * than ob_size on entry. + */ +static int +list_resize(PyListObject *self, Py_ssize_t newsize) +{ + PyObject **items; + size_t new_allocated; + Py_ssize_t allocated = self->allocated; + + /* Bypass realloc() when a previous overallocation is large enough + to accommodate the newsize. If the newsize falls lower than half + the allocated size, then proceed with the realloc() to shrink the list. + */ + if (allocated >= newsize && newsize >= (allocated >> 1)) { + assert(self->ob_item != NULL || newsize == 0); + Py_SIZE(self) = newsize; + return 0; + } + + /* This over-allocates proportional to the list size, making room + * for additional growth. The over-allocation is mild, but is + * enough to give linear-time amortized behavior over a long + * sequence of appends() in the presence of a poorly-performing + * system realloc(). + * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... + */ + new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); + + /* check for integer overflow */ + if (new_allocated > PY_SIZE_MAX - newsize) { + PyErr_NoMemory(); + return -1; + } else { + new_allocated += newsize; + } + + if (newsize == 0) + new_allocated = 0; + items = self->ob_item; + if (new_allocated <= (PY_SIZE_MAX / sizeof(PyObject *))) + PyMem_RESIZE(items, PyObject *, new_allocated); + else + items = NULL; + if (items == NULL) { + PyErr_NoMemory(); + return -1; + } + self->ob_item = items; + Py_SIZE(self) = newsize; + self->allocated = new_allocated; + return 0; +} + + +/* Empty list reuse scheme to save calls to malloc and free */ +#ifndef PyList_MAXFREELIST +#define PyList_MAXFREELIST 80 +#endif +static PyListObject *free_list[PyList_MAXFREELIST]; +static int numfree = 0; + +int +PyList_ClearFreeList(void) +{ + PyListObject *op; + int ret = numfree; + while (numfree) { + op = free_list[--numfree]; + assert(PyList_CheckExact(op)); + PyObject_GC_Del(op); + } + return ret; +} + +void +PyList_Fini(void) +{ + PyList_ClearFreeList(); +} + +PyObject * +PyList_New(Py_ssize_t size) +{ + PyListObject *op; + size_t nbytes; + + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + /* Check for overflow without an actual overflow, + * which can cause compiler to optimise out */ + if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *)) + return PyErr_NoMemory(); + nbytes = size * sizeof(PyObject *); + if (numfree) { + numfree--; + op = free_list[numfree]; + _Py_NewReference((PyObject *)op); + } else { + op = PyObject_GC_New(PyListObject, &PyList_Type); + if (op == NULL) + return NULL; + } + if (size <= 0) + op->ob_item = NULL; + else { + op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); + if (op->ob_item == NULL) { + Py_DECREF(op); + return PyErr_NoMemory(); + } + memset(op->ob_item, 0, nbytes); + } + Py_SIZE(op) = size; + op->allocated = size; + _PyObject_GC_TRACK(op); + return (PyObject *) op; +} + +Py_ssize_t +PyList_Size(PyObject *op) +{ + if (!PyList_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + else + return Py_SIZE(op); +} + +static PyObject *indexerr = NULL; + +PyObject * +PyList_GetItem(PyObject *op, Py_ssize_t i) +{ + if (!PyList_Check(op)) { + PyErr_BadInternalCall(); + return NULL; + } + if (i < 0 || i >= Py_SIZE(op)) { + if (indexerr == NULL) { + indexerr = PyUnicode_FromString( + "list index out of range"); + if (indexerr == NULL) + return NULL; + } + PyErr_SetObject(PyExc_IndexError, indexerr); + return NULL; + } + return ((PyListObject *)op) -> ob_item[i]; +} + +int +PyList_SetItem(register PyObject *op, register int i, + register PyObject *newitem) +{ + register PyObject *olditem; + register PyObject **p; + if (!PyList_Check(op)) + { + Py_XDECREF(newitem); + PyErr_BadInternalCall(); + return -1; + } + if (i < 0 || i >= Py_SIZE(op)) + { + Py_XDECREF(newitem); + PyErr_SetString(PyExc_IndexError, + "list assignment index out of range"); + return -1; + } + p = ((PyListObject *)op) -> ob_item + i; + olditem = *p; + *p = newitem; + Py_XDECREF(olditem); + return 0; +} + +static int +ins1(PyListObject *self, int where, PyObject *v) +{ + int i, n = Py_SIZE(self); + PyObject **items; + if (v == NULL) { + PyErr_BadInternalCall(); + return -1; + } + if (n == PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "cannot add more objects to list"); + return -1; + } + + if (list_resize(self, n+1) == -1) + return -1; + + if (where < 0) { + where += n; + if (where < 0) + where = 0; + } + if (where > n) + where = n; + items = self->ob_item; + for (i = n; --i >= where; ) + items[i+1] = items[i]; + Py_INCREF(v); + items[where] = v; + return 0; +} + +int +PyList_Insert(PyObject *op, int where, PyObject *newitem) +{ + if (!PyList_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + return ins1((PyListObject *)op, where, newitem); +} + +static int +app1(PyListObject *self, PyObject *v) +{ + Py_ssize_t n = PyList_GET_SIZE(self); + + assert (v != NULL); + if (n == PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "cannot add more objects to list"); + return -1; + } + + if (list_resize(self, n+1) == -1) + return -1; + + Py_INCREF(v); + PyList_SET_ITEM(self, n, v); + return 0; +} + +int +PyList_Append(PyObject *op, PyObject *newitem) +{ + if (PyList_Check(op) && (newitem != NULL)) + return app1((PyListObject *)op, newitem); + PyErr_BadInternalCall(); + return -1; +} + +/* Methods */ + +static void +list_dealloc(PyListObject *op) +{ + Py_ssize_t i; + PyObject_GC_UnTrack(op); + Py_TRASHCAN_SAFE_BEGIN(op) + if (op->ob_item != NULL) { + /* Do it backwards, for Christian Tismer. + There's a simple test case where somehow this reduces + thrashing when a *very* large list is created and + immediately deleted. */ + i = Py_SIZE(op); + while (--i >= 0) { + Py_XDECREF(op->ob_item[i]); + } + PyMem_FREE(op->ob_item); + } + if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op)) + free_list[numfree++] = op; + else + Py_TYPE(op)->tp_free((PyObject *)op); + Py_TRASHCAN_SAFE_END(op) +} + +static PyObject * +list_repr(PyListObject *v) +{ + Py_ssize_t i; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; + + if (Py_SIZE(v) == 0) { + return PyUnicode_FromString("[]"); + } + + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } + + i = Py_ReprEnter((PyObject*)v); + if (i != 0) { + return i > 0 ? PyUnicode_FromString("[...]") : NULL; + } + + if (_PyAccu_Init(&acc)) + goto error; + + s = PyUnicode_FromString("["); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + + /* Do repr() on each element. Note that this may mutate the list, + so must refetch the list size on each iteration. */ + for (i = 0; i < Py_SIZE(v); ++i) { + if (Py_EnterRecursiveCall(" while getting the repr of a list")) + goto error; + s = PyObject_Repr(v->ob_item[i]); + Py_LeaveRecursiveCall(); + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + } + s = PyUnicode_FromString("]"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + + Py_ReprLeave((PyObject *)v); + return _PyAccu_Finish(&acc); + +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); + Py_ReprLeave((PyObject *)v); + return NULL; +} + +static Py_ssize_t +list_length(PyListObject *a) +{ + return Py_SIZE(a); +} + +static int +list_contains(PyListObject *a, PyObject *el) +{ + Py_ssize_t i; + int cmp; + + for (i = 0, cmp = 0 ; cmp == 0 && i < Py_SIZE(a); ++i) + cmp = PyObject_RichCompareBool(el, PyList_GET_ITEM(a, i), + Py_EQ); + return cmp; +} + +static PyObject * +list_item(PyListObject *a, int i) +{ + if (i < 0 || i >= Py_SIZE(a)) { + if (indexerr == NULL) { + indexerr = PyUnicode_FromString( + "list index out of range"); + if (indexerr == NULL) + return NULL; + } + PyErr_SetObject(PyExc_IndexError, indexerr); + return NULL; + } + Py_INCREF(a->ob_item[i]); + return a->ob_item[i]; +} + +static PyObject * +list_slice(PyListObject *a, int ilow, int ihigh) +{ + PyListObject *np; + PyObject **src, **dest; + int i, len; + if (ilow < 0) + ilow = 0; + else if (ilow > Py_SIZE(a)) + ilow = Py_SIZE(a); + if (ihigh < ilow) + ihigh = ilow; + else if (ihigh > Py_SIZE(a)) + ihigh = Py_SIZE(a); + len = ihigh - ilow; + np = (PyListObject *) PyList_New(len); + if (np == NULL) + return NULL; + + src = a->ob_item + ilow; + dest = np->ob_item; + for (i = 0; i < len; i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + return (PyObject *)np; +} + +PyObject * +PyList_GetSlice(PyObject *a, int ilow, int ihigh) +{ + if (!PyList_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + return list_slice((PyListObject *)a, ilow, ihigh); +} + +static PyObject * +list_concat(PyListObject *a, PyObject *bb) +{ + Py_ssize_t size; + Py_ssize_t i; + PyObject **src, **dest; + PyListObject *np; + if (!PyList_Check(bb)) { + PyErr_Format(PyExc_TypeError, + "can only concatenate list (not \"%.200s\") to list", + bb->ob_type->tp_name); + return NULL; + } +#define b ((PyListObject *)bb) + size = Py_SIZE(a) + Py_SIZE(b); + if (size < 0) + return PyErr_NoMemory(); + np = (PyListObject *) PyList_New(size); + if (np == NULL) { + return NULL; + } + src = a->ob_item; + dest = np->ob_item; + for (i = 0; i < Py_SIZE(a); i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + src = b->ob_item; + dest = np->ob_item + Py_SIZE(a); + for (i = 0; i < Py_SIZE(b); i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + return (PyObject *)np; +#undef b +} + +static PyObject * +list_repeat(PyListObject *a, int n) +{ + Py_ssize_t i, j; + Py_ssize_t size; + PyListObject *np; + PyObject **p, **items; + PyObject *elem; + if (n < 0) + n = 0; + if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) + return PyErr_NoMemory(); + size = Py_SIZE(a) * n; + if (size == 0) + return PyList_New(0); + np = (PyListObject *) PyList_New(size); + if (np == NULL) + return NULL; + + items = np->ob_item; + if (Py_SIZE(a) == 1) { + elem = a->ob_item[0]; + for (i = 0; i < n; i++) { + items[i] = elem; + Py_INCREF(elem); + } + return (PyObject *) np; + } + p = np->ob_item; + items = a->ob_item; + for (i = 0; i < n; i++) { + for (j = 0; j < Py_SIZE(a); j++) { + *p = items[j]; + Py_INCREF(*p); + p++; + } + } + return (PyObject *) np; +} + +static int +list_clear(PyListObject *a) +{ + Py_ssize_t i; + PyObject **item = a->ob_item; + if (item != NULL) { + /* Because XDECREF can recursively invoke operations on + this list, we make it empty first. */ + i = Py_SIZE(a); + Py_SIZE(a) = 0; + a->ob_item = NULL; + a->allocated = 0; + while (--i >= 0) { + Py_XDECREF(item[i]); + } + PyMem_FREE(item); + } + /* Never fails; the return value can be ignored. + Note that there is no guarantee that the list is actually empty + at this point, because XDECREF may have populated it again! */ + return 0; +} + +/* a[ilow:ihigh] = v if v != NULL. + * del a[ilow:ihigh] if v == NULL. + * + * Special speed gimmick: when v is NULL and ihigh - ilow <= 8, it's + * guaranteed the call cannot fail. + */ +static int +list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) +{ + /* Because [X]DECREF can recursively invoke list operations on + this list, we must postpone all [X]DECREF activity until + after the list is back in its canonical shape. Therefore + we must allocate an additional array, 'recycle', into which + we temporarily copy the items that are deleted from the + list. :-( */ + PyObject *recycle_on_stack[8]; + PyObject **recycle = recycle_on_stack; /* will allocate more if needed */ + PyObject **item; + PyObject **vitem = NULL; + PyObject *v_as_SF = NULL; /* PySequence_Fast(v) */ + Py_ssize_t n; /* # of elements in replacement list */ + Py_ssize_t norig; /* # of elements in list getting replaced */ + Py_ssize_t d; /* Change in size */ + Py_ssize_t k; + size_t s; + int result = -1; /* guilty until proved innocent */ +#define b ((PyListObject *)v) + if (v == NULL) + n = 0; + else { + if (a == b) { + /* Special case "a[i:j] = a" -- copy b first */ + v = list_slice(b, 0, Py_SIZE(b)); + if (v == NULL) + return result; + result = list_ass_slice(a, ilow, ihigh, v); + Py_DECREF(v); + return result; + } + v_as_SF = PySequence_Fast(v, "can only assign an iterable"); + if(v_as_SF == NULL) + goto Error; + n = PySequence_Fast_GET_SIZE(v_as_SF); + vitem = PySequence_Fast_ITEMS(v_as_SF); + } + if (ilow < 0) + ilow = 0; + else if (ilow > Py_SIZE(a)) + ilow = Py_SIZE(a); + + if (ihigh < ilow) + ihigh = ilow; + else if (ihigh > Py_SIZE(a)) + ihigh = Py_SIZE(a); + + norig = ihigh - ilow; + assert(norig >= 0); + d = n - norig; + if (Py_SIZE(a) + d == 0) { + Py_XDECREF(v_as_SF); + return list_clear(a); + } + item = a->ob_item; + /* recycle the items that we are about to remove */ + s = norig * sizeof(PyObject *); + if (s > sizeof(recycle_on_stack)) { + recycle = (PyObject **)PyMem_MALLOC(s); + if (recycle == NULL) { + PyErr_NoMemory(); + goto Error; + } + } + memcpy(recycle, &item[ilow], s); + + if (d < 0) { /* Delete -d items */ + memmove(&item[ihigh+d], &item[ihigh], + (Py_SIZE(a) - ihigh)*sizeof(PyObject *)); + list_resize(a, Py_SIZE(a) + d); + item = a->ob_item; + } + else if (d > 0) { /* Insert d items */ + k = Py_SIZE(a); + if (list_resize(a, k+d) < 0) + goto Error; + item = a->ob_item; + memmove(&item[ihigh+d], &item[ihigh], + (k - ihigh)*sizeof(PyObject *)); + } + for (k = 0; k < n; k++, ilow++) { + PyObject *w = vitem[k]; + Py_XINCREF(w); + item[ilow] = w; + } + for (k = norig - 1; k >= 0; --k) + Py_XDECREF(recycle[k]); + result = 0; + Error: + if (recycle != recycle_on_stack) + PyMem_FREE(recycle); + Py_XDECREF(v_as_SF); + return result; +#undef b +} + +int +PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) +{ + if (!PyList_Check(a)) { + PyErr_BadInternalCall(); + return -1; + } + return list_ass_slice((PyListObject *)a, ilow, ihigh, v); +} + +static PyObject * +list_inplace_repeat(PyListObject *self, Py_ssize_t n) +{ + PyObject **items; + Py_ssize_t size, i, j, p; + + + size = PyList_GET_SIZE(self); + if (size == 0 || n == 1) { + Py_INCREF(self); + return (PyObject *)self; + } + + if (n < 1) { + (void)list_clear(self); + Py_INCREF(self); + return (PyObject *)self; + } + + if (size > PY_SSIZE_T_MAX / n) { + return PyErr_NoMemory(); + } + + if (list_resize(self, size*n) == -1) + return NULL; + + p = size; + items = self->ob_item; + for (i = 1; i < n; i++) { /* Start counting at 1, not 0 */ + for (j = 0; j < size; j++) { + PyObject *o = items[j]; + Py_INCREF(o); + items[p++] = o; + } + } + Py_INCREF(self); + return (PyObject *)self; +} + +static int +list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v) +{ + PyObject *old_value; + if (i < 0 || i >= Py_SIZE(a)) { + PyErr_SetString(PyExc_IndexError, + "list assignment index out of range"); + return -1; + } + if (v == NULL) + return list_ass_slice(a, i, i+1, v); + Py_INCREF(v); + old_value = a->ob_item[i]; + a->ob_item[i] = v; + Py_DECREF(old_value); + return 0; +} + +static PyObject * +listinsert(PyListObject *self, PyObject *args) +{ + Py_ssize_t i; + PyObject *v; + if (!PyArg_ParseTuple(args, "nO:insert", &i, &v)) + return NULL; + if (ins1(self, i, v) == 0) + Py_RETURN_NONE; + return NULL; +} + +static PyObject * +listclear(PyListObject *self) +{ + list_clear(self); + Py_RETURN_NONE; +} + +static PyObject * +listcopy(PyListObject *self) +{ + return list_slice(self, 0, Py_SIZE(self)); +} + +static PyObject * +listappend(PyListObject *self, PyObject *v) +{ + if (app1(self, v) == 0) + Py_RETURN_NONE; + return NULL; +} + +static PyObject * +listextend(PyListObject *self, PyObject *b) +{ + PyObject *it; /* iter(v) */ + Py_ssize_t m; /* size of self */ + Py_ssize_t n; /* guess for size of b */ + Py_ssize_t mn; /* m + n */ + Py_ssize_t i; + PyObject *(*iternext)(PyObject *); + + /* Special cases: + 1) lists and tuples which can use PySequence_Fast ops + 2) extending self to self requires making a copy first + */ + if (PyList_CheckExact(b) || PyTuple_CheckExact(b) || (PyObject *)self == b) { + PyObject **src, **dest; + b = PySequence_Fast(b, "argument must be iterable"); + if (!b) + return NULL; + n = PySequence_Fast_GET_SIZE(b); + if (n == 0) { + /* short circuit when b is empty */ + Py_DECREF(b); + Py_RETURN_NONE; + } + m = Py_SIZE(self); + if (list_resize(self, m + n) == -1) { + Py_DECREF(b); + return NULL; + } + /* note that we may still have self == b here for the + * situation a.extend(a), but the following code works + * in that case too. Just make sure to resize self + * before calling PySequence_Fast_ITEMS. + */ + /* populate the end of self with b's items */ + src = PySequence_Fast_ITEMS(b); + dest = self->ob_item + m; + for (i = 0; i < n; i++) { + PyObject *o = src[i]; + Py_INCREF(o); + dest[i] = o; + } + Py_DECREF(b); + Py_RETURN_NONE; + } + + it = PyObject_GetIter(b); + if (it == NULL) + return NULL; + iternext = *it->ob_type->tp_iternext; + + /* Guess a result list size. */ + n = _PyObject_LengthHint(b, 8); + if (n == -1) { + Py_DECREF(it); + return NULL; + } + m = Py_SIZE(self); + mn = m + n; + if (mn >= m) { + /* Make room. */ + if (list_resize(self, mn) == -1) + goto error; + /* Make the list sane again. */ + Py_SIZE(self) = m; + } + /* Else m + n overflowed; on the chance that n lied, and there really + * is enough room, ignore it. If n was telling the truth, we'll + * eventually run out of memory during the loop. + */ + + /* Run iterator to exhaustion. */ + for (;;) { + PyObject *item = iternext(it); + if (item == NULL) { + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) + PyErr_Clear(); + else + goto error; + } + break; + } + if (Py_SIZE(self) < self->allocated) { + /* steals ref */ + PyList_SET_ITEM(self, Py_SIZE(self), item); + ++Py_SIZE(self); + } + else { + int status = app1(self, item); + Py_DECREF(item); /* append creates a new ref */ + if (status < 0) + goto error; + } + } + + /* Cut back result list if initial guess was too large. */ + if (Py_SIZE(self) < self->allocated) + list_resize(self, Py_SIZE(self)); /* shrinking can't fail */ + + Py_DECREF(it); + Py_RETURN_NONE; + + error: + Py_DECREF(it); + return NULL; +} + +PyObject * +_PyList_Extend(PyListObject *self, PyObject *b) +{ + return listextend(self, b); +} + +static PyObject * +list_inplace_concat(PyListObject *self, PyObject *other) +{ + PyObject *result; + + result = listextend(self, other); + if (result == NULL) + return result; + Py_DECREF(result); + Py_INCREF(self); + return (PyObject *)self; +} + +static PyObject * +listpop(PyListObject *self, PyObject *args) +{ + Py_ssize_t i = -1; + PyObject *v; + int status; + + if (!PyArg_ParseTuple(args, "|n:pop", &i)) + return NULL; + + if (Py_SIZE(self) == 0) { + /* Special-case most common failure cause */ + PyErr_SetString(PyExc_IndexError, "pop from empty list"); + return NULL; + } + if (i < 0) + i += Py_SIZE(self); + if (i < 0 || i >= Py_SIZE(self)) { + PyErr_SetString(PyExc_IndexError, "pop index out of range"); + return NULL; + } + v = self->ob_item[i]; + if (i == Py_SIZE(self) - 1) { + status = list_resize(self, Py_SIZE(self) - 1); + assert(status >= 0); + return v; /* and v now owns the reference the list had */ + } + Py_INCREF(v); + status = list_ass_slice(self, i, i+1, (PyObject *)NULL); + assert(status >= 0); + /* Use status, so that in a release build compilers don't + * complain about the unused name. + */ + (void) status; + + return v; +} + +/* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ +static void +reverse_slice(PyObject **lo, PyObject **hi) +{ + assert(lo && hi); + + --hi; + while (lo < hi) { + PyObject *t = *lo; + *lo = *hi; + *hi = t; + ++lo; + --hi; + } +} + +/* Lots of code for an adaptive, stable, natural mergesort. There are many + * pieces to this algorithm; read listsort.txt for overviews and details. + */ + +/* A sortslice contains a pointer to an array of keys and a pointer to + * an array of corresponding values. In other words, keys[i] + * corresponds with values[i]. If values == NULL, then the keys are + * also the values. + * + * Several convenience routines are provided here, so that keys and + * values are always moved in sync. + */ + +typedef struct { + PyObject **keys; + PyObject **values; +} sortslice; + +Py_LOCAL_INLINE(void) +sortslice_copy(sortslice *s1, int i, sortslice *s2, int j) +{ + s1->keys[i] = s2->keys[j]; + if (s1->values != NULL) + s1->values[i] = s2->values[j]; +} + +Py_LOCAL_INLINE(void) +sortslice_copy_incr(sortslice *dst, sortslice *src) +{ + *dst->keys++ = *src->keys++; + if (dst->values != NULL) + *dst->values++ = *src->values++; +} + +Py_LOCAL_INLINE(void) +sortslice_copy_decr(sortslice *dst, sortslice *src) +{ + *dst->keys-- = *src->keys--; + if (dst->values != NULL) + *dst->values-- = *src->values--; +} + + +Py_LOCAL_INLINE(void) +sortslice_memcpy(sortslice *s1, int i, sortslice *s2, int j, int n) +{ + memcpy(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n); + if (s1->values != NULL) + memcpy(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n); +} + +Py_LOCAL_INLINE(void) +sortslice_memmove(sortslice *s1, Py_ssize_t i, sortslice *s2, Py_ssize_t j, + Py_ssize_t n) +{ + memmove(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n); + if (s1->values != NULL) + memmove(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n); +} + +Py_LOCAL_INLINE(void) +sortslice_advance(sortslice *slice, Py_ssize_t n) +{ + slice->keys += n; + if (slice->values != NULL) + slice->values += n; +} + +/* Comparison function: PyObject_RichCompareBool with Py_LT. + * Returns -1 on error, 1 if x < y, 0 if x >= y. + */ + +#define ISLT(X, Y) (PyObject_RichCompareBool(X, Y, Py_LT)) + +/* Compare X to Y via "<". Goto "fail" if the comparison raises an + error. Else "k" is set to true iff X. X and Y are PyObject*s. +*/ +#define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail; \ + if (k) + +/* binarysort is the best method for sorting small arrays: it does + few compares, but can do data movement quadratic in the number of + elements. + [lo, hi) is a contiguous slice of a list, and is sorted via + binary insertion. This sort is stable. + On entry, must have lo <= start <= hi, and that [lo, start) is already + sorted (pass start == lo if you don't know!). + If islt() complains return -1, else 0. + Even in case of error, the output slice will be some permutation of + the input (nothing is lost or duplicated). +*/ +static int +binarysort(sortslice lo, PyObject **hi, PyObject **start) +{ + register Py_ssize_t k; + register PyObject **l, **p, **r; + register PyObject *pivot; + + assert(lo.keys <= start && start <= hi); + /* assert [lo, start) is sorted */ + if (lo.keys == start) + ++start; + for (; start < hi; ++start) { + /* set l to where *start belongs */ + l = lo.keys; + r = start; + pivot = *r; + /* Invariants: + * pivot >= all in [lo, l). + * pivot < all in [r, start). + * The second is vacuously true at the start. + */ + assert(l < r); + do { + p = l + ((r - l) >> 1); + IFLT(pivot, *p) + r = p; + else + l = p+1; + } while (l < r); + assert(l == r); + /* The invariants still hold, so pivot >= all in [lo, l) and + pivot < all in [l, start), so pivot belongs at l. Note + that if there are elements equal to pivot, l points to the + first slot after them -- that's why this sort is stable. + Slide over to make room. + Caution: using memmove is much slower under MSVC 5; + we're not usually moving many slots. */ + for (p = start; p > l; --p) + *p = *(p-1); + *l = pivot; + if (lo.values != NULL) { + Py_ssize_t offset = lo.values - lo.keys; + p = start + offset; + pivot = *p; + l += offset; + for (p = start + offset; p > l; --p) + *p = *(p-1); + *l = pivot; + } + } + return 0; + + fail: + return -1; +} + +/* +Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi +is required on entry. "A run" is the longest ascending sequence, with + + lo[0] <= lo[1] <= lo[2] <= ... + +or the longest descending sequence, with + + lo[0] > lo[1] > lo[2] > ... + +Boolean *descending is set to 0 in the former case, or to 1 in the latter. +For its intended use in a stable mergesort, the strictness of the defn of +"descending" is needed so that the caller can safely reverse a descending +sequence without violating stability (strict > ensures there are no equal +elements to get out of order). + +Returns -1 in case of error. +*/ +static Py_ssize_t +count_run(PyObject **lo, PyObject **hi, int *descending) +{ + Py_ssize_t k; + Py_ssize_t n; + + assert(lo < hi); + *descending = 0; + ++lo; + if (lo == hi) + return 1; + + n = 2; + IFLT(*lo, *(lo-1)) { + *descending = 1; + for (lo = lo+1; lo < hi; ++lo, ++n) { + IFLT(*lo, *(lo-1)) + ; + else + break; + } + } + else { + for (lo = lo+1; lo < hi; ++lo, ++n) { + IFLT(*lo, *(lo-1)) + break; + } + } + + return n; +fail: + return -1; +} + +/* +Locate the proper position of key in a sorted vector; if the vector contains +an element equal to key, return the position immediately to the left of +the leftmost equal element. [gallop_right() does the same except returns +the position to the right of the rightmost equal element (if any).] + +"a" is a sorted vector with n elements, starting at a[0]. n must be > 0. + +"hint" is an index at which to begin the search, 0 <= hint < n. The closer +hint is to the final result, the faster this runs. + +The return value is the int k in 0..n such that + + a[k-1] < key <= a[k] + +pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, +key belongs at index k; or, IOW, the first k elements of a should precede +key, and the last n-k should follow key. + +Returns -1 on error. See listsort.txt for info on the method. +*/ +static Py_ssize_t +gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +{ + Py_ssize_t ofs; + Py_ssize_t lastofs; + Py_ssize_t k; + + assert(key && a && n > 0 && hint >= 0 && hint < n); + + a += hint; + lastofs = 0; + ofs = 1; + IFLT(*a, key) { + /* a[hint] < key -- gallop right, until + * a[hint + lastofs] < key <= a[hint + ofs] + */ + const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */ + while (ofs < maxofs) { + IFLT(a[ofs], key) { + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + else /* key <= a[hint + ofs] */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + else { + /* key <= a[hint] -- gallop left, until + * a[hint - ofs] < key <= a[hint - lastofs] + */ + const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */ + while (ofs < maxofs) { + IFLT(*(a-ofs), key) + break; + /* key <= a[hint - ofs] */ + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to positive offsets relative to &a[0]. */ + k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + a -= hint; + + assert(-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the + * right of lastofs but no farther right than ofs. Do a binary + * search, with invariant a[lastofs-1] < key <= a[ofs]. + */ + ++lastofs; + while (lastofs < ofs) { + Py_ssize_t m = lastofs + ((ofs - lastofs) >> 1); + + IFLT(a[m], key) + lastofs = m+1; /* a[m] < key */ + else + ofs = m; /* key <= a[m] */ + } + assert(lastofs == ofs); /* so a[ofs-1] < key <= a[ofs] */ + return ofs; + +fail: + return -1; +} + +/* +Exactly like gallop_left(), except that if key already exists in a[0:n], +finds the position immediately to the right of the rightmost equal value. + +The return value is the int k in 0..n such that + + a[k-1] <= key < a[k] + +or -1 if error. + +The code duplication is massive, but this is enough different given that +we're sticking to "<" comparisons that it's much harder to follow if +written as one routine with yet another "left or right?" flag. +*/ +static Py_ssize_t +gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) +{ + Py_ssize_t ofs; + Py_ssize_t lastofs; + Py_ssize_t k; + + assert(key && a && n > 0 && hint >= 0 && hint < n); + + a += hint; + lastofs = 0; + ofs = 1; + IFLT(key, *a) { + /* key < a[hint] -- gallop left, until + * a[hint - ofs] <= key < a[hint - lastofs] + */ + const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */ + while (ofs < maxofs) { + IFLT(key, *(a-ofs)) { + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + else /* a[hint - ofs] <= key */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to positive offsets relative to &a[0]. */ + k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + else { + /* a[hint] <= key -- gallop right, until + * a[hint + lastofs] <= key < a[hint + ofs] + */ + const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */ + while (ofs < maxofs) { + IFLT(key, a[ofs]) + break; + /* a[hint + ofs] <= key */ + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + a -= hint; + + assert(-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the + * right of lastofs but no farther right than ofs. Do a binary + * search, with invariant a[lastofs-1] <= key < a[ofs]. + */ + ++lastofs; + while (lastofs < ofs) { + Py_ssize_t m = lastofs + ((ofs - lastofs) >> 1); + + IFLT(key, a[m]) + ofs = m; /* key < a[m] */ + else + lastofs = m+1; /* a[m] <= key */ + } + assert(lastofs == ofs); /* so a[ofs-1] <= key < a[ofs] */ + return ofs; + +fail: + return -1; +} + +/* The maximum number of entries in a MergeState's pending-runs stack. + * This is enough to sort arrays of size up to about + * 32 * phi ** MAX_MERGE_PENDING + * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array + * with 2**64 elements. + */ +#define MAX_MERGE_PENDING 85 + +/* When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. See listsort.txt for more info. + */ +#define MIN_GALLOP 7 + +/* Avoid malloc for small temp arrays. */ +#define MERGESTATE_TEMP_SIZE 256 + +/* One MergeState exists on the stack per invocation of mergesort. It's just + * a convenient way to pass state around among the helper functions. + */ +struct s_slice { + sortslice base; + Py_ssize_t len; +}; + +typedef struct s_MergeState { + /* This controls when we get *into* galloping mode. It's initialized + * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for + * random data, and lower for highly structured data. + */ + Py_ssize_t min_gallop; + + /* 'a' is temp storage to help with merges. It contains room for + * alloced entries. + */ + sortslice a; /* may point to temparray below */ + Py_ssize_t alloced; + + /* A stack of n pending runs yet to be merged. Run #i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that + * + * pending[i].base + pending[i].len == pending[i+1].base + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + int n; + struct s_slice pending[MAX_MERGE_PENDING]; + + /* 'a' points to this when possible, rather than muck with malloc. */ + PyObject *temparray[MERGESTATE_TEMP_SIZE]; +} MergeState; + +/* Conceptually a MergeState's constructor. */ +static void +merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc) +{ + assert(ms != NULL); + if (has_keyfunc) { + /* The temporary space for merging will need at most half the list + * size rounded up. Use the minimum possible space so we can use the + * rest of temparray for other things. In particular, if there is + * enough extra space, listsort() will use it to store the keys. + */ + ms->alloced = (list_size + 1) / 2; + + /* ms->alloced describes how many keys will be stored at + ms->temparray, but we also need to store the values. Hence, + ms->alloced is capped at half of MERGESTATE_TEMP_SIZE. */ + if (MERGESTATE_TEMP_SIZE / 2 < ms->alloced) + ms->alloced = MERGESTATE_TEMP_SIZE / 2; + ms->a.values = &ms->temparray[ms->alloced]; + } + else { + ms->alloced = MERGESTATE_TEMP_SIZE; + ms->a.values = NULL; + } + ms->a.keys = ms->temparray; + ms->n = 0; + ms->min_gallop = MIN_GALLOP; +} + +/* Free all the temp memory owned by the MergeState. This must be called + * when you're done with a MergeState, and may be called before then if + * you want to free the temp memory early. + */ +static void +merge_freemem(MergeState *ms) +{ + assert(ms != NULL); + if (ms->a.keys != ms->temparray) + PyMem_Free(ms->a.keys); +} + +/* Ensure enough temp memory for 'need' array slots is available. + * Returns 0 on success and -1 if the memory can't be gotten. + */ +static int +merge_getmem(MergeState *ms, Py_ssize_t need) +{ + int multiplier; + + assert(ms != NULL); + if (need <= ms->alloced) + return 0; + + multiplier = ms->a.values != NULL ? 2 : 1; + + /* Don't realloc! That can cost cycles to copy the old data, but + * we don't care what's in the block. + */ + merge_freemem(ms); + if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*) / multiplier) { + PyErr_NoMemory(); + return -1; + } + ms->a.keys = (PyObject**)PyMem_Malloc(multiplier * need + * sizeof(PyObject *)); + if (ms->a.keys != NULL) { + ms->alloced = need; + if (ms->a.values != NULL) + ms->a.values = &ms->a.keys[need]; + return 0; + } + PyErr_NoMemory(); + return -1; +} +#define MERGE_GETMEM(MS, NEED) ((NEED) <= (MS)->alloced ? 0 : \ + merge_getmem(MS, NEED)) + +/* Merge the na elements starting at ssa with the nb elements starting at + * ssb.keys = ssa.keys + na in a stable way, in-place. na and nb must be > 0. + * Must also have that ssa.keys[na-1] belongs at the end of the merge, and + * should have na <= nb. See listsort.txt for more info. Return 0 if + * successful, -1 if error. + */ +static Py_ssize_t +merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) +{ + Py_ssize_t k; + sortslice dest; + int result = -1; /* guilty until proved innocent */ + Py_ssize_t min_gallop; + + assert(ms && ssa.keys && ssb.keys && na > 0 && nb > 0); + assert(ssa.keys + na == ssb.keys); + if (MERGE_GETMEM(ms, na) < 0) + return -1; + sortslice_memcpy(&ms->a, 0, &ssa, 0, na); + dest = ssa; + ssa = ms->a; + + sortslice_copy_incr(&dest, &ssb); + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + min_gallop = ms->min_gallop; + for (;;) { + Py_ssize_t acount = 0; /* # of times A won in a row */ + Py_ssize_t bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) { + assert(na > 1 && nb > 0); + k = ISLT(ssb.keys[0], ssa.keys[0]); + if (k) { + if (k < 0) + goto Fail; + sortslice_copy_incr(&dest, &ssb); + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else { + sortslice_copy_incr(&dest, &ssa); + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do { + assert(na > 1 && nb > 0); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + k = gallop_right(ssb.keys[0], ssa.keys, na, 0); + acount = k; + if (k) { + if (k < 0) + goto Fail; + sortslice_memcpy(&dest, 0, &ssa, 0, k); + sortslice_advance(&dest, k); + sortslice_advance(&ssa, k); + na -= k; + if (na == 1) + goto CopyB; + /* na==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (na == 0) + goto Succeed; + } + sortslice_copy_incr(&dest, &ssb); + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left(ssa.keys[0], ssb.keys, nb, 0); + bcount = k; + if (k) { + if (k < 0) + goto Fail; + sortslice_memmove(&dest, 0, &ssb, 0, k); + sortslice_advance(&dest, k); + sortslice_advance(&ssb, k); + nb -= k; + if (nb == 0) + goto Succeed; + } + sortslice_copy_incr(&dest, &ssa); + --na; + if (na == 1) + goto CopyB; + } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + ++min_gallop; /* penalize it for leaving galloping mode */ + ms->min_gallop = min_gallop; + } +Succeed: + result = 0; +Fail: + if (na) + sortslice_memcpy(&dest, 0, &ssa, 0, na); + return result; +CopyB: + assert(na == 1 && nb > 0); + /* The last element of ssa belongs at the end of the merge. */ + sortslice_memmove(&dest, 0, &ssb, 0, nb); + sortslice_copy(&dest, nb, &ssa, 0); + return 0; +} + +/* Merge the na elements starting at pa with the nb elements starting at + * ssb.keys = ssa.keys + na in a stable way, in-place. na and nb must be > 0. + * Must also have that ssa.keys[na-1] belongs at the end of the merge, and + * should have na >= nb. See listsort.txt for more info. Return 0 if + * successful, -1 if error. + */ +static Py_ssize_t +merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, + sortslice ssb, Py_ssize_t nb) +{ + Py_ssize_t k; + sortslice dest, basea, baseb; + int result = -1; /* guilty until proved innocent */ + Py_ssize_t min_gallop; + + assert(ms && ssa.keys && ssb.keys && na > 0 && nb > 0); + assert(ssa.keys + na == ssb.keys); + if (MERGE_GETMEM(ms, nb) < 0) + return -1; + dest = ssb; + sortslice_advance(&dest, nb-1); + sortslice_memcpy(&ms->a, 0, &ssb, 0, nb); + basea = ssa; + baseb = ms->a; + ssb.keys = ms->a.keys + nb - 1; + if (ssb.values != NULL) + ssb.values = ms->a.values + nb - 1; + sortslice_advance(&ssa, na - 1); + + sortslice_copy_decr(&dest, &ssa); + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + min_gallop = ms->min_gallop; + for (;;) { + Py_ssize_t acount = 0; /* # of times A won in a row */ + Py_ssize_t bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) { + assert(na > 0 && nb > 1); + k = ISLT(ssb.keys[0], ssa.keys[0]); + if (k) { + if (k < 0) + goto Fail; + sortslice_copy_decr(&dest, &ssa); + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else { + sortslice_copy_decr(&dest, &ssb); + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do { + assert(na > 0 && nb > 1); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + k = gallop_right(ssb.keys[0], basea.keys, na, na-1); + if (k < 0) + goto Fail; + k = na - k; + acount = k; + if (k) { + sortslice_advance(&dest, -k); + sortslice_advance(&ssa, -k); + sortslice_memmove(&dest, 1, &ssa, 1, k); + na -= k; + if (na == 0) + goto Succeed; + } + sortslice_copy_decr(&dest, &ssb); + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1); + if (k < 0) + goto Fail; + k = nb - k; + bcount = k; + if (k) { + sortslice_advance(&dest, -k); + sortslice_advance(&ssb, -k); + sortslice_memcpy(&dest, 1, &ssb, 1, k); + nb -= k; + if (nb == 1) + goto CopyA; + /* nb==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (nb == 0) + goto Succeed; + } + sortslice_copy_decr(&dest, &ssa); + --na; + if (na == 0) + goto Succeed; + } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + ++min_gallop; /* penalize it for leaving galloping mode */ + ms->min_gallop = min_gallop; + } +Succeed: + result = 0; +Fail: + if (nb) + sortslice_memcpy(&dest, -(nb-1), &baseb, 0, nb); + return result; +CopyA: + assert(nb == 1 && na > 0); + /* The first element of ssb belongs at the front of the merge. */ + sortslice_memmove(&dest, 1-na, &ssa, 1-na, na); + sortslice_advance(&dest, -na); + sortslice_advance(&ssa, -na); + sortslice_copy(&dest, 0, &ssb, 0); + return 0; +} + +/* Merge the two runs at stack indices i and i+1. + * Returns 0 on success, -1 on error. + */ +static Py_ssize_t +merge_at(MergeState *ms, Py_ssize_t i) +{ + sortslice ssa, ssb; + Py_ssize_t na, nb; + Py_ssize_t k; + + assert(ms != NULL); + assert(ms->n >= 2); + assert(i >= 0); + assert(i == ms->n - 2 || i == ms->n - 3); + + ssa = ms->pending[i].base; + na = ms->pending[i].len; + ssb = ms->pending[i+1].base; + nb = ms->pending[i+1].len; + assert(na > 0 && nb > 0); + assert(ssa.keys + na == ssb.keys); + + /* Record the length of the combined runs; if i is the 3rd-last + * run now, also slide over the last run (which isn't involved + * in this merge). The current run i+1 goes away in any case. + */ + ms->pending[i].len = na + nb; + if (i == ms->n - 3) + ms->pending[i+1] = ms->pending[i+2]; + --ms->n; + + /* Where does b start in a? Elements in a before that can be + * ignored (already in place). + */ + k = gallop_right(*ssb.keys, ssa.keys, na, 0); + if (k < 0) + return -1; + sortslice_advance(&ssa, k); + na -= k; + if (na == 0) + return 0; + + /* Where does a end in b? Elements in b after that can be + * ignored (already in place). + */ + nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1); + if (nb <= 0) + return nb; + + /* Merge what remains of the runs, using a temp array with + * min(na, nb) elements. + */ + if (na <= nb) + return merge_lo(ms, ssa, na, ssb, nb); + else + return merge_hi(ms, ssa, na, ssb, nb); +} + +/* Examine the stack of runs waiting to be merged, merging adjacent runs + * until the stack invariants are re-established: + * + * 1. len[-3] > len[-2] + len[-1] + * 2. len[-2] > len[-1] + * + * See listsort.txt for more info. + * + * Returns 0 on success, -1 on error. + */ +static int +merge_collapse(MergeState *ms) +{ + struct s_slice *p = ms->pending; + + assert(ms); + while (ms->n > 1) { + Py_ssize_t n = ms->n - 2; + if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) { + if (p[n-1].len < p[n+1].len) + --n; + if (merge_at(ms, n) < 0) + return -1; + } + else if (p[n].len <= p[n+1].len) { + if (merge_at(ms, n) < 0) + return -1; + } + else + break; + } + return 0; +} + +/* Regardless of invariants, merge all runs on the stack until only one + * remains. This is used at the end of the mergesort. + * + * Returns 0 on success, -1 on error. + */ +static int +merge_force_collapse(MergeState *ms) +{ + struct s_slice *p = ms->pending; + + assert(ms); + while (ms->n > 1) { + Py_ssize_t n = ms->n - 2; + if (n > 0 && p[n-1].len < p[n+1].len) + --n; + if (merge_at(ms, n) < 0) + return -1; + } + return 0; +} + +/* Compute a good value for the minimum run length; natural runs shorter + * than this are boosted artificially via binary insertion. + * + * If n < 64, return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return 32. + * Else return an int k, 32 <= k <= 64, such that n/k is close to, but + * strictly less than, an exact power of 2. + * + * See listsort.txt for more info. + */ +static Py_ssize_t +merge_compute_minrun(Py_ssize_t n) +{ + Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ + + assert(n >= 0); + while (n >= 64) { + r |= n & 1; + n >>= 1; + } + return n + r; +} + +static void +reverse_sortslice(sortslice *s, Py_ssize_t n) +{ + reverse_slice(s->keys, &s->keys[n]); + if (s->values != NULL) + reverse_slice(s->values, &s->values[n]); +} + +/* An adaptive, stable, natural mergesort. See listsort.txt. + * Returns Py_None on success, NULL on error. Even in case of error, the + * list will be some permutation of its input state (nothing is lost or + * duplicated). + */ +static PyObject * +listsort(PyListObject *self, PyObject *args, PyObject *kwds) +{ + MergeState ms; + Py_ssize_t nremaining; + Py_ssize_t minrun; + sortslice lo; + Py_ssize_t saved_ob_size, saved_allocated; + PyObject **saved_ob_item; + PyObject **final_ob_item; + PyObject *result = NULL; /* guilty until proved innocent */ + int reverse = 0; + PyObject *keyfunc = NULL; + Py_ssize_t i; + static char *kwlist[] = {"key", "reverse", 0}; + PyObject **keys; + + assert(self != NULL); + assert (PyList_Check(self)); + if (args != NULL) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oi:sort", + kwlist, &keyfunc, &reverse)) + return NULL; + if (Py_SIZE(args) > 0) { + PyErr_SetString(PyExc_TypeError, + "must use keyword argument for key function"); + return NULL; + } + } + if (keyfunc == Py_None) + keyfunc = NULL; + + /* The list is temporarily made empty, so that mutations performed + * by comparison functions can't affect the slice of memory we're + * sorting (allowing mutations during sorting is a core-dump + * factory, since ob_item may change). + */ + saved_ob_size = Py_SIZE(self); + saved_ob_item = self->ob_item; + saved_allocated = self->allocated; + Py_SIZE(self) = 0; + self->ob_item = NULL; + self->allocated = -1; /* any operation will reset it to >= 0 */ + + if (keyfunc == NULL) { + keys = NULL; + lo.keys = saved_ob_item; + lo.values = NULL; + } + else { + if (saved_ob_size < MERGESTATE_TEMP_SIZE/2) + /* Leverage stack space we allocated but won't otherwise use */ + keys = &ms.temparray[saved_ob_size+1]; + else { + keys = PyMem_MALLOC(sizeof(PyObject *) * saved_ob_size); + if (keys == NULL) + return NULL; + } + + for (i = 0; i < saved_ob_size ; i++) { + keys[i] = PyObject_CallFunctionObjArgs(keyfunc, saved_ob_item[i], + NULL); + if (keys[i] == NULL) { + for (i=i-1 ; i>=0 ; i--) + Py_DECREF(keys[i]); + if (keys != &ms.temparray[saved_ob_size+1]) + PyMem_FREE(keys); + goto keyfunc_fail; + } + } + + lo.keys = keys; + lo.values = saved_ob_item; + } + + merge_init(&ms, saved_ob_size, keys != NULL); + + nremaining = saved_ob_size; + if (nremaining < 2) + goto succeed; + + /* Reverse sort stability achieved by initially reversing the list, + applying a stable forward sort, then reversing the final result. */ + if (reverse) { + if (keys != NULL) + reverse_slice(&keys[0], &keys[saved_ob_size]); + reverse_slice(&saved_ob_item[0], &saved_ob_item[saved_ob_size]); + } + + /* March over the array once, left to right, finding natural runs, + * and extending short natural runs to minrun elements. + */ + minrun = merge_compute_minrun(nremaining); + do { + int descending; + Py_ssize_t n; + + /* Identify next run. */ + n = count_run(lo.keys, lo.keys + nremaining, &descending); + if (n < 0) + goto fail; + if (descending) + reverse_sortslice(&lo, n); + /* If short, extend to min(minrun, nremaining). */ + if (n < minrun) { + const Py_ssize_t force = nremaining <= minrun ? + nremaining : minrun; + if (binarysort(lo, lo.keys + force, lo.keys + n) < 0) + goto fail; + n = force; + } + /* Push run onto pending-runs stack, and maybe merge. */ + assert(ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + if (merge_collapse(&ms) < 0) + goto fail; + /* Advance to find next run. */ + sortslice_advance(&lo, n); + nremaining -= n; + } while (nremaining); + + if (merge_force_collapse(&ms) < 0) + goto fail; + assert(ms.n == 1); + assert(keys == NULL + ? ms.pending[0].base.keys == saved_ob_item + : ms.pending[0].base.keys == &keys[0]); + assert(ms.pending[0].len == saved_ob_size); + lo = ms.pending[0].base; + +succeed: + result = Py_None; +fail: + if (keys != NULL) { + for (i = 0; i < saved_ob_size; i++) + Py_DECREF(keys[i]); + if (keys != &ms.temparray[saved_ob_size+1]) + PyMem_FREE(keys); + } + + if (self->allocated != -1 && result != NULL) { + /* The user mucked with the list during the sort, + * and we don't already have another error to report. + */ + PyErr_SetString(PyExc_ValueError, "list modified during sort"); + result = NULL; + } + + if (reverse && saved_ob_size > 1) + reverse_slice(saved_ob_item, saved_ob_item + saved_ob_size); + + merge_freemem(&ms); + +keyfunc_fail: + final_ob_item = self->ob_item; + i = Py_SIZE(self); + Py_SIZE(self) = saved_ob_size; + self->ob_item = saved_ob_item; + self->allocated = saved_allocated; + if (final_ob_item != NULL) { + /* we cannot use list_clear() for this because it does not + guarantee that the list is really empty when it returns */ + while (--i >= 0) { + Py_XDECREF(final_ob_item[i]); + } + PyMem_FREE(final_ob_item); + } + Py_XINCREF(result); + return result; +} +#undef IFLT +#undef ISLT + +int +PyList_Sort(PyObject *v) +{ + if (v == NULL || !PyList_Check(v)) { + PyErr_BadInternalCall(); + return -1; + } + v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL); + if (v == NULL) + return -1; + Py_DECREF(v); + return 0; +} + +static PyObject * +listreverse(PyListObject *self) +{ + if (Py_SIZE(self) > 1) + reverse_slice(self->ob_item, self->ob_item + Py_SIZE(self)); + Py_RETURN_NONE; +} + +int +PyList_Reverse(PyObject *v) +{ + PyListObject *self = (PyListObject *)v; + + if (v == NULL || !PyList_Check(v)) { + PyErr_BadInternalCall(); + return -1; + } + if (Py_SIZE(self) > 1) + reverse_slice(self->ob_item, self->ob_item + Py_SIZE(self)); + return 0; +} + +PyObject * +PyList_AsTuple(PyObject *v) +{ + PyObject *w; + PyObject **p, **q; + Py_ssize_t n; + if (v == NULL || !PyList_Check(v)) { + PyErr_BadInternalCall(); + return NULL; + } + n = Py_SIZE(v); + w = PyTuple_New(n); + if (w == NULL) + return NULL; + p = ((PyTupleObject *)w)->ob_item; + q = ((PyListObject *)v)->ob_item; + while (--n >= 0) { + Py_INCREF(*q); + *p = *q; + p++; + q++; + } + return w; +} + +static PyObject * +listindex(PyListObject *self, PyObject *args) +{ + Py_ssize_t i, start=0, stop=Py_SIZE(self); + PyObject *v; + + if (!PyArg_ParseTuple(args, "O|O&O&:index", &v, + _PyEval_SliceIndex, &start, + _PyEval_SliceIndex, &stop)) + return NULL; + if (start < 0) { + start += Py_SIZE(self); + if (start < 0) + start = 0; + } + if (stop < 0) { + stop += Py_SIZE(self); + if (stop < 0) + stop = 0; + } + for (i = start; i < stop && i < Py_SIZE(self); i++) { + int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); + if (cmp > 0) + return PyLong_FromSsize_t(i); + else if (cmp < 0) + return NULL; + } + PyErr_Format(PyExc_ValueError, "%R is not in list", v); + return NULL; +} + +static PyObject * +listcount(PyListObject *self, PyObject *v) +{ + Py_ssize_t count = 0; + Py_ssize_t i; + + for (i = 0; i < Py_SIZE(self); i++) { + int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); + if (cmp > 0) + count++; + else if (cmp < 0) + return NULL; + } + return PyLong_FromSsize_t(count); +} + +static PyObject * +listremove(PyListObject *self, PyObject *v) +{ + Py_ssize_t i; + + for (i = 0; i < Py_SIZE(self); i++) { + int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); + if (cmp > 0) { + if (list_ass_slice(self, i, i+1, + (PyObject *)NULL) == 0) + Py_RETURN_NONE; + return NULL; + } + else if (cmp < 0) + return NULL; + } + PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); + return NULL; +} + +static int +list_traverse(PyListObject *o, visitproc visit, void *arg) +{ + Py_ssize_t i; + + for (i = Py_SIZE(o); --i >= 0; ) + Py_VISIT(o->ob_item[i]); + return 0; +} + +static PyObject * +list_richcompare(PyObject *v, PyObject *w, int op) +{ + PyListObject *vl, *wl; + Py_ssize_t i; + + if (!PyList_Check(v) || !PyList_Check(w)) + Py_RETURN_NOTIMPLEMENTED; + + vl = (PyListObject *)v; + wl = (PyListObject *)w; + + if (Py_SIZE(vl) != Py_SIZE(wl) && (op == Py_EQ || op == Py_NE)) { + /* Shortcut: if the lengths differ, the lists differ */ + PyObject *res; + if (op == Py_EQ) + res = Py_False; + else + res = Py_True; + Py_INCREF(res); + return res; + } + + /* Search for the first index where items are different */ + for (i = 0; i < Py_SIZE(vl) && i < Py_SIZE(wl); i++) { + int k = PyObject_RichCompareBool(vl->ob_item[i], + wl->ob_item[i], Py_EQ); + if (k < 0) + return NULL; + if (!k) + break; + } + + if (i >= Py_SIZE(vl) || i >= Py_SIZE(wl)) { + /* No more items to compare -- compare sizes */ + Py_ssize_t vs = Py_SIZE(vl); + Py_ssize_t ws = Py_SIZE(wl); + int cmp; + PyObject *res; + switch (op) { + case Py_LT: cmp = vs < ws; break; + case Py_LE: cmp = vs <= ws; break; + case Py_EQ: cmp = vs == ws; break; + case Py_NE: cmp = vs != ws; break; + case Py_GT: cmp = vs > ws; break; + case Py_GE: cmp = vs >= ws; break; + default: return NULL; /* cannot happen */ + } + if (cmp) + res = Py_True; + else + res = Py_False; + Py_INCREF(res); + return res; + } + + /* We have an item that differs -- shortcuts for EQ/NE */ + if (op == Py_EQ) { + Py_INCREF(Py_False); + return Py_False; + } + if (op == Py_NE) { + Py_INCREF(Py_True); + return Py_True; + } + + /* Compare the final item again using the proper operator */ + return PyObject_RichCompare(vl->ob_item[i], wl->ob_item[i], op); +} + +static int +list_init(PyListObject *self, PyObject *args, PyObject *kw) +{ + PyObject *arg = NULL; + static char *kwlist[] = {"sequence", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:list", kwlist, &arg)) + return -1; + + /* Verify list invariants established by PyType_GenericAlloc() */ + assert(0 <= Py_SIZE(self)); + assert(Py_SIZE(self) <= self->allocated || self->allocated == -1); + assert(self->ob_item != NULL || + self->allocated == 0 || self->allocated == -1); + + /* Empty previous contents */ + if (self->ob_item != NULL) { + (void)list_clear(self); + } + if (arg != NULL) { + PyObject *rv = listextend(self, arg); + if (rv == NULL) + return -1; + Py_DECREF(rv); + } + return 0; +} + +static PyObject * +list_sizeof(PyListObject *self) +{ + Py_ssize_t res; + + res = sizeof(PyListObject) + self->allocated * sizeof(void*); + return PyLong_FromSsize_t(res); +} + +static PyObject *list_iter(PyObject *seq); +static PyObject *list_reversed(PyListObject* seq, PyObject* unused); + +PyDoc_STRVAR(getitem_doc, +"x.__getitem__(y) <==> x[y]"); +PyDoc_STRVAR(reversed_doc, +"L.__reversed__() -- return a reverse iterator over the list"); +PyDoc_STRVAR(sizeof_doc, +"L.__sizeof__() -- size of L in memory, in bytes"); +PyDoc_STRVAR(clear_doc, +"L.clear() -> None -- remove all items from L"); +PyDoc_STRVAR(copy_doc, +"L.copy() -> list -- a shallow copy of L"); +PyDoc_STRVAR(append_doc, +"L.append(object) -> None -- append object to end"); +PyDoc_STRVAR(extend_doc, +"L.extend(iterable) -> None -- extend list by appending elements from the iterable"); +PyDoc_STRVAR(insert_doc, +"L.insert(index, object) -- insert object before index"); +PyDoc_STRVAR(pop_doc, +"L.pop([index]) -> item -- remove and return item at index (default last).\n" +"Raises IndexError if list is empty or index is out of range."); +PyDoc_STRVAR(remove_doc, +"L.remove(value) -> None -- remove first occurrence of value.\n" +"Raises ValueError if the value is not present."); +PyDoc_STRVAR(index_doc, +"L.index(value, [start, [stop]]) -> integer -- return first index of value.\n" +"Raises ValueError if the value is not present."); +PyDoc_STRVAR(count_doc, +"L.count(value) -> integer -- return number of occurrences of value"); +PyDoc_STRVAR(reverse_doc, +"L.reverse() -- reverse *IN PLACE*"); +PyDoc_STRVAR(sort_doc, +"L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*"); + +static PyObject *list_subscript(PyListObject*, PyObject*); + +static PyMethodDef list_methods[] = { + {"__getitem__", (PyCFunction)list_subscript, METH_O|METH_COEXIST, getitem_doc}, + {"__reversed__",(PyCFunction)list_reversed, METH_NOARGS, reversed_doc}, + {"__sizeof__", (PyCFunction)list_sizeof, METH_NOARGS, sizeof_doc}, + {"clear", (PyCFunction)listclear, METH_NOARGS, clear_doc}, + {"copy", (PyCFunction)listcopy, METH_NOARGS, copy_doc}, + {"append", (PyCFunction)listappend, METH_O, append_doc}, + {"insert", (PyCFunction)listinsert, METH_VARARGS, insert_doc}, + {"extend", (PyCFunction)listextend, METH_O, extend_doc}, + {"pop", (PyCFunction)listpop, METH_VARARGS, pop_doc}, + {"remove", (PyCFunction)listremove, METH_O, remove_doc}, + {"index", (PyCFunction)listindex, METH_VARARGS, index_doc}, + {"count", (PyCFunction)listcount, METH_O, count_doc}, + {"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc}, + {"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc}, + {NULL, NULL} /* sentinel */ +}; + +static PySequenceMethods list_as_sequence = { + (lenfunc)list_length, /* sq_length */ + (binaryfunc)list_concat, /* sq_concat */ + (ssizeargfunc)list_repeat, /* sq_repeat */ + (ssizeargfunc)list_item, /* sq_item */ + 0, /* sq_slice */ + (ssizeobjargproc)list_ass_item, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)list_contains, /* sq_contains */ + (binaryfunc)list_inplace_concat, /* sq_inplace_concat */ + (ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */ +}; + +PyDoc_STRVAR(list_doc, +"list() -> new empty list\n" +"list(iterable) -> new list initialized from iterable's items"); + +static PyObject * +list_subscript(PyListObject* self, PyObject* item) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i; + i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return NULL; + if (i < 0) + i += PyList_GET_SIZE(self); + return list_item(self, i); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelength, cur, i; + PyObject* result; + PyObject* it; + PyObject **src, **dest; + + if (PySlice_GetIndicesEx(item, Py_SIZE(self), + &start, &stop, &step, &slicelength) < 0) { + return NULL; + } + + if (slicelength <= 0) { + return PyList_New(0); + } + else if (step == 1) { + return list_slice(self, start, stop); + } + else { + result = PyList_New(slicelength); + if (!result) return NULL; + + src = self->ob_item; + dest = ((PyListObject *)result)->ob_item; + for (cur = start, i = 0; i < slicelength; + cur += (size_t)step, i++) { + it = src[cur]; + Py_INCREF(it); + dest[i] = it; + } + + return result; + } + } + else { + PyErr_Format(PyExc_TypeError, + "list indices must be integers, not %.200s", + item->ob_type->tp_name); + return NULL; + } +} + +static int +list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return -1; + if (i < 0) + i += PyList_GET_SIZE(self); + return list_ass_item(self, i, value); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelength; + + if (PySlice_GetIndicesEx(item, Py_SIZE(self), + &start, &stop, &step, &slicelength) < 0) { + return -1; + } + + if (step == 1) + return list_ass_slice(self, start, stop, value); + + /* Make sure s[5:2] = [..] inserts at the right place: + before 5, not before 2. */ + if ((step < 0 && start < stop) || + (step > 0 && start > stop)) + stop = start; + + if (value == NULL) { + /* delete slice */ + PyObject **garbage; + size_t cur; + Py_ssize_t i; + + if (slicelength <= 0) + return 0; + + if (step < 0) { + stop = start + 1; + start = stop + step*(slicelength - 1) - 1; + step = -step; + } + + assert((size_t)slicelength <= + PY_SIZE_MAX / sizeof(PyObject*)); + + garbage = (PyObject**) + PyMem_MALLOC(slicelength*sizeof(PyObject*)); + if (!garbage) { + PyErr_NoMemory(); + return -1; + } + + /* drawing pictures might help understand these for + loops. Basically, we memmove the parts of the + list that are *not* part of the slice: step-1 + items for each item that is part of the slice, + and then tail end of the list that was not + covered by the slice */ + for (cur = start, i = 0; + cur < (size_t)stop; + cur += step, i++) { + Py_ssize_t lim = step - 1; + + garbage[i] = PyList_GET_ITEM(self, cur); + + if (cur + step >= (size_t)Py_SIZE(self)) { + lim = Py_SIZE(self) - cur - 1; + } + + memmove(self->ob_item + cur - i, + self->ob_item + cur + 1, + lim * sizeof(PyObject *)); + } + cur = start + (size_t)slicelength * step; + if (cur < (size_t)Py_SIZE(self)) { + memmove(self->ob_item + cur - slicelength, + self->ob_item + cur, + (Py_SIZE(self) - cur) * + sizeof(PyObject *)); + } + + Py_SIZE(self) -= slicelength; + list_resize(self, Py_SIZE(self)); + + for (i = 0; i < slicelength; i++) { + Py_DECREF(garbage[i]); + } + PyMem_FREE(garbage); + + return 0; + } + else { + /* assign slice */ + PyObject *ins, *seq; + PyObject **garbage, **seqitems, **selfitems; + Py_ssize_t cur, i; + + /* protect against a[::-1] = a */ + if (self == (PyListObject*)value) { + seq = list_slice((PyListObject*)value, 0, + PyList_GET_SIZE(value)); + } + else { + seq = PySequence_Fast(value, + "must assign iterable " + "to extended slice"); + } + if (!seq) + return -1; + + if (PySequence_Fast_GET_SIZE(seq) != slicelength) { + PyErr_Format(PyExc_ValueError, + "attempt to assign sequence of " + "size %zd to extended slice of " + "size %zd", + PySequence_Fast_GET_SIZE(seq), + slicelength); + Py_DECREF(seq); + return -1; + } + + if (!slicelength) { + Py_DECREF(seq); + return 0; + } + + garbage = (PyObject**) + PyMem_MALLOC(slicelength*sizeof(PyObject*)); + if (!garbage) { + Py_DECREF(seq); + PyErr_NoMemory(); + return -1; + } + + selfitems = self->ob_item; + seqitems = PySequence_Fast_ITEMS(seq); + for (cur = start, i = 0; i < slicelength; + cur += (size_t)step, i++) { + garbage[i] = selfitems[cur]; + ins = seqitems[i]; + Py_INCREF(ins); + selfitems[cur] = ins; + } + + for (i = 0; i < slicelength; i++) { + Py_DECREF(garbage[i]); + } + + PyMem_FREE(garbage); + Py_DECREF(seq); + + return 0; + } + } + else { + PyErr_Format(PyExc_TypeError, + "list indices must be integers, not %.200s", + item->ob_type->tp_name); + return -1; + } +} + +static PyMappingMethods list_as_mapping = { + (lenfunc)list_length, + (binaryfunc)list_subscript, + (objobjargproc)list_ass_subscript +}; + +PyTypeObject PyList_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "list", + sizeof(PyListObject), + 0, + (destructor)list_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + (reprfunc)list_repr, /* tp_repr */ + 0, /* tp_as_number */ + &list_as_sequence, /* tp_as_sequence */ + &list_as_mapping, /* tp_as_mapping */ + PyObject_HashNotImplemented, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_LIST_SUBCLASS, /* tp_flags */ + list_doc, /* tp_doc */ + (traverseproc)list_traverse, /* tp_traverse */ + (inquiry)list_clear, /* tp_clear */ + list_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + list_iter, /* tp_iter */ + 0, /* tp_iternext */ + list_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)list_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + +/*********************** List Iterator **************************/ + +typedef struct { + PyObject_HEAD + long it_index; + PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ +} listiterobject; + +static PyObject *list_iter(PyObject *); +static void listiter_dealloc(listiterobject *); +static int listiter_traverse(listiterobject *, visitproc, void *); +static PyObject *listiter_next(listiterobject *); +static PyObject *listiter_len(listiterobject *); + +PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef listiter_methods[] = { + {"__length_hint__", (PyCFunction)listiter_len, METH_NOARGS, length_hint_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyListIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "list_iterator", /* tp_name */ + sizeof(listiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)listiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + 0, /* tp_doc */ + (traverseproc)listiter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)listiter_next, /* tp_iternext */ + listiter_methods, /* tp_methods */ + 0, /* tp_members */ +}; + + +static PyObject * +list_iter(PyObject *seq) +{ + listiterobject *it; + + if (!PyList_Check(seq)) { + PyErr_BadInternalCall(); + return NULL; + } + it = PyObject_GC_New(listiterobject, &PyListIter_Type); + if (it == NULL) + return NULL; + it->it_index = 0; + Py_INCREF(seq); + it->it_seq = (PyListObject *)seq; + _PyObject_GC_TRACK(it); + return (PyObject *)it; +} + +static void +listiter_dealloc(listiterobject *it) +{ + _PyObject_GC_UNTRACK(it); + Py_XDECREF(it->it_seq); + PyObject_GC_Del(it); +} + +static int +listiter_traverse(listiterobject *it, visitproc visit, void *arg) +{ + Py_VISIT(it->it_seq); + return 0; +} + +static PyObject * +listiter_next(listiterobject *it) +{ + PyListObject *seq; + PyObject *item; + + assert(it != NULL); + seq = it->it_seq; + if (seq == NULL) + return NULL; + assert(PyList_Check(seq)); + + if (it->it_index < PyList_GET_SIZE(seq)) { + item = PyList_GET_ITEM(seq, it->it_index); + ++it->it_index; + Py_INCREF(item); + return item; + } + + Py_DECREF(seq); + it->it_seq = NULL; + return NULL; +} + +static PyObject * +listiter_len(listiterobject *it) +{ + Py_ssize_t len; + if (it->it_seq) { + len = PyList_GET_SIZE(it->it_seq) - it->it_index; + if (len >= 0) + return PyLong_FromSsize_t(len); + } + return PyLong_FromLong(0); +} +/*********************** List Reverse Iterator **************************/ + +typedef struct { + PyObject_HEAD + Py_ssize_t it_index; + PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ +} listreviterobject; + +static PyObject *list_reversed(PyListObject *, PyObject *); +static void listreviter_dealloc(listreviterobject *); +static int listreviter_traverse(listreviterobject *, visitproc, void *); +static PyObject *listreviter_next(listreviterobject *); +static PyObject *listreviter_len(listreviterobject *); + +static PyMethodDef listreviter_methods[] = { + {"__length_hint__", (PyCFunction)listreviter_len, METH_NOARGS, length_hint_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyListRevIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "list_reverseiterator", /* tp_name */ + sizeof(listreviterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)listreviter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + 0, /* tp_doc */ + (traverseproc)listreviter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)listreviter_next, /* tp_iternext */ + listreviter_methods, /* tp_methods */ + 0, +}; + +static PyObject * +list_reversed(PyListObject *seq, PyObject *unused) +{ + listreviterobject *it; + + it = PyObject_GC_New(listreviterobject, &PyListRevIter_Type); + if (it == NULL) + return NULL; + assert(PyList_Check(seq)); + it->it_index = PyList_GET_SIZE(seq) - 1; + Py_INCREF(seq); + it->it_seq = seq; + PyObject_GC_Track(it); + return (PyObject *)it; +} + +static void +listreviter_dealloc(listreviterobject *it) +{ + PyObject_GC_UnTrack(it); + Py_XDECREF(it->it_seq); + PyObject_GC_Del(it); +} + +static int +listreviter_traverse(listreviterobject *it, visitproc visit, void *arg) +{ + Py_VISIT(it->it_seq); + return 0; +} + +static PyObject * +listreviter_next(listreviterobject *it) +{ + PyObject *item; + Py_ssize_t index = it->it_index; + PyListObject *seq = it->it_seq; + + if (index>=0 && index < PyList_GET_SIZE(seq)) { + item = PyList_GET_ITEM(seq, index); + it->it_index--; + Py_INCREF(item); + return item; + } + it->it_index = -1; + if (seq != NULL) { + it->it_seq = NULL; + Py_DECREF(seq); + } + return NULL; +} + +static PyObject * +listreviter_len(listreviterobject *it) +{ + Py_ssize_t len = it->it_index + 1; + if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len) + len = 0; + return PyLong_FromSsize_t(len); +} diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/longobject.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/longobject.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,4836 @@ +/* Long (arbitrary precision) integer object implementation */ + +/* XXX The functional organization of this file is terrible */ + +#include "Python.h" +#include "longintrepr.h" + + +/* convert a PyLong of size 1, 0 or -1 to an sdigit */ +#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \ + (Py_SIZE(x) == 0 ? (sdigit)0 : \ + (sdigit)(x)->ob_digit[0])) +#define ABS(x) ((x) < 0 ? -(x) : (x)) + +/* If a freshly-allocated long is already shared, it must + be a small integer, so negating it must go to PyLong_FromLong */ +#define NEGATE(x) \ + do if (Py_REFCNT(x) == 1) Py_SIZE(x) = -Py_SIZE(x); \ + else { PyObject* tmp=PyLong_FromLong(-MEDIUM_VALUE(x)); \ + Py_DECREF(x); (x) = (PyLongObject*)tmp; } \ + while(0) +/* For long multiplication, use the O(N**2) school algorithm unless + * both operands contain more than KARATSUBA_CUTOFF digits (this + * being an internal Python long digit, in base BASE). + */ +#define KARATSUBA_CUTOFF 70 +#define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF) + +/* For exponentiation, use the binary left-to-right algorithm + * unless the exponent contains more than FIVEARY_CUTOFF digits. + * In that case, do 5 bits at a time. The potential drawback is that + * a table of 2**5 intermediate results is computed. + */ +#define FIVEARY_CUTOFF 8 + +#undef MIN +#undef MAX +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + +#define SIGCHECK(PyTryBlock) \ + do { \ + if (PyErr_CheckSignals()) PyTryBlock \ + } while(0) + +/* Normalize (remove leading zeros from) a long int object. + Doesn't attempt to free the storage--in most cases, due to the nature + of the algorithms used, this could save at most be one word anyway. */ + +static PyLongObject * +long_normalize(register PyLongObject *v) +{ + Py_ssize_t j = ABS(Py_SIZE(v)); + Py_ssize_t i = j; + + while (i > 0 && v->ob_digit[i-1] == 0) + --i; + if (i != j) + Py_SIZE(v) = (Py_SIZE(v) < 0) ? -(i) : i; + return v; +} + +/* Allocate a new long int object with size digits. + Return NULL and set exception if we run out of memory. */ + +#define MAX_LONG_DIGITS \ + ((PY_SSIZE_T_MAX - offsetof(PyLongObject, ob_digit))/sizeof(digit)) + +PyLongObject * +_PyLong_New(Py_ssize_t size) +{ + PyLongObject *result; + /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + + sizeof(digit)*size. Previous incarnations of this code used + sizeof(PyVarObject) instead of the offsetof, but this risks being + incorrect in the presence of padding between the PyVarObject header + and the digits. */ + if (size > (Py_ssize_t)MAX_LONG_DIGITS) { + PyErr_SetString(PyExc_OverflowError, + "too many digits in integer"); + return NULL; + } + result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) + + size*sizeof(digit)); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size); +} + +PyObject * +_PyLong_Copy(PyLongObject *src) +{ + PyLongObject *result; + Py_ssize_t i; + + assert(src != NULL); + i = Py_SIZE(src); + if (i < 0) + i = -(i); + if (i < 2) { + sdigit ival = src->ob_digit[0]; + if (Py_SIZE(src) < 0) + ival = -ival; + CHECK_SMALL_INT(ival); + } + result = _PyLong_New(i); + if (result != NULL) { + Py_SIZE(result) = Py_SIZE(src); + while (--i >= 0) + result->ob_digit[i] = src->ob_digit[i]; + } + return (PyObject *)result; +} + +/* Create a new long int object from a C long int */ + +PyObject * +PyLong_FromLong(long ival) +{ + PyLongObject *v; + unsigned long abs_ival; + unsigned long t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int sign = 1; + + if (ival < 0) { + /* negate: can't write this as abs_ival = -ival since that + invokes undefined behaviour when ival is LONG_MIN */ + abs_ival = 0U-(unsigned long)ival; + sign = -1; + } + else { + abs_ival = (unsigned long)ival; + } + + /* Fast path for single-digit ints */ + if (!(abs_ival >> PyLong_SHIFT)) { + v = _PyLong_New(1); + if (v) { + Py_SIZE(v) = sign; + v->ob_digit[0] = Py_SAFE_DOWNCAST( + abs_ival, unsigned long, digit); + } + return (PyObject*)v; + } + +#if PyLong_SHIFT==15 + /* 2 digits */ + if (!(abs_ival >> 2*PyLong_SHIFT)) { + v = _PyLong_New(2); + if (v) { + Py_SIZE(v) = 2*sign; + v->ob_digit[0] = Py_SAFE_DOWNCAST( + abs_ival & PyLong_MASK, unsigned long, digit); + v->ob_digit[1] = Py_SAFE_DOWNCAST( + abs_ival >> PyLong_SHIFT, unsigned long, digit); + } + return (PyObject*)v; + } +#endif + + /* Larger numbers: loop to determine number of digits */ + t = abs_ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits*sign; + t = abs_ival; + while (t) { + *p++ = Py_SAFE_DOWNCAST( + t & PyLong_MASK, unsigned long, digit); + t >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C unsigned long int */ + +PyObject * +PyLong_FromUnsignedLong(unsigned long ival) +{ + PyLongObject *v; + unsigned long t; + int ndigits = 0; + + if (ival < PyLong_BASE) + return PyLong_FromLong(ival); + /* Count the number of Python digits. */ + t = (unsigned long)ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits; + while (ival) { + *p++ = (digit)(ival & PyLong_MASK); + ival >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C double */ + +PyObject * +PyLong_FromDouble(double dval) +{ + PyLongObject *v; + double frac; + int i, ndig, expo, neg; + neg = 0; + if (Py_IS_INFINITY(dval)) { + PyErr_SetString(PyExc_OverflowError, + "cannot convert float infinity to integer"); + return NULL; + } + if (Py_IS_NAN(dval)) { + PyErr_SetString(PyExc_ValueError, + "cannot convert float NaN to integer"); + return NULL; + } + if (dval < 0.0) { + neg = 1; + dval = -dval; + } + frac = frexp(dval, &expo); /* dval = frac*2**expo; 0.0 <= frac < 1.0 */ + if (expo <= 0) + return PyLong_FromLong(0L); + ndig = (expo-1) / PyLong_SHIFT + 1; /* Number of 'digits' in result */ + v = _PyLong_New(ndig); + if (v == NULL) + return NULL; + frac = ldexp(frac, (expo-1) % PyLong_SHIFT + 1); + for (i = ndig; --i >= 0; ) { + digit bits = (digit)frac; + v->ob_digit[i] = bits; + frac = frac - (double)bits; + frac = ldexp(frac, PyLong_SHIFT); + } + if (neg) + Py_SIZE(v) = -(Py_SIZE(v)); + return (PyObject *)v; +} + +/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define + * anything about what happens when a signed integer operation overflows, + * and some compilers think they're doing you a favor by being "clever" + * then. The bit pattern for the largest postive signed long is + * (unsigned long)LONG_MAX, and for the smallest negative signed long + * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. + * However, some other compilers warn about applying unary minus to an + * unsigned operand. Hence the weird "0-". + */ +#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) +#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) + +/* Get a C long int from a long int object or any object that has an __int__ + method. + + On overflow, return -1 and set *overflow to 1 or -1 depending on the sign of + the result. Otherwise *overflow is 0. + + For other errors (e.g., TypeError), return -1 and set an error condition. + In this case *overflow will be 0. +*/ + +long +PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) +{ + /* This version by Tim Peters */ + register PyLongObject *v; + unsigned long x, prev; + long res; + Py_ssize_t i; + int sign; + int do_decref = 0; /* if nb_int was called */ + + *overflow = 0; + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + nb = vv->ob_type->tp_as_number; + if (nb == NULL || nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + return -1; + } + vv = (*nb->nb_int) (vv); + if (vv == NULL) + return -1; + do_decref = 1; + if (!PyLong_Check(vv)) { + Py_DECREF(vv); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return -1; + } + } + + res = -1; + v = (PyLongObject *)vv; + i = Py_SIZE(v); + + switch (i) { + case -1: + res = -(sdigit)v->ob_digit[0]; + break; + case 0: + res = 0; + break; + case 1: + res = v->ob_digit[0]; + break; + default: + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (unsigned long)LONG_MAX) { + res = (long)x * sign; + } + else if (sign < 0 && x == PY_ABS_LONG_MIN) { + res = LONG_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + } + exit: + if (do_decref) { + Py_DECREF(vv); + } + return res; +} + +/* Get a C long int from a long int object or any object that has an __int__ + method. Return -1 and set an error if overflow occurs. */ + +long +PyLong_AsLong(PyObject *obj) +{ + int overflow; + long result = PyLong_AsLongAndOverflow(obj, &overflow); + if (overflow) { + /* XXX: could be cute and give a different + message for overflow == -1 */ + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C long"); + } + return result; +} + +/* Get a Py_ssize_t from a long int object. + Returns -1 and sets an error condition if overflow occurs. */ + +Py_ssize_t +PyLong_AsSsize_t(PyObject *vv) { + register PyLongObject *v; + size_t x, prev; + Py_ssize_t i; + int sign; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + if (!PyLong_Check(vv)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return -1; + } + + v = (PyLongObject *)vv; + i = Py_SIZE(v); + switch (i) { + case -1: return -(sdigit)v->ob_digit[0]; + case 0: return 0; + case 1: return v->ob_digit[0]; + } + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) + goto overflow; + } + /* Haven't lost any bits, but casting to a signed type requires + * extra care (see comment above). + */ + if (x <= (size_t)PY_SSIZE_T_MAX) { + return (Py_ssize_t)x * sign; + } + else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) { + return PY_SSIZE_T_MIN; + } + /* else overflow */ + + overflow: + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C ssize_t"); + return -1; +} + +/* Get a C unsigned long int from a long int object. + Returns -1 and sets an error condition if overflow occurs. */ + +unsigned long +PyLong_AsUnsignedLong(PyObject *vv) +{ + register PyLongObject *v; + unsigned long x, prev; + Py_ssize_t i; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return (unsigned long)-1; + } + if (!PyLong_Check(vv)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return (unsigned long)-1; + } + + v = (PyLongObject *)vv; + i = Py_SIZE(v); + x = 0; + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned int"); + return (unsigned long) -1; + } + switch (i) { + case 0: return 0; + case 1: return v->ob_digit[0]; + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + PyErr_SetString(PyExc_OverflowError, + "python int too large to convert " + "to C unsigned long"); + return (unsigned long) -1; + } + } + return x; +} + +/* Get a C size_t from a long int object. Returns (size_t)-1 and sets + an error condition if overflow occurs. */ + +size_t +PyLong_AsSize_t(PyObject *vv) +{ + register PyLongObject *v; + size_t x, prev; + Py_ssize_t i; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return (size_t) -1; + } + if (!PyLong_Check(vv)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return (size_t)-1; + } + + v = (PyLongObject *)vv; + i = Py_SIZE(v); + x = 0; + if (i < 0) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; + } + switch (i) { + case 0: return 0; + case 1: return v->ob_digit[0]; + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C size_t"); + return (size_t) -1; + } + } + return x; +} + +/* Get a C unsigned long int from a long int object, ignoring the high bits. + Returns -1 and sets an error condition if an error occurs. */ + +static unsigned long +_PyLong_AsUnsignedLongMask(PyObject *vv) +{ + register PyLongObject *v; + unsigned long x; + Py_ssize_t i; + int sign; + + if (vv == NULL || !PyLong_Check(vv)) { + PyErr_BadInternalCall(); + return (unsigned long) -1; + } + v = (PyLongObject *)vv; + i = Py_SIZE(v); + switch (i) { + case 0: return 0; + case 1: return v->ob_digit[0]; + } + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -i; + } + while (--i >= 0) { + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + } + return x * sign; +} + +unsigned long +PyLong_AsUnsignedLongMask(register PyObject *op) +{ + PyNumberMethods *nb; + PyLongObject *lo; + unsigned long val; + + if (op && PyLong_Check(op)) + return _PyLong_AsUnsignedLongMask(op); + + if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL || + nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return (unsigned long)-1; + } + + lo = (PyLongObject*) (*nb->nb_int) (op); + if (lo == NULL) + return (unsigned long)-1; + if (PyLong_Check(lo)) { + val = _PyLong_AsUnsignedLongMask((PyObject *)lo); + Py_DECREF(lo); + if (PyErr_Occurred()) + return (unsigned long)-1; + return val; + } + else + { + Py_DECREF(lo); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return (unsigned long)-1; + } +} + +int +_PyLong_Sign(PyObject *vv) +{ + PyLongObject *v = (PyLongObject *)vv; + + assert(v != NULL); + assert(PyLong_Check(v)); + + return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); +} + +size_t +_PyLong_NumBits(PyObject *vv) +{ + PyLongObject *v = (PyLongObject *)vv; + size_t result = 0; + Py_ssize_t ndigits; + + assert(v != NULL); + assert(PyLong_Check(v)); + ndigits = ABS(Py_SIZE(v)); + assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); + if (ndigits > 0) { + digit msd = v->ob_digit[ndigits - 1]; + + result = (ndigits - 1) * PyLong_SHIFT; + if (result / PyLong_SHIFT != (size_t)(ndigits - 1)) + goto Overflow; + do { + ++result; + if (result == 0) + goto Overflow; + msd >>= 1; + } while (msd); + } + return result; + + Overflow: + PyErr_SetString(PyExc_OverflowError, "int has too many bits " + "to express in a platform size_t"); + return (size_t)-1; +} + +PyObject * +_PyLong_FromByteArray(const unsigned char* bytes, size_t n, + int little_endian, int is_signed) +{ + const unsigned char* pstartbyte; /* LSB of bytes */ + int incr; /* direction to move pstartbyte */ + const unsigned char* pendbyte; /* MSB of bytes */ + size_t numsignificantbytes; /* number of bytes that matter */ + Py_ssize_t ndigits; /* number of Python long digits */ + PyLongObject* v; /* result */ + Py_ssize_t idigit = 0; /* next free index in v->ob_digit */ + + if (n == 0) + return PyLong_FromLong(0L); + + if (little_endian) { + pstartbyte = bytes; + pendbyte = bytes + n - 1; + incr = 1; + } + else { + pstartbyte = bytes + n - 1; + pendbyte = bytes; + incr = -1; + } + + if (is_signed) + is_signed = *pendbyte >= 0x80; + + /* Compute numsignificantbytes. This consists of finding the most + significant byte. Leading 0 bytes are insignificant if the number + is positive, and leading 0xff bytes if negative. */ + { + size_t i; + const unsigned char* p = pendbyte; + const int pincr = -incr; /* search MSB to LSB */ + const unsigned char insignficant = is_signed ? 0xff : 0x00; + + for (i = 0; i < n; ++i, p += pincr) { + if (*p != insignficant) + break; + } + numsignificantbytes = n - i; + /* 2's-comp is a bit tricky here, e.g. 0xff00 == -0x0100, so + actually has 2 significant bytes. OTOH, 0xff0001 == + -0x00ffff, so we wouldn't *need* to bump it there; but we + do for 0xffff = -0x0001. To be safe without bothering to + check every case, bump it regardless. */ + if (is_signed && numsignificantbytes < n) + ++numsignificantbytes; + } + + /* How many Python long digits do we need? We have + 8*numsignificantbytes bits, and each Python long digit has + PyLong_SHIFT bits, so it's the ceiling of the quotient. */ + /* catch overflow before it happens */ + if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { + PyErr_SetString(PyExc_OverflowError, + "byte array too long to convert to int"); + return NULL; + } + ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + v = _PyLong_New(ndigits); + if (v == NULL) + return NULL; + + /* Copy the bits over. The tricky parts are computing 2's-comp on + the fly for signed numbers, and dealing with the mismatch between + 8-bit bytes and (probably) 15-bit Python digits.*/ + { + size_t i; + twodigits carry = 1; /* for 2's-comp calculation */ + twodigits accum = 0; /* sliding register */ + unsigned int accumbits = 0; /* number of bits in accum */ + const unsigned char* p = pstartbyte; + + for (i = 0; i < numsignificantbytes; ++i, p += incr) { + twodigits thisbyte = *p; + /* Compute correction for 2's comp, if needed. */ + if (is_signed) { + thisbyte = (0xff ^ thisbyte) + carry; + carry = thisbyte >> 8; + thisbyte &= 0xff; + } + /* Because we're going LSB to MSB, thisbyte is + more significant than what's already in accum, + so needs to be prepended to accum. */ + accum |= (twodigits)thisbyte << accumbits; + accumbits += 8; + if (accumbits >= PyLong_SHIFT) { + /* There's enough to fill a Python digit. */ + assert(idigit < ndigits); + v->ob_digit[idigit] = (digit)(accum & PyLong_MASK); + ++idigit; + accum >>= PyLong_SHIFT; + accumbits -= PyLong_SHIFT; + assert(accumbits < PyLong_SHIFT); + } + } + assert(accumbits < PyLong_SHIFT); + if (accumbits) { + assert(idigit < ndigits); + v->ob_digit[idigit] = (digit)accum; + ++idigit; + } + } + + Py_SIZE(v) = is_signed ? -idigit : idigit; + return (PyObject *)long_normalize(v); +} + +int +_PyLong_AsByteArray(PyLongObject* v, + unsigned char* bytes, size_t n, + int little_endian, int is_signed) +{ + Py_ssize_t i; /* index into v->ob_digit */ + Py_ssize_t ndigits; /* |v->ob_size| */ + twodigits accum; /* sliding register */ + unsigned int accumbits; /* # bits in accum */ + int do_twos_comp; /* store 2's-comp? is_signed and v < 0 */ + digit carry; /* for computing 2's-comp */ + size_t j; /* # bytes filled */ + unsigned char* p; /* pointer to next byte in bytes */ + int pincr; /* direction to move p */ + + assert(v != NULL && PyLong_Check(v)); + + if (Py_SIZE(v) < 0) { + ndigits = -(Py_SIZE(v)); + if (!is_signed) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative int to unsigned"); + return -1; + } + do_twos_comp = 1; + } + else { + ndigits = Py_SIZE(v); + do_twos_comp = 0; + } + + if (little_endian) { + p = bytes; + pincr = 1; + } + else { + p = bytes + n - 1; + pincr = -1; + } + + /* Copy over all the Python digits. + It's crucial that every Python digit except for the MSD contribute + exactly PyLong_SHIFT bits to the total, so first assert that the long is + normalized. */ + assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); + j = 0; + accum = 0; + accumbits = 0; + carry = do_twos_comp ? 1 : 0; + for (i = 0; i < ndigits; ++i) { + digit thisdigit = v->ob_digit[i]; + if (do_twos_comp) { + thisdigit = (thisdigit ^ PyLong_MASK) + carry; + carry = thisdigit >> PyLong_SHIFT; + thisdigit &= PyLong_MASK; + } + /* Because we're going LSB to MSB, thisdigit is more + significant than what's already in accum, so needs to be + prepended to accum. */ + accum |= (twodigits)thisdigit << accumbits; + + /* The most-significant digit may be (probably is) at least + partly empty. */ + if (i == ndigits - 1) { + /* Count # of sign bits -- they needn't be stored, + * although for signed conversion we need later to + * make sure at least one sign bit gets stored. */ + digit s = do_twos_comp ? thisdigit ^ PyLong_MASK : thisdigit; + while (s != 0) { + s >>= 1; + accumbits++; + } + } + else + accumbits += PyLong_SHIFT; + + /* Store as many bytes as possible. */ + while (accumbits >= 8) { + if (j >= n) + goto Overflow; + ++j; + *p = (unsigned char)(accum & 0xff); + p += pincr; + accumbits -= 8; + accum >>= 8; + } + } + + /* Store the straggler (if any). */ + assert(accumbits < 8); + assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ + if (accumbits > 0) { + if (j >= n) + goto Overflow; + ++j; + if (do_twos_comp) { + /* Fill leading bits of the byte with sign bits + (appropriately pretending that the long had an + infinite supply of sign bits). */ + accum |= (~(twodigits)0) << accumbits; + } + *p = (unsigned char)(accum & 0xff); + p += pincr; + } + else if (j == n && n > 0 && is_signed) { + /* The main loop filled the byte array exactly, so the code + just above didn't get to ensure there's a sign bit, and the + loop below wouldn't add one either. Make sure a sign bit + exists. */ + unsigned char msb = *(p - pincr); + int sign_bit_set = msb >= 0x80; + assert(accumbits == 0); + if (sign_bit_set == do_twos_comp) + return 0; + else + goto Overflow; + } + + /* Fill remaining bytes with copies of the sign bit. */ + { + unsigned char signbyte = do_twos_comp ? 0xffU : 0U; + for ( ; j < n; ++j, p += pincr) + *p = signbyte; + } + + return 0; + + Overflow: + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + return -1; + +} + +/* Create a new long int object from a C pointer */ + +PyObject * +PyLong_FromVoidPtr(void *p) +{ +#ifndef HAVE_LONG_LONG +# error "PyLong_FromVoidPtr: sizeof(void*) > sizeof(long), but no long long" +#endif +#if SIZEOF_LONG_LONG < SIZEOF_VOID_P +# error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" +#endif + /* special-case null pointer */ + if (!p) + return PyLong_FromLong(0); + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)(Py_uintptr_t)p); + +} + +/* Get a C pointer from a long int object. */ + +void * +PyLong_AsVoidPtr(PyObject *vv) +{ +#if SIZEOF_VOID_P <= SIZEOF_LONG + long x; + + if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) + x = PyLong_AsLong(vv); + else + x = PyLong_AsUnsignedLong(vv); +#else + +#ifndef HAVE_LONG_LONG +# error "PyLong_AsVoidPtr: sizeof(void*) > sizeof(long), but no long long" +#endif +#if SIZEOF_LONG_LONG < SIZEOF_VOID_P +# error "PyLong_AsVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" +#endif + PY_LONG_LONG x; + + if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) + x = PyLong_AsLongLong(vv); + else + x = PyLong_AsUnsignedLongLong(vv); + +#endif /* SIZEOF_VOID_P <= SIZEOF_LONG */ + + if (x == -1 && PyErr_Occurred()) + return NULL; + return (void *)x; +} + +#ifdef HAVE_LONG_LONG + +/* Initial PY_LONG_LONG support by Chris Herborth (chrish@qnx.com), later + * rewritten to use the newer PyLong_{As,From}ByteArray API. + */ + +#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one +#define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN) + +/* Create a new long int object from a C PY_LONG_LONG int. */ + +PyObject * +PyLong_FromLongLong(PY_LONG_LONG ival) +{ + PyLongObject *v; + unsigned PY_LONG_LONG abs_ival; + unsigned PY_LONG_LONG t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int negative = 0; + + if (ival < 0) { + /* avoid signed overflow on negation; see comments + in PyLong_FromLong above. */ + abs_ival = (unsigned PY_LONG_LONG)(-1-ival) + 1; + negative = 1; + } + else { + abs_ival = (unsigned PY_LONG_LONG)ival; + } + + /* Count the number of Python digits. + We used to pick 5 ("big enough for anything"), but that's a + waste of time and space given that 5*15 = 75 bits are rarely + needed. */ + t = abs_ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = negative ? -ndigits : ndigits; + t = abs_ival; + while (t) { + *p++ = (digit)(t & PyLong_MASK); + t >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C unsigned PY_LONG_LONG int. */ + +PyObject * +PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG ival) +{ + PyLongObject *v; + unsigned PY_LONG_LONG t; + int ndigits = 0; + + if (ival < PyLong_BASE) + return PyLong_FromLong((long)ival); + /* Count the number of Python digits. */ + t = (unsigned PY_LONG_LONG)ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits; + while (ival) { + *p++ = (digit)(ival & PyLong_MASK); + ival >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C Py_ssize_t. */ + +PyObject * +PyLong_FromSsize_t(Py_ssize_t ival) +{ + PyLongObject *v; + size_t abs_ival; + size_t t; /* unsigned so >> doesn't propagate sign bit */ + int ndigits = 0; + int negative = 0; + + CHECK_SMALL_INT(ival); + if (ival < 0) { + /* avoid signed overflow when ival = SIZE_T_MIN */ + abs_ival = (size_t)(-1-ival)+1; + negative = 1; + } + else { + abs_ival = (size_t)ival; + } + + /* Count the number of Python digits. */ + t = abs_ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = negative ? -ndigits : ndigits; + t = abs_ival; + while (t) { + *p++ = (digit)(t & PyLong_MASK); + t >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Create a new long int object from a C size_t. */ + +PyObject * +PyLong_FromSize_t(size_t ival) +{ + PyLongObject *v; + size_t t; + int ndigits = 0; + + if (ival < PyLong_BASE) + return PyLong_FromLong((long)ival); + /* Count the number of Python digits. */ + t = ival; + while (t) { + ++ndigits; + t >>= PyLong_SHIFT; + } + v = _PyLong_New(ndigits); + if (v != NULL) { + digit *p = v->ob_digit; + Py_SIZE(v) = ndigits; + while (ival) { + *p++ = (digit)(ival & PyLong_MASK); + ival >>= PyLong_SHIFT; + } + } + return (PyObject *)v; +} + +/* Get a C long long int from a long int object or any object that has an + __int__ method. Return -1 and set an error if overflow occurs. */ + +PY_LONG_LONG +PyLong_AsLongLong(PyObject *vv) +{ + PyLongObject *v; + PY_LONG_LONG bytes; + int one = 1; + int res; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + PyObject *io; + if ((nb = vv->ob_type->tp_as_number) == NULL || + nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return -1; + } + io = (*nb->nb_int) (vv); + if (io == NULL) + return -1; + if (PyLong_Check(io)) { + bytes = PyLong_AsLongLong(io); + Py_DECREF(io); + return bytes; + } + Py_DECREF(io); + PyErr_SetString(PyExc_TypeError, "integer conversion failed"); + return -1; + } + + v = (PyLongObject*)vv; + switch(Py_SIZE(v)) { + case -1: return -(sdigit)v->ob_digit[0]; + case 0: return 0; + case 1: return v->ob_digit[0]; + } + res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, + SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1); + + /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ + if (res < 0) + return (PY_LONG_LONG)-1; + else + return bytes; +} + +/* Get a C unsigned PY_LONG_LONG int from a long int object. + Return -1 and set an error if overflow occurs. */ + +unsigned PY_LONG_LONG +PyLong_AsUnsignedLongLong(PyObject *vv) +{ + PyLongObject *v; + unsigned PY_LONG_LONG bytes; + int one = 1; + int res; + + if (vv == NULL) { + PyErr_BadInternalCall(); + return (unsigned PY_LONG_LONG)-1; + } + if (!PyLong_Check(vv)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return (unsigned PY_LONG_LONG)-1; + } + + v = (PyLongObject*)vv; + switch(Py_SIZE(v)) { + case 0: return 0; + case 1: return v->ob_digit[0]; + } + + res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, + SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0); + + /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ + if (res < 0) + return (unsigned PY_LONG_LONG)res; + else + return bytes; +} + +/* Get a C unsigned long int from a long int object, ignoring the high bits. + Returns -1 and sets an error condition if an error occurs. */ + +static unsigned PY_LONG_LONG +_PyLong_AsUnsignedLongLongMask(PyObject *vv) +{ + register PyLongObject *v; + unsigned PY_LONG_LONG x; + Py_ssize_t i; + int sign; + + if (vv == NULL || !PyLong_Check(vv)) { + PyErr_BadInternalCall(); + return (unsigned long) -1; + } + v = (PyLongObject *)vv; + switch(Py_SIZE(v)) { + case 0: return 0; + case 1: return v->ob_digit[0]; + } + i = Py_SIZE(v); + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -i; + } + while (--i >= 0) { + x = (x << PyLong_SHIFT) | v->ob_digit[i]; + } + return x * sign; +} + +unsigned PY_LONG_LONG +PyLong_AsUnsignedLongLongMask(register PyObject *op) +{ + PyNumberMethods *nb; + PyLongObject *lo; + unsigned PY_LONG_LONG val; + + if (op && PyLong_Check(op)) + return _PyLong_AsUnsignedLongLongMask(op); + + if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL || + nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return (unsigned PY_LONG_LONG)-1; + } + + lo = (PyLongObject*) (*nb->nb_int) (op); + if (lo == NULL) + return (unsigned PY_LONG_LONG)-1; + if (PyLong_Check(lo)) { + val = _PyLong_AsUnsignedLongLongMask((PyObject *)lo); + Py_DECREF(lo); + if (PyErr_Occurred()) + return (unsigned PY_LONG_LONG)-1; + return val; + } + else + { + Py_DECREF(lo); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return (unsigned PY_LONG_LONG)-1; + } +} +#undef IS_LITTLE_ENDIAN + +/* Get a C long long int from a long int object or any object that has an + __int__ method. + + On overflow, return -1 and set *overflow to 1 or -1 depending on the sign of + the result. Otherwise *overflow is 0. + + For other errors (e.g., TypeError), return -1 and set an error condition. + In this case *overflow will be 0. +*/ + +PY_LONG_LONG +PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) +{ + /* This version by Tim Peters */ + register PyLongObject *v; + unsigned PY_LONG_LONG x, prev; + PY_LONG_LONG res; + Py_ssize_t i; + int sign; + int do_decref = 0; /* if nb_int was called */ + + *overflow = 0; + if (vv == NULL) { + PyErr_BadInternalCall(); + return -1; + } + + if (!PyLong_Check(vv)) { + PyNumberMethods *nb; + nb = vv->ob_type->tp_as_number; + if (nb == NULL || nb->nb_int == NULL) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + return -1; + } + vv = (*nb->nb_int) (vv); + if (vv == NULL) + return -1; + do_decref = 1; + if (!PyLong_Check(vv)) { + Py_DECREF(vv); + PyErr_SetString(PyExc_TypeError, + "nb_int should return int object"); + return -1; + } + } + + res = -1; + v = (PyLongObject *)vv; + i = Py_SIZE(v); + + switch (i) { + case -1: + res = -(sdigit)v->ob_digit[0]; + break; + case 0: + res = 0; + break; + case 1: + res = v->ob_digit[0]; + break; + default: + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + prev = x; + x = (x << PyLong_SHIFT) + v->ob_digit[i]; + if ((x >> PyLong_SHIFT) != prev) { + *overflow = sign; + goto exit; + } + } + /* Haven't lost any bits, but casting to long requires extra + * care (see comment above). + */ + if (x <= (unsigned PY_LONG_LONG)PY_LLONG_MAX) { + res = (PY_LONG_LONG)x * sign; + } + else if (sign < 0 && x == PY_ABS_LLONG_MIN) { + res = PY_LLONG_MIN; + } + else { + *overflow = sign; + /* res is already set to -1 */ + } + } + exit: + if (do_decref) { + Py_DECREF(vv); + } + return res; +} + +#endif /* HAVE_LONG_LONG */ + +#define CHECK_BINOP(v,w) \ + do { \ + if (!PyLong_Check(v) || !PyLong_Check(w)) \ + Py_RETURN_NOTIMPLEMENTED; \ + } while(0) + +/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d < + 2**k if d is nonzero, else 0. */ + +static const unsigned char BitLengthTable[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 +}; + +static int +bits_in_digit(digit d) +{ + int d_bits = 0; + while (d >= 32) { + d_bits += 6; + d >>= 6; + } + d_bits += (int)BitLengthTable[d]; + return d_bits; +} + +/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] + * is modified in place, by adding y to it. Carries are propagated as far as + * x[m-1], and the remaining carry (0 or 1) is returned. + */ +static digit +v_iadd(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) +{ + Py_ssize_t i; + digit carry = 0; + + assert(m >= n); + for (i = 0; i < n; ++i) { + carry += x[i] + y[i]; + x[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + assert((carry & 1) == carry); + } + for (; carry && i < m; ++i) { + carry += x[i]; + x[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + assert((carry & 1) == carry); + } + return carry; +} + +/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] + * is modified in place, by subtracting y from it. Borrows are propagated as + * far as x[m-1], and the remaining borrow (0 or 1) is returned. + */ +static digit +v_isub(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) +{ + Py_ssize_t i; + digit borrow = 0; + + assert(m >= n); + for (i = 0; i < n; ++i) { + borrow = x[i] - y[i] - borrow; + x[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* keep only 1 sign bit */ + } + for (; borrow && i < m; ++i) { + borrow = x[i] - borrow; + x[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; + } + return borrow; +} + +/* Shift digit vector a[0:m] d bits left, with 0 <= d < PyLong_SHIFT. Put + * result in z[0:m], and return the d bits shifted out of the top. + */ +static digit +v_lshift(digit *z, digit *a, Py_ssize_t m, int d) +{ + Py_ssize_t i; + digit carry = 0; + + assert(0 <= d && d < PyLong_SHIFT); + for (i=0; i < m; i++) { + twodigits acc = (twodigits)a[i] << d | carry; + z[i] = (digit)acc & PyLong_MASK; + carry = (digit)(acc >> PyLong_SHIFT); + } + return carry; +} + +/* Shift digit vector a[0:m] d bits right, with 0 <= d < PyLong_SHIFT. Put + * result in z[0:m], and return the d bits shifted out of the bottom. + */ +static digit +v_rshift(digit *z, digit *a, Py_ssize_t m, int d) +{ + Py_ssize_t i; + digit carry = 0; + digit mask = ((digit)1 << d) - 1U; + + assert(0 <= d && d < PyLong_SHIFT); + for (i=m; i-- > 0;) { + twodigits acc = (twodigits)carry << PyLong_SHIFT | a[i]; + carry = (digit)acc & mask; + z[i] = (digit)(acc >> d); + } + return carry; +} + +/* Divide long pin, w/ size digits, by non-zero digit n, storing quotient + in pout, and returning the remainder. pin and pout point at the LSD. + It's OK for pin == pout on entry, which saves oodles of mallocs/frees in + _PyLong_Format, but that should be done with great care since longs are + immutable. */ + +static digit +inplace_divrem1(digit *pout, digit *pin, Py_ssize_t size, digit n) +{ + twodigits rem = 0; + + assert(n > 0 && n <= PyLong_MASK); + pin += size; + pout += size; + while (--size >= 0) { + digit hi; + rem = (rem << PyLong_SHIFT) | *--pin; + *--pout = hi = (digit)(rem / n); + rem -= (twodigits)hi * n; + } + return (digit)rem; +} + +/* Divide a long integer by a digit, returning both the quotient + (as function result) and the remainder (through *prem). + The sign of a is ignored; n should not be zero. */ + +static PyLongObject * +divrem1(PyLongObject *a, digit n, digit *prem) +{ + const Py_ssize_t size = ABS(Py_SIZE(a)); + PyLongObject *z; + + assert(n > 0 && n <= PyLong_MASK); + z = _PyLong_New(size); + if (z == NULL) + return NULL; + *prem = inplace_divrem1(z->ob_digit, a->ob_digit, size, n); + return long_normalize(z); +} + +/* Convert a long integer to a base 10 string. Returns a new non-shared + string. (Return value is non-shared so that callers can modify the + returned value if necessary.) */ + +static PyObject * +long_to_decimal_string(PyObject *aa) +{ + PyLongObject *scratch, *a; + PyObject *str; + Py_ssize_t size, strlen, size_a, i, j; + digit *pout, *pin, rem, tenpow; + unsigned char *p; + int negative; + + a = (PyLongObject *)aa; + if (a == NULL || !PyLong_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + size_a = ABS(Py_SIZE(a)); + negative = Py_SIZE(a) < 0; + + /* quick and dirty upper bound for the number of digits + required to express a in base _PyLong_DECIMAL_BASE: + + #digits = 1 + floor(log2(a) / log2(_PyLong_DECIMAL_BASE)) + + But log2(a) < size_a * PyLong_SHIFT, and + log2(_PyLong_DECIMAL_BASE) = log2(10) * _PyLong_DECIMAL_SHIFT + > 3 * _PyLong_DECIMAL_SHIFT + */ + if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { + PyErr_SetString(PyExc_OverflowError, + "long is too large to format"); + return NULL; + } + /* the expression size_a * PyLong_SHIFT is now safe from overflow */ + size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); + scratch = _PyLong_New(size); + if (scratch == NULL) + return NULL; + + /* convert array of base _PyLong_BASE digits in pin to an array of + base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, + Volume 2 (3rd edn), section 4.4, Method 1b). */ + pin = a->ob_digit; + pout = scratch->ob_digit; + size = 0; + for (i = size_a; --i >= 0; ) { + digit hi = pin[i]; + for (j = 0; j < size; j++) { + twodigits z = (twodigits)pout[j] << PyLong_SHIFT | hi; + hi = (digit)(z / _PyLong_DECIMAL_BASE); + pout[j] = (digit)(z - (twodigits)hi * + _PyLong_DECIMAL_BASE); + } + while (hi) { + pout[size++] = hi % _PyLong_DECIMAL_BASE; + hi /= _PyLong_DECIMAL_BASE; + } + /* check for keyboard interrupt */ + SIGCHECK({ + Py_DECREF(scratch); + return NULL; + }); + } + /* pout should have at least one digit, so that the case when a = 0 + works correctly */ + if (size == 0) + pout[size++] = 0; + + /* calculate exact length of output string, and allocate */ + strlen = negative + 1 + (size - 1) * _PyLong_DECIMAL_SHIFT; + tenpow = 10; + rem = pout[size-1]; + while (rem >= tenpow) { + tenpow *= 10; + strlen++; + } + str = PyUnicode_New(strlen, '9'); + if (str == NULL) { + Py_DECREF(scratch); + return NULL; + } + + /* fill the string right-to-left */ + assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); + p = PyUnicode_1BYTE_DATA(str) + strlen; + *p = '\0'; + /* pout[0] through pout[size-2] contribute exactly + _PyLong_DECIMAL_SHIFT digits each */ + for (i=0; i < size - 1; i++) { + rem = pout[i]; + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { + *--p = '0' + rem % 10; + rem /= 10; + } + } + /* pout[size-1]: always produce at least one decimal digit */ + rem = pout[i]; + do { + *--p = '0' + rem % 10; + rem /= 10; + } while (rem != 0); + + /* and sign */ + if (negative) + *--p = '-'; + + /* check we've counted correctly */ + assert(p == PyUnicode_1BYTE_DATA(str)); + Py_DECREF(scratch); + return (PyObject *)str; +} + +/* Convert a long int object to a string, using a given conversion base, + which should be one of 2, 8, 10 or 16. Return a string object. + If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ + +PyObject * +_PyLong_Format(PyObject *aa, int base) +{ + register PyLongObject *a = (PyLongObject *)aa; + PyObject *v; + Py_ssize_t i, sz; + Py_ssize_t size_a; + char *p; + char sign = '\0'; + char *buffer; + int bits; + + assert(base == 2 || base == 8 || base == 10 || base == 16); + if (base == 10) + return long_to_decimal_string((PyObject *)a); + + if (a == NULL || !PyLong_Check(a)) { + PyErr_BadInternalCall(); + return NULL; + } + size_a = ABS(Py_SIZE(a)); + + /* Compute a rough upper bound for the length of the string */ + switch (base) { + case 16: + bits = 4; + break; + case 8: + bits = 3; + break; + case 2: + bits = 1; + break; + default: + assert(0); /* shouldn't ever get here */ + bits = 0; /* to silence gcc warning */ + } + /* compute length of output string: allow 2 characters for prefix and + 1 for possible '-' sign. */ + if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT / sizeof(Py_UCS4)) { + PyErr_SetString(PyExc_OverflowError, + "int is too large to format"); + return NULL; + } + /* now size_a * PyLong_SHIFT + 3 <= PY_SSIZE_T_MAX, so the RHS below + is safe from overflow */ + sz = 3 + (size_a * PyLong_SHIFT + (bits - 1)) / bits; + assert(sz >= 0); + buffer = PyMem_Malloc(sz); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + p = &buffer[sz]; + if (Py_SIZE(a) < 0) + sign = '-'; + + if (Py_SIZE(a) == 0) { + *--p = '0'; + } + else { + /* JRH: special case for power-of-2 bases */ + twodigits accum = 0; + int accumbits = 0; /* # of bits in accum */ + for (i = 0; i < size_a; ++i) { + accum |= (twodigits)a->ob_digit[i] << accumbits; + accumbits += PyLong_SHIFT; + assert(accumbits >= bits); + do { + char cdigit; + cdigit = (char)(accum & (base - 1)); + cdigit += (cdigit < 10) ? '0' : 'a'-10; + assert(p > buffer); + *--p = cdigit; + accumbits -= bits; + accum >>= bits; + } while (i < size_a-1 ? accumbits >= bits : accum > 0); + } + } + + if (base == 16) + *--p = 'x'; + else if (base == 8) + *--p = 'o'; + else /* (base == 2) */ + *--p = 'b'; + *--p = '0'; + if (sign) + *--p = sign; + v = PyUnicode_DecodeASCII(p, &buffer[sz] - p, NULL); + PyMem_Free(buffer); + return v; +} + +/* Table of digit values for 8-bit string -> integer conversion. + * '0' maps to 0, ..., '9' maps to 9. + * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35. + * All other indices map to 37. + * Note that when converting a base B string, a char c is a legitimate + * base B digit iff _PyLong_DigitValue[Py_CHARPyLong_MASK(c)] < B. + */ +unsigned char _PyLong_DigitValue[256] = { + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, +}; + +/* *str points to the first digit in a string of base `base` digits. base + * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first + * non-digit (which may be *str!). A normalized long is returned. + * The point to this routine is that it takes time linear in the number of + * string characters. + */ +static PyLongObject * +long_from_binary_base(char **str, int base) +{ + char *p = *str; + char *start = p; + int bits_per_char; + Py_ssize_t n; + PyLongObject *z; + twodigits accum; + int bits_in_accum; + digit *pdigit; + + assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0); + n = base; + for (bits_per_char = -1; n; ++bits_per_char) + n >>= 1; + /* n <- total # of bits needed, while setting p to end-of-string */ + while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) + ++p; + *str = p; + /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */ + n = (p - start) * bits_per_char + PyLong_SHIFT - 1; + if (n / bits_per_char < p - start) { + PyErr_SetString(PyExc_ValueError, + "int string too large to convert"); + return NULL; + } + n = n / PyLong_SHIFT; + z = _PyLong_New(n); + if (z == NULL) + return NULL; + /* Read string from right, and fill in long from left; i.e., + * from least to most significant in both. + */ + accum = 0; + bits_in_accum = 0; + pdigit = z->ob_digit; + while (--p >= start) { + int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)]; + assert(k >= 0 && k < base); + accum |= (twodigits)k << bits_in_accum; + bits_in_accum += bits_per_char; + if (bits_in_accum >= PyLong_SHIFT) { + *pdigit++ = (digit)(accum & PyLong_MASK); + assert(pdigit - z->ob_digit <= n); + accum >>= PyLong_SHIFT; + bits_in_accum -= PyLong_SHIFT; + assert(bits_in_accum < PyLong_SHIFT); + } + } + if (bits_in_accum) { + assert(bits_in_accum <= PyLong_SHIFT); + *pdigit++ = (digit)accum; + assert(pdigit - z->ob_digit <= n); + } + while (pdigit - z->ob_digit < n) + *pdigit++ = 0; + return long_normalize(z); +} + +PyObject * +PyLong_FromString(char *str, char **pend, int base) +{ + int sign = 1, error_if_nonzero = 0; + char *start, *orig_str = str; + PyLongObject *z = NULL; + PyObject *strobj; + Py_ssize_t slen; + + if ((base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, + "int() arg 2 must be >= 2 and <= 36"); + return NULL; + } + while (*str != '\0' && isspace(Py_CHARMASK(*str))) + str++; + if (*str == '+') + ++str; + else if (*str == '-') { + ++str; + sign = -1; + } + if (base == 0) { + if (str[0] != '0') + base = 10; + else if (str[1] == 'x' || str[1] == 'X') + base = 16; + else if (str[1] == 'o' || str[1] == 'O') + base = 8; + else if (str[1] == 'b' || str[1] == 'B') + base = 2; + else { + /* "old" (C-style) octal literal, now invalid. + it might still be zero though */ + error_if_nonzero = 1; + base = 10; + } + } + if (str[0] == '0' && + ((base == 16 && (str[1] == 'x' || str[1] == 'X')) || + (base == 8 && (str[1] == 'o' || str[1] == 'O')) || + (base == 2 && (str[1] == 'b' || str[1] == 'B')))) + str += 2; + + start = str; + if ((base & (base - 1)) == 0) + z = long_from_binary_base(&str, base); + else { +/*** +Binary bases can be converted in time linear in the number of digits, because +Python's representation base is binary. Other bases (including decimal!) use +the simple quadratic-time algorithm below, complicated by some speed tricks. + +First some math: the largest integer that can be expressed in N base-B digits +is B**N-1. Consequently, if we have an N-digit input in base B, the worst- +case number of Python digits needed to hold it is the smallest integer n s.t. + + BASE**n-1 >= B**N-1 [or, adding 1 to both sides] + BASE**n >= B**N [taking logs to base BASE] + n >= log(B**N)/log(BASE) = N * log(B)/log(BASE) + +The static array log_base_BASE[base] == log(base)/log(BASE) so we can compute +this quickly. A Python long with that much space is reserved near the start, +and the result is computed into it. + +The input string is actually treated as being in base base**i (i.e., i digits +are processed at a time), where two more static arrays hold: + + convwidth_base[base] = the largest integer i such that base**i <= BASE + convmultmax_base[base] = base ** convwidth_base[base] + +The first of these is the largest i such that i consecutive input digits +must fit in a single Python digit. The second is effectively the input +base we're really using. + +Viewing the input as a sequence of digits in base +convmultmax_base[base], the result is "simply" + + (((c0*B + c1)*B + c2)*B + c3)*B + ... ))) + c_n-1 + +where B = convmultmax_base[base]. + +Error analysis: as above, the number of Python digits `n` needed is worst- +case + + n >= N * log(B)/log(BASE) + +where `N` is the number of input digits in base `B`. This is computed via + + size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; + +below. Two numeric concerns are how much space this can waste, and whether +the computed result can be too small. To be concrete, assume BASE = 2**15, +which is the default (and it's unlikely anyone changes that). + +Waste isn't a problem: provided the first input digit isn't 0, the difference +between the worst-case input with N digits and the smallest input with N +digits is about a factor of B, but B is small compared to BASE so at most +one allocated Python digit can remain unused on that count. If +N*log(B)/log(BASE) is mathematically an exact integer, then truncating that +and adding 1 returns a result 1 larger than necessary. However, that can't +happen: whenever B is a power of 2, long_from_binary_base() is called +instead, and it's impossible for B**i to be an integer power of 2**15 when +B is not a power of 2 (i.e., it's impossible for N*log(B)/log(BASE) to be +an exact integer when B is not a power of 2, since B**i has a prime factor +other than 2 in that case, but (2**15)**j's only prime factor is 2). + +The computed result can be too small if the true value of N*log(B)/log(BASE) +is a little bit larger than an exact integer, but due to roundoff errors (in +computing log(B), log(BASE), their quotient, and/or multiplying that by N) +yields a numeric result a little less than that integer. Unfortunately, "how +close can a transcendental function get to an integer over some range?" +questions are generally theoretically intractable. Computer analysis via +continued fractions is practical: expand log(B)/log(BASE) via continued +fractions, giving a sequence i/j of "the best" rational approximations. Then +j*log(B)/log(BASE) is approximately equal to (the integer) i. This shows that +we can get very close to being in trouble, but very rarely. For example, +76573 is a denominator in one of the continued-fraction approximations to +log(10)/log(2**15), and indeed: + + >>> log(10)/log(2**15)*76573 + 16958.000000654003 + +is very close to an integer. If we were working with IEEE single-precision, +rounding errors could kill us. Finding worst cases in IEEE double-precision +requires better-than-double-precision log() functions, and Tim didn't bother. +Instead the code checks to see whether the allocated space is enough as each +new Python digit is added, and copies the whole thing to a larger long if not. +This should happen extremely rarely, and in fact I don't have a test case +that triggers it(!). Instead the code was tested by artificially allocating +just 1 digit at the start, so that the copying code was exercised for every +digit beyond the first. +***/ + register twodigits c; /* current input character */ + Py_ssize_t size_z; + int i; + int convwidth; + twodigits convmultmax, convmult; + digit *pz, *pzstop; + char* scan; + + static double log_base_BASE[37] = {0.0e0,}; + static int convwidth_base[37] = {0,}; + static twodigits convmultmax_base[37] = {0,}; + + if (log_base_BASE[base] == 0.0) { + twodigits convmax = base; + int i = 1; + + log_base_BASE[base] = (log((double)base) / + log((double)PyLong_BASE)); + for (;;) { + twodigits next = convmax * base; + if (next > PyLong_BASE) + break; + convmax = next; + ++i; + } + convmultmax_base[base] = convmax; + assert(i > 0); + convwidth_base[base] = i; + } + + /* Find length of the string of numeric characters. */ + scan = str; + while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base) + ++scan; + + /* Create a long object that can contain the largest possible + * integer with this base and length. Note that there's no + * need to initialize z->ob_digit -- no slot is read up before + * being stored into. + */ + size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; + /* Uncomment next line to test exceedingly rare copy code */ + /* size_z = 1; */ + assert(size_z > 0); + z = _PyLong_New(size_z); + if (z == NULL) + return NULL; + Py_SIZE(z) = 0; + + /* `convwidth` consecutive input digits are treated as a single + * digit in base `convmultmax`. + */ + convwidth = convwidth_base[base]; + convmultmax = convmultmax_base[base]; + + /* Work ;-) */ + while (str < scan) { + /* grab up to convwidth digits from the input string */ + c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)]; + for (i = 1; i < convwidth && str != scan; ++i, ++str) { + c = (twodigits)(c * base + + (int)_PyLong_DigitValue[Py_CHARMASK(*str)]); + assert(c < PyLong_BASE); + } + + convmult = convmultmax; + /* Calculate the shift only if we couldn't get + * convwidth digits. + */ + if (i != convwidth) { + convmult = base; + for ( ; i > 1; --i) + convmult *= base; + } + + /* Multiply z by convmult, and add c. */ + pz = z->ob_digit; + pzstop = pz + Py_SIZE(z); + for (; pz < pzstop; ++pz) { + c += (twodigits)*pz * convmult; + *pz = (digit)(c & PyLong_MASK); + c >>= PyLong_SHIFT; + } + /* carry off the current end? */ + if (c) { + assert(c < PyLong_BASE); + if (Py_SIZE(z) < size_z) { + *pz = (digit)c; + ++Py_SIZE(z); + } + else { + PyLongObject *tmp; + /* Extremely rare. Get more space. */ + assert(Py_SIZE(z) == size_z); + tmp = _PyLong_New(size_z + 1); + if (tmp == NULL) { + Py_DECREF(z); + return NULL; + } + memcpy(tmp->ob_digit, + z->ob_digit, + sizeof(digit) * size_z); + Py_DECREF(z); + z = tmp; + z->ob_digit[size_z] = (digit)c; + ++size_z; + } + } + } + } + if (z == NULL) + return NULL; + if (error_if_nonzero) { + /* reset the base to 0, else the exception message + doesn't make too much sense */ + base = 0; + if (Py_SIZE(z) != 0) + goto onError; + /* there might still be other problems, therefore base + remains zero here for the same reason */ + } + if (str == start) + goto onError; + if (sign < 0) + Py_SIZE(z) = -(Py_SIZE(z)); + while (*str && isspace(Py_CHARMASK(*str))) + str++; + if (*str != '\0') + goto onError; + if (pend) + *pend = str; + long_normalize(z); + return (PyObject *) maybe_small_long(z); + + onError: + Py_XDECREF(z); + slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200; + strobj = PyUnicode_FromStringAndSize(orig_str, slen); + if (strobj == NULL) + return NULL; + PyErr_Format(PyExc_ValueError, + "invalid literal for int() with base %d: %R", + base, strobj); + Py_DECREF(strobj); + return NULL; +} + +PyObject * +PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) +{ + PyObject *v, *unicode = PyUnicode_FromUnicode(u, length); + if (unicode == NULL) + return NULL; + v = PyLong_FromUnicodeObject(unicode, base); + Py_DECREF(unicode); + return v; +} + +PyObject * +PyLong_FromUnicodeObject(PyObject *u, int base) +{ + PyObject *result; + PyObject *asciidig; + char *buffer, *end; + Py_ssize_t buflen; + + asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u); + if (asciidig == NULL) + return NULL; + buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen); + if (buffer == NULL) { + Py_DECREF(asciidig); + return NULL; + } + result = PyLong_FromString(buffer, &end, base); + if (result != NULL && end != buffer + buflen) { + PyErr_SetString(PyExc_ValueError, + "null byte in argument for int()"); + Py_DECREF(result); + result = NULL; + } + Py_DECREF(asciidig); + return result; +} + +/* forward */ +static PyLongObject *x_divrem + (PyLongObject *, PyLongObject *, PyLongObject **); +static PyObject *long_long(PyObject *v); + +/* Long division with remainder, top-level routine */ + +static int +long_divrem(PyLongObject *a, PyLongObject *b, + PyLongObject **pdiv, PyLongObject **prem) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + + if (size_b == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "integer division or modulo by zero"); + return -1; + } + if (size_a < size_b || + (size_a == size_b && + a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) { + /* |a| < |b|. */ + *pdiv = (PyLongObject*)PyLong_FromLong(0); + if (*pdiv == NULL) + return -1; + Py_INCREF(a); + *prem = (PyLongObject *) a; + return 0; + } + if (size_b == 1) { + digit rem = 0; + z = divrem1(a, b->ob_digit[0], &rem); + if (z == NULL) + return -1; + *prem = (PyLongObject *) PyLong_FromLong((long)rem); + if (*prem == NULL) { + Py_DECREF(z); + return -1; + } + } + else { + z = x_divrem(a, b, prem); + if (z == NULL) + return -1; + } + /* Set the signs. + The quotient z has the sign of a*b; + the remainder r has the sign of a, + so a = b*z + r. */ + if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) + NEGATE(z); + if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) + NEGATE(*prem); + *pdiv = maybe_small_long(z); + return 0; +} + +/* Unsigned long division with remainder -- the algorithm. The arguments v1 + and w1 should satisfy 2 <= ABS(Py_SIZE(w1)) <= ABS(Py_SIZE(v1)). */ + +static PyLongObject * +x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) +{ + PyLongObject *v, *w, *a; + Py_ssize_t i, k, size_v, size_w; + int d; + digit wm1, wm2, carry, q, r, vtop, *v0, *vk, *w0, *ak; + twodigits vv; + sdigit zhi; + stwodigits z; + + /* We follow Knuth [The Art of Computer Programming, Vol. 2 (3rd + edn.), section 4.3.1, Algorithm D], except that we don't explicitly + handle the special case when the initial estimate q for a quotient + digit is >= PyLong_BASE: the max value for q is PyLong_BASE+1, and + that won't overflow a digit. */ + + /* allocate space; w will also be used to hold the final remainder */ + size_v = ABS(Py_SIZE(v1)); + size_w = ABS(Py_SIZE(w1)); + assert(size_v >= size_w && size_w >= 2); /* Assert checks by div() */ + v = _PyLong_New(size_v+1); + if (v == NULL) { + *prem = NULL; + return NULL; + } + w = _PyLong_New(size_w); + if (w == NULL) { + Py_DECREF(v); + *prem = NULL; + return NULL; + } + + /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. + shift v1 left by the same amount. Results go into w and v. */ + d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]); + carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); + assert(carry == 0); + carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); + if (carry != 0 || v->ob_digit[size_v-1] >= w->ob_digit[size_w-1]) { + v->ob_digit[size_v] = carry; + size_v++; + } + + /* Now v->ob_digit[size_v-1] < w->ob_digit[size_w-1], so quotient has + at most (and usually exactly) k = size_v - size_w digits. */ + k = size_v - size_w; + assert(k >= 0); + a = _PyLong_New(k); + if (a == NULL) { + Py_DECREF(w); + Py_DECREF(v); + *prem = NULL; + return NULL; + } + v0 = v->ob_digit; + w0 = w->ob_digit; + wm1 = w0[size_w-1]; + wm2 = w0[size_w-2]; + for (vk = v0+k, ak = a->ob_digit + k; vk-- > v0;) { + /* inner loop: divide vk[0:size_w+1] by w0[0:size_w], giving + single-digit quotient q, remainder in vk[0:size_w]. */ + + SIGCHECK({ + Py_DECREF(a); + Py_DECREF(w); + Py_DECREF(v); + *prem = NULL; + return NULL; + }); + + /* estimate quotient digit q; may overestimate by 1 (rare) */ + vtop = vk[size_w]; + assert(vtop <= wm1); + vv = ((twodigits)vtop << PyLong_SHIFT) | vk[size_w-1]; + q = (digit)(vv / wm1); + r = (digit)(vv - (twodigits)wm1 * q); /* r = vv % wm1 */ + while ((twodigits)wm2 * q > (((twodigits)r << PyLong_SHIFT) + | vk[size_w-2])) { + --q; + r += wm1; + if (r >= PyLong_BASE) + break; + } + assert(q <= PyLong_BASE); + + /* subtract q*w0[0:size_w] from vk[0:size_w+1] */ + zhi = 0; + for (i = 0; i < size_w; ++i) { + /* invariants: -PyLong_BASE <= -q <= zhi <= 0; + -PyLong_BASE * q <= z < PyLong_BASE */ + z = (sdigit)vk[i] + zhi - + (stwodigits)q * (stwodigits)w0[i]; + vk[i] = (digit)z & PyLong_MASK; + zhi = (sdigit)Py_ARITHMETIC_RIGHT_SHIFT(stwodigits, + z, PyLong_SHIFT); + } + + /* add w back if q was too large (this branch taken rarely) */ + assert((sdigit)vtop + zhi == -1 || (sdigit)vtop + zhi == 0); + if ((sdigit)vtop + zhi < 0) { + carry = 0; + for (i = 0; i < size_w; ++i) { + carry += vk[i] + w0[i]; + vk[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + --q; + } + + /* store quotient digit */ + assert(q < PyLong_BASE); + *--ak = q; + } + + /* unshift remainder; we reuse w to store the result */ + carry = v_rshift(w0, v0, size_w, d); + assert(carry==0); + Py_DECREF(v); + + *prem = long_normalize(w); + return long_normalize(a); +} + +/* For a nonzero PyLong a, express a in the form x * 2**e, with 0.5 <= + abs(x) < 1.0 and e >= 0; return x and put e in *e. Here x is + rounded to DBL_MANT_DIG significant bits using round-half-to-even. + If a == 0, return 0.0 and set *e = 0. If the resulting exponent + e is larger than PY_SSIZE_T_MAX, raise OverflowError and return + -1.0. */ + +/* attempt to define 2.0**DBL_MANT_DIG as a compile-time constant */ +#if DBL_MANT_DIG == 53 +#define EXP2_DBL_MANT_DIG 9007199254740992.0 +#else +#define EXP2_DBL_MANT_DIG (ldexp(1.0, DBL_MANT_DIG)) +#endif + +double +_PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) +{ + Py_ssize_t a_size, a_bits, shift_digits, shift_bits, x_size; + /* See below for why x_digits is always large enough. */ + digit rem, x_digits[2 + (DBL_MANT_DIG + 1) / PyLong_SHIFT]; + double dx; + /* Correction term for round-half-to-even rounding. For a digit x, + "x + half_even_correction[x & 7]" gives x rounded to the nearest + multiple of 4, rounding ties to a multiple of 8. */ + static const int half_even_correction[8] = {0, -1, -2, 1, 0, -1, 2, 1}; + + a_size = ABS(Py_SIZE(a)); + if (a_size == 0) { + /* Special case for 0: significand 0.0, exponent 0. */ + *e = 0; + return 0.0; + } + a_bits = bits_in_digit(a->ob_digit[a_size-1]); + /* The following is an overflow-free version of the check + "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ + if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && + (a_size > (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 || + a_bits > (PY_SSIZE_T_MAX - 1) % PyLong_SHIFT + 1)) + goto overflow; + a_bits = (a_size - 1) * PyLong_SHIFT + a_bits; + + /* Shift the first DBL_MANT_DIG + 2 bits of a into x_digits[0:x_size] + (shifting left if a_bits <= DBL_MANT_DIG + 2). + + Number of digits needed for result: write // for floor division. + Then if shifting left, we end up using + + 1 + a_size + (DBL_MANT_DIG + 2 - a_bits) // PyLong_SHIFT + + digits. If shifting right, we use + + a_size - (a_bits - DBL_MANT_DIG - 2) // PyLong_SHIFT + + digits. Using a_size = 1 + (a_bits - 1) // PyLong_SHIFT along with + the inequalities + + m // PyLong_SHIFT + n // PyLong_SHIFT <= (m + n) // PyLong_SHIFT + m // PyLong_SHIFT - n // PyLong_SHIFT <= + 1 + (m - n - 1) // PyLong_SHIFT, + + valid for any integers m and n, we find that x_size satisfies + + x_size <= 2 + (DBL_MANT_DIG + 1) // PyLong_SHIFT + + in both cases. + */ + if (a_bits <= DBL_MANT_DIG + 2) { + shift_digits = (DBL_MANT_DIG + 2 - a_bits) / PyLong_SHIFT; + shift_bits = (DBL_MANT_DIG + 2 - a_bits) % PyLong_SHIFT; + x_size = 0; + while (x_size < shift_digits) + x_digits[x_size++] = 0; + rem = v_lshift(x_digits + x_size, a->ob_digit, a_size, + (int)shift_bits); + x_size += a_size; + x_digits[x_size++] = rem; + } + else { + shift_digits = (a_bits - DBL_MANT_DIG - 2) / PyLong_SHIFT; + shift_bits = (a_bits - DBL_MANT_DIG - 2) % PyLong_SHIFT; + rem = v_rshift(x_digits, a->ob_digit + shift_digits, + a_size - shift_digits, (int)shift_bits); + x_size = a_size - shift_digits; + /* For correct rounding below, we need the least significant + bit of x to be 'sticky' for this shift: if any of the bits + shifted out was nonzero, we set the least significant bit + of x. */ + if (rem) + x_digits[0] |= 1; + else + while (shift_digits > 0) + if (a->ob_digit[--shift_digits]) { + x_digits[0] |= 1; + break; + } + } + assert(1 <= x_size && x_size <= (Py_ssize_t)Py_ARRAY_LENGTH(x_digits)); + + /* Round, and convert to double. */ + x_digits[0] += half_even_correction[x_digits[0] & 7]; + dx = x_digits[--x_size]; + while (x_size > 0) + dx = dx * PyLong_BASE + x_digits[--x_size]; + + /* Rescale; make correction if result is 1.0. */ + dx /= 4.0 * EXP2_DBL_MANT_DIG; + if (dx == 1.0) { + if (a_bits == PY_SSIZE_T_MAX) + goto overflow; + dx = 0.5; + a_bits += 1; + } + + *e = a_bits; + return Py_SIZE(a) < 0 ? -dx : dx; + + overflow: + /* exponent > PY_SSIZE_T_MAX */ + PyErr_SetString(PyExc_OverflowError, + "huge integer: number of bits overflows a Py_ssize_t"); + *e = 0; + return -1.0; +} + +/* Get a C double from a long int object. Rounds to the nearest double, + using the round-half-to-even rule in the case of a tie. */ + +double +PyLong_AsDouble(PyObject *v) +{ + Py_ssize_t exponent; + double x; + + if (v == NULL) { + PyErr_BadInternalCall(); + return -1.0; + } + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, "an integer is required"); + return -1.0; + } + x = _PyLong_Frexp((PyLongObject *)v, &exponent); + if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) { + PyErr_SetString(PyExc_OverflowError, + "long int too large to convert to float"); + return -1.0; + } + return ldexp(x, (int)exponent); +} + +/* Methods */ + +static void +long_dealloc(PyObject *v) +{ + Py_TYPE(v)->tp_free(v); +} + +static int +long_compare(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t sign; + + if (Py_SIZE(a) != Py_SIZE(b)) { + sign = Py_SIZE(a) - Py_SIZE(b); + } + else { + Py_ssize_t i = ABS(Py_SIZE(a)); + while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) + ; + if (i < 0) + sign = 0; + else { + sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i]; + if (Py_SIZE(a) < 0) + sign = -sign; + } + } + return sign < 0 ? -1 : sign > 0 ? 1 : 0; +} + +#define TEST_COND(cond) \ + ((cond) ? Py_True : Py_False) + +static PyObject * +long_richcompare(PyObject *self, PyObject *other, int op) +{ + int result; + PyObject *v; + CHECK_BINOP(self, other); + if (self == other) + result = 0; + else + result = long_compare((PyLongObject*)self, (PyLongObject*)other); + /* Convert the return value to a Boolean */ + switch (op) { + case Py_EQ: + v = TEST_COND(result == 0); + break; + case Py_NE: + v = TEST_COND(result != 0); + break; + case Py_LE: + v = TEST_COND(result <= 0); + break; + case Py_GE: + v = TEST_COND(result >= 0); + break; + case Py_LT: + v = TEST_COND(result == -1); + break; + case Py_GT: + v = TEST_COND(result == 1); + break; + default: + PyErr_BadArgument(); + return NULL; + } + Py_INCREF(v); + return v; +} + +static Py_hash_t +long_hash(PyLongObject *v) +{ + Py_uhash_t x; + Py_ssize_t i; + int sign; + + i = Py_SIZE(v); + switch(i) { + case -1: return v->ob_digit[0]==1 ? -2 : -(sdigit)v->ob_digit[0]; + case 0: return 0; + case 1: return v->ob_digit[0]; + } + sign = 1; + x = 0; + if (i < 0) { + sign = -1; + i = -(i); + } + while (--i >= 0) { + /* Here x is a quantity in the range [0, _PyHASH_MODULUS); we + want to compute x * 2**PyLong_SHIFT + v->ob_digit[i] modulo + _PyHASH_MODULUS. + + The computation of x * 2**PyLong_SHIFT % _PyHASH_MODULUS + amounts to a rotation of the bits of x. To see this, write + + x * 2**PyLong_SHIFT = y * 2**_PyHASH_BITS + z + + where y = x >> (_PyHASH_BITS - PyLong_SHIFT) gives the top + PyLong_SHIFT bits of x (those that are shifted out of the + original _PyHASH_BITS bits, and z = (x << PyLong_SHIFT) & + _PyHASH_MODULUS gives the bottom _PyHASH_BITS - PyLong_SHIFT + bits of x, shifted up. Then since 2**_PyHASH_BITS is + congruent to 1 modulo _PyHASH_MODULUS, y*2**_PyHASH_BITS is + congruent to y modulo _PyHASH_MODULUS. So + + x * 2**PyLong_SHIFT = y + z (mod _PyHASH_MODULUS). + + The right-hand side is just the result of rotating the + _PyHASH_BITS bits of x left by PyLong_SHIFT places; since + not all _PyHASH_BITS bits of x are 1s, the same is true + after rotation, so 0 <= y+z < _PyHASH_MODULUS and y + z is + the reduction of x*2**PyLong_SHIFT modulo + _PyHASH_MODULUS. */ + x = ((x << PyLong_SHIFT) & _PyHASH_MODULUS) | + (x >> (_PyHASH_BITS - PyLong_SHIFT)); + x += v->ob_digit[i]; + if (x >= _PyHASH_MODULUS) + x -= _PyHASH_MODULUS; + } + x = x * sign; + if (x == (Py_uhash_t)-1) + x = (Py_uhash_t)-2; + return (Py_hash_t)x; +} + + +/* Add the absolute values of two long integers. */ + +static PyLongObject * +x_add(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + Py_ssize_t i; + digit carry = 0; + + /* Ensure a is the larger of the two: */ + if (size_a < size_b) { + { PyLongObject *temp = a; a = b; b = temp; } + { Py_ssize_t size_temp = size_a; + size_a = size_b; + size_b = size_temp; } + } + z = _PyLong_New(size_a+1); + if (z == NULL) + return NULL; + for (i = 0; i < size_b; ++i) { + carry += a->ob_digit[i] + b->ob_digit[i]; + z->ob_digit[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + for (; i < size_a; ++i) { + carry += a->ob_digit[i]; + z->ob_digit[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + z->ob_digit[i] = carry; + return long_normalize(z); +} + +/* Subtract the absolute values of two integers. */ + +static PyLongObject * +x_sub(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); + PyLongObject *z; + Py_ssize_t i; + int sign = 1; + digit borrow = 0; + + /* Ensure a is the larger of the two: */ + if (size_a < size_b) { + sign = -1; + { PyLongObject *temp = a; a = b; b = temp; } + { Py_ssize_t size_temp = size_a; + size_a = size_b; + size_b = size_temp; } + } + else if (size_a == size_b) { + /* Find highest digit where a and b differ: */ + i = size_a; + while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) + ; + if (i < 0) + return (PyLongObject *)PyLong_FromLong(0); + if (a->ob_digit[i] < b->ob_digit[i]) { + sign = -1; + { PyLongObject *temp = a; a = b; b = temp; } + } + size_a = size_b = i+1; + } + z = _PyLong_New(size_a); + if (z == NULL) + return NULL; + for (i = 0; i < size_b; ++i) { + /* The following assumes unsigned arithmetic + works module 2**N for some N>PyLong_SHIFT. */ + borrow = a->ob_digit[i] - b->ob_digit[i] - borrow; + z->ob_digit[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* Keep only one sign bit */ + } + for (; i < size_a; ++i) { + borrow = a->ob_digit[i] - borrow; + z->ob_digit[i] = borrow & PyLong_MASK; + borrow >>= PyLong_SHIFT; + borrow &= 1; /* Keep only one sign bit */ + } + assert(borrow == 0); + if (sign < 0) + NEGATE(z); + return long_normalize(z); +} + +static PyObject * +long_add(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z; + + CHECK_BINOP(a, b); + + if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { + PyObject *result = PyLong_FromLong(MEDIUM_VALUE(a) + + MEDIUM_VALUE(b)); + return result; + } + if (Py_SIZE(a) < 0) { + if (Py_SIZE(b) < 0) { + z = x_add(a, b); + if (z != NULL && Py_SIZE(z) != 0) + Py_SIZE(z) = -(Py_SIZE(z)); + } + else + z = x_sub(b, a); + } + else { + if (Py_SIZE(b) < 0) + z = x_sub(a, b); + else + z = x_add(a, b); + } + return (PyObject *)z; +} + +static PyObject * +long_sub(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z; + + CHECK_BINOP(a, b); + + if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { + PyObject* r; + r = PyLong_FromLong(MEDIUM_VALUE(a)-MEDIUM_VALUE(b)); + return r; + } + if (Py_SIZE(a) < 0) { + if (Py_SIZE(b) < 0) + z = x_sub(a, b); + else + z = x_add(a, b); + if (z != NULL && Py_SIZE(z) != 0) + Py_SIZE(z) = -(Py_SIZE(z)); + } + else { + if (Py_SIZE(b) < 0) + z = x_add(a, b); + else + z = x_sub(a, b); + } + return (PyObject *)z; +} + +/* Grade school multiplication, ignoring the signs. + * Returns the absolute value of the product, or NULL if error. + */ +static PyLongObject * +x_mul(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z; + Py_ssize_t size_a = ABS(Py_SIZE(a)); + Py_ssize_t size_b = ABS(Py_SIZE(b)); + Py_ssize_t i; + + z = _PyLong_New(size_a + size_b); + if (z == NULL) + return NULL; + + memset(z->ob_digit, 0, Py_SIZE(z) * sizeof(digit)); + if (a == b) { + /* Efficient squaring per HAC, Algorithm 14.16: + * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + * Gives slightly less than a 2x speedup when a == b, + * via exploiting that each entry in the multiplication + * pyramid appears twice (except for the size_a squares). + */ + for (i = 0; i < size_a; ++i) { + twodigits carry; + twodigits f = a->ob_digit[i]; + digit *pz = z->ob_digit + (i << 1); + digit *pa = a->ob_digit + i + 1; + digit *paend = a->ob_digit + size_a; + + SIGCHECK({ + Py_DECREF(z); + return NULL; + }); + + carry = *pz + f * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= PyLong_MASK); + + /* Now f is added in twice in each column of the + * pyramid it appears. Same as adding f<<1 once. + */ + f <<= 1; + while (pa < paend) { + carry += *pz + *pa++ * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= (PyLong_MASK << 1)); + } + if (carry) { + carry += *pz; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + } + if (carry) + *pz += (digit)(carry & PyLong_MASK); + assert((carry >> PyLong_SHIFT) == 0); + } + } + else { /* a is not the same as b -- gradeschool long mult */ + for (i = 0; i < size_a; ++i) { + twodigits carry = 0; + twodigits f = a->ob_digit[i]; + digit *pz = z->ob_digit + i; + digit *pb = b->ob_digit; + digit *pbend = b->ob_digit + size_b; + + SIGCHECK({ + Py_DECREF(z); + return NULL; + }); + + while (pb < pbend) { + carry += *pz + *pb++ * f; + *pz++ = (digit)(carry & PyLong_MASK); + carry >>= PyLong_SHIFT; + assert(carry <= PyLong_MASK); + } + if (carry) + *pz += (digit)(carry & PyLong_MASK); + assert((carry >> PyLong_SHIFT) == 0); + } + } + return long_normalize(z); +} + +/* A helper for Karatsuba multiplication (k_mul). + Takes a long "n" and an integer "size" representing the place to + split, and sets low and high such that abs(n) == (high << size) + low, + viewing the shift as being by digits. The sign bit is ignored, and + the return values are >= 0. + Returns 0 on success, -1 on failure. +*/ +static int +kmul_split(PyLongObject *n, + Py_ssize_t size, + PyLongObject **high, + PyLongObject **low) +{ + PyLongObject *hi, *lo; + Py_ssize_t size_lo, size_hi; + const Py_ssize_t size_n = ABS(Py_SIZE(n)); + + size_lo = MIN(size_n, size); + size_hi = size_n - size_lo; + + if ((hi = _PyLong_New(size_hi)) == NULL) + return -1; + if ((lo = _PyLong_New(size_lo)) == NULL) { + Py_DECREF(hi); + return -1; + } + + memcpy(lo->ob_digit, n->ob_digit, size_lo * sizeof(digit)); + memcpy(hi->ob_digit, n->ob_digit + size_lo, size_hi * sizeof(digit)); + + *high = long_normalize(hi); + *low = long_normalize(lo); + return 0; +} + +static PyLongObject *k_lopsided_mul(PyLongObject *a, PyLongObject *b); + +/* Karatsuba multiplication. Ignores the input signs, and returns the + * absolute value of the product (or NULL if error). + * See Knuth Vol. 2 Chapter 4.3.3 (Pp. 294-295). + */ +static PyLongObject * +k_mul(PyLongObject *a, PyLongObject *b) +{ + Py_ssize_t asize = ABS(Py_SIZE(a)); + Py_ssize_t bsize = ABS(Py_SIZE(b)); + PyLongObject *ah = NULL; + PyLongObject *al = NULL; + PyLongObject *bh = NULL; + PyLongObject *bl = NULL; + PyLongObject *ret = NULL; + PyLongObject *t1, *t2, *t3; + Py_ssize_t shift; /* the number of digits we split off */ + Py_ssize_t i; + + /* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl + * Let k = (ah+al)*(bh+bl) = ah*bl + al*bh + ah*bh + al*bl + * Then the original product is + * ah*bh*X*X + (k - ah*bh - al*bl)*X + al*bl + * By picking X to be a power of 2, "*X" is just shifting, and it's + * been reduced to 3 multiplies on numbers half the size. + */ + + /* We want to split based on the larger number; fiddle so that b + * is largest. + */ + if (asize > bsize) { + t1 = a; + a = b; + b = t1; + + i = asize; + asize = bsize; + bsize = i; + } + + /* Use gradeschool math when either number is too small. */ + i = a == b ? KARATSUBA_SQUARE_CUTOFF : KARATSUBA_CUTOFF; + if (asize <= i) { + if (asize == 0) + return (PyLongObject *)PyLong_FromLong(0); + else + return x_mul(a, b); + } + + /* If a is small compared to b, splitting on b gives a degenerate + * case with ah==0, and Karatsuba may be (even much) less efficient + * than "grade school" then. However, we can still win, by viewing + * b as a string of "big digits", each of width a->ob_size. That + * leads to a sequence of balanced calls to k_mul. + */ + if (2 * asize <= bsize) + return k_lopsided_mul(a, b); + + /* Split a & b into hi & lo pieces. */ + shift = bsize >> 1; + if (kmul_split(a, shift, &ah, &al) < 0) goto fail; + assert(Py_SIZE(ah) > 0); /* the split isn't degenerate */ + + if (a == b) { + bh = ah; + bl = al; + Py_INCREF(bh); + Py_INCREF(bl); + } + else if (kmul_split(b, shift, &bh, &bl) < 0) goto fail; + + /* The plan: + * 1. Allocate result space (asize + bsize digits: that's always + * enough). + * 2. Compute ah*bh, and copy into result at 2*shift. + * 3. Compute al*bl, and copy into result at 0. Note that this + * can't overlap with #2. + * 4. Subtract al*bl from the result, starting at shift. This may + * underflow (borrow out of the high digit), but we don't care: + * we're effectively doing unsigned arithmetic mod + * BASE**(sizea + sizeb), and so long as the *final* result fits, + * borrows and carries out of the high digit can be ignored. + * 5. Subtract ah*bh from the result, starting at shift. + * 6. Compute (ah+al)*(bh+bl), and add it into the result starting + * at shift. + */ + + /* 1. Allocate result space. */ + ret = _PyLong_New(asize + bsize); + if (ret == NULL) goto fail; +#ifdef Py_DEBUG + /* Fill with trash, to catch reference to uninitialized digits. */ + memset(ret->ob_digit, 0xDF, Py_SIZE(ret) * sizeof(digit)); +#endif + + /* 2. t1 <- ah*bh, and copy into high digits of result. */ + if ((t1 = k_mul(ah, bh)) == NULL) goto fail; + assert(Py_SIZE(t1) >= 0); + assert(2*shift + Py_SIZE(t1) <= Py_SIZE(ret)); + memcpy(ret->ob_digit + 2*shift, t1->ob_digit, + Py_SIZE(t1) * sizeof(digit)); + + /* Zero-out the digits higher than the ah*bh copy. */ + i = Py_SIZE(ret) - 2*shift - Py_SIZE(t1); + if (i) + memset(ret->ob_digit + 2*shift + Py_SIZE(t1), 0, + i * sizeof(digit)); + + /* 3. t2 <- al*bl, and copy into the low digits. */ + if ((t2 = k_mul(al, bl)) == NULL) { + Py_DECREF(t1); + goto fail; + } + assert(Py_SIZE(t2) >= 0); + assert(Py_SIZE(t2) <= 2*shift); /* no overlap with high digits */ + memcpy(ret->ob_digit, t2->ob_digit, Py_SIZE(t2) * sizeof(digit)); + + /* Zero out remaining digits. */ + i = 2*shift - Py_SIZE(t2); /* number of uninitialized digits */ + if (i) + memset(ret->ob_digit + Py_SIZE(t2), 0, i * sizeof(digit)); + + /* 4 & 5. Subtract ah*bh (t1) and al*bl (t2). We do al*bl first + * because it's fresher in cache. + */ + i = Py_SIZE(ret) - shift; /* # digits after shift */ + (void)v_isub(ret->ob_digit + shift, i, t2->ob_digit, Py_SIZE(t2)); + Py_DECREF(t2); + + (void)v_isub(ret->ob_digit + shift, i, t1->ob_digit, Py_SIZE(t1)); + Py_DECREF(t1); + + /* 6. t3 <- (ah+al)(bh+bl), and add into result. */ + if ((t1 = x_add(ah, al)) == NULL) goto fail; + Py_DECREF(ah); + Py_DECREF(al); + ah = al = NULL; + + if (a == b) { + t2 = t1; + Py_INCREF(t2); + } + else if ((t2 = x_add(bh, bl)) == NULL) { + Py_DECREF(t1); + goto fail; + } + Py_DECREF(bh); + Py_DECREF(bl); + bh = bl = NULL; + + t3 = k_mul(t1, t2); + Py_DECREF(t1); + Py_DECREF(t2); + if (t3 == NULL) goto fail; + assert(Py_SIZE(t3) >= 0); + + /* Add t3. It's not obvious why we can't run out of room here. + * See the (*) comment after this function. + */ + (void)v_iadd(ret->ob_digit + shift, i, t3->ob_digit, Py_SIZE(t3)); + Py_DECREF(t3); + + return long_normalize(ret); + + fail: + Py_XDECREF(ret); + Py_XDECREF(ah); + Py_XDECREF(al); + Py_XDECREF(bh); + Py_XDECREF(bl); + return NULL; +} + +/* (*) Why adding t3 can't "run out of room" above. + +Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts +to start with: + +1. For any integer i, i = c(i/2) + f(i/2). In particular, + bsize = c(bsize/2) + f(bsize/2). +2. shift = f(bsize/2) +3. asize <= bsize +4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this + routine, so asize > bsize/2 >= f(bsize/2) in this routine. + +We allocated asize + bsize result digits, and add t3 into them at an offset +of shift. This leaves asize+bsize-shift allocated digit positions for t3 +to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = +asize + c(bsize/2) available digit positions. + +bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has +at most c(bsize/2) digits + 1 bit. + +If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) +digits, and al has at most f(bsize/2) digits in any case. So ah+al has at +most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. + +The product (ah+al)*(bh+bl) therefore has at most + + c(bsize/2) + (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits + +and we have asize + c(bsize/2) available digit positions. We need to show +this is always enough. An instance of c(bsize/2) cancels out in both, so +the question reduces to whether asize digits is enough to hold +(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, +then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, +asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 +digit is enough to hold 2 bits. This is so since PyLong_SHIFT=15 >= 2. If +asize == bsize, then we're asking whether bsize digits is enough to hold +c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits +is enough to hold 2 bits. This is so if bsize >= 2, which holds because +bsize >= KARATSUBA_CUTOFF >= 2. + +Note that since there's always enough room for (ah+al)*(bh+bl), and that's +clearly >= each of ah*bh and al*bl, there's always enough room to subtract +ah*bh and al*bl too. +*/ + +/* b has at least twice the digits of a, and a is big enough that Karatsuba + * would pay off *if* the inputs had balanced sizes. View b as a sequence + * of slices, each with a->ob_size digits, and multiply the slices by a, + * one at a time. This gives k_mul balanced inputs to work with, and is + * also cache-friendly (we compute one double-width slice of the result + * at a time, then move on, never backtracking except for the helpful + * single-width slice overlap between successive partial sums). + */ +static PyLongObject * +k_lopsided_mul(PyLongObject *a, PyLongObject *b) +{ + const Py_ssize_t asize = ABS(Py_SIZE(a)); + Py_ssize_t bsize = ABS(Py_SIZE(b)); + Py_ssize_t nbdone; /* # of b digits already multiplied */ + PyLongObject *ret; + PyLongObject *bslice = NULL; + + assert(asize > KARATSUBA_CUTOFF); + assert(2 * asize <= bsize); + + /* Allocate result space, and zero it out. */ + ret = _PyLong_New(asize + bsize); + if (ret == NULL) + return NULL; + memset(ret->ob_digit, 0, Py_SIZE(ret) * sizeof(digit)); + + /* Successive slices of b are copied into bslice. */ + bslice = _PyLong_New(asize); + if (bslice == NULL) + goto fail; + + nbdone = 0; + while (bsize > 0) { + PyLongObject *product; + const Py_ssize_t nbtouse = MIN(bsize, asize); + + /* Multiply the next slice of b by a. */ + memcpy(bslice->ob_digit, b->ob_digit + nbdone, + nbtouse * sizeof(digit)); + Py_SIZE(bslice) = nbtouse; + product = k_mul(a, bslice); + if (product == NULL) + goto fail; + + /* Add into result. */ + (void)v_iadd(ret->ob_digit + nbdone, Py_SIZE(ret) - nbdone, + product->ob_digit, Py_SIZE(product)); + Py_DECREF(product); + + bsize -= nbtouse; + nbdone += nbtouse; + } + + Py_DECREF(bslice); + return long_normalize(ret); + + fail: + Py_DECREF(ret); + Py_XDECREF(bslice); + return NULL; +} + +static PyObject * +long_mul(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z; + + CHECK_BINOP(a, b); + + /* fast path for single-digit multiplication */ + if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { + stwodigits v = (stwodigits)(MEDIUM_VALUE(a)) * MEDIUM_VALUE(b); +#ifdef HAVE_LONG_LONG + return PyLong_FromLongLong((PY_LONG_LONG)v); +#else + /* if we don't have long long then we're almost certainly + using 15-bit digits, so v will fit in a long. In the + unlikely event that we're using 30-bit digits on a platform + without long long, a large v will just cause us to fall + through to the general multiplication code below. */ + if (v >= LONG_MIN && v <= LONG_MAX) + return PyLong_FromLong((long)v); +#endif + } + + z = k_mul(a, b); + /* Negate if exactly one of the inputs is negative. */ + if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) + NEGATE(z); + return (PyObject *)z; +} + +/* The / and % operators are now defined in terms of divmod(). + The expression a mod b has the value a - b*floor(a/b). + The long_divrem function gives the remainder after division of + |a| by |b|, with the sign of a. This is also expressed + as a - b*trunc(a/b), if trunc truncates towards zero. + Some examples: + a b a rem b a mod b + 13 10 3 3 + -13 10 -3 7 + 13 -10 3 -7 + -13 -10 -3 -3 + So, to get from rem to mod, we have to add b if a and b + have different signs. We then subtract one from the 'div' + part of the outcome to keep the invariant intact. */ + +/* Compute + * *pdiv, *pmod = divmod(v, w) + * NULL can be passed for pdiv or pmod, in which case that part of + * the result is simply thrown away. The caller owns a reference to + * each of these it requests (does not pass NULL for). + */ +static int +l_divmod(PyLongObject *v, PyLongObject *w, + PyLongObject **pdiv, PyLongObject **pmod) +{ + PyLongObject *div, *mod; + + if (long_divrem(v, w, &div, &mod) < 0) + return -1; + if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) || + (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { + PyLongObject *temp; + PyLongObject *one; + temp = (PyLongObject *) long_add(mod, w); + Py_DECREF(mod); + mod = temp; + if (mod == NULL) { + Py_DECREF(div); + return -1; + } + one = (PyLongObject *) PyLong_FromLong(1L); + if (one == NULL || + (temp = (PyLongObject *) long_sub(div, one)) == NULL) { + Py_DECREF(mod); + Py_DECREF(div); + Py_XDECREF(one); + return -1; + } + Py_DECREF(one); + Py_DECREF(div); + div = temp; + } + if (pdiv != NULL) + *pdiv = div; + else + Py_DECREF(div); + + if (pmod != NULL) + *pmod = mod; + else + Py_DECREF(mod); + + return 0; +} + +static PyObject * +long_div(PyObject *a, PyObject *b) +{ + PyLongObject *div; + + CHECK_BINOP(a, b); + if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0) + div = NULL; + return (PyObject *)div; +} + +/* PyLong/PyLong -> float, with correctly rounded result. */ + +#define MANT_DIG_DIGITS (DBL_MANT_DIG / PyLong_SHIFT) +#define MANT_DIG_BITS (DBL_MANT_DIG % PyLong_SHIFT) + +static PyObject * +long_true_divide(PyObject *v, PyObject *w) +{ + PyLongObject *a, *b, *x; + Py_ssize_t a_size, b_size, shift, extra_bits, diff, x_size, x_bits; + digit mask, low; + int inexact, negate, a_is_small, b_is_small; + double dx, result; + + CHECK_BINOP(v, w); + a = (PyLongObject *)v; + b = (PyLongObject *)w; + + /* + Method in a nutshell: + + 0. reduce to case a, b > 0; filter out obvious underflow/overflow + 1. choose a suitable integer 'shift' + 2. use integer arithmetic to compute x = floor(2**-shift*a/b) + 3. adjust x for correct rounding + 4. convert x to a double dx with the same value + 5. return ldexp(dx, shift). + + In more detail: + + 0. For any a, a/0 raises ZeroDivisionError; for nonzero b, 0/b + returns either 0.0 or -0.0, depending on the sign of b. For a and + b both nonzero, ignore signs of a and b, and add the sign back in + at the end. Now write a_bits and b_bits for the bit lengths of a + and b respectively (that is, a_bits = 1 + floor(log_2(a)); likewise + for b). Then + + 2**(a_bits - b_bits - 1) < a/b < 2**(a_bits - b_bits + 1). + + So if a_bits - b_bits > DBL_MAX_EXP then a/b > 2**DBL_MAX_EXP and + so overflows. Similarly, if a_bits - b_bits < DBL_MIN_EXP - + DBL_MANT_DIG - 1 then a/b underflows to 0. With these cases out of + the way, we can assume that + + DBL_MIN_EXP - DBL_MANT_DIG - 1 <= a_bits - b_bits <= DBL_MAX_EXP. + + 1. The integer 'shift' is chosen so that x has the right number of + bits for a double, plus two or three extra bits that will be used + in the rounding decisions. Writing a_bits and b_bits for the + number of significant bits in a and b respectively, a + straightforward formula for shift is: + + shift = a_bits - b_bits - DBL_MANT_DIG - 2 + + This is fine in the usual case, but if a/b is smaller than the + smallest normal float then it can lead to double rounding on an + IEEE 754 platform, giving incorrectly rounded results. So we + adjust the formula slightly. The actual formula used is: + + shift = MAX(a_bits - b_bits, DBL_MIN_EXP) - DBL_MANT_DIG - 2 + + 2. The quantity x is computed by first shifting a (left -shift bits + if shift <= 0, right shift bits if shift > 0) and then dividing by + b. For both the shift and the division, we keep track of whether + the result is inexact, in a flag 'inexact'; this information is + needed at the rounding stage. + + With the choice of shift above, together with our assumption that + a_bits - b_bits >= DBL_MIN_EXP - DBL_MANT_DIG - 1, it follows + that x >= 1. + + 3. Now x * 2**shift <= a/b < (x+1) * 2**shift. We want to replace + this with an exactly representable float of the form + + round(x/2**extra_bits) * 2**(extra_bits+shift). + + For float representability, we need x/2**extra_bits < + 2**DBL_MANT_DIG and extra_bits + shift >= DBL_MIN_EXP - + DBL_MANT_DIG. This translates to the condition: + + extra_bits >= MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG + + To round, we just modify the bottom digit of x in-place; this can + end up giving a digit with value > PyLONG_MASK, but that's not a + problem since digits can hold values up to 2*PyLONG_MASK+1. + + With the original choices for shift above, extra_bits will always + be 2 or 3. Then rounding under the round-half-to-even rule, we + round up iff the most significant of the extra bits is 1, and + either: (a) the computation of x in step 2 had an inexact result, + or (b) at least one other of the extra bits is 1, or (c) the least + significant bit of x (above those to be rounded) is 1. + + 4. Conversion to a double is straightforward; all floating-point + operations involved in the conversion are exact, so there's no + danger of rounding errors. + + 5. Use ldexp(x, shift) to compute x*2**shift, the final result. + The result will always be exactly representable as a double, except + in the case that it overflows. To avoid dependence on the exact + behaviour of ldexp on overflow, we check for overflow before + applying ldexp. The result of ldexp is adjusted for sign before + returning. + */ + + /* Reduce to case where a and b are both positive. */ + a_size = ABS(Py_SIZE(a)); + b_size = ABS(Py_SIZE(b)); + negate = (Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0); + if (b_size == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "division by zero"); + goto error; + } + if (a_size == 0) + goto underflow_or_zero; + + /* Fast path for a and b small (exactly representable in a double). + Relies on floating-point division being correctly rounded; results + may be subject to double rounding on x86 machines that operate with + the x87 FPU set to 64-bit precision. */ + a_is_small = a_size <= MANT_DIG_DIGITS || + (a_size == MANT_DIG_DIGITS+1 && + a->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); + b_is_small = b_size <= MANT_DIG_DIGITS || + (b_size == MANT_DIG_DIGITS+1 && + b->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); + if (a_is_small && b_is_small) { + double da, db; + da = a->ob_digit[--a_size]; + while (a_size > 0) + da = da * PyLong_BASE + a->ob_digit[--a_size]; + db = b->ob_digit[--b_size]; + while (b_size > 0) + db = db * PyLong_BASE + b->ob_digit[--b_size]; + result = da / db; + goto success; + } + + /* Catch obvious cases of underflow and overflow */ + diff = a_size - b_size; + if (diff > PY_SSIZE_T_MAX/PyLong_SHIFT - 1) + /* Extreme overflow */ + goto overflow; + else if (diff < 1 - PY_SSIZE_T_MAX/PyLong_SHIFT) + /* Extreme underflow */ + goto underflow_or_zero; + /* Next line is now safe from overflowing a Py_ssize_t */ + diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) - + bits_in_digit(b->ob_digit[b_size - 1]); + /* Now diff = a_bits - b_bits. */ + if (diff > DBL_MAX_EXP) + goto overflow; + else if (diff < DBL_MIN_EXP - DBL_MANT_DIG - 1) + goto underflow_or_zero; + + /* Choose value for shift; see comments for step 1 above. */ + shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2; + + inexact = 0; + + /* x = abs(a * 2**-shift) */ + if (shift <= 0) { + Py_ssize_t i, shift_digits = -shift / PyLong_SHIFT; + digit rem; + /* x = a << -shift */ + if (a_size >= PY_SSIZE_T_MAX - 1 - shift_digits) { + /* In practice, it's probably impossible to end up + here. Both a and b would have to be enormous, + using close to SIZE_T_MAX bytes of memory each. */ + PyErr_SetString(PyExc_OverflowError, + "intermediate overflow during division"); + goto error; + } + x = _PyLong_New(a_size + shift_digits + 1); + if (x == NULL) + goto error; + for (i = 0; i < shift_digits; i++) + x->ob_digit[i] = 0; + rem = v_lshift(x->ob_digit + shift_digits, a->ob_digit, + a_size, -shift % PyLong_SHIFT); + x->ob_digit[a_size + shift_digits] = rem; + } + else { + Py_ssize_t shift_digits = shift / PyLong_SHIFT; + digit rem; + /* x = a >> shift */ + assert(a_size >= shift_digits); + x = _PyLong_New(a_size - shift_digits); + if (x == NULL) + goto error; + rem = v_rshift(x->ob_digit, a->ob_digit + shift_digits, + a_size - shift_digits, shift % PyLong_SHIFT); + /* set inexact if any of the bits shifted out is nonzero */ + if (rem) + inexact = 1; + while (!inexact && shift_digits > 0) + if (a->ob_digit[--shift_digits]) + inexact = 1; + } + long_normalize(x); + x_size = Py_SIZE(x); + + /* x //= b. If the remainder is nonzero, set inexact. We own the only + reference to x, so it's safe to modify it in-place. */ + if (b_size == 1) { + digit rem = inplace_divrem1(x->ob_digit, x->ob_digit, x_size, + b->ob_digit[0]); + long_normalize(x); + if (rem) + inexact = 1; + } + else { + PyLongObject *div, *rem; + div = x_divrem(x, b, &rem); + Py_DECREF(x); + x = div; + if (x == NULL) + goto error; + if (Py_SIZE(rem)) + inexact = 1; + Py_DECREF(rem); + } + x_size = ABS(Py_SIZE(x)); + assert(x_size > 0); /* result of division is never zero */ + x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]); + + /* The number of extra bits that have to be rounded away. */ + extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; + assert(extra_bits == 2 || extra_bits == 3); + + /* Round by directly modifying the low digit of x. */ + mask = (digit)1 << (extra_bits - 1); + low = x->ob_digit[0] | inexact; + if (low & mask && low & (3*mask-1)) + low += mask; + x->ob_digit[0] = low & ~(mask-1U); + + /* Convert x to a double dx; the conversion is exact. */ + dx = x->ob_digit[--x_size]; + while (x_size > 0) + dx = dx * PyLong_BASE + x->ob_digit[--x_size]; + Py_DECREF(x); + + /* Check whether ldexp result will overflow a double. */ + if (shift + x_bits >= DBL_MAX_EXP && + (shift + x_bits > DBL_MAX_EXP || dx == ldexp(1.0, (int)x_bits))) + goto overflow; + result = ldexp(dx, (int)shift); + + success: + return PyFloat_FromDouble(negate ? -result : result); + + underflow_or_zero: + return PyFloat_FromDouble(negate ? -0.0 : 0.0); + + overflow: + PyErr_SetString(PyExc_OverflowError, + "integer division result too large for a float"); + error: + return NULL; +} + +static PyObject * +long_mod(PyObject *a, PyObject *b) +{ + PyLongObject *mod; + + CHECK_BINOP(a, b); + + if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0) + mod = NULL; + return (PyObject *)mod; +} + +static PyObject * +long_divmod(PyObject *a, PyObject *b) +{ + PyLongObject *div, *mod; + PyObject *z; + + CHECK_BINOP(a, b); + + if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, &mod) < 0) { + return NULL; + } + z = PyTuple_New(2); + if (z != NULL) { + PyTuple_SetItem(z, 0, (PyObject *) div); + PyTuple_SetItem(z, 1, (PyObject *) mod); + } + else { + Py_DECREF(div); + Py_DECREF(mod); + } + return z; +} + +/* pow(v, w, x) */ +static PyObject * +long_pow(PyObject *v, PyObject *w, PyObject *x) +{ + PyLongObject *a, *b, *c; /* a,b,c = v,w,x */ + int negativeOutput = 0; /* if x<0 return negative output */ + + PyLongObject *z = NULL; /* accumulated result */ + Py_ssize_t i, j, k; /* counters */ + PyLongObject *temp = NULL; + + /* 5-ary values. If the exponent is large enough, table is + * precomputed so that table[i] == a**i % c for i in range(32). + */ + PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + + /* a, b, c = v, w, x */ + CHECK_BINOP(v, w); + a = (PyLongObject*)v; Py_INCREF(a); + b = (PyLongObject*)w; Py_INCREF(b); + if (PyLong_Check(x)) { + c = (PyLongObject *)x; + Py_INCREF(x); + } + else if (x == Py_None) + c = NULL; + else { + Py_DECREF(a); + Py_DECREF(b); + Py_RETURN_NOTIMPLEMENTED; + } + + if (Py_SIZE(b) < 0) { /* if exponent is negative */ + if (c) { + PyErr_SetString(PyExc_TypeError, "pow() 2nd argument " + "cannot be negative when 3rd argument specified"); + goto Error; + } + else { + /* else return a float. This works because we know + that this calls float_pow() which converts its + arguments to double. */ + Py_DECREF(a); + Py_DECREF(b); + return PyFloat_Type.tp_as_number->nb_power(v, w, x); + } + } + + if (c) { + /* if modulus == 0: + raise ValueError() */ + if (Py_SIZE(c) == 0) { + PyErr_SetString(PyExc_ValueError, + "pow() 3rd argument cannot be 0"); + goto Error; + } + + /* if modulus < 0: + negativeOutput = True + modulus = -modulus */ + if (Py_SIZE(c) < 0) { + negativeOutput = 1; + temp = (PyLongObject *)_PyLong_Copy(c); + if (temp == NULL) + goto Error; + Py_DECREF(c); + c = temp; + temp = NULL; + NEGATE(c); + } + + /* if modulus == 1: + return 0 */ + if ((Py_SIZE(c) == 1) && (c->ob_digit[0] == 1)) { + z = (PyLongObject *)PyLong_FromLong(0L); + goto Done; + } + + /* if base < 0: + base = base % modulus + Having the base positive just makes things easier. */ + if (Py_SIZE(a) < 0) { + if (l_divmod(a, c, NULL, &temp) < 0) + goto Error; + Py_DECREF(a); + a = temp; + temp = NULL; + } + } + + /* At this point a, b, and c are guaranteed non-negative UNLESS + c is NULL, in which case a may be negative. */ + + z = (PyLongObject *)PyLong_FromLong(1L); + if (z == NULL) + goto Error; + + /* Perform a modular reduction, X = X % c, but leave X alone if c + * is NULL. + */ +#define REDUCE(X) \ + do { \ + if (c != NULL) { \ + if (l_divmod(X, c, NULL, &temp) < 0) \ + goto Error; \ + Py_XDECREF(X); \ + X = temp; \ + temp = NULL; \ + } \ + } while(0) + + /* Multiply two values, then reduce the result: + result = X*Y % c. If c is NULL, skip the mod. */ +#define MULT(X, Y, result) \ + do { \ + temp = (PyLongObject *)long_mul(X, Y); \ + if (temp == NULL) \ + goto Error; \ + Py_XDECREF(result); \ + result = temp; \ + temp = NULL; \ + REDUCE(result); \ + } while(0) + + if (Py_SIZE(b) <= FIVEARY_CUTOFF) { + /* Left-to-right binary exponentiation (HAC Algorithm 14.79) */ + /* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf */ + for (i = Py_SIZE(b) - 1; i >= 0; --i) { + digit bi = b->ob_digit[i]; + + for (j = (digit)1 << (PyLong_SHIFT-1); j != 0; j >>= 1) { + MULT(z, z, z); + if (bi & j) + MULT(z, a, z); + } + } + } + else { + /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ + Py_INCREF(z); /* still holds 1L */ + table[0] = z; + for (i = 1; i < 32; ++i) + MULT(table[i-1], a, table[i]); + + for (i = Py_SIZE(b) - 1; i >= 0; --i) { + const digit bi = b->ob_digit[i]; + + for (j = PyLong_SHIFT - 5; j >= 0; j -= 5) { + const int index = (bi >> j) & 0x1f; + for (k = 0; k < 5; ++k) + MULT(z, z, z); + if (index) + MULT(z, table[index], z); + } + } + } + + if (negativeOutput && (Py_SIZE(z) != 0)) { + temp = (PyLongObject *)long_sub(z, c); + if (temp == NULL) + goto Error; + Py_DECREF(z); + z = temp; + temp = NULL; + } + goto Done; + + Error: + if (z != NULL) { + Py_DECREF(z); + z = NULL; + } + /* fall through */ + Done: + if (Py_SIZE(b) > FIVEARY_CUTOFF) { + for (i = 0; i < 32; ++i) + Py_XDECREF(table[i]); + } + Py_DECREF(a); + Py_DECREF(b); + Py_XDECREF(c); + Py_XDECREF(temp); + return (PyObject *)z; +} + +static PyObject * +long_invert(PyLongObject *v) +{ + /* Implement ~x as -(x+1) */ + PyLongObject *x; + PyLongObject *w; + if (ABS(Py_SIZE(v)) <=1) + return PyLong_FromLong(-(MEDIUM_VALUE(v)+1)); + w = (PyLongObject *)PyLong_FromLong(1L); + if (w == NULL) + return NULL; + x = (PyLongObject *) long_add(v, w); + Py_DECREF(w); + if (x == NULL) + return NULL; + Py_SIZE(x) = -(Py_SIZE(x)); + return (PyObject *)maybe_small_long(x); +} + +static PyObject * +long_neg(PyLongObject *v) +{ + PyLongObject *z; + if (ABS(Py_SIZE(v)) <= 1) + return PyLong_FromLong(-MEDIUM_VALUE(v)); + z = (PyLongObject *)_PyLong_Copy(v); + if (z != NULL) + Py_SIZE(z) = -(Py_SIZE(v)); + return (PyObject *)z; +} + +static PyObject * +long_abs(PyLongObject *v) +{ + if (Py_SIZE(v) < 0) + return long_neg(v); + else + return long_long((PyObject *)v); +} + +static int +long_bool(PyLongObject *v) +{ + return Py_SIZE(v) != 0; +} + +static PyObject * +long_rshift(PyLongObject *a, PyLongObject *b) +{ + PyLongObject *z = NULL; + Py_ssize_t shiftby, newsize, wordshift, loshift, hishift, i, j; + digit lomask, himask; + + CHECK_BINOP(a, b); + + if (Py_SIZE(a) < 0) { + /* Right shifting negative numbers is harder */ + PyLongObject *a1, *a2; + a1 = (PyLongObject *) long_invert(a); + if (a1 == NULL) + goto rshift_error; + a2 = (PyLongObject *) long_rshift(a1, b); + Py_DECREF(a1); + if (a2 == NULL) + goto rshift_error; + z = (PyLongObject *) long_invert(a2); + Py_DECREF(a2); + } + else { + shiftby = PyLong_AsSsize_t((PyObject *)b); + if (shiftby == -1L && PyErr_Occurred()) + goto rshift_error; + if (shiftby < 0) { + PyErr_SetString(PyExc_ValueError, + "negative shift count"); + goto rshift_error; + } + wordshift = shiftby / PyLong_SHIFT; + newsize = ABS(Py_SIZE(a)) - wordshift; + if (newsize <= 0) + return PyLong_FromLong(0); + loshift = shiftby % PyLong_SHIFT; + hishift = PyLong_SHIFT - loshift; + lomask = ((digit)1 << hishift) - 1; + himask = PyLong_MASK ^ lomask; + z = _PyLong_New(newsize); + if (z == NULL) + goto rshift_error; + if (Py_SIZE(a) < 0) + Py_SIZE(z) = -(Py_SIZE(z)); + for (i = 0, j = wordshift; i < newsize; i++, j++) { + z->ob_digit[i] = (a->ob_digit[j] >> loshift) & lomask; + if (i+1 < newsize) + z->ob_digit[i] |= (a->ob_digit[j+1] << hishift) & himask; + } + z = long_normalize(z); + } + rshift_error: + return (PyObject *) maybe_small_long(z); + +} + +static PyObject * +long_lshift(PyObject *v, PyObject *w) +{ + /* This version due to Tim Peters */ + PyLongObject *a = (PyLongObject*)v; + PyLongObject *b = (PyLongObject*)w; + PyLongObject *z = NULL; + Py_ssize_t shiftby, oldsize, newsize, wordshift, remshift, i, j; + twodigits accum; + + CHECK_BINOP(a, b); + + shiftby = PyLong_AsSsize_t((PyObject *)b); + if (shiftby == -1L && PyErr_Occurred()) + goto lshift_error; + if (shiftby < 0) { + PyErr_SetString(PyExc_ValueError, "negative shift count"); + goto lshift_error; + } + /* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */ + wordshift = shiftby / PyLong_SHIFT; + remshift = shiftby - wordshift * PyLong_SHIFT; + + oldsize = ABS(Py_SIZE(a)); + newsize = oldsize + wordshift; + if (remshift) + ++newsize; + z = _PyLong_New(newsize); + if (z == NULL) + goto lshift_error; + if (Py_SIZE(a) < 0) + NEGATE(z); + for (i = 0; i < wordshift; i++) + z->ob_digit[i] = 0; + accum = 0; + for (i = wordshift, j = 0; j < oldsize; i++, j++) { + accum |= (twodigits)a->ob_digit[j] << remshift; + z->ob_digit[i] = (digit)(accum & PyLong_MASK); + accum >>= PyLong_SHIFT; + } + if (remshift) + z->ob_digit[newsize-1] = (digit)accum; + else + assert(!accum); + z = long_normalize(z); + lshift_error: + return (PyObject *) maybe_small_long(z); +} + +/* Compute two's complement of digit vector a[0:m], writing result to + z[0:m]. The digit vector a need not be normalized, but should not + be entirely zero. a and z may point to the same digit vector. */ + +static void +v_complement(digit *z, digit *a, Py_ssize_t m) +{ + Py_ssize_t i; + digit carry = 1; + for (i = 0; i < m; ++i) { + carry += a[i] ^ PyLong_MASK; + z[i] = carry & PyLong_MASK; + carry >>= PyLong_SHIFT; + } + assert(carry == 0); +} + +/* Bitwise and/xor/or operations */ + +static PyObject * +long_bitwise(PyLongObject *a, + int op, /* '&', '|', '^' */ + PyLongObject *b) +{ + int nega, negb, negz; + Py_ssize_t size_a, size_b, size_z, i; + PyLongObject *z; + + /* Bitwise operations for negative numbers operate as though + on a two's complement representation. So convert arguments + from sign-magnitude to two's complement, and convert the + result back to sign-magnitude at the end. */ + + /* If a is negative, replace it by its two's complement. */ + size_a = ABS(Py_SIZE(a)); + nega = Py_SIZE(a) < 0; + if (nega) { + z = _PyLong_New(size_a); + if (z == NULL) + return NULL; + v_complement(z->ob_digit, a->ob_digit, size_a); + a = z; + } + else + /* Keep reference count consistent. */ + Py_INCREF(a); + + /* Same for b. */ + size_b = ABS(Py_SIZE(b)); + negb = Py_SIZE(b) < 0; + if (negb) { + z = _PyLong_New(size_b); + if (z == NULL) { + Py_DECREF(a); + return NULL; + } + v_complement(z->ob_digit, b->ob_digit, size_b); + b = z; + } + else + Py_INCREF(b); + + /* Swap a and b if necessary to ensure size_a >= size_b. */ + if (size_a < size_b) { + z = a; a = b; b = z; + size_z = size_a; size_a = size_b; size_b = size_z; + negz = nega; nega = negb; negb = negz; + } + + /* JRH: The original logic here was to allocate the result value (z) + as the longer of the two operands. However, there are some cases + where the result is guaranteed to be shorter than that: AND of two + positives, OR of two negatives: use the shorter number. AND with + mixed signs: use the positive number. OR with mixed signs: use the + negative number. + */ + switch (op) { + case '^': + negz = nega ^ negb; + size_z = size_a; + break; + case '&': + negz = nega & negb; + size_z = negb ? size_a : size_b; + break; + case '|': + negz = nega | negb; + size_z = negb ? size_b : size_a; + break; + default: + PyErr_BadArgument(); + return NULL; + } + + /* We allow an extra digit if z is negative, to make sure that + the final two's complement of z doesn't overflow. */ + z = _PyLong_New(size_z + negz); + if (z == NULL) { + Py_DECREF(a); + Py_DECREF(b); + return NULL; + } + + /* Compute digits for overlap of a and b. */ + switch(op) { + case '&': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] & b->ob_digit[i]; + break; + case '|': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] | b->ob_digit[i]; + break; + case '^': + for (i = 0; i < size_b; ++i) + z->ob_digit[i] = a->ob_digit[i] ^ b->ob_digit[i]; + break; + default: + PyErr_BadArgument(); + return NULL; + } + + /* Copy any remaining digits of a, inverting if necessary. */ + if (op == '^' && negb) + for (; i < size_z; ++i) + z->ob_digit[i] = a->ob_digit[i] ^ PyLong_MASK; + else if (i < size_z) + memcpy(&z->ob_digit[i], &a->ob_digit[i], + (size_z-i)*sizeof(digit)); + + /* Complement result if negative. */ + if (negz) { + Py_SIZE(z) = -(Py_SIZE(z)); + z->ob_digit[size_z] = PyLong_MASK; + v_complement(z->ob_digit, z->ob_digit, size_z+1); + } + + Py_DECREF(a); + Py_DECREF(b); + return (PyObject *)maybe_small_long(long_normalize(z)); +} + +static PyObject * +long_and(PyObject *a, PyObject *b) +{ + PyObject *c; + CHECK_BINOP(a, b); + c = long_bitwise((PyLongObject*)a, '&', (PyLongObject*)b); + return c; +} + +static PyObject * +long_xor(PyObject *a, PyObject *b) +{ + PyObject *c; + CHECK_BINOP(a, b); + c = long_bitwise((PyLongObject*)a, '^', (PyLongObject*)b); + return c; +} + +static PyObject * +long_or(PyObject *a, PyObject *b) +{ + PyObject *c; + CHECK_BINOP(a, b); + c = long_bitwise((PyLongObject*)a, '|', (PyLongObject*)b); + return c; +} + +static PyObject * +long_long(PyObject *v) +{ + if (PyLong_CheckExact(v)) + Py_INCREF(v); + else + v = _PyLong_Copy((PyLongObject *)v); + return v; +} + +static PyObject * +long_float(PyObject *v) +{ + double result; + result = PyLong_AsDouble(v); + if (result == -1.0 && PyErr_Occurred()) + return NULL; + return PyFloat_FromDouble(result); +} + +static PyObject * +long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); + +static PyObject * +long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *obase = NULL, *x = NULL; + long base; + int overflow; + static char *kwlist[] = {"x", "base", 0}; + + if (type != &PyLong_Type) + return long_subtype_new(type, args, kwds); /* Wimp out */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:int", kwlist, + &x, &obase)) + return NULL; + if (x == NULL) + return PyLong_FromLong(0L); + if (obase == NULL) + return PyNumber_Long(x); + + base = PyLong_AsLongAndOverflow(obase, &overflow); + if (base == -1 && PyErr_Occurred()) + return NULL; + if (overflow || (base != 0 && base < 2) || base > 36) { + PyErr_SetString(PyExc_ValueError, + "int() arg 2 must be >= 2 and <= 36"); + return NULL; + } + + if (PyUnicode_Check(x)) + return PyLong_FromUnicodeObject(x, (int)base); + else if (PyByteArray_Check(x) || PyBytes_Check(x)) { + /* Since PyLong_FromString doesn't have a length parameter, + * check here for possible NULs in the string. */ + char *string; + Py_ssize_t size = Py_SIZE(x); + if (PyByteArray_Check(x)) + string = PyByteArray_AS_STRING(x); + else + string = PyBytes_AS_STRING(x); + if (strlen(string) != (size_t)size) { + /* We only see this if there's a null byte in x, + x is a bytes or buffer, *and* a base is given. */ + PyErr_Format(PyExc_ValueError, + "invalid literal for int() with base %d: %R", + (int)base, x); + return NULL; + } + return PyLong_FromString(string, NULL, (int)base); + } + else { + PyErr_SetString(PyExc_TypeError, + "int() can't convert non-string with explicit base"); + return NULL; + } +} + +/* Wimpy, slow approach to tp_new calls for subtypes of long: + first create a regular long from whatever arguments we got, + then allocate a subtype instance and initialize it from + the regular long. The regular long is then thrown away. +*/ +static PyObject * +long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyLongObject *tmp, *newobj; + Py_ssize_t i, n; + + assert(PyType_IsSubtype(type, &PyLong_Type)); + tmp = (PyLongObject *)long_new(&PyLong_Type, args, kwds); + if (tmp == NULL) + return NULL; + assert(PyLong_CheckExact(tmp)); + n = Py_SIZE(tmp); + if (n < 0) + n = -n; + newobj = (PyLongObject *)type->tp_alloc(type, n); + if (newobj == NULL) { + Py_DECREF(tmp); + return NULL; + } + assert(PyLong_Check(newobj)); + Py_SIZE(newobj) = Py_SIZE(tmp); + for (i = 0; i < n; i++) + newobj->ob_digit[i] = tmp->ob_digit[i]; + Py_DECREF(tmp); + return (PyObject *)newobj; +} + +static PyObject * +long_getnewargs(PyLongObject *v) +{ + return Py_BuildValue("(N)", _PyLong_Copy(v)); +} + +static PyObject * +long_get0(PyLongObject *v, void *context) { + return PyLong_FromLong(0L); +} + +static PyObject * +long_get1(PyLongObject *v, void *context) { + return PyLong_FromLong(1L); +} + +static PyObject * +long__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + + if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) + return NULL; + return _PyLong_FormatAdvanced(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec)); +} + +/* Return a pair (q, r) such that a = b * q + r, and + abs(r) <= abs(b)/2, with equality possible only if q is even. + In other words, q == a / b, rounded to the nearest integer using + round-half-to-even. */ + +PyObject * +_PyLong_DivmodNear(PyObject *a, PyObject *b) +{ + PyLongObject *quo = NULL, *rem = NULL; + PyObject *one = NULL, *twice_rem, *result, *temp; + int cmp, quo_is_odd, quo_is_neg; + + /* Equivalent Python code: + + def divmod_near(a, b): + q, r = divmod(a, b) + # round up if either r / b > 0.5, or r / b == 0.5 and q is odd. + # The expression r / b > 0.5 is equivalent to 2 * r > b if b is + # positive, 2 * r < b if b negative. + greater_than_half = 2*r > b if b > 0 else 2*r < b + exactly_half = 2*r == b + if greater_than_half or exactly_half and q % 2 == 1: + q += 1 + r -= b + return q, r + + */ + if (!PyLong_Check(a) || !PyLong_Check(b)) { + PyErr_SetString(PyExc_TypeError, + "non-integer arguments in division"); + return NULL; + } + + /* Do a and b have different signs? If so, quotient is negative. */ + quo_is_neg = (Py_SIZE(a) < 0) != (Py_SIZE(b) < 0); + + one = PyLong_FromLong(1L); + if (one == NULL) + return NULL; + + if (long_divrem((PyLongObject*)a, (PyLongObject*)b, &quo, &rem) < 0) + goto error; + + /* compare twice the remainder with the divisor, to see + if we need to adjust the quotient and remainder */ + twice_rem = long_lshift((PyObject *)rem, one); + if (twice_rem == NULL) + goto error; + if (quo_is_neg) { + temp = long_neg((PyLongObject*)twice_rem); + Py_DECREF(twice_rem); + twice_rem = temp; + if (twice_rem == NULL) + goto error; + } + cmp = long_compare((PyLongObject *)twice_rem, (PyLongObject *)b); + Py_DECREF(twice_rem); + + quo_is_odd = Py_SIZE(quo) != 0 && ((quo->ob_digit[0] & 1) != 0); + if ((Py_SIZE(b) < 0 ? cmp < 0 : cmp > 0) || (cmp == 0 && quo_is_odd)) { + /* fix up quotient */ + if (quo_is_neg) + temp = long_sub(quo, (PyLongObject *)one); + else + temp = long_add(quo, (PyLongObject *)one); + Py_DECREF(quo); + quo = (PyLongObject *)temp; + if (quo == NULL) + goto error; + /* and remainder */ + if (quo_is_neg) + temp = long_add(rem, (PyLongObject *)b); + else + temp = long_sub(rem, (PyLongObject *)b); + Py_DECREF(rem); + rem = (PyLongObject *)temp; + if (rem == NULL) + goto error; + } + + result = PyTuple_New(2); + if (result == NULL) + goto error; + + /* PyTuple_SET_ITEM steals references */ + PyTuple_SET_ITEM(result, 0, (PyObject *)quo); + PyTuple_SET_ITEM(result, 1, (PyObject *)rem); + Py_DECREF(one); + return result; + + error: + Py_XDECREF(quo); + Py_XDECREF(rem); + Py_XDECREF(one); + return NULL; +} + +static PyObject * +long_round(PyObject *self, PyObject *args) +{ + PyObject *o_ndigits=NULL, *temp, *result, *ndigits; + + /* To round an integer m to the nearest 10**n (n positive), we make use of + * the divmod_near operation, defined by: + * + * divmod_near(a, b) = (q, r) + * + * where q is the nearest integer to the quotient a / b (the + * nearest even integer in the case of a tie) and r == a - q * b. + * Hence q * b = a - r is the nearest multiple of b to a, + * preferring even multiples in the case of a tie. + * + * So the nearest multiple of 10**n to m is: + * + * m - divmod_near(m, 10**n)[1]. + */ + if (!PyArg_ParseTuple(args, "|O", &o_ndigits)) + return NULL; + if (o_ndigits == NULL) + return long_long(self); + + ndigits = PyNumber_Index(o_ndigits); + if (ndigits == NULL) + return NULL; + + /* if ndigits >= 0 then no rounding is necessary; return self unchanged */ + if (Py_SIZE(ndigits) >= 0) { + Py_DECREF(ndigits); + return long_long(self); + } + + /* result = self - divmod_near(self, 10 ** -ndigits)[1] */ + temp = long_neg((PyLongObject*)ndigits); + Py_DECREF(ndigits); + ndigits = temp; + if (ndigits == NULL) + return NULL; + + result = PyLong_FromLong(10L); + if (result == NULL) { + Py_DECREF(ndigits); + return NULL; + } + + temp = long_pow(result, ndigits, Py_None); + Py_DECREF(ndigits); + Py_DECREF(result); + result = temp; + if (result == NULL) + return NULL; + + temp = _PyLong_DivmodNear(self, result); + Py_DECREF(result); + result = temp; + if (result == NULL) + return NULL; + + temp = long_sub((PyLongObject *)self, + (PyLongObject *)PyTuple_GET_ITEM(result, 1)); + Py_DECREF(result); + result = temp; + + return result; +} + +static PyObject * +long_sizeof(PyLongObject *v) +{ + Py_ssize_t res; + + res = offsetof(PyLongObject, ob_digit) + ABS(Py_SIZE(v))*sizeof(digit); + return PyLong_FromSsize_t(res); +} + +static PyObject * +long_bit_length(PyLongObject *v) +{ + PyLongObject *result, *x, *y; + Py_ssize_t ndigits, msd_bits = 0; + digit msd; + + assert(v != NULL); + assert(PyLong_Check(v)); + + ndigits = ABS(Py_SIZE(v)); + if (ndigits == 0) + return PyLong_FromLong(0); + + msd = v->ob_digit[ndigits-1]; + while (msd >= 32) { + msd_bits += 6; + msd >>= 6; + } + msd_bits += (long)(BitLengthTable[msd]); + + if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) + return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); + + /* expression above may overflow; use Python integers instead */ + result = (PyLongObject *)PyLong_FromSsize_t(ndigits - 1); + if (result == NULL) + return NULL; + x = (PyLongObject *)PyLong_FromLong(PyLong_SHIFT); + if (x == NULL) + goto error; + y = (PyLongObject *)long_mul(result, x); + Py_DECREF(x); + if (y == NULL) + goto error; + Py_DECREF(result); + result = y; + + x = (PyLongObject *)PyLong_FromLong((long)msd_bits); + if (x == NULL) + goto error; + y = (PyLongObject *)long_add(result, x); + Py_DECREF(x); + if (y == NULL) + goto error; + Py_DECREF(result); + result = y; + + return (PyObject *)result; + + error: + Py_DECREF(result); + return NULL; +} + +PyDoc_STRVAR(long_bit_length_doc, +"int.bit_length() -> int\n\ +\n\ +Number of bits necessary to represent self in binary.\n\ +>>> bin(37)\n\ +'0b100101'\n\ +>>> (37).bit_length()\n\ +6"); + +#if 0 +static PyObject * +long_is_finite(PyObject *v) +{ + Py_RETURN_TRUE; +} +#endif + + +static PyObject * +long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds) +{ + PyObject *byteorder_str; + PyObject *is_signed_obj = NULL; + Py_ssize_t length; + int little_endian; + int is_signed; + PyObject *bytes; + static char *kwlist[] = {"length", "byteorder", "signed", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "nU|O:to_bytes", kwlist, + &length, &byteorder_str, + &is_signed_obj)) + return NULL; + + if (args != NULL && Py_SIZE(args) > 2) { + PyErr_SetString(PyExc_TypeError, + "'signed' is a keyword-only argument"); + return NULL; + } + + if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little")) + little_endian = 1; + else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big")) + little_endian = 0; + else { + PyErr_SetString(PyExc_ValueError, + "byteorder must be either 'little' or 'big'"); + return NULL; + } + + if (is_signed_obj != NULL) { + int cmp = PyObject_IsTrue(is_signed_obj); + if (cmp < 0) + return NULL; + is_signed = cmp ? 1 : 0; + } + else { + /* If the signed argument was omitted, use False as the + default. */ + is_signed = 0; + } + + if (length < 0) { + PyErr_SetString(PyExc_ValueError, + "length argument must be non-negative"); + return NULL; + } + + bytes = PyBytes_FromStringAndSize(NULL, length); + if (bytes == NULL) + return NULL; + + if (_PyLong_AsByteArray(v, (unsigned char *)PyBytes_AS_STRING(bytes), + length, little_endian, is_signed) < 0) { + Py_DECREF(bytes); + return NULL; + } + + return bytes; +} + +PyDoc_STRVAR(long_to_bytes_doc, +"int.to_bytes(length, byteorder, *, signed=False) -> bytes\n\ +\n\ +Return an array of bytes representing an integer.\n\ +\n\ +The integer is represented using length bytes. An OverflowError is\n\ +raised if the integer is not representable with the given number of\n\ +bytes.\n\ +\n\ +The byteorder argument determines the byte order used to represent the\n\ +integer. If byteorder is 'big', the most significant byte is at the\n\ +beginning of the byte array. If byteorder is 'little', the most\n\ +significant byte is at the end of the byte array. To request the native\n\ +byte order of the host system, use `sys.byteorder' as the byte order value.\n\ +\n\ +The signed keyword-only argument determines whether two's complement is\n\ +used to represent the integer. If signed is False and a negative integer\n\ +is given, an OverflowError is raised."); + +static PyObject * +long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *byteorder_str; + PyObject *is_signed_obj = NULL; + int little_endian; + int is_signed; + PyObject *obj; + PyObject *bytes; + PyObject *long_obj; + static char *kwlist[] = {"bytes", "byteorder", "signed", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OU|O:from_bytes", kwlist, + &obj, &byteorder_str, + &is_signed_obj)) + return NULL; + + if (args != NULL && Py_SIZE(args) > 2) { + PyErr_SetString(PyExc_TypeError, + "'signed' is a keyword-only argument"); + return NULL; + } + + if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little")) + little_endian = 1; + else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big")) + little_endian = 0; + else { + PyErr_SetString(PyExc_ValueError, + "byteorder must be either 'little' or 'big'"); + return NULL; + } + + if (is_signed_obj != NULL) { + int cmp = PyObject_IsTrue(is_signed_obj); + if (cmp < 0) + return NULL; + is_signed = cmp ? 1 : 0; + } + else { + /* If the signed argument was omitted, use False as the + default. */ + is_signed = 0; + } + + bytes = PyObject_Bytes(obj); + if (bytes == NULL) + return NULL; + + long_obj = _PyLong_FromByteArray( + (unsigned char *)PyBytes_AS_STRING(bytes), Py_SIZE(bytes), + little_endian, is_signed); + Py_DECREF(bytes); + + /* If from_bytes() was used on subclass, allocate new subclass + * instance, initialize it with decoded long value and return it. + */ + if (type != &PyLong_Type && PyType_IsSubtype(type, &PyLong_Type)) { + PyLongObject *newobj; + int i; + Py_ssize_t n = ABS(Py_SIZE(long_obj)); + + newobj = (PyLongObject *)type->tp_alloc(type, n); + if (newobj == NULL) { + Py_DECREF(long_obj); + return NULL; + } + assert(PyLong_Check(newobj)); + Py_SIZE(newobj) = Py_SIZE(long_obj); + for (i = 0; i < n; i++) { + newobj->ob_digit[i] = + ((PyLongObject *)long_obj)->ob_digit[i]; + } + Py_DECREF(long_obj); + return (PyObject *)newobj; + } + + return long_obj; +} + +PyDoc_STRVAR(long_from_bytes_doc, +"int.from_bytes(bytes, byteorder, *, signed=False) -> int\n\ +\n\ +Return the integer represented by the given array of bytes.\n\ +\n\ +The bytes argument must either support the buffer protocol or be an\n\ +iterable object producing bytes. Bytes and bytearray are examples of\n\ +built-in objects that support the buffer protocol.\n\ +\n\ +The byteorder argument determines the byte order used to represent the\n\ +integer. If byteorder is 'big', the most significant byte is at the\n\ +beginning of the byte array. If byteorder is 'little', the most\n\ +significant byte is at the end of the byte array. To request the native\n\ +byte order of the host system, use `sys.byteorder' as the byte order value.\n\ +\n\ +The signed keyword-only argument indicates whether two's complement is\n\ +used to represent the integer."); + +static PyMethodDef long_methods[] = { + {"conjugate", (PyCFunction)long_long, METH_NOARGS, + "Returns self, the complex conjugate of any int."}, + {"bit_length", (PyCFunction)long_bit_length, METH_NOARGS, + long_bit_length_doc}, +#if 0 + {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS, + "Returns always True."}, +#endif + {"to_bytes", (PyCFunction)long_to_bytes, + METH_VARARGS|METH_KEYWORDS, long_to_bytes_doc}, + {"from_bytes", (PyCFunction)long_from_bytes, + METH_VARARGS|METH_KEYWORDS|METH_CLASS, long_from_bytes_doc}, + {"__trunc__", (PyCFunction)long_long, METH_NOARGS, + "Truncating an Integral returns itself."}, + {"__floor__", (PyCFunction)long_long, METH_NOARGS, + "Flooring an Integral returns itself."}, + {"__ceil__", (PyCFunction)long_long, METH_NOARGS, + "Ceiling of an Integral returns itself."}, + {"__round__", (PyCFunction)long_round, METH_VARARGS, + "Rounding an Integral returns itself.\n" + "Rounding with an ndigits argument also returns an integer."}, + {"__getnewargs__", (PyCFunction)long_getnewargs, METH_NOARGS}, + {"__format__", (PyCFunction)long__format__, METH_VARARGS}, + {"__sizeof__", (PyCFunction)long_sizeof, METH_NOARGS, + "Returns size in memory, in bytes"}, + {NULL, NULL} /* sentinel */ +}; + +static PyGetSetDef long_getset[] = { + {"real", + (getter)long_long, (setter)NULL, + "the real part of a complex number", + NULL}, + {"imag", + (getter)long_get0, (setter)NULL, + "the imaginary part of a complex number", + NULL}, + {"numerator", + (getter)long_long, (setter)NULL, + "the numerator of a rational number in lowest terms", + NULL}, + {"denominator", + (getter)long_get1, (setter)NULL, + "the denominator of a rational number in lowest terms", + NULL}, + {NULL} /* Sentinel */ +}; + +PyDoc_STRVAR(long_doc, +"int(x[, base]) -> integer\n\ +\n\ +Convert a string or number to an integer, if possible. A floating\n\ +point argument will be truncated towards zero (this does not include a\n\ +string representation of a floating point number!) When converting a\n\ +string, use the optional base. It is an error to supply a base when\n\ +converting a non-string."); + +static PyNumberMethods long_as_number = { + (binaryfunc)long_add, /*nb_add*/ + (binaryfunc)long_sub, /*nb_subtract*/ + (binaryfunc)long_mul, /*nb_multiply*/ + long_mod, /*nb_remainder*/ + long_divmod, /*nb_divmod*/ + long_pow, /*nb_power*/ + (unaryfunc)long_neg, /*nb_negative*/ + (unaryfunc)long_long, /*tp_positive*/ + (unaryfunc)long_abs, /*tp_absolute*/ + (inquiry)long_bool, /*tp_bool*/ + (unaryfunc)long_invert, /*nb_invert*/ + long_lshift, /*nb_lshift*/ + (binaryfunc)long_rshift, /*nb_rshift*/ + long_and, /*nb_and*/ + long_xor, /*nb_xor*/ + long_or, /*nb_or*/ + long_long, /*nb_int*/ + 0, /*nb_reserved*/ + long_float, /*nb_float*/ + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + long_div, /* nb_floor_divide */ + long_true_divide, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + long_long, /* nb_index */ +}; + +PyTypeObject PyLong_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "int", /* tp_name */ + offsetof(PyLongObject, ob_digit), /* tp_basicsize */ + sizeof(digit), /* tp_itemsize */ + long_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + long_to_decimal_string, /* tp_repr */ + &long_as_number, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)long_hash, /* tp_hash */ + 0, /* tp_call */ + long_to_decimal_string, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_LONG_SUBCLASS, /* tp_flags */ + long_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + long_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + long_methods, /* tp_methods */ + 0, /* tp_members */ + long_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + long_new, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + +static PyTypeObject Int_InfoType; + +PyDoc_STRVAR(int_info__doc__, +"sys.int_info\n\ +\n\ +A struct sequence that holds information about Python's\n\ +internal representation of integers. The attributes are read only."); + +static PyStructSequence_Field int_info_fields[] = { + {"bits_per_digit", "size of a digit in bits"}, + {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {NULL, NULL} +}; + +static PyStructSequence_Desc int_info_desc = { + "sys.int_info", /* name */ + int_info__doc__, /* doc */ + int_info_fields, /* fields */ + 2 /* number of fields */ +}; + +PyObject * +PyLong_GetInfo(void) +{ + PyObject* int_info; + int field = 0; + int_info = PyStructSequence_New(&Int_InfoType); + if (int_info == NULL) + return NULL; + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(PyLong_SHIFT)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(sizeof(digit))); + if (PyErr_Occurred()) { + Py_CLEAR(int_info); + return NULL; + } + return int_info; +} + +int +_PyLong_Init(void) +{ +#if NSMALLNEGINTS + NSMALLPOSINTS > 0 + int ival, size; + PyLongObject *v = small_ints; + + for (ival = -NSMALLNEGINTS; ival < NSMALLPOSINTS; ival++, v++) { + size = (ival < 0) ? -1 : ((ival == 0) ? 0 : 1); + if (Py_TYPE(v) == &PyLong_Type) { + /* The element is already initialized, most likely + * the Python interpreter was initialized before. + */ + Py_ssize_t refcnt; + PyObject* op = (PyObject*)v; + + refcnt = Py_REFCNT(op) < 0 ? 0 : Py_REFCNT(op); + _Py_NewReference(op); + /* _Py_NewReference sets the ref count to 1 but + * the ref count might be larger. Set the refcnt + * to the original refcnt + 1 */ + Py_REFCNT(op) = refcnt + 1; + assert(Py_SIZE(op) == size); + assert(v->ob_digit[0] == abs(ival)); + } + else { + PyObject_INIT(v, &PyLong_Type); + } + Py_SIZE(v) = size; + v->ob_digit[0] = abs(ival); + } +#endif + /* initialize int_info */ + if (Int_InfoType.tp_name == 0) + PyStructSequence_InitType(&Int_InfoType, &int_info_desc); + + return 1; +} + +void +PyLong_Fini(void) +{ + /* Integers are currently statically allocated. Py_DECREF is not + needed, but Python must forget about the reference or multiple + reinitializations will fail. */ +#if NSMALLNEGINTS + NSMALLPOSINTS > 0 + int i; + PyLongObject *v = small_ints; + for (i = 0; i < NSMALLNEGINTS + NSMALLPOSINTS; i++, v++) { + _Py_DEC_REFTOTAL; + _Py_ForgetReference((PyObject*)v); + } +#endif +} diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/object.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/object.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,1737 @@ + +/* Generic object operations; and implementation of None */ + +#include "Python.h" +#include "frameobject.h" + +/* Object allocation routines used by NEWOBJ and NEWVAROBJ macros. + These are used by the individual routines for object creation. + Do not call them otherwise, they do not initialize the object! */ + +void +Py_IncRef(PyObject *o) +{ + Py_XINCREF(o); +} + +void +Py_DecRef(PyObject *o) +{ + Py_XDECREF(o); +} + +PyObject * +PyObject_Init(PyObject *op, PyTypeObject *tp) +{ + if (op == NULL) + return PyErr_NoMemory(); + /* Any changes should be reflected in PyObject_INIT (objimpl.h) */ + Py_TYPE(op) = tp; + _Py_NewReference(op); + return op; +} + +PyVarObject * +PyObject_InitVar(PyVarObject *op, PyTypeObject *tp, Py_ssize_t size) +{ + if (op == NULL) + return (PyVarObject *) PyErr_NoMemory(); + /* Any changes should be reflected in PyObject_INIT_VAR */ + op->ob_size = size; + Py_TYPE(op) = tp; + _Py_NewReference((PyObject *)op); + return op; +} + +PyObject * +_PyObject_New(PyTypeObject *tp) +{ + PyObject *op; + op = (PyObject *) PyObject_MALLOC(_PyObject_SIZE(tp)); + if (op == NULL) + return PyErr_NoMemory(); + return PyObject_INIT(op, tp); +} + +PyVarObject * +_PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + PyVarObject *op; + const size_t size = _PyObject_VAR_SIZE(tp, nitems); + op = (PyVarObject *) PyObject_MALLOC(size); + if (op == NULL) + return (PyVarObject *)PyErr_NoMemory(); + return PyObject_INIT_VAR(op, tp, nitems); +} + +int +PyObject_Print(PyObject *op, FILE *fp, int flags) +{ + int ret = 0; + if (PyErr_CheckSignals()) + return -1; +#ifdef USE_STACKCHECK + if (PyOS_CheckStack()) { + PyErr_SetString(PyExc_MemoryError, "stack overflow"); + return -1; + } +#endif + clearerr(fp); /* Clear any previous error condition */ + if (op == NULL) { + Py_BEGIN_ALLOW_THREADS + fprintf(fp, ""); + Py_END_ALLOW_THREADS + } + else { + if (op->ob_refcnt <= 0) + /* XXX(twouters) cast refcount to long until %zd is + universally available */ + Py_BEGIN_ALLOW_THREADS + fprintf(fp, "", + (long)op->ob_refcnt, op); + Py_END_ALLOW_THREADS + else { + PyObject *s; + if (flags & Py_PRINT_RAW) + s = PyObject_Str(op); + else + s = PyObject_Repr(op); + if (s == NULL) + ret = -1; + else if (PyBytes_Check(s)) { + fwrite(PyBytes_AS_STRING(s), 1, + PyBytes_GET_SIZE(s), fp); + } + else if (PyUnicode_Check(s)) { + PyObject *t; + t = PyUnicode_AsEncodedString(s, "utf-8", "backslashreplace"); + if (t == NULL) + ret = 0; + else { + fwrite(PyBytes_AS_STRING(t), 1, + PyBytes_GET_SIZE(t), fp); + Py_DECREF(t); + } + } + else { + PyErr_Format(PyExc_TypeError, + "str() or repr() returned '%.100s'", + s->ob_type->tp_name); + ret = -1; + } + Py_XDECREF(s); + } + } + if (ret == 0) { + if (ferror(fp)) { + PyErr_SetFromErrno(PyExc_IOError); + clearerr(fp); + ret = -1; + } + } + return ret; +} + +/* For debugging convenience. Set a breakpoint here and call it from your DLL */ +void +_Py_BreakPoint(void) +{ +} + + +/* For debugging convenience. See Misc/gdbinit for some useful gdb hooks */ +void +_PyObject_Dump(PyObject* op) +{ + if (op == NULL) + fprintf(stderr, "NULL\n"); + else { +#ifdef WITH_THREAD + PyGILState_STATE gil; +#endif + fprintf(stderr, "object : "); +#ifdef WITH_THREAD + gil = PyGILState_Ensure(); +#endif + (void)PyObject_Print(op, stderr, 0); +#ifdef WITH_THREAD + PyGILState_Release(gil); +#endif + /* XXX(twouters) cast refcount to long until %zd is + universally available */ + fprintf(stderr, "\n" + "type : %s\n" + "refcount: %ld\n" + "address : %p\n", + Py_TYPE(op)==NULL ? "NULL" : Py_TYPE(op)->tp_name, + (long)op->ob_refcnt, + op); + } +} + +PyObject * +PyObject_Repr(PyObject *v) +{ + PyObject *res; + if (PyErr_CheckSignals()) + return NULL; +#ifdef USE_STACKCHECK + if (PyOS_CheckStack()) { + PyErr_SetString(PyExc_MemoryError, "stack overflow"); + return NULL; + } +#endif + if (v == NULL) + return PyUnicode_FromString(""); + if (Py_TYPE(v)->tp_repr == NULL) + return PyUnicode_FromFormat("<%s object at %p>", + v->ob_type->tp_name, v); + res = (*v->ob_type->tp_repr)(v); + if (res == NULL) + return NULL; + if (!PyUnicode_Check(res)) { + PyErr_Format(PyExc_TypeError, + "__repr__ returned non-string (type %.200s)", + res->ob_type->tp_name); + Py_DECREF(res); + return NULL; + } +#ifndef Py_DEBUG + if (PyUnicode_READY(res) < 0) + return NULL; +#endif + return res; +} + +PyObject * +PyObject_Str(PyObject *v) +{ + PyObject *res; + if (PyErr_CheckSignals()) + return NULL; +#ifdef USE_STACKCHECK + if (PyOS_CheckStack()) { + PyErr_SetString(PyExc_MemoryError, "stack overflow"); + return NULL; + } +#endif + if (v == NULL) + return PyUnicode_FromString(""); + if (PyUnicode_CheckExact(v)) { +#ifndef Py_DEBUG + if (PyUnicode_READY(v) < 0) + return NULL; +#endif + Py_INCREF(v); + return v; + } + if (Py_TYPE(v)->tp_str == NULL) + return PyObject_Repr(v); + + /* It is possible for a type to have a tp_str representation that loops + infinitely. */ + if (Py_EnterRecursiveCall(" while getting the str of an object")) + return NULL; + res = (*Py_TYPE(v)->tp_str)(v); + Py_LeaveRecursiveCall(); + if (res == NULL) + return NULL; + if (!PyUnicode_Check(res)) { + PyErr_Format(PyExc_TypeError, + "__str__ returned non-string (type %.200s)", + Py_TYPE(res)->tp_name); + Py_DECREF(res); + return NULL; + } +#ifndef Py_DEBUG + if (PyUnicode_READY(res) < 0) + return NULL; +#endif + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +PyObject * +PyObject_ASCII(PyObject *v) +{ + PyObject *repr, *ascii, *res; + + repr = PyObject_Repr(v); + if (repr == NULL) + return NULL; + + /* repr is guaranteed to be a PyUnicode object by PyObject_Repr */ + ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace"); + Py_DECREF(repr); + if (ascii == NULL) + return NULL; + + res = PyUnicode_DecodeASCII( + PyBytes_AS_STRING(ascii), + PyBytes_GET_SIZE(ascii), + NULL); + + Py_DECREF(ascii); + return res; +} + +PyObject * +PyObject_Bytes(PyObject *v) +{ + PyObject *result, *func; + static PyObject *bytesstring = NULL; + + if (v == NULL) + return PyBytes_FromString(""); + + if (PyBytes_CheckExact(v)) { + Py_INCREF(v); + return v; + } + + func = _PyObject_LookupSpecial(v, "__bytes__", &bytesstring); + if (func != NULL) { + result = PyObject_CallFunctionObjArgs(func, NULL); + Py_DECREF(func); + if (result == NULL) + return NULL; + if (!PyBytes_Check(result)) { + PyErr_Format(PyExc_TypeError, + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(result)->tp_name); + Py_DECREF(result); + return NULL; + } + return result; + } + else if (PyErr_Occurred()) + return NULL; + return PyBytes_FromObject(v); +} + +/* For Python 3.0.1 and later, the old three-way comparison has been + completely removed in favour of rich comparisons. PyObject_Compare() and + PyObject_Cmp() are gone, and the builtin cmp function no longer exists. + The old tp_compare slot has been renamed to tp_reserved, and should no + longer be used. Use tp_richcompare instead. + + See (*) below for practical amendments. + + tp_richcompare gets called with a first argument of the appropriate type + and a second object of an arbitrary type. We never do any kind of + coercion. + + The tp_richcompare slot should return an object, as follows: + + NULL if an exception occurred + NotImplemented if the requested comparison is not implemented + any other false value if the requested comparison is false + any other true value if the requested comparison is true + + The PyObject_RichCompare[Bool]() wrappers raise TypeError when they get + NotImplemented. + + (*) Practical amendments: + + - If rich comparison returns NotImplemented, == and != are decided by + comparing the object pointer (i.e. falling back to the base object + implementation). + +*/ + +/* Map rich comparison operators to their swapped version, e.g. LT <--> GT */ +int _Py_SwappedOp[] = {Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE}; + +static char *opstrings[] = {"<", "<=", "==", "!=", ">", ">="}; + +/* Perform a rich comparison, raising TypeError when the requested comparison + operator is not supported. */ +static PyObject * +do_richcompare(PyObject *v, PyObject *w, int op) +{ + richcmpfunc f; + PyObject *res; + int checked_reverse_op = 0; + + if (v->ob_type != w->ob_type && + PyType_IsSubtype(w->ob_type, v->ob_type) && + (f = w->ob_type->tp_richcompare) != NULL) { + checked_reverse_op = 1; + res = (*f)(w, v, _Py_SwappedOp[op]); + if (res != Py_NotImplemented) + return res; + Py_DECREF(res); + } + if ((f = v->ob_type->tp_richcompare) != NULL) { + res = (*f)(v, w, op); + if (res != Py_NotImplemented) + return res; + Py_DECREF(res); + } + if (!checked_reverse_op && (f = w->ob_type->tp_richcompare) != NULL) { + res = (*f)(w, v, _Py_SwappedOp[op]); + if (res != Py_NotImplemented) + return res; + Py_DECREF(res); + } + /* If neither object implements it, provide a sensible default + for == and !=, but raise an exception for ordering. */ + switch (op) { + case Py_EQ: + res = (v == w) ? Py_True : Py_False; + break; + case Py_NE: + res = (v != w) ? Py_True : Py_False; + break; + default: + /* XXX Special-case None so it doesn't show as NoneType() */ + PyErr_Format(PyExc_TypeError, + "unorderable types: %.100s() %s %.100s()", + v->ob_type->tp_name, + opstrings[op], + w->ob_type->tp_name); + return NULL; + } + Py_INCREF(res); + return res; +} + +/* Perform a rich comparison with object result. This wraps do_richcompare() + with a check for NULL arguments and a recursion check. */ + +PyObject * +PyObject_RichCompare(PyObject *v, PyObject *w, int op) +{ + PyObject *res; + + assert(Py_LT <= op && op <= Py_GE); + if (v == NULL || w == NULL) { + if (!PyErr_Occurred()) + PyErr_BadInternalCall(); + return NULL; + } + if (Py_EnterRecursiveCall(" in comparison")) + return NULL; + res = do_richcompare(v, w, op); + Py_LeaveRecursiveCall(); + return res; +} + +/* Perform a rich comparison with integer result. This wraps + PyObject_RichCompare(), returning -1 for error, 0 for false, 1 for true. */ +int +PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) +{ + PyObject *res; + int ok; + + /* Quick result when objects are the same. + Guarantees that identity implies equality. */ + if (v == w) { + if (op == Py_EQ) + return 1; + else if (op == Py_NE) + return 0; + } + + res = PyObject_RichCompare(v, w, op); + if (res == NULL) + return -1; + if (PyBool_Check(res)) + ok = (res == Py_True); + else + ok = PyObject_IsTrue(res); + Py_DECREF(res); + return ok; +} + +/* Set of hash utility functions to help maintaining the invariant that + if a==b then hash(a)==hash(b) + + All the utility functions (_Py_Hash*()) return "-1" to signify an error. +*/ + +/* For numeric types, the hash of a number x is based on the reduction + of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that + hash(x) == hash(y) whenever x and y are numerically equal, even if + x and y have different types. + + A quick summary of the hashing strategy: + + (1) First define the 'reduction of x modulo P' for any rational + number x; this is a standard extension of the usual notion of + reduction modulo P for integers. If x == p/q (written in lowest + terms), the reduction is interpreted as the reduction of p times + the inverse of the reduction of q, all modulo P; if q is exactly + divisible by P then define the reduction to be infinity. So we've + got a well-defined map + + reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }. + + (2) Now for a rational number x, define hash(x) by: + + reduce(x) if x >= 0 + -reduce(-x) if x < 0 + + If the result of the reduction is infinity (this is impossible for + integers, floats and Decimals) then use the predefined hash value + _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. + _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the + hashes of float and Decimal infinities and nans. + + A selling point for the above strategy is that it makes it possible + to compute hashes of decimal and binary floating-point numbers + efficiently, even if the exponent of the binary or decimal number + is large. The key point is that + + reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS) + + provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a + binary or decimal float is never infinity, since the denominator is a power + of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have, + for nonnegative x, + + reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS + + reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS + + and reduce(10**e) can be computed efficiently by the usual modular + exponentiation algorithm. For reduce(2**e) it's even better: since + P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication + by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits. + + */ + +Py_hash_t +_Py_HashDouble(double v) +{ + int e, sign; + double m; + Py_uhash_t x, y; + + if (!Py_IS_FINITE(v)) { + if (Py_IS_INFINITY(v)) + return v > 0 ? _PyHASH_INF : -_PyHASH_INF; + else + return _PyHASH_NAN; + } + + m = frexp(v, &e); + + sign = 1; + if (m < 0) { + sign = -1; + m = -m; + } + + /* process 28 bits at a time; this should work well both for binary + and hexadecimal floating point. */ + x = 0; + while (m) { + x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28); + m *= 268435456.0; /* 2**28 */ + e -= 28; + y = (Py_uhash_t)m; /* pull out integer part */ + m -= y; + x += y; + if (x >= _PyHASH_MODULUS) + x -= _PyHASH_MODULUS; + } + + /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */ + e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS); + x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e); + + x = x * sign; + if (x == (Py_uhash_t)-1) + x = (Py_uhash_t)-2; + return (Py_hash_t)x; +} + +Py_hash_t +_Py_HashPointer(void *p) +{ + Py_hash_t x; + size_t y = (size_t)p; + /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid + excessive hash collisions for dicts and sets */ + y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4)); + x = (Py_hash_t)y; + if (x == -1) + x = -2; + return x; +} + +Py_hash_t +_Py_HashBytes(unsigned char *p, Py_ssize_t len) +{ + Py_uhash_t x; + Py_ssize_t i; + + x = (Py_uhash_t) *p << 7; + for (i = 0; i < len; i++) + x = (1000003U * x) ^ (Py_uhash_t) *p++; + x ^= (Py_uhash_t) len; + if (x == -1) + x = -2; + return x; +} + +Py_hash_t +PyObject_HashNotImplemented(PyObject *v) +{ + PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", + Py_TYPE(v)->tp_name); + return -1; +} + +Py_hash_t +PyObject_Hash(PyObject *v) +{ + PyTypeObject *tp = Py_TYPE(v); + if (tp->tp_hash != NULL) + return (*tp->tp_hash)(v); + /* To keep to the general practice that inheriting + * solely from object in C code should work without + * an explicit call to PyType_Ready, we implicitly call + * PyType_Ready here and then check the tp_hash slot again + */ + if (tp->tp_dict == NULL) { + if (PyType_Ready(tp) < 0) + return -1; + if (tp->tp_hash != NULL) + return (*tp->tp_hash)(v); + } + /* Otherwise, the object can't be hashed */ + return PyObject_HashNotImplemented(v); +} + +PyObject * +PyObject_GetAttrString(PyObject *v, const char *name) +{ + PyObject *w, *res; + + if (Py_TYPE(v)->tp_getattr != NULL) + return (*Py_TYPE(v)->tp_getattr)(v, (char*)name); + w = PyUnicode_InternFromString(name); + if (w == NULL) + return NULL; + res = PyObject_GetAttr(v, w); + Py_XDECREF(w); + return res; +} + +int +PyObject_HasAttrString(PyObject *v, const char *name) +{ + PyObject *res = PyObject_GetAttrString(v, name); + if (res != NULL) { + Py_DECREF(res); + return 1; + } + PyErr_Clear(); + return 0; +} + +int +PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w) +{ + PyObject *s; + int res; + + if (Py_TYPE(v)->tp_setattr != NULL) + return (*Py_TYPE(v)->tp_setattr)(v, (char*)name, w); + s = PyUnicode_InternFromString(name); + if (s == NULL) + return -1; + res = PyObject_SetAttr(v, s, w); + Py_XDECREF(s); + return res; +} + +int +_PyObject_IsAbstract(PyObject *obj) +{ + int res; + PyObject* isabstract; + _Py_IDENTIFIER(__isabstractmethod__); + + if (obj == NULL) + return 0; + + isabstract = _PyObject_GetAttrId(obj, &PyId___isabstractmethod__); + if (isabstract == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + return 0; + } + return -1; + } + res = PyObject_IsTrue(isabstract); + Py_DECREF(isabstract); + return res; +} + +PyObject * +_PyObject_GetAttrId(PyObject *v, _Py_Identifier *name) +{ + PyObject *result; + PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ + if (!oname) + return NULL; + result = PyObject_GetAttr(v, oname); + return result; +} + +int +_PyObject_HasAttrId(PyObject *v, _Py_Identifier *name) +{ + int result; + PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ + if (!oname) + return -1; + result = PyObject_HasAttr(v, oname); + return result; +} + +int +_PyObject_SetAttrId(PyObject *v, _Py_Identifier *name, PyObject *w) +{ + int result; + PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ + if (!oname) + return -1; + result = PyObject_SetAttr(v, oname, w); + return result; +} + +PyObject * +PyObject_GetAttr(PyObject *v, PyObject *name) +{ + PyTypeObject *tp = Py_TYPE(v); + + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; + } + if (tp->tp_getattro != NULL) + return (*tp->tp_getattro)(v, name); + if (tp->tp_getattr != NULL) { + char *name_str = _PyUnicode_AsString(name); + if (name_str == NULL) + return NULL; + return (*tp->tp_getattr)(v, name_str); + } + PyErr_Format(PyExc_AttributeError, + "'%.50s' object has no attribute '%U'", + tp->tp_name, name); + return NULL; +} + +int +PyObject_HasAttr(PyObject *v, PyObject *name) +{ + PyObject *res = PyObject_GetAttr(v, name); + if (res != NULL) { + Py_DECREF(res); + return 1; + } + PyErr_Clear(); + return 0; +} + +int +PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) +{ + PyTypeObject *tp = Py_TYPE(v); + int err; + + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; + } + Py_INCREF(name); + + PyUnicode_InternInPlace(&name); + if (tp->tp_setattro != NULL) { + err = (*tp->tp_setattro)(v, name, value); + Py_DECREF(name); + return err; + } + if (tp->tp_setattr != NULL) { + char *name_str = _PyUnicode_AsString(name); + if (name_str == NULL) + return -1; + err = (*tp->tp_setattr)(v, name_str, value); + Py_DECREF(name); + return err; + } + Py_DECREF(name); + assert(name->ob_refcnt >= 1); + if (tp->tp_getattr == NULL && tp->tp_getattro == NULL) + PyErr_Format(PyExc_TypeError, + "'%.100s' object has no attributes " + "(%s .%U)", + tp->tp_name, + value==NULL ? "del" : "assign to", + name); + else + PyErr_Format(PyExc_TypeError, + "'%.100s' object has only read-only attributes " + "(%s .%U)", + tp->tp_name, + value==NULL ? "del" : "assign to", + name); + return -1; +} + +/* Helper to get a pointer to an object's __dict__ slot, if any */ + +PyObject ** +_PyObject_GetDictPtr(PyObject *obj) +{ + Py_ssize_t dictoffset; + PyTypeObject *tp = Py_TYPE(obj); + + dictoffset = tp->tp_dictoffset; + if (dictoffset == 0) + return NULL; + if (dictoffset < 0) { + Py_ssize_t tsize; + size_t size; + + tsize = ((PyVarObject *)obj)->ob_size; + if (tsize < 0) + tsize = -tsize; + size = _PyObject_VAR_SIZE(tp, tsize); + + dictoffset += (long)size; + assert(dictoffset > 0); + assert(dictoffset % SIZEOF_VOID_P == 0); + } + return (PyObject **) ((char *)obj + dictoffset); +} + +PyObject * +PyObject_SelfIter(PyObject *obj) +{ + Py_INCREF(obj); + return obj; +} + +/* Helper used when the __next__ method is removed from a type: + tp_iternext is never NULL and can be safely called without checking + on every iteration. + */ + +PyObject * +_PyObject_NextNotImplemented(PyObject *self) +{ + PyErr_Format(PyExc_TypeError, + "'%.200s' object is not iterable", + Py_TYPE(self)->tp_name); + return NULL; +} + +/* Generic GetAttr functions - put these in your tp_[gs]etattro slot */ + +PyObject * +_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict) +{ + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr = NULL; + PyObject *res = NULL; + descrgetfunc f; + Py_ssize_t dictoffset; + PyObject **dictptr; + + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return NULL; + } + else + Py_INCREF(name); + + if (tp->tp_dict == NULL) { + if (PyType_Ready(tp) < 0) + goto done; + } + + descr = _PyType_Lookup(tp, name); + Py_XINCREF(descr); + + f = NULL; + if (descr != NULL) { + f = descr->ob_type->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + res = f(descr, obj, (PyObject *)obj->ob_type); + Py_DECREF(descr); + goto done; + } + } + + if (dict == NULL) { + /* Inline _PyObject_GetDictPtr */ + dictoffset = tp->tp_dictoffset; + if (dictoffset != 0) { + if (dictoffset < 0) { + Py_ssize_t tsize; + size_t size; + + tsize = ((PyVarObject *)obj)->ob_size; + if (tsize < 0) + tsize = -tsize; + size = _PyObject_VAR_SIZE(tp, tsize); + + dictoffset += (long)size; + assert(dictoffset > 0); + assert(dictoffset % SIZEOF_VOID_P == 0); + } + dictptr = (PyObject **) ((char *)obj + dictoffset); + dict = *dictptr; + } + } + if (dict != NULL) { + Py_INCREF(dict); + res = PyDict_GetItem(dict, name); + if (res != NULL) { + Py_INCREF(res); + Py_XDECREF(descr); + Py_DECREF(dict); + goto done; + } + Py_DECREF(dict); + } + + if (f != NULL) { + res = f(descr, obj, (PyObject *)Py_TYPE(obj)); + Py_DECREF(descr); + goto done; + } + + if (descr != NULL) { + res = descr; + /* descr was already increfed above */ + goto done; + } + + PyErr_Format(PyExc_AttributeError, + "'%.50s' object has no attribute '%U'", + tp->tp_name, name); + done: + Py_DECREF(name); + return res; +} + +PyObject * +PyObject_GenericGetAttr(PyObject *obj, PyObject *name) +{ + return _PyObject_GenericGetAttrWithDict(obj, name, NULL); +} + +int +_PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, + PyObject *value, PyObject *dict) +{ + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr; + descrsetfunc f; + PyObject **dictptr; + int res = -1; + + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + name->ob_type->tp_name); + return -1; + } + else + Py_INCREF(name); + + if (tp->tp_dict == NULL) { + if (PyType_Ready(tp) < 0) + goto done; + } + + descr = _PyType_Lookup(tp, name); + f = NULL; + if (descr != NULL) { + f = descr->ob_type->tp_descr_set; + if (f != NULL && PyDescr_IsData(descr)) { + res = f(descr, obj, value); + goto done; + } + } + + if (dict == NULL) { + dictptr = _PyObject_GetDictPtr(obj); + if (dictptr != NULL) { + dict = *dictptr; + if (dict == NULL && value != NULL) { + dict = PyDict_New(); + if (dict == NULL) + goto done; + *dictptr = dict; + } + } + } + if (dict != NULL) { + Py_INCREF(dict); + if (value == NULL) + res = PyDict_DelItem(dict, name); + else + res = PyDict_SetItem(dict, name, value); + if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetObject(PyExc_AttributeError, name); + Py_DECREF(dict); + goto done; + } + + if (f != NULL) { + res = f(descr, obj, value); + goto done; + } + + if (descr == NULL) { + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + tp->tp_name, name); + goto done; + } + + PyErr_Format(PyExc_AttributeError, + "'%.50s' object attribute '%U' is read-only", + tp->tp_name, name); + done: + Py_DECREF(name); + return res; +} + +int +PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) +{ + return _PyObject_GenericSetAttrWithDict(obj, name, value, NULL); +} + + +/* Test a value used as condition, e.g., in a for or if statement. + Return -1 if an error occurred */ + +int +PyObject_IsTrue(PyObject *v) +{ + Py_ssize_t res; + if (v == Py_True) + return 1; + if (v == Py_False) + return 0; + if (v == Py_None) + return 0; + else if (v->ob_type->tp_as_number != NULL && + v->ob_type->tp_as_number->nb_bool != NULL) + res = (*v->ob_type->tp_as_number->nb_bool)(v); + else if (v->ob_type->tp_as_mapping != NULL && + v->ob_type->tp_as_mapping->mp_length != NULL) + res = (*v->ob_type->tp_as_mapping->mp_length)(v); + else if (v->ob_type->tp_as_sequence != NULL && + v->ob_type->tp_as_sequence->sq_length != NULL) + res = (*v->ob_type->tp_as_sequence->sq_length)(v); + else + return 1; + /* if it is negative, it should be either -1 or -2 */ + return (res > 0) ? 1 : Py_SAFE_DOWNCAST(res, Py_ssize_t, int); +} + +/* equivalent of 'not v' + Return -1 if an error occurred */ + +int +PyObject_Not(PyObject *v) +{ + int res; + res = PyObject_IsTrue(v); + if (res < 0) + return res; + return res == 0; +} + +/* Test whether an object can be called */ + +int +PyCallable_Check(PyObject *x) +{ + if (x == NULL) + return 0; + return x->ob_type->tp_call != NULL; +} + + +/* Helper for PyObject_Dir without arguments: returns the local scope. */ +static PyObject * +_dir_locals(void) +{ + PyObject *names; + PyObject *locals = PyEval_GetLocals(); + + if (locals == NULL) { + PyErr_SetString(PyExc_SystemError, "frame does not exist"); + return NULL; + } + + names = PyMapping_Keys(locals); + if (!names) + return NULL; + if (!PyList_Check(names)) { + PyErr_Format(PyExc_TypeError, + "dir(): expected keys() of locals to be a list, " + "not '%.200s'", Py_TYPE(names)->tp_name); + Py_DECREF(names); + return NULL; + } + if (PyList_Sort(names)) { + Py_DECREF(names); + return NULL; + } + /* the locals don't need to be DECREF'd */ + return names; +} + +/* Helper for PyObject_Dir: object introspection. */ +static PyObject * +_dir_object(PyObject *obj) +{ + PyObject *result, *sorted; + static PyObject *dir_str = NULL; + PyObject *dirfunc = _PyObject_LookupSpecial(obj, "__dir__", &dir_str); + + assert(obj); + if (dirfunc == NULL) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_TypeError, "object does not provide __dir__"); + return NULL; + } + /* use __dir__ */ + result = PyObject_CallFunctionObjArgs(dirfunc, NULL); + Py_DECREF(dirfunc); + if (result == NULL) + return NULL; + /* return sorted(result) */ + sorted = PySequence_List(result); + Py_DECREF(result); + if (sorted == NULL) + return NULL; + if (PyList_Sort(sorted)) { + Py_DECREF(sorted); + return NULL; + } + return sorted; +} + +/* Implementation of dir() -- if obj is NULL, returns the names in the current + (local) scope. Otherwise, performs introspection of the object: returns a + sorted list of attribute names (supposedly) accessible from the object +*/ +PyObject * +PyObject_Dir(PyObject *obj) +{ + return (obj == NULL) ? _dir_locals() : _dir_object(obj); +} + +/* +None is a non-NULL undefined value. +There is (and should be!) no way to create other objects of this type, +so there is exactly one (which is indestructible, by the way). +*/ + +/* ARGSUSED */ +static PyObject * +none_repr(PyObject *op) +{ + return PyUnicode_FromString("None"); +} + +/* ARGUSED */ +static void +none_dealloc(PyObject* ignore) +{ + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref None out of existence. + */ + Py_FatalError("deallocating None"); +} + +static PyObject * +none_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + if (PyTuple_GET_SIZE(args) || (kwargs && PyDict_Size(kwargs))) { + PyErr_SetString(PyExc_TypeError, "NoneType takes no arguments"); + return NULL; + } + Py_RETURN_NONE; +} + +static int +none_bool(PyObject *v) +{ + return 0; +} + +static PyNumberMethods none_as_number = { + 0, /* nb_add */ + 0, /* nb_subtract */ + 0, /* nb_multiply */ + 0, /* nb_remainder */ + 0, /* nb_divmod */ + 0, /* nb_power */ + 0, /* nb_negative */ + 0, /* nb_positive */ + 0, /* nb_absolute */ + (inquiry)none_bool, /* nb_bool */ + 0, /* nb_invert */ + 0, /* nb_lshift */ + 0, /* nb_rshift */ + 0, /* nb_and */ + 0, /* nb_xor */ + 0, /* nb_or */ + 0, /* nb_int */ + 0, /* nb_reserved */ + 0, /* nb_float */ + 0, /* nb_inplace_add */ + 0, /* nb_inplace_subtract */ + 0, /* nb_inplace_multiply */ + 0, /* nb_inplace_remainder */ + 0, /* nb_inplace_power */ + 0, /* nb_inplace_lshift */ + 0, /* nb_inplace_rshift */ + 0, /* nb_inplace_and */ + 0, /* nb_inplace_xor */ + 0, /* nb_inplace_or */ + 0, /* nb_floor_divide */ + 0, /* nb_true_divide */ + 0, /* nb_inplace_floor_divide */ + 0, /* nb_inplace_true_divide */ + 0, /* nb_index */ +}; + +static PyTypeObject PyNone_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "NoneType", + 0, + 0, + none_dealloc, /*tp_dealloc*/ /*never called*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + none_repr, /*tp_repr*/ + &none_as_number, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /*tp_flags */ + 0, /*tp_doc */ + 0, /*tp_traverse */ + 0, /*tp_clear */ + 0, /*tp_richcompare */ + 0, /*tp_weaklistoffset */ + 0, /*tp_iter */ + 0, /*tp_iternext */ + 0, /*tp_methods */ + 0, /*tp_members */ + 0, /*tp_getset */ + 0, /*tp_base */ + 0, /*tp_dict */ + 0, /*tp_descr_get */ + 0, /*tp_descr_set */ + 0, /*tp_dictoffset */ + 0, /*tp_init */ + 0, /*tp_alloc */ + none_new, /*tp_new */ +}; + +PyObject _Py_NoneStruct = { + _PyObject_EXTRA_INIT + 1, &PyNone_Type +}; + +/* NotImplemented is an object that can be used to signal that an + operation is not implemented for the given type combination. */ + +static PyObject * +NotImplemented_repr(PyObject *op) +{ + return PyUnicode_FromString("NotImplemented"); +} + +static PyObject * +notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + if (PyTuple_GET_SIZE(args) || (kwargs && PyDict_Size(kwargs))) { + PyErr_SetString(PyExc_TypeError, "NotImplementedType takes no arguments"); + return NULL; + } + Py_RETURN_NOTIMPLEMENTED; +} + +static PyTypeObject PyNotImplemented_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "NotImplementedType", + 0, + 0, + none_dealloc, /*tp_dealloc*/ /*never called*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + NotImplemented_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /*tp_flags */ + 0, /*tp_doc */ + 0, /*tp_traverse */ + 0, /*tp_clear */ + 0, /*tp_richcompare */ + 0, /*tp_weaklistoffset */ + 0, /*tp_iter */ + 0, /*tp_iternext */ + 0, /*tp_methods */ + 0, /*tp_members */ + 0, /*tp_getset */ + 0, /*tp_base */ + 0, /*tp_dict */ + 0, /*tp_descr_get */ + 0, /*tp_descr_set */ + 0, /*tp_dictoffset */ + 0, /*tp_init */ + 0, /*tp_alloc */ + notimplemented_new, /*tp_new */ +}; + +PyObject _Py_NotImplementedStruct = { + _PyObject_EXTRA_INIT + 1, &PyNotImplemented_Type +}; + +void +_Py_ReadyTypes(void) +{ + if (PyType_Ready(&PyType_Type) < 0) + Py_FatalError("Can't initialize type type"); + + if (PyType_Ready(&_PyWeakref_RefType) < 0) + Py_FatalError("Can't initialize weakref type"); + + if (PyType_Ready(&_PyWeakref_CallableProxyType) < 0) + Py_FatalError("Can't initialize callable weakref proxy type"); + + if (PyType_Ready(&_PyWeakref_ProxyType) < 0) + Py_FatalError("Can't initialize weakref proxy type"); + + if (PyType_Ready(&PyBool_Type) < 0) + Py_FatalError("Can't initialize bool type"); + + if (PyType_Ready(&PyByteArray_Type) < 0) + Py_FatalError("Can't initialize bytearray type"); + + if (PyType_Ready(&PyBytes_Type) < 0) + Py_FatalError("Can't initialize 'str'"); + + if (PyType_Ready(&PyList_Type) < 0) + Py_FatalError("Can't initialize list type"); + + if (PyType_Ready(&PyNone_Type) < 0) + Py_FatalError("Can't initialize None type"); + + if (PyType_Ready(&PyNotImplemented_Type) < 0) + Py_FatalError("Can't initialize NotImplemented type"); + + if (PyType_Ready(&PyTraceBack_Type) < 0) + Py_FatalError("Can't initialize traceback type"); + + if (PyType_Ready(&PySuper_Type) < 0) + Py_FatalError("Can't initialize super type"); + + if (PyType_Ready(&PyBaseObject_Type) < 0) + Py_FatalError("Can't initialize object type"); + + if (PyType_Ready(&PyRange_Type) < 0) + Py_FatalError("Can't initialize range type"); + + if (PyType_Ready(&PyDict_Type) < 0) + Py_FatalError("Can't initialize dict type"); + + if (PyType_Ready(&PySet_Type) < 0) + Py_FatalError("Can't initialize set type"); + + if (PyType_Ready(&PyUnicode_Type) < 0) + Py_FatalError("Can't initialize str type"); + + if (PyType_Ready(&PySlice_Type) < 0) + Py_FatalError("Can't initialize slice type"); + + if (PyType_Ready(&PyStaticMethod_Type) < 0) + Py_FatalError("Can't initialize static method type"); + + if (PyType_Ready(&PyComplex_Type) < 0) + Py_FatalError("Can't initialize complex type"); + + if (PyType_Ready(&PyFloat_Type) < 0) + Py_FatalError("Can't initialize float type"); + + if (PyType_Ready(&PyLong_Type) < 0) + Py_FatalError("Can't initialize int type"); + + if (PyType_Ready(&PyFrozenSet_Type) < 0) + Py_FatalError("Can't initialize frozenset type"); + + if (PyType_Ready(&PyProperty_Type) < 0) + Py_FatalError("Can't initialize property type"); + + if (PyType_Ready(&PyMemoryView_Type) < 0) + Py_FatalError("Can't initialize memoryview type"); + + if (PyType_Ready(&PyTuple_Type) < 0) + Py_FatalError("Can't initialize tuple type"); + + if (PyType_Ready(&PyEnum_Type) < 0) + Py_FatalError("Can't initialize enumerate type"); + + if (PyType_Ready(&PyReversed_Type) < 0) + Py_FatalError("Can't initialize reversed type"); + + if (PyType_Ready(&PyStdPrinter_Type) < 0) + Py_FatalError("Can't initialize StdPrinter"); + + if (PyType_Ready(&PyCode_Type) < 0) + Py_FatalError("Can't initialize code type"); + + if (PyType_Ready(&PyFrame_Type) < 0) + Py_FatalError("Can't initialize frame type"); + + if (PyType_Ready(&PyCFunction_Type) < 0) + Py_FatalError("Can't initialize builtin function type"); + + if (PyType_Ready(&PyMethod_Type) < 0) + Py_FatalError("Can't initialize method type"); + + if (PyType_Ready(&PyFunction_Type) < 0) + Py_FatalError("Can't initialize function type"); + + if (PyType_Ready(&PyDictProxy_Type) < 0) + Py_FatalError("Can't initialize dict proxy type"); + + if (PyType_Ready(&PyGen_Type) < 0) + Py_FatalError("Can't initialize generator type"); + + if (PyType_Ready(&PyGetSetDescr_Type) < 0) + Py_FatalError("Can't initialize get-set descriptor type"); + + if (PyType_Ready(&PyWrapperDescr_Type) < 0) + Py_FatalError("Can't initialize wrapper type"); + + if (PyType_Ready(&_PyMethodWrapper_Type) < 0) + Py_FatalError("Can't initialize method wrapper type"); + + if (PyType_Ready(&PyEllipsis_Type) < 0) + Py_FatalError("Can't initialize ellipsis type"); + + if (PyType_Ready(&PyMemberDescr_Type) < 0) + Py_FatalError("Can't initialize member descriptor type"); + + if (PyType_Ready(&PyFilter_Type) < 0) + Py_FatalError("Can't initialize filter type"); + + if (PyType_Ready(&PyMap_Type) < 0) + Py_FatalError("Can't initialize map type"); + + if (PyType_Ready(&PyZip_Type) < 0) + Py_FatalError("Can't initialize zip type"); +} + + +#ifdef Py_TRACE_REFS + +void +_Py_NewReference(PyObject *op) +{ + _Py_INC_REFTOTAL; + op->ob_refcnt = 1; + _Py_AddToAllObjects(op, 1); + _Py_INC_TPALLOCS(op); +} + +void +_Py_ForgetReference(register PyObject *op) +{ +#ifdef SLOW_UNREF_CHECK + register PyObject *p; +#endif + if (op->ob_refcnt < 0) + Py_FatalError("UNREF negative refcnt"); + if (op == &refchain || + op->_ob_prev->_ob_next != op || op->_ob_next->_ob_prev != op) { + fprintf(stderr, "* ob\n"); + _PyObject_Dump(op); + fprintf(stderr, "* op->_ob_prev->_ob_next\n"); + _PyObject_Dump(op->_ob_prev->_ob_next); + fprintf(stderr, "* op->_ob_next->_ob_prev\n"); + _PyObject_Dump(op->_ob_next->_ob_prev); + Py_FatalError("UNREF invalid object"); + } +#ifdef SLOW_UNREF_CHECK + for (p = refchain._ob_next; p != &refchain; p = p->_ob_next) { + if (p == op) + break; + } + if (p == &refchain) /* Not found */ + Py_FatalError("UNREF unknown object"); +#endif + op->_ob_next->_ob_prev = op->_ob_prev; + op->_ob_prev->_ob_next = op->_ob_next; + op->_ob_next = op->_ob_prev = NULL; + _Py_INC_TPFREES(op); +} + +void +_Py_Dealloc(PyObject *op) +{ + destructor dealloc = Py_TYPE(op)->tp_dealloc; + _Py_ForgetReference(op); + (*dealloc)(op); +} + +/* Print all live objects. Because PyObject_Print is called, the + * interpreter must be in a healthy state. + */ +void +_Py_PrintReferences(FILE *fp) +{ + PyObject *op; + fprintf(fp, "Remaining objects:\n"); + for (op = refchain._ob_next; op != &refchain; op = op->_ob_next) { + fprintf(fp, "%p [%" PY_FORMAT_SIZE_T "d] ", op, op->ob_refcnt); + if (PyObject_Print(op, fp, 0) != 0) + PyErr_Clear(); + putc('\n', fp); + } +} + +/* Print the addresses of all live objects. Unlike _Py_PrintReferences, this + * doesn't make any calls to the Python C API, so is always safe to call. + */ +void +_Py_PrintReferenceAddresses(FILE *fp) +{ + PyObject *op; + fprintf(fp, "Remaining object addresses:\n"); + for (op = refchain._ob_next; op != &refchain; op = op->_ob_next) + fprintf(fp, "%p [%" PY_FORMAT_SIZE_T "d] %s\n", op, + op->ob_refcnt, Py_TYPE(op)->tp_name); +} + +PyObject * +_Py_GetObjects(PyObject *self, PyObject *args) +{ + int i, n; + PyObject *t = NULL; + PyObject *res, *op; + + if (!PyArg_ParseTuple(args, "i|O", &n, &t)) + return NULL; + op = refchain._ob_next; + res = PyList_New(0); + if (res == NULL) + return NULL; + for (i = 0; (n == 0 || i < n) && op != &refchain; i++) { + while (op == self || op == args || op == res || op == t || + (t != NULL && Py_TYPE(op) != (PyTypeObject *) t)) { + op = op->_ob_next; + if (op == &refchain) + return res; + } + if (PyList_Append(res, op) < 0) { + Py_DECREF(res); + return NULL; + } + op = op->_ob_next; + } + return res; +} + +#endif + +/* Hack to force loading of pycapsule.o */ +PyTypeObject *_PyCapsule_hack = &PyCapsule_Type; + + +/* Hack to force loading of abstract.o */ +Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; + + +/* Python's malloc wrappers (see pymem.h) */ + +void * +PyMem_Malloc(size_t nbytes) +{ + return PyMem_MALLOC(nbytes); +} + +void * +PyMem_Realloc(void *p, size_t nbytes) +{ + return PyMem_REALLOC(p, nbytes); +} + +void +PyMem_Free(void *p) +{ + PyMem_FREE(p); +} + + +/* These methods are used to control infinite recursion in repr, str, print, + etc. Container objects that may recursively contain themselves, + e.g. builtin dictionaries and lists, should used Py_ReprEnter() and + Py_ReprLeave() to avoid infinite recursion. + + Py_ReprEnter() returns 0 the first time it is called for a particular + object and 1 every time thereafter. It returns -1 if an exception + occurred. Py_ReprLeave() has no return value. + + See dictobject.c and listobject.c for examples of use. +*/ + +#define KEY "Py_Repr" + +int +Py_ReprEnter(PyObject *obj) +{ + PyObject *dict; + PyObject *list; + Py_ssize_t i; + + dict = PyThreadState_GetDict(); + if (dict == NULL) + return 0; + list = PyDict_GetItemString(dict, KEY); + if (list == NULL) { + list = PyList_New(0); + if (list == NULL) + return -1; + if (PyDict_SetItemString(dict, KEY, list) < 0) + return -1; + Py_DECREF(list); + } + i = PyList_GET_SIZE(list); + while (--i >= 0) { + if (PyList_GET_ITEM(list, i) == obj) + return 1; + } + PyList_Append(list, obj); + return 0; +} + +void +Py_ReprLeave(PyObject *obj) +{ + PyObject *dict; + PyObject *list; + Py_ssize_t i; + + dict = PyThreadState_GetDict(); + if (dict == NULL) + return; + list = PyDict_GetItemString(dict, KEY); + if (list == NULL || !PyList_Check(list)) + return; + i = PyList_GET_SIZE(list); + /* Count backwards because we always expect obj to be list[-1] */ + while (--i >= 0) { + if (PyList_GET_ITEM(list, i) == obj) { + PyList_SetSlice(list, i, i + 1, NULL); + break; + } + } +} + +/* Trashcan support. */ + +/* Current call-stack depth of tp_dealloc calls. */ +int _PyTrash_delete_nesting = 0; + +/* List of objects that still need to be cleaned up, singly linked via their + * gc headers' gc_prev pointers. + */ +PyObject *_PyTrash_delete_later = NULL; + +/* Add op to the _PyTrash_delete_later list. Called when the current + * call-stack depth gets large. op must be a currently untracked gc'ed + * object, with refcount 0. Py_DECREF must already have been called on it. + */ +void +_PyTrash_deposit_object(PyObject *op) +{ + assert(PyObject_IS_GC(op)); + assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED); + assert(op->ob_refcnt == 0); + _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later; + _PyTrash_delete_later = op; +} + +/* Dealloccate all the objects in the _PyTrash_delete_later list. Called when + * the call-stack unwinds again. + */ +void +_PyTrash_destroy_chain(void) +{ + while (_PyTrash_delete_later) { + PyObject *op = _PyTrash_delete_later; + destructor dealloc = Py_TYPE(op)->tp_dealloc; + + _PyTrash_delete_later = + (PyObject*) _Py_AS_GC(op)->gc.gc_prev; + + /* Call the deallocator directly. This used to try to + * fool Py_DECREF into calling it indirectly, but + * Py_DECREF was already called on this object, and in + * assorted non-release builds calling Py_DECREF again ends + * up distorting allocation statistics. + */ + assert(op->ob_refcnt == 0); + ++_PyTrash_delete_nesting; + (*dealloc)(op); + --_PyTrash_delete_nesting; + } +} + +#ifndef Py_TRACE_REFS +/* For Py_LIMITED_API, we need an out-of-line version of _Py_Dealloc. + Define this here, so we can undefine the macro. */ +#undef _Py_Dealloc +PyAPI_FUNC(void) _Py_Dealloc(PyObject *); +void +_Py_Dealloc(PyObject *op) +{ + _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA + (*Py_TYPE(op)->tp_dealloc)(op); +} +#endif + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/tupleobject.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/tupleobject.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,907 @@ + +/* Tuple object implementation */ + +#include "Python.h" + +PyObject * +PyTuple_New(register Py_ssize_t size) +{ + register PyTupleObject *op; + Py_ssize_t i; + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + { + Py_ssize_t nbytes = size * sizeof(PyObject *); + /* Check for overflow */ + if (nbytes / sizeof(PyObject *) != (size_t)size || + (nbytes > PY_SSIZE_T_MAX - sizeof(PyTupleObject) - sizeof(PyObject *))) + { + return PyErr_NoMemory(); + } + /* nbytes += sizeof(PyTupleObject) - sizeof(PyObject *); */ + + op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); + if (op == NULL) + return NULL; + } + for (i=0; i < size; i++) + op->ob_item[i] = NULL; + _PyObject_GC_TRACK(op); + return (PyObject *) op; +} + +Py_ssize_t +PyTuple_Size(register PyObject *op) +{ + if (!PyTuple_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + else + return Py_SIZE(op); +} + +PyObject * +PyTuple_GetItem(register PyObject *op, register Py_ssize_t i) +{ + if (!PyTuple_Check(op)) { + PyErr_BadInternalCall(); + return NULL; + } + if (i < 0 || i >= Py_SIZE(op)) { + PyErr_SetString(PyExc_IndexError, "tuple index out of range"); + return NULL; + } + return ((PyTupleObject *)op) -> ob_item[i]; +} + +int +PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem) +{ + register PyObject *olditem; + register PyObject **p; + if (!PyTuple_Check(op) || op->ob_refcnt != 1) { + Py_XDECREF(newitem); + PyErr_BadInternalCall(); + return -1; + } + if (i < 0 || i >= Py_SIZE(op)) { + Py_XDECREF(newitem); + PyErr_SetString(PyExc_IndexError, + "tuple assignment index out of range"); + return -1; + } + p = ((PyTupleObject *)op) -> ob_item + i; + olditem = *p; + *p = newitem; + Py_XDECREF(olditem); + return 0; +} + +void +_PyTuple_MaybeUntrack(PyObject *op) +{ + PyTupleObject *t; + Py_ssize_t i, n; + + if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) + return; + t = (PyTupleObject *) op; + n = Py_SIZE(t); + for (i = 0; i < n; i++) { + PyObject *elt = PyTuple_GET_ITEM(t, i); + /* Tuple with NULL elements aren't + fully constructed, don't untrack + them yet. */ + if (!elt || + _PyObject_GC_MAY_BE_TRACKED(elt)) + return; + } + _PyObject_GC_UNTRACK(op); +} + +PyObject * +PyTuple_Pack(Py_ssize_t n, ...) +{ + Py_ssize_t i; + PyObject *o; + PyObject *result; + PyObject **items; + va_list vargs; + + va_start(vargs, n); + result = PyTuple_New(n); + if (result == NULL) + return NULL; + items = ((PyTupleObject *)result)->ob_item; + for (i = 0; i < n; i++) { + o = va_arg(vargs, PyObject *); + Py_INCREF(o); + items[i] = o; + } + va_end(vargs); + return result; +} + + +/* Methods */ + +static void +tupledealloc(register PyTupleObject *op) +{ + register Py_ssize_t i; + register Py_ssize_t len = Py_SIZE(op); + PyObject_GC_UnTrack(op); + Py_TRASHCAN_SAFE_BEGIN(op) + if (len > 0) { + i = len; + while (--i >= 0) + Py_XDECREF(op->ob_item[i]); + } + Py_TYPE(op)->tp_free((PyObject *)op); +done: + Py_TRASHCAN_SAFE_END(op) +} + +static PyObject * +tuplerepr(PyTupleObject *v) +{ + Py_ssize_t i, n; + PyObject *s = NULL; + _PyAccu acc; + static PyObject *sep = NULL; + + n = Py_SIZE(v); + if (n == 0) + return PyUnicode_FromString("()"); + + if (sep == NULL) { + sep = PyUnicode_FromString(", "); + if (sep == NULL) + return NULL; + } + + /* While not mutable, it is still possible to end up with a cycle in a + tuple through an object that stores itself within a tuple (and thus + infinitely asks for the repr of itself). This should only be + possible within a type. */ + i = Py_ReprEnter((PyObject *)v); + if (i != 0) { + return i > 0 ? PyUnicode_FromString("(...)") : NULL; + } + + if (_PyAccu_Init(&acc)) + goto error; + + s = PyUnicode_FromString("("); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + + /* Do repr() on each element. */ + for (i = 0; i < n; ++i) { + if (Py_EnterRecursiveCall(" while getting the repr of a tuple")) + goto error; + s = PyObject_Repr(v->ob_item[i]); + Py_LeaveRecursiveCall(); + if (i > 0 && _PyAccu_Accumulate(&acc, sep)) + goto error; + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + } + if (n > 1) + s = PyUnicode_FromString(")"); + else + s = PyUnicode_FromString(",)"); + if (s == NULL || _PyAccu_Accumulate(&acc, s)) + goto error; + Py_CLEAR(s); + + Py_ReprLeave((PyObject *)v); + return _PyAccu_Finish(&acc); + +error: + _PyAccu_Destroy(&acc); + Py_XDECREF(s); + Py_ReprLeave((PyObject *)v); + return NULL; +} + +/* The addend 82520, was selected from the range(0, 1000000) for + generating the greatest number of prime multipliers for tuples + upto length eight: + + 1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533, + 1330111, 1412633, 1165069, 1247599, 1495177, 1577699 +*/ + +static Py_hash_t +tuplehash(PyTupleObject *v) +{ + register Py_uhash_t x; + register Py_hash_t y; + register Py_ssize_t len = Py_SIZE(v); + register PyObject **p; + Py_uhash_t mult = 1000003; + x = 0x345678; + p = v->ob_item; + while (--len >= 0) { + y = PyObject_Hash(*p++); + if (y == -1) + return -1; + x = (x ^ y) * mult; + /* the cast might truncate len; that doesn't change hash stability */ + mult += (Py_hash_t)(82520L + len + len); + } + x += 97531L; + if (x == (Py_uhash_t)-1) + x = -2; + return x; +} + +static Py_ssize_t +tuplelength(PyTupleObject *a) +{ + return Py_SIZE(a); +} + +static int +tuplecontains(PyTupleObject *a, PyObject *el) +{ + Py_ssize_t i; + int cmp; + + for (i = 0, cmp = 0 ; cmp == 0 && i < Py_SIZE(a); ++i) + cmp = PyObject_RichCompareBool(el, PyTuple_GET_ITEM(a, i), + Py_EQ); + return cmp; +} + +static PyObject * +tupleitem(register PyTupleObject *a, register Py_ssize_t i) +{ + if (i < 0 || i >= Py_SIZE(a)) { + PyErr_SetString(PyExc_IndexError, "tuple index out of range"); + return NULL; + } + Py_INCREF(a->ob_item[i]); + return a->ob_item[i]; +} + +static PyObject * +tupleslice(register PyTupleObject *a, register Py_ssize_t ilow, + register Py_ssize_t ihigh) +{ + register PyTupleObject *np; + PyObject **src, **dest; + register Py_ssize_t i; + Py_ssize_t len; + if (ilow < 0) + ilow = 0; + if (ihigh > Py_SIZE(a)) + ihigh = Py_SIZE(a); + if (ihigh < ilow) + ihigh = ilow; + if (ilow == 0 && ihigh == Py_SIZE(a) && PyTuple_CheckExact(a)) { + Py_INCREF(a); + return (PyObject *)a; + } + len = ihigh - ilow; + np = (PyTupleObject *)PyTuple_New(len); + if (np == NULL) + return NULL; + src = a->ob_item + ilow; + dest = np->ob_item; + for (i = 0; i < len; i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + return (PyObject *)np; +} + +PyObject * +PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j) +{ + if (op == NULL || !PyTuple_Check(op)) { + PyErr_BadInternalCall(); + return NULL; + } + return tupleslice((PyTupleObject *)op, i, j); +} + +static PyObject * +tupleconcat(register PyTupleObject *a, register PyObject *bb) +{ + register Py_ssize_t size; + register Py_ssize_t i; + PyObject **src, **dest; + PyTupleObject *np; + if (!PyTuple_Check(bb)) { + PyErr_Format(PyExc_TypeError, + "can only concatenate tuple (not \"%.200s\") to tuple", + Py_TYPE(bb)->tp_name); + return NULL; + } +#define b ((PyTupleObject *)bb) + size = Py_SIZE(a) + Py_SIZE(b); + if (size < 0) + return PyErr_NoMemory(); + np = (PyTupleObject *) PyTuple_New(size); + if (np == NULL) { + return NULL; + } + src = a->ob_item; + dest = np->ob_item; + for (i = 0; i < Py_SIZE(a); i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + src = b->ob_item; + dest = np->ob_item + Py_SIZE(a); + for (i = 0; i < Py_SIZE(b); i++) { + PyObject *v = src[i]; + Py_INCREF(v); + dest[i] = v; + } + return (PyObject *)np; +#undef b +} + +static PyObject * +tuplerepeat(PyTupleObject *a, Py_ssize_t n) +{ + Py_ssize_t i, j; + Py_ssize_t size; + PyTupleObject *np; + PyObject **p, **items; + if (n < 0) + n = 0; + if (Py_SIZE(a) == 0 || n == 1) { + if (PyTuple_CheckExact(a)) { + /* Since tuples are immutable, we can return a shared + copy in this case */ + Py_INCREF(a); + return (PyObject *)a; + } + if (Py_SIZE(a) == 0) + return PyTuple_New(0); + } + size = Py_SIZE(a) * n; + if (size/Py_SIZE(a) != n) + return PyErr_NoMemory(); + np = (PyTupleObject *) PyTuple_New(size); + if (np == NULL) + return NULL; + p = np->ob_item; + items = a->ob_item; + for (i = 0; i < n; i++) { + for (j = 0; j < Py_SIZE(a); j++) { + *p = items[j]; + Py_INCREF(*p); + p++; + } + } + return (PyObject *) np; +} + +static PyObject * +tupleindex(PyTupleObject *self, PyObject *args) +{ + Py_ssize_t i, start=0, stop=Py_SIZE(self); + PyObject *v; + + if (!PyArg_ParseTuple(args, "O|O&O&:index", &v, + _PyEval_SliceIndex, &start, + _PyEval_SliceIndex, &stop)) + return NULL; + if (start < 0) { + start += Py_SIZE(self); + if (start < 0) + start = 0; + } + if (stop < 0) { + stop += Py_SIZE(self); + if (stop < 0) + stop = 0; + } + for (i = start; i < stop && i < Py_SIZE(self); i++) { + int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); + if (cmp > 0) + return PyLong_FromSsize_t(i); + else if (cmp < 0) + return NULL; + } + PyErr_SetString(PyExc_ValueError, "tuple.index(x): x not in tuple"); + return NULL; +} + +static PyObject * +tuplecount(PyTupleObject *self, PyObject *v) +{ + Py_ssize_t count = 0; + Py_ssize_t i; + + for (i = 0; i < Py_SIZE(self); i++) { + int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); + if (cmp > 0) + count++; + else if (cmp < 0) + return NULL; + } + return PyLong_FromSsize_t(count); +} + +static int +tupletraverse(PyTupleObject *o, visitproc visit, void *arg) +{ + Py_ssize_t i; + + for (i = Py_SIZE(o); --i >= 0; ) + Py_VISIT(o->ob_item[i]); + return 0; +} + +static PyObject * +tuplerichcompare(PyObject *v, PyObject *w, int op) +{ + PyTupleObject *vt, *wt; + Py_ssize_t i; + Py_ssize_t vlen, wlen; + + if (!PyTuple_Check(v) || !PyTuple_Check(w)) + Py_RETURN_NOTIMPLEMENTED; + + vt = (PyTupleObject *)v; + wt = (PyTupleObject *)w; + + vlen = Py_SIZE(vt); + wlen = Py_SIZE(wt); + + /* Note: the corresponding code for lists has an "early out" test + * here when op is EQ or NE and the lengths differ. That pays there, + * but Tim was unable to find any real code where EQ/NE tuple + * compares don't have the same length, so testing for it here would + * have cost without benefit. + */ + + /* Search for the first index where items are different. + * Note that because tuples are immutable, it's safe to reuse + * vlen and wlen across the comparison calls. + */ + for (i = 0; i < vlen && i < wlen; i++) { + int k = PyObject_RichCompareBool(vt->ob_item[i], + wt->ob_item[i], Py_EQ); + if (k < 0) + return NULL; + if (!k) + break; + } + + if (i >= vlen || i >= wlen) { + /* No more items to compare -- compare sizes */ + int cmp; + PyObject *res; + switch (op) { + case Py_LT: cmp = vlen < wlen; break; + case Py_LE: cmp = vlen <= wlen; break; + case Py_EQ: cmp = vlen == wlen; break; + case Py_NE: cmp = vlen != wlen; break; + case Py_GT: cmp = vlen > wlen; break; + case Py_GE: cmp = vlen >= wlen; break; + default: return NULL; /* cannot happen */ + } + if (cmp) + res = Py_True; + else + res = Py_False; + Py_INCREF(res); + return res; + } + + /* We have an item that differs -- shortcuts for EQ/NE */ + if (op == Py_EQ) { + Py_INCREF(Py_False); + return Py_False; + } + if (op == Py_NE) { + Py_INCREF(Py_True); + return Py_True; + } + + /* Compare the final item again using the proper operator */ + return PyObject_RichCompare(vt->ob_item[i], wt->ob_item[i], op); +} + +static PyObject * +tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); + +static PyObject * +tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *arg = NULL; + static char *kwlist[] = {"sequence", 0}; + + if (type != &PyTuple_Type) + return tuple_subtype_new(type, args, kwds); + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg)) + return NULL; + + if (arg == NULL) + return PyTuple_New(0); + else + return PySequence_Tuple(arg); +} + +static PyObject * +tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *tmp, *newobj, *item; + Py_ssize_t i, n; + + assert(PyType_IsSubtype(type, &PyTuple_Type)); + tmp = tuple_new(&PyTuple_Type, args, kwds); + if (tmp == NULL) + return NULL; + assert(PyTuple_Check(tmp)); + newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp)); + if (newobj == NULL) + return NULL; + for (i = 0; i < n; i++) { + item = PyTuple_GET_ITEM(tmp, i); + Py_INCREF(item); + PyTuple_SET_ITEM(newobj, i, item); + } + Py_DECREF(tmp); + return newobj; +} + +PyDoc_STRVAR(tuple_doc, +"tuple() -> empty tuple\n\ +tuple(iterable) -> tuple initialized from iterable's items\n\ +\n\ +If the argument is a tuple, the return value is the same object."); + +static PySequenceMethods tuple_as_sequence = { + (lenfunc)tuplelength, /* sq_length */ + (binaryfunc)tupleconcat, /* sq_concat */ + (ssizeargfunc)tuplerepeat, /* sq_repeat */ + (ssizeargfunc)tupleitem, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)tuplecontains, /* sq_contains */ +}; + +static PyObject* +tuplesubscript(PyTupleObject* self, PyObject* item) +{ + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return NULL; + if (i < 0) + i += PyTuple_GET_SIZE(self); + return tupleitem(self, i); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelength, cur, i; + PyObject* result; + PyObject* it; + PyObject **src, **dest; + + if (PySlice_GetIndicesEx(item, + PyTuple_GET_SIZE(self), + &start, &stop, &step, &slicelength) < 0) { + return NULL; + } + + if (slicelength <= 0) { + return PyTuple_New(0); + } + else if (start == 0 && step == 1 && + slicelength == PyTuple_GET_SIZE(self) && + PyTuple_CheckExact(self)) { + Py_INCREF(self); + return (PyObject *)self; + } + else { + result = PyTuple_New(slicelength); + if (!result) return NULL; + + src = self->ob_item; + dest = ((PyTupleObject *)result)->ob_item; + for (cur = start, i = 0; i < slicelength; + cur += step, i++) { + it = src[cur]; + Py_INCREF(it); + dest[i] = it; + } + + return result; + } + } + else { + PyErr_Format(PyExc_TypeError, + "tuple indices must be integers, not %.200s", + Py_TYPE(item)->tp_name); + return NULL; + } +} + +static PyObject * +tuple_getnewargs(PyTupleObject *v) +{ + return Py_BuildValue("(N)", tupleslice(v, 0, Py_SIZE(v))); + +} + +static PyObject * +tuple_sizeof(PyTupleObject *self) +{ + Py_ssize_t res; + + res = PyTuple_Type.tp_basicsize + Py_SIZE(self) * sizeof(PyObject *); + return PyLong_FromSsize_t(res); +} + +PyDoc_STRVAR(index_doc, +"T.index(value, [start, [stop]]) -> integer -- return first index of value.\n" +"Raises ValueError if the value is not present." +); +PyDoc_STRVAR(count_doc, +"T.count(value) -> integer -- return number of occurrences of value"); +PyDoc_STRVAR(sizeof_doc, +"T.__sizeof__() -- size of T in memory, in bytes"); + +static PyMethodDef tuple_methods[] = { + {"__getnewargs__", (PyCFunction)tuple_getnewargs, METH_NOARGS}, + {"__sizeof__", (PyCFunction)tuple_sizeof, METH_NOARGS, sizeof_doc}, + {"index", (PyCFunction)tupleindex, METH_VARARGS, index_doc}, + {"count", (PyCFunction)tuplecount, METH_O, count_doc}, + {NULL, NULL} /* sentinel */ +}; + +static PyMappingMethods tuple_as_mapping = { + (lenfunc)tuplelength, + (binaryfunc)tuplesubscript, + 0 +}; + +static PyObject *tuple_iter(PyObject *seq); + +PyTypeObject PyTuple_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "tuple", + sizeof(PyTupleObject) - sizeof(PyObject *), + sizeof(PyObject *), + (destructor)tupledealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + (reprfunc)tuplerepr, /* tp_repr */ + 0, /* tp_as_number */ + &tuple_as_sequence, /* tp_as_sequence */ + &tuple_as_mapping, /* tp_as_mapping */ + (hashfunc)tuplehash, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TUPLE_SUBCLASS, /* tp_flags */ + tuple_doc, /* tp_doc */ + (traverseproc)tupletraverse, /* tp_traverse */ + 0, /* tp_clear */ + tuplerichcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + tuple_iter, /* tp_iter */ + 0, /* tp_iternext */ + tuple_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + tuple_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + +/* The following function breaks the notion that tuples are immutable: + it changes the size of a tuple. We get away with this only if there + is only one module referencing the object. You can also think of it + as creating a new tuple object and destroying the old one, only more + efficiently. In any case, don't use this if the tuple may already be + known to some other part of the code. */ + +int +_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) +{ + register PyTupleObject *v; + register PyTupleObject *sv; + Py_ssize_t i; + Py_ssize_t oldsize; + + v = (PyTupleObject *) *pv; + if (v == NULL || Py_TYPE(v) != &PyTuple_Type || + (Py_SIZE(v) != 0 && Py_REFCNT(v) != 1)) { + *pv = 0; + Py_XDECREF(v); + PyErr_BadInternalCall(); + return -1; + } + oldsize = Py_SIZE(v); + if (oldsize == newsize) + return 0; + + if (oldsize == 0) { + /* Empty tuples are often shared, so we should never + resize them in-place even if we do own the only + (current) reference */ + Py_DECREF(v); + *pv = PyTuple_New(newsize); + return *pv == NULL ? -1 : 0; + } + + /* XXX UNREF/NEWREF interface should be more symmetrical */ + _Py_DEC_REFTOTAL; + if (_PyObject_GC_IS_TRACKED(v)) + _PyObject_GC_UNTRACK(v); + _Py_ForgetReference((PyObject *) v); + /* DECREF items deleted by shrinkage */ + for (i = newsize; i < oldsize; i++) { + Py_XDECREF(v->ob_item[i]); + v->ob_item[i] = NULL; + } + sv = PyObject_GC_Resize(PyTupleObject, v, newsize); + if (sv == NULL) { + *pv = NULL; + PyObject_GC_Del(v); + return -1; + } + _Py_NewReference((PyObject *) sv); + /* Zero out items added by growing */ + if (newsize > oldsize) + memset(&sv->ob_item[oldsize], 0, + sizeof(*sv->ob_item) * (newsize - oldsize)); + *pv = (PyObject *) sv; + _PyObject_GC_TRACK(sv); + return 0; +} + +int +PyTuple_ClearFreeList(void) +{ + int freelist_size = 0; + return freelist_size; +} + +void +PyTuple_Fini(void) +{ +} + +/*********************** Tuple Iterator **************************/ + +typedef struct { + PyObject_HEAD + long it_index; + PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */ +} tupleiterobject; + +static void +tupleiter_dealloc(tupleiterobject *it) +{ + _PyObject_GC_UNTRACK(it); + Py_XDECREF(it->it_seq); + PyObject_GC_Del(it); +} + +static int +tupleiter_traverse(tupleiterobject *it, visitproc visit, void *arg) +{ + Py_VISIT(it->it_seq); + return 0; +} + +static PyObject * +tupleiter_next(tupleiterobject *it) +{ + PyTupleObject *seq; + PyObject *item; + + assert(it != NULL); + seq = it->it_seq; + if (seq == NULL) + return NULL; + assert(PyTuple_Check(seq)); + + if (it->it_index < PyTuple_GET_SIZE(seq)) { + item = PyTuple_GET_ITEM(seq, it->it_index); + ++it->it_index; + Py_INCREF(item); + return item; + } + + Py_DECREF(seq); + it->it_seq = NULL; + return NULL; +} + +static PyObject * +tupleiter_len(tupleiterobject *it) +{ + Py_ssize_t len = 0; + if (it->it_seq) + len = PyTuple_GET_SIZE(it->it_seq) - it->it_index; + return PyLong_FromSsize_t(len); +} + +PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef tupleiter_methods[] = { + {"__length_hint__", (PyCFunction)tupleiter_len, METH_NOARGS, length_hint_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyTupleIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "tuple_iterator", /* tp_name */ + sizeof(tupleiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)tupleiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + 0, /* tp_doc */ + (traverseproc)tupleiter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)tupleiter_next, /* tp_iternext */ + tupleiter_methods, /* tp_methods */ + 0, +}; + +static PyObject * +tuple_iter(PyObject *seq) +{ + tupleiterobject *it; + + if (!PyTuple_Check(seq)) { + PyErr_BadInternalCall(); + return NULL; + } + it = PyObject_GC_New(tupleiterobject, &PyTupleIter_Type); + if (it == NULL) + return NULL; + it->it_index = 0; + Py_INCREF(seq); + it->it_seq = (PyTupleObject *)seq; + _PyObject_GC_TRACK(it); + return (PyObject *)it; +} diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Objects/unicodeobject.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Objects/unicodeobject.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,14135 @@ +/* + +Unicode implementation based on original code by Fredrik Lundh, +modified by Marc-Andre Lemburg . + +Major speed upgrades to the method implementations at the Reykjavik +NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. + +Copyright (c) Corporation for National Research Initiatives. + +-------------------------------------------------------------------- +The original string type implementation is: + + Copyright (c) 1999 by Secret Labs AB + Copyright (c) 1999 by Fredrik Lundh + +By obtaining, using, and/or copying this software and/or its +associated documentation, you agree that you have read, understood, +and will comply with the following terms and conditions: + +Permission to use, copy, modify, and distribute this software and its +associated documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appears in all +copies, and that both that copyright notice and this permission notice +appear in supporting documentation, and that the name of Secret Labs +AB or the author not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR +ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +-------------------------------------------------------------------- + +*/ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "ucnhash.h" + +/* Endianness switches; defaults to little endian */ + +#ifdef WORDS_BIGENDIAN +# define BYTEORDER_IS_BIG_ENDIAN +#else +# define BYTEORDER_IS_LITTLE_ENDIAN +#endif + +/* --- Globals ------------------------------------------------------------ + + The globals are initialized by the _PyUnicode_Init() API and should + not be used before calling that API. + +*/ + + +/* Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) */ +#define MAX_UNICODE 0x10ffff + +#ifdef Py_DEBUG +# define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0) +#else +# define _PyUnicode_CHECK(op) PyUnicode_Check(op) +#endif + +#define _PyUnicode_UTF8(op) \ + (((PyCompactUnicodeObject*)(op))->utf8) +#define PyUnicode_UTF8(op) \ + (assert(_PyUnicode_CHECK(op)), \ + assert(PyUnicode_IS_READY(op)), \ + PyUnicode_IS_COMPACT_ASCII(op) ? \ + ((char*)((PyASCIIObject*)(op) + 1)) : \ + _PyUnicode_UTF8(op)) +#define _PyUnicode_UTF8_LENGTH(op) \ + (((PyCompactUnicodeObject*)(op))->utf8_length) +#define PyUnicode_UTF8_LENGTH(op) \ + (assert(_PyUnicode_CHECK(op)), \ + assert(PyUnicode_IS_READY(op)), \ + PyUnicode_IS_COMPACT_ASCII(op) ? \ + ((PyASCIIObject*)(op))->length : \ + _PyUnicode_UTF8_LENGTH(op)) +#define _PyUnicode_WSTR(op) \ + (((PyASCIIObject*)(op))->wstr) +#define _PyUnicode_WSTR_LENGTH(op) \ + (((PyCompactUnicodeObject*)(op))->wstr_length) +#define _PyUnicode_LENGTH(op) \ + (((PyASCIIObject *)(op))->length) +#define _PyUnicode_STATE(op) \ + (((PyASCIIObject *)(op))->state) +#define _PyUnicode_HASH(op) \ + (((PyASCIIObject *)(op))->hash) +#define _PyUnicode_KIND(op) \ + (assert(_PyUnicode_CHECK(op)), \ + ((PyASCIIObject *)(op))->state.kind) +#define _PyUnicode_GET_LENGTH(op) \ + (assert(_PyUnicode_CHECK(op)), \ + ((PyASCIIObject *)(op))->length) +#define _PyUnicode_DATA_ANY(op) \ + (((PyUnicodeObject*)(op))->data.any) + +#undef PyUnicode_READY +#define PyUnicode_READY(op) \ + (assert(_PyUnicode_CHECK(op)), \ + (PyUnicode_IS_READY(op) ? \ + 0 : \ + _PyUnicode_Ready(op))) + +#define _PyUnicode_SHARE_UTF8(op) \ + (assert(_PyUnicode_CHECK(op)), \ + assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ + (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) +#define _PyUnicode_SHARE_WSTR(op) \ + (assert(_PyUnicode_CHECK(op)), \ + (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) + +/* true if the Unicode object has an allocated UTF-8 memory block + (not shared with other data) */ +#define _PyUnicode_HAS_UTF8_MEMORY(op) \ + (assert(_PyUnicode_CHECK(op)), \ + (!PyUnicode_IS_COMPACT_ASCII(op) \ + && _PyUnicode_UTF8(op) \ + && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) + +/* true if the Unicode object has an allocated wstr memory block + (not shared with other data) */ +#define _PyUnicode_HAS_WSTR_MEMORY(op) \ + (assert(_PyUnicode_CHECK(op)), \ + (_PyUnicode_WSTR(op) && \ + (!PyUnicode_IS_READY(op) || \ + _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) + +/* Generic helper macro to convert characters of different types. + from_type and to_type have to be valid type names, begin and end + are pointers to the source characters which should be of type + "from_type *". to is a pointer of type "to_type *" and points to the + buffer where the result characters are written to. */ +#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ + do { \ + to_type *_to = (to_type *) to; \ + const from_type *_iter = (begin); \ + const from_type *_end = (end); \ + Py_ssize_t n = (_end) - (_iter); \ + const from_type *_unrolled_end = \ + _iter + (n & ~ (Py_ssize_t) 3); \ + while (_iter < (_unrolled_end)) { \ + _to[0] = (to_type) _iter[0]; \ + _to[1] = (to_type) _iter[1]; \ + _to[2] = (to_type) _iter[2]; \ + _to[3] = (to_type) _iter[3]; \ + _iter += 4; _to += 4; \ + } \ + while (_iter < (_end)) \ + *_to++ = (to_type) *_iter++; \ + } while (0) + +/* The Unicode string has been modified: reset the hash */ +#define _PyUnicode_DIRTY(op) do { _PyUnicode_HASH(op) = -1; } while (0) + +/* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) +*/ +static PyObject *interned; + +/* The empty Unicode object is shared to improve performance. */ +static PyObject *unicode_empty; + +/* List of static strings. */ +static _Py_Identifier *static_strings; + +/* Single character Unicode strings in the Latin-1 range are being + shared as well. */ +static PyObject *unicode_latin1[256]; + +/* Fast detection of the most frequent whitespace characters */ +const unsigned char _Py_ascii_whitespace[] = { + 0, 0, 0, 0, 0, 0, 0, 0, +/* case 0x0009: * CHARACTER TABULATION */ +/* case 0x000A: * LINE FEED */ +/* case 0x000B: * LINE TABULATION */ +/* case 0x000C: * FORM FEED */ +/* case 0x000D: * CARRIAGE RETURN */ + 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* case 0x001C: * FILE SEPARATOR */ +/* case 0x001D: * GROUP SEPARATOR */ +/* case 0x001E: * RECORD SEPARATOR */ +/* case 0x001F: * UNIT SEPARATOR */ + 0, 0, 0, 0, 1, 1, 1, 1, +/* case 0x0020: * SPACE */ + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* forward */ +static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); +static PyObject* get_latin1_char(unsigned char ch); +static void copy_characters( + PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, + Py_ssize_t how_many); + +static PyObject * +unicode_fromascii(const unsigned char *s, Py_ssize_t size); +static PyObject * +_PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size); +static PyObject * +_PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size); +static PyObject * +_PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size); + +static PyObject * +unicode_encode_call_errorhandler(const char *errors, + PyObject **errorHandler,const char *encoding, const char *reason, + PyObject *unicode, PyObject **exceptionObject, + Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); + +static void +raise_encode_exception(PyObject **exceptionObject, + const char *encoding, + PyObject *unicode, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason); + +/* Same for linebreaks */ +static unsigned char ascii_linebreak[] = { + 0, 0, 0, 0, 0, 0, 0, 0, +/* 0x000A, * LINE FEED */ +/* 0x000B, * LINE TABULATION */ +/* 0x000C, * FORM FEED */ +/* 0x000D, * CARRIAGE RETURN */ + 0, 0, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +/* 0x001C, * FILE SEPARATOR */ +/* 0x001D, * GROUP SEPARATOR */ +/* 0x001E, * RECORD SEPARATOR */ + 0, 0, 0, 0, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* The max unicode value is always 0x10FFFF while using the PEP-393 API. + This function is kept for backward compatibility with the old API. */ +Py_UNICODE +PyUnicode_GetMax(void) +{ +#ifdef Py_UNICODE_WIDE + return 0x10FFFF; +#else + /* This is actually an illegal character, so it should + not be passed to unichr. */ + return 0xFFFF; +#endif +} + +#ifdef Py_DEBUG +int +_PyUnicode_CheckConsistency(PyObject *op, int check_content) +{ + PyASCIIObject *ascii; + unsigned int kind; + + assert(PyUnicode_Check(op)); + + ascii = (PyASCIIObject *)op; + kind = ascii->state.kind; + + if (ascii->state.ascii == 1 && ascii->state.compact == 1) { + assert(kind == PyUnicode_1BYTE_KIND); + assert(ascii->state.ready == 1); + } + else { + PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; + void *data; + + if (ascii->state.compact == 1) { + data = compact + 1; + assert(kind == PyUnicode_1BYTE_KIND + || kind == PyUnicode_2BYTE_KIND + || kind == PyUnicode_4BYTE_KIND); + assert(ascii->state.ascii == 0); + assert(ascii->state.ready == 1); + assert (compact->utf8 != data); + } + else { + PyUnicodeObject *unicode = (PyUnicodeObject *)op; + + data = unicode->data.any; + if (kind == PyUnicode_WCHAR_KIND) { + assert(ascii->length == 0); + assert(ascii->hash == -1); + assert(ascii->state.compact == 0); + assert(ascii->state.ascii == 0); + assert(ascii->state.ready == 0); + assert(ascii->state.interned == SSTATE_NOT_INTERNED); + assert(ascii->wstr != NULL); + assert(data == NULL); + assert(compact->utf8 == NULL); + } + else { + assert(kind == PyUnicode_1BYTE_KIND + || kind == PyUnicode_2BYTE_KIND + || kind == PyUnicode_4BYTE_KIND); + assert(ascii->state.compact == 0); + assert(ascii->state.ready == 1); + assert(data != NULL); + if (ascii->state.ascii) { + assert (compact->utf8 == data); + assert (compact->utf8_length == ascii->length); + } + else + assert (compact->utf8 != data); + } + } + if (kind != PyUnicode_WCHAR_KIND) { + if ( + kind == PyUnicode_4BYTE_KIND + ) + { + assert(ascii->wstr == data); + assert(compact->wstr_length == ascii->length); + } else + assert(ascii->wstr != data); + } + + if (compact->utf8 == NULL) + assert(compact->utf8_length == 0); + if (ascii->wstr == NULL) + assert(compact->wstr_length == 0); + } + /* check that the best kind is used */ + if (check_content && kind != PyUnicode_WCHAR_KIND) + { + Py_ssize_t i; + Py_UCS4 maxchar = 0; + void *data = PyUnicode_DATA(ascii); + for (i=0; i < ascii->length; i++) + { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch > maxchar) + maxchar = ch; + } + if (kind == PyUnicode_1BYTE_KIND) { + if (ascii->state.ascii == 0) { + assert(maxchar >= 128); + assert(maxchar <= 255); + } + else + assert(maxchar < 128); + } + else if (kind == PyUnicode_2BYTE_KIND) { + assert(maxchar >= 0x100); + assert(maxchar <= 0xFFFF); + } + else { + assert(maxchar >= 0x10000); + assert(maxchar <= MAX_UNICODE); + } + } + return 1; +} +#endif + +static PyObject* +unicode_result_wchar(PyObject *unicode) +{ +#ifndef Py_DEBUG + Py_ssize_t len; + + assert(Py_REFCNT(unicode) == 1); + + len = _PyUnicode_WSTR_LENGTH(unicode); + if (len == 0) { + Py_INCREF(unicode_empty); + Py_DECREF(unicode); + return unicode_empty; + } + + if (len == 1) { + wchar_t ch = _PyUnicode_WSTR(unicode)[0]; + if (ch < 256) { + PyObject *latin1_char = get_latin1_char((unsigned char)ch); + Py_DECREF(unicode); + return latin1_char; + } + } + + if (_PyUnicode_Ready(unicode) < 0) { + Py_XDECREF(unicode); + return NULL; + } +#else + /* don't make the result ready in debug mode to ensure that the caller + makes the string ready before using it */ + assert(_PyUnicode_CheckConsistency(unicode, 1)); +#endif + return unicode; +} + +static PyObject* +unicode_result_ready(PyObject *unicode) +{ + Py_ssize_t length; + + length = PyUnicode_GET_LENGTH(unicode); + if (length == 0) { + if (unicode != unicode_empty) { + Py_INCREF(unicode_empty); + Py_DECREF(unicode); + } + return unicode_empty; + } + + if (length == 1) { + Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); + if (ch < 256) { + PyObject *latin1_char = unicode_latin1[ch]; + if (latin1_char != NULL) { + if (unicode != latin1_char) { + Py_INCREF(latin1_char); + Py_DECREF(unicode); + } + return latin1_char; + } + else { + assert(_PyUnicode_CheckConsistency(unicode, 1)); + Py_INCREF(unicode); + unicode_latin1[ch] = unicode; + return unicode; + } + } + } + + assert(_PyUnicode_CheckConsistency(unicode, 1)); + return unicode; +} + +static PyObject* +unicode_result(PyObject *unicode) +{ + assert(_PyUnicode_CHECK(unicode)); + if (PyUnicode_IS_READY(unicode)) + return unicode_result_ready(unicode); + else + return unicode_result_wchar(unicode); +} + +#ifdef HAVE_MBCS +static OSVERSIONINFOEX winver; +#endif + +/* --- Bloom Filters ----------------------------------------------------- */ + +/* stuff to implement simple "bloom filters" for Unicode characters. + to keep things simple, we use a single bitmask, using the least 5 + bits from each unicode characters as the bit index. */ + +/* the linebreak mask is set up by Unicode_Init below */ + +#if LONG_BIT >= 128 +#define BLOOM_WIDTH 128 +#elif LONG_BIT >= 64 +#define BLOOM_WIDTH 64 +#elif LONG_BIT >= 32 +#define BLOOM_WIDTH 32 +#else +#error "LONG_BIT is smaller than 32" +#endif + +#define BLOOM_MASK unsigned long + +static BLOOM_MASK bloom_linebreak; + +#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) +#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) + +#define BLOOM_LINEBREAK(ch) \ + ((ch) < 128U ? ascii_linebreak[(ch)] : \ + (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) + +Py_LOCAL_INLINE(BLOOM_MASK) +make_bloom_mask(int kind, void* ptr, Py_ssize_t len) +{ + /* calculate simple bloom-style bitmask for a given unicode string */ + + BLOOM_MASK mask; + Py_ssize_t i; + + mask = 0; + for (i = 0; i < len; i++) + BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i)); + + return mask; +} + +#define BLOOM_MEMBER(mask, chr, str) \ + (BLOOM(mask, chr) \ + && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0)) + +/* Compilation of templated routines */ + +#include "stringlib/asciilib.h" +#include "stringlib/fastsearch.h" +#include "stringlib/partition.h" +#include "stringlib/split.h" +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/find_max_char.h" +#include "stringlib/localeutil.h" +#include "stringlib/undef.h" + +#include "stringlib/ucs1lib.h" +#include "stringlib/fastsearch.h" +#include "stringlib/partition.h" +#include "stringlib/split.h" +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/find_max_char.h" +#include "stringlib/localeutil.h" +#include "stringlib/undef.h" + +#include "stringlib/ucs2lib.h" +#include "stringlib/fastsearch.h" +#include "stringlib/partition.h" +#include "stringlib/split.h" +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/find_max_char.h" +#include "stringlib/localeutil.h" +#include "stringlib/undef.h" + +#include "stringlib/ucs4lib.h" +#include "stringlib/fastsearch.h" +#include "stringlib/partition.h" +#include "stringlib/split.h" +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/find_max_char.h" +#include "stringlib/localeutil.h" +#include "stringlib/undef.h" + +#include "stringlib/unicodedefs.h" +#include "stringlib/fastsearch.h" +#include "stringlib/count.h" +#include "stringlib/find.h" +#include "stringlib/undef.h" + +/* --- Unicode Object ----------------------------------------------------- */ + +static PyObject * +fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s)); + +Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, + Py_ssize_t size, Py_UCS4 ch, + int direction) +{ + int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH; + + switch (kind) { + case PyUnicode_1BYTE_KIND: + { + Py_UCS1 ch1 = (Py_UCS1) ch; + if (ch1 == ch) + return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode); + else + return -1; + } + case PyUnicode_2BYTE_KIND: + { + Py_UCS2 ch2 = (Py_UCS2) ch; + if (ch2 == ch) + return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode); + else + return -1; + } + case PyUnicode_4BYTE_KIND: + return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode); + default: + assert(0); + return -1; + } +} + +static PyObject* +resize_compact(PyObject *unicode, Py_ssize_t length) +{ + Py_ssize_t char_size; + Py_ssize_t struct_size; + Py_ssize_t new_size; + int share_wstr; + + assert(PyUnicode_IS_READY(unicode)); + char_size = PyUnicode_KIND(unicode); + if (PyUnicode_IS_COMPACT_ASCII(unicode)) + struct_size = sizeof(PyASCIIObject); + else + struct_size = sizeof(PyCompactUnicodeObject); + share_wstr = _PyUnicode_SHARE_WSTR(unicode); + + _Py_DEC_REFTOTAL; + _Py_ForgetReference(unicode); + + if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { + PyErr_NoMemory(); + return NULL; + } + new_size = (struct_size + (length + 1) * char_size); + + unicode = (PyObject *)PyObject_REALLOC((char *)unicode, new_size); + if (unicode == NULL) { + PyObject_Del(unicode); + PyErr_NoMemory(); + return NULL; + } + _Py_NewReference(unicode); + _PyUnicode_LENGTH(unicode) = length; + if (share_wstr) { + _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); + if (!PyUnicode_IS_COMPACT_ASCII(unicode)) + _PyUnicode_WSTR_LENGTH(unicode) = length; + } + PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), + length, 0); + return unicode; +} + +static int +resize_inplace(PyObject *unicode, Py_ssize_t length) +{ + wchar_t *wstr; + assert(!PyUnicode_IS_COMPACT(unicode)); + assert(Py_REFCNT(unicode) == 1); + + _PyUnicode_DIRTY(unicode); + + if (PyUnicode_IS_READY(unicode)) { + Py_ssize_t char_size; + Py_ssize_t new_size; + int share_wstr, share_utf8; + void *data; + + data = _PyUnicode_DATA_ANY(unicode); + assert(data != NULL); + char_size = PyUnicode_KIND(unicode); + share_wstr = _PyUnicode_SHARE_WSTR(unicode); + share_utf8 = _PyUnicode_SHARE_UTF8(unicode); + if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) + { + PyObject_DEL(_PyUnicode_UTF8(unicode)); + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + } + + if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + PyErr_NoMemory(); + return -1; + } + new_size = (length + 1) * char_size; + + data = (PyObject *)PyObject_REALLOC(data, new_size); + if (data == NULL) { + PyErr_NoMemory(); + return -1; + } + _PyUnicode_DATA_ANY(unicode) = data; + if (share_wstr) { + _PyUnicode_WSTR(unicode) = data; + _PyUnicode_WSTR_LENGTH(unicode) = length; + } + if (share_utf8) { + _PyUnicode_UTF8(unicode) = data; + _PyUnicode_UTF8_LENGTH(unicode) = length; + } + _PyUnicode_LENGTH(unicode) = length; + PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); + if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { + assert(_PyUnicode_CheckConsistency(unicode, 0)); + return 0; + } + } + assert(_PyUnicode_WSTR(unicode) != NULL); + + /* check for integer overflow */ + if (length > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { + PyErr_NoMemory(); + return -1; + } + wstr = _PyUnicode_WSTR(unicode); + wstr = PyObject_REALLOC(wstr, sizeof(wchar_t) * (length + 1)); + if (!wstr) { + PyErr_NoMemory(); + return -1; + } + _PyUnicode_WSTR(unicode) = wstr; + _PyUnicode_WSTR(unicode)[length] = 0; + _PyUnicode_WSTR_LENGTH(unicode) = length; + assert(_PyUnicode_CheckConsistency(unicode, 0)); + return 0; +} + +static PyObject* +resize_copy(PyObject *unicode, Py_ssize_t length) +{ + Py_ssize_t copy_length; + if (PyUnicode_IS_COMPACT(unicode)) { + PyObject *copy; + assert(PyUnicode_IS_READY(unicode)); + + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) + return NULL; + + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + copy_characters(copy, 0, unicode, 0, copy_length); + return copy; + } + else { + PyObject *w; + assert(_PyUnicode_WSTR(unicode) != NULL); + assert(_PyUnicode_DATA_ANY(unicode) == NULL); + w = (PyObject*)_PyUnicode_New(length); + if (w == NULL) + return NULL; + copy_length = _PyUnicode_WSTR_LENGTH(unicode); + copy_length = Py_MIN(copy_length, length); + Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), + copy_length); + return w; + } +} + +/* We allocate one more byte to make sure the string is + Ux0000 terminated; some code (e.g. new_identifier) + relies on that. + + XXX This allocator could further be enhanced by assuring that the + free list never reduces its size below 1. + +*/ + +#ifdef Py_DEBUG +static int unicode_old_new_calls = 0; +#endif + +static PyUnicodeObject * +_PyUnicode_New(Py_ssize_t length) +{ + register PyUnicodeObject *unicode; + size_t new_size; + + /* Optimization for empty strings */ + if (length == 0 && unicode_empty != NULL) { + Py_INCREF(unicode_empty); + return (PyUnicodeObject*)unicode_empty; + } + + /* Ensure we won't overflow the size. */ + if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + return (PyUnicodeObject *)PyErr_NoMemory(); + } + if (length < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to _PyUnicode_New"); + return NULL; + } + +#ifdef Py_DEBUG + ++unicode_old_new_calls; +#endif + + unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + if (unicode == NULL) + return NULL; + new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size); + if (!_PyUnicode_WSTR(unicode)) { + PyErr_NoMemory(); + goto onError; + } + + /* Initialize the first element to guard against cases where + * the caller fails before initializing str -- unicode_resize() + * reads str[0], and the Keep-Alive optimization can keep memory + * allocated for str alive across a call to unicode_dealloc(unicode). + * We don't want unicode_resize to read uninitialized memory in + * that case. + */ + _PyUnicode_WSTR(unicode)[0] = 0; + _PyUnicode_WSTR(unicode)[length] = 0; + _PyUnicode_WSTR_LENGTH(unicode) = length; + _PyUnicode_HASH(unicode) = -1; + _PyUnicode_STATE(unicode).interned = 0; + _PyUnicode_STATE(unicode).kind = 0; + _PyUnicode_STATE(unicode).compact = 0; + _PyUnicode_STATE(unicode).ready = 0; + _PyUnicode_STATE(unicode).ascii = 0; + _PyUnicode_DATA_ANY(unicode) = NULL; + _PyUnicode_LENGTH(unicode) = 0; + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); + return unicode; + + onError: + /* XXX UNREF/NEWREF interface should be more symmetrical */ + _Py_DEC_REFTOTAL; + _Py_ForgetReference((PyObject *)unicode); + PyObject_Del(unicode); + return NULL; +} + +static const char* +unicode_kind_name(PyObject *unicode) +{ + /* don't check consistency: unicode_kind_name() is called from + _PyUnicode_Dump() */ + if (!PyUnicode_IS_COMPACT(unicode)) + { + if (!PyUnicode_IS_READY(unicode)) + return "wstr"; + switch(PyUnicode_KIND(unicode)) + { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(unicode)) + return "legacy ascii"; + else + return "legacy latin1"; + case PyUnicode_2BYTE_KIND: + return "legacy UCS2"; + case PyUnicode_4BYTE_KIND: + return "legacy UCS4"; + default: + return ""; + } + } + assert(PyUnicode_IS_READY(unicode)); + switch(PyUnicode_KIND(unicode)) + { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(unicode)) + return "ascii"; + else + return "latin1"; + case PyUnicode_2BYTE_KIND: + return "UCS2"; + case PyUnicode_4BYTE_KIND: + return "UCS4"; + default: + return ""; + } +} + +#ifdef Py_DEBUG +static int unicode_new_new_calls = 0; + +/* Functions wrapping macros for use in debugger */ +char *_PyUnicode_utf8(void *unicode){ + return PyUnicode_UTF8(unicode); +} + +void *_PyUnicode_compact_data(void *unicode) { + return _PyUnicode_COMPACT_DATA(unicode); +} +void *_PyUnicode_data(void *unicode){ + printf("obj %p\n", unicode); + printf("compact %d\n", PyUnicode_IS_COMPACT(unicode)); + printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode)); + printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1))); + printf("compact op %p\n", ((void*)((PyCompactUnicodeObject*)(unicode) + 1))); + printf("compact data %p\n", _PyUnicode_COMPACT_DATA(unicode)); + return PyUnicode_DATA(unicode); +} + +void +_PyUnicode_Dump(PyObject *op) +{ + PyASCIIObject *ascii = (PyASCIIObject *)op; + PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; + PyUnicodeObject *unicode = (PyUnicodeObject *)op; + void *data; + + if (ascii->state.compact) + { + if (ascii->state.ascii) + data = (ascii + 1); + else + data = (compact + 1); + } + else + data = unicode->data.any; + printf("%s: len=%zu, ",unicode_kind_name(op), ascii->length); + + if (ascii->wstr == data) + printf("shared "); + printf("wstr=%p", ascii->wstr); + + if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { + printf(" (%zu), ", compact->wstr_length); + if (!ascii->state.compact && compact->utf8 == unicode->data.any) + printf("shared "); + printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length); + } + printf(", data=%p\n", data); +} +#endif + +PyObject * +PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) +{ + PyObject *obj; + PyCompactUnicodeObject *unicode; + void *data; + int kind_state; + int is_sharing, is_ascii; + Py_ssize_t char_size; + Py_ssize_t struct_size; + + /* Optimization for empty strings */ + if (size == 0 && unicode_empty != NULL) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + +#ifdef Py_DEBUG + ++unicode_new_new_calls; +#endif + + is_ascii = 0; + is_sharing = 0; + struct_size = sizeof(PyCompactUnicodeObject); + if (maxchar < 128) { + kind_state = PyUnicode_1BYTE_KIND; + char_size = 1; + is_ascii = 1; + struct_size = sizeof(PyASCIIObject); + } + else if (maxchar < 256) { + kind_state = PyUnicode_1BYTE_KIND; + char_size = 1; + } + else if (maxchar < 65536) { + kind_state = PyUnicode_2BYTE_KIND; + char_size = 2; + if (sizeof(wchar_t) == 2) + is_sharing = 1; + } + else { + kind_state = PyUnicode_4BYTE_KIND; + char_size = 4; + if (sizeof(wchar_t) == 4) + is_sharing = 1; + } + + /* Ensure we won't overflow the size. */ + if (size < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to PyUnicode_New"); + return NULL; + } + if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) + return PyErr_NoMemory(); + + /* Duplicated allocation code from _PyObject_New() instead of a call to + * PyObject_New() so we are able to allocate space for the object and + * it's data buffer. + */ + obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size); + if (obj == NULL) + return PyErr_NoMemory(); + obj = PyObject_INIT(obj, &PyUnicode_Type); + if (obj == NULL) + return NULL; + + unicode = (PyCompactUnicodeObject *)obj; + if (is_ascii) + data = ((PyASCIIObject*)obj) + 1; + else + data = unicode + 1; + _PyUnicode_LENGTH(unicode) = size; + _PyUnicode_HASH(unicode) = -1; + _PyUnicode_STATE(unicode).interned = 0; + _PyUnicode_STATE(unicode).kind = kind_state; + _PyUnicode_STATE(unicode).compact = 1; + _PyUnicode_STATE(unicode).ready = 1; + _PyUnicode_STATE(unicode).ascii = is_ascii; + if (is_ascii) { + ((char*)data)[size] = 0; + _PyUnicode_WSTR(unicode) = NULL; + } + else if (kind_state == PyUnicode_1BYTE_KIND) { + ((char*)data)[size] = 0; + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; + unicode->utf8 = NULL; + unicode->utf8_length = 0; + } + else { + unicode->utf8 = NULL; + unicode->utf8_length = 0; + if (kind_state == PyUnicode_2BYTE_KIND) + ((Py_UCS2*)data)[size] = 0; + else /* kind_state == PyUnicode_4BYTE_KIND */ + ((Py_UCS4*)data)[size] = 0; + if (is_sharing) { + _PyUnicode_WSTR_LENGTH(unicode) = size; + _PyUnicode_WSTR(unicode) = (wchar_t *)data; + } + else { + _PyUnicode_WSTR_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = NULL; + } + } + assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0)); + return obj; +} + +static int +_PyUnicode_Dirty(PyObject *unicode) +{ + assert(_PyUnicode_CHECK(unicode)); + if (Py_REFCNT(unicode) != 1) { + PyErr_SetString(PyExc_SystemError, + "Cannot modify a string having more than 1 reference"); + return -1; + } + _PyUnicode_DIRTY(unicode); + return 0; +} + +static int +_copy_characters(PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, + Py_ssize_t how_many, int check_maxchar) +{ + unsigned int from_kind, to_kind; + void *from_data, *to_data; + int fast; + + assert(PyUnicode_Check(from)); + assert(PyUnicode_Check(to)); + assert(PyUnicode_IS_READY(from)); + assert(PyUnicode_IS_READY(to)); + + assert(PyUnicode_GET_LENGTH(from) >= how_many); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + assert(0 <= how_many); + + if (how_many == 0) + return 0; + + from_kind = PyUnicode_KIND(from); + from_data = PyUnicode_DATA(from); + to_kind = PyUnicode_KIND(to); + to_data = PyUnicode_DATA(to); + +#ifdef Py_DEBUG + if (!check_maxchar + && (from_kind > to_kind + || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))) + { + const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); + Py_UCS4 ch; + Py_ssize_t i; + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + assert(ch <= to_maxchar); + } + } +#endif + fast = (from_kind == to_kind); + if (check_maxchar + && (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) + { + /* deny latin1 => ascii */ + fast = 0; + } + + if (fast) { + Py_MEMCPY((char*)to_data + to_kind * to_start, + (char*)from_data + from_kind * from_start, + to_kind * how_many); + } + else if (from_kind == PyUnicode_1BYTE_KIND + && to_kind == PyUnicode_2BYTE_KIND) + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + PyUnicode_1BYTE_DATA(from) + from_start, + PyUnicode_1BYTE_DATA(from) + from_start + how_many, + PyUnicode_2BYTE_DATA(to) + to_start + ); + } + else if (from_kind == PyUnicode_1BYTE_KIND + && to_kind == PyUnicode_4BYTE_KIND) + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + PyUnicode_1BYTE_DATA(from) + from_start, + PyUnicode_1BYTE_DATA(from) + from_start + how_many, + PyUnicode_4BYTE_DATA(to) + to_start + ); + } + else if (from_kind == PyUnicode_2BYTE_KIND + && to_kind == PyUnicode_4BYTE_KIND) + { + _PyUnicode_CONVERT_BYTES( + Py_UCS2, Py_UCS4, + PyUnicode_2BYTE_DATA(from) + from_start, + PyUnicode_2BYTE_DATA(from) + from_start + how_many, + PyUnicode_4BYTE_DATA(to) + to_start + ); + } + else { + /* check if max_char(from substring) <= max_char(to) */ + if (from_kind > to_kind + /* latin1 => ascii */ + || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) + { + /* slow path to check for character overflow */ + const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); + Py_UCS4 ch; + Py_ssize_t i; + +#ifdef Py_DEBUG + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + assert(ch <= to_maxchar); + PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); + } +#else + if (!check_maxchar) { + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); + } + } + else { + for (i=0; i < how_many; i++) { + ch = PyUnicode_READ(from_kind, from_data, from_start + i); + if (ch > to_maxchar) + return 1; + PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); + } + } +#endif + } + else { + assert(0 && "inconsistent state"); + return 1; + } + } + return 0; +} + +static void +copy_characters(PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, + Py_ssize_t how_many) +{ + (void)_copy_characters(to, to_start, from, from_start, how_many, 0); +} + +Py_ssize_t +PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, + Py_ssize_t how_many) +{ + int err; + + if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) { + PyErr_BadInternalCall(); + return -1; + } + + if (PyUnicode_READY(from)) + return -1; + if (PyUnicode_READY(to)) + return -1; + + how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); + if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { + PyErr_Format(PyExc_SystemError, + "Cannot write %zi characters at %zi " + "in a string of %zi characters", + how_many, to_start, PyUnicode_GET_LENGTH(to)); + return -1; + } + + if (how_many == 0) + return 0; + + if (_PyUnicode_Dirty(to)) + return -1; + + err = _copy_characters(to, to_start, from, from_start, how_many, 1); + if (err) { + PyErr_Format(PyExc_SystemError, + "Cannot copy %s characters " + "into a string of %s characters", + unicode_kind_name(from), + unicode_kind_name(to)); + return -1; + } + return how_many; +} + +/* Find the maximum code point and count the number of surrogate pairs so a + correct string length can be computed before converting a string to UCS4. + This function counts single surrogates as a character and not as a pair. + + Return 0 on success, or -1 on error. */ +static int +find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, + Py_UCS4 *maxchar, Py_ssize_t *num_surrogates) +{ + const wchar_t *iter; + Py_UCS4 ch; + + assert(num_surrogates != NULL && maxchar != NULL); + *num_surrogates = 0; + *maxchar = 0; + + for (iter = begin; iter < end; ) { + { + ch = *iter; + iter++; + } + if (ch > *maxchar) { + *maxchar = ch; + if (*maxchar > MAX_UNICODE) { + PyErr_Format(PyExc_ValueError, + "character U+%x is not in range [U+0000; U+10ffff]", + ch); + return -1; + } + } + } + return 0; +} + +int +_PyUnicode_Ready(PyObject *unicode) +{ + wchar_t *end; + Py_UCS4 maxchar = 0; + Py_ssize_t num_surrogates; + + /* _PyUnicode_Ready() is only intended for old-style API usage where + strings were created using _PyObject_New() and where no canonical + representation (the str field) has been set yet aka strings + which are not yet ready. */ + assert(_PyUnicode_CHECK(unicode)); + assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); + assert(_PyUnicode_WSTR(unicode) != NULL); + assert(_PyUnicode_DATA_ANY(unicode) == NULL); + assert(_PyUnicode_UTF8(unicode) == NULL); + /* Actually, it should neither be interned nor be anything else: */ + assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); + + end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); + if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, + &maxchar, &num_surrogates) == -1) + return -1; + + if (maxchar < 256) { + _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1); + if (!_PyUnicode_DATA_ANY(unicode)) { + PyErr_NoMemory(); + return -1; + } + _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char, + _PyUnicode_WSTR(unicode), end, + PyUnicode_1BYTE_DATA(unicode)); + PyUnicode_1BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; + _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); + _PyUnicode_STATE(unicode).kind = PyUnicode_1BYTE_KIND; + if (maxchar < 128) { + _PyUnicode_STATE(unicode).ascii = 1; + _PyUnicode_UTF8(unicode) = _PyUnicode_DATA_ANY(unicode); + _PyUnicode_UTF8_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); + } + else { + _PyUnicode_STATE(unicode).ascii = 0; + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + } + PyObject_FREE(_PyUnicode_WSTR(unicode)); + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; + } + /* In this case we might have to convert down from 4-byte native + wchar_t to 2-byte unicode. */ + else if (maxchar < 65536) { + assert(num_surrogates == 0 && + "FindMaxCharAndNumSurrogatePairs() messed up"); + + /* sizeof(wchar_t) == 4 */ + _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC( + 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1)); + if (!_PyUnicode_DATA_ANY(unicode)) { + PyErr_NoMemory(); + return -1; + } + _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2, + _PyUnicode_WSTR(unicode), end, + PyUnicode_2BYTE_DATA(unicode)); + PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; + _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); + _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND; + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + PyObject_FREE(_PyUnicode_WSTR(unicode)); + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; + } + /* maxchar exeeds 16 bit, wee need 4 bytes for unicode characters */ + else { + assert(num_surrogates == 0); + + _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode); + _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND; + PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; + } + _PyUnicode_STATE(unicode).ready = 1; + assert(_PyUnicode_CheckConsistency(unicode, 1)); + return 0; +} + +static void +unicode_dealloc(register PyObject *unicode) +{ + switch (PyUnicode_CHECK_INTERNED(unicode)) { + case SSTATE_NOT_INTERNED: + break; + + case SSTATE_INTERNED_MORTAL: + /* revive dead object temporarily for DelItem */ + Py_REFCNT(unicode) = 3; + if (PyDict_DelItem(interned, unicode) != 0) + Py_FatalError( + "deletion of interned string failed"); + break; + + case SSTATE_INTERNED_IMMORTAL: + Py_FatalError("Immortal interned string died."); + + default: + Py_FatalError("Inconsistent interned string state."); + } + + if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) + PyObject_DEL(_PyUnicode_WSTR(unicode)); + if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) + PyObject_DEL(_PyUnicode_UTF8(unicode)); + + if (PyUnicode_IS_COMPACT(unicode)) { + Py_TYPE(unicode)->tp_free(unicode); + } + else { + if (_PyUnicode_DATA_ANY(unicode)) + PyObject_DEL(_PyUnicode_DATA_ANY(unicode)); + Py_TYPE(unicode)->tp_free(unicode); + } +} + +#ifdef Py_DEBUG +static int +unicode_is_singleton(PyObject *unicode) +{ + PyASCIIObject *ascii = (PyASCIIObject *)unicode; + if (unicode == unicode_empty) + return 1; + if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) + { + Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); + if (ch < 256 && unicode_latin1[ch] == unicode) + return 1; + } + return 0; +} +#endif + +static int +unicode_resizable(PyObject *unicode) +{ + if (Py_REFCNT(unicode) != 1) + return 0; + if (PyUnicode_CHECK_INTERNED(unicode)) + return 0; +#ifdef Py_DEBUG + /* singleton refcount is greater than 1 */ + assert(!unicode_is_singleton(unicode)); +#endif + return 1; +} + +static int +unicode_resize(PyObject **p_unicode, Py_ssize_t length) +{ + PyObject *unicode; + Py_ssize_t old_length; + + assert(p_unicode != NULL); + unicode = *p_unicode; + + assert(unicode != NULL); + assert(PyUnicode_Check(unicode)); + assert(0 <= length); + + if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) + old_length = PyUnicode_WSTR_LENGTH(unicode); + else + old_length = PyUnicode_GET_LENGTH(unicode); + if (old_length == length) + return 0; + + if (length == 0) { + Py_DECREF(*p_unicode); + *p_unicode = unicode_empty; + Py_INCREF(*p_unicode); + return 0; + } + + if (!unicode_resizable(unicode)) { + PyObject *copy = resize_copy(unicode, length); + if (copy == NULL) + return -1; + Py_DECREF(*p_unicode); + *p_unicode = copy; + return 0; + } + + if (PyUnicode_IS_COMPACT(unicode)) { + *p_unicode = resize_compact(unicode, length); + if (*p_unicode == NULL) + return -1; + assert(_PyUnicode_CheckConsistency(*p_unicode, 0)); + return 0; + } + return resize_inplace(unicode, length); +} + +int +PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length) +{ + PyObject *unicode; + if (p_unicode == NULL) { + PyErr_BadInternalCall(); + return -1; + } + unicode = *p_unicode; + if (unicode == NULL || !PyUnicode_Check(unicode) || length < 0) + { + PyErr_BadInternalCall(); + return -1; + } + return unicode_resize(p_unicode, length); +} + +static int +unicode_widen(PyObject **p_unicode, unsigned int maxchar) +{ + PyObject *result; + assert(PyUnicode_IS_READY(*p_unicode)); + if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode)) + return 0; + result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode), + maxchar); + if (result == NULL) + return -1; + PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, + PyUnicode_GET_LENGTH(*p_unicode)); + Py_DECREF(*p_unicode); + *p_unicode = result; + return 0; +} + +static int +unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos, + Py_UCS4 ch) +{ + if (unicode_widen(p_unicode, ch) < 0) + return -1; + PyUnicode_WRITE(PyUnicode_KIND(*p_unicode), + PyUnicode_DATA(*p_unicode), + (*pos)++, ch); + return 0; +} + +static PyObject* +get_latin1_char(unsigned char ch) +{ + PyObject *unicode = unicode_latin1[ch]; + if (!unicode) { + unicode = PyUnicode_New(1, ch); + if (!unicode) + return NULL; + PyUnicode_1BYTE_DATA(unicode)[0] = ch; + assert(_PyUnicode_CheckConsistency(unicode, 1)); + unicode_latin1[ch] = unicode; + } + Py_INCREF(unicode); + return unicode; +} + +PyObject * +PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) +{ + PyObject *unicode; + Py_UCS4 maxchar = 0; + Py_ssize_t num_surrogates; + + if (u == NULL) + return (PyObject*)_PyUnicode_New(size); + + /* If the Unicode data is known at construction time, we can apply + some optimizations which share commonly used objects. */ + + /* Optimization for empty strings */ + if (size == 0 && unicode_empty != NULL) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Single character Unicode objects in the Latin-1 range are + shared when using this constructor */ + if (size == 1 && *u < 256) + return get_latin1_char((unsigned char)*u); + + /* If not empty and not single character, copy the Unicode data + into the new object */ + if (find_maxchar_surrogates(u, u + size, + &maxchar, &num_surrogates) == -1) + return NULL; + + unicode = PyUnicode_New(size - num_surrogates, maxchar); + if (!unicode) + return NULL; + + switch (PyUnicode_KIND(unicode)) { + case PyUnicode_1BYTE_KIND: + _PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char, + u, u + size, PyUnicode_1BYTE_DATA(unicode)); + break; + case PyUnicode_2BYTE_KIND: +#if Py_UNICODE_SIZE == 2 + Py_MEMCPY(PyUnicode_2BYTE_DATA(unicode), u, size * 2); +#else + _PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2, + u, u + size, PyUnicode_2BYTE_DATA(unicode)); +#endif + break; + case PyUnicode_4BYTE_KIND: +#if SIZEOF_WCHAR_T == 2 + /* This is the only case which has to process surrogates, thus + a simple copy loop is not enough and we need a function. */ + unicode_convert_wchar_to_ucs4(u, u + size, unicode); +#else + assert(num_surrogates == 0); + Py_MEMCPY(PyUnicode_4BYTE_DATA(unicode), u, size * 4); +#endif + break; + default: + assert(0 && "Impossible state"); + } + + return unicode_result(unicode); +} + +PyObject * +PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) +{ + if (size < 0) { + PyErr_SetString(PyExc_SystemError, + "Negative size passed to PyUnicode_FromStringAndSize"); + return NULL; + } + + /* If the Unicode data is known at construction time, we can apply + some optimizations which share commonly used objects. + Also, this means the input must be UTF-8, so fall back to the + UTF-8 decoder at the end. */ + if (u != NULL) { + + /* Optimization for empty strings */ + if (size == 0 && unicode_empty != NULL) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Single characters are shared when using this constructor. + Restrict to ASCII, since the input must be UTF-8. */ + if (size == 1 && (unsigned char)*u < 128) + return get_latin1_char((unsigned char)*u); + + return PyUnicode_DecodeUTF8(u, size, NULL); + } + + return (PyObject *)_PyUnicode_New(size); +} + +PyObject * +PyUnicode_FromString(const char *u) +{ + size_t size = strlen(u); + if (size > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "input too long"); + return NULL; + } + + return PyUnicode_FromStringAndSize(u, size); +} + +PyObject * +_PyUnicode_FromId(_Py_Identifier *id) +{ + if (!id->object) { + id->object = PyUnicode_FromString(id->string); + if (!id->object) + return NULL; + PyUnicode_InternInPlace(&id->object); + assert(!id->next); + id->next = static_strings; + static_strings = id; + } + return id->object; +} + +void +_PyUnicode_ClearStaticStrings() +{ + _Py_Identifier *i; + for (i = static_strings; i; i = i->next) { + Py_DECREF(i->object); + i->object = NULL; + i->next = NULL; + } +} + +/* Internal function, don't check maximum character */ + +static PyObject* +unicode_fromascii(const unsigned char* s, Py_ssize_t size) +{ + PyObject *res; +#ifdef Py_DEBUG + const unsigned char *p; + const unsigned char *end = s + size; + for (p=s; p < end; p++) { + assert(*p < 128); + } +#endif + if (size == 1) + return get_latin1_char(s[0]); + res = PyUnicode_New(size, 127); + if (!res) + return NULL; + memcpy(PyUnicode_1BYTE_DATA(res), s, size); + return res; +} + +static Py_UCS4 +kind_maxchar_limit(unsigned int kind) +{ + switch(kind) { + case PyUnicode_1BYTE_KIND: + return 0x80; + case PyUnicode_2BYTE_KIND: + return 0x100; + case PyUnicode_4BYTE_KIND: + return 0x10000; + default: + assert(0 && "invalid kind"); + return MAX_UNICODE; + } +} + +static PyObject* +_PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size) +{ + PyObject *res; + unsigned char max_char; + + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + assert(size > 0); + if (size == 1) + return get_latin1_char(u[0]); + + max_char = ucs1lib_find_max_char(u, u + size); + res = PyUnicode_New(size, max_char); + if (!res) + return NULL; + memcpy(PyUnicode_1BYTE_DATA(res), u, size); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +static PyObject* +_PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) +{ + PyObject *res; + Py_UCS2 max_char; + + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + assert(size > 0); + if (size == 1 && u[0] < 256) + return get_latin1_char((unsigned char)u[0]); + + max_char = ucs2lib_find_max_char(u, u + size); + res = PyUnicode_New(size, max_char); + if (!res) + return NULL; + if (max_char >= 256) + memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2)*size); + else { + _PyUnicode_CONVERT_BYTES( + Py_UCS2, Py_UCS1, u, u + size, PyUnicode_1BYTE_DATA(res)); + } + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +static PyObject* +_PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) +{ + PyObject *res; + Py_UCS4 max_char; + + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + assert(size > 0); + if (size == 1 && u[0] < 256) + return get_latin1_char((unsigned char)u[0]); + + max_char = ucs4lib_find_max_char(u, u + size); + res = PyUnicode_New(size, max_char); + if (!res) + return NULL; + if (max_char < 256) + _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1, u, u + size, + PyUnicode_1BYTE_DATA(res)); + else if (max_char < 0x10000) + _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2, u, u + size, + PyUnicode_2BYTE_DATA(res)); + else + memcpy(PyUnicode_4BYTE_DATA(res), u, sizeof(Py_UCS4)*size); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +PyObject* +PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) +{ + if (size < 0) { + PyErr_SetString(PyExc_ValueError, "size must be positive"); + return NULL; + } + switch(kind) { + case PyUnicode_1BYTE_KIND: + return _PyUnicode_FromUCS1(buffer, size); + case PyUnicode_2BYTE_KIND: + return _PyUnicode_FromUCS2(buffer, size); + case PyUnicode_4BYTE_KIND: + return _PyUnicode_FromUCS4(buffer, size); + default: + PyErr_SetString(PyExc_SystemError, "invalid kind"); + return NULL; + } +} + +/* Ensure that a string uses the most efficient storage, if it is not the + case: create a new string with of the right kind. Write NULL into *p_unicode + on error. */ +static void +unicode_adjust_maxchar(PyObject **p_unicode) +{ + PyObject *unicode, *copy; + Py_UCS4 max_char; + Py_ssize_t len; + unsigned int kind; + + assert(p_unicode != NULL); + unicode = *p_unicode; + assert(PyUnicode_IS_READY(unicode)); + if (PyUnicode_IS_ASCII(unicode)) + return; + + len = PyUnicode_GET_LENGTH(unicode); + kind = PyUnicode_KIND(unicode); + if (kind == PyUnicode_1BYTE_KIND) { + const Py_UCS1 *u = PyUnicode_1BYTE_DATA(unicode); + max_char = ucs1lib_find_max_char(u, u + len); + if (max_char >= 128) + return; + } + else if (kind == PyUnicode_2BYTE_KIND) { + const Py_UCS2 *u = PyUnicode_2BYTE_DATA(unicode); + max_char = ucs2lib_find_max_char(u, u + len); + if (max_char >= 256) + return; + } + else { + const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode); + assert(kind == PyUnicode_4BYTE_KIND); + max_char = ucs4lib_find_max_char(u, u + len); + if (max_char >= 0x10000) + return; + } + copy = PyUnicode_New(len, max_char); + copy_characters(copy, 0, unicode, 0, len); + Py_DECREF(unicode); + *p_unicode = copy; +} + +PyObject* +PyUnicode_Copy(PyObject *unicode) +{ + Py_ssize_t length; + PyObject *copy; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadInternalCall(); + return NULL; + } + if (PyUnicode_READY(unicode)) + return NULL; + + length = PyUnicode_GET_LENGTH(unicode); + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (!copy) + return NULL; + assert(PyUnicode_KIND(copy) == PyUnicode_KIND(unicode)); + + Py_MEMCPY(PyUnicode_DATA(copy), PyUnicode_DATA(unicode), + length * PyUnicode_KIND(unicode)); + assert(_PyUnicode_CheckConsistency(copy, 1)); + return copy; +} + + +/* Widen Unicode objects to larger buffers. Don't write terminating null + character. Return NULL on error. */ + +void* +_PyUnicode_AsKind(PyObject *s, unsigned int kind) +{ + Py_ssize_t len; + void *result; + unsigned int skind; + + if (PyUnicode_READY(s)) + return NULL; + + len = PyUnicode_GET_LENGTH(s); + skind = PyUnicode_KIND(s); + if (skind >= kind) { + PyErr_SetString(PyExc_SystemError, "invalid widening attempt"); + return NULL; + } + switch(kind) { + case PyUnicode_2BYTE_KIND: + result = PyMem_Malloc(len * sizeof(Py_UCS2)); + if (!result) + return PyErr_NoMemory(); + assert(skind == PyUnicode_1BYTE_KIND); + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + PyUnicode_1BYTE_DATA(s), + PyUnicode_1BYTE_DATA(s) + len, + result); + return result; + case PyUnicode_4BYTE_KIND: + result = PyMem_Malloc(len * sizeof(Py_UCS4)); + if (!result) + return PyErr_NoMemory(); + if (skind == PyUnicode_2BYTE_KIND) { + _PyUnicode_CONVERT_BYTES( + Py_UCS2, Py_UCS4, + PyUnicode_2BYTE_DATA(s), + PyUnicode_2BYTE_DATA(s) + len, + result); + } + else { + assert(skind == PyUnicode_1BYTE_KIND); + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + PyUnicode_1BYTE_DATA(s), + PyUnicode_1BYTE_DATA(s) + len, + result); + } + return result; + default: + break; + } + PyErr_SetString(PyExc_SystemError, "invalid kind"); + return NULL; +} + +static Py_UCS4* +as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, + int copy_null) +{ + int kind; + void *data; + Py_ssize_t len, targetlen; + if (PyUnicode_READY(string) == -1) + return NULL; + kind = PyUnicode_KIND(string); + data = PyUnicode_DATA(string); + len = PyUnicode_GET_LENGTH(string); + targetlen = len; + if (copy_null) + targetlen++; + if (!target) { + if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) { + PyErr_NoMemory(); + return NULL; + } + target = PyMem_Malloc(targetlen * sizeof(Py_UCS4)); + if (!target) { + PyErr_NoMemory(); + return NULL; + } + } + else { + if (targetsize < targetlen) { + PyErr_Format(PyExc_SystemError, + "string is longer than the buffer"); + if (copy_null && 0 < targetsize) + target[0] = 0; + return NULL; + } + } + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *start = (Py_UCS1 *) data; + _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *start = (Py_UCS2 *) data; + _PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + Py_MEMCPY(target, data, len * sizeof(Py_UCS4)); + } + if (copy_null) + target[len] = 0; + return target; +} + +Py_UCS4* +PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, + int copy_null) +{ + if (target == NULL || targetsize < 0) { + PyErr_BadInternalCall(); + return NULL; + } + return as_ucs4(string, target, targetsize, copy_null); +} + +Py_UCS4* +PyUnicode_AsUCS4Copy(PyObject *string) +{ + return as_ucs4(string, NULL, 0, 1); +} + +#ifdef HAVE_WCHAR_H + +PyObject * +PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) +{ + if (w == NULL) { + if (size == 0) + return PyUnicode_New(0, 0); + PyErr_BadInternalCall(); + return NULL; + } + + if (size == -1) { + size = wcslen(w); + } + + return PyUnicode_FromUnicode(w, size); +} + +#endif /* HAVE_WCHAR_H */ + +static void +makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, + int zeropad, int width, int precision, char c) +{ + *fmt++ = '%'; + if (width) { + if (zeropad) + *fmt++ = '0'; + fmt += sprintf(fmt, "%d", width); + } + if (precision) + fmt += sprintf(fmt, ".%d", precision); + if (longflag) + *fmt++ = 'l'; + else if (longlongflag) { + /* longlongflag should only ever be nonzero on machines with + HAVE_LONG_LONG defined */ +#ifdef HAVE_LONG_LONG + char *f = PY_FORMAT_LONG_LONG; + while (*f) + *fmt++ = *f++; +#else + /* we shouldn't ever get here */ + assert(0); + *fmt++ = 'l'; +#endif + } + else if (size_tflag) { + char *f = PY_FORMAT_SIZE_T; + while (*f) + *fmt++ = *f++; + } + *fmt++ = c; + *fmt = '\0'; +} + +/* helper for PyUnicode_FromFormatV() */ + +static const char* +parse_format_flags(const char *f, + int *p_width, int *p_precision, + int *p_longflag, int *p_longlongflag, int *p_size_tflag) +{ + int width, precision, longflag, longlongflag, size_tflag; + + /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ + f++; + width = 0; + while (Py_ISDIGIT((unsigned)*f)) + width = (width*10) + *f++ - '0'; + precision = 0; + if (*f == '.') { + f++; + while (Py_ISDIGIT((unsigned)*f)) + precision = (precision*10) + *f++ - '0'; + if (*f == '%') { + /* "%.3%s" => f points to "3" */ + f--; + } + } + if (*f == '\0') { + /* bogus format "%.1" => go backward, f points to "1" */ + f--; + } + if (p_width != NULL) + *p_width = width; + if (p_precision != NULL) + *p_precision = precision; + + /* Handle %ld, %lu, %lld and %llu. */ + longflag = 0; + longlongflag = 0; + size_tflag = 0; + + if (*f == 'l') { + if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') { + longflag = 1; + ++f; + } +#ifdef HAVE_LONG_LONG + else if (f[1] == 'l' && + (f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) { + longlongflag = 1; + f += 2; + } +#endif + } + /* handle the size_t flag. */ + else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) { + size_tflag = 1; + ++f; + } + if (p_longflag != NULL) + *p_longflag = longflag; + if (p_longlongflag != NULL) + *p_longlongflag = longlongflag; + if (p_size_tflag != NULL) + *p_size_tflag = size_tflag; + return f; +} + +/* maximum number of characters required for output of %ld. 21 characters + allows for 64-bit integers (in decimal) and an optional sign. */ +#define MAX_LONG_CHARS 21 +/* maximum number of characters required for output of %lld. + We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, + plus 1 for the sign. 53/22 is an upper bound for log10(256). */ +#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) + +PyObject * +PyUnicode_FromFormatV(const char *format, va_list vargs) +{ + va_list count; + Py_ssize_t callcount = 0; + PyObject **callresults = NULL; + PyObject **callresult = NULL; + Py_ssize_t n = 0; + int width = 0; + int precision = 0; + int zeropad; + const char* f; + PyObject *string; + /* used by sprintf */ + char fmt[61]; /* should be enough for %0width.precisionlld */ + Py_UCS4 maxchar = 127; /* result is ASCII by default */ + Py_UCS4 argmaxchar; + Py_ssize_t numbersize = 0; + char *numberresults = NULL; + char *numberresult = NULL; + Py_ssize_t i; + int kind; + void *data; + + Py_VA_COPY(count, vargs); + /* step 1: count the number of %S/%R/%A/%s format specifications + * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ + * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the + * result in an array) + * also estimate a upper bound for all the number formats in the string, + * numbers will be formatted in step 3 and be kept in a '\0'-separated + * buffer before putting everything together. */ + for (f = format; *f; f++) { + if (*f == '%') { + int longlongflag; + /* skip width or width.precision (eg. "1.2" of "%1.2f") */ + f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL); + if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') + ++callcount; + + else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') { +#ifdef HAVE_LONG_LONG + if (longlongflag) { + if (width < MAX_LONG_LONG_CHARS) + width = MAX_LONG_LONG_CHARS; + } + else +#endif + /* MAX_LONG_CHARS is enough to hold a 64-bit integer, + including sign. Decimal takes the most space. This + isn't enough for octal. If a width is specified we + need more (which we allocate later). */ + if (width < MAX_LONG_CHARS) + width = MAX_LONG_CHARS; + + /* account for the size + '\0' to separate numbers + inside of the numberresults buffer */ + numbersize += (width + 1); + } + } + else if ((unsigned char)*f > 127) { + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() expects an ASCII-encoded format " + "string, got a non-ASCII byte: 0x%02x", + (unsigned char)*f); + return NULL; + } + } + /* step 2: allocate memory for the results of + * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ + if (callcount) { + callresults = PyObject_Malloc(sizeof(PyObject *) * callcount); + if (!callresults) { + PyErr_NoMemory(); + return NULL; + } + callresult = callresults; + } + /* step 2.5: allocate memory for the results of formating numbers */ + if (numbersize) { + numberresults = PyObject_Malloc(numbersize); + if (!numberresults) { + PyErr_NoMemory(); + goto fail; + } + numberresult = numberresults; + } + + /* step 3: format numbers and figure out how large a buffer we need */ + for (f = format; *f; f++) { + if (*f == '%') { + const char* p; + int longflag; + int longlongflag; + int size_tflag; + int numprinted; + + p = f; + zeropad = (f[1] == '0'); + f = parse_format_flags(f, &width, &precision, + &longflag, &longlongflag, &size_tflag); + switch (*f) { + case 'c': + { + Py_UCS4 ordinal = va_arg(count, int); + maxchar = Py_MAX(maxchar, ordinal); + n++; + break; + } + case '%': + n++; + break; + case 'i': + case 'd': + makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, + width, precision, *f); + if (longflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, long)); +#ifdef HAVE_LONG_LONG + else if (longlongflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, PY_LONG_LONG)); +#endif + else if (size_tflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, Py_ssize_t)); + else + numprinted = sprintf(numberresult, fmt, + va_arg(count, int)); + n += numprinted; + /* advance by +1 to skip over the '\0' */ + numberresult += (numprinted + 1); + assert(*(numberresult - 1) == '\0'); + assert(*(numberresult - 2) != '\0'); + assert(numprinted >= 0); + assert(numberresult <= numberresults + numbersize); + break; + case 'u': + makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, + width, precision, 'u'); + if (longflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, unsigned long)); +#ifdef HAVE_LONG_LONG + else if (longlongflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, unsigned PY_LONG_LONG)); +#endif + else if (size_tflag) + numprinted = sprintf(numberresult, fmt, + va_arg(count, size_t)); + else + numprinted = sprintf(numberresult, fmt, + va_arg(count, unsigned int)); + n += numprinted; + numberresult += (numprinted + 1); + assert(*(numberresult - 1) == '\0'); + assert(*(numberresult - 2) != '\0'); + assert(numprinted >= 0); + assert(numberresult <= numberresults + numbersize); + break; + case 'x': + makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); + numprinted = sprintf(numberresult, fmt, va_arg(count, int)); + n += numprinted; + numberresult += (numprinted + 1); + assert(*(numberresult - 1) == '\0'); + assert(*(numberresult - 2) != '\0'); + assert(numprinted >= 0); + assert(numberresult <= numberresults + numbersize); + break; + case 'p': + numprinted = sprintf(numberresult, "%p", va_arg(count, void*)); + /* %p is ill-defined: ensure leading 0x. */ + if (numberresult[1] == 'X') + numberresult[1] = 'x'; + else if (numberresult[1] != 'x') { + memmove(numberresult + 2, numberresult, + strlen(numberresult) + 1); + numberresult[0] = '0'; + numberresult[1] = 'x'; + numprinted += 2; + } + n += numprinted; + numberresult += (numprinted + 1); + assert(*(numberresult - 1) == '\0'); + assert(*(numberresult - 2) != '\0'); + assert(numprinted >= 0); + assert(numberresult <= numberresults + numbersize); + break; + case 's': + { + /* UTF-8 */ + const char *s = va_arg(count, const char*); + PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); + if (!str) + goto fail; + /* since PyUnicode_DecodeUTF8 returns already flexible + unicode objects, there is no need to call ready on them */ + argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(str); + /* Remember the str and switch to the next slot */ + *callresult++ = str; + break; + } + case 'U': + { + PyObject *obj = va_arg(count, PyObject *); + assert(obj && _PyUnicode_CHECK(obj)); + if (PyUnicode_READY(obj) == -1) + goto fail; + argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(obj); + break; + } + case 'V': + { + PyObject *obj = va_arg(count, PyObject *); + const char *str = va_arg(count, const char *); + PyObject *str_obj; + assert(obj || str); + assert(!obj || _PyUnicode_CHECK(obj)); + if (obj) { + if (PyUnicode_READY(obj) == -1) + goto fail; + argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(obj); + *callresult++ = NULL; + } + else { + str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace"); + if (!str_obj) + goto fail; + if (PyUnicode_READY(str_obj)) { + Py_DECREF(str_obj); + goto fail; + } + argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(str_obj); + *callresult++ = str_obj; + } + break; + } + case 'S': + { + PyObject *obj = va_arg(count, PyObject *); + PyObject *str; + assert(obj); + str = PyObject_Str(obj); + if (!str || PyUnicode_READY(str) == -1) + goto fail; + argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(str); + /* Remember the str and switch to the next slot */ + *callresult++ = str; + break; + } + case 'R': + { + PyObject *obj = va_arg(count, PyObject *); + PyObject *repr; + assert(obj); + repr = PyObject_Repr(obj); + if (!repr || PyUnicode_READY(repr) == -1) + goto fail; + argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(repr); + /* Remember the repr and switch to the next slot */ + *callresult++ = repr; + break; + } + case 'A': + { + PyObject *obj = va_arg(count, PyObject *); + PyObject *ascii; + assert(obj); + ascii = PyObject_ASCII(obj); + if (!ascii || PyUnicode_READY(ascii) == -1) + goto fail; + argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii); + maxchar = Py_MAX(maxchar, argmaxchar); + n += PyUnicode_GET_LENGTH(ascii); + /* Remember the repr and switch to the next slot */ + *callresult++ = ascii; + break; + } + default: + /* if we stumble upon an unknown + formatting code, copy the rest of + the format string to the output + string. (we cannot just skip the + code, since there's no way to know + what's in the argument list) */ + n += strlen(p); + goto expand; + } + } else + n++; + } + expand: + /* step 4: fill the buffer */ + /* Since we've analyzed how much space we need, + we don't have to resize the string. + There can be no errors beyond this point. */ + string = PyUnicode_New(n, maxchar); + if (!string) + goto fail; + kind = PyUnicode_KIND(string); + data = PyUnicode_DATA(string); + callresult = callresults; + numberresult = numberresults; + + for (i = 0, f = format; *f; f++) { + if (*f == '%') { + const char* p; + + p = f; + f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); + /* checking for == because the last argument could be a empty + string, which causes i to point to end, the assert at the end of + the loop */ + assert(i <= PyUnicode_GET_LENGTH(string)); + + switch (*f) { + case 'c': + { + const int ordinal = va_arg(vargs, int); + PyUnicode_WRITE(kind, data, i++, ordinal); + break; + } + case 'i': + case 'd': + case 'u': + case 'x': + case 'p': + /* unused, since we already have the result */ + if (*f == 'p') + (void) va_arg(vargs, void *); + else + (void) va_arg(vargs, int); + /* extract the result from numberresults and append. */ + for (; *numberresult; ++i, ++numberresult) + PyUnicode_WRITE(kind, data, i, *numberresult); + /* skip over the separating '\0' */ + assert(*numberresult == '\0'); + numberresult++; + assert(numberresult <= numberresults + numbersize); + break; + case 's': + { + /* unused, since we already have the result */ + Py_ssize_t size; + (void) va_arg(vargs, char *); + size = PyUnicode_GET_LENGTH(*callresult); + assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); + copy_characters(string, i, *callresult, 0, size); + i += size; + /* We're done with the unicode()/repr() => forget it */ + Py_DECREF(*callresult); + /* switch to next unicode()/repr() result */ + ++callresult; + break; + } + case 'U': + { + PyObject *obj = va_arg(vargs, PyObject *); + Py_ssize_t size; + assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); + size = PyUnicode_GET_LENGTH(obj); + copy_characters(string, i, obj, 0, size); + i += size; + break; + } + case 'V': + { + Py_ssize_t size; + PyObject *obj = va_arg(vargs, PyObject *); + va_arg(vargs, const char *); + if (obj) { + size = PyUnicode_GET_LENGTH(obj); + assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); + copy_characters(string, i, obj, 0, size); + i += size; + } else { + size = PyUnicode_GET_LENGTH(*callresult); + assert(PyUnicode_KIND(*callresult) <= + PyUnicode_KIND(string)); + copy_characters(string, i, *callresult, 0, size); + i += size; + Py_DECREF(*callresult); + } + ++callresult; + break; + } + case 'S': + case 'R': + case 'A': + { + Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult); + /* unused, since we already have the result */ + (void) va_arg(vargs, PyObject *); + assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); + copy_characters(string, i, *callresult, 0, size); + i += size; + /* We're done with the unicode()/repr() => forget it */ + Py_DECREF(*callresult); + /* switch to next unicode()/repr() result */ + ++callresult; + break; + } + case '%': + PyUnicode_WRITE(kind, data, i++, '%'); + break; + default: + for (; *p; ++p, ++i) + PyUnicode_WRITE(kind, data, i, *p); + assert(i == PyUnicode_GET_LENGTH(string)); + goto end; + } + } + else { + assert(i < PyUnicode_GET_LENGTH(string)); + PyUnicode_WRITE(kind, data, i++, *f); + } + } + assert(i == PyUnicode_GET_LENGTH(string)); + + end: + if (callresults) + PyObject_Free(callresults); + if (numberresults) + PyObject_Free(numberresults); + return unicode_result(string); + fail: + if (callresults) { + PyObject **callresult2 = callresults; + while (callresult2 < callresult) { + Py_XDECREF(*callresult2); + ++callresult2; + } + PyObject_Free(callresults); + } + if (numberresults) + PyObject_Free(numberresults); + return NULL; +} + +PyObject * +PyUnicode_FromFormat(const char *format, ...) +{ + PyObject* ret; + va_list vargs; + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + ret = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + return ret; +} + +#ifdef HAVE_WCHAR_H + +/* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(): + convert a Unicode object to a wide character string. + + - If w is NULL: return the number of wide characters (including the null + character) required to convert the unicode object. Ignore size argument. + + - Otherwise: return the number of wide characters (excluding the null + character) written into w. Write at most size wide characters (including + the null character). */ +static Py_ssize_t +unicode_aswidechar(PyObject *unicode, + wchar_t *w, + Py_ssize_t size) +{ + Py_ssize_t res; + const wchar_t *wstr; + + wstr = PyUnicode_AsUnicodeAndSize(unicode, &res); + if (wstr == NULL) + return -1; + + if (w != NULL) { + if (size > res) + size = res + 1; + else + res = size; + Py_MEMCPY(w, wstr, size * sizeof(wchar_t)); + return res; + } + else + return res + 1; +} + +Py_ssize_t +PyUnicode_AsWideChar(PyObject *unicode, + wchar_t *w, + Py_ssize_t size) +{ + if (unicode == NULL) { + PyErr_BadInternalCall(); + return -1; + } + return unicode_aswidechar(unicode, w, size); +} + +wchar_t* +PyUnicode_AsWideCharString(PyObject *unicode, + Py_ssize_t *size) +{ + wchar_t* buffer; + Py_ssize_t buflen; + + if (unicode == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + buflen = unicode_aswidechar(unicode, NULL, 0); + if (buflen == -1) + return NULL; + if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) { + PyErr_NoMemory(); + return NULL; + } + + buffer = PyMem_MALLOC(buflen * sizeof(wchar_t)); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + buflen = unicode_aswidechar(unicode, buffer, buflen); + if (buflen == -1) + return NULL; + if (size != NULL) + *size = buflen; + return buffer; +} + +#endif /* HAVE_WCHAR_H */ + +PyObject * +PyUnicode_FromOrdinal(int ordinal) +{ + PyObject *v; + if (ordinal < 0 || ordinal > MAX_UNICODE) { + PyErr_SetString(PyExc_ValueError, + "chr() arg not in range(0x110000)"); + return NULL; + } + + if (ordinal < 256) + return get_latin1_char(ordinal); + + v = PyUnicode_New(1, ordinal); + if (v == NULL) + return NULL; + PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal); + assert(_PyUnicode_CheckConsistency(v, 1)); + return v; +} + +PyObject * +PyUnicode_FromObject(register PyObject *obj) +{ + /* XXX Perhaps we should make this API an alias of + PyObject_Str() instead ?! */ + if (PyUnicode_CheckExact(obj)) { + if (PyUnicode_READY(obj)) + return NULL; + Py_INCREF(obj); + return obj; + } + if (PyUnicode_Check(obj)) { + /* For a Unicode subtype that's not a Unicode object, + return a true Unicode object with the same data. */ + return PyUnicode_Copy(obj); + } + PyErr_Format(PyExc_TypeError, + "Can't convert '%.100s' object to str implicitly", + Py_TYPE(obj)->tp_name); + return NULL; +} + +PyObject * +PyUnicode_FromEncodedObject(register PyObject *obj, + const char *encoding, + const char *errors) +{ + Py_buffer buffer; + PyObject *v; + + if (obj == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + /* Decoding bytes objects is the most common case and should be fast */ + if (PyBytes_Check(obj)) { + if (PyBytes_GET_SIZE(obj) == 0) { + Py_INCREF(unicode_empty); + v = unicode_empty; + } + else { + v = PyUnicode_Decode( + PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), + encoding, errors); + } + return v; + } + + if (PyUnicode_Check(obj)) { + PyErr_SetString(PyExc_TypeError, + "decoding str is not supported"); + return NULL; + } + + /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */ + if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) { + PyErr_Format(PyExc_TypeError, + "coercing to str: need bytes, bytearray " + "or buffer-like object, %.80s found", + Py_TYPE(obj)->tp_name); + return NULL; + } + + if (buffer.len == 0) { + Py_INCREF(unicode_empty); + v = unicode_empty; + } + else + v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); + + PyBuffer_Release(&buffer); + return v; +} + +/* Convert encoding to lower case and replace '_' with '-' in order to + catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1), + 1 on success. */ +static int +normalize_encoding(const char *encoding, + char *lower, + size_t lower_len) +{ + const char *e; + char *l; + char *l_end; + + if (encoding == NULL) { + strcpy(lower, "utf-8"); + return 1; + } + e = encoding; + l = lower; + l_end = &lower[lower_len - 1]; + while (*e) { + if (l == l_end) + return 0; + if (Py_ISUPPER(*e)) { + *l++ = Py_TOLOWER(*e++); + } + else if (*e == '_') { + *l++ = '-'; + e++; + } + else { + *l++ = *e++; + } + } + *l = '\0'; + return 1; +} + +PyObject * +PyUnicode_Decode(const char *s, + Py_ssize_t size, + const char *encoding, + const char *errors) +{ + PyObject *buffer = NULL, *unicode; + Py_buffer info; + char lower[11]; /* Enough for any encoding shortcut */ + + /* Shortcuts for common default encodings */ + if (normalize_encoding(encoding, lower, sizeof(lower))) { + if ((strcmp(lower, "utf-8") == 0) || + (strcmp(lower, "utf8") == 0)) + return PyUnicode_DecodeUTF8(s, size, errors); + else if ((strcmp(lower, "latin-1") == 0) || + (strcmp(lower, "latin1") == 0) || + (strcmp(lower, "iso-8859-1") == 0)) + return PyUnicode_DecodeLatin1(s, size, errors); +#ifdef HAVE_MBCS + else if (strcmp(lower, "mbcs") == 0) + return PyUnicode_DecodeMBCS(s, size, errors); +#endif + else if (strcmp(lower, "ascii") == 0) + return PyUnicode_DecodeASCII(s, size, errors); + else if (strcmp(lower, "utf-16") == 0) + return PyUnicode_DecodeUTF16(s, size, errors, 0); + else if (strcmp(lower, "utf-32") == 0) + return PyUnicode_DecodeUTF32(s, size, errors, 0); + } + + /* Decode via the codec registry */ + buffer = NULL; + if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0) + goto onError; + buffer = PyMemoryView_FromBuffer(&info); + if (buffer == NULL) + goto onError; + unicode = PyCodec_Decode(buffer, encoding, errors); + if (unicode == NULL) + goto onError; + if (!PyUnicode_Check(unicode)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return a str object (type=%.400s)", + Py_TYPE(unicode)->tp_name); + Py_DECREF(unicode); + goto onError; + } + Py_DECREF(buffer); + return unicode_result(unicode); + + onError: + Py_XDECREF(buffer); + return NULL; +} + +PyObject * +PyUnicode_AsDecodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Decode via the codec registry */ + v = PyCodec_Decode(unicode, encoding, errors); + if (v == NULL) + goto onError; + return unicode_result(v); + + onError: + return NULL; +} + +PyObject * +PyUnicode_AsDecodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Decode via the codec registry */ + v = PyCodec_Decode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return a str object (type=%.400s)", + Py_TYPE(v)->tp_name); + Py_DECREF(v); + goto onError; + } + return unicode_result(v); + + onError: + return NULL; +} + +PyObject * +PyUnicode_Encode(const Py_UNICODE *s, + Py_ssize_t size, + const char *encoding, + const char *errors) +{ + PyObject *v, *unicode; + + unicode = PyUnicode_FromUnicode(s, size); + if (unicode == NULL) + return NULL; + v = PyUnicode_AsEncodedString(unicode, encoding, errors); + Py_DECREF(unicode); + return v; +} + +PyObject * +PyUnicode_AsEncodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + return v; + + onError: + return NULL; +} + +PyObject * +PyUnicode_EncodeFSDefault(PyObject *unicode) +{ +#ifdef HAVE_MBCS + return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); +#elif defined(__APPLE__) + return _PyUnicode_AsUTF8String(unicode, "surrogateescape"); +#else + PyInterpreterState *interp = PyThreadState_GET()->interp; + /* Bootstrap check: if the filesystem codec is implemented in Python, we + cannot use it to encode and decode filenames before it is loaded. Load + the Python codec requires to encode at least its own filename. Use the C + version of the locale codec until the codec registry is initialized and + the Python codec is loaded. + + Py_FileSystemDefaultEncoding is shared between all interpreters, we + cannot only rely on it: check also interp->fscodec_initialized for + subinterpreters. */ + if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { + return PyUnicode_AsEncodedString(unicode, + Py_FileSystemDefaultEncoding, + "surrogateescape"); + } + else { + /* locale encoding with surrogateescape */ + wchar_t *wchar; + char *bytes; + PyObject *bytes_obj; + size_t error_pos; + + wchar = PyUnicode_AsWideCharString(unicode, NULL); + if (wchar == NULL) + return NULL; + bytes = _Py_wchar2char(wchar, &error_pos); + if (bytes == NULL) { + if (error_pos != (size_t)-1) { + char *errmsg = strerror(errno); + PyObject *exc = NULL; + if (errmsg == NULL) + errmsg = "Py_wchar2char() failed"; + raise_encode_exception(&exc, + "filesystemencoding", unicode, + error_pos, error_pos+1, + errmsg); + Py_XDECREF(exc); + } + else + PyErr_NoMemory(); + PyMem_Free(wchar); + return NULL; + } + PyMem_Free(wchar); + + bytes_obj = PyBytes_FromString(bytes); + PyMem_Free(bytes); + return bytes_obj; + } +#endif +} + +PyObject * +PyUnicode_AsEncodedString(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + char lower[11]; /* Enough for any encoding shortcut */ + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + + /* Shortcuts for common default encodings */ + if (normalize_encoding(encoding, lower, sizeof(lower))) { + if ((strcmp(lower, "utf-8") == 0) || + (strcmp(lower, "utf8") == 0)) + { + if (errors == NULL || strcmp(errors, "strict") == 0) + return _PyUnicode_AsUTF8String(unicode, NULL); + else + return _PyUnicode_AsUTF8String(unicode, errors); + } + else if ((strcmp(lower, "latin-1") == 0) || + (strcmp(lower, "latin1") == 0) || + (strcmp(lower, "iso-8859-1") == 0)) + return _PyUnicode_AsLatin1String(unicode, errors); +#ifdef HAVE_MBCS + else if (strcmp(lower, "mbcs") == 0) + return PyUnicode_EncodeCodePage(CP_ACP, unicode, errors); +#endif + else if (strcmp(lower, "ascii") == 0) + return _PyUnicode_AsASCIIString(unicode, errors); + } + + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + return NULL; + + /* The normal path */ + if (PyBytes_Check(v)) + return v; + + /* If the codec returns a buffer, raise a warning and convert to bytes */ + if (PyByteArray_Check(v)) { + int error; + PyObject *b; + + error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1, + "encoder %s returned bytearray instead of bytes", + encoding); + if (error) { + Py_DECREF(v); + return NULL; + } + + b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); + Py_DECREF(v); + return b; + } + + PyErr_Format(PyExc_TypeError, + "encoder did not return a bytes object (type=%.400s)", + Py_TYPE(v)->tp_name); + Py_DECREF(v); + return NULL; +} + +PyObject * +PyUnicode_AsEncodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return an str object (type=%.400s)", + Py_TYPE(v)->tp_name); + Py_DECREF(v); + goto onError; + } + return v; + + onError: + return NULL; +} + +PyObject* +PyUnicode_DecodeFSDefault(const char *s) { + Py_ssize_t size = (Py_ssize_t)strlen(s); + return PyUnicode_DecodeFSDefaultAndSize(s, size); +} + +PyObject* +PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) +{ +#ifdef HAVE_MBCS + return PyUnicode_DecodeMBCS(s, size, NULL); +#elif defined(__APPLE__) + return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); +#else + PyInterpreterState *interp = PyThreadState_GET()->interp; + /* Bootstrap check: if the filesystem codec is implemented in Python, we + cannot use it to encode and decode filenames before it is loaded. Load + the Python codec requires to encode at least its own filename. Use the C + version of the locale codec until the codec registry is initialized and + the Python codec is loaded. + + Py_FileSystemDefaultEncoding is shared between all interpreters, we + cannot only rely on it: check also interp->fscodec_initialized for + subinterpreters. */ + if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { + return PyUnicode_Decode(s, size, + Py_FileSystemDefaultEncoding, + "surrogateescape"); + } + else { + /* locale encoding with surrogateescape */ + wchar_t *wchar; + PyObject *unicode; + size_t len; + + if (s[size] != '\0' || size != strlen(s)) { + PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + return NULL; + } + + wchar = _Py_char2wchar(s, &len); + if (wchar == NULL) + return PyErr_NoMemory(); + + unicode = PyUnicode_FromWideChar(wchar, len); + PyMem_Free(wchar); + return unicode; + } +#endif +} + + +int +PyUnicode_FSConverter(PyObject* arg, void* addr) +{ + PyObject *output = NULL; + Py_ssize_t size; + void *data; + if (arg == NULL) { + Py_DECREF(*(PyObject**)addr); + return 1; + } + if (PyBytes_Check(arg)) { + output = arg; + Py_INCREF(output); + } + else { + arg = PyUnicode_FromObject(arg); + if (!arg) + return 0; + output = PyUnicode_EncodeFSDefault(arg); + Py_DECREF(arg); + if (!output) + return 0; + if (!PyBytes_Check(output)) { + Py_DECREF(output); + PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes"); + return 0; + } + } + size = PyBytes_GET_SIZE(output); + data = PyBytes_AS_STRING(output); + if (size != strlen(data)) { + PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + Py_DECREF(output); + return 0; + } + *(PyObject**)addr = output; + return Py_CLEANUP_SUPPORTED; +} + + +int +PyUnicode_FSDecoder(PyObject* arg, void* addr) +{ + PyObject *output = NULL; + if (arg == NULL) { + Py_DECREF(*(PyObject**)addr); + return 1; + } + if (PyUnicode_Check(arg)) { + if (PyUnicode_READY(arg)) + return 0; + output = arg; + Py_INCREF(output); + } + else { + arg = PyBytes_FromObject(arg); + if (!arg) + return 0; + output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg), + PyBytes_GET_SIZE(arg)); + Py_DECREF(arg); + if (!output) + return 0; + if (!PyUnicode_Check(output)) { + Py_DECREF(output); + PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode"); + return 0; + } + } + if (PyUnicode_READY(output) < 0) { + Py_DECREF(output); + return 0; + } + if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output), + PyUnicode_GET_LENGTH(output), 0, 1) >= 0) { + PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + Py_DECREF(output); + return 0; + } + *(PyObject**)addr = output; + return Py_CLEANUP_SUPPORTED; +} + + +char* +PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) +{ + PyObject *bytes; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(unicode) == -1) + return NULL; + + if (PyUnicode_UTF8(unicode) == NULL) { + assert(!PyUnicode_IS_COMPACT_ASCII(unicode)); + bytes = _PyUnicode_AsUTF8String(unicode, "strict"); + if (bytes == NULL) + return NULL; + _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1); + if (_PyUnicode_UTF8(unicode) == NULL) { + Py_DECREF(bytes); + return NULL; + } + _PyUnicode_UTF8_LENGTH(unicode) = PyBytes_GET_SIZE(bytes); + Py_MEMCPY(_PyUnicode_UTF8(unicode), + PyBytes_AS_STRING(bytes), + _PyUnicode_UTF8_LENGTH(unicode) + 1); + Py_DECREF(bytes); + } + + if (psize) + *psize = PyUnicode_UTF8_LENGTH(unicode); + return PyUnicode_UTF8(unicode); +} + +char* +PyUnicode_AsUTF8(PyObject *unicode) +{ + return PyUnicode_AsUTF8AndSize(unicode, NULL); +} + +#ifdef Py_DEBUG +static int unicode_as_unicode_calls = 0; +#endif + + +Py_UNICODE * +PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) +{ + const unsigned char *one_byte; +#if SIZEOF_WCHAR_T == 4 + const Py_UCS2 *two_bytes; +#else + const Py_UCS4 *four_bytes; + const Py_UCS4 *ucs4_end; + Py_ssize_t num_surrogates; +#endif + wchar_t *w; + wchar_t *wchar_end; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (_PyUnicode_WSTR(unicode) == NULL) { + /* Non-ASCII compact unicode object */ + assert(_PyUnicode_KIND(unicode) != 0); + assert(PyUnicode_IS_READY(unicode)); + +#ifdef Py_DEBUG + ++unicode_as_unicode_calls; +#endif + + if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { +#if SIZEOF_WCHAR_T == 2 + four_bytes = PyUnicode_4BYTE_DATA(unicode); + ucs4_end = four_bytes + _PyUnicode_LENGTH(unicode); + num_surrogates = 0; + + for (; four_bytes < ucs4_end; ++four_bytes) { + if (*four_bytes > 0xFFFF) + ++num_surrogates; + } + + _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC( + sizeof(wchar_t) * (_PyUnicode_LENGTH(unicode) + 1 + num_surrogates)); + if (!_PyUnicode_WSTR(unicode)) { + PyErr_NoMemory(); + return NULL; + } + _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode) + num_surrogates; + + w = _PyUnicode_WSTR(unicode); + wchar_end = w + _PyUnicode_WSTR_LENGTH(unicode); + four_bytes = PyUnicode_4BYTE_DATA(unicode); + for (; four_bytes < ucs4_end; ++four_bytes, ++w) { + if (*four_bytes > 0xFFFF) { + assert(*four_bytes <= MAX_UNICODE); + /* encode surrogate pair in this case */ + *w++ = Py_UNICODE_HIGH_SURROGATE(*four_bytes); + *w = Py_UNICODE_LOW_SURROGATE(*four_bytes); + } + else + *w = *four_bytes; + + if (w > wchar_end) { + assert(0 && "Miscalculated string end"); + } + } + *w = 0; +#else + /* sizeof(wchar_t) == 4 */ + Py_FatalError("Impossible unicode object state, wstr and str " + "should share memory already."); + return NULL; +#endif + } + else { + _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * + (_PyUnicode_LENGTH(unicode) + 1)); + if (!_PyUnicode_WSTR(unicode)) { + PyErr_NoMemory(); + return NULL; + } + if (!PyUnicode_IS_COMPACT_ASCII(unicode)) + _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode); + w = _PyUnicode_WSTR(unicode); + wchar_end = w + _PyUnicode_LENGTH(unicode); + + if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { + one_byte = PyUnicode_1BYTE_DATA(unicode); + for (; w < wchar_end; ++one_byte, ++w) + *w = *one_byte; + /* null-terminate the wstr */ + *w = 0; + } + else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) { +#if SIZEOF_WCHAR_T == 4 + two_bytes = PyUnicode_2BYTE_DATA(unicode); + for (; w < wchar_end; ++two_bytes, ++w) + *w = *two_bytes; + /* null-terminate the wstr */ + *w = 0; +#else + /* sizeof(wchar_t) == 2 */ + PyObject_FREE(_PyUnicode_WSTR(unicode)); + _PyUnicode_WSTR(unicode) = NULL; + Py_FatalError("Impossible unicode object state, wstr " + "and str should share memory already."); + return NULL; +#endif + } + else { + assert(0 && "This should never happen."); + } + } + } + if (size != NULL) + *size = PyUnicode_WSTR_LENGTH(unicode); + return _PyUnicode_WSTR(unicode); +} + +Py_UNICODE * +PyUnicode_AsUnicode(PyObject *unicode) +{ + return PyUnicode_AsUnicodeAndSize(unicode, NULL); +} + + +Py_ssize_t +PyUnicode_GetSize(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + return PyUnicode_GET_SIZE(unicode); + + onError: + return -1; +} + +Py_ssize_t +PyUnicode_GetLength(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) { + PyErr_BadArgument(); + return -1; + } + + return PyUnicode_GET_LENGTH(unicode); +} + +Py_UCS4 +PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) +{ + if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) { + PyErr_BadArgument(); + return (Py_UCS4)-1; + } + if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return (Py_UCS4)-1; + } + return PyUnicode_READ_CHAR(unicode, index); +} + +int +PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch) +{ + if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) { + PyErr_BadArgument(); + return -1; + } + if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } + if (_PyUnicode_Dirty(unicode)) + return -1; + PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), + index, ch); + return 0; +} + +const char * +PyUnicode_GetDefaultEncoding(void) +{ + return "utf-8"; +} + +/* create or adjust a UnicodeDecodeError */ +static void +make_decode_exception(PyObject **exceptionObject, + const char *encoding, + const char *input, Py_ssize_t length, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) +{ + if (*exceptionObject == NULL) { + *exceptionObject = PyUnicodeDecodeError_Create( + encoding, input, length, startpos, endpos, reason); + } + else { + if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos)) + goto onError; + if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos)) + goto onError; + if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) + goto onError; + } + return; + +onError: + Py_DECREF(*exceptionObject); + *exceptionObject = NULL; +} + +/* error handling callback helper: + build arguments, call the callback and check the arguments, + if no exception occurred, copy the replacement to the output + and adjust various state variables. + return 0 on success, -1 on error +*/ + +static int +unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, + const char *encoding, const char *reason, + const char **input, const char **inend, Py_ssize_t *startinpos, + Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, + PyObject **output, Py_ssize_t *outpos) +{ + static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; + + PyObject *restuple = NULL; + PyObject *repunicode = NULL; + Py_ssize_t outsize; + Py_ssize_t insize; + Py_ssize_t requiredsize; + Py_ssize_t newpos; + PyObject *inputobj = NULL; + int res = -1; + + if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) + outsize = PyUnicode_GET_LENGTH(*output); + else + outsize = _PyUnicode_WSTR_LENGTH(*output); + + if (*errorHandler == NULL) { + *errorHandler = PyCodec_LookupError(errors); + if (*errorHandler == NULL) + goto onError; + } + + make_decode_exception(exceptionObject, + encoding, + *input, *inend - *input, + *startinpos, *endinpos, + reason); + if (*exceptionObject == NULL) + goto onError; + + restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); + if (restuple == NULL) + goto onError; + if (!PyTuple_Check(restuple)) { + PyErr_SetString(PyExc_TypeError, &argparse[4]); + goto onError; + } + if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) + goto onError; + if (PyUnicode_READY(repunicode) < 0) + goto onError; + + /* Copy back the bytes variables, which might have been modified by the + callback */ + inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); + if (!inputobj) + goto onError; + if (!PyBytes_Check(inputobj)) { + PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); + } + *input = PyBytes_AS_STRING(inputobj); + insize = PyBytes_GET_SIZE(inputobj); + *inend = *input + insize; + /* we can DECREF safely, as the exception has another reference, + so the object won't go away. */ + Py_DECREF(inputobj); + + if (newpos<0) + newpos = insize+newpos; + if (newpos<0 || newpos>insize) { + PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos); + goto onError; + } + + if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) { + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ + Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); + requiredsize = *outpos + replen + insize-newpos; + if (requiredsize > outsize) { + if (requiredsize<2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) + goto onError; + } + if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) + goto onError; + copy_characters(*output, *outpos, repunicode, 0, replen); + *outpos += replen; + } + else { + wchar_t *repwstr; + Py_ssize_t repwlen; + repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); + if (repwstr == NULL) + goto onError; + /* need more space? (at least enough for what we + have+the replacement+the rest of the string (starting + at the new input position), so we won't have to check space + when there are no errors in the rest of the string) */ + requiredsize = *outpos + repwlen + insize-newpos; + if (requiredsize > outsize) { + if (requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (unicode_resize(output, requiredsize) < 0) + goto onError; + } + wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); + *outpos += repwlen; + } + *endinpos = newpos; + *inptr = *input + newpos; + + /* we made it! */ + res = 0; + + onError: + Py_XDECREF(restuple); + return res; +} + +/* --- UTF-7 Codec -------------------------------------------------------- */ + +/* See RFC2152 for details. We encode conservatively and decode liberally. */ + +/* Three simple macros defining base-64. */ + +/* Is c a base-64 character? */ + +#define IS_BASE64(c) \ + (((c) >= 'A' && (c) <= 'Z') || \ + ((c) >= 'a' && (c) <= 'z') || \ + ((c) >= '0' && (c) <= '9') || \ + (c) == '+' || (c) == '/') + +/* given that c is a base-64 character, what is its base-64 value? */ + +#define FROM_BASE64(c) \ + (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' : \ + ((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 26 : \ + ((c) >= '0' && (c) <= '9') ? (c) - '0' + 52 : \ + (c) == '+' ? 62 : 63) + +/* What is the base-64 character of the bottom 6 bits of n? */ + +#define TO_BASE64(n) \ + ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]) + +/* DECODE_DIRECT: this byte encountered in a UTF-7 string should be + * decoded as itself. We are permissive on decoding; the only ASCII + * byte not decoding to itself is the + which begins a base64 + * string. */ + +#define DECODE_DIRECT(c) \ + ((c) <= 127 && (c) != '+') + +/* The UTF-7 encoder treats ASCII characters differently according to + * whether they are Set D, Set O, Whitespace, or special (i.e. none of + * the above). See RFC2152. This array identifies these different + * sets: + * 0 : "Set D" + * alphanumeric and '(),-./:? + * 1 : "Set O" + * !"#$%&*;<=>@[]^_`{|} + * 2 : "whitespace" + * ht nl cr sp + * 3 : special (must be base64 encoded) + * everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127) + */ + +static +char utf7_category[128] = { +/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, +/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +/* sp ! " # $ % & ' ( ) * + , - . / */ + 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, +/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, +/* @ A B C D E F G H I J K L M N O */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* P Q R S T U V W X Y Z [ \ ] ^ _ */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, +/* ` a b c d e f g h i j k l m n o */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* p q r s t u v w x y z { | } ~ del */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, +}; + +/* ENCODE_DIRECT: this character should be encoded as itself. The + * answer depends on whether we are encoding set O as itself, and also + * on whether we are encoding whitespace as itself. RFC2152 makes it + * clear that the answers to these questions vary between + * applications, so this code needs to be flexible. */ + +#define ENCODE_DIRECT(c, directO, directWS) \ + ((c) < 128 && (c) > 0 && \ + ((utf7_category[(c)] == 0) || \ + (directWS && (utf7_category[(c)] == 2)) || \ + (directO && (utf7_category[(c)] == 1)))) + +PyObject * +PyUnicode_DecodeUTF7(const char *s, + Py_ssize_t size, + const char *errors) +{ + return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); +} + +/* The decoder. The only state we preserve is our read position, + * i.e. how many characters we have consumed. So if we end in the + * middle of a shift sequence we have to back off the read position + * and the output to the beginning of the sequence, otherwise we lose + * all the shift state (seen bits, number of bits seen, high + * surrogate). */ + +PyObject * +PyUnicode_DecodeUTF7Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + const char *e; + PyObject *unicode; + const char *errmsg = ""; + int inShift = 0; + Py_ssize_t shiftOutStart; + unsigned int base64bits = 0; + unsigned long base64buffer = 0; + Py_UCS4 surrogate = 0; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + /* Start off assuming it's all ASCII. Widen later as necessary. */ + unicode = PyUnicode_New(size, 127); + if (!unicode) + return NULL; + if (size == 0) { + if (consumed) + *consumed = 0; + return unicode; + } + + shiftOutStart = outpos = 0; + e = s + size; + + while (s < e) { + Py_UCS4 ch; + restart: + ch = (unsigned char) *s; + + if (inShift) { /* in a base-64 section */ + if (IS_BASE64(ch)) { /* consume a base-64 character */ + base64buffer = (base64buffer << 6) | FROM_BASE64(ch); + base64bits += 6; + s++; + if (base64bits >= 16) { + /* we have enough bits for a UTF-16 value */ + Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16)); + base64bits -= 16; + base64buffer &= (1 << base64bits) - 1; /* clear high bits */ + if (surrogate) { + /* expecting a second surrogate */ + if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { + Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh); + if (unicode_putchar(&unicode, &outpos, ch2) < 0) + goto onError; + surrogate = 0; + continue; + } + else { + if (unicode_putchar(&unicode, &outpos, surrogate) < 0) + goto onError; + surrogate = 0; + } + } + if (Py_UNICODE_IS_HIGH_SURROGATE(outCh)) { + /* first surrogate */ + surrogate = outCh; + } + else { + if (unicode_putchar(&unicode, &outpos, outCh) < 0) + goto onError; + } + } + } + else { /* now leaving a base-64 section */ + inShift = 0; + s++; + if (surrogate) { + if (unicode_putchar(&unicode, &outpos, surrogate) < 0) + goto onError; + surrogate = 0; + } + if (base64bits > 0) { /* left-over bits */ + if (base64bits >= 6) { + /* We've seen at least one base-64 character */ + errmsg = "partial character in shift sequence"; + goto utf7Error; + } + else { + /* Some bits remain; they should be zero */ + if (base64buffer != 0) { + errmsg = "non-zero padding bits in shift sequence"; + goto utf7Error; + } + } + } + if (ch != '-') { + /* '-' is absorbed; other terminating + characters are preserved */ + if (unicode_putchar(&unicode, &outpos, ch) < 0) + goto onError; + } + } + } + else if ( ch == '+' ) { + startinpos = s-starts; + s++; /* consume '+' */ + if (s < e && *s == '-') { /* '+-' encodes '+' */ + s++; + if (unicode_putchar(&unicode, &outpos, '+') < 0) + goto onError; + } + else { /* begin base64-encoded section */ + inShift = 1; + shiftOutStart = outpos; + base64bits = 0; + } + } + else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ + if (unicode_putchar(&unicode, &outpos, ch) < 0) + goto onError; + s++; + } + else { + startinpos = s-starts; + s++; + errmsg = "unexpected special character"; + goto utf7Error; + } + continue; +utf7Error: + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "utf7", errmsg, + &starts, &e, &startinpos, &endinpos, &exc, &s, + &unicode, &outpos)) + goto onError; + } + + /* end of string */ + + if (inShift && !consumed) { /* in shift sequence, no more to follow */ + /* if we're in an inconsistent state, that's an error */ + if (surrogate || + (base64bits >= 6) || + (base64bits > 0 && base64buffer != 0)) { + endinpos = size; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "utf7", "unterminated shift sequence", + &starts, &e, &startinpos, &endinpos, &exc, &s, + &unicode, &outpos)) + goto onError; + if (s < e) + goto restart; + } + } + + /* return state */ + if (consumed) { + if (inShift) { + outpos = shiftOutStart; /* back off output */ + *consumed = startinpos; + } + else { + *consumed = s-starts; + } + } + + if (unicode_resize(&unicode, outpos) < 0) + goto onError; + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(unicode); + + onError: + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_DECREF(unicode); + return NULL; +} + + +PyObject * +_PyUnicode_EncodeUTF7(PyObject *str, + int base64SetO, + int base64WhiteSpace, + const char *errors) +{ + int kind; + void *data; + Py_ssize_t len; + PyObject *v; + Py_ssize_t allocated; + int inShift = 0; + Py_ssize_t i; + unsigned int base64bits = 0; + unsigned long base64buffer = 0; + char * out; + char * start; + + if (PyUnicode_READY(str) < 0) + return NULL; + kind = PyUnicode_KIND(str); + data = PyUnicode_DATA(str); + len = PyUnicode_GET_LENGTH(str); + + if (len == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + /* It might be possible to tighten this worst case */ + allocated = 8 * len; + if (allocated / 8 != len) + return PyErr_NoMemory(); + + v = PyBytes_FromStringAndSize(NULL, allocated); + if (v == NULL) + return NULL; + + start = out = PyBytes_AS_STRING(v); + for (i = 0; i < len; ++i) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + + if (inShift) { + if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { + /* shifting out */ + if (base64bits) { /* output remaining bits */ + *out++ = TO_BASE64(base64buffer << (6-base64bits)); + base64buffer = 0; + base64bits = 0; + } + inShift = 0; + /* Characters not in the BASE64 set implicitly unshift the sequence + so no '-' is required, except if the character is itself a '-' */ + if (IS_BASE64(ch) || ch == '-') { + *out++ = '-'; + } + *out++ = (char) ch; + } + else { + goto encode_char; + } + } + else { /* not in a shift sequence */ + if (ch == '+') { + *out++ = '+'; + *out++ = '-'; + } + else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { + *out++ = (char) ch; + } + else { + *out++ = '+'; + inShift = 1; + goto encode_char; + } + } + continue; +encode_char: + if (ch >= 0x10000) { + assert(ch <= MAX_UNICODE); + + /* code first surrogate */ + base64bits += 16; + base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10); + while (base64bits >= 6) { + *out++ = TO_BASE64(base64buffer >> (base64bits-6)); + base64bits -= 6; + } + /* prepare second surrogate */ + ch = Py_UNICODE_LOW_SURROGATE(ch); + } + base64bits += 16; + base64buffer = (base64buffer << 16) | ch; + while (base64bits >= 6) { + *out++ = TO_BASE64(base64buffer >> (base64bits-6)); + base64bits -= 6; + } + } + if (base64bits) + *out++= TO_BASE64(base64buffer << (6-base64bits) ); + if (inShift) + *out++ = '-'; + if (_PyBytes_Resize(&v, out - start) < 0) + return NULL; + return v; +} +PyObject * +PyUnicode_EncodeUTF7(const Py_UNICODE *s, + Py_ssize_t size, + int base64SetO, + int base64WhiteSpace, + const char *errors) +{ + PyObject *result; + PyObject *tmp = PyUnicode_FromUnicode(s, size); + if (tmp == NULL) + return NULL; + result = _PyUnicode_EncodeUTF7(tmp, base64SetO, + base64WhiteSpace, errors); + Py_DECREF(tmp); + return result; +} + +#undef IS_BASE64 +#undef FROM_BASE64 +#undef TO_BASE64 +#undef DECODE_DIRECT +#undef ENCODE_DIRECT + +/* --- UTF-8 Codec -------------------------------------------------------- */ + +static +char utf8_code_length[256] = { + /* Map UTF-8 encoded prefix byte to sequence length. Zero means + illegal prefix. See RFC 3629 for details */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */ + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-F4 + F5-FF */ +}; + +PyObject * +PyUnicode_DecodeUTF8(const char *s, + Py_ssize_t size, + const char *errors) +{ + return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); +} + +#include "stringlib/ucs1lib.h" +#include "stringlib/codecs.h" +#include "stringlib/undef.h" + +#include "stringlib/ucs2lib.h" +#include "stringlib/codecs.h" +#include "stringlib/undef.h" + +#include "stringlib/ucs4lib.h" +#include "stringlib/codecs.h" +#include "stringlib/undef.h" + +/* Mask to check or force alignment of a pointer to C 'long' boundaries */ +#define LONG_PTR_MASK (size_t) (SIZEOF_LONG - 1) + +/* Mask to quickly check whether a C 'long' contains a + non-ASCII, UTF8-encoded char. */ +#if (SIZEOF_LONG == 8) +# define ASCII_CHAR_MASK 0x8080808080808080L +#elif (SIZEOF_LONG == 4) +# define ASCII_CHAR_MASK 0x80808080L +#else +# error C 'long' size should be either 4 or 8! +#endif + +/* Scans a UTF-8 string and returns the maximum character to be expected + and the size of the decoded unicode string. + + This function doesn't check for errors, these checks are performed in + PyUnicode_DecodeUTF8Stateful. + */ +static Py_UCS4 +utf8_max_char_size_and_char_count(const char *s, Py_ssize_t string_size, + Py_ssize_t *unicode_size) +{ + Py_ssize_t char_count = 0; + const unsigned char *p = (const unsigned char *)s; + const unsigned char *end = p + string_size; + const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); + + assert(unicode_size != NULL); + + /* By having a cascade of independent loops which fallback onto each + other, we minimize the amount of work done in the average loop + iteration, and we also maximize the CPU's ability to predict + branches correctly (because a given condition will have always the + same boolean outcome except perhaps in the last iteration of the + corresponding loop). + In the general case this brings us rather close to decoding + performance pre-PEP 393, despite the two-pass decoding. + + Note that the pure ASCII loop is not duplicated once a non-ASCII + character has been encountered. It is actually a pessimization (by + a significant factor) to use this loop on text with many non-ASCII + characters, and it is important to avoid bad performance on valid + utf-8 data (invalid utf-8 being a different can of worms). + */ + + /* ASCII */ + for (; p < end; ++p) { + /* Only check value if it's not a ASCII char... */ + if (*p < 0x80) { + /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for + an explanation. */ + if (!((size_t) p & LONG_PTR_MASK)) { + /* Help register allocation */ + register const unsigned char *_p = p; + while (_p < aligned_end) { + unsigned long value = *(unsigned long *) _p; + if (value & ASCII_CHAR_MASK) + break; + _p += SIZEOF_LONG; + char_count += SIZEOF_LONG; + } + p = _p; + if (p == end) + break; + } + } + if (*p < 0x80) + ++char_count; + else + goto _ucs1loop; + } + *unicode_size = char_count; + return 127; + +_ucs1loop: + for (; p < end; ++p) { + if (*p < 0xc4) + char_count += ((*p & 0xc0) != 0x80); + else + goto _ucs2loop; + } + *unicode_size = char_count; + return 255; + +_ucs2loop: + for (; p < end; ++p) { + if (*p < 0xf0) + char_count += ((*p & 0xc0) != 0x80); + else + goto _ucs4loop; + } + *unicode_size = char_count; + return 65535; + +_ucs4loop: + for (; p < end; ++p) { + char_count += ((*p & 0xc0) != 0x80); + } + *unicode_size = char_count; + return 65537; +} + +/* Called when we encountered some error that wasn't detected in the original + scan, e.g. an encoded surrogate character. The original maxchar computation + may have been incorrect, so redo it. */ +static int +refit_partial_string(PyObject **unicode, int kind, void *data, Py_ssize_t n) +{ + PyObject *tmp; + Py_ssize_t k; + Py_UCS4 maxchar; + for (k = 0, maxchar = 0; k < n; k++) + maxchar = Py_MAX(maxchar, PyUnicode_READ(kind, data, k)); + tmp = PyUnicode_New(PyUnicode_GET_LENGTH(*unicode), maxchar); + if (tmp == NULL) + return -1; + PyUnicode_CopyCharacters(tmp, 0, *unicode, 0, n); + Py_DECREF(*unicode); + *unicode = tmp; + return 0; +} + +/* Similar to PyUnicode_WRITE but may attempt to widen and resize the string + in case of errors. Implicit parameters: unicode, kind, data, has_errors, + onError. Potential resizing overallocates, so the result needs to shrink + at the end. +*/ +#define WRITE_MAYBE_FAIL(index, value) \ + do { \ + if (has_errors) { \ + Py_ssize_t pos = index; \ + if (pos > PyUnicode_GET_LENGTH(unicode) && \ + unicode_resize(&unicode, pos + pos/8) < 0) \ + goto onError; \ + if (unicode_putchar(&unicode, &pos, value) < 0) \ + goto onError; \ + } \ + else \ + PyUnicode_WRITE(kind, data, index, value); \ + } while (0) + +PyObject * +PyUnicode_DecodeUTF8Stateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + const char *starts = s; + int n; + int k; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + const char *e, *aligned_end; + PyObject *unicode; + const char *errmsg = ""; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + Py_UCS4 maxchar = 0; + Py_ssize_t unicode_size; + Py_ssize_t i; + int kind; + void *data; + int has_errors = 0; + + if (size == 0) { + if (consumed) + *consumed = 0; + return (PyObject *)PyUnicode_New(0, 0); + } + maxchar = utf8_max_char_size_and_char_count(s, size, &unicode_size); + /* When the string is ASCII only, just use memcpy and return. + unicode_size may be != size if there is an incomplete UTF-8 + sequence at the end of the ASCII block. */ + if (maxchar < 128 && size == unicode_size) { + if (consumed) + *consumed = size; + + if (size == 1) + return get_latin1_char((unsigned char)s[0]); + + unicode = PyUnicode_New(unicode_size, maxchar); + if (!unicode) + return NULL; + Py_MEMCPY(PyUnicode_1BYTE_DATA(unicode), s, unicode_size); + assert(_PyUnicode_CheckConsistency(unicode, 1)); + return unicode; + } + + /* In case of errors, maxchar and size computation might be incorrect; + code below refits and resizes as necessary. */ + unicode = PyUnicode_New(unicode_size, maxchar); + if (!unicode) + return NULL; + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + + /* Unpack UTF-8 encoded data */ + i = 0; + e = s + size; + switch (kind) { + case PyUnicode_1BYTE_KIND: + has_errors = ucs1lib_utf8_try_decode(s, e, (Py_UCS1 *) data, &s, &i); + break; + case PyUnicode_2BYTE_KIND: + has_errors = ucs2lib_utf8_try_decode(s, e, (Py_UCS2 *) data, &s, &i); + break; + case PyUnicode_4BYTE_KIND: + has_errors = ucs4lib_utf8_try_decode(s, e, (Py_UCS4 *) data, &s, &i); + break; + } + if (!has_errors) { + /* Ensure the unicode size calculation was correct */ + assert(i == unicode_size); + assert(s == e); + if (consumed) + *consumed = s-starts; + return unicode; + } + /* Fall through to the generic decoding loop for the rest of + the string */ + if (refit_partial_string(&unicode, kind, data, i) < 0) + goto onError; + + aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK); + + while (s < e) { + Py_UCS4 ch = (unsigned char)*s; + + if (ch < 0x80) { + /* Fast path for runs of ASCII characters. Given that common UTF-8 + input will consist of an overwhelming majority of ASCII + characters, we try to optimize for this case by checking + as many characters as a C 'long' can contain. + First, check if we can do an aligned read, as most CPUs have + a penalty for unaligned reads. + */ + if (!((size_t) s & LONG_PTR_MASK)) { + /* Help register allocation */ + register const char *_s = s; + register Py_ssize_t _i = i; + while (_s < aligned_end) { + /* Read a whole long at a time (either 4 or 8 bytes), + and do a fast unrolled copy if it only contains ASCII + characters. */ + unsigned long value = *(unsigned long *) _s; + if (value & ASCII_CHAR_MASK) + break; + WRITE_MAYBE_FAIL(_i+0, _s[0]); + WRITE_MAYBE_FAIL(_i+1, _s[1]); + WRITE_MAYBE_FAIL(_i+2, _s[2]); + WRITE_MAYBE_FAIL(_i+3, _s[3]); +#if (SIZEOF_LONG == 8) + WRITE_MAYBE_FAIL(_i+4, _s[4]); + WRITE_MAYBE_FAIL(_i+5, _s[5]); + WRITE_MAYBE_FAIL(_i+6, _s[6]); + WRITE_MAYBE_FAIL(_i+7, _s[7]); +#endif + _s += SIZEOF_LONG; + _i += SIZEOF_LONG; + } + s = _s; + i = _i; + if (s == e) + break; + ch = (unsigned char)*s; + } + } + + if (ch < 0x80) { + WRITE_MAYBE_FAIL(i++, ch); + s++; + continue; + } + + n = utf8_code_length[ch]; + + if (s + n > e) { + if (consumed) + break; + else { + errmsg = "unexpected end of data"; + startinpos = s-starts; + endinpos = startinpos+1; + for (k=1; (k < size-startinpos) && ((s[k]&0xC0) == 0x80); k++) + endinpos++; + goto utf8Error; + } + } + + switch (n) { + + case 0: + errmsg = "invalid start byte"; + startinpos = s-starts; + endinpos = startinpos+1; + goto utf8Error; + + case 1: + errmsg = "internal error"; + startinpos = s-starts; + endinpos = startinpos+1; + goto utf8Error; + + case 2: + if ((s[1] & 0xc0) != 0x80) { + errmsg = "invalid continuation byte"; + startinpos = s-starts; + endinpos = startinpos + 1; + goto utf8Error; + } + ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); + assert ((ch > 0x007F) && (ch <= 0x07FF)); + WRITE_MAYBE_FAIL(i++, ch); + break; + + case 3: + /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf + will result in surrogates in range d800-dfff. Surrogates are + not valid UTF-8 so they are rejected. + See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf + (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + ((unsigned char)s[0] == 0xE0 && + (unsigned char)s[1] < 0xA0) || + ((unsigned char)s[0] == 0xED && + (unsigned char)s[1] > 0x9F)) { + errmsg = "invalid continuation byte"; + startinpos = s-starts; + endinpos = startinpos + 1; + + /* if s[1] first two bits are 1 and 0, then the invalid + continuation byte is s[2], so increment endinpos by 1, + if not, s[1] is invalid and endinpos doesn't need to + be incremented. */ + if ((s[1] & 0xC0) == 0x80) + endinpos++; + goto utf8Error; + } + ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); + assert ((ch > 0x07FF) && (ch <= 0xFFFF)); + WRITE_MAYBE_FAIL(i++, ch); + break; + + case 4: + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[3] & 0xc0) != 0x80 || + ((unsigned char)s[0] == 0xF0 && + (unsigned char)s[1] < 0x90) || + ((unsigned char)s[0] == 0xF4 && + (unsigned char)s[1] > 0x8F)) { + errmsg = "invalid continuation byte"; + startinpos = s-starts; + endinpos = startinpos + 1; + if ((s[1] & 0xC0) == 0x80) { + endinpos++; + if ((s[2] & 0xC0) == 0x80) + endinpos++; + } + goto utf8Error; + } + ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) + + ((s[2] & 0x3f) << 6) + (s[3] & 0x3f); + assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE)); + + WRITE_MAYBE_FAIL(i++, ch); + break; + } + s += n; + continue; + + utf8Error: + if (!has_errors) { + if (refit_partial_string(&unicode, kind, data, i) < 0) + goto onError; + has_errors = 1; + } + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "utf8", errmsg, + &starts, &e, &startinpos, &endinpos, &exc, &s, + &unicode, &i)) + goto onError; + /* Update data because unicode_decode_call_errorhandler might have + re-created or resized the unicode object. */ + data = PyUnicode_DATA(unicode); + kind = PyUnicode_KIND(unicode); + aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK); + } + /* Ensure the unicode_size calculation above was correct: */ + assert(has_errors || i == unicode_size); + + if (consumed) + *consumed = s-starts; + + /* Adjust length and ready string when it contained errors and + is of the old resizable kind. */ + if (has_errors) { + if (PyUnicode_Resize(&unicode, i) < 0) + goto onError; + } + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + assert(_PyUnicode_CheckConsistency(unicode, 1)); + return unicode; + + onError: + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_DECREF(unicode); + return NULL; +} + +#undef WRITE_MAYBE_FAIL + +#ifdef __APPLE__ + +/* Simplified UTF-8 decoder using surrogateescape error handler, + used to decode the command line arguments on Mac OS X. */ + +wchar_t* +_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) +{ + int n; + const char *e; + wchar_t *unicode, *p; + + /* Note: size will always be longer than the resulting Unicode + character count */ + if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) { + PyErr_NoMemory(); + return NULL; + } + unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t)); + if (!unicode) + return NULL; + + /* Unpack UTF-8 encoded data */ + p = unicode; + e = s + size; + while (s < e) { + Py_UCS4 ch = (unsigned char)*s; + + if (ch < 0x80) { + *p++ = (wchar_t)ch; + s++; + continue; + } + + n = utf8_code_length[ch]; + if (s + n > e) { + goto surrogateescape; + } + + switch (n) { + case 0: + case 1: + goto surrogateescape; + + case 2: + if ((s[1] & 0xc0) != 0x80) + goto surrogateescape; + ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); + assert ((ch > 0x007F) && (ch <= 0x07FF)); + *p++ = (wchar_t)ch; + break; + + case 3: + /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf + will result in surrogates in range d800-dfff. Surrogates are + not valid UTF-8 so they are rejected. + See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf + (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + ((unsigned char)s[0] == 0xE0 && + (unsigned char)s[1] < 0xA0) || + ((unsigned char)s[0] == 0xED && + (unsigned char)s[1] > 0x9F)) { + + goto surrogateescape; + } + ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); + assert ((ch > 0x07FF) && (ch <= 0xFFFF)); + *p++ = (wchar_t)ch; + break; + + case 4: + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[3] & 0xc0) != 0x80 || + ((unsigned char)s[0] == 0xF0 && + (unsigned char)s[1] < 0x90) || + ((unsigned char)s[0] == 0xF4 && + (unsigned char)s[1] > 0x8F)) { + goto surrogateescape; + } + ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) + + ((s[2] & 0x3f) << 6) + (s[3] & 0x3f); + assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE)); + +#if SIZEOF_WCHAR_T == 4 + *p++ = (wchar_t)ch; +#else + /* compute and append the two surrogates: */ + *p++ = (wchar_t)Py_UNICODE_HIGH_SURROGATE(ch); + *p++ = (wchar_t)Py_UNICODE_LOW_SURROGATE(ch); +#endif + break; + } + s += n; + continue; + + surrogateescape: + *p++ = 0xDC00 + ch; + s++; + } + *p = L'\0'; + return unicode; +} + +#endif /* __APPLE__ */ + +/* Primary internal function which creates utf8 encoded bytes objects. + + Allocation strategy: if the string is short, convert into a stack buffer + and allocate exactly as much space needed at the end. Else allocate the + maximum possible needed (4 result bytes per Unicode character), and return + the excess memory at the end. +*/ +PyObject * +_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors) +{ +#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ + + Py_ssize_t i; /* index into s of next input byte */ + PyObject *result; /* result string object */ + char *p; /* next free byte in output buffer */ + Py_ssize_t nallocated; /* number of result bytes allocated */ + Py_ssize_t nneeded; /* number of result bytes needed */ + char stackbuf[MAX_SHORT_UNICHARS * 4]; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + int kind; + void *data; + Py_ssize_t size; + PyObject *rep = NULL; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + + if (PyUnicode_READY(unicode) == -1) + return NULL; + + if (PyUnicode_UTF8(unicode)) + return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode), + PyUnicode_UTF8_LENGTH(unicode)); + + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + size = PyUnicode_GET_LENGTH(unicode); + + assert(size >= 0); + + if (size <= MAX_SHORT_UNICHARS) { + /* Write into the stack buffer; nallocated can't overflow. + * At the end, we'll allocate exactly as much heap space as it + * turns out we need. + */ + nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); + result = NULL; /* will allocate after we're done */ + p = stackbuf; + } + else { + /* Overallocate on the heap, and give the excess back at the end. */ + nallocated = size * 4; + if (nallocated / 4 != size) /* overflow! */ + return PyErr_NoMemory(); + result = PyBytes_FromStringAndSize(NULL, nallocated); + if (result == NULL) + return NULL; + p = PyBytes_AS_STRING(result); + } + + for (i = 0; i < size;) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i++); + + if (ch < 0x80) + /* Encode ASCII */ + *p++ = (char) ch; + + else if (ch < 0x0800) { + /* Encode Latin-1 */ + *p++ = (char)(0xc0 | (ch >> 6)); + *p++ = (char)(0x80 | (ch & 0x3f)); + } else if (Py_UNICODE_IS_SURROGATE(ch)) { + Py_ssize_t newpos; + Py_ssize_t repsize, k, startpos; + startpos = i-1; + rep = unicode_encode_call_errorhandler( + errors, &errorHandler, "utf-8", "surrogates not allowed", + unicode, &exc, startpos, startpos+1, &newpos); + if (!rep) + goto error; + + if (PyBytes_Check(rep)) + repsize = PyBytes_GET_SIZE(rep); + else + repsize = PyUnicode_GET_LENGTH(rep); + + if (repsize > 4) { + Py_ssize_t offset; + + if (result == NULL) + offset = p - stackbuf; + else + offset = p - PyBytes_AS_STRING(result); + + if (nallocated > PY_SSIZE_T_MAX - repsize + 4) { + /* integer overflow */ + PyErr_NoMemory(); + goto error; + } + nallocated += repsize - 4; + if (result != NULL) { + if (_PyBytes_Resize(&result, nallocated) < 0) + goto error; + } else { + result = PyBytes_FromStringAndSize(NULL, nallocated); + if (result == NULL) + goto error; + Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset); + } + p = PyBytes_AS_STRING(result) + offset; + } + + if (PyBytes_Check(rep)) { + char *prep = PyBytes_AS_STRING(rep); + for(k = repsize; k > 0; k--) + *p++ = *prep++; + } else /* rep is unicode */ { + enum PyUnicode_Kind repkind; + void *repdata; + + if (PyUnicode_READY(rep) < 0) + goto error; + repkind = PyUnicode_KIND(rep); + repdata = PyUnicode_DATA(rep); + + for(k=0; k> 12)); + *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); + *p++ = (char)(0x80 | (ch & 0x3f)); + } else /* ch >= 0x10000 */ { + assert(ch <= MAX_UNICODE); + /* Encode UCS4 Unicode ordinals */ + *p++ = (char)(0xf0 | (ch >> 18)); + *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); + *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); + *p++ = (char)(0x80 | (ch & 0x3f)); + } + } + + if (result == NULL) { + /* This was stack allocated. */ + nneeded = p - stackbuf; + assert(nneeded <= nallocated); + result = PyBytes_FromStringAndSize(stackbuf, nneeded); + } + else { + /* Cut back to size actually needed. */ + nneeded = p - PyBytes_AS_STRING(result); + assert(nneeded <= nallocated); + _PyBytes_Resize(&result, nneeded); + } + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return result; + error: + Py_XDECREF(rep); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_XDECREF(result); + return NULL; + +#undef MAX_SHORT_UNICHARS +} + +PyObject * +PyUnicode_EncodeUTF8(const Py_UNICODE *s, + Py_ssize_t size, + const char *errors) +{ + PyObject *v, *unicode; + + unicode = PyUnicode_FromUnicode(s, size); + if (unicode == NULL) + return NULL; + v = _PyUnicode_AsUTF8String(unicode, errors); + Py_DECREF(unicode); + return v; +} + +PyObject * +PyUnicode_AsUTF8String(PyObject *unicode) +{ + return _PyUnicode_AsUTF8String(unicode, NULL); +} + +/* --- UTF-32 Codec ------------------------------------------------------- */ + +PyObject * +PyUnicode_DecodeUTF32(const char *s, + Py_ssize_t size, + const char *errors, + int *byteorder) +{ + return PyUnicode_DecodeUTF32Stateful(s, size, errors, byteorder, NULL); +} + +PyObject * +PyUnicode_DecodeUTF32Stateful(const char *s, + Py_ssize_t size, + const char *errors, + int *byteorder, + Py_ssize_t *consumed) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + PyObject *unicode; + const unsigned char *q, *e; + int bo = 0; /* assume native ordering by default */ + const char *errmsg = ""; + /* Offsets from q for retrieving bytes in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int iorder[] = {0, 1, 2, 3}; +#else + int iorder[] = {3, 2, 1, 0}; +#endif + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + q = (unsigned char *)s; + e = q + size; + + if (byteorder) + bo = *byteorder; + + /* Check for BOM marks (U+FEFF) in the input and adjust current + byte order setting accordingly. In native mode, the leading BOM + mark is skipped, in all other modes, it is copied to the output + stream as-is (giving a ZWNBSP character). */ + if (bo == 0) { + if (size >= 4) { + const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | + (q[iorder[1]] << 8) | q[iorder[0]]; +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + if (bom == 0x0000FEFF) { + q += 4; + bo = -1; + } + else if (bom == 0xFFFE0000) { + q += 4; + bo = 1; + } +#else + if (bom == 0x0000FEFF) { + q += 4; + bo = 1; + } + else if (bom == 0xFFFE0000) { + q += 4; + bo = -1; + } +#endif + } + } + + if (bo == -1) { + /* force LE */ + iorder[0] = 0; + iorder[1] = 1; + iorder[2] = 2; + iorder[3] = 3; + } + else if (bo == 1) { + /* force BE */ + iorder[0] = 3; + iorder[1] = 2; + iorder[2] = 1; + iorder[3] = 0; + } + + /* This might be one to much, because of a BOM */ + unicode = PyUnicode_New((size+3)/4, 127); + if (!unicode) + return NULL; + if (size == 0) + return unicode; + outpos = 0; + + while (q < e) { + Py_UCS4 ch; + /* remaining bytes at the end? (size should be divisible by 4) */ + if (e-q<4) { + if (consumed) + break; + errmsg = "truncated data"; + startinpos = ((const char *)q)-starts; + endinpos = ((const char *)e)-starts; + goto utf32Error; + /* The remaining input chars are ignored if the callback + chooses to skip the input */ + } + ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | + (q[iorder[1]] << 8) | q[iorder[0]]; + + if (ch >= 0x110000) + { + errmsg = "codepoint not in range(0x110000)"; + startinpos = ((const char *)q)-starts; + endinpos = startinpos+4; + goto utf32Error; + } + if (unicode_putchar(&unicode, &outpos, ch) < 0) + goto onError; + q += 4; + continue; + utf32Error: + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "utf32", errmsg, + &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q, + &unicode, &outpos)) + goto onError; + } + + if (byteorder) + *byteorder = bo; + + if (consumed) + *consumed = (const char *)q-starts; + + /* Adjust length */ + if (PyUnicode_Resize(&unicode, outpos) < 0) + goto onError; + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(unicode); + + onError: + Py_DECREF(unicode); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +PyObject * +_PyUnicode_EncodeUTF32(PyObject *str, + const char *errors, + int byteorder) +{ + int kind; + void *data; + Py_ssize_t len; + PyObject *v; + unsigned char *p; + Py_ssize_t nsize, bytesize, i; + /* Offsets from p for storing byte pairs in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int iorder[] = {0, 1, 2, 3}; +#else + int iorder[] = {3, 2, 1, 0}; +#endif + +#define STORECHAR(CH) \ + do { \ + p[iorder[3]] = ((CH) >> 24) & 0xff; \ + p[iorder[2]] = ((CH) >> 16) & 0xff; \ + p[iorder[1]] = ((CH) >> 8) & 0xff; \ + p[iorder[0]] = (CH) & 0xff; \ + p += 4; \ + } while(0) + + if (!PyUnicode_Check(str)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(str) < 0) + return NULL; + kind = PyUnicode_KIND(str); + data = PyUnicode_DATA(str); + len = PyUnicode_GET_LENGTH(str); + + nsize = len + (byteorder == 0); + bytesize = nsize * 4; + if (bytesize / 4 != nsize) + return PyErr_NoMemory(); + v = PyBytes_FromStringAndSize(NULL, bytesize); + if (v == NULL) + return NULL; + + p = (unsigned char *)PyBytes_AS_STRING(v); + if (byteorder == 0) + STORECHAR(0xFEFF); + if (len == 0) + goto done; + + if (byteorder == -1) { + /* force LE */ + iorder[0] = 0; + iorder[1] = 1; + iorder[2] = 2; + iorder[3] = 3; + } + else if (byteorder == 1) { + /* force BE */ + iorder[0] = 3; + iorder[1] = 2; + iorder[2] = 1; + iorder[3] = 0; + } + + for (i = 0; i < len; i++) + STORECHAR(PyUnicode_READ(kind, data, i)); + + done: + return v; +#undef STORECHAR +} + +PyObject * +PyUnicode_EncodeUTF32(const Py_UNICODE *s, + Py_ssize_t size, + const char *errors, + int byteorder) +{ + PyObject *result; + PyObject *tmp = PyUnicode_FromUnicode(s, size); + if (tmp == NULL) + return NULL; + result = _PyUnicode_EncodeUTF32(tmp, errors, byteorder); + Py_DECREF(tmp); + return result; +} + +PyObject * +PyUnicode_AsUTF32String(PyObject *unicode) +{ + return _PyUnicode_EncodeUTF32(unicode, NULL, 0); +} + +/* --- UTF-16 Codec ------------------------------------------------------- */ + +PyObject * +PyUnicode_DecodeUTF16(const char *s, + Py_ssize_t size, + const char *errors, + int *byteorder) +{ + return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); +} + +/* Two masks for fast checking of whether a C 'long' may contain + UTF16-encoded surrogate characters. This is an efficient heuristic, + assuming that non-surrogate characters with a code point >= 0x8000 are + rare in most input. + FAST_CHAR_MASK is used when the input is in native byte ordering, + SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering. +*/ +#if (SIZEOF_LONG == 8) +# define FAST_CHAR_MASK 0x8000800080008000L +# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L +#elif (SIZEOF_LONG == 4) +# define FAST_CHAR_MASK 0x80008000L +# define SWAPPED_FAST_CHAR_MASK 0x00800080L +#else +# error C 'long' size should be either 4 or 8! +#endif + +PyObject * +PyUnicode_DecodeUTF16Stateful(const char *s, + Py_ssize_t size, + const char *errors, + int *byteorder, + Py_ssize_t *consumed) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + PyObject *unicode; + const unsigned char *q, *e, *aligned_end; + int bo = 0; /* assume native ordering by default */ + int native_ordering = 0; + const char *errmsg = ""; + /* Offsets from q for retrieving byte pairs in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int ihi = 1, ilo = 0; +#else + int ihi = 0, ilo = 1; +#endif + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + /* Note: size will always be longer than the resulting Unicode + character count */ + unicode = PyUnicode_New(size, 127); + if (!unicode) + return NULL; + if (size == 0) + return unicode; + outpos = 0; + + q = (unsigned char *)s; + e = q + size - 1; + + if (byteorder) + bo = *byteorder; + + /* Check for BOM marks (U+FEFF) in the input and adjust current + byte order setting accordingly. In native mode, the leading BOM + mark is skipped, in all other modes, it is copied to the output + stream as-is (giving a ZWNBSP character). */ + if (bo == 0) { + if (size >= 2) { + const Py_UCS4 bom = (q[ihi] << 8) | q[ilo]; +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + if (bom == 0xFEFF) { + q += 2; + bo = -1; + } + else if (bom == 0xFFFE) { + q += 2; + bo = 1; + } +#else + if (bom == 0xFEFF) { + q += 2; + bo = 1; + } + else if (bom == 0xFFFE) { + q += 2; + bo = -1; + } +#endif + } + } + + if (bo == -1) { + /* force LE */ + ihi = 1; + ilo = 0; + } + else if (bo == 1) { + /* force BE */ + ihi = 0; + ilo = 1; + } +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + native_ordering = ilo < ihi; +#else + native_ordering = ilo > ihi; +#endif + + aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); + while (q < e) { + Py_UCS4 ch; + /* First check for possible aligned read of a C 'long'. Unaligned + reads are more expensive, better to defer to another iteration. */ + if (!((size_t) q & LONG_PTR_MASK)) { + /* Fast path for runs of non-surrogate chars. */ + register const unsigned char *_q = q; + int kind = PyUnicode_KIND(unicode); + void *data = PyUnicode_DATA(unicode); + while (_q < aligned_end) { + unsigned long block = * (unsigned long *) _q; + unsigned short *pblock = (unsigned short*)█ + Py_UCS4 maxch; + if (native_ordering) { + /* Can use buffer directly */ + if (block & FAST_CHAR_MASK) + break; + } + else { + /* Need to byte-swap */ + unsigned char *_p = (unsigned char*)pblock; + if (block & SWAPPED_FAST_CHAR_MASK) + break; + _p[0] = _q[1]; + _p[1] = _q[0]; + _p[2] = _q[3]; + _p[3] = _q[2]; +#if (SIZEOF_LONG == 8) + _p[4] = _q[5]; + _p[5] = _q[4]; + _p[6] = _q[7]; + _p[7] = _q[6]; +#endif + } + maxch = Py_MAX(pblock[0], pblock[1]); +#if SIZEOF_LONG == 8 + maxch = Py_MAX(maxch, Py_MAX(pblock[2], pblock[3])); +#endif + if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) { + if (unicode_widen(&unicode, maxch) < 0) + goto onError; + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + } + PyUnicode_WRITE(kind, data, outpos++, pblock[0]); + PyUnicode_WRITE(kind, data, outpos++, pblock[1]); +#if SIZEOF_LONG == 8 + PyUnicode_WRITE(kind, data, outpos++, pblock[2]); + PyUnicode_WRITE(kind, data, outpos++, pblock[3]); +#endif + _q += SIZEOF_LONG; + } + q = _q; + if (q >= e) + break; + } + ch = (q[ihi] << 8) | q[ilo]; + + q += 2; + + if (!Py_UNICODE_IS_SURROGATE(ch)) { + if (unicode_putchar(&unicode, &outpos, ch) < 0) + goto onError; + continue; + } + + /* UTF-16 code pair: */ + if (q > e) { + errmsg = "unexpected end of data"; + startinpos = (((const char *)q) - 2) - starts; + endinpos = ((const char *)e) + 1 - starts; + goto utf16Error; + } + if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) { + Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo]; + q += 2; + if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) { + if (unicode_putchar(&unicode, &outpos, + Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0) + goto onError; + continue; + } + else { + errmsg = "illegal UTF-16 surrogate"; + startinpos = (((const char *)q)-4)-starts; + endinpos = startinpos+2; + goto utf16Error; + } + + } + errmsg = "illegal encoding"; + startinpos = (((const char *)q)-2)-starts; + endinpos = startinpos+2; + /* Fall through to report the error */ + + utf16Error: + if (unicode_decode_call_errorhandler( + errors, + &errorHandler, + "utf16", errmsg, + &starts, + (const char **)&e, + &startinpos, + &endinpos, + &exc, + (const char **)&q, + &unicode, + &outpos)) + goto onError; + } + /* remaining byte at the end? (size should be even) */ + if (e == q) { + if (!consumed) { + errmsg = "truncated data"; + startinpos = ((const char *)q) - starts; + endinpos = ((const char *)e) + 1 - starts; + if (unicode_decode_call_errorhandler( + errors, + &errorHandler, + "utf16", errmsg, + &starts, + (const char **)&e, + &startinpos, + &endinpos, + &exc, + (const char **)&q, + &unicode, + &outpos)) + goto onError; + /* The remaining input chars are ignored if the callback + chooses to skip the input */ + } + } + + if (byteorder) + *byteorder = bo; + + if (consumed) + *consumed = (const char *)q-starts; + + /* Adjust length */ + if (PyUnicode_Resize(&unicode, outpos) < 0) + goto onError; + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(unicode); + + onError: + Py_DECREF(unicode); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +#undef FAST_CHAR_MASK +#undef SWAPPED_FAST_CHAR_MASK + +PyObject * +_PyUnicode_EncodeUTF16(PyObject *str, + const char *errors, + int byteorder) +{ + int kind; + void *data; + Py_ssize_t len; + PyObject *v; + unsigned char *p; + Py_ssize_t nsize, bytesize; + Py_ssize_t i, pairs; + /* Offsets from p for storing byte pairs in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int ihi = 1, ilo = 0; +#else + int ihi = 0, ilo = 1; +#endif + +#define STORECHAR(CH) \ + do { \ + p[ihi] = ((CH) >> 8) & 0xff; \ + p[ilo] = (CH) & 0xff; \ + p += 2; \ + } while(0) + + if (!PyUnicode_Check(str)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(str) < 0) + return NULL; + kind = PyUnicode_KIND(str); + data = PyUnicode_DATA(str); + len = PyUnicode_GET_LENGTH(str); + + pairs = 0; + if (kind == PyUnicode_4BYTE_KIND) + for (i = 0; i < len; i++) + if (PyUnicode_READ(kind, data, i) >= 0x10000) + pairs++; + /* 2 * (len + pairs + (byteorder == 0)) */ + if (len > PY_SSIZE_T_MAX - pairs - (byteorder == 0)) + return PyErr_NoMemory(); + nsize = len + pairs + (byteorder == 0); + bytesize = nsize * 2; + if (bytesize / 2 != nsize) + return PyErr_NoMemory(); + v = PyBytes_FromStringAndSize(NULL, bytesize); + if (v == NULL) + return NULL; + + p = (unsigned char *)PyBytes_AS_STRING(v); + if (byteorder == 0) + STORECHAR(0xFEFF); + if (len == 0) + goto done; + + if (byteorder == -1) { + /* force LE */ + ihi = 1; + ilo = 0; + } + else if (byteorder == 1) { + /* force BE */ + ihi = 0; + ilo = 1; + } + + for (i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + Py_UCS4 ch2 = 0; + if (ch >= 0x10000) { + ch2 = Py_UNICODE_LOW_SURROGATE(ch); + ch = Py_UNICODE_HIGH_SURROGATE(ch); + } + STORECHAR(ch); + if (ch2) + STORECHAR(ch2); + } + + done: + return v; +#undef STORECHAR +} + +PyObject * +PyUnicode_EncodeUTF16(const Py_UNICODE *s, + Py_ssize_t size, + const char *errors, + int byteorder) +{ + PyObject *result; + PyObject *tmp = PyUnicode_FromUnicode(s, size); + if (tmp == NULL) + return NULL; + result = _PyUnicode_EncodeUTF16(tmp, errors, byteorder); + Py_DECREF(tmp); + return result; +} + +PyObject * +PyUnicode_AsUTF16String(PyObject *unicode) +{ + return _PyUnicode_EncodeUTF16(unicode, NULL, 0); +} + +/* --- Unicode Escape Codec ----------------------------------------------- */ + +/* Helper function for PyUnicode_DecodeUnicodeEscape, determines + if all the escapes in the string make it still a valid ASCII string. + Returns -1 if any escapes were found which cause the string to + pop out of ASCII range. Otherwise returns the length of the + required buffer to hold the string. + */ +static Py_ssize_t +length_of_escaped_ascii_string(const char *s, Py_ssize_t size) +{ + const unsigned char *p = (const unsigned char *)s; + const unsigned char *end = p + size; + Py_ssize_t length = 0; + + if (size < 0) + return -1; + + for (; p < end; ++p) { + if (*p > 127) { + /* Non-ASCII */ + return -1; + } + else if (*p != '\\') { + /* Normal character */ + ++length; + } + else { + /* Backslash-escape, check next char */ + ++p; + /* Escape sequence reaches till end of string or + non-ASCII follow-up. */ + if (p >= end || *p > 127) + return -1; + switch (*p) { + case '\n': + /* backslash + \n result in zero characters */ + break; + case '\\': case '\'': case '\"': + case 'b': case 'f': case 't': + case 'n': case 'r': case 'v': case 'a': + ++length; + break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case 'x': case 'u': case 'U': case 'N': + /* these do not guarantee ASCII characters */ + return -1; + default: + /* count the backslash + the other character */ + length += 2; + } + } + } + return length; +} + +static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; + +PyObject * +PyUnicode_DecodeUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + int j; + PyObject *v; + const char *end; + char* message; + Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */ + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + Py_ssize_t len; + Py_ssize_t i; + + len = length_of_escaped_ascii_string(s, size); + + /* After length_of_escaped_ascii_string() there are two alternatives, + either the string is pure ASCII with named escapes like \n, etc. + and we determined it's exact size (common case) + or it contains \x, \u, ... escape sequences. then we create a + legacy wchar string and resize it at the end of this function. */ + if (len >= 0) { + v = PyUnicode_New(len, 127); + if (!v) + goto onError; + assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + } + else { + /* Escaped strings will always be longer than the resulting + Unicode string, so we start with size here and then reduce the + length after conversion to the true value. + (but if the error callback returns a long replacement string + we'll have to allocate more space) */ + v = PyUnicode_New(size, 127); + if (!v) + goto onError; + len = size; + } + + if (size == 0) + return v; + i = 0; + end = s + size; + + while (s < end) { + unsigned char c; + Py_UCS4 x; + int digits; + + /* The only case in which i == ascii_length is a backslash + followed by a newline. */ + assert(i <= len); + + /* Non-escape characters are interpreted as Unicode ordinals */ + if (*s != '\\') { + if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0) + goto onError; + continue; + } + + startinpos = s-starts; + /* \ - Escapes */ + s++; + c = *s++; + if (s > end) + c = '\0'; /* Invalid after \ */ + + /* The only case in which i == ascii_length is a backslash + followed by a newline. */ + assert(i < len || (i == len && c == '\n')); + + switch (c) { + + /* \x escapes */ +#define WRITECHAR(ch) \ + do { \ + if (unicode_putchar(&v, &i, ch) < 0) \ + goto onError; \ + }while(0) + + case '\n': break; + case '\\': WRITECHAR('\\'); break; + case '\'': WRITECHAR('\''); break; + case '\"': WRITECHAR('\"'); break; + case 'b': WRITECHAR('\b'); break; + /* FF */ + case 'f': WRITECHAR('\014'); break; + case 't': WRITECHAR('\t'); break; + case 'n': WRITECHAR('\n'); break; + case 'r': WRITECHAR('\r'); break; + /* VT */ + case 'v': WRITECHAR('\013'); break; + /* BEL, not classic C */ + case 'a': WRITECHAR('\007'); break; + + /* \OOO (octal) escapes */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + x = s[-1] - '0'; + if (s < end && '0' <= *s && *s <= '7') { + x = (x<<3) + *s++ - '0'; + if (s < end && '0' <= *s && *s <= '7') + x = (x<<3) + *s++ - '0'; + } + WRITECHAR(x); + break; + + /* hex escapes */ + /* \xXX */ + case 'x': + digits = 2; + message = "truncated \\xXX escape"; + goto hexescape; + + /* \uXXXX */ + case 'u': + digits = 4; + message = "truncated \\uXXXX escape"; + goto hexescape; + + /* \UXXXXXXXX */ + case 'U': + digits = 8; + message = "truncated \\UXXXXXXXX escape"; + hexescape: + chr = 0; + if (s+digits>end) { + endinpos = size; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", "end of string in escape sequence", + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + goto nextByte; + } + for (j = 0; j < digits; ++j) { + c = (unsigned char) s[j]; + if (!Py_ISXDIGIT(c)) { + endinpos = (s+j+1)-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", message, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + len = PyUnicode_GET_LENGTH(v); + goto nextByte; + } + chr = (chr<<4) & ~0xF; + if (c >= '0' && c <= '9') + chr += c - '0'; + else if (c >= 'a' && c <= 'f') + chr += 10 + c - 'a'; + else + chr += 10 + c - 'A'; + } + s += j; + if (chr == 0xffffffff && PyErr_Occurred()) + /* _decoding_error will have already written into the + target buffer. */ + break; + store: + /* when we get here, chr is a 32-bit unicode character */ + if (chr <= MAX_UNICODE) { + WRITECHAR(chr); + } else { + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", "illegal Unicode character", + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + } + break; + + /* \N{name} */ + case 'N': + message = "malformed \\N character escape"; + if (ucnhash_CAPI == NULL) { + /* load the unicode data module */ + ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( + PyUnicodeData_CAPSULE_NAME, 1); + if (ucnhash_CAPI == NULL) + goto ucnhashError; + } + if (*s == '{') { + const char *start = s+1; + /* look for the closing brace */ + while (*s != '}' && s < end) + s++; + if (s > start && s < end && *s == '}') { + /* found a name. look it up in the unicode database */ + message = "unknown Unicode character name"; + s++; + if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), + &chr, 0)) + goto store; + } + } + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", message, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + break; + + default: + if (s > end) { + message = "\\ at end of string"; + s--; + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicodeescape", message, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &i)) + goto onError; + } + else { + WRITECHAR('\\'); + WRITECHAR(s[-1]); + } + break; + } + nextByte: + ; + } +#undef WRITECHAR + + if (PyUnicode_Resize(&v, i) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(v); + + ucnhashError: + PyErr_SetString( + PyExc_UnicodeError, + "\\N escapes not supported (can't load unicodedata module)" + ); + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +/* Return a Unicode-Escape string version of the Unicode object. + + If quotes is true, the string is enclosed in u"" or u'' quotes as + appropriate. + +*/ + +PyObject * +PyUnicode_AsUnicodeEscapeString(PyObject *unicode) +{ + Py_ssize_t i, len; + PyObject *repr; + char *p; + int kind; + void *data; + Py_ssize_t expandsize = 0; + + /* Initial allocation is based on the longest-possible unichr + escape. + + In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source + unichr, so in this case it's the longest unichr escape. In + narrow (UTF-16) builds this is five chars per source unichr + since there are two unichrs in the surrogate pair, so in narrow + (UTF-16) builds it's not the longest unichr escape. + + In wide or narrow builds '\uxxxx' is 6 chars per source unichr, + so in the narrow (UTF-16) build case it's the longest unichr + escape. + */ + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(unicode) < 0) + return NULL; + len = PyUnicode_GET_LENGTH(unicode); + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + switch(kind) { + case PyUnicode_1BYTE_KIND: expandsize = 4; break; + case PyUnicode_2BYTE_KIND: expandsize = 6; break; + case PyUnicode_4BYTE_KIND: expandsize = 10; break; + } + + if (len == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + if (len > (PY_SSIZE_T_MAX - 2 - 1) / expandsize) + return PyErr_NoMemory(); + + repr = PyBytes_FromStringAndSize(NULL, + 2 + + expandsize*len + + 1); + if (repr == NULL) + return NULL; + + p = PyBytes_AS_STRING(repr); + + for (i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + + /* Escape backslashes */ + if (ch == '\\') { + *p++ = '\\'; + *p++ = (char) ch; + continue; + } + + /* Map 21-bit characters to '\U00xxxxxx' */ + else if (ch >= 0x10000) { + assert(ch <= MAX_UNICODE); + *p++ = '\\'; + *p++ = 'U'; + *p++ = Py_hexdigits[(ch >> 28) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 24) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 20) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 16) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 12) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 8) & 0x0000000F]; + *p++ = Py_hexdigits[(ch >> 4) & 0x0000000F]; + *p++ = Py_hexdigits[ch & 0x0000000F]; + continue; + } + + /* Map 16-bit characters to '\uxxxx' */ + if (ch >= 256) { + *p++ = '\\'; + *p++ = 'u'; + *p++ = Py_hexdigits[(ch >> 12) & 0x000F]; + *p++ = Py_hexdigits[(ch >> 8) & 0x000F]; + *p++ = Py_hexdigits[(ch >> 4) & 0x000F]; + *p++ = Py_hexdigits[ch & 0x000F]; + } + + /* Map special whitespace to '\t', \n', '\r' */ + else if (ch == '\t') { + *p++ = '\\'; + *p++ = 't'; + } + else if (ch == '\n') { + *p++ = '\\'; + *p++ = 'n'; + } + else if (ch == '\r') { + *p++ = '\\'; + *p++ = 'r'; + } + + /* Map non-printable US ASCII to '\xhh' */ + else if (ch < ' ' || ch >= 0x7F) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = Py_hexdigits[(ch >> 4) & 0x000F]; + *p++ = Py_hexdigits[ch & 0x000F]; + } + + /* Copy everything else as-is */ + else + *p++ = (char) ch; + } + + assert(p - PyBytes_AS_STRING(repr) > 0); + if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0) + return NULL; + return repr; +} + +PyObject * +PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, + Py_ssize_t size) +{ + PyObject *result; + PyObject *tmp = PyUnicode_FromUnicode(s, size); + if (tmp == NULL) + return NULL; + result = PyUnicode_AsUnicodeEscapeString(tmp); + Py_DECREF(tmp); + return result; +} + +/* --- Raw Unicode Escape Codec ------------------------------------------- */ + +PyObject * +PyUnicode_DecodeRawUnicodeEscape(const char *s, + Py_ssize_t size, + const char *errors) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + PyObject *v; + const char *end; + const char *bs; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + /* Escaped strings will always be longer than the resulting + Unicode string, so we start with size here and then reduce the + length after conversion to the true value. (But decoding error + handler might have to resize the string) */ + v = PyUnicode_New(size, 127); + if (v == NULL) + goto onError; + if (size == 0) + return v; + outpos = 0; + end = s + size; + while (s < end) { + unsigned char c; + Py_UCS4 x; + int i; + int count; + + /* Non-escape characters are interpreted as Unicode ordinals */ + if (*s != '\\') { + if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) + goto onError; + continue; + } + startinpos = s-starts; + + /* \u-escapes are only interpreted iff the number of leading + backslashes if odd */ + bs = s; + for (;s < end;) { + if (*s != '\\') + break; + if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) + goto onError; + } + if (((s - bs) & 1) == 0 || + s >= end || + (*s != 'u' && *s != 'U')) { + continue; + } + outpos--; + count = *s=='u' ? 4 : 8; + s++; + + /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ + for (x = 0, i = 0; i < count; ++i, ++s) { + c = (unsigned char)*s; + if (!Py_ISXDIGIT(c)) { + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "rawunicodeescape", "truncated \\uXXXX", + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) + goto onError; + goto nextByte; + } + x = (x<<4) & ~0xF; + if (c >= '0' && c <= '9') + x += c - '0'; + else if (c >= 'a' && c <= 'f') + x += 10 + c - 'a'; + else + x += 10 + c - 'A'; + } + if (x <= MAX_UNICODE) { + if (unicode_putchar(&v, &outpos, x) < 0) + goto onError; + } else { + endinpos = s-starts; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "rawunicodeescape", "\\Uxxxxxxxx out of range", + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) + goto onError; + } + nextByte: + ; + } + if (PyUnicode_Resize(&v, outpos) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(v); + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + + +PyObject * +PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) +{ + PyObject *repr; + char *p; + char *q; + Py_ssize_t expandsize, pos; + int kind; + void *data; + Py_ssize_t len; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(unicode) < 0) + return NULL; + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + len = PyUnicode_GET_LENGTH(unicode); + /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6 + bytes, and 1 byte characters 4. */ + expandsize = kind * 2 + 2; + + if (len > PY_SSIZE_T_MAX / expandsize) + return PyErr_NoMemory(); + + repr = PyBytes_FromStringAndSize(NULL, expandsize * len); + if (repr == NULL) + return NULL; + if (len == 0) + return repr; + + p = q = PyBytes_AS_STRING(repr); + for (pos = 0; pos < len; pos++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, pos); + /* Map 32-bit characters to '\Uxxxxxxxx' */ + if (ch >= 0x10000) { + assert(ch <= MAX_UNICODE); + *p++ = '\\'; + *p++ = 'U'; + *p++ = Py_hexdigits[(ch >> 28) & 0xf]; + *p++ = Py_hexdigits[(ch >> 24) & 0xf]; + *p++ = Py_hexdigits[(ch >> 20) & 0xf]; + *p++ = Py_hexdigits[(ch >> 16) & 0xf]; + *p++ = Py_hexdigits[(ch >> 12) & 0xf]; + *p++ = Py_hexdigits[(ch >> 8) & 0xf]; + *p++ = Py_hexdigits[(ch >> 4) & 0xf]; + *p++ = Py_hexdigits[ch & 15]; + } + /* Map 16-bit characters to '\uxxxx' */ + else if (ch >= 256) { + *p++ = '\\'; + *p++ = 'u'; + *p++ = Py_hexdigits[(ch >> 12) & 0xf]; + *p++ = Py_hexdigits[(ch >> 8) & 0xf]; + *p++ = Py_hexdigits[(ch >> 4) & 0xf]; + *p++ = Py_hexdigits[ch & 15]; + } + /* Copy everything else as-is */ + else + *p++ = (char) ch; + } + + assert(p > q); + if (_PyBytes_Resize(&repr, p - q) < 0) + return NULL; + return repr; +} + +PyObject * +PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, + Py_ssize_t size) +{ + PyObject *result; + PyObject *tmp = PyUnicode_FromUnicode(s, size); + if (tmp == NULL) + return NULL; + result = PyUnicode_AsRawUnicodeEscapeString(tmp); + Py_DECREF(tmp); + return result; +} + +/* --- Unicode Internal Codec ------------------------------------------- */ + +PyObject * +_PyUnicode_DecodeUnicodeInternal(const char *s, + Py_ssize_t size, + const char *errors) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + PyObject *v; + const char *end; + const char *reason; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "unicode_internal codec has been deprecated", + 1)) + return NULL; + + /* XXX overflow detection missing */ + v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127); + if (v == NULL) + goto onError; + if (PyUnicode_GET_LENGTH(v) == 0) + return v; + outpos = 0; + end = s + size; + + while (s < end) { + Py_UNICODE uch; + Py_UCS4 ch; + /* We copy the raw representation one byte at a time because the + pointer may be unaligned (see test_codeccallbacks). */ + ((char *) &uch)[0] = s[0]; + ((char *) &uch)[1] = s[1]; +#ifdef Py_UNICODE_WIDE + ((char *) &uch)[2] = s[2]; + ((char *) &uch)[3] = s[3]; +#endif + ch = uch; + + /* We have to sanity check the raw data, otherwise doom looms for + some malformed UCS-4 data. */ + if ( +#ifdef Py_UNICODE_WIDE + ch > 0x10ffff || +#endif + end-s < Py_UNICODE_SIZE + ) + { + startinpos = s - starts; + if (end-s < Py_UNICODE_SIZE) { + endinpos = end-starts; + reason = "truncated input"; + } + else { + endinpos = s - starts + Py_UNICODE_SIZE; + reason = "illegal code point (> 0x10FFFF)"; + } + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicode_internal", reason, + &starts, &end, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) + goto onError; + continue; + } + + s += Py_UNICODE_SIZE; +#ifndef Py_UNICODE_WIDE + if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end) + { + Py_UNICODE uch2; + ((char *) &uch2)[0] = s[0]; + ((char *) &uch2)[1] = s[1]; + if (Py_UNICODE_IS_LOW_SURROGATE(uch2)) + { + ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2); + s += Py_UNICODE_SIZE; + } + } +#endif + + if (unicode_putchar(&v, &outpos, ch) < 0) + goto onError; + } + + if (PyUnicode_Resize(&v, outpos) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(v); + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +/* --- Latin-1 Codec ------------------------------------------------------ */ + +PyObject * +PyUnicode_DecodeLatin1(const char *s, + Py_ssize_t size, + const char *errors) +{ + /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ + return _PyUnicode_FromUCS1((unsigned char*)s, size); +} + +/* create or adjust a UnicodeEncodeError */ +static void +make_encode_exception(PyObject **exceptionObject, + const char *encoding, + PyObject *unicode, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) +{ + if (*exceptionObject == NULL) { + *exceptionObject = PyObject_CallFunction( + PyExc_UnicodeEncodeError, "sOnns", + encoding, unicode, startpos, endpos, reason); + } + else { + if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos)) + goto onError; + if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos)) + goto onError; + if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason)) + goto onError; + return; + onError: + Py_DECREF(*exceptionObject); + *exceptionObject = NULL; + } +} + +/* raises a UnicodeEncodeError */ +static void +raise_encode_exception(PyObject **exceptionObject, + const char *encoding, + PyObject *unicode, + Py_ssize_t startpos, Py_ssize_t endpos, + const char *reason) +{ + make_encode_exception(exceptionObject, + encoding, unicode, startpos, endpos, reason); + if (*exceptionObject != NULL) + PyCodec_StrictErrors(*exceptionObject); +} + +/* error handling callback helper: + build arguments, call the callback and check the arguments, + put the result into newpos and return the replacement string, which + has to be freed by the caller */ +static PyObject * +unicode_encode_call_errorhandler(const char *errors, + PyObject **errorHandler, + const char *encoding, const char *reason, + PyObject *unicode, PyObject **exceptionObject, + Py_ssize_t startpos, Py_ssize_t endpos, + Py_ssize_t *newpos) +{ + static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple"; + Py_ssize_t len; + PyObject *restuple; + PyObject *resunicode; + + if (*errorHandler == NULL) { + *errorHandler = PyCodec_LookupError(errors); + if (*errorHandler == NULL) + return NULL; + } + + if (PyUnicode_READY(unicode) < 0) + return NULL; + len = PyUnicode_GET_LENGTH(unicode); + + make_encode_exception(exceptionObject, + encoding, unicode, startpos, endpos, reason); + if (*exceptionObject == NULL) + return NULL; + + restuple = PyObject_CallFunctionObjArgs( + *errorHandler, *exceptionObject, NULL); + if (restuple == NULL) + return NULL; + if (!PyTuple_Check(restuple)) { + PyErr_SetString(PyExc_TypeError, &argparse[3]); + Py_DECREF(restuple); + return NULL; + } + if (!PyArg_ParseTuple(restuple, argparse, + &resunicode, newpos)) { + Py_DECREF(restuple); + return NULL; + } + if (!PyUnicode_Check(resunicode) && !PyBytes_Check(resunicode)) { + PyErr_SetString(PyExc_TypeError, &argparse[3]); + Py_DECREF(restuple); + return NULL; + } + if (*newpos<0) + *newpos = len + *newpos; + if (*newpos<0 || *newpos>len) { + PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos); + Py_DECREF(restuple); + return NULL; + } + Py_INCREF(resunicode); + Py_DECREF(restuple); + return resunicode; +} + +static PyObject * +unicode_encode_ucs1(PyObject *unicode, + const char *errors, + unsigned int limit) +{ + /* input state */ + Py_ssize_t pos=0, size; + int kind; + void *data; + /* output object */ + PyObject *res; + /* pointer into the output */ + char *str; + /* current output position */ + Py_ssize_t ressize; + const char *encoding = (limit == 256) ? "latin-1" : "ascii"; + const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + /* the following variable is used for caching string comparisons + * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ + int known_errorHandler = -1; + + if (PyUnicode_READY(unicode) < 0) + return NULL; + size = PyUnicode_GET_LENGTH(unicode); + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + /* allocate enough for a simple encoding without + replacements, if we need more, we'll resize */ + if (size == 0) + return PyBytes_FromStringAndSize(NULL, 0); + res = PyBytes_FromStringAndSize(NULL, size); + if (res == NULL) + return NULL; + str = PyBytes_AS_STRING(res); + ressize = size; + + while (pos < size) { + Py_UCS4 c = PyUnicode_READ(kind, data, pos); + + /* can we encode this? */ + if (c=limit)) + ++collend; + /* cache callback name lookup (if not done yet, i.e. it's the first error) */ + if (known_errorHandler==-1) { + if ((errors==NULL) || (!strcmp(errors, "strict"))) + known_errorHandler = 1; + else if (!strcmp(errors, "replace")) + known_errorHandler = 2; + else if (!strcmp(errors, "ignore")) + known_errorHandler = 3; + else if (!strcmp(errors, "xmlcharrefreplace")) + known_errorHandler = 4; + else + known_errorHandler = 0; + } + switch (known_errorHandler) { + case 1: /* strict */ + raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); + goto onError; + case 2: /* replace */ + while (collstart++ ressize) { + if (requiredsize<2*ressize) + requiredsize = 2*ressize; + if (_PyBytes_Resize(&res, requiredsize)) + goto onError; + str = PyBytes_AS_STRING(res) + respos; + ressize = requiredsize; + } + /* generate replacement */ + for (i = collstart; i < collend; ++i) { + str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); + } + pos = collend; + break; + default: + repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, + encoding, reason, unicode, &exc, + collstart, collend, &newpos); + if (repunicode == NULL || (PyUnicode_Check(repunicode) && + PyUnicode_READY(repunicode) < 0)) + goto onError; + if (PyBytes_Check(repunicode)) { + /* Directly copy bytes result to output. */ + repsize = PyBytes_Size(repunicode); + if (repsize > 1) { + /* Make room for all additional bytes. */ + respos = str - PyBytes_AS_STRING(res); + if (_PyBytes_Resize(&res, ressize+repsize-1)) { + Py_DECREF(repunicode); + goto onError; + } + str = PyBytes_AS_STRING(res) + respos; + ressize += repsize-1; + } + memcpy(str, PyBytes_AsString(repunicode), repsize); + str += repsize; + pos = newpos; + Py_DECREF(repunicode); + break; + } + /* need more space? (at least enough for what we + have+the replacement+the rest of the string, so + we won't have to check space for encodable characters) */ + respos = str - PyBytes_AS_STRING(res); + repsize = PyUnicode_GET_LENGTH(repunicode); + requiredsize = respos+repsize+(size-collend); + if (requiredsize > ressize) { + if (requiredsize<2*ressize) + requiredsize = 2*ressize; + if (_PyBytes_Resize(&res, requiredsize)) { + Py_DECREF(repunicode); + goto onError; + } + str = PyBytes_AS_STRING(res) + respos; + ressize = requiredsize; + } + /* check if there is anything unencodable in the replacement + and copy it to the output */ + for (i = 0; repsize-->0; ++i, ++str) { + c = PyUnicode_READ_CHAR(repunicode, i); + if (c >= limit) { + raise_encode_exception(&exc, encoding, unicode, + pos, pos+1, reason); + Py_DECREF(repunicode); + goto onError; + } + *str = (char)c; + } + pos = newpos; + Py_DECREF(repunicode); + } + } + } + /* Resize if we allocated to much */ + size = str - PyBytes_AS_STRING(res); + if (size < ressize) { /* If this falls res will be NULL */ + assert(size >= 0); + if (_PyBytes_Resize(&res, size) < 0) + goto onError; + } + + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return res; + + onError: + Py_XDECREF(res); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +/* Deprecated */ +PyObject * +PyUnicode_EncodeLatin1(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + result = unicode_encode_ucs1(unicode, errors, 256); + Py_DECREF(unicode); + return result; +} + +PyObject * +_PyUnicode_AsLatin1String(PyObject *unicode, const char *errors) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(unicode) == -1) + return NULL; + /* Fast path: if it is a one-byte string, construct + bytes object directly. */ + if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) + return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), + PyUnicode_GET_LENGTH(unicode)); + /* Non-Latin-1 characters present. Defer to above function to + raise the exception. */ + return unicode_encode_ucs1(unicode, errors, 256); +} + +PyObject* +PyUnicode_AsLatin1String(PyObject *unicode) +{ + return _PyUnicode_AsLatin1String(unicode, NULL); +} + +/* --- 7-bit ASCII Codec -------------------------------------------------- */ + +PyObject * +PyUnicode_DecodeASCII(const char *s, + Py_ssize_t size, + const char *errors) +{ + const char *starts = s; + PyObject *v; + int kind; + void *data; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + const char *e; + int has_error; + const unsigned char *p = (const unsigned char *)s; + const unsigned char *end = p + size; + const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + if (size == 0) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* ASCII is equivalent to the first 128 ordinals in Unicode. */ + if (size == 1 && (unsigned char)s[0] < 128) + return get_latin1_char((unsigned char)s[0]); + + has_error = 0; + while (p < end && !has_error) { + /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for + an explanation. */ + if (!((size_t) p & LONG_PTR_MASK)) { + /* Help register allocation */ + register const unsigned char *_p = p; + while (_p < aligned_end) { + unsigned long value = *(unsigned long *) _p; + if (value & ASCII_CHAR_MASK) { + has_error = 1; + break; + } + _p += SIZEOF_LONG; + } + if (_p == end) + break; + if (has_error) + break; + p = _p; + } + if (*p & 0x80) { + has_error = 1; + break; + } + else { + ++p; + } + } + if (!has_error) + return unicode_fromascii((const unsigned char *)s, size); + + v = PyUnicode_New(size, 127); + if (v == NULL) + goto onError; + if (size == 0) + return v; + kind = PyUnicode_KIND(v); + data = PyUnicode_DATA(v); + outpos = 0; + e = s + size; + while (s < e) { + register unsigned char c = (unsigned char)*s; + if (c < 128) { + PyUnicode_WRITE(kind, data, outpos++, c); + ++s; + } + else { + startinpos = s-starts; + endinpos = startinpos + 1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "ascii", "ordinal not in range(128)", + &starts, &e, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) + goto onError; + kind = PyUnicode_KIND(v); + data = PyUnicode_DATA(v); + } + } + if (PyUnicode_Resize(&v, outpos) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + assert(_PyUnicode_CheckConsistency(v, 1)); + return v; + + onError: + Py_XDECREF(v); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return NULL; +} + +/* Deprecated */ +PyObject * +PyUnicode_EncodeASCII(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + result = unicode_encode_ucs1(unicode, errors, 128); + Py_DECREF(unicode); + return result; +} + +PyObject * +_PyUnicode_AsASCIIString(PyObject *unicode, const char *errors) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyUnicode_READY(unicode) == -1) + return NULL; + /* Fast path: if it is an ASCII-only string, construct bytes object + directly. Else defer to above function to raise the exception. */ + if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) + return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), + PyUnicode_GET_LENGTH(unicode)); + return unicode_encode_ucs1(unicode, errors, 128); +} + +PyObject * +PyUnicode_AsASCIIString(PyObject *unicode) +{ + return _PyUnicode_AsASCIIString(unicode, NULL); +} + +#ifdef HAVE_MBCS + +/* --- MBCS codecs for Windows -------------------------------------------- */ + +#if SIZEOF_INT < SIZEOF_SIZE_T +#define NEED_RETRY +#endif + +#ifndef WC_ERR_INVALID_CHARS +# define WC_ERR_INVALID_CHARS 0x0080 +#endif + +static char* +code_page_name(UINT code_page, PyObject **obj) +{ + *obj = NULL; + if (code_page == CP_ACP) + return "mbcs"; + if (code_page == CP_UTF7) + return "CP_UTF7"; + if (code_page == CP_UTF8) + return "CP_UTF8"; + + *obj = PyBytes_FromFormat("cp%u", code_page); + if (*obj == NULL) + return NULL; + return PyBytes_AS_STRING(*obj); +} + +static int +is_dbcs_lead_byte(UINT code_page, const char *s, int offset) +{ + const char *curr = s + offset; + const char *prev; + + if (!IsDBCSLeadByteEx(code_page, *curr)) + return 0; + + prev = CharPrevExA(code_page, s, curr, 0); + if (prev == curr) + return 1; + /* FIXME: This code is limited to "true" double-byte encodings, + as it assumes an incomplete character consists of a single + byte. */ + if (curr - prev == 2) + return 1; + if (!IsDBCSLeadByteEx(code_page, *prev)) + return 1; + return 0; +} + +static DWORD +decode_code_page_flags(UINT code_page) +{ + if (code_page == CP_UTF7) { + /* The CP_UTF7 decoder only supports flags=0 */ + return 0; + } + else + return MB_ERR_INVALID_CHARS; +} + +/* + * Decode a byte string from a Windows code page into unicode object in strict + * mode. + * + * Returns consumed size if succeed, returns -2 on decode error, or raise a + * WindowsError and returns -1 on other error. + */ +static int +decode_code_page_strict(UINT code_page, + PyObject **v, + const char *in, + int insize) +{ + const DWORD flags = decode_code_page_flags(code_page); + wchar_t *out; + DWORD outsize; + + /* First get the size of the result */ + assert(insize > 0); + outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0); + if (outsize <= 0) + goto error; + + if (*v == NULL) { + /* Create unicode object */ + *v = (PyObject*)_PyUnicode_New(outsize); + if (*v == NULL) + return -1; + out = PyUnicode_AS_UNICODE(*v); + } + else { + /* Extend unicode object */ + Py_ssize_t n = PyUnicode_GET_SIZE(*v); + if (PyUnicode_Resize(v, n + outsize) < 0) + return -1; + out = PyUnicode_AS_UNICODE(*v) + n; + } + + /* Do the conversion */ + outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize); + if (outsize <= 0) + goto error; + return insize; + +error: + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) + return -2; + PyErr_SetFromWindowsErr(0); + return -1; +} + +/* + * Decode a byte string from a code page into unicode object with an error + * handler. + * + * Returns consumed size if succeed, or raise a WindowsError or + * UnicodeDecodeError exception and returns -1 on error. + */ +static int +decode_code_page_errors(UINT code_page, + PyObject **v, + const char *in, const int size, + const char *errors) +{ + const char *startin = in; + const char *endin = in + size; + const DWORD flags = decode_code_page_flags(code_page); + /* Ideally, we should get reason from FormatMessage. This is the Windows + 2000 English version of the message. */ + const char *reason = "No mapping for the Unicode character exists " + "in the target code page."; + /* each step cannot decode more than 1 character, but a character can be + represented as a surrogate pair */ + wchar_t buffer[2], *startout, *out; + int insize, outsize; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *encoding_obj = NULL; + char *encoding; + DWORD err; + int ret = -1; + + assert(size > 0); + + encoding = code_page_name(code_page, &encoding_obj); + if (encoding == NULL) + return -1; + + if (errors == NULL || strcmp(errors, "strict") == 0) { + /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a + UnicodeDecodeError. */ + make_decode_exception(&exc, encoding, in, size, 0, 0, reason); + if (exc != NULL) { + PyCodec_StrictErrors(exc); + Py_CLEAR(exc); + } + goto error; + } + + if (*v == NULL) { + /* Create unicode object */ + if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + PyErr_NoMemory(); + goto error; + } + *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer)); + if (*v == NULL) + goto error; + startout = PyUnicode_AS_UNICODE(*v); + } + else { + /* Extend unicode object */ + Py_ssize_t n = PyUnicode_GET_SIZE(*v); + if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + PyErr_NoMemory(); + goto error; + } + if (PyUnicode_Resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) + goto error; + startout = PyUnicode_AS_UNICODE(*v) + n; + } + + /* Decode the byte string character per character */ + out = startout; + while (in < endin) + { + /* Decode a character */ + insize = 1; + do + { + outsize = MultiByteToWideChar(code_page, flags, + in, insize, + buffer, Py_ARRAY_LENGTH(buffer)); + if (outsize > 0) + break; + err = GetLastError(); + if (err != ERROR_NO_UNICODE_TRANSLATION + && err != ERROR_INSUFFICIENT_BUFFER) + { + PyErr_SetFromWindowsErr(0); + goto error; + } + insize++; + } + /* 4=maximum length of a UTF-8 sequence */ + while (insize <= 4 && (in + insize) <= endin); + + if (outsize <= 0) { + Py_ssize_t startinpos, endinpos, outpos; + + startinpos = in - startin; + endinpos = startinpos + 1; + outpos = out - PyUnicode_AS_UNICODE(*v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + encoding, reason, + &startin, &endin, &startinpos, &endinpos, &exc, &in, + v, &outpos)) + { + goto error; + } + out = PyUnicode_AS_UNICODE(*v) + outpos; + } + else { + in += insize; + memcpy(out, buffer, outsize * sizeof(wchar_t)); + out += outsize; + } + } + + /* write a NUL character at the end */ + *out = 0; + + /* Extend unicode object */ + outsize = out - startout; + assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); + if (PyUnicode_Resize(v, outsize) < 0) + goto error; + ret = size; + +error: + Py_XDECREF(encoding_obj); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return ret; +} + +static PyObject * +decode_code_page_stateful(int code_page, + const char *s, Py_ssize_t size, + const char *errors, Py_ssize_t *consumed) +{ + PyObject *v = NULL; + int chunk_size, final, converted, done; + + if (code_page < 0) { + PyErr_SetString(PyExc_ValueError, "invalid code page number"); + return NULL; + } + + if (consumed) + *consumed = 0; + + do + { +#ifdef NEED_RETRY + if (size > INT_MAX) { + chunk_size = INT_MAX; + final = 0; + done = 0; + } + else +#endif + { + chunk_size = (int)size; + final = (consumed == NULL); + done = 1; + } + + /* Skip trailing lead-byte unless 'final' is set */ + if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1)) + --chunk_size; + + if (chunk_size == 0 && done) { + if (v != NULL) + break; + Py_INCREF(unicode_empty); + return unicode_empty; + } + + + converted = decode_code_page_strict(code_page, &v, + s, chunk_size); + if (converted == -2) + converted = decode_code_page_errors(code_page, &v, + s, chunk_size, + errors); + assert(converted != 0); + + if (converted < 0) { + Py_XDECREF(v); + return NULL; + } + + if (consumed) + *consumed += converted; + + s += converted; + size -= converted; + } while (!done); + + return unicode_result(v); +} + +PyObject * +PyUnicode_DecodeCodePageStateful(int code_page, + const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + return decode_code_page_stateful(code_page, s, size, errors, consumed); +} + +PyObject * +PyUnicode_DecodeMBCSStateful(const char *s, + Py_ssize_t size, + const char *errors, + Py_ssize_t *consumed) +{ + return decode_code_page_stateful(CP_ACP, s, size, errors, consumed); +} + +PyObject * +PyUnicode_DecodeMBCS(const char *s, + Py_ssize_t size, + const char *errors) +{ + return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); +} + +static DWORD +encode_code_page_flags(UINT code_page, const char *errors) +{ + if (code_page == CP_UTF8) { + if (winver.dwMajorVersion >= 6) + /* CP_UTF8 supports WC_ERR_INVALID_CHARS on Windows Vista + and later */ + return WC_ERR_INVALID_CHARS; + else + /* CP_UTF8 only supports flags=0 on Windows older than Vista */ + return 0; + } + else if (code_page == CP_UTF7) { + /* CP_UTF7 only supports flags=0 */ + return 0; + } + else { + if (errors != NULL && strcmp(errors, "replace") == 0) + return 0; + else + return WC_NO_BEST_FIT_CHARS; + } +} + +/* + * Encode a Unicode string to a Windows code page into a byte string in strict + * mode. + * + * Returns consumed characters if succeed, returns -2 on encode error, or raise + * a WindowsError and returns -1 on other error. + */ +static int +encode_code_page_strict(UINT code_page, PyObject **outbytes, + PyObject *unicode, Py_ssize_t offset, int len, + const char* errors) +{ + BOOL usedDefaultChar = FALSE; + BOOL *pusedDefaultChar = &usedDefaultChar; + int outsize; + PyObject *exc = NULL; + wchar_t *p; + Py_ssize_t size; + const DWORD flags = encode_code_page_flags(code_page, NULL); + char *out; + /* Create a substring so that we can get the UTF-16 representation + of just the slice under consideration. */ + PyObject *substring; + + assert(len > 0); + + if (code_page != CP_UTF8 && code_page != CP_UTF7) + pusedDefaultChar = &usedDefaultChar; + else + pusedDefaultChar = NULL; + + substring = PyUnicode_Substring(unicode, offset, offset+len); + if (substring == NULL) + return -1; + p = PyUnicode_AsUnicodeAndSize(substring, &size); + if (p == NULL) { + Py_DECREF(substring); + return -1; + } + + /* First get the size of the result */ + outsize = WideCharToMultiByte(code_page, flags, + p, size, + NULL, 0, + NULL, pusedDefaultChar); + if (outsize <= 0) + goto error; + /* If we used a default char, then we failed! */ + if (pusedDefaultChar && *pusedDefaultChar) { + Py_DECREF(substring); + return -2; + } + + if (*outbytes == NULL) { + /* Create string object */ + *outbytes = PyBytes_FromStringAndSize(NULL, outsize); + if (*outbytes == NULL) { + Py_DECREF(substring); + return -1; + } + out = PyBytes_AS_STRING(*outbytes); + } + else { + /* Extend string object */ + const Py_ssize_t n = PyBytes_Size(*outbytes); + if (outsize > PY_SSIZE_T_MAX - n) { + PyErr_NoMemory(); + Py_DECREF(substring); + return -1; + } + if (_PyBytes_Resize(outbytes, n + outsize) < 0) { + Py_DECREF(substring); + return -1; + } + out = PyBytes_AS_STRING(*outbytes) + n; + } + + /* Do the conversion */ + outsize = WideCharToMultiByte(code_page, flags, + p, size, + out, outsize, + NULL, pusedDefaultChar); + Py_CLEAR(substring); + if (outsize <= 0) + goto error; + if (pusedDefaultChar && *pusedDefaultChar) + return -2; + return 0; + +error: + Py_XDECREF(substring); + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) + return -2; + PyErr_SetFromWindowsErr(0); + return -1; +} + +/* + * Encode a Unicode string to a Windows code page into a byte string using a + * error handler. + * + * Returns consumed characters if succeed, or raise a WindowsError and returns + * -1 on other error. + */ +static int +encode_code_page_errors(UINT code_page, PyObject **outbytes, + PyObject *unicode, Py_ssize_t unicode_offset, + Py_ssize_t insize, const char* errors) +{ + const DWORD flags = encode_code_page_flags(code_page, errors); + Py_ssize_t pos = unicode_offset; + Py_ssize_t endin = unicode_offset + insize; + /* Ideally, we should get reason from FormatMessage. This is the Windows + 2000 English version of the message. */ + const char *reason = "invalid character"; + /* 4=maximum length of a UTF-8 sequence */ + char buffer[4]; + BOOL usedDefaultChar = FALSE, *pusedDefaultChar; + Py_ssize_t outsize; + char *out; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + PyObject *encoding_obj = NULL; + char *encoding; + Py_ssize_t newpos, newoutsize; + PyObject *rep; + int ret = -1; + + assert(insize > 0); + + encoding = code_page_name(code_page, &encoding_obj); + if (encoding == NULL) + return -1; + + if (errors == NULL || strcmp(errors, "strict") == 0) { + /* The last error was ERROR_NO_UNICODE_TRANSLATION, + then we raise a UnicodeEncodeError. */ + make_encode_exception(&exc, encoding, unicode, 0, 0, reason); + if (exc != NULL) { + PyCodec_StrictErrors(exc); + Py_DECREF(exc); + } + Py_XDECREF(encoding_obj); + return -1; + } + + if (code_page != CP_UTF8 && code_page != CP_UTF7) + pusedDefaultChar = &usedDefaultChar; + else + pusedDefaultChar = NULL; + + if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) { + PyErr_NoMemory(); + goto error; + } + outsize = insize * Py_ARRAY_LENGTH(buffer); + + if (*outbytes == NULL) { + /* Create string object */ + *outbytes = PyBytes_FromStringAndSize(NULL, outsize); + if (*outbytes == NULL) + goto error; + out = PyBytes_AS_STRING(*outbytes); + } + else { + /* Extend string object */ + Py_ssize_t n = PyBytes_Size(*outbytes); + if (n > PY_SSIZE_T_MAX - outsize) { + PyErr_NoMemory(); + goto error; + } + if (_PyBytes_Resize(outbytes, n + outsize) < 0) + goto error; + out = PyBytes_AS_STRING(*outbytes) + n; + } + + /* Encode the string character per character */ + while (pos < endin) + { + Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, pos); + wchar_t chars[2]; + int charsize; + if (ch < 0x10000) { + chars[0] = (wchar_t)ch; + charsize = 1; + } + else { + ch -= 0x10000; + chars[0] = 0xd800 + (ch >> 10); + chars[1] = 0xdc00 + (ch & 0x3ff); + charsize = 2; + } + + outsize = WideCharToMultiByte(code_page, flags, + chars, charsize, + buffer, Py_ARRAY_LENGTH(buffer), + NULL, pusedDefaultChar); + if (outsize > 0) { + if (pusedDefaultChar == NULL || !(*pusedDefaultChar)) + { + pos++; + memcpy(out, buffer, outsize); + out += outsize; + continue; + } + } + else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) { + PyErr_SetFromWindowsErr(0); + goto error; + } + + rep = unicode_encode_call_errorhandler( + errors, &errorHandler, encoding, reason, + unicode, &exc, + pos, pos + 1, &newpos); + if (rep == NULL) + goto error; + pos = newpos; + + if (PyBytes_Check(rep)) { + outsize = PyBytes_GET_SIZE(rep); + if (outsize != 1) { + Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); + newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + if (_PyBytes_Resize(outbytes, newoutsize) < 0) { + Py_DECREF(rep); + goto error; + } + out = PyBytes_AS_STRING(*outbytes) + offset; + } + memcpy(out, PyBytes_AS_STRING(rep), outsize); + out += outsize; + } + else { + Py_ssize_t i; + enum PyUnicode_Kind kind; + void *data; + + if (PyUnicode_READY(rep) < 0) { + Py_DECREF(rep); + goto error; + } + + outsize = PyUnicode_GET_LENGTH(rep); + if (outsize != 1) { + Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); + newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + if (_PyBytes_Resize(outbytes, newoutsize) < 0) { + Py_DECREF(rep); + goto error; + } + out = PyBytes_AS_STRING(*outbytes) + offset; + } + kind = PyUnicode_KIND(rep); + data = PyUnicode_DATA(rep); + for (i=0; i < outsize; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch > 127) { + raise_encode_exception(&exc, + encoding, unicode, + pos, pos + 1, + "unable to encode error handler result to ASCII"); + Py_DECREF(rep); + goto error; + } + *out = (unsigned char)ch; + out++; + } + } + Py_DECREF(rep); + } + /* write a NUL byte */ + *out = 0; + outsize = out - PyBytes_AS_STRING(*outbytes); + assert(outsize <= PyBytes_GET_SIZE(*outbytes)); + if (_PyBytes_Resize(outbytes, outsize) < 0) + goto error; + ret = 0; + +error: + Py_XDECREF(encoding_obj); + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return ret; +} + +static PyObject * +encode_code_page(int code_page, + PyObject *unicode, + const char *errors) +{ + Py_ssize_t len; + PyObject *outbytes = NULL; + Py_ssize_t offset; + int chunk_len, ret, done; + + if (PyUnicode_READY(unicode) < 0) + return NULL; + len = PyUnicode_GET_LENGTH(unicode); + + if (code_page < 0) { + PyErr_SetString(PyExc_ValueError, "invalid code page number"); + return NULL; + } + + if (len == 0) + return PyBytes_FromStringAndSize(NULL, 0); + + offset = 0; + do + { +#ifdef NEED_RETRY + /* UTF-16 encoding may double the size, so use only INT_MAX/2 + chunks. */ + if (len > INT_MAX/2) { + chunk_len = INT_MAX/2; + done = 0; + } + else +#endif + { + chunk_len = (int)len; + done = 1; + } + + ret = encode_code_page_strict(code_page, &outbytes, + unicode, offset, chunk_len, + errors); + if (ret == -2) + ret = encode_code_page_errors(code_page, &outbytes, + unicode, offset, + chunk_len, errors); + if (ret < 0) { + Py_XDECREF(outbytes); + return NULL; + } + + offset += chunk_len; + len -= chunk_len; + } while (!done); + + return outbytes; +} + +PyObject * +PyUnicode_EncodeMBCS(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *unicode, *res; + unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + res = encode_code_page(CP_ACP, unicode, errors); + Py_DECREF(unicode); + return res; +} + +PyObject * +PyUnicode_EncodeCodePage(int code_page, + PyObject *unicode, + const char *errors) +{ + return encode_code_page(code_page, unicode, errors); +} + +PyObject * +PyUnicode_AsMBCSString(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); +} + +#undef NEED_RETRY + +#endif /* HAVE_MBCS */ + +/* --- Character Mapping Codec -------------------------------------------- */ + +PyObject * +PyUnicode_DecodeCharmap(const char *s, + Py_ssize_t size, + PyObject *mapping, + const char *errors) +{ + const char *starts = s; + Py_ssize_t startinpos; + Py_ssize_t endinpos; + Py_ssize_t outpos; + const char *e; + PyObject *v; + Py_ssize_t extrachars = 0; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + + /* Default to Latin-1 */ + if (mapping == NULL) + return PyUnicode_DecodeLatin1(s, size, errors); + + v = PyUnicode_New(size, 127); + if (v == NULL) + goto onError; + if (size == 0) + return v; + outpos = 0; + e = s + size; + if (PyUnicode_CheckExact(mapping)) { + Py_ssize_t maplen; + enum PyUnicode_Kind kind; + void *data; + Py_UCS4 x; + + if (PyUnicode_READY(mapping) < 0) + return NULL; + + maplen = PyUnicode_GET_LENGTH(mapping); + data = PyUnicode_DATA(mapping); + kind = PyUnicode_KIND(mapping); + while (s < e) { + unsigned char ch = *s; + + if (ch < maplen) + x = PyUnicode_READ(kind, data, ch); + else + x = 0xfffe; /* invalid value */ + + if (x == 0xfffe) + { + /* undefined mapping */ + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + &starts, &e, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) { + goto onError; + } + continue; + } + + if (unicode_putchar(&v, &outpos, x) < 0) + goto onError; + ++s; + } + } + else { + while (s < e) { + unsigned char ch = *s; + PyObject *w, *x; + + /* Get mapping (char ordinal -> integer, Unicode char or None) */ + w = PyLong_FromLong((long)ch); + if (w == NULL) + goto onError; + x = PyObject_GetItem(mapping, w); + Py_DECREF(w); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: mapping is undefined. */ + PyErr_Clear(); + x = Py_None; + Py_INCREF(x); + } else + goto onError; + } + + /* Apply mapping */ + if (PyLong_Check(x)) { + long value = PyLong_AS_LONG(x); + if (value < 0 || value > 65535) { + PyErr_SetString(PyExc_TypeError, + "character mapping must be in range(65536)"); + Py_DECREF(x); + goto onError; + } + if (unicode_putchar(&v, &outpos, value) < 0) + goto onError; + } + else if (x == Py_None) { + /* undefined mapping */ + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + &starts, &e, &startinpos, &endinpos, &exc, &s, + &v, &outpos)) { + Py_DECREF(x); + goto onError; + } + Py_DECREF(x); + continue; + } + else if (PyUnicode_Check(x)) { + Py_ssize_t targetsize; + + if (PyUnicode_READY(x) < 0) + goto onError; + targetsize = PyUnicode_GET_LENGTH(x); + + if (targetsize == 1) { + /* 1-1 mapping */ + if (unicode_putchar(&v, &outpos, + PyUnicode_READ_CHAR(x, 0)) < 0) + goto onError; + } + else if (targetsize > 1) { + /* 1-n mapping */ + if (targetsize > extrachars) { + /* resize first */ + Py_ssize_t needed = (targetsize - extrachars) + \ + (targetsize << 2); + extrachars += needed; + /* XXX overflow detection missing */ + if (PyUnicode_Resize(&v, + PyUnicode_GET_LENGTH(v) + needed) < 0) { + Py_DECREF(x); + goto onError; + } + } + if (unicode_widen(&v, PyUnicode_MAX_CHAR_VALUE(x)) < 0) + goto onError; + PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize); + outpos += targetsize; + extrachars -= targetsize; + } + /* 1-0 mapping: skip the character */ + } + else { + /* wrong return value */ + PyErr_SetString(PyExc_TypeError, + "character mapping must return integer, None or str"); + Py_DECREF(x); + goto onError; + } + Py_DECREF(x); + ++s; + } + } + if (PyUnicode_Resize(&v, outpos) < 0) + goto onError; + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + return unicode_result(v); + + onError: + Py_XDECREF(errorHandler); + Py_XDECREF(exc); + Py_XDECREF(v); + return NULL; +} + +/* Charmap encoding: the lookup table */ + +struct encoding_map { + PyObject_HEAD + unsigned char level1[32]; + int count2, count3; + unsigned char level23[1]; +}; + +static PyObject* +encoding_map_size(PyObject *obj, PyObject* args) +{ + struct encoding_map *map = (struct encoding_map*)obj; + return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 + + 128*map->count3); +} + +static PyMethodDef encoding_map_methods[] = { + {"size", encoding_map_size, METH_NOARGS, + PyDoc_STR("Return the size (in bytes) of this object") }, + { 0 } +}; + +static void +encoding_map_dealloc(PyObject* o) +{ + PyObject_FREE(o); +} + +static PyTypeObject EncodingMapType = { + PyVarObject_HEAD_INIT(NULL, 0) + "EncodingMap", /*tp_name*/ + sizeof(struct encoding_map), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + encoding_map_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + 0, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + encoding_map_methods, /*tp_methods*/ + 0, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + 0, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ + 0, /*tp_free*/ + 0, /*tp_is_gc*/ +}; + +PyObject* +PyUnicode_BuildEncodingMap(PyObject* string) +{ + PyObject *result; + struct encoding_map *mresult; + int i; + int need_dict = 0; + unsigned char level1[32]; + unsigned char level2[512]; + unsigned char *mlevel1, *mlevel2, *mlevel3; + int count2 = 0, count3 = 0; + int kind; + void *data; + Py_UCS4 ch; + + if (!PyUnicode_Check(string) || PyUnicode_GET_LENGTH(string) != 256) { + PyErr_BadArgument(); + return NULL; + } + kind = PyUnicode_KIND(string); + data = PyUnicode_DATA(string); + memset(level1, 0xFF, sizeof level1); + memset(level2, 0xFF, sizeof level2); + + /* If there isn't a one-to-one mapping of NULL to \0, + or if there are non-BMP characters, we need to use + a mapping dictionary. */ + if (PyUnicode_READ(kind, data, 0) != 0) + need_dict = 1; + for (i = 1; i < 256; i++) { + int l1, l2; + ch = PyUnicode_READ(kind, data, i); + if (ch == 0 || ch > 0xFFFF) { + need_dict = 1; + break; + } + if (ch == 0xFFFE) + /* unmapped character */ + continue; + l1 = ch >> 11; + l2 = ch >> 7; + if (level1[l1] == 0xFF) + level1[l1] = count2++; + if (level2[l2] == 0xFF) + level2[l2] = count3++; + } + + if (count2 >= 0xFF || count3 >= 0xFF) + need_dict = 1; + + if (need_dict) { + PyObject *result = PyDict_New(); + PyObject *key, *value; + if (!result) + return NULL; + for (i = 0; i < 256; i++) { + key = PyLong_FromLong(PyUnicode_READ(kind, data, i)); + value = PyLong_FromLong(i); + if (!key || !value) + goto failed1; + if (PyDict_SetItem(result, key, value) == -1) + goto failed1; + Py_DECREF(key); + Py_DECREF(value); + } + return result; + failed1: + Py_XDECREF(key); + Py_XDECREF(value); + Py_DECREF(result); + return NULL; + } + + /* Create a three-level trie */ + result = PyObject_MALLOC(sizeof(struct encoding_map) + + 16*count2 + 128*count3 - 1); + if (!result) + return PyErr_NoMemory(); + PyObject_Init(result, &EncodingMapType); + mresult = (struct encoding_map*)result; + mresult->count2 = count2; + mresult->count3 = count3; + mlevel1 = mresult->level1; + mlevel2 = mresult->level23; + mlevel3 = mresult->level23 + 16*count2; + memcpy(mlevel1, level1, 32); + memset(mlevel2, 0xFF, 16*count2); + memset(mlevel3, 0, 128*count3); + count3 = 0; + for (i = 1; i < 256; i++) { + int o1, o2, o3, i2, i3; + if (PyUnicode_READ(kind, data, i) == 0xFFFE) + /* unmapped character */ + continue; + o1 = PyUnicode_READ(kind, data, i)>>11; + o2 = (PyUnicode_READ(kind, data, i)>>7) & 0xF; + i2 = 16*mlevel1[o1] + o2; + if (mlevel2[i2] == 0xFF) + mlevel2[i2] = count3++; + o3 = PyUnicode_READ(kind, data, i) & 0x7F; + i3 = 128*mlevel2[i2] + o3; + mlevel3[i3] = i; + } + return result; +} + +static int +encoding_map_lookup(Py_UCS4 c, PyObject *mapping) +{ + struct encoding_map *map = (struct encoding_map*)mapping; + int l1 = c>>11; + int l2 = (c>>7) & 0xF; + int l3 = c & 0x7F; + int i; + + if (c > 0xFFFF) + return -1; + if (c == 0) + return 0; + /* level 1*/ + i = map->level1[l1]; + if (i == 0xFF) { + return -1; + } + /* level 2*/ + i = map->level23[16*i+l2]; + if (i == 0xFF) { + return -1; + } + /* level 3 */ + i = map->level23[16*map->count2 + 128*i + l3]; + if (i == 0) { + return -1; + } + return i; +} + +/* Lookup the character ch in the mapping. If the character + can't be found, Py_None is returned (or NULL, if another + error occurred). */ +static PyObject * +charmapencode_lookup(Py_UCS4 c, PyObject *mapping) +{ + PyObject *w = PyLong_FromLong((long)c); + PyObject *x; + + if (w == NULL) + return NULL; + x = PyObject_GetItem(mapping, w); + Py_DECREF(w); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: mapping is undefined. */ + PyErr_Clear(); + x = Py_None; + Py_INCREF(x); + return x; + } else + return NULL; + } + else if (x == Py_None) + return x; + else if (PyLong_Check(x)) { + long value = PyLong_AS_LONG(x); + if (value < 0 || value > 255) { + PyErr_SetString(PyExc_TypeError, + "character mapping must be in range(256)"); + Py_DECREF(x); + return NULL; + } + return x; + } + else if (PyBytes_Check(x)) + return x; + else { + /* wrong return value */ + PyErr_Format(PyExc_TypeError, + "character mapping must return integer, bytes or None, not %.400s", + x->ob_type->tp_name); + Py_DECREF(x); + return NULL; + } +} + +static int +charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) +{ + Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); + /* exponentially overallocate to minimize reallocations */ + if (requiredsize < 2*outsize) + requiredsize = 2*outsize; + if (_PyBytes_Resize(outobj, requiredsize)) + return -1; + return 0; +} + +typedef enum charmapencode_result { + enc_SUCCESS, enc_FAILED, enc_EXCEPTION +} charmapencode_result; +/* lookup the character, put the result in the output string and adjust + various state variables. Resize the output bytes object if not enough + space is available. Return a new reference to the object that + was put in the output buffer, or Py_None, if the mapping was undefined + (in which case no character was written) or NULL, if a + reallocation error occurred. The caller must decref the result */ +static charmapencode_result +charmapencode_output(Py_UCS4 c, PyObject *mapping, + PyObject **outobj, Py_ssize_t *outpos) +{ + PyObject *rep; + char *outstart; + Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); + + if (Py_TYPE(mapping) == &EncodingMapType) { + int res = encoding_map_lookup(c, mapping); + Py_ssize_t requiredsize = *outpos+1; + if (res == -1) + return enc_FAILED; + if (outsize outsize) + /* Make room for all additional bytes. */ + if (charmapencode_resize(res, respos, requiredsize)) { + Py_DECREF(repunicode); + return -1; + } + memcpy(PyBytes_AsString(*res) + *respos, + PyBytes_AsString(repunicode), repsize); + *respos += repsize; + *inpos = newpos; + Py_DECREF(repunicode); + break; + } + /* generate replacement */ + if (PyUnicode_READY(repunicode) < 0) { + Py_DECREF(repunicode); + return -1; + } + repsize = PyUnicode_GET_LENGTH(repunicode); + data = PyUnicode_DATA(repunicode); + kind = PyUnicode_KIND(repunicode); + for (index = 0; index < repsize; index++) { + Py_UCS4 repch = PyUnicode_READ(kind, data, index); + x = charmapencode_output(repch, mapping, res, respos); + if (x==enc_EXCEPTION) { + Py_DECREF(repunicode); + return -1; + } + else if (x==enc_FAILED) { + Py_DECREF(repunicode); + raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); + return -1; + } + } + *inpos = newpos; + Py_DECREF(repunicode); + } + return 0; +} + +PyObject * +_PyUnicode_EncodeCharmap(PyObject *unicode, + PyObject *mapping, + const char *errors) +{ + /* output object */ + PyObject *res = NULL; + /* current input position */ + Py_ssize_t inpos = 0; + Py_ssize_t size; + /* current output position */ + Py_ssize_t respos = 0; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + /* the following variable is used for caching string comparisons + * -1=not initialized, 0=unknown, 1=strict, 2=replace, + * 3=ignore, 4=xmlcharrefreplace */ + int known_errorHandler = -1; + + if (PyUnicode_READY(unicode) < 0) + return NULL; + size = PyUnicode_GET_LENGTH(unicode); + + /* Default to Latin-1 */ + if (mapping == NULL) + return unicode_encode_ucs1(unicode, errors, 256); + + /* allocate enough for a simple encoding without + replacements, if we need more, we'll resize */ + res = PyBytes_FromStringAndSize(NULL, size); + if (res == NULL) + goto onError; + if (size == 0) + return res; + + while (inpos adjust input position */ + ++inpos; + } + + /* Resize if we allocated to much */ + if (resposPyUnicode_GET_LENGTH(unicode)) { + PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos); + Py_DECREF(restuple); + return NULL; + } + Py_INCREF(resunicode); + Py_DECREF(restuple); + return resunicode; +} + +/* Lookup the character ch in the mapping and put the result in result, + which must be decrefed by the caller. + Return 0 on success, -1 on error */ +static int +charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result) +{ + PyObject *w = PyLong_FromLong((long)c); + PyObject *x; + + if (w == NULL) + return -1; + x = PyObject_GetItem(mapping, w); + Py_DECREF(w); + if (x == NULL) { + if (PyErr_ExceptionMatches(PyExc_LookupError)) { + /* No mapping found means: use 1:1 mapping. */ + PyErr_Clear(); + *result = NULL; + return 0; + } else + return -1; + } + else if (x == Py_None) { + *result = x; + return 0; + } + else if (PyLong_Check(x)) { + long value = PyLong_AS_LONG(x); + long max = PyUnicode_GetMax(); + if (value < 0 || value > max) { + PyErr_Format(PyExc_TypeError, + "character mapping must be in range(0x%x)", max+1); + Py_DECREF(x); + return -1; + } + *result = x; + return 0; + } + else if (PyUnicode_Check(x)) { + *result = x; + return 0; + } + else { + /* wrong return value */ + PyErr_SetString(PyExc_TypeError, + "character mapping must return integer, None or str"); + Py_DECREF(x); + return -1; + } +} +/* ensure that *outobj is at least requiredsize characters long, + if not reallocate and adjust various state variables. + Return 0 on success, -1 on error */ +static int +charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize, + Py_ssize_t requiredsize) +{ + Py_ssize_t oldsize = *psize; + if (requiredsize > oldsize) { + /* exponentially overallocate to minimize reallocations */ + if (requiredsize < 2 * oldsize) + requiredsize = 2 * oldsize; + *outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4)); + if (*outobj == 0) + return -1; + *psize = requiredsize; + } + return 0; +} +/* lookup the character, put the result in the output string and adjust + various state variables. Return a new reference to the object that + was put in the output buffer in *result, or Py_None, if the mapping was + undefined (in which case no character was written). + The called must decref result. + Return 0 on success, -1 on error. */ +static int +charmaptranslate_output(PyObject *input, Py_ssize_t ipos, + PyObject *mapping, Py_UCS4 **output, + Py_ssize_t *osize, Py_ssize_t *opos, + PyObject **res) +{ + Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos); + if (charmaptranslate_lookup(curinp, mapping, res)) + return -1; + if (*res==NULL) { + /* not found => default to 1:1 mapping */ + (*output)[(*opos)++] = curinp; + } + else if (*res==Py_None) + ; + else if (PyLong_Check(*res)) { + /* no overflow check, because we know that the space is enough */ + (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res); + } + else if (PyUnicode_Check(*res)) { + Py_ssize_t repsize; + if (PyUnicode_READY(*res) == -1) + return -1; + repsize = PyUnicode_GET_LENGTH(*res); + if (repsize==1) { + /* no overflow check, because we know that the space is enough */ + (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0); + } + else if (repsize!=0) { + /* more than one character */ + Py_ssize_t requiredsize = *opos + + (PyUnicode_GET_LENGTH(input) - ipos) + + repsize - 1; + Py_ssize_t i; + if (charmaptranslate_makespace(output, osize, requiredsize)) + return -1; + for(i = 0; i < repsize; i++) + (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i); + } + } + else + return -1; + return 0; +} + +PyObject * +_PyUnicode_TranslateCharmap(PyObject *input, + PyObject *mapping, + const char *errors) +{ + /* input object */ + char *idata; + Py_ssize_t size, i; + int kind; + /* output buffer */ + Py_UCS4 *output = NULL; + Py_ssize_t osize; + PyObject *res; + /* current output position */ + Py_ssize_t opos; + char *reason = "character maps to "; + PyObject *errorHandler = NULL; + PyObject *exc = NULL; + /* the following variable is used for caching string comparisons + * -1=not initialized, 0=unknown, 1=strict, 2=replace, + * 3=ignore, 4=xmlcharrefreplace */ + int known_errorHandler = -1; + + if (mapping == NULL) { + PyErr_BadArgument(); + return NULL; + } + + if (PyUnicode_READY(input) == -1) + return NULL; + idata = (char*)PyUnicode_DATA(input); + kind = PyUnicode_KIND(input); + size = PyUnicode_GET_LENGTH(input); + i = 0; + + if (size == 0) { + Py_INCREF(input); + return input; + } + + /* allocate enough for a simple 1:1 translation without + replacements, if we need more, we'll resize */ + osize = size; + output = PyMem_Malloc(osize * sizeof(Py_UCS4)); + opos = 0; + if (output == NULL) { + PyErr_NoMemory(); + goto onError; + } + + while (i adjust input pointer */ + ++i; + else { /* untranslatable character */ + PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ + Py_ssize_t repsize; + Py_ssize_t newpos; + Py_ssize_t uni2; + /* startpos for collecting untranslatable chars */ + Py_ssize_t collstart = i; + Py_ssize_t collend = i+1; + Py_ssize_t coll; + + /* find all untranslatable characters */ + while (collend < size) { + if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x)) + goto onError; + Py_XDECREF(x); + if (x!=Py_None) + break; + ++collend; + } + /* cache callback name lookup + * (if not done yet, i.e. it's the first error) */ + if (known_errorHandler==-1) { + if ((errors==NULL) || (!strcmp(errors, "strict"))) + known_errorHandler = 1; + else if (!strcmp(errors, "replace")) + known_errorHandler = 2; + else if (!strcmp(errors, "ignore")) + known_errorHandler = 3; + else if (!strcmp(errors, "xmlcharrefreplace")) + known_errorHandler = 4; + else + known_errorHandler = 0; + } + switch (known_errorHandler) { + case 1: /* strict */ + raise_translate_exception(&exc, input, collstart, + collend, reason); + goto onError; + case 2: /* replace */ + /* No need to check for space, this is a 1:1 replacement */ + for (coll = collstart; coll0; ++uni2) + output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2); + i = newpos; + Py_DECREF(repunicode); + } + } + } + res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos); + if (!res) + goto onError; + PyMem_Free(output); + Py_XDECREF(exc); + Py_XDECREF(errorHandler); + return res; + + onError: + PyMem_Free(output); + Py_XDECREF(exc); + Py_XDECREF(errorHandler); + return NULL; +} + +/* Deprecated. Use PyUnicode_Translate instead. */ +PyObject * +PyUnicode_TranslateCharmap(const Py_UNICODE *p, + Py_ssize_t size, + PyObject *mapping, + const char *errors) +{ + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (!unicode) + return NULL; + return _PyUnicode_TranslateCharmap(unicode, mapping, errors); +} + +PyObject * +PyUnicode_Translate(PyObject *str, + PyObject *mapping, + const char *errors) +{ + PyObject *result; + + str = PyUnicode_FromObject(str); + if (str == NULL) + goto onError; + result = _PyUnicode_TranslateCharmap(str, mapping, errors); + Py_DECREF(str); + return result; + + onError: + Py_XDECREF(str); + return NULL; +} + +static Py_UCS4 +fix_decimal_and_space_to_ascii(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because this function is only + called as a callback from fixup() which does it already. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + Py_UCS4 maxchar = 0, ch, fixed; + Py_ssize_t i; + + for (i = 0; i < len; ++i) { + ch = PyUnicode_READ(kind, data, i); + fixed = 0; + if (ch > 127) { + if (Py_UNICODE_ISSPACE(ch)) + fixed = ' '; + else { + const int decimal = Py_UNICODE_TODECIMAL(ch); + if (decimal >= 0) + fixed = '0' + decimal; + } + if (fixed != 0) { + if (fixed > maxchar) + maxchar = fixed; + PyUnicode_WRITE(kind, data, i, fixed); + } + else if (ch > maxchar) + maxchar = ch; + } + else if (ch > maxchar) + maxchar = ch; + } + + return maxchar; +} + +PyObject * +_PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadInternalCall(); + return NULL; + } + if (PyUnicode_READY(unicode) == -1) + return NULL; + if (PyUnicode_MAX_CHAR_VALUE(unicode) <= 127) { + /* If the string is already ASCII, just return the same string */ + Py_INCREF(unicode); + return unicode; + } + return fixup(unicode, fix_decimal_and_space_to_ascii); +} + +PyObject * +PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, + Py_ssize_t length) +{ + PyObject *decimal; + Py_ssize_t i; + Py_UCS4 maxchar; + enum PyUnicode_Kind kind; + void *data; + + maxchar = 0; + for (i = 0; i < length; i++) { + Py_UNICODE ch = s[i]; + if (ch > 127) { + int decimal = Py_UNICODE_TODECIMAL(ch); + if (decimal >= 0) + ch = '0' + decimal; + } + maxchar = Py_MAX(maxchar, ch); + } + + /* Copy to a new string */ + decimal = PyUnicode_New(length, maxchar); + if (decimal == NULL) + return decimal; + kind = PyUnicode_KIND(decimal); + data = PyUnicode_DATA(decimal); + /* Iterate over code points */ + for (i = 0; i < length; i++) { + Py_UNICODE ch = s[i]; + if (ch > 127) { + int decimal = Py_UNICODE_TODECIMAL(ch); + if (decimal >= 0) + ch = '0' + decimal; + } + PyUnicode_WRITE(kind, data, i, ch); + } + return unicode_result(decimal); +} +/* --- Decimal Encoder ---------------------------------------------------- */ + +int +PyUnicode_EncodeDecimal(Py_UNICODE *s, + Py_ssize_t length, + char *output, + const char *errors) +{ + PyObject *unicode; + Py_ssize_t i; + enum PyUnicode_Kind kind; + void *data; + + if (output == NULL) { + PyErr_BadArgument(); + return -1; + } + + unicode = PyUnicode_FromUnicode(s, length); + if (unicode == NULL) + return -1; + + if (PyUnicode_READY(unicode) < 0) { + Py_DECREF(unicode); + return -1; + } + kind = PyUnicode_KIND(unicode); + data = PyUnicode_DATA(unicode); + + for (i=0; i < length; ) { + PyObject *exc; + Py_UCS4 ch; + int decimal; + Py_ssize_t startpos; + + ch = PyUnicode_READ(kind, data, i); + + if (Py_UNICODE_ISSPACE(ch)) { + *output++ = ' '; + i++; + continue; + } + decimal = Py_UNICODE_TODECIMAL(ch); + if (decimal >= 0) { + *output++ = '0' + decimal; + i++; + continue; + } + if (0 < ch && ch < 256) { + *output++ = (char)ch; + i++; + continue; + } + + startpos = i; + exc = NULL; + raise_encode_exception(&exc, "decimal", unicode, + startpos, startpos+1, + "invalid decimal Unicode string"); + Py_XDECREF(exc); + Py_DECREF(unicode); + return -1; + } + /* 0-terminate the output string */ + *output++ = '\0'; + Py_DECREF(unicode); + return 0; +} + +/* --- Helpers ------------------------------------------------------------ */ + +static Py_ssize_t +any_find_slice(int direction, PyObject* s1, PyObject* s2, + Py_ssize_t start, + Py_ssize_t end) +{ + int kind1, kind2, kind; + void *buf1, *buf2; + Py_ssize_t len1, len2, result; + + kind1 = PyUnicode_KIND(s1); + kind2 = PyUnicode_KIND(s2); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(s1); + buf2 = PyUnicode_DATA(s2); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(s1, kind); + if (!buf1) + return -2; + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(s2, kind); + if (!buf2) { + if (kind1 != kind) PyMem_Free(buf1); + return -2; + } + len1 = PyUnicode_GET_LENGTH(s1); + len2 = PyUnicode_GET_LENGTH(s2); + + if (direction > 0) { + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) + result = asciilib_find_slice(buf1, len1, buf2, len2, start, end); + else + result = ucs1lib_find_slice(buf1, len1, buf2, len2, start, end); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_find_slice(buf1, len1, buf2, len2, start, end); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_find_slice(buf1, len1, buf2, len2, start, end); + break; + default: + assert(0); result = -2; + } + } + else { + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) + result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end); + else + result = ucs1lib_rfind_slice(buf1, len1, buf2, len2, start, end); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_rfind_slice(buf1, len1, buf2, len2, start, end); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_rfind_slice(buf1, len1, buf2, len2, start, end); + break; + default: + assert(0); result = -2; + } + } + + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + return result; +} + +Py_ssize_t +_PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data, + Py_ssize_t n_buffer, + void *digits, Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep) +{ + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (unicode != NULL && PyUnicode_IS_ASCII(unicode)) + return _PyUnicode_ascii_InsertThousandsGrouping( + (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, + min_width, grouping, thousands_sep); + else + return _PyUnicode_ucs1_InsertThousandsGrouping( + (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, + min_width, grouping, thousands_sep); + case PyUnicode_2BYTE_KIND: + return _PyUnicode_ucs2_InsertThousandsGrouping( + (Py_UCS2*)data, n_buffer, (Py_UCS2*)digits, n_digits, + min_width, grouping, thousands_sep); + case PyUnicode_4BYTE_KIND: + return _PyUnicode_ucs4_InsertThousandsGrouping( + (Py_UCS4*)data, n_buffer, (Py_UCS4*)digits, n_digits, + min_width, grouping, thousands_sep); + } + assert(0); + return -1; +} + + +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } + +Py_ssize_t +PyUnicode_Count(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) +{ + Py_ssize_t result; + PyObject* str_obj; + PyObject* sub_obj; + int kind1, kind2, kind; + void *buf1 = NULL, *buf2 = NULL; + Py_ssize_t len1, len2; + + str_obj = PyUnicode_FromObject(str); + if (!str_obj || PyUnicode_READY(str_obj) == -1) + return -1; + sub_obj = PyUnicode_FromObject(substr); + if (!sub_obj || PyUnicode_READY(sub_obj) == -1) { + Py_DECREF(str_obj); + return -1; + } + + kind1 = PyUnicode_KIND(str_obj); + kind2 = PyUnicode_KIND(sub_obj); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(str_obj); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(str_obj, kind); + if (!buf1) + goto onError; + buf2 = PyUnicode_DATA(sub_obj); + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(sub_obj, kind); + if (!buf2) + goto onError; + len1 = PyUnicode_GET_LENGTH(str_obj); + len2 = PyUnicode_GET_LENGTH(sub_obj); + + ADJUST_INDICES(start, end, len1); + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj)) + result = asciilib_count( + ((Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + else + result = ucs1lib_count( + ((Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_count( + ((Py_UCS2*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_count( + ((Py_UCS4*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + default: + assert(0); result = 0; + } + + Py_DECREF(sub_obj); + Py_DECREF(str_obj); + + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + return result; + onError: + Py_DECREF(sub_obj); + Py_DECREF(str_obj); + if (kind1 != kind && buf1) + PyMem_Free(buf1); + if (kind2 != kind && buf2) + PyMem_Free(buf2); + return -1; +} + +Py_ssize_t +PyUnicode_Find(PyObject *str, + PyObject *sub, + Py_ssize_t start, + Py_ssize_t end, + int direction) +{ + Py_ssize_t result; + + str = PyUnicode_FromObject(str); + if (!str || PyUnicode_READY(str) == -1) + return -2; + sub = PyUnicode_FromObject(sub); + if (!sub || PyUnicode_READY(sub) == -1) { + Py_DECREF(str); + return -2; + } + + result = any_find_slice(direction, + str, sub, start, end + ); + + Py_DECREF(str); + Py_DECREF(sub); + + return result; +} + +Py_ssize_t +PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, + Py_ssize_t start, Py_ssize_t end, + int direction) +{ + int kind; + Py_ssize_t result; + if (PyUnicode_READY(str) == -1) + return -2; + if (start < 0 || end < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -2; + } + if (end > PyUnicode_GET_LENGTH(str)) + end = PyUnicode_GET_LENGTH(str); + kind = PyUnicode_KIND(str); + result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, + kind, end-start, ch, direction); + if (result == -1) + return -1; + else + return start + result; +} + +static int +tailmatch(PyObject *self, + PyObject *substring, + Py_ssize_t start, + Py_ssize_t end, + int direction) +{ + int kind_self; + int kind_sub; + void *data_self; + void *data_sub; + Py_ssize_t offset; + Py_ssize_t i; + Py_ssize_t end_sub; + + if (PyUnicode_READY(self) == -1 || + PyUnicode_READY(substring) == -1) + return 0; + + if (PyUnicode_GET_LENGTH(substring) == 0) + return 1; + + ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self)); + end -= PyUnicode_GET_LENGTH(substring); + if (end < start) + return 0; + + kind_self = PyUnicode_KIND(self); + data_self = PyUnicode_DATA(self); + kind_sub = PyUnicode_KIND(substring); + data_sub = PyUnicode_DATA(substring); + end_sub = PyUnicode_GET_LENGTH(substring) - 1; + + if (direction > 0) + offset = end; + else + offset = start; + + if (PyUnicode_READ(kind_self, data_self, offset) == + PyUnicode_READ(kind_sub, data_sub, 0) && + PyUnicode_READ(kind_self, data_self, offset + end_sub) == + PyUnicode_READ(kind_sub, data_sub, end_sub)) { + /* If both are of the same kind, memcmp is sufficient */ + if (kind_self == kind_sub) { + return ! memcmp((char *)data_self + + (offset * PyUnicode_KIND(substring)), + data_sub, + PyUnicode_GET_LENGTH(substring) * + PyUnicode_KIND(substring)); + } + /* otherwise we have to compare each character by first accesing it */ + else { + /* We do not need to compare 0 and len(substring)-1 because + the if statement above ensured already that they are equal + when we end up here. */ + // TODO: honor direction and do a forward or backwards search + for (i = 1; i < end_sub; ++i) { + if (PyUnicode_READ(kind_self, data_self, offset + i) != + PyUnicode_READ(kind_sub, data_sub, i)) + return 0; + } + return 1; + } + } + + return 0; +} + +Py_ssize_t +PyUnicode_Tailmatch(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end, + int direction) +{ + Py_ssize_t result; + + str = PyUnicode_FromObject(str); + if (str == NULL) + return -1; + substr = PyUnicode_FromObject(substr); + if (substr == NULL) { + Py_DECREF(str); + return -1; + } + + result = tailmatch(str, substr, + start, end, direction); + Py_DECREF(str); + Py_DECREF(substr); + return result; +} + +/* Apply fixfct filter to the Unicode object self and return a + reference to the modified object */ + +static PyObject * +fixup(PyObject *self, + Py_UCS4 (*fixfct)(PyObject *s)) +{ + PyObject *u; + Py_UCS4 maxchar_old, maxchar_new = 0; + + u = PyUnicode_Copy(self); + if (u == NULL) + return NULL; + maxchar_old = PyUnicode_MAX_CHAR_VALUE(u); + + /* fix functions return the new maximum character in a string, + if the kind of the resulting unicode object does not change, + everything is fine. Otherwise we need to change the string kind + and re-run the fix function. */ + maxchar_new = fixfct(u); + if (maxchar_new == 0) + /* do nothing, keep maxchar_new at 0 which means no changes. */; + else if (maxchar_new <= 127) + maxchar_new = 127; + else if (maxchar_new <= 255) + maxchar_new = 255; + else if (maxchar_new <= 65535) + maxchar_new = 65535; + else + maxchar_new = MAX_UNICODE; + + if (!maxchar_new && PyUnicode_CheckExact(self)) { + /* fixfct should return TRUE if it modified the buffer. If + FALSE, return a reference to the original buffer instead + (to save space, not time) */ + Py_INCREF(self); + Py_DECREF(u); + return self; + } + else if (maxchar_new == maxchar_old) { + return u; + } + else { + /* In case the maximum character changed, we need to + convert the string to the new category. */ + PyObject *v = PyUnicode_New(PyUnicode_GET_LENGTH(self), maxchar_new); + if (v == NULL) { + Py_DECREF(u); + return NULL; + } + if (maxchar_new > maxchar_old) { + /* If the maxchar increased so that the kind changed, not all + characters are representable anymore and we need to fix the + string again. This only happens in very few cases. */ + copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); + maxchar_old = fixfct(v); + assert(maxchar_old > 0 && maxchar_old <= maxchar_new); + } + else { + copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); + } + + Py_DECREF(u); + assert(_PyUnicode_CheckConsistency(v, 1)); + return v; + } +} + +static Py_UCS4 +fixupper(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because this function is only + called as a callback from fixup() which does it already. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + int touched = 0; + Py_UCS4 maxchar = 0; + Py_ssize_t i; + + for (i = 0; i < len; ++i) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + const Py_UCS4 up = Py_UNICODE_TOUPPER(ch); + if (up != ch) { + if (up > maxchar) + maxchar = up; + PyUnicode_WRITE(kind, data, i, up); + touched = 1; + } + else if (ch > maxchar) + maxchar = ch; + } + + if (touched) + return maxchar; + else + return 0; +} + +static Py_UCS4 +fixlower(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because fixup() which does it. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + int touched = 0; + Py_UCS4 maxchar = 0; + Py_ssize_t i; + + for(i = 0; i < len; ++i) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch); + if (lo != ch) { + if (lo > maxchar) + maxchar = lo; + PyUnicode_WRITE(kind, data, i, lo); + touched = 1; + } + else if (ch > maxchar) + maxchar = ch; + } + + if (touched) + return maxchar; + else + return 0; +} + +static Py_UCS4 +fixswapcase(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because fixup() which does it. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + int touched = 0; + Py_UCS4 maxchar = 0; + Py_ssize_t i; + + for(i = 0; i < len; ++i) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + Py_UCS4 nu = 0; + + if (Py_UNICODE_ISUPPER(ch)) + nu = Py_UNICODE_TOLOWER(ch); + else if (Py_UNICODE_ISLOWER(ch)) + nu = Py_UNICODE_TOUPPER(ch); + + if (nu != 0) { + if (nu > maxchar) + maxchar = nu; + PyUnicode_WRITE(kind, data, i, nu); + touched = 1; + } + else if (ch > maxchar) + maxchar = ch; + } + + if (touched) + return maxchar; + else + return 0; +} + +static Py_UCS4 +fixcapitalize(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because fixup() which does it. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + int touched = 0; + Py_UCS4 maxchar = 0; + Py_ssize_t i = 0; + Py_UCS4 ch; + + if (len == 0) + return 0; + + ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISUPPER(ch)) { + maxchar = Py_UNICODE_TOUPPER(ch); + PyUnicode_WRITE(kind, data, i, maxchar); + touched = 1; + } + ++i; + for(; i < len; ++i) { + ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISLOWER(ch)) { + const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch); + if (lo > maxchar) + maxchar = lo; + PyUnicode_WRITE(kind, data, i, lo); + touched = 1; + } + else if (ch > maxchar) + maxchar = ch; + } + + if (touched) + return maxchar; + else + return 0; +} + +static Py_UCS4 +fixtitle(PyObject *self) +{ + /* No need to call PyUnicode_READY(self) because fixup() which does it. */ + const Py_ssize_t len = PyUnicode_GET_LENGTH(self); + const int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); + Py_UCS4 maxchar = 0; + Py_ssize_t i = 0; + int previous_is_cased; + + /* Shortcut for single character strings */ + if (len == 1) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + const Py_UCS4 ti = Py_UNICODE_TOTITLE(ch); + if (ti != ch) { + PyUnicode_WRITE(kind, data, i, ti); + return ti; + } + else + return 0; + } + previous_is_cased = 0; + for(; i < len; ++i) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + Py_UCS4 nu; + + if (previous_is_cased) + nu = Py_UNICODE_TOLOWER(ch); + else + nu = Py_UNICODE_TOTITLE(ch); + + if (nu > maxchar) + maxchar = nu; + PyUnicode_WRITE(kind, data, i, nu); + + if (Py_UNICODE_ISLOWER(ch) || + Py_UNICODE_ISUPPER(ch) || + Py_UNICODE_ISTITLE(ch)) + previous_is_cased = 1; + else + previous_is_cased = 0; + } + return maxchar; +} + +PyObject * +PyUnicode_Join(PyObject *separator, PyObject *seq) +{ + PyObject *sep = NULL; + Py_ssize_t seplen; + PyObject *res = NULL; /* the result */ + PyObject *fseq; /* PySequence_Fast(seq) */ + Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ + PyObject **items; + PyObject *item; + Py_ssize_t sz, i, res_offset; + Py_UCS4 maxchar; + Py_UCS4 item_maxchar; + int use_memcpy; + unsigned char *res_data = NULL, *sep_data = NULL; + PyObject *last_obj; + unsigned int kind = 0; + + fseq = PySequence_Fast(seq, ""); + if (fseq == NULL) { + return NULL; + } + + /* NOTE: the following code can't call back into Python code, + * so we are sure that fseq won't be mutated. + */ + + seqlen = PySequence_Fast_GET_SIZE(fseq); + /* If empty sequence, return u"". */ + if (seqlen == 0) { + Py_DECREF(fseq); + Py_INCREF(unicode_empty); + res = unicode_empty; + return res; + } + + /* If singleton sequence with an exact Unicode, return that. */ + last_obj = NULL; + items = PySequence_Fast_ITEMS(fseq); + if (seqlen == 1) { + if (PyUnicode_CheckExact(items[0])) { + res = items[0]; + Py_INCREF(res); + Py_DECREF(fseq); + return res; + } + seplen = 0; + maxchar = 0; + } + else { + /* Set up sep and seplen */ + if (separator == NULL) { + /* fall back to a blank space separator */ + sep = PyUnicode_FromOrdinal(' '); + if (!sep) + goto onError; + seplen = 1; + maxchar = 32; + } + else { + if (!PyUnicode_Check(separator)) { + PyErr_Format(PyExc_TypeError, + "separator: expected str instance," + " %.80s found", + Py_TYPE(separator)->tp_name); + goto onError; + } + if (PyUnicode_READY(separator)) + goto onError; + sep = separator; + seplen = PyUnicode_GET_LENGTH(separator); + maxchar = PyUnicode_MAX_CHAR_VALUE(separator); + /* inc refcount to keep this code path symmetric with the + above case of a blank separator */ + Py_INCREF(sep); + } + last_obj = sep; + } + + /* There are at least two things to join, or else we have a subclass + * of str in the sequence. + * Do a pre-pass to figure out the total amount of space we'll + * need (sz), and see whether all argument are strings. + */ + sz = 0; +#ifdef Py_DEBUG + use_memcpy = 0; +#else + use_memcpy = 1; +#endif + for (i = 0; i < seqlen; i++) { + const Py_ssize_t old_sz = sz; + item = items[i]; + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected str instance," + " %.80s found", + i, Py_TYPE(item)->tp_name); + goto onError; + } + if (PyUnicode_READY(item) == -1) + goto onError; + sz += PyUnicode_GET_LENGTH(item); + item_maxchar = PyUnicode_MAX_CHAR_VALUE(item); + maxchar = Py_MAX(maxchar, item_maxchar); + if (i != 0) + sz += seplen; + if (sz < old_sz || sz > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long for a Python string"); + goto onError; + } + if (use_memcpy && last_obj != NULL) { + if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item)) + use_memcpy = 0; + } + last_obj = item; + } + + res = PyUnicode_New(sz, maxchar); + if (res == NULL) + goto onError; + + /* Catenate everything. */ +#ifdef Py_DEBUG + use_memcpy = 0; +#else + if (use_memcpy) { + res_data = PyUnicode_1BYTE_DATA(res); + kind = PyUnicode_KIND(res); + if (seplen != 0) + sep_data = PyUnicode_1BYTE_DATA(sep); + } +#endif + for (i = 0, res_offset = 0; i < seqlen; ++i) { + Py_ssize_t itemlen; + item = items[i]; + /* Copy item, and maybe the separator. */ + if (i && seplen != 0) { + if (use_memcpy) { + Py_MEMCPY(res_data, + sep_data, + kind * seplen); + res_data += kind * seplen; + } + else { + copy_characters(res, res_offset, sep, 0, seplen); + res_offset += seplen; + } + } + itemlen = PyUnicode_GET_LENGTH(item); + if (itemlen != 0) { + if (use_memcpy) { + Py_MEMCPY(res_data, + PyUnicode_DATA(item), + kind * itemlen); + res_data += kind * itemlen; + } + else { + copy_characters(res, res_offset, item, 0, itemlen); + res_offset += itemlen; + } + } + } + if (use_memcpy) + assert(res_data == PyUnicode_1BYTE_DATA(res) + + kind * PyUnicode_GET_LENGTH(res)); + else + assert(res_offset == PyUnicode_GET_LENGTH(res)); + + Py_DECREF(fseq); + Py_XDECREF(sep); + assert(_PyUnicode_CheckConsistency(res, 1)); + return res; + + onError: + Py_DECREF(fseq); + Py_XDECREF(sep); + Py_XDECREF(res); + return NULL; +} + +#define FILL(kind, data, value, start, length) \ + do { \ + Py_ssize_t i_ = 0; \ + assert(kind != PyUnicode_WCHAR_KIND); \ + switch ((kind)) { \ + case PyUnicode_1BYTE_KIND: { \ + unsigned char * to_ = (unsigned char *)((data)) + (start); \ + memset(to_, (unsigned char)value, length); \ + break; \ + } \ + case PyUnicode_2BYTE_KIND: { \ + Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + default: { \ + Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + } \ + } while (0) + +static PyObject * +pad(PyObject *self, + Py_ssize_t left, + Py_ssize_t right, + Py_UCS4 fill) +{ + PyObject *u; + Py_UCS4 maxchar; + int kind; + void *data; + + if (left < 0) + left = 0; + if (right < 0) + right = 0; + + if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + + if (left > PY_SSIZE_T_MAX - _PyUnicode_LENGTH(self) || + right > PY_SSIZE_T_MAX - (left + _PyUnicode_LENGTH(self))) { + PyErr_SetString(PyExc_OverflowError, "padded string is too long"); + return NULL; + } + maxchar = PyUnicode_MAX_CHAR_VALUE(self); + if (fill > maxchar) + maxchar = fill; + u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar); + if (!u) + return NULL; + + kind = PyUnicode_KIND(u); + data = PyUnicode_DATA(u); + if (left) + FILL(kind, data, fill, 0, left); + if (right) + FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); + copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); + assert(_PyUnicode_CheckConsistency(u, 1)); + return u; +} +#undef FILL + +PyObject * +PyUnicode_Splitlines(PyObject *string, int keepends) +{ + PyObject *list; + + string = PyUnicode_FromObject(string); + if (string == NULL || PyUnicode_READY(string) == -1) + return NULL; + + switch(PyUnicode_KIND(string)) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(string)) + list = asciilib_splitlines( + string, PyUnicode_1BYTE_DATA(string), + PyUnicode_GET_LENGTH(string), keepends); + else + list = ucs1lib_splitlines( + string, PyUnicode_1BYTE_DATA(string), + PyUnicode_GET_LENGTH(string), keepends); + break; + case PyUnicode_2BYTE_KIND: + list = ucs2lib_splitlines( + string, PyUnicode_2BYTE_DATA(string), + PyUnicode_GET_LENGTH(string), keepends); + break; + case PyUnicode_4BYTE_KIND: + list = ucs4lib_splitlines( + string, PyUnicode_4BYTE_DATA(string), + PyUnicode_GET_LENGTH(string), keepends); + break; + default: + assert(0); + list = 0; + } + Py_DECREF(string); + return list; +} + +static PyObject * +split(PyObject *self, + PyObject *substring, + Py_ssize_t maxcount) +{ + int kind1, kind2, kind; + void *buf1, *buf2; + Py_ssize_t len1, len2; + PyObject* out; + + if (maxcount < 0) + maxcount = PY_SSIZE_T_MAX; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (substring == NULL) + switch(PyUnicode_KIND(self)) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(self)) + return asciilib_split_whitespace( + self, PyUnicode_1BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + else + return ucs1lib_split_whitespace( + self, PyUnicode_1BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + case PyUnicode_2BYTE_KIND: + return ucs2lib_split_whitespace( + self, PyUnicode_2BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + case PyUnicode_4BYTE_KIND: + return ucs4lib_split_whitespace( + self, PyUnicode_4BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + default: + assert(0); + return NULL; + } + + if (PyUnicode_READY(substring) == -1) + return NULL; + + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(self, kind); + if (!buf1) + return NULL; + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(substring, kind); + if (!buf2) { + if (kind1 != kind) PyMem_Free(buf1); + return NULL; + } + len1 = PyUnicode_GET_LENGTH(self); + len2 = PyUnicode_GET_LENGTH(substring); + + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) + out = asciilib_split( + self, buf1, len1, buf2, len2, maxcount); + else + out = ucs1lib_split( + self, buf1, len1, buf2, len2, maxcount); + break; + case PyUnicode_2BYTE_KIND: + out = ucs2lib_split( + self, buf1, len1, buf2, len2, maxcount); + break; + case PyUnicode_4BYTE_KIND: + out = ucs4lib_split( + self, buf1, len1, buf2, len2, maxcount); + break; + default: + out = NULL; + } + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + return out; +} + +static PyObject * +rsplit(PyObject *self, + PyObject *substring, + Py_ssize_t maxcount) +{ + int kind1, kind2, kind; + void *buf1, *buf2; + Py_ssize_t len1, len2; + PyObject* out; + + if (maxcount < 0) + maxcount = PY_SSIZE_T_MAX; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (substring == NULL) + switch(PyUnicode_KIND(self)) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(self)) + return asciilib_rsplit_whitespace( + self, PyUnicode_1BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + else + return ucs1lib_rsplit_whitespace( + self, PyUnicode_1BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + case PyUnicode_2BYTE_KIND: + return ucs2lib_rsplit_whitespace( + self, PyUnicode_2BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + case PyUnicode_4BYTE_KIND: + return ucs4lib_rsplit_whitespace( + self, PyUnicode_4BYTE_DATA(self), + PyUnicode_GET_LENGTH(self), maxcount + ); + default: + assert(0); + return NULL; + } + + if (PyUnicode_READY(substring) == -1) + return NULL; + + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(self, kind); + if (!buf1) + return NULL; + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(substring, kind); + if (!buf2) { + if (kind1 != kind) PyMem_Free(buf1); + return NULL; + } + len1 = PyUnicode_GET_LENGTH(self); + len2 = PyUnicode_GET_LENGTH(substring); + + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) + out = asciilib_rsplit( + self, buf1, len1, buf2, len2, maxcount); + else + out = ucs1lib_rsplit( + self, buf1, len1, buf2, len2, maxcount); + break; + case PyUnicode_2BYTE_KIND: + out = ucs2lib_rsplit( + self, buf1, len1, buf2, len2, maxcount); + break; + case PyUnicode_4BYTE_KIND: + out = ucs4lib_rsplit( + self, buf1, len1, buf2, len2, maxcount); + break; + default: + out = NULL; + } + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + return out; +} + +static Py_ssize_t +anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1, + PyObject *str2, void *buf2, Py_ssize_t len2, Py_ssize_t offset) +{ + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(str1) && PyUnicode_IS_ASCII(str2)) + return asciilib_find(buf1, len1, buf2, len2, offset); + else + return ucs1lib_find(buf1, len1, buf2, len2, offset); + case PyUnicode_2BYTE_KIND: + return ucs2lib_find(buf1, len1, buf2, len2, offset); + case PyUnicode_4BYTE_KIND: + return ucs4lib_find(buf1, len1, buf2, len2, offset); + } + assert(0); + return -1; +} + +static Py_ssize_t +anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, + PyObject *str1, void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) +{ + switch(kind) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(sstr) && PyUnicode_IS_ASCII(str1)) + return asciilib_count(sbuf, slen, buf1, len1, maxcount); + else + return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_2BYTE_KIND: + return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_4BYTE_KIND: + return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); + } + assert(0); + return 0; +} + +static PyObject * +replace(PyObject *self, PyObject *str1, + PyObject *str2, Py_ssize_t maxcount) +{ + PyObject *u; + char *sbuf = PyUnicode_DATA(self); + char *buf1 = PyUnicode_DATA(str1); + char *buf2 = PyUnicode_DATA(str2); + int srelease = 0, release1 = 0, release2 = 0; + int skind = PyUnicode_KIND(self); + int kind1 = PyUnicode_KIND(str1); + int kind2 = PyUnicode_KIND(str2); + Py_ssize_t slen = PyUnicode_GET_LENGTH(self); + Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); + Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); + int mayshrink; + Py_UCS4 maxchar, maxchar_str2; + + if (maxcount < 0) + maxcount = PY_SSIZE_T_MAX; + else if (maxcount == 0 || slen == 0) + goto nothing; + + if (str1 == str2) + goto nothing; + if (skind < kind1) + /* substring too wide to be present */ + goto nothing; + + maxchar = PyUnicode_MAX_CHAR_VALUE(self); + maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); + /* Replacing str1 with str2 may cause a maxchar reduction in the + result string. */ + mayshrink = (maxchar_str2 < maxchar); + maxchar = Py_MAX(maxchar, maxchar_str2); + + if (len1 == len2) { + Py_ssize_t i; + /* same length */ + if (len1 == 0) + goto nothing; + if (len1 == 1) { + /* replace characters */ + Py_UCS4 u1, u2; + int rkind; + u1 = PyUnicode_READ_CHAR(str1, 0); + if (findchar(sbuf, PyUnicode_KIND(self), + slen, u1, 1) < 0) + goto nothing; + u2 = PyUnicode_READ_CHAR(str2, 0); + u = PyUnicode_New(slen, maxchar); + if (!u) + goto error; + copy_characters(u, 0, self, 0, slen); + rkind = PyUnicode_KIND(u); + for (i = 0; i < PyUnicode_GET_LENGTH(u); i++) + if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) { + if (--maxcount < 0) + break; + PyUnicode_WRITE(rkind, PyUnicode_DATA(u), i, u2); + } + } + else { + int rkind = skind; + char *res; + + if (kind1 < rkind) { + /* widen substring */ + buf1 = _PyUnicode_AsKind(str1, rkind); + if (!buf1) goto error; + release1 = 1; + } + i = anylib_find(rkind, self, sbuf, slen, str1, buf1, len1, 0); + if (i < 0) + goto nothing; + if (rkind > kind2) { + /* widen replacement */ + buf2 = _PyUnicode_AsKind(str2, rkind); + if (!buf2) goto error; + release2 = 1; + } + else if (rkind < kind2) { + /* widen self and buf1 */ + rkind = kind2; + if (release1) PyMem_Free(buf1); + sbuf = _PyUnicode_AsKind(self, rkind); + if (!sbuf) goto error; + srelease = 1; + buf1 = _PyUnicode_AsKind(str1, rkind); + if (!buf1) goto error; + release1 = 1; + } + u = PyUnicode_New(slen, maxchar); + if (!u) + goto error; + assert(PyUnicode_KIND(u) == rkind); + res = PyUnicode_DATA(u); + + memcpy(res, sbuf, rkind * slen); + /* change everything in-place, starting with this one */ + memcpy(res + rkind * i, + buf2, + rkind * len2); + i += len1; + + while ( --maxcount > 0) { + i = anylib_find(rkind, self, + sbuf+rkind*i, slen-i, + str1, buf1, len1, i); + if (i == -1) + break; + memcpy(res + rkind * i, + buf2, + rkind * len2); + i += len1; + } + } + } + else { + Py_ssize_t n, i, j, ires; + Py_ssize_t product, new_size; + int rkind = skind; + char *res; + + if (kind1 < rkind) { + /* widen substring */ + buf1 = _PyUnicode_AsKind(str1, rkind); + if (!buf1) goto error; + release1 = 1; + } + n = anylib_count(rkind, self, sbuf, slen, str1, buf1, len1, maxcount); + if (n == 0) + goto nothing; + if (kind2 < rkind) { + /* widen replacement */ + buf2 = _PyUnicode_AsKind(str2, rkind); + if (!buf2) goto error; + release2 = 1; + } + else if (kind2 > rkind) { + /* widen self and buf1 */ + rkind = kind2; + sbuf = _PyUnicode_AsKind(self, rkind); + if (!sbuf) goto error; + srelease = 1; + if (release1) PyMem_Free(buf1); + buf1 = _PyUnicode_AsKind(str1, rkind); + if (!buf1) goto error; + release1 = 1; + } + /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) - + PyUnicode_GET_LENGTH(str1))); */ + product = n * (len2-len1); + if ((product / (len2-len1)) != n) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + goto error; + } + new_size = slen + product; + if (new_size == 0) { + Py_INCREF(unicode_empty); + u = unicode_empty; + goto done; + } + if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) { + PyErr_SetString(PyExc_OverflowError, + "replace string is too long"); + goto error; + } + u = PyUnicode_New(new_size, maxchar); + if (!u) + goto error; + assert(PyUnicode_KIND(u) == rkind); + res = PyUnicode_DATA(u); + ires = i = 0; + if (len1 > 0) { + while (n-- > 0) { + /* look for next match */ + j = anylib_find(rkind, self, + sbuf + rkind * i, slen-i, + str1, buf1, len1, i); + if (j == -1) + break; + else if (j > i) { + /* copy unchanged part [i:j] */ + memcpy(res + rkind * ires, + sbuf + rkind * i, + rkind * (j-i)); + ires += j - i; + } + /* copy substitution string */ + if (len2 > 0) { + memcpy(res + rkind * ires, + buf2, + rkind * len2); + ires += len2; + } + i = j + len1; + } + if (i < slen) + /* copy tail [i:] */ + memcpy(res + rkind * ires, + sbuf + rkind * i, + rkind * (slen-i)); + } + else { + /* interleave */ + while (n > 0) { + memcpy(res + rkind * ires, + buf2, + rkind * len2); + ires += len2; + if (--n <= 0) + break; + memcpy(res + rkind * ires, + sbuf + rkind * i, + rkind); + ires++; + i++; + } + memcpy(res + rkind * ires, + sbuf + rkind * i, + rkind * (slen-i)); + } + } + + if (mayshrink) { + unicode_adjust_maxchar(&u); + if (u == NULL) + goto error; + } + + done: + if (srelease) + PyMem_FREE(sbuf); + if (release1) + PyMem_FREE(buf1); + if (release2) + PyMem_FREE(buf2); + assert(_PyUnicode_CheckConsistency(u, 1)); + return u; + + nothing: + /* nothing to replace; return original string (when possible) */ + if (srelease) + PyMem_FREE(sbuf); + if (release1) + PyMem_FREE(buf1); + if (release2) + PyMem_FREE(buf2); + if (PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + return PyUnicode_Copy(self); + error: + if (srelease && sbuf) + PyMem_FREE(sbuf); + if (release1 && buf1) + PyMem_FREE(buf1); + if (release2 && buf2) + PyMem_FREE(buf2); + return NULL; +} + +/* --- Unicode Object Methods --------------------------------------------- */ + +PyDoc_STRVAR(title__doc__, + "S.title() -> str\n\ +\n\ +Return a titlecased version of S, i.e. words start with title case\n\ +characters, all remaining cased characters have lower case."); + +static PyObject* +unicode_title(PyObject *self) +{ + return fixup(self, fixtitle); +} + +PyDoc_STRVAR(capitalize__doc__, + "S.capitalize() -> str\n\ +\n\ +Return a capitalized version of S, i.e. make the first character\n\ +have upper case and the rest lower case."); + +static PyObject* +unicode_capitalize(PyObject *self) +{ + return fixup(self, fixcapitalize); +} + +#if 0 +PyDoc_STRVAR(capwords__doc__, + "S.capwords() -> str\n\ +\n\ +Apply .capitalize() to all words in S and return the result with\n\ +normalized whitespace (all whitespace strings are replaced by ' ')."); + +static PyObject* +unicode_capwords(PyObject *self) +{ + PyObject *list; + PyObject *item; + Py_ssize_t i; + + /* Split into words */ + list = split(self, NULL, -1); + if (!list) + return NULL; + + /* Capitalize each word */ + for (i = 0; i < PyList_GET_SIZE(list); i++) { + item = fixup(PyList_GET_ITEM(list, i), + fixcapitalize); + if (item == NULL) + goto onError; + Py_DECREF(PyList_GET_ITEM(list, i)); + PyList_SET_ITEM(list, i, item); + } + + /* Join the words to form a new string */ + item = PyUnicode_Join(NULL, list); + + onError: + Py_DECREF(list); + return item; +} +#endif + +/* Argument converter. Coerces to a single unicode character */ + +static int +convert_uc(PyObject *obj, void *addr) +{ + Py_UCS4 *fillcharloc = (Py_UCS4 *)addr; + PyObject *uniobj; + + uniobj = PyUnicode_FromObject(obj); + if (uniobj == NULL) { + PyErr_SetString(PyExc_TypeError, + "The fill character cannot be converted to Unicode"); + return 0; + } + if (PyUnicode_GET_LENGTH(uniobj) != 1) { + PyErr_SetString(PyExc_TypeError, + "The fill character must be exactly one character long"); + Py_DECREF(uniobj); + return 0; + } + *fillcharloc = PyUnicode_READ_CHAR(uniobj, 0); + Py_DECREF(uniobj); + return 1; +} + +PyDoc_STRVAR(center__doc__, + "S.center(width[, fillchar]) -> str\n\ +\n\ +Return S centered in a string of length width. Padding is\n\ +done using the specified fill character (default is a space)"); + +static PyObject * +unicode_center(PyObject *self, PyObject *args) +{ + Py_ssize_t marg, left; + Py_ssize_t width; + Py_UCS4 fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + + marg = width - _PyUnicode_LENGTH(self); + left = marg / 2 + (marg & width & 1); + + return pad(self, left, marg - left, fillchar); +} + +/* This function assumes that str1 and str2 are readied by the caller. */ + +static int +unicode_compare(PyObject *str1, PyObject *str2) +{ + int kind1, kind2; + void *data1, *data2; + Py_ssize_t len1, len2, i; + + kind1 = PyUnicode_KIND(str1); + kind2 = PyUnicode_KIND(str2); + data1 = PyUnicode_DATA(str1); + data2 = PyUnicode_DATA(str2); + len1 = PyUnicode_GET_LENGTH(str1); + len2 = PyUnicode_GET_LENGTH(str2); + + for (i = 0; i < len1 && i < len2; ++i) { + Py_UCS4 c1, c2; + c1 = PyUnicode_READ(kind1, data1, i); + c2 = PyUnicode_READ(kind2, data2, i); + + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + } + + return (len1 < len2) ? -1 : (len1 != len2); +} + +int +PyUnicode_Compare(PyObject *left, PyObject *right) +{ + if (PyUnicode_Check(left) && PyUnicode_Check(right)) { + if (PyUnicode_READY(left) == -1 || + PyUnicode_READY(right) == -1) + return -1; + return unicode_compare(left, right); + } + PyErr_Format(PyExc_TypeError, + "Can't compare %.100s and %.100s", + left->ob_type->tp_name, + right->ob_type->tp_name); + return -1; +} + +int +PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) +{ + Py_ssize_t i; + int kind; + void *data; + Py_UCS4 chr; + + assert(_PyUnicode_CHECK(uni)); + if (PyUnicode_READY(uni) == -1) + return -1; + kind = PyUnicode_KIND(uni); + data = PyUnicode_DATA(uni); + /* Compare Unicode string and source character set string */ + for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) + if (chr != str[i]) + return (chr < (unsigned char)(str[i])) ? -1 : 1; + /* This check keeps Python strings that end in '\0' from comparing equal + to C strings identical up to that point. */ + if (PyUnicode_GET_LENGTH(uni) != i || chr) + return 1; /* uni is longer */ + if (str[i]) + return -1; /* str is longer */ + return 0; +} + + +#define TEST_COND(cond) \ + ((cond) ? Py_True : Py_False) + +PyObject * +PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) +{ + int result; + + if (PyUnicode_Check(left) && PyUnicode_Check(right)) { + PyObject *v; + if (PyUnicode_READY(left) == -1 || + PyUnicode_READY(right) == -1) + return NULL; + if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) || + PyUnicode_KIND(left) != PyUnicode_KIND(right)) { + if (op == Py_EQ) { + Py_INCREF(Py_False); + return Py_False; + } + if (op == Py_NE) { + Py_INCREF(Py_True); + return Py_True; + } + } + if (left == right) + result = 0; + else + result = unicode_compare(left, right); + + /* Convert the return value to a Boolean */ + switch (op) { + case Py_EQ: + v = TEST_COND(result == 0); + break; + case Py_NE: + v = TEST_COND(result != 0); + break; + case Py_LE: + v = TEST_COND(result <= 0); + break; + case Py_GE: + v = TEST_COND(result >= 0); + break; + case Py_LT: + v = TEST_COND(result == -1); + break; + case Py_GT: + v = TEST_COND(result == 1); + break; + default: + PyErr_BadArgument(); + return NULL; + } + Py_INCREF(v); + return v; + } + + Py_RETURN_NOTIMPLEMENTED; +} + +int +PyUnicode_Contains(PyObject *container, PyObject *element) +{ + PyObject *str, *sub; + int kind1, kind2, kind; + void *buf1, *buf2; + Py_ssize_t len1, len2; + int result; + + /* Coerce the two arguments */ + sub = PyUnicode_FromObject(element); + if (!sub) { + PyErr_Format(PyExc_TypeError, + "'in ' requires string as left operand, not %s", + element->ob_type->tp_name); + return -1; + } + if (PyUnicode_READY(sub) == -1) + return -1; + + str = PyUnicode_FromObject(container); + if (!str || PyUnicode_READY(str) == -1) { + Py_DECREF(sub); + return -1; + } + + kind1 = PyUnicode_KIND(str); + kind2 = PyUnicode_KIND(sub); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(str); + buf2 = PyUnicode_DATA(sub); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(str, kind); + if (!buf1) { + Py_DECREF(sub); + return -1; + } + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(sub, kind); + if (!buf2) { + Py_DECREF(sub); + if (kind1 != kind) PyMem_Free(buf1); + return -1; + } + len1 = PyUnicode_GET_LENGTH(str); + len2 = PyUnicode_GET_LENGTH(sub); + + switch(kind) { + case PyUnicode_1BYTE_KIND: + result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1; + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_find(buf1, len1, buf2, len2, 0) != -1; + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_find(buf1, len1, buf2, len2, 0) != -1; + break; + default: + result = -1; + assert(0); + } + + Py_DECREF(str); + Py_DECREF(sub); + + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + return result; +} + +/* Concat to string or Unicode object giving a new Unicode object. */ + +PyObject * +PyUnicode_Concat(PyObject *left, PyObject *right) +{ + PyObject *u = NULL, *v = NULL, *w; + Py_UCS4 maxchar, maxchar2; + + /* Coerce the two arguments */ + u = PyUnicode_FromObject(left); + if (u == NULL) + goto onError; + v = PyUnicode_FromObject(right); + if (v == NULL) + goto onError; + + /* Shortcuts */ + if (v == unicode_empty) { + Py_DECREF(v); + return u; + } + if (u == unicode_empty) { + Py_DECREF(u); + return v; + } + + maxchar = PyUnicode_MAX_CHAR_VALUE(u); + maxchar2 = PyUnicode_MAX_CHAR_VALUE(v); + maxchar = Py_MAX(maxchar, maxchar2); + + /* Concat the two Unicode strings */ + w = PyUnicode_New( + PyUnicode_GET_LENGTH(u) + PyUnicode_GET_LENGTH(v), + maxchar); + if (w == NULL) + goto onError; + copy_characters(w, 0, u, 0, PyUnicode_GET_LENGTH(u)); + copy_characters(w, PyUnicode_GET_LENGTH(u), v, 0, PyUnicode_GET_LENGTH(v)); + Py_DECREF(u); + Py_DECREF(v); + assert(_PyUnicode_CheckConsistency(w, 1)); + return w; + + onError: + Py_XDECREF(u); + Py_XDECREF(v); + return NULL; +} + +static void +unicode_append_inplace(PyObject **p_left, PyObject *right) +{ + Py_ssize_t left_len, right_len, new_len; + + assert(PyUnicode_IS_READY(*p_left)); + assert(PyUnicode_IS_READY(right)); + + left_len = PyUnicode_GET_LENGTH(*p_left); + right_len = PyUnicode_GET_LENGTH(right); + if (left_len > PY_SSIZE_T_MAX - right_len) { + PyErr_SetString(PyExc_OverflowError, + "strings are too large to concat"); + goto error; + } + new_len = left_len + right_len; + + /* Now we own the last reference to 'left', so we can resize it + * in-place. + */ + if (unicode_resize(p_left, new_len) != 0) { + /* XXX if _PyUnicode_Resize() fails, 'left' has been + * deallocated so it cannot be put back into + * 'variable'. The MemoryError is raised when there + * is no value in 'variable', which might (very + * remotely) be a cause of incompatibilities. + */ + goto error; + } + /* copy 'right' into the newly allocated area of 'left' */ + copy_characters(*p_left, left_len, right, 0, right_len); + _PyUnicode_DIRTY(*p_left); + return; + +error: + Py_DECREF(*p_left); + *p_left = NULL; +} + +void +PyUnicode_Append(PyObject **p_left, PyObject *right) +{ + PyObject *left, *res; + + if (p_left == NULL) { + if (!PyErr_Occurred()) + PyErr_BadInternalCall(); + return; + } + left = *p_left; + if (right == NULL || !PyUnicode_Check(left)) { + if (!PyErr_Occurred()) + PyErr_BadInternalCall(); + goto error; + } + + if (PyUnicode_READY(left)) + goto error; + if (PyUnicode_READY(right)) + goto error; + + if (PyUnicode_CheckExact(left) && left != unicode_empty + && PyUnicode_CheckExact(right) && right != unicode_empty + && unicode_resizable(left) + && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left) + || _PyUnicode_WSTR(left) != NULL)) + { + /* Don't resize for ascii += latin1. Convert ascii to latin1 requires + to change the structure size, but characters are stored just after + the structure, and so it requires to move all characters which is + not so different than duplicating the string. */ + if (!(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) + { + unicode_append_inplace(p_left, right); + assert(p_left == NULL || _PyUnicode_CheckConsistency(*p_left, 1)); + return; + } + } + + res = PyUnicode_Concat(left, right); + if (res == NULL) + goto error; + Py_DECREF(left); + *p_left = res; + return; + +error: + Py_DECREF(*p_left); + *p_left = NULL; +} + +void +PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) +{ + PyUnicode_Append(pleft, right); + Py_XDECREF(right); +} + +PyDoc_STRVAR(count__doc__, + "S.count(sub[, start[, end]]) -> int\n\ +\n\ +Return the number of non-overlapping occurrences of substring sub in\n\ +string S[start:end]. Optional arguments start and end are\n\ +interpreted as in slice notation."); + +static PyObject * +unicode_count(PyObject *self, PyObject *args) +{ + PyObject *substring; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + PyObject *result; + int kind1, kind2, kind; + void *buf1, *buf2; + Py_ssize_t len1, len2, iresult; + + if (!stringlib_parse_args_finds_unicode("count", args, &substring, + &start, &end)) + return NULL; + + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + kind = kind1 > kind2 ? kind1 : kind2; + buf1 = PyUnicode_DATA(self); + buf2 = PyUnicode_DATA(substring); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(self, kind); + if (!buf1) { + Py_DECREF(substring); + return NULL; + } + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(substring, kind); + if (!buf2) { + Py_DECREF(substring); + if (kind1 != kind) PyMem_Free(buf1); + return NULL; + } + len1 = PyUnicode_GET_LENGTH(self); + len2 = PyUnicode_GET_LENGTH(substring); + + ADJUST_INDICES(start, end, len1); + switch(kind) { + case PyUnicode_1BYTE_KIND: + iresult = ucs1lib_count( + ((Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_2BYTE_KIND: + iresult = ucs2lib_count( + ((Py_UCS2*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_4BYTE_KIND: + iresult = ucs4lib_count( + ((Py_UCS4*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + default: + assert(0); iresult = 0; + } + + result = PyLong_FromSsize_t(iresult); + + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + Py_DECREF(substring); + + return result; +} + +PyDoc_STRVAR(encode__doc__, + "S.encode(encoding='utf-8', errors='strict') -> bytes\n\ +\n\ +Encode S using the codec registered for encoding. Default encoding\n\ +is 'utf-8'. errors may be given to set a different error\n\ +handling scheme. Default is 'strict' meaning that encoding errors raise\n\ +a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ +'xmlcharrefreplace' as well as any other name registered with\n\ +codecs.register_error that can handle UnicodeEncodeErrors."); + +static PyObject * +unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = {"encoding", "errors", 0}; + char *encoding = NULL; + char *errors = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", + kwlist, &encoding, &errors)) + return NULL; + return PyUnicode_AsEncodedString(self, encoding, errors); +} + +PyDoc_STRVAR(expandtabs__doc__, + "S.expandtabs([tabsize]) -> str\n\ +\n\ +Return a copy of S where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +static PyObject* +unicode_expandtabs(PyObject *self, PyObject *args) +{ + Py_ssize_t i, j, line_pos, src_len, incr; + Py_UCS4 ch; + PyObject *u; + void *src_data, *dest_data; + int tabsize = 8; + int kind; + int found; + + if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + + /* First pass: determine size of output string */ + src_len = PyUnicode_GET_LENGTH(self); + i = j = line_pos = 0; + kind = PyUnicode_KIND(self); + src_data = PyUnicode_DATA(self); + found = 0; + for (; i < src_len; i++) { + ch = PyUnicode_READ(kind, src_data, i); + if (ch == '\t') { + found = 1; + if (tabsize > 0) { + incr = tabsize - (line_pos % tabsize); /* cannot overflow */ + if (j > PY_SSIZE_T_MAX - incr) + goto overflow; + line_pos += incr; + j += incr; + } + } + else { + if (j > PY_SSIZE_T_MAX - 1) + goto overflow; + line_pos++; + j++; + if (ch == '\n' || ch == '\r') + line_pos = 0; + } + } + if (!found && PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + + /* Second pass: create output string and fill it */ + u = PyUnicode_New(j, PyUnicode_MAX_CHAR_VALUE(self)); + if (!u) + return NULL; + dest_data = PyUnicode_DATA(u); + + i = j = line_pos = 0; + + for (; i < src_len; i++) { + ch = PyUnicode_READ(kind, src_data, i); + if (ch == '\t') { + if (tabsize > 0) { + incr = tabsize - (line_pos % tabsize); + line_pos += incr; + while (incr--) { + PyUnicode_WRITE(kind, dest_data, j, ' '); + j++; + } + } + } + else { + line_pos++; + PyUnicode_WRITE(kind, dest_data, j, ch); + j++; + if (ch == '\n' || ch == '\r') + line_pos = 0; + } + } + assert (j == PyUnicode_GET_LENGTH(u)); + return unicode_result(u); + + overflow: + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; +} + +PyDoc_STRVAR(find__doc__, + "S.find(sub[, start[, end]]) -> int\n\ +\n\ +Return the lowest index in S where substring sub is found,\n\ +such that sub is contained within S[start:end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."); + +static PyObject * +unicode_find(PyObject *self, PyObject *args) +{ + PyObject *substring; + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t result; + + if (!stringlib_parse_args_finds_unicode("find", args, &substring, + &start, &end)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + if (PyUnicode_READY(substring) == -1) + return NULL; + + result = any_find_slice(1, self, substring, start, end); + + Py_DECREF(substring); + + if (result == -2) + return NULL; + + return PyLong_FromSsize_t(result); +} + +static PyObject * +unicode_getitem(PyObject *self, Py_ssize_t index) +{ + Py_UCS4 ch = PyUnicode_ReadChar(self, index); + if (ch == (Py_UCS4)-1) + return NULL; + return PyUnicode_FromOrdinal(ch); +} + +/* Believe it or not, this produces the same value for ASCII strings + as bytes_hash(). */ +static Py_hash_t +unicode_hash(PyObject *self) +{ + Py_ssize_t len; + Py_uhash_t x; + + if (_PyUnicode_HASH(self) != -1) + return _PyUnicode_HASH(self); + if (PyUnicode_READY(self) == -1) + return -1; + len = PyUnicode_GET_LENGTH(self); + + /* The hash function as a macro, gets expanded three times below. */ +#define HASH(P) \ + x = (Py_uhash_t)*P << 7; \ + while (--len >= 0) \ + x = (1000003*x) ^ (Py_uhash_t)*P++; + + switch (PyUnicode_KIND(self)) { + case PyUnicode_1BYTE_KIND: { + const unsigned char *c = PyUnicode_1BYTE_DATA(self); + HASH(c); + break; + } + case PyUnicode_2BYTE_KIND: { + const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); + HASH(s); + break; + } + default: { + Py_UCS4 *l; + assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && + "Impossible switch case in unicode_hash"); + l = PyUnicode_4BYTE_DATA(self); + HASH(l); + break; + } + } + x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); + + if (x == -1) + x = -2; + _PyUnicode_HASH(self) = x; + return x; +} +#undef HASH + +PyDoc_STRVAR(index__doc__, + "S.index(sub[, start[, end]]) -> int\n\ +\n\ +Like S.find() but raise ValueError when the substring is not found."); + +static PyObject * +unicode_index(PyObject *self, PyObject *args) +{ + Py_ssize_t result; + PyObject *substring; + Py_ssize_t start; + Py_ssize_t end; + + if (!stringlib_parse_args_finds_unicode("index", args, &substring, + &start, &end)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + if (PyUnicode_READY(substring) == -1) + return NULL; + + result = any_find_slice(1, self, substring, start, end); + + Py_DECREF(substring); + + if (result == -2) + return NULL; + + if (result < 0) { + PyErr_SetString(PyExc_ValueError, "substring not found"); + return NULL; + } + + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR(islower__doc__, + "S.islower() -> bool\n\ +\n\ +Return True if all cased characters in S are lowercase and there is\n\ +at least one cased character in S, False otherwise."); + +static PyObject* +unicode_islower(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + int cased; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISLOWER(PyUnicode_READ(kind, data, 0))); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + cased = 0; + for (i = 0; i < length; i++) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + + if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) + return PyBool_FromLong(0); + else if (!cased && Py_UNICODE_ISLOWER(ch)) + cased = 1; + } + return PyBool_FromLong(cased); +} + +PyDoc_STRVAR(isupper__doc__, + "S.isupper() -> bool\n\ +\n\ +Return True if all cased characters in S are uppercase and there is\n\ +at least one cased character in S, False otherwise."); + +static PyObject* +unicode_isupper(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + int cased; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISUPPER(PyUnicode_READ(kind, data, 0)) != 0); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + cased = 0; + for (i = 0; i < length; i++) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + + if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch)) + return PyBool_FromLong(0); + else if (!cased && Py_UNICODE_ISUPPER(ch)) + cased = 1; + } + return PyBool_FromLong(cased); +} + +PyDoc_STRVAR(istitle__doc__, + "S.istitle() -> bool\n\ +\n\ +Return True if S is a titlecased string and there is at least one\n\ +character in S, i.e. upper- and titlecase characters may only\n\ +follow uncased characters and lowercase characters only cased ones.\n\ +Return False otherwise."); + +static PyObject* +unicode_istitle(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + int cased, previous_is_cased; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) { + Py_UCS4 ch = PyUnicode_READ(kind, data, 0); + return PyBool_FromLong((Py_UNICODE_ISTITLE(ch) != 0) || + (Py_UNICODE_ISUPPER(ch) != 0)); + } + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + cased = 0; + previous_is_cased = 0; + for (i = 0; i < length; i++) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + + if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) { + if (previous_is_cased) + return PyBool_FromLong(0); + previous_is_cased = 1; + cased = 1; + } + else if (Py_UNICODE_ISLOWER(ch)) { + if (!previous_is_cased) + return PyBool_FromLong(0); + previous_is_cased = 1; + cased = 1; + } + else + previous_is_cased = 0; + } + return PyBool_FromLong(cased); +} + +PyDoc_STRVAR(isspace__doc__, + "S.isspace() -> bool\n\ +\n\ +Return True if all characters in S are whitespace\n\ +and there is at least one character in S, False otherwise."); + +static PyObject* +unicode_isspace(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, 0))); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + for (i = 0; i < length; i++) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISSPACE(ch)) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +PyDoc_STRVAR(isalpha__doc__, + "S.isalpha() -> bool\n\ +\n\ +Return True if all characters in S are alphabetic\n\ +and there is at least one character in S, False otherwise."); + +static PyObject* +unicode_isalpha(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, 0))); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + for (i = 0; i < length; i++) { + if (!Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, i))) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +PyDoc_STRVAR(isalnum__doc__, + "S.isalnum() -> bool\n\ +\n\ +Return True if all characters in S are alphanumeric\n\ +and there is at least one character in S, False otherwise."); + +static PyObject* +unicode_isalnum(PyObject *self) +{ + int kind; + void *data; + Py_ssize_t len, i; + + if (PyUnicode_READY(self) == -1) + return NULL; + + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + len = PyUnicode_GET_LENGTH(self); + + /* Shortcut for single character strings */ + if (len == 1) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, 0); + return PyBool_FromLong(Py_UNICODE_ISALNUM(ch)); + } + + /* Special case for empty strings */ + if (len == 0) + return PyBool_FromLong(0); + + for (i = 0; i < len; i++) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISALNUM(ch)) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +PyDoc_STRVAR(isdecimal__doc__, + "S.isdecimal() -> bool\n\ +\n\ +Return True if there are only decimal characters in S,\n\ +False otherwise."); + +static PyObject* +unicode_isdecimal(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, 0))); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + for (i = 0; i < length; i++) { + if (!Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, i))) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +PyDoc_STRVAR(isdigit__doc__, + "S.isdigit() -> bool\n\ +\n\ +Return True if all characters in S are digits\n\ +and there is at least one character in S, False otherwise."); + +static PyObject* +unicode_isdigit(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) { + const Py_UCS4 ch = PyUnicode_READ(kind, data, 0); + return PyBool_FromLong(Py_UNICODE_ISDIGIT(ch)); + } + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + for (i = 0; i < length; i++) { + if (!Py_UNICODE_ISDIGIT(PyUnicode_READ(kind, data, i))) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +PyDoc_STRVAR(isnumeric__doc__, + "S.isnumeric() -> bool\n\ +\n\ +Return True if there are only numeric characters in S,\n\ +False otherwise."); + +static PyObject* +unicode_isnumeric(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, 0))); + + /* Special case for empty strings */ + if (length == 0) + return PyBool_FromLong(0); + + for (i = 0; i < length; i++) { + if (!Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, i))) + return PyBool_FromLong(0); + } + return PyBool_FromLong(1); +} + +int +PyUnicode_IsIdentifier(PyObject *self) +{ + int kind; + void *data; + Py_ssize_t i; + Py_UCS4 first; + + if (PyUnicode_READY(self) == -1) { + Py_FatalError("identifier not ready"); + return 0; + } + + /* Special case for empty strings */ + if (PyUnicode_GET_LENGTH(self) == 0) + return 0; + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* PEP 3131 says that the first character must be in + XID_Start and subsequent characters in XID_Continue, + and for the ASCII range, the 2.x rules apply (i.e + start with letters and underscore, continue with + letters, digits, underscore). However, given the current + definition of XID_Start and XID_Continue, it is sufficient + to check just for these, except that _ must be allowed + as starting an identifier. */ + first = PyUnicode_READ(kind, data, 0); + if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */) + return 0; + + for (i = 1; i < PyUnicode_GET_LENGTH(self); i++) + if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i))) + return 0; + return 1; +} + +PyDoc_STRVAR(isidentifier__doc__, + "S.isidentifier() -> bool\n\ +\n\ +Return True if S is a valid identifier according\n\ +to the language definition."); + +static PyObject* +unicode_isidentifier(PyObject *self) +{ + return PyBool_FromLong(PyUnicode_IsIdentifier(self)); +} + +PyDoc_STRVAR(isprintable__doc__, + "S.isprintable() -> bool\n\ +\n\ +Return True if all characters in S are considered\n\ +printable in repr() or S is empty, False otherwise."); + +static PyObject* +unicode_isprintable(PyObject *self) +{ + Py_ssize_t i, length; + int kind; + void *data; + + if (PyUnicode_READY(self) == -1) + return NULL; + length = PyUnicode_GET_LENGTH(self); + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + + /* Shortcut for single character strings */ + if (length == 1) + return PyBool_FromLong( + Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, 0))); + + for (i = 0; i < length; i++) { + if (!Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, i))) { + Py_RETURN_FALSE; + } + } + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(join__doc__, + "S.join(iterable) -> str\n\ +\n\ +Return a string which is the concatenation of the strings in the\n\ +iterable. The separator between elements is S."); + +static PyObject* +unicode_join(PyObject *self, PyObject *data) +{ + return PyUnicode_Join(self, data); +} + +static Py_ssize_t +unicode_length(PyObject *self) +{ + if (PyUnicode_READY(self) == -1) + return -1; + return PyUnicode_GET_LENGTH(self); +} + +PyDoc_STRVAR(ljust__doc__, + "S.ljust(width[, fillchar]) -> str\n\ +\n\ +Return S left-justified in a Unicode string of length width. Padding is\n\ +done using the specified fill character (default is a space)."); + +static PyObject * +unicode_ljust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + Py_UCS4 fillchar = ' '; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) + return NULL; + + if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + + return pad(self, 0, width - _PyUnicode_LENGTH(self), fillchar); +} + +PyDoc_STRVAR(lower__doc__, + "S.lower() -> str\n\ +\n\ +Return a copy of the string S converted to lowercase."); + +static PyObject* +unicode_lower(PyObject *self) +{ + return fixup(self, fixlower); +} + +#define LEFTSTRIP 0 +#define RIGHTSTRIP 1 +#define BOTHSTRIP 2 + +/* Arrays indexed by above */ +static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; + +#define STRIPNAME(i) (stripformat[i]+3) + +/* externally visible for str.strip(unicode) */ +PyObject * +_PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) +{ + void *data; + int kind; + Py_ssize_t i, j, len; + BLOOM_MASK sepmask; + + if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1) + return NULL; + + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + len = PyUnicode_GET_LENGTH(self); + sepmask = make_bloom_mask(PyUnicode_KIND(sepobj), + PyUnicode_DATA(sepobj), + PyUnicode_GET_LENGTH(sepobj)); + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && + BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && + BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj)); + j++; + } + + return PyUnicode_Substring(self, i, j); +} + +PyObject* +PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) +{ + unsigned char *data; + int kind; + Py_ssize_t length; + + if (PyUnicode_READY(self) == -1) + return NULL; + + end = Py_MIN(end, PyUnicode_GET_LENGTH(self)); + + if (start == 0 && end == PyUnicode_GET_LENGTH(self)) + { + if (PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + else + return PyUnicode_Copy(self); + } + + length = end - start; + if (length == 1) + return unicode_getitem(self, start); + + if (start < 0 || end < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return NULL; + } + + if (PyUnicode_IS_ASCII(self)) { + kind = PyUnicode_KIND(self); + data = PyUnicode_1BYTE_DATA(self); + return unicode_fromascii(data + start, length); + } + else { + kind = PyUnicode_KIND(self); + data = PyUnicode_1BYTE_DATA(self); + return PyUnicode_FromKindAndData(kind, + data + kind * start, + length); + } +} + +static PyObject * +do_strip(PyObject *self, int striptype) +{ + int kind; + void *data; + Py_ssize_t len, i, j; + + if (PyUnicode_READY(self) == -1) + return NULL; + + kind = PyUnicode_KIND(self); + data = PyUnicode_DATA(self); + len = PyUnicode_GET_LENGTH(self); + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))); + j++; + } + + return PyUnicode_Substring(self, i, j); +} + + +static PyObject * +do_argstrip(PyObject *self, int striptype, PyObject *args) +{ + PyObject *sep = NULL; + + if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) + return NULL; + + if (sep != NULL && sep != Py_None) { + if (PyUnicode_Check(sep)) + return _PyUnicode_XStrip(self, striptype, sep); + else { + PyErr_Format(PyExc_TypeError, + "%s arg must be None or str", + STRIPNAME(striptype)); + return NULL; + } + } + + return do_strip(self, striptype); +} + + +PyDoc_STRVAR(strip__doc__, + "S.strip([chars]) -> str\n\ +\n\ +Return a copy of the string S with leading and trailing\n\ +whitespace removed.\n\ +If chars is given and not None, remove characters in chars instead."); + +static PyObject * +unicode_strip(PyObject *self, PyObject *args) +{ + if (PyTuple_GET_SIZE(args) == 0) + return do_strip(self, BOTHSTRIP); /* Common case */ + else + return do_argstrip(self, BOTHSTRIP, args); +} + + +PyDoc_STRVAR(lstrip__doc__, + "S.lstrip([chars]) -> str\n\ +\n\ +Return a copy of the string S with leading whitespace removed.\n\ +If chars is given and not None, remove characters in chars instead."); + +static PyObject * +unicode_lstrip(PyObject *self, PyObject *args) +{ + if (PyTuple_GET_SIZE(args) == 0) + return do_strip(self, LEFTSTRIP); /* Common case */ + else + return do_argstrip(self, LEFTSTRIP, args); +} + + +PyDoc_STRVAR(rstrip__doc__, + "S.rstrip([chars]) -> str\n\ +\n\ +Return a copy of the string S with trailing whitespace removed.\n\ +If chars is given and not None, remove characters in chars instead."); + +static PyObject * +unicode_rstrip(PyObject *self, PyObject *args) +{ + if (PyTuple_GET_SIZE(args) == 0) + return do_strip(self, RIGHTSTRIP); /* Common case */ + else + return do_argstrip(self, RIGHTSTRIP, args); +} + + +static PyObject* +unicode_repeat(PyObject *str, Py_ssize_t len) +{ + PyObject *u; + Py_ssize_t nchars, n; + + if (len < 1) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + + if (len == 1 && PyUnicode_CheckExact(str)) { + /* no repeat, return original string */ + Py_INCREF(str); + return str; + } + + if (PyUnicode_READY(str) == -1) + return NULL; + + if (PyUnicode_GET_LENGTH(str) > PY_SSIZE_T_MAX / len) { + PyErr_SetString(PyExc_OverflowError, + "repeated string is too long"); + return NULL; + } + nchars = len * PyUnicode_GET_LENGTH(str); + + u = PyUnicode_New(nchars, PyUnicode_MAX_CHAR_VALUE(str)); + if (!u) + return NULL; + assert(PyUnicode_KIND(u) == PyUnicode_KIND(str)); + + if (PyUnicode_GET_LENGTH(str) == 1) { + const int kind = PyUnicode_KIND(str); + const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); + void *to = PyUnicode_DATA(u); + if (kind == PyUnicode_1BYTE_KIND) + memset(to, (unsigned char)fill_char, len); + else { + for (n = 0; n < len; ++n) + PyUnicode_WRITE(kind, to, n, fill_char); + } + } + else { + /* number of characters copied this far */ + Py_ssize_t done = PyUnicode_GET_LENGTH(str); + const Py_ssize_t char_size = PyUnicode_KIND(str); + char *to = (char *) PyUnicode_DATA(u); + Py_MEMCPY(to, PyUnicode_DATA(str), + PyUnicode_GET_LENGTH(str) * char_size); + while (done < nchars) { + n = (done <= nchars-done) ? done : nchars-done; + Py_MEMCPY(to + (done * char_size), to, n * char_size); + done += n; + } + } + + assert(_PyUnicode_CheckConsistency(u, 1)); + return u; +} + +PyObject * +PyUnicode_Replace(PyObject *obj, + PyObject *subobj, + PyObject *replobj, + Py_ssize_t maxcount) +{ + PyObject *self; + PyObject *str1; + PyObject *str2; + PyObject *result; + + self = PyUnicode_FromObject(obj); + if (self == NULL || PyUnicode_READY(self) == -1) + return NULL; + str1 = PyUnicode_FromObject(subobj); + if (str1 == NULL || PyUnicode_READY(str1) == -1) { + Py_DECREF(self); + return NULL; + } + str2 = PyUnicode_FromObject(replobj); + if (str2 == NULL || PyUnicode_READY(str2)) { + Py_DECREF(self); + Py_DECREF(str1); + return NULL; + } + result = replace(self, str1, str2, maxcount); + Py_DECREF(self); + Py_DECREF(str1); + Py_DECREF(str2); + return result; +} + +PyDoc_STRVAR(replace__doc__, + "S.replace(old, new[, count]) -> str\n\ +\n\ +Return a copy of S with all occurrences of substring\n\ +old replaced by new. If the optional argument count is\n\ +given, only the first count occurrences are replaced."); + +static PyObject* +unicode_replace(PyObject *self, PyObject *args) +{ + PyObject *str1; + PyObject *str2; + Py_ssize_t maxcount = -1; + PyObject *result; + + if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount)) + return NULL; + if (!PyUnicode_READY(self) == -1) + return NULL; + str1 = PyUnicode_FromObject(str1); + if (str1 == NULL || PyUnicode_READY(str1) == -1) + return NULL; + str2 = PyUnicode_FromObject(str2); + if (str2 == NULL || PyUnicode_READY(str2) == -1) { + Py_DECREF(str1); + return NULL; + } + + result = replace(self, str1, str2, maxcount); + + Py_DECREF(str1); + Py_DECREF(str2); + return result; +} + +static PyObject * +unicode_repr(PyObject *unicode) +{ + PyObject *repr; + Py_ssize_t isize; + Py_ssize_t osize, squote, dquote, i, o; + Py_UCS4 max, quote; + int ikind, okind; + void *idata, *odata; + + if (PyUnicode_READY(unicode) == -1) + return NULL; + + isize = PyUnicode_GET_LENGTH(unicode); + idata = PyUnicode_DATA(unicode); + + /* Compute length of output, quote characters, and + maximum character */ + osize = 2; /* quotes */ + max = 127; + squote = dquote = 0; + ikind = PyUnicode_KIND(unicode); + for (i = 0; i < isize; i++) { + Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); + switch (ch) { + case '\'': squote++; osize++; break; + case '"': dquote++; osize++; break; + case '\\': case '\t': case '\r': case '\n': + osize += 2; break; + default: + /* Fast-path ASCII */ + if (ch < ' ' || ch == 0x7f) + osize += 4; /* \xHH */ + else if (ch < 0x7f) + osize++; + else if (Py_UNICODE_ISPRINTABLE(ch)) { + osize++; + max = ch > max ? ch : max; + } + else if (ch < 0x100) + osize += 4; /* \xHH */ + else if (ch < 0x10000) + osize += 6; /* \uHHHH */ + else + osize += 10; /* \uHHHHHHHH */ + } + } + + quote = '\''; + if (squote) { + if (dquote) + /* Both squote and dquote present. Use squote, + and escape them */ + osize += squote; + else + quote = '"'; + } + + repr = PyUnicode_New(osize, max); + if (repr == NULL) + return NULL; + okind = PyUnicode_KIND(repr); + odata = PyUnicode_DATA(repr); + + PyUnicode_WRITE(okind, odata, 0, quote); + PyUnicode_WRITE(okind, odata, osize-1, quote); + + for (i = 0, o = 1; i < isize; i++) { + Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); + + /* Escape quotes and backslashes */ + if ((ch == quote) || (ch == '\\')) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, ch); + continue; + } + + /* Map special whitespace to '\t', \n', '\r' */ + if (ch == '\t') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 't'); + } + else if (ch == '\n') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'n'); + } + else if (ch == '\r') { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'r'); + } + + /* Map non-printable US ASCII to '\xhh' */ + else if (ch < ' ' || ch == 0x7F) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } + + /* Copy ASCII characters as-is */ + else if (ch < 0x7F) { + PyUnicode_WRITE(okind, odata, o++, ch); + } + + /* Non-ASCII characters */ + else { + /* Map Unicode whitespace and control characters + (categories Z* and C* except ASCII space) + */ + if (!Py_UNICODE_ISPRINTABLE(ch)) { + /* Map 8-bit characters to '\xhh' */ + if (ch <= 0xff) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'x'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); + } + /* Map 21-bit characters to '\U00xxxxxx' */ + else if (ch >= 0x10000) { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'U'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } + /* Map 16-bit characters to '\uxxxx' */ + else { + PyUnicode_WRITE(okind, odata, o++, '\\'); + PyUnicode_WRITE(okind, odata, o++, 'u'); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); + PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); + } + } + /* Copy characters as-is */ + else { + PyUnicode_WRITE(okind, odata, o++, ch); + } + } + } + /* Closing quote already added at the beginning */ + assert(_PyUnicode_CheckConsistency(repr, 1)); + return repr; +} + +PyDoc_STRVAR(rfind__doc__, + "S.rfind(sub[, start[, end]]) -> int\n\ +\n\ +Return the highest index in S where substring sub is found,\n\ +such that sub is contained within S[start:end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."); + +static PyObject * +unicode_rfind(PyObject *self, PyObject *args) +{ + PyObject *substring; + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t result; + + if (!stringlib_parse_args_finds_unicode("rfind", args, &substring, + &start, &end)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + if (PyUnicode_READY(substring) == -1) + return NULL; + + result = any_find_slice(-1, self, substring, start, end); + + Py_DECREF(substring); + + if (result == -2) + return NULL; + + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR(rindex__doc__, + "S.rindex(sub[, start[, end]]) -> int\n\ +\n\ +Like S.rfind() but raise ValueError when the substring is not found."); + +static PyObject * +unicode_rindex(PyObject *self, PyObject *args) +{ + PyObject *substring; + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t result; + + if (!stringlib_parse_args_finds_unicode("rindex", args, &substring, + &start, &end)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + if (PyUnicode_READY(substring) == -1) + return NULL; + + result = any_find_slice(-1, self, substring, start, end); + + Py_DECREF(substring); + + if (result == -2) + return NULL; + + if (result < 0) { + PyErr_SetString(PyExc_ValueError, "substring not found"); + return NULL; + } + + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR(rjust__doc__, + "S.rjust(width[, fillchar]) -> str\n\ +\n\ +Return S right-justified in a string of length width. Padding is\n\ +done using the specified fill character (default is a space)."); + +static PyObject * +unicode_rjust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + Py_UCS4 fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) + return NULL; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + + return pad(self, width - _PyUnicode_LENGTH(self), 0, fillchar); +} + +PyObject * +PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) +{ + PyObject *result; + + s = PyUnicode_FromObject(s); + if (s == NULL) + return NULL; + if (sep != NULL) { + sep = PyUnicode_FromObject(sep); + if (sep == NULL) { + Py_DECREF(s); + return NULL; + } + } + + result = split(s, sep, maxsplit); + + Py_DECREF(s); + Py_XDECREF(sep); + return result; +} + +PyDoc_STRVAR(split__doc__, + "S.split([sep[, maxsplit]]) -> list of strings\n\ +\n\ +Return a list of the words in S, using sep as the\n\ +delimiter string. If maxsplit is given, at most maxsplit\n\ +splits are done. If sep is not specified or is None, any\n\ +whitespace string is a separator and empty strings are\n\ +removed from the result."); + +static PyObject* +unicode_split(PyObject *self, PyObject *args) +{ + PyObject *substring = Py_None; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount)) + return NULL; + + if (substring == Py_None) + return split(self, NULL, maxcount); + else if (PyUnicode_Check(substring)) + return split(self, substring, maxcount); + else + return PyUnicode_Split(self, substring, maxcount); +} + +PyObject * +PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) +{ + PyObject* str_obj; + PyObject* sep_obj; + PyObject* out; + int kind1, kind2, kind; + void *buf1 = NULL, *buf2 = NULL; + Py_ssize_t len1, len2; + + str_obj = PyUnicode_FromObject(str_in); + if (!str_obj || PyUnicode_READY(str_obj) == -1) + return NULL; + sep_obj = PyUnicode_FromObject(sep_in); + if (!sep_obj || PyUnicode_READY(sep_obj) == -1) { + Py_DECREF(str_obj); + return NULL; + } + + kind1 = PyUnicode_KIND(str_obj); + kind2 = PyUnicode_KIND(sep_obj); + kind = Py_MAX(kind1, kind2); + buf1 = PyUnicode_DATA(str_obj); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(str_obj, kind); + if (!buf1) + goto onError; + buf2 = PyUnicode_DATA(sep_obj); + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(sep_obj, kind); + if (!buf2) + goto onError; + len1 = PyUnicode_GET_LENGTH(str_obj); + len2 = PyUnicode_GET_LENGTH(sep_obj); + + switch(PyUnicode_KIND(str_obj)) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) + out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); + else + out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + case PyUnicode_2BYTE_KIND: + out = ucs2lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + case PyUnicode_4BYTE_KIND: + out = ucs4lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + default: + assert(0); + out = 0; + } + + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + return out; + onError: + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + if (kind1 != kind && buf1) + PyMem_Free(buf1); + if (kind2 != kind && buf2) + PyMem_Free(buf2); + return NULL; +} + + +PyObject * +PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) +{ + PyObject* str_obj; + PyObject* sep_obj; + PyObject* out; + int kind1, kind2, kind; + void *buf1 = NULL, *buf2 = NULL; + Py_ssize_t len1, len2; + + str_obj = PyUnicode_FromObject(str_in); + if (!str_obj) + return NULL; + sep_obj = PyUnicode_FromObject(sep_in); + if (!sep_obj) { + Py_DECREF(str_obj); + return NULL; + } + + kind1 = PyUnicode_KIND(str_in); + kind2 = PyUnicode_KIND(sep_obj); + kind = Py_MAX(kind1, kind2); + buf1 = PyUnicode_DATA(str_in); + if (kind1 != kind) + buf1 = _PyUnicode_AsKind(str_in, kind); + if (!buf1) + goto onError; + buf2 = PyUnicode_DATA(sep_obj); + if (kind2 != kind) + buf2 = _PyUnicode_AsKind(sep_obj, kind); + if (!buf2) + goto onError; + len1 = PyUnicode_GET_LENGTH(str_obj); + len2 = PyUnicode_GET_LENGTH(sep_obj); + + switch(PyUnicode_KIND(str_in)) { + case PyUnicode_1BYTE_KIND: + if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) + out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); + else + out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + case PyUnicode_2BYTE_KIND: + out = ucs2lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + case PyUnicode_4BYTE_KIND: + out = ucs4lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); + break; + default: + assert(0); + out = 0; + } + + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + if (kind1 != kind) + PyMem_Free(buf1); + if (kind2 != kind) + PyMem_Free(buf2); + + return out; + onError: + Py_DECREF(sep_obj); + Py_DECREF(str_obj); + if (kind1 != kind && buf1) + PyMem_Free(buf1); + if (kind2 != kind && buf2) + PyMem_Free(buf2); + return NULL; +} + +PyDoc_STRVAR(partition__doc__, + "S.partition(sep) -> (head, sep, tail)\n\ +\n\ +Search for the separator sep in S, and return the part before it,\n\ +the separator itself, and the part after it. If the separator is not\n\ +found, return S and two empty strings."); + +static PyObject* +unicode_partition(PyObject *self, PyObject *separator) +{ + return PyUnicode_Partition(self, separator); +} + +PyDoc_STRVAR(rpartition__doc__, + "S.rpartition(sep) -> (head, sep, tail)\n\ +\n\ +Search for the separator sep in S, starting at the end of S, and return\n\ +the part before it, the separator itself, and the part after it. If the\n\ +separator is not found, return two empty strings and S."); + +static PyObject* +unicode_rpartition(PyObject *self, PyObject *separator) +{ + return PyUnicode_RPartition(self, separator); +} + +PyObject * +PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) +{ + PyObject *result; + + s = PyUnicode_FromObject(s); + if (s == NULL) + return NULL; + if (sep != NULL) { + sep = PyUnicode_FromObject(sep); + if (sep == NULL) { + Py_DECREF(s); + return NULL; + } + } + + result = rsplit(s, sep, maxsplit); + + Py_DECREF(s); + Py_XDECREF(sep); + return result; +} + +PyDoc_STRVAR(rsplit__doc__, + "S.rsplit([sep[, maxsplit]]) -> list of strings\n\ +\n\ +Return a list of the words in S, using sep as the\n\ +delimiter string, starting at the end of the string and\n\ +working to the front. If maxsplit is given, at most maxsplit\n\ +splits are done. If sep is not specified, any whitespace string\n\ +is a separator."); + +static PyObject* +unicode_rsplit(PyObject *self, PyObject *args) +{ + PyObject *substring = Py_None; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount)) + return NULL; + + if (substring == Py_None) + return rsplit(self, NULL, maxcount); + else if (PyUnicode_Check(substring)) + return rsplit(self, substring, maxcount); + else + return PyUnicode_RSplit(self, substring, maxcount); +} + +PyDoc_STRVAR(splitlines__doc__, + "S.splitlines([keepends]) -> list of strings\n\ +\n\ +Return a list of the lines in S, breaking at line boundaries.\n\ +Line breaks are not included in the resulting list unless keepends\n\ +is given and true."); + +static PyObject* +unicode_splitlines(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"keepends", 0}; + int keepends = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", + kwlist, &keepends)) + return NULL; + + return PyUnicode_Splitlines(self, keepends); +} + +static +PyObject *unicode_str(PyObject *self) +{ + if (PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } else + /* Subtype -- return genuine unicode string with the same value. */ + return PyUnicode_Copy(self); +} + +PyDoc_STRVAR(swapcase__doc__, + "S.swapcase() -> str\n\ +\n\ +Return a copy of S with uppercase characters converted to lowercase\n\ +and vice versa."); + +static PyObject* +unicode_swapcase(PyObject *self) +{ + return fixup(self, fixswapcase); +} + +PyDoc_STRVAR(maketrans__doc__, + "str.maketrans(x[, y[, z]]) -> dict (static method)\n\ +\n\ +Return a translation table usable for str.translate().\n\ +If there is only one argument, it must be a dictionary mapping Unicode\n\ +ordinals (integers) or characters to Unicode ordinals, strings or None.\n\ +Character keys will be then converted to ordinals.\n\ +If there are two arguments, they must be strings of equal length, and\n\ +in the resulting dictionary, each character in x will be mapped to the\n\ +character at the same position in y. If there is a third argument, it\n\ +must be a string, whose characters will be mapped to None in the result."); + +static PyObject* +unicode_maketrans(PyObject *null, PyObject *args) +{ + PyObject *x, *y = NULL, *z = NULL; + PyObject *new = NULL, *key, *value; + Py_ssize_t i = 0; + int res; + + if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z)) + return NULL; + new = PyDict_New(); + if (!new) + return NULL; + if (y != NULL) { + int x_kind, y_kind, z_kind; + void *x_data, *y_data, *z_data; + + /* x must be a string too, of equal length */ + if (!PyUnicode_Check(x)) { + PyErr_SetString(PyExc_TypeError, "first maketrans argument must " + "be a string if there is a second argument"); + goto err; + } + if (PyUnicode_GET_LENGTH(x) != PyUnicode_GET_LENGTH(y)) { + PyErr_SetString(PyExc_ValueError, "the first two maketrans " + "arguments must have equal length"); + goto err; + } + /* create entries for translating chars in x to those in y */ + x_kind = PyUnicode_KIND(x); + y_kind = PyUnicode_KIND(y); + x_data = PyUnicode_DATA(x); + y_data = PyUnicode_DATA(y); + for (i = 0; i < PyUnicode_GET_LENGTH(x); i++) { + key = PyLong_FromLong(PyUnicode_READ(x_kind, x_data, i)); + value = PyLong_FromLong(PyUnicode_READ(y_kind, y_data, i)); + if (!key || !value) + goto err; + res = PyDict_SetItem(new, key, value); + Py_DECREF(key); + Py_DECREF(value); + if (res < 0) + goto err; + } + /* create entries for deleting chars in z */ + if (z != NULL) { + z_kind = PyUnicode_KIND(z); + z_data = PyUnicode_DATA(z); + for (i = 0; i < PyUnicode_GET_LENGTH(z); i++) { + key = PyLong_FromLong(PyUnicode_READ(z_kind, z_data, i)); + if (!key) + goto err; + res = PyDict_SetItem(new, key, Py_None); + Py_DECREF(key); + if (res < 0) + goto err; + } + } + } else { + int kind; + void *data; + + /* x must be a dict */ + if (!PyDict_CheckExact(x)) { + PyErr_SetString(PyExc_TypeError, "if you give only one argument " + "to maketrans it must be a dict"); + goto err; + } + /* copy entries into the new dict, converting string keys to int keys */ + while (PyDict_Next(x, &i, &key, &value)) { + if (PyUnicode_Check(key)) { + /* convert string keys to integer keys */ + PyObject *newkey; + if (PyUnicode_GET_LENGTH(key) != 1) { + PyErr_SetString(PyExc_ValueError, "string keys in translate " + "table must be of length 1"); + goto err; + } + kind = PyUnicode_KIND(key); + data = PyUnicode_DATA(key); + newkey = PyLong_FromLong(PyUnicode_READ(kind, data, 0)); + if (!newkey) + goto err; + res = PyDict_SetItem(new, newkey, value); + Py_DECREF(newkey); + if (res < 0) + goto err; + } else if (PyLong_Check(key)) { + /* just keep integer keys */ + if (PyDict_SetItem(new, key, value) < 0) + goto err; + } else { + PyErr_SetString(PyExc_TypeError, "keys in translate table must " + "be strings or integers"); + goto err; + } + } + } + return new; + err: + Py_DECREF(new); + return NULL; +} + +PyDoc_STRVAR(translate__doc__, + "S.translate(table) -> str\n\ +\n\ +Return a copy of the string S, where all characters have been mapped\n\ +through the given translation table, which must be a mapping of\n\ +Unicode ordinals to Unicode ordinals, strings, or None.\n\ +Unmapped characters are left untouched. Characters mapped to None\n\ +are deleted."); + +static PyObject* +unicode_translate(PyObject *self, PyObject *table) +{ + return _PyUnicode_TranslateCharmap(self, table, "ignore"); +} + +PyDoc_STRVAR(upper__doc__, + "S.upper() -> str\n\ +\n\ +Return a copy of S converted to uppercase."); + +static PyObject* +unicode_upper(PyObject *self) +{ + return fixup(self, fixupper); +} + +PyDoc_STRVAR(zfill__doc__, + "S.zfill(width) -> str\n\ +\n\ +Pad a numeric string S with zeros on the left, to fill a field\n\ +of the specified width. The string S is never truncated."); + +static PyObject * +unicode_zfill(PyObject *self, PyObject *args) +{ + Py_ssize_t fill; + PyObject *u; + Py_ssize_t width; + int kind; + void *data; + Py_UCS4 chr; + + if (PyUnicode_READY(self) == -1) + return NULL; + + if (!PyArg_ParseTuple(args, "n:zfill", &width)) + return NULL; + + if (PyUnicode_GET_LENGTH(self) >= width) { + if (PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } + else + return PyUnicode_Copy(self); + } + + fill = width - _PyUnicode_LENGTH(self); + + u = pad(self, fill, 0, '0'); + + if (u == NULL) + return NULL; + + kind = PyUnicode_KIND(u); + data = PyUnicode_DATA(u); + chr = PyUnicode_READ(kind, data, fill); + + if (chr == '+' || chr == '-') { + /* move sign to beginning of string */ + PyUnicode_WRITE(kind, data, 0, chr); + PyUnicode_WRITE(kind, data, fill, '0'); + } + + assert(_PyUnicode_CheckConsistency(u, 1)); + return u; +} + +#if 0 +static PyObject * +unicode__decimal2ascii(PyObject *self) +{ + return PyUnicode_TransformDecimalAndSpaceToASCII(self); +} +#endif + +PyDoc_STRVAR(startswith__doc__, + "S.startswith(prefix[, start[, end]]) -> bool\n\ +\n\ +Return True if S starts with the specified prefix, False otherwise.\n\ +With optional start, test S beginning at that position.\n\ +With optional end, stop comparing S at that position.\n\ +prefix can also be a tuple of strings to try."); + +static PyObject * +unicode_startswith(PyObject *self, + PyObject *args) +{ + PyObject *subobj; + PyObject *substring; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + int result; + + if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) + return NULL; + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + substring = PyUnicode_FromObject(PyTuple_GET_ITEM(subobj, i)); + if (substring == NULL) + return NULL; + result = tailmatch(self, substring, start, end, -1); + Py_DECREF(substring); + if (result) { + Py_RETURN_TRUE; + } + } + /* nothing matched */ + Py_RETURN_FALSE; + } + substring = PyUnicode_FromObject(subobj); + if (substring == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, "startswith first arg must be str or " + "a tuple of str, not %s", Py_TYPE(subobj)->tp_name); + return NULL; + } + result = tailmatch(self, substring, start, end, -1); + Py_DECREF(substring); + return PyBool_FromLong(result); +} + + +PyDoc_STRVAR(endswith__doc__, + "S.endswith(suffix[, start[, end]]) -> bool\n\ +\n\ +Return True if S ends with the specified suffix, False otherwise.\n\ +With optional start, test S beginning at that position.\n\ +With optional end, stop comparing S at that position.\n\ +suffix can also be a tuple of strings to try."); + +static PyObject * +unicode_endswith(PyObject *self, + PyObject *args) +{ + PyObject *subobj; + PyObject *substring; + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + int result; + + if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) + return NULL; + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + substring = PyUnicode_FromObject( + PyTuple_GET_ITEM(subobj, i)); + if (substring == NULL) + return NULL; + result = tailmatch(self, substring, start, end, +1); + Py_DECREF(substring); + if (result) { + Py_RETURN_TRUE; + } + } + Py_RETURN_FALSE; + } + substring = PyUnicode_FromObject(subobj); + if (substring == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, "endswith first arg must be str or " + "a tuple of str, not %s", Py_TYPE(subobj)->tp_name); + return NULL; + } + result = tailmatch(self, substring, start, end, +1); + Py_DECREF(substring); + return PyBool_FromLong(result); +} + +#include "stringlib/unicode_format.h" + +PyDoc_STRVAR(format__doc__, + "S.format(*args, **kwargs) -> str\n\ +\n\ +Return a formatted version of S, using substitutions from args and kwargs.\n\ +The substitutions are identified by braces ('{' and '}')."); + +PyDoc_STRVAR(format_map__doc__, + "S.format_map(mapping) -> str\n\ +\n\ +Return a formatted version of S, using substitutions from mapping.\n\ +The substitutions are identified by braces ('{' and '}')."); + +static PyObject * +unicode__format__(PyObject* self, PyObject* args) +{ + PyObject *format_spec, *out; + + if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) + return NULL; + + out = _PyUnicode_FormatAdvanced(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec)); + return out; +} + +PyDoc_STRVAR(p_format__doc__, + "S.__format__(format_spec) -> str\n\ +\n\ +Return a formatted version of S as described by format_spec."); + +static PyObject * +unicode__sizeof__(PyObject *v) +{ + Py_ssize_t size; + + /* If it's a compact object, account for base structure + + character data. */ + if (PyUnicode_IS_COMPACT_ASCII(v)) + size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1; + else if (PyUnicode_IS_COMPACT(v)) + size = sizeof(PyCompactUnicodeObject) + + (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v); + else { + /* If it is a two-block object, account for base object, and + for character block if present. */ + size = sizeof(PyUnicodeObject); + if (_PyUnicode_DATA_ANY(v)) + size += (PyUnicode_GET_LENGTH(v) + 1) * + PyUnicode_KIND(v); + } + /* If the wstr pointer is present, account for it unless it is shared + with the data pointer. Check if the data is not shared. */ + if (_PyUnicode_HAS_WSTR_MEMORY(v)) + size += (PyUnicode_WSTR_LENGTH(v) + 1) * sizeof(wchar_t); + if (_PyUnicode_HAS_UTF8_MEMORY(v)) + size += PyUnicode_UTF8_LENGTH(v) + 1; + + return PyLong_FromSsize_t(size); +} + +PyDoc_STRVAR(sizeof__doc__, + "S.__sizeof__() -> size of S in memory, in bytes"); + +static PyObject * +unicode_getnewargs(PyObject *v) +{ + PyObject *copy = PyUnicode_Copy(v); + if (!copy) + return NULL; + return Py_BuildValue("(N)", copy); +} + +static PyMethodDef unicode_methods[] = { + + /* Order is according to common usage: often used methods should + appear first, since lookup is done sequentially. */ + + {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, + {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, + {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, + {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, + {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, + {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, + {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, + {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, + {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, + {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__}, + {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, + {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, + {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, + {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, + {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, + {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, + {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, + {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, + {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, + {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, + {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, + {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, + {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, + {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, + {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, + {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, + {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, + {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, + {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__}, + {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__}, + {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__}, + {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__}, + {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__}, + {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__}, + {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__}, + {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, + {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, + {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, + {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, + {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, + {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, + {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__}, + {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, + {"maketrans", (PyCFunction) unicode_maketrans, + METH_VARARGS | METH_STATIC, maketrans__doc__}, + {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__}, +#if 0 + {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, +#endif + +#if 0 + /* These methods are just used for debugging the implementation. */ + {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, +#endif + + {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS}, + {NULL, NULL} +}; + +static PyObject * +unicode_mod(PyObject *v, PyObject *w) +{ + if (!PyUnicode_Check(v)) + Py_RETURN_NOTIMPLEMENTED; + return PyUnicode_Format(v, w); +} + +static PyNumberMethods unicode_as_number = { + 0, /*nb_add*/ + 0, /*nb_subtract*/ + 0, /*nb_multiply*/ + unicode_mod, /*nb_remainder*/ +}; + +static PySequenceMethods unicode_as_sequence = { + (lenfunc) unicode_length, /* sq_length */ + PyUnicode_Concat, /* sq_concat */ + (ssizeargfunc) unicode_repeat, /* sq_repeat */ + (ssizeargfunc) unicode_getitem, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + PyUnicode_Contains, /* sq_contains */ +}; + +static PyObject* +unicode_subscript(PyObject* self, PyObject* item) +{ + if (PyUnicode_READY(self) == -1) + return NULL; + + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + if (i == -1 && PyErr_Occurred()) + return NULL; + if (i < 0) + i += PyUnicode_GET_LENGTH(self); + return unicode_getitem(self, i); + } else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelength, cur, i; + PyObject *result; + void *src_data, *dest_data; + int src_kind, dest_kind; + Py_UCS4 ch, max_char, kind_limit; + + if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self), + &start, &stop, &step, &slicelength) < 0) { + return NULL; + } + + if (slicelength <= 0) { + return PyUnicode_New(0, 0); + } else if (start == 0 && step == 1 && + slicelength == PyUnicode_GET_LENGTH(self) && + PyUnicode_CheckExact(self)) { + Py_INCREF(self); + return self; + } else if (step == 1) { + return PyUnicode_Substring(self, + start, start + slicelength); + } + /* General case */ + src_kind = PyUnicode_KIND(self); + src_data = PyUnicode_DATA(self); + if (!PyUnicode_IS_ASCII(self)) { + kind_limit = kind_maxchar_limit(src_kind); + max_char = 0; + for (cur = start, i = 0; i < slicelength; cur += step, i++) { + ch = PyUnicode_READ(src_kind, src_data, cur); + if (ch > max_char) { + max_char = ch; + if (max_char >= kind_limit) + break; + } + } + } + else + max_char = 127; + result = PyUnicode_New(slicelength, max_char); + if (result == NULL) + return NULL; + dest_kind = PyUnicode_KIND(result); + dest_data = PyUnicode_DATA(result); + + for (cur = start, i = 0; i < slicelength; cur += step, i++) { + Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur); + PyUnicode_WRITE(dest_kind, dest_data, i, ch); + } + assert(_PyUnicode_CheckConsistency(result, 1)); + return result; + } else { + PyErr_SetString(PyExc_TypeError, "string indices must be integers"); + return NULL; + } +} + +static PyMappingMethods unicode_as_mapping = { + (lenfunc)unicode_length, /* mp_length */ + (binaryfunc)unicode_subscript, /* mp_subscript */ + (objobjargproc)0, /* mp_ass_subscript */ +}; + + +/* Helpers for PyUnicode_Format() */ + +static PyObject * +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{ + Py_ssize_t argidx = *p_argidx; + if (argidx < arglen) { + (*p_argidx)++; + if (arglen < 0) + return args; + else + return PyTuple_GetItem(args, argidx); + } + PyErr_SetString(PyExc_TypeError, + "not enough arguments for format string"); + return NULL; +} + +/* Returns a new reference to a PyUnicode object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{ + char *p; + PyObject *result; + double x; + + x = PyFloat_AsDouble(v); + if (x == -1.0 && PyErr_Occurred()) + return NULL; + + if (prec < 0) + prec = 6; + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + if (p == NULL) + return NULL; + result = PyUnicode_DecodeASCII(p, strlen(p), NULL); + PyMem_Free(p); + return result; +} + +static PyObject* +formatlong(PyObject *val, int flags, int prec, int type) +{ + char *buf; + int len; + PyObject *str; /* temporary string object. */ + PyObject *result; + + str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len); + if (!str) + return NULL; + result = PyUnicode_DecodeASCII(buf, len, NULL); + Py_DECREF(str); + return result; +} + +static Py_UCS4 +formatchar(PyObject *v) +{ + /* presume that the buffer is at least 3 characters long */ + if (PyUnicode_Check(v)) { + if (PyUnicode_GET_LENGTH(v) == 1) { + return PyUnicode_READ_CHAR(v, 0); + } + goto onError; + } + else { + /* Integer input truncated to a character */ + long x; + x = PyLong_AsLong(v); + if (x == -1 && PyErr_Occurred()) + goto onError; + + if (x < 0 || x > MAX_UNICODE) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x110000)"); + return (Py_UCS4) -1; + } + + return (Py_UCS4) x; + } + + onError: + PyErr_SetString(PyExc_TypeError, + "%c requires int or char"); + return (Py_UCS4) -1; +} + +static int +repeat_accumulate(_PyAccu *acc, PyObject *obj, Py_ssize_t count) +{ + int r; + assert(count > 0); + assert(PyUnicode_Check(obj)); + if (count > 5) { + PyObject *repeated = unicode_repeat(obj, count); + if (repeated == NULL) + return -1; + r = _PyAccu_Accumulate(acc, repeated); + Py_DECREF(repeated); + return r; + } + else { + do { + if (_PyAccu_Accumulate(acc, obj)) + return -1; + } while (--count); + return 0; + } +} + +PyObject * +PyUnicode_Format(PyObject *format, PyObject *args) +{ + void *fmt; + int fmtkind; + PyObject *result; + int kind; + int r; + Py_ssize_t fmtcnt, fmtpos, arglen, argidx; + int args_owned = 0; + PyObject *dict = NULL; + PyObject *temp = NULL; + PyObject *second = NULL; + PyObject *uformat; + _PyAccu acc; + static PyObject *plus, *minus, *blank, *zero, *percent; + + if (!plus && !(plus = get_latin1_char('+'))) + return NULL; + if (!minus && !(minus = get_latin1_char('-'))) + return NULL; + if (!blank && !(blank = get_latin1_char(' '))) + return NULL; + if (!zero && !(zero = get_latin1_char('0'))) + return NULL; + if (!percent && !(percent = get_latin1_char('%'))) + return NULL; + + if (format == NULL || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + uformat = PyUnicode_FromObject(format); + if (uformat == NULL || PyUnicode_READY(uformat) == -1) + return NULL; + if (_PyAccu_Init(&acc)) + goto onError; + fmt = PyUnicode_DATA(uformat); + fmtkind = PyUnicode_KIND(uformat); + fmtcnt = PyUnicode_GET_LENGTH(uformat); + fmtpos = 0; + + if (PyTuple_Check(args)) { + arglen = PyTuple_Size(args); + argidx = 0; + } + else { + arglen = -1; + argidx = -2; + } + if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) && + !PyUnicode_Check(args)) + dict = args; + + while (--fmtcnt >= 0) { + if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { + PyObject *nonfmt; + Py_ssize_t nonfmtpos; + nonfmtpos = fmtpos++; + while (fmtcnt >= 0 && + PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { + fmtpos++; + fmtcnt--; + } + nonfmt = PyUnicode_Substring(uformat, nonfmtpos, fmtpos); + if (nonfmt == NULL) + goto onError; + r = _PyAccu_Accumulate(&acc, nonfmt); + Py_DECREF(nonfmt); + if (r) + goto onError; + } + else { + /* Got a format specifier */ + int flags = 0; + Py_ssize_t width = -1; + int prec = -1; + Py_UCS4 c = '\0'; + Py_UCS4 fill, sign; + int isnumok; + PyObject *v = NULL; + void *pbuf = NULL; + Py_ssize_t pindex, len; + PyObject *signobj = NULL, *fillobj = NULL; + + fmtpos++; + if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(') { + Py_ssize_t keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; + + if (dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + goto onError; + } + ++fmtpos; + --fmtcnt; + keystart = fmtpos; + /* Skip over balanced parentheses */ + while (pcount > 0 && --fmtcnt >= 0) { + if (PyUnicode_READ(fmtkind, fmt, fmtpos) == ')') + --pcount; + else if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(') + ++pcount; + fmtpos++; + } + keylen = fmtpos - keystart - 1; + if (fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + goto onError; + } + key = PyUnicode_Substring(uformat, + keystart, keystart + keylen); + if (key == NULL) + goto onError; + if (args_owned) { + Py_DECREF(args); + args_owned = 0; + } + args = PyObject_GetItem(dict, key); + Py_DECREF(key); + if (args == NULL) { + goto onError; + } + args_owned = 1; + arglen = -1; + argidx = -2; + } + while (--fmtcnt >= 0) { + switch (c = PyUnicode_READ(fmtkind, fmt, fmtpos++)) { + case '-': flags |= F_LJUST; continue; + case '+': flags |= F_SIGN; continue; + case ' ': flags |= F_BLANK; continue; + case '#': flags |= F_ALT; continue; + case '0': flags |= F_ZERO; continue; + } + break; + } + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto onError; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto onError; + } + width = PyLong_AsLong(v); + if (width == -1 && PyErr_Occurred()) + goto onError; + if (width < 0) { + flags |= F_LJUST; + width = -width; + } + if (--fmtcnt >= 0) + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + } + else if (c >= '0' && c <= '9') { + width = c - '0'; + while (--fmtcnt >= 0) { + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + if (c < '0' || c > '9') + break; + if ((width*10) / 10 != width) { + PyErr_SetString(PyExc_ValueError, + "width too big"); + goto onError; + } + width = width*10 + (c - '0'); + } + } + if (c == '.') { + prec = 0; + if (--fmtcnt >= 0) + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + if (c == '*') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto onError; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + goto onError; + } + prec = PyLong_AsLong(v); + if (prec == -1 && PyErr_Occurred()) + goto onError; + if (prec < 0) + prec = 0; + if (--fmtcnt >= 0) + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + } + else if (c >= '0' && c <= '9') { + prec = c - '0'; + while (--fmtcnt >= 0) { + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + if (c < '0' || c > '9') + break; + if ((prec*10) / 10 != prec) { + PyErr_SetString(PyExc_ValueError, + "prec too big"); + goto onError; + } + prec = prec*10 + (c - '0'); + } + } + } /* prec */ + if (fmtcnt >= 0) { + if (c == 'h' || c == 'l' || c == 'L') { + if (--fmtcnt >= 0) + c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + } + } + if (fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + goto onError; + } + if (c != '%') { + v = getnextarg(args, arglen, &argidx); + if (v == NULL) + goto onError; + } + sign = 0; + fill = ' '; + fillobj = blank; + switch (c) { + + case '%': + _PyAccu_Accumulate(&acc, percent); + continue; + + case 's': + case 'r': + case 'a': + if (PyUnicode_CheckExact(v) && c == 's') { + temp = v; + Py_INCREF(temp); + } + else { + if (c == 's') + temp = PyObject_Str(v); + else if (c == 'r') + temp = PyObject_Repr(v); + else + temp = PyObject_ASCII(v); + if (temp == NULL) + goto onError; + if (PyUnicode_Check(temp)) + /* nothing to do */; + else { + Py_DECREF(temp); + PyErr_SetString(PyExc_TypeError, + "%s argument has non-string str()"); + goto onError; + } + } + if (PyUnicode_READY(temp) == -1) { + Py_CLEAR(temp); + goto onError; + } + pbuf = PyUnicode_DATA(temp); + kind = PyUnicode_KIND(temp); + len = PyUnicode_GET_LENGTH(temp); + if (prec >= 0 && len > prec) + len = prec; + break; + + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + isnumok = 0; + if (PyNumber_Check(v)) { + PyObject *iobj=NULL; + + if (PyLong_Check(v)) { + iobj = v; + Py_INCREF(iobj); + } + else { + iobj = PyNumber_Long(v); + } + if (iobj!=NULL) { + if (PyLong_Check(iobj)) { + isnumok = 1; + temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); + Py_DECREF(iobj); + if (!temp) + goto onError; + if (PyUnicode_READY(temp) == -1) { + Py_CLEAR(temp); + goto onError; + } + pbuf = PyUnicode_DATA(temp); + kind = PyUnicode_KIND(temp); + len = PyUnicode_GET_LENGTH(temp); + sign = 1; + } + else { + Py_DECREF(iobj); + } + } + } + if (!isnumok) { + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", (char)c, Py_TYPE(v)->tp_name); + goto onError; + } + if (flags & F_ZERO) { + fill = '0'; + fillobj = zero; + } + break; + + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + temp = formatfloat(v, flags, prec, c); + if (!temp) + goto onError; + if (PyUnicode_READY(temp) == -1) { + Py_CLEAR(temp); + goto onError; + } + pbuf = PyUnicode_DATA(temp); + kind = PyUnicode_KIND(temp); + len = PyUnicode_GET_LENGTH(temp); + sign = 1; + if (flags & F_ZERO) { + fill = '0'; + fillobj = zero; + } + break; + + case 'c': + { + Py_UCS4 ch = formatchar(v); + if (ch == (Py_UCS4) -1) + goto onError; + temp = _PyUnicode_FromUCS4(&ch, 1); + if (temp == NULL) + goto onError; + pbuf = PyUnicode_DATA(temp); + kind = PyUnicode_KIND(temp); + len = PyUnicode_GET_LENGTH(temp); + break; + } + + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + (31<=c && c<=126) ? (char)c : '?', + (int)c, + fmtpos - 1); + goto onError; + } + /* pbuf is initialized here. */ + pindex = 0; + if (sign) { + if (PyUnicode_READ(kind, pbuf, pindex) == '-') { + signobj = minus; + len--; + pindex++; + } + else if (PyUnicode_READ(kind, pbuf, pindex) == '+') { + signobj = plus; + len--; + pindex++; + } + else if (flags & F_SIGN) + signobj = plus; + else if (flags & F_BLANK) + signobj = blank; + else + sign = 0; + } + if (width < len) + width = len; + if (sign) { + if (fill != ' ') { + assert(signobj != NULL); + if (_PyAccu_Accumulate(&acc, signobj)) + goto onError; + } + if (width > len) + width--; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); + if (fill != ' ') { + second = get_latin1_char( + PyUnicode_READ(kind, pbuf, pindex + 1)); + pindex += 2; + if (second == NULL || + _PyAccu_Accumulate(&acc, zero) || + _PyAccu_Accumulate(&acc, second)) + goto onError; + Py_CLEAR(second); + } + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { + assert(fillobj != NULL); + if (repeat_accumulate(&acc, fillobj, width - len)) + goto onError; + width = len; + } + if (fill == ' ') { + if (sign) { + assert(signobj != NULL); + if (_PyAccu_Accumulate(&acc, signobj)) + goto onError; + } + if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); + second = get_latin1_char( + PyUnicode_READ(kind, pbuf, pindex + 1)); + pindex += 2; + if (second == NULL || + _PyAccu_Accumulate(&acc, zero) || + _PyAccu_Accumulate(&acc, second)) + goto onError; + Py_CLEAR(second); + } + } + /* Copy all characters, preserving len */ + if (temp != NULL) { + assert(pbuf == PyUnicode_DATA(temp)); + v = PyUnicode_Substring(temp, pindex, pindex + len); + } + else { + const char *p = (const char *) pbuf; + assert(pbuf != NULL); + p += kind * pindex; + v = PyUnicode_FromKindAndData(kind, p, len); + } + if (v == NULL) + goto onError; + r = _PyAccu_Accumulate(&acc, v); + Py_DECREF(v); + if (r) + goto onError; + if (width > len && repeat_accumulate(&acc, blank, width - len)) + goto onError; + if (dict && (argidx < arglen) && c != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during string formatting"); + goto onError; + } + Py_CLEAR(temp); + } /* '%' */ + } /* until end */ + if (argidx < arglen && !dict) { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during string formatting"); + goto onError; + } + + result = _PyAccu_Finish(&acc); + if (args_owned) { + Py_DECREF(args); + } + Py_DECREF(uformat); + Py_XDECREF(temp); + Py_XDECREF(second); + return result; + + onError: + Py_DECREF(uformat); + Py_XDECREF(temp); + Py_XDECREF(second); + _PyAccu_Destroy(&acc); + if (args_owned) { + Py_DECREF(args); + } + return NULL; +} + +static PyObject * +unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); + +static PyObject * +unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *x = NULL; + static char *kwlist[] = {"object", "encoding", "errors", 0}; + char *encoding = NULL; + char *errors = NULL; + + if (type != &PyUnicode_Type) + return unicode_subtype_new(type, args, kwds); + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", + kwlist, &x, &encoding, &errors)) + return NULL; + if (x == NULL) + return PyUnicode_New(0, 0); + if (encoding == NULL && errors == NULL) + return PyObject_Str(x); + else + return PyUnicode_FromEncodedObject(x, encoding, errors); +} + +static PyObject * +unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *unicode, *self; + Py_ssize_t length, char_size; + int share_wstr, share_utf8; + unsigned int kind; + void *data; + + assert(PyType_IsSubtype(type, &PyUnicode_Type)); + + unicode = unicode_new(&PyUnicode_Type, args, kwds); + if (unicode == NULL) + return NULL; + assert(_PyUnicode_CHECK(unicode)); + if (PyUnicode_READY(unicode)) + return NULL; + + self = type->tp_alloc(type, 0); + if (self == NULL) { + Py_DECREF(unicode); + return NULL; + } + kind = PyUnicode_KIND(unicode); + length = PyUnicode_GET_LENGTH(unicode); + + _PyUnicode_LENGTH(self) = length; +#ifdef Py_DEBUG + _PyUnicode_HASH(self) = -1; +#else + _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); +#endif + _PyUnicode_STATE(self).interned = 0; + _PyUnicode_STATE(self).kind = kind; + _PyUnicode_STATE(self).compact = 0; + _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; + _PyUnicode_STATE(self).ready = 1; + _PyUnicode_WSTR(self) = NULL; + _PyUnicode_UTF8_LENGTH(self) = 0; + _PyUnicode_UTF8(self) = NULL; + _PyUnicode_WSTR_LENGTH(self) = 0; + _PyUnicode_DATA_ANY(self) = NULL; + + share_utf8 = 0; + share_wstr = 0; + if (kind == PyUnicode_1BYTE_KIND) { + char_size = 1; + if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) + share_utf8 = 1; + } + else if (kind == PyUnicode_2BYTE_KIND) { + char_size = 2; + if (sizeof(wchar_t) == 2) + share_wstr = 1; + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + char_size = 4; + if (sizeof(wchar_t) == 4) + share_wstr = 1; + } + + /* Ensure we won't overflow the length. */ + if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + PyErr_NoMemory(); + goto onError; + } + data = PyObject_MALLOC((length + 1) * char_size); + if (data == NULL) { + PyErr_NoMemory(); + goto onError; + } + + _PyUnicode_DATA_ANY(self) = data; + if (share_utf8) { + _PyUnicode_UTF8_LENGTH(self) = length; + _PyUnicode_UTF8(self) = data; + } + if (share_wstr) { + _PyUnicode_WSTR_LENGTH(self) = length; + _PyUnicode_WSTR(self) = (wchar_t *)data; + } + + Py_MEMCPY(data, PyUnicode_DATA(unicode), + kind * (length + 1)); + assert(_PyUnicode_CheckConsistency(self, 1)); +#ifdef Py_DEBUG + _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); +#endif + Py_DECREF(unicode); + return self; + +onError: + Py_DECREF(unicode); + Py_DECREF(self); + return NULL; +} + +PyDoc_STRVAR(unicode_doc, + "str(string[, encoding[, errors]]) -> str\n\ +\n\ +Create a new string object from the given encoded string.\n\ +encoding defaults to the current default string encoding.\n\ +errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."); + +static PyObject *unicode_iter(PyObject *seq); + +PyTypeObject PyUnicode_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "str", /* tp_name */ + sizeof(PyUnicodeObject), /* tp_size */ + 0, /* tp_itemsize */ + /* Slots */ + (destructor)unicode_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + unicode_repr, /* tp_repr */ + &unicode_as_number, /* tp_as_number */ + &unicode_as_sequence, /* tp_as_sequence */ + &unicode_as_mapping, /* tp_as_mapping */ + (hashfunc) unicode_hash, /* tp_hash*/ + 0, /* tp_call*/ + (reprfunc) unicode_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */ + unicode_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + PyUnicode_RichCompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + unicode_iter, /* tp_iter */ + 0, /* tp_iternext */ + unicode_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + &PyBaseObject_Type, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + unicode_new, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + +/* Initialize the Unicode implementation */ + +int _PyUnicode_Init(void) +{ + int i; + + /* XXX - move this array to unicodectype.c ? */ + Py_UCS2 linebreak[] = { + 0x000A, /* LINE FEED */ + 0x000D, /* CARRIAGE RETURN */ + 0x001C, /* FILE SEPARATOR */ + 0x001D, /* GROUP SEPARATOR */ + 0x001E, /* RECORD SEPARATOR */ + 0x0085, /* NEXT LINE */ + 0x2028, /* LINE SEPARATOR */ + 0x2029, /* PARAGRAPH SEPARATOR */ + }; + + /* Init the implementation */ + unicode_empty = PyUnicode_New(0, 0); + if (!unicode_empty) + Py_FatalError("Can't create empty string"); + assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); + + for (i = 0; i < 256; i++) + unicode_latin1[i] = NULL; + if (PyType_Ready(&PyUnicode_Type) < 0) + Py_FatalError("Can't initialize 'unicode'"); + + /* initialize the linebreak bloom filter */ + bloom_linebreak = make_bloom_mask( + PyUnicode_2BYTE_KIND, linebreak, + Py_ARRAY_LENGTH(linebreak)); + + PyType_Ready(&EncodingMapType); + +#ifdef HAVE_MBCS + winver.dwOSVersionInfoSize = sizeof(winver); + if (!GetVersionEx((OSVERSIONINFO*)&winver)) { + PyErr_SetFromWindowsErr(0); + return -1; + } +#endif + return 0; +} + +/* Finalize the Unicode implementation */ + +int +PyUnicode_ClearFreeList(void) +{ + return 0; +} + +void +_PyUnicode_Fini(void) +{ + int i; + + Py_XDECREF(unicode_empty); + unicode_empty = NULL; + + for (i = 0; i < 256; i++) { + if (unicode_latin1[i]) { + Py_DECREF(unicode_latin1[i]); + unicode_latin1[i] = NULL; + } + } + _PyUnicode_ClearStaticStrings(); + (void)PyUnicode_ClearFreeList(); +} + +void +PyUnicode_InternInPlace(PyObject **p) +{ + register PyObject *s = *p; + PyObject *t; +#ifdef Py_DEBUG + assert(s != NULL); + assert(_PyUnicode_CHECK(s)); +#else + if (s == NULL || !PyUnicode_Check(s)) + return; +#endif + /* If it's a subclass, we don't really know what putting + it in the interned dict might do. */ + if (!PyUnicode_CheckExact(s)) + return; + if (PyUnicode_CHECK_INTERNED(s)) + return; + if (interned == NULL) { + interned = PyDict_New(); + if (interned == NULL) { + PyErr_Clear(); /* Don't leave an exception */ + return; + } + } + /* It might be that the GetItem call fails even + though the key is present in the dictionary, + namely when this happens during a stack overflow. */ + Py_ALLOW_RECURSION + t = PyDict_GetItem(interned, s); + Py_END_ALLOW_RECURSION + + if (t) { + Py_INCREF(t); + Py_DECREF(*p); + *p = t; + return; + } + + PyThreadState_GET()->recursion_critical = 1; + if (PyDict_SetItem(interned, s, s) < 0) { + PyErr_Clear(); + PyThreadState_GET()->recursion_critical = 0; + return; + } + PyThreadState_GET()->recursion_critical = 0; + /* The two references in interned are not counted by refcnt. + The deallocator will take care of this */ + Py_REFCNT(s) -= 2; + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; +} + +void +PyUnicode_InternImmortal(PyObject **p) +{ + PyUnicode_InternInPlace(p); + if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; + Py_INCREF(*p); + } +} + +PyObject * +PyUnicode_InternFromString(const char *cp) +{ + PyObject *s = PyUnicode_FromString(cp); + if (s == NULL) + return NULL; + PyUnicode_InternInPlace(&s); + return s; +} + +void +_Py_ReleaseInternedUnicodeStrings(void) +{ + PyObject *keys; + PyObject *s; + Py_ssize_t i, n; + Py_ssize_t immortal_size = 0, mortal_size = 0; + + if (interned == NULL || !PyDict_Check(interned)) + return; + keys = PyDict_Keys(interned); + if (keys == NULL || !PyList_Check(keys)) { + PyErr_Clear(); + return; + } + + /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak + detector, interned unicode strings are not forcibly deallocated; + rather, we give them their stolen references back, and then clear + and DECREF the interned dict. */ + + n = PyList_GET_SIZE(keys); + fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", + n); + for (i = 0; i < n; i++) { + s = PyList_GET_ITEM(keys, i); + if (PyUnicode_READY(s) == -1) { + assert(0 && "could not ready string"); + fprintf(stderr, "could not ready string\n"); + } + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_NOT_INTERNED: + /* XXX Shouldn't happen */ + break; + case SSTATE_INTERNED_IMMORTAL: + Py_REFCNT(s) += 1; + immortal_size += PyUnicode_GET_LENGTH(s); + break; + case SSTATE_INTERNED_MORTAL: + Py_REFCNT(s) += 2; + mortal_size += PyUnicode_GET_LENGTH(s); + break; + default: + Py_FatalError("Inconsistent interned string state."); + } + _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + } + fprintf(stderr, "total size of all interned strings: " + "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " + "mortal/immortal\n", mortal_size, immortal_size); + Py_DECREF(keys); + PyDict_Clear(interned); + Py_DECREF(interned); + interned = NULL; +} + + +/********************* Unicode Iterator **************************/ + +typedef struct { + PyObject_HEAD + Py_ssize_t it_index; + PyObject *it_seq; /* Set to NULL when iterator is exhausted */ +} unicodeiterobject; + +static void +unicodeiter_dealloc(unicodeiterobject *it) +{ + _PyObject_GC_UNTRACK(it); + Py_XDECREF(it->it_seq); + PyObject_GC_Del(it); +} + +static int +unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg) +{ + Py_VISIT(it->it_seq); + return 0; +} + +static PyObject * +unicodeiter_next(unicodeiterobject *it) +{ + PyObject *seq, *item; + + assert(it != NULL); + seq = it->it_seq; + if (seq == NULL) + return NULL; + assert(_PyUnicode_CHECK(seq)); + + if (it->it_index < PyUnicode_GET_LENGTH(seq)) { + int kind = PyUnicode_KIND(seq); + void *data = PyUnicode_DATA(seq); + Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index); + item = PyUnicode_FromOrdinal(chr); + if (item != NULL) + ++it->it_index; + return item; + } + + Py_DECREF(seq); + it->it_seq = NULL; + return NULL; +} + +static PyObject * +unicodeiter_len(unicodeiterobject *it) +{ + Py_ssize_t len = 0; + if (it->it_seq) + len = PyUnicode_GET_LENGTH(it->it_seq) - it->it_index; + return PyLong_FromSsize_t(len); +} + +PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); + +static PyMethodDef unicodeiter_methods[] = { + {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS, + length_hint_doc}, + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyUnicodeIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "str_iterator", /* tp_name */ + sizeof(unicodeiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)unicodeiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + 0, /* tp_doc */ + (traverseproc)unicodeiter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)unicodeiter_next, /* tp_iternext */ + unicodeiter_methods, /* tp_methods */ + 0, +}; + +static PyObject * +unicode_iter(PyObject *seq) +{ + unicodeiterobject *it; + + if (!PyUnicode_Check(seq)) { + PyErr_BadInternalCall(); + return NULL; + } + if (PyUnicode_READY(seq) == -1) + return NULL; + it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type); + if (it == NULL) + return NULL; + it->it_index = 0; + Py_INCREF(seq); + it->it_seq = seq; + _PyObject_GC_TRACK(it); + return (PyObject *)it; +} + + +size_t +Py_UNICODE_strlen(const Py_UNICODE *u) +{ + int res = 0; + while(*u++) + res++; + return res; +} + +Py_UNICODE* +Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)); + return s1; +} + +Py_UNICODE* +Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)) + if (n-- == 0) + break; + return s1; +} + +Py_UNICODE* +Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u1 = s1; + u1 += Py_UNICODE_strlen(u1); + Py_UNICODE_strcpy(u1, s2); + return s1; +} + +int +Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) +{ + while (*s1 && *s2 && *s1 == *s2) + s1++, s2++; + if (*s1 && *s2) + return (*s1 < *s2) ? -1 : +1; + if (*s1) + return 1; + if (*s2) + return -1; + return 0; +} + +int +Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + register Py_UNICODE u1, u2; + for (; n != 0; n--) { + u1 = *s1; + u2 = *s2; + if (u1 != u2) + return (u1 < u2) ? -1 : +1; + if (u1 == '\0') + return 0; + s1++; + s2++; + } + return 0; +} + +Py_UNICODE* +Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + for (p = s; *p; p++) + if (*p == c) + return (Py_UNICODE*)p; + return NULL; +} + +Py_UNICODE* +Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + p = s + Py_UNICODE_strlen(s); + while (p != s) { + p--; + if (*p == c) + return (Py_UNICODE*)p; + } + return NULL; +} + +Py_UNICODE* +PyUnicode_AsUnicodeCopy(PyObject *unicode) +{ + Py_UNICODE *u, *copy; + Py_ssize_t len, size; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + u = PyUnicode_AsUnicodeAndSize(unicode, &len); + if (u == NULL) + return NULL; + /* Ensure we won't overflow the size. */ + if (len > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + PyErr_NoMemory(); + return NULL; + } + size = len + 1; /* copy the null character */ + size *= sizeof(Py_UNICODE); + copy = PyMem_Malloc(size); + if (copy == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(copy, u, size); + return copy; +} + +/* A _string module, to export formatter_parser and formatter_field_name_split + to the string.Formatter class implemented in Python. */ + +static PyMethodDef _string_methods[] = { + {"formatter_field_name_split", (PyCFunction) formatter_field_name_split, + METH_O, PyDoc_STR("split the argument as a field name")}, + {"formatter_parser", (PyCFunction) formatter_parser, + METH_O, PyDoc_STR("parse the argument as a format string")}, + {NULL, NULL} +}; + +static struct PyModuleDef _string_module = { + PyModuleDef_HEAD_INIT, + "_string", + PyDoc_STR("string helper module"), + 0, + _string_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__string(void) +{ + return PyModule_Create(&_string_module); +} + + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/bltinmodule.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/bltinmodule.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,2413 @@ +/* Built-in functions */ + +#include "Python.h" +#include "Python-ast.h" + +#include "node.h" +#include "code.h" + +#include "asdl.h" +#include "ast.h" + +#include + +#ifdef HAVE_LANGINFO_H +#include /* CODESET */ +#endif + +/* The default encoding used by the platform file system APIs + Can remain NULL for all platforms that don't have such a concept + + Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the + values for Py_FileSystemDefaultEncoding! +*/ +#ifdef HAVE_MBCS +const char *Py_FileSystemDefaultEncoding = "mbcs"; +int Py_HasFileSystemDefaultEncoding = 1; +#elif defined(__APPLE__) +const char *Py_FileSystemDefaultEncoding = "utf-8"; +int Py_HasFileSystemDefaultEncoding = 1; +#else +const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ +int Py_HasFileSystemDefaultEncoding = 0; +#endif + +_Py_IDENTIFIER(fileno); +_Py_IDENTIFIER(flush); + +static PyObject * +builtin___build_class__(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *func, *name, *bases, *mkw, *meta, *winner, *prep, *ns, *cell; + PyObject *cls = NULL; + Py_ssize_t nargs; + int isclass; + _Py_IDENTIFIER(__prepare__); + + assert(args != NULL); + if (!PyTuple_Check(args)) { + PyErr_SetString(PyExc_TypeError, + "__build_class__: args is not a tuple"); + return NULL; + } + nargs = PyTuple_GET_SIZE(args); + if (nargs < 2) { + PyErr_SetString(PyExc_TypeError, + "__build_class__: not enough arguments"); + return NULL; + } + func = PyTuple_GET_ITEM(args, 0); /* Better be callable */ + name = PyTuple_GET_ITEM(args, 1); + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, + "__build_class__: name is not a string"); + return NULL; + } + bases = PyTuple_GetSlice(args, 2, nargs); + if (bases == NULL) + return NULL; + + if (kwds == NULL) { + meta = NULL; + mkw = NULL; + } + else { + mkw = PyDict_Copy(kwds); /* Don't modify kwds passed in! */ + if (mkw == NULL) { + Py_DECREF(bases); + return NULL; + } + meta = PyDict_GetItemString(mkw, "metaclass"); + if (meta != NULL) { + Py_INCREF(meta); + if (PyDict_DelItemString(mkw, "metaclass") < 0) { + Py_DECREF(meta); + Py_DECREF(mkw); + Py_DECREF(bases); + return NULL; + } + /* metaclass is explicitly given, check if it's indeed a class */ + isclass = PyType_Check(meta); + } + } + if (meta == NULL) { + /* if there are no bases, use type: */ + if (PyTuple_GET_SIZE(bases) == 0) { + meta = (PyObject *) (&PyType_Type); + } + /* else get the type of the first base */ + else { + PyObject *base0 = PyTuple_GET_ITEM(bases, 0); + meta = (PyObject *) (base0->ob_type); + } + Py_INCREF(meta); + isclass = 1; /* meta is really a class */ + } + + if (isclass) { + /* meta is really a class, so check for a more derived + metaclass, or possible metaclass conflicts: */ + winner = (PyObject *)_PyType_CalculateMetaclass((PyTypeObject *)meta, + bases); + if (winner == NULL) { + Py_DECREF(meta); + Py_XDECREF(mkw); + Py_DECREF(bases); + return NULL; + } + if (winner != meta) { + Py_DECREF(meta); + meta = winner; + Py_INCREF(meta); + } + } + /* else: meta is not a class, so we cannot do the metaclass + calculation, so we will use the explicitly given object as it is */ + prep = _PyObject_GetAttrId(meta, &PyId___prepare__); + if (prep == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + ns = PyDict_New(); + } + else { + Py_DECREF(meta); + Py_XDECREF(mkw); + Py_DECREF(bases); + return NULL; + } + } + else { + PyObject *pargs = PyTuple_Pack(2, name, bases); + if (pargs == NULL) { + Py_DECREF(prep); + Py_DECREF(meta); + Py_XDECREF(mkw); + Py_DECREF(bases); + return NULL; + } + ns = PyEval_CallObjectWithKeywords(prep, pargs, mkw); + Py_DECREF(pargs); + Py_DECREF(prep); + } + if (ns == NULL) { + Py_DECREF(meta); + Py_XDECREF(mkw); + Py_DECREF(bases); + return NULL; + } + cell = PyObject_CallFunctionObjArgs(func, ns, NULL); + if (cell != NULL) { + PyObject *margs; + margs = PyTuple_Pack(3, name, bases, ns); + if (margs != NULL) { + cls = PyEval_CallObjectWithKeywords(meta, margs, mkw); + Py_DECREF(margs); + } + if (cls != NULL && PyCell_Check(cell)) { + Py_INCREF(cls); + PyCell_SET(cell, cls); + } + Py_DECREF(cell); + } + Py_DECREF(ns); + Py_DECREF(meta); + Py_XDECREF(mkw); + Py_DECREF(bases); + return cls; +} + +PyDoc_STRVAR(build_class_doc, +"__build_class__(func, name, *bases, metaclass=None, **kwds) -> class\n\ +\n\ +Internal helper function used by the class statement."); + +static PyObject * +builtin___import__(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"name", "globals", "locals", "fromlist", + "level", 0}; + PyObject *name, *globals = NULL, *locals = NULL, *fromlist = NULL; + int level = -1; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "U|OOOi:__import__", + kwlist, &name, &globals, &locals, &fromlist, &level)) + return NULL; + return PyImport_ImportModuleLevelObject(name, globals, locals, + fromlist, level); +} + +PyDoc_STRVAR(import_doc, +"__import__(name, globals={}, locals={}, fromlist=[], level=-1) -> module\n\ +\n\ +Import a module. Because this function is meant for use by the Python\n\ +interpreter and not for general use it is better to use\n\ +importlib.import_module() to programmatically import a module.\n\ +\n\ +The globals argument is only used to determine the context;\n\ +they are not modified. The locals argument is unused. The fromlist\n\ +should be a list of names to emulate ``from name import ...'', or an\n\ +empty list to emulate ``import name''.\n\ +When importing a module from a package, note that __import__('A.B', ...)\n\ +returns package A when fromlist is empty, but its submodule B when\n\ +fromlist is not empty. Level is used to determine whether to perform \n\ +absolute or relative imports. -1 is the original strategy of attempting\n\ +both absolute and relative imports, 0 is absolute, a positive number\n\ +is the number of parent directories to search relative to the current module."); + + +static PyObject * +builtin_abs(PyObject *self, PyObject *v) +{ + return PyNumber_Absolute(v); +} + +PyDoc_STRVAR(abs_doc, +"abs(number) -> number\n\ +\n\ +Return the absolute value of the argument."); + +static PyObject * +builtin_all(PyObject *self, PyObject *v) +{ + PyObject *it, *item; + PyObject *(*iternext)(PyObject *); + int cmp; + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + iternext = *Py_TYPE(it)->tp_iternext; + + for (;;) { + item = iternext(it); + if (item == NULL) + break; + cmp = PyObject_IsTrue(item); + Py_DECREF(item); + if (cmp < 0) { + Py_DECREF(it); + return NULL; + } + if (cmp == 0) { + Py_DECREF(it); + Py_RETURN_FALSE; + } + } + Py_DECREF(it); + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) + PyErr_Clear(); + else + return NULL; + } + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(all_doc, +"all(iterable) -> bool\n\ +\n\ +Return True if bool(x) is True for all values x in the iterable."); + +static PyObject * +builtin_any(PyObject *self, PyObject *v) +{ + PyObject *it, *item; + PyObject *(*iternext)(PyObject *); + int cmp; + + it = PyObject_GetIter(v); + if (it == NULL) + return NULL; + iternext = *Py_TYPE(it)->tp_iternext; + + for (;;) { + item = iternext(it); + if (item == NULL) + break; + cmp = PyObject_IsTrue(item); + Py_DECREF(item); + if (cmp < 0) { + Py_DECREF(it); + return NULL; + } + if (cmp == 1) { + Py_DECREF(it); + Py_RETURN_TRUE; + } + } + Py_DECREF(it); + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) + PyErr_Clear(); + else + return NULL; + } + Py_RETURN_FALSE; +} + +PyDoc_STRVAR(any_doc, +"any(iterable) -> bool\n\ +\n\ +Return True if bool(x) is True for any x in the iterable."); + +static PyObject * +builtin_ascii(PyObject *self, PyObject *v) +{ + return PyObject_ASCII(v); +} + +PyDoc_STRVAR(ascii_doc, +"ascii(object) -> string\n\ +\n\ +As repr(), return a string containing a printable representation of an\n\ +object, but escape the non-ASCII characters in the string returned by\n\ +repr() using \\x, \\u or \\U escapes. This generates a string similar\n\ +to that returned by repr() in Python 2."); + + +static PyObject * +builtin_bin(PyObject *self, PyObject *v) +{ + return PyNumber_ToBase(v, 2); +} + +PyDoc_STRVAR(bin_doc, +"bin(number) -> string\n\ +\n\ +Return the binary representation of an integer."); + + +static PyObject * +builtin_callable(PyObject *self, PyObject *v) +{ + return PyBool_FromLong((long)PyCallable_Check(v)); +} + +PyDoc_STRVAR(callable_doc, +"callable(object) -> bool\n\ +\n\ +Return whether the object is callable (i.e., some kind of function).\n\ +Note that classes are callable, as are instances of classes with a\n\ +__call__() method."); + + +typedef struct { + PyObject_HEAD + PyObject *func; + PyObject *it; +} filterobject; + +static PyObject * +filter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *func, *seq; + PyObject *it; + filterobject *lz; + + if (type == &PyFilter_Type && !_PyArg_NoKeywords("filter()", kwds)) + return NULL; + + if (!PyArg_UnpackTuple(args, "filter", 2, 2, &func, &seq)) + return NULL; + + /* Get iterator. */ + it = PyObject_GetIter(seq); + if (it == NULL) + return NULL; + + /* create filterobject structure */ + lz = (filterobject *)type->tp_alloc(type, 0); + if (lz == NULL) { + Py_DECREF(it); + return NULL; + } + Py_INCREF(func); + lz->func = func; + lz->it = it; + + return (PyObject *)lz; +} + +static void +filter_dealloc(filterobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->func); + Py_XDECREF(lz->it); + Py_TYPE(lz)->tp_free(lz); +} + +static int +filter_traverse(filterobject *lz, visitproc visit, void *arg) +{ + Py_VISIT(lz->it); + Py_VISIT(lz->func); + return 0; +} + +static PyObject * +filter_next(filterobject *lz) +{ + PyObject *item; + PyObject *it = lz->it; + long ok; + PyObject *(*iternext)(PyObject *); + + iternext = *Py_TYPE(it)->tp_iternext; + for (;;) { + item = iternext(it); + if (item == NULL) + return NULL; + + if (lz->func == Py_None || lz->func == (PyObject *)&PyBool_Type) { + ok = PyObject_IsTrue(item); + } else { + PyObject *good; + good = PyObject_CallFunctionObjArgs(lz->func, + item, NULL); + if (good == NULL) { + Py_DECREF(item); + return NULL; + } + ok = PyObject_IsTrue(good); + Py_DECREF(good); + } + if (ok) + return item; + Py_DECREF(item); + } +} + +PyDoc_STRVAR(filter_doc, +"filter(function or None, iterable) --> filter object\n\ +\n\ +Return an iterator yielding those items of iterable for which function(item)\n\ +is true. If function is None, return the items that are true."); + +PyTypeObject PyFilter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "filter", /* tp_name */ + sizeof(filterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)filter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + filter_doc, /* tp_doc */ + (traverseproc)filter_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)filter_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + filter_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + +static PyObject * +builtin_format(PyObject *self, PyObject *args) +{ + PyObject *value; + PyObject *format_spec = NULL; + + if (!PyArg_ParseTuple(args, "O|U:format", &value, &format_spec)) + return NULL; + + return PyObject_Format(value, format_spec); +} + +PyDoc_STRVAR(format_doc, +"format(value[, format_spec]) -> string\n\ +\n\ +Returns value.__format__(format_spec)\n\ +format_spec defaults to \"\""); + +static PyObject * +builtin_chr(PyObject *self, PyObject *args) +{ + int x; + + if (!PyArg_ParseTuple(args, "i:chr", &x)) + return NULL; + + return PyUnicode_FromOrdinal(x); +} + +PyDoc_STRVAR(chr_doc, +"chr(i) -> Unicode character\n\ +\n\ +Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."); + + +static char * +source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf) +{ + char *str; + Py_ssize_t size; + + if (PyUnicode_Check(cmd)) { + cf->cf_flags |= PyCF_IGNORE_COOKIE; + str = PyUnicode_AsUTF8AndSize(cmd, &size); + if (str == NULL) + return NULL; + } + else if (!PyObject_CheckReadBuffer(cmd)) { + PyErr_Format(PyExc_TypeError, + "%s() arg 1 must be a %s object", + funcname, what); + return NULL; + } + else if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) { + return NULL; + } + + if (strlen(str) != size) { + PyErr_SetString(PyExc_TypeError, + "source code string cannot contain null bytes"); + return NULL; + } + return str; +} + +static PyObject * +builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) +{ + char *str; + PyObject *filename_obj; + char *filename; + char *startstr; + int mode = -1; + int dont_inherit = 0; + int supplied_flags = 0; + int optimize = -1; + int is_ast; + PyCompilerFlags cf; + PyObject *cmd; + static char *kwlist[] = {"source", "filename", "mode", "flags", + "dont_inherit", "optimize", NULL}; + int start[] = {Py_file_input, Py_eval_input, Py_single_input}; + PyObject *result; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|iii:compile", kwlist, + &cmd, + PyUnicode_FSConverter, &filename_obj, + &startstr, &supplied_flags, + &dont_inherit, &optimize)) + return NULL; + + filename = PyBytes_AS_STRING(filename_obj); + cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; + + if (supplied_flags & + ~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST)) + { + PyErr_SetString(PyExc_ValueError, + "compile(): unrecognised flags"); + goto error; + } + /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ + + if (optimize < -1 || optimize > 2) { + PyErr_SetString(PyExc_ValueError, + "compile(): invalid optimize value"); + goto error; + } + + if (!dont_inherit) { + PyEval_MergeCompilerFlags(&cf); + } + + if (strcmp(startstr, "exec") == 0) + mode = 0; + else if (strcmp(startstr, "eval") == 0) + mode = 1; + else if (strcmp(startstr, "single") == 0) + mode = 2; + else { + PyErr_SetString(PyExc_ValueError, + "compile() arg 3 must be 'exec', 'eval' or 'single'"); + goto error; + } + + is_ast = PyAST_Check(cmd); + if (is_ast == -1) + goto error; + if (is_ast) { + if (supplied_flags & PyCF_ONLY_AST) { + Py_INCREF(cmd); + result = cmd; + } + else { + PyArena *arena; + mod_ty mod; + + arena = PyArena_New(); + mod = PyAST_obj2mod(cmd, arena, mode); + if (mod == NULL) { + PyArena_Free(arena); + goto error; + } + if (!PyAST_Validate(mod)) { + PyArena_Free(arena); + goto error; + } + result = (PyObject*)PyAST_CompileEx(mod, filename, + &cf, optimize, arena); + PyArena_Free(arena); + } + goto finally; + } + + str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf); + if (str == NULL) + goto error; + + result = Py_CompileStringExFlags(str, filename, start[mode], &cf, optimize); + goto finally; + +error: + result = NULL; +finally: + Py_DECREF(filename_obj); + return result; +} + +PyDoc_STRVAR(compile_doc, +"compile(source, filename, mode[, flags[, dont_inherit]]) -> code object\n\ +\n\ +Compile the source string (a Python module, statement or expression)\n\ +into a code object that can be executed by exec() or eval().\n\ +The filename will be used for run-time error messages.\n\ +The mode must be 'exec' to compile a module, 'single' to compile a\n\ +single (interactive) statement, or 'eval' to compile an expression.\n\ +The flags argument, if present, controls which future statements influence\n\ +the compilation of the code.\n\ +The dont_inherit argument, if non-zero, stops the compilation inheriting\n\ +the effects of any future statements in effect in the code calling\n\ +compile; if absent or zero these statements do influence the compilation,\n\ +in addition to any features explicitly specified."); + +static PyObject * +builtin_dir(PyObject *self, PyObject *args) +{ + PyObject *arg = NULL; + + if (!PyArg_UnpackTuple(args, "dir", 0, 1, &arg)) + return NULL; + return PyObject_Dir(arg); +} + +PyDoc_STRVAR(dir_doc, +"dir([object]) -> list of strings\n" +"\n" +"If called without an argument, return the names in the current scope.\n" +"Else, return an alphabetized list of names comprising (some of) the attributes\n" +"of the given object, and of attributes reachable from it.\n" +"If the object supplies a method named __dir__, it will be used; otherwise\n" +"the default dir() logic is used and returns:\n" +" for a module object: the module's attributes.\n" +" for a class object: its attributes, and recursively the attributes\n" +" of its bases.\n" +" for any other object: its attributes, its class's attributes, and\n" +" recursively the attributes of its class's base classes."); + +static PyObject * +builtin_divmod(PyObject *self, PyObject *args) +{ + PyObject *v, *w; + + if (!PyArg_UnpackTuple(args, "divmod", 2, 2, &v, &w)) + return NULL; + return PyNumber_Divmod(v, w); +} + +PyDoc_STRVAR(divmod_doc, +"divmod(x, y) -> (div, mod)\n\ +\n\ +Return the tuple ((x-x%y)/y, x%y). Invariant: div*y + mod == x."); + + +static PyObject * +builtin_eval(PyObject *self, PyObject *args) +{ + PyObject *cmd, *result, *tmp = NULL; + PyObject *globals = Py_None, *locals = Py_None; + char *str; + PyCompilerFlags cf; + + if (!PyArg_UnpackTuple(args, "eval", 1, 3, &cmd, &globals, &locals)) + return NULL; + if (locals != Py_None && !PyMapping_Check(locals)) { + PyErr_SetString(PyExc_TypeError, "locals must be a mapping"); + return NULL; + } + if (globals != Py_None && !PyDict_Check(globals)) { + PyErr_SetString(PyExc_TypeError, PyMapping_Check(globals) ? + "globals must be a real dict; try eval(expr, {}, mapping)" + : "globals must be a dict"); + return NULL; + } + if (globals == Py_None) { + globals = PyEval_GetGlobals(); + if (locals == Py_None) + locals = PyEval_GetLocals(); + } + else if (locals == Py_None) + locals = globals; + + if (globals == NULL || locals == NULL) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + return NULL; + } + + if (PyDict_GetItemString(globals, "__builtins__") == NULL) { + if (PyDict_SetItemString(globals, "__builtins__", + PyEval_GetBuiltins()) != 0) + return NULL; + } + + if (PyCode_Check(cmd)) { + if (PyCode_GetNumFree((PyCodeObject *)cmd) > 0) { + PyErr_SetString(PyExc_TypeError, + "code object passed to eval() may not contain free variables"); + return NULL; + } + return PyEval_EvalCode(cmd, globals, locals); + } + + cf.cf_flags = PyCF_SOURCE_IS_UTF8; + str = source_as_string(cmd, "eval", "string, bytes or code", &cf); + if (str == NULL) + return NULL; + + while (*str == ' ' || *str == '\t') + str++; + + (void)PyEval_MergeCompilerFlags(&cf); + result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); + Py_XDECREF(tmp); + return result; +} + +PyDoc_STRVAR(eval_doc, +"eval(source[, globals[, locals]]) -> value\n\ +\n\ +Evaluate the source in the context of globals and locals.\n\ +The source may be a string representing a Python expression\n\ +or a code object as returned by compile().\n\ +The globals must be a dictionary and locals can be any mapping,\n\ +defaulting to the current globals and locals.\n\ +If only globals is given, locals defaults to it.\n"); + +static PyObject * +builtin_exec(PyObject *self, PyObject *args) +{ + PyObject *v; + PyObject *prog, *globals = Py_None, *locals = Py_None; + + if (!PyArg_UnpackTuple(args, "exec", 1, 3, &prog, &globals, &locals)) + return NULL; + + if (globals == Py_None) { + globals = PyEval_GetGlobals(); + if (locals == Py_None) { + locals = PyEval_GetLocals(); + } + if (!globals || !locals) { + PyErr_SetString(PyExc_SystemError, + "globals and locals cannot be NULL"); + return NULL; + } + } + else if (locals == Py_None) + locals = globals; + + if (!PyDict_Check(globals)) { + PyErr_Format(PyExc_TypeError, "exec() arg 2 must be a dict, not %.100s", + globals->ob_type->tp_name); + return NULL; + } + if (!PyMapping_Check(locals)) { + PyErr_Format(PyExc_TypeError, + "arg 3 must be a mapping or None, not %.100s", + locals->ob_type->tp_name); + return NULL; + } + if (PyDict_GetItemString(globals, "__builtins__") == NULL) { + if (PyDict_SetItemString(globals, "__builtins__", + PyEval_GetBuiltins()) != 0) + return NULL; + } + + if (PyCode_Check(prog)) { + if (PyCode_GetNumFree((PyCodeObject *)prog) > 0) { + PyErr_SetString(PyExc_TypeError, + "code object passed to exec() may not " + "contain free variables"); + return NULL; + } + v = PyEval_EvalCode(prog, globals, locals); + } + else { + char *str; + PyCompilerFlags cf; + cf.cf_flags = PyCF_SOURCE_IS_UTF8; + str = source_as_string(prog, "exec", + "string, bytes or code", &cf); + if (str == NULL) + return NULL; + if (PyEval_MergeCompilerFlags(&cf)) + v = PyRun_StringFlags(str, Py_file_input, globals, + locals, &cf); + else + v = PyRun_String(str, Py_file_input, globals, locals); + } + if (v == NULL) + return NULL; + Py_DECREF(v); + Py_RETURN_NONE; +} + +PyDoc_STRVAR(exec_doc, +"exec(object[, globals[, locals]])\n\ +\n\ +Read and execute code from an object, which can be a string or a code\n\ +object.\n\ +The globals and locals are dictionaries, defaulting to the current\n\ +globals and locals. If only globals is given, locals defaults to it."); + + +static PyObject * +builtin_getattr(PyObject *self, PyObject *args) +{ + PyObject *v, *result, *dflt = NULL; + PyObject *name; + + if (!PyArg_UnpackTuple(args, "getattr", 2, 3, &v, &name, &dflt)) + return NULL; + + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, + "getattr(): attribute name must be string"); + return NULL; + } + result = PyObject_GetAttr(v, name); + if (result == NULL && dflt != NULL && + PyErr_ExceptionMatches(PyExc_AttributeError)) + { + PyErr_Clear(); + Py_INCREF(dflt); + result = dflt; + } + return result; +} + +PyDoc_STRVAR(getattr_doc, +"getattr(object, name[, default]) -> value\n\ +\n\ +Get a named attribute from an object; getattr(x, 'y') is equivalent to x.y.\n\ +When a default argument is given, it is returned when the attribute doesn't\n\ +exist; without it, an exception is raised in that case."); + + +static PyObject * +builtin_globals(PyObject *self) +{ + PyObject *d; + + d = PyEval_GetGlobals(); + Py_XINCREF(d); + return d; +} + +PyDoc_STRVAR(globals_doc, +"globals() -> dictionary\n\ +\n\ +Return the dictionary containing the current scope's global variables."); + + +static PyObject * +builtin_hasattr(PyObject *self, PyObject *args) +{ + PyObject *v; + PyObject *name; + + if (!PyArg_UnpackTuple(args, "hasattr", 2, 2, &v, &name)) + return NULL; + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, + "hasattr(): attribute name must be string"); + return NULL; + } + v = PyObject_GetAttr(v, name); + if (v == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + Py_RETURN_FALSE; + } + return NULL; + } + Py_DECREF(v); + Py_RETURN_TRUE; +} + +PyDoc_STRVAR(hasattr_doc, +"hasattr(object, name) -> bool\n\ +\n\ +Return whether the object has an attribute with the given name.\n\ +(This is done by calling getattr(object, name) and catching AttributeError.)"); + + +static PyObject * +builtin_id(PyObject *self, PyObject *v) +{ + return PyLong_FromVoidPtr(v); +} + +PyDoc_STRVAR(id_doc, +"id(object) -> integer\n\ +\n\ +Return the identity of an object. This is guaranteed to be unique among\n\ +simultaneously existing objects. (Hint: it's the object's memory address.)"); + + +/* map object ************************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *iters; + PyObject *func; +} mapobject; + +static PyObject * +map_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *it, *iters, *func; + mapobject *lz; + Py_ssize_t numargs, i; + + if (type == &PyMap_Type && !_PyArg_NoKeywords("map()", kwds)) + return NULL; + + numargs = PyTuple_Size(args); + if (numargs < 2) { + PyErr_SetString(PyExc_TypeError, + "map() must have at least two arguments."); + return NULL; + } + + iters = PyTuple_New(numargs-1); + if (iters == NULL) + return NULL; + + for (i=1 ; itp_alloc(type, 0); + if (lz == NULL) { + Py_DECREF(iters); + return NULL; + } + lz->iters = iters; + func = PyTuple_GET_ITEM(args, 0); + Py_INCREF(func); + lz->func = func; + + return (PyObject *)lz; +} + +static void +map_dealloc(mapobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->iters); + Py_XDECREF(lz->func); + Py_TYPE(lz)->tp_free(lz); +} + +static int +map_traverse(mapobject *lz, visitproc visit, void *arg) +{ + Py_VISIT(lz->iters); + Py_VISIT(lz->func); + return 0; +} + +static PyObject * +map_next(mapobject *lz) +{ + PyObject *val; + PyObject *argtuple; + PyObject *result; + Py_ssize_t numargs, i; + + numargs = PyTuple_Size(lz->iters); + argtuple = PyTuple_New(numargs); + if (argtuple == NULL) + return NULL; + + for (i=0 ; iiters, i)); + if (val == NULL) { + Py_DECREF(argtuple); + return NULL; + } + PyTuple_SET_ITEM(argtuple, i, val); + } + result = PyObject_Call(lz->func, argtuple, NULL); + Py_DECREF(argtuple); + return result; +} + +PyDoc_STRVAR(map_doc, +"map(func, *iterables) --> map object\n\ +\n\ +Make an iterator that computes the function using arguments from\n\ +each of the iterables. Stops when the shortest iterable is exhausted."); + +PyTypeObject PyMap_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "map", /* tp_name */ + sizeof(mapobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)map_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + map_doc, /* tp_doc */ + (traverseproc)map_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)map_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + map_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + +static PyObject * +builtin_next(PyObject *self, PyObject *args) +{ + PyObject *it, *res; + PyObject *def = NULL; + + if (!PyArg_UnpackTuple(args, "next", 1, 2, &it, &def)) + return NULL; + if (!PyIter_Check(it)) { + PyErr_Format(PyExc_TypeError, + "'%.200s' object is not an iterator", + it->ob_type->tp_name); + return NULL; + } + + res = (*it->ob_type->tp_iternext)(it); + if (res != NULL) { + return res; + } else if (def != NULL) { + if (PyErr_Occurred()) { + if(!PyErr_ExceptionMatches(PyExc_StopIteration)) + return NULL; + PyErr_Clear(); + } + Py_INCREF(def); + return def; + } else if (PyErr_Occurred()) { + return NULL; + } else { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } +} + +PyDoc_STRVAR(next_doc, +"next(iterator[, default])\n\ +\n\ +Return the next item from the iterator. If default is given and the iterator\n\ +is exhausted, it is returned instead of raising StopIteration."); + + +static PyObject * +builtin_setattr(PyObject *self, PyObject *args) +{ + PyObject *v; + PyObject *name; + PyObject *value; + + if (!PyArg_UnpackTuple(args, "setattr", 3, 3, &v, &name, &value)) + return NULL; + if (PyObject_SetAttr(v, name, value) != 0) + return NULL; + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(setattr_doc, +"setattr(object, name, value)\n\ +\n\ +Set a named attribute on an object; setattr(x, 'y', v) is equivalent to\n\ +``x.y = v''."); + + +static PyObject * +builtin_delattr(PyObject *self, PyObject *args) +{ + PyObject *v; + PyObject *name; + + if (!PyArg_UnpackTuple(args, "delattr", 2, 2, &v, &name)) + return NULL; + if (PyObject_SetAttr(v, name, (PyObject *)NULL) != 0) + return NULL; + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(delattr_doc, +"delattr(object, name)\n\ +\n\ +Delete a named attribute on an object; delattr(x, 'y') is equivalent to\n\ +``del x.y''."); + + +static PyObject * +builtin_hash(PyObject *self, PyObject *v) +{ + Py_hash_t x; + + x = PyObject_Hash(v); + if (x == -1) + return NULL; + return PyLong_FromSsize_t(x); +} + +PyDoc_STRVAR(hash_doc, +"hash(object) -> integer\n\ +\n\ +Return a hash value for the object. Two objects with the same value have\n\ +the same hash value. The reverse is not necessarily true, but likely."); + + +static PyObject * +builtin_hex(PyObject *self, PyObject *v) +{ + return PyNumber_ToBase(v, 16); +} + +PyDoc_STRVAR(hex_doc, +"hex(number) -> string\n\ +\n\ +Return the hexadecimal representation of an integer."); + + +static PyObject * +builtin_iter(PyObject *self, PyObject *args) +{ + PyObject *v, *w = NULL; + + if (!PyArg_UnpackTuple(args, "iter", 1, 2, &v, &w)) + return NULL; + if (w == NULL) + return PyObject_GetIter(v); + if (!PyCallable_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "iter(v, w): v must be callable"); + return NULL; + } + return PyCallIter_New(v, w); +} + +PyDoc_STRVAR(iter_doc, +"iter(iterable) -> iterator\n\ +iter(callable, sentinel) -> iterator\n\ +\n\ +Get an iterator from an object. In the first form, the argument must\n\ +supply its own iterator, or be a sequence.\n\ +In the second form, the callable is called until it returns the sentinel."); + + +static PyObject * +builtin_len(PyObject *self, PyObject *v) +{ + Py_ssize_t res; + + res = PyObject_Size(v); + if (res < 0 && PyErr_Occurred()) + return NULL; + return PyLong_FromSsize_t(res); +} + +PyDoc_STRVAR(len_doc, +"len(object) -> integer\n\ +\n\ +Return the number of items of a sequence or mapping."); + + +static PyObject * +builtin_locals(PyObject *self) +{ + PyObject *d; + + d = PyEval_GetLocals(); + Py_XINCREF(d); + return d; +} + +PyDoc_STRVAR(locals_doc, +"locals() -> dictionary\n\ +\n\ +Update and return a dictionary containing the current scope's local variables."); + + +static PyObject * +min_max(PyObject *args, PyObject *kwds, int op) +{ + PyObject *v, *it, *item, *val, *maxitem, *maxval, *keyfunc=NULL; + const char *name = op == Py_LT ? "min" : "max"; + + if (PyTuple_Size(args) > 1) + v = args; + else if (!PyArg_UnpackTuple(args, (char *)name, 1, 1, &v)) + return NULL; + + if (kwds != NULL && PyDict_Check(kwds) && PyDict_Size(kwds)) { + keyfunc = PyDict_GetItemString(kwds, "key"); + if (PyDict_Size(kwds)!=1 || keyfunc == NULL) { + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument", name); + return NULL; + } + Py_INCREF(keyfunc); + } + + it = PyObject_GetIter(v); + if (it == NULL) { + Py_XDECREF(keyfunc); + return NULL; + } + + maxitem = NULL; /* the result */ + maxval = NULL; /* the value associated with the result */ + while (( item = PyIter_Next(it) )) { + /* get the value from the key function */ + if (keyfunc != NULL) { + val = PyObject_CallFunctionObjArgs(keyfunc, item, NULL); + if (val == NULL) + goto Fail_it_item; + } + /* no key function; the value is the item */ + else { + val = item; + Py_INCREF(val); + } + + /* maximum value and item are unset; set them */ + if (maxval == NULL) { + maxitem = item; + maxval = val; + } + /* maximum value and item are set; update them as necessary */ + else { + int cmp = PyObject_RichCompareBool(val, maxval, op); + if (cmp < 0) + goto Fail_it_item_and_val; + else if (cmp > 0) { + Py_DECREF(maxval); + Py_DECREF(maxitem); + maxval = val; + maxitem = item; + } + else { + Py_DECREF(item); + Py_DECREF(val); + } + } + } + if (PyErr_Occurred()) + goto Fail_it; + if (maxval == NULL) { + PyErr_Format(PyExc_ValueError, + "%s() arg is an empty sequence", name); + assert(maxitem == NULL); + } + else + Py_DECREF(maxval); + Py_DECREF(it); + Py_XDECREF(keyfunc); + return maxitem; + +Fail_it_item_and_val: + Py_DECREF(val); +Fail_it_item: + Py_DECREF(item); +Fail_it: + Py_XDECREF(maxval); + Py_XDECREF(maxitem); + Py_DECREF(it); + Py_XDECREF(keyfunc); + return NULL; +} + +static PyObject * +builtin_min(PyObject *self, PyObject *args, PyObject *kwds) +{ + return min_max(args, kwds, Py_LT); +} + +PyDoc_STRVAR(min_doc, +"min(iterable[, key=func]) -> value\n\ +min(a, b, c, ...[, key=func]) -> value\n\ +\n\ +With a single iterable argument, return its smallest item.\n\ +With two or more arguments, return the smallest argument."); + + +static PyObject * +builtin_max(PyObject *self, PyObject *args, PyObject *kwds) +{ + return min_max(args, kwds, Py_GT); +} + +PyDoc_STRVAR(max_doc, +"max(iterable[, key=func]) -> value\n\ +max(a, b, c, ...[, key=func]) -> value\n\ +\n\ +With a single iterable argument, return its largest item.\n\ +With two or more arguments, return the largest argument."); + + +static PyObject * +builtin_oct(PyObject *self, PyObject *v) +{ + return PyNumber_ToBase(v, 8); +} + +PyDoc_STRVAR(oct_doc, +"oct(number) -> string\n\ +\n\ +Return the octal representation of an integer."); + + +static PyObject * +builtin_ord(PyObject *self, PyObject* obj) +{ + long ord; + Py_ssize_t size; + + if (PyBytes_Check(obj)) { + size = PyBytes_GET_SIZE(obj); + if (size == 1) { + ord = (long)((unsigned char)*PyBytes_AS_STRING(obj)); + return PyLong_FromLong(ord); + } + } + else if (PyUnicode_Check(obj)) { + if (PyUnicode_READY(obj) == -1) + return NULL; + size = PyUnicode_GET_LENGTH(obj); + if (size == 1) { + ord = (long)PyUnicode_READ_CHAR(obj, 0); + return PyLong_FromLong(ord); + } + } + else if (PyByteArray_Check(obj)) { + /* XXX Hopefully this is temporary */ + size = PyByteArray_GET_SIZE(obj); + if (size == 1) { + ord = (long)((unsigned char)*PyByteArray_AS_STRING(obj)); + return PyLong_FromLong(ord); + } + } + else { + PyErr_Format(PyExc_TypeError, + "ord() expected string of length 1, but " \ + "%.200s found", obj->ob_type->tp_name); + return NULL; + } + + PyErr_Format(PyExc_TypeError, + "ord() expected a character, " + "but string of length %zd found", + size); + return NULL; +} + +PyDoc_VAR(ord_doc) = PyDoc_STR( +"ord(c) -> integer\n\ +\n\ +Return the integer ordinal of a one-character string." +) +#ifndef Py_UNICODE_WIDE +PyDoc_STR( +"\nA valid surrogate pair is also accepted." +) +#endif +; + + +static PyObject * +builtin_pow(PyObject *self, PyObject *args) +{ + PyObject *v, *w, *z = Py_None; + + if (!PyArg_UnpackTuple(args, "pow", 2, 3, &v, &w, &z)) + return NULL; + return PyNumber_Power(v, w, z); +} + +PyDoc_STRVAR(pow_doc, +"pow(x, y[, z]) -> number\n\ +\n\ +With two arguments, equivalent to x**y. With three arguments,\n\ +equivalent to (x**y) % z, but may be more efficient (e.g. for longs)."); + + + +static PyObject * +builtin_print(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"sep", "end", "file", 0}; + static PyObject *dummy_args; + PyObject *sep = NULL, *end = NULL, *file = NULL; + int i, err; + + if (dummy_args == NULL) { + if (!(dummy_args = PyTuple_New(0))) + return NULL; + } + if (!PyArg_ParseTupleAndKeywords(dummy_args, kwds, "|OOO:print", + kwlist, &sep, &end, &file)) + return NULL; + if (file == NULL || file == Py_None) { + file = PySys_GetObject("stdout"); + /* sys.stdout may be None when FILE* stdout isn't connected */ + if (file == Py_None) + Py_RETURN_NONE; + } + + if (sep == Py_None) { + sep = NULL; + } + else if (sep && !PyUnicode_Check(sep)) { + PyErr_Format(PyExc_TypeError, + "sep must be None or a string, not %.200s", + sep->ob_type->tp_name); + return NULL; + } + if (end == Py_None) { + end = NULL; + } + else if (end && !PyUnicode_Check(end)) { + PyErr_Format(PyExc_TypeError, + "end must be None or a string, not %.200s", + end->ob_type->tp_name); + return NULL; + } + + for (i = 0; i < PyTuple_Size(args); i++) { + if (i > 0) { + if (sep == NULL) + err = PyFile_WriteString(" ", file); + else + err = PyFile_WriteObject(sep, file, + Py_PRINT_RAW); + if (err) + return NULL; + } + err = PyFile_WriteObject(PyTuple_GetItem(args, i), file, + Py_PRINT_RAW); + if (err) + return NULL; + } + + if (end == NULL) + err = PyFile_WriteString("\n", file); + else + err = PyFile_WriteObject(end, file, Py_PRINT_RAW); + if (err) + return NULL; + + Py_RETURN_NONE; +} + +PyDoc_STRVAR(print_doc, +"print(value, ..., sep=' ', end='\\n', file=sys.stdout)\n\ +\n\ +Prints the values to a stream, or to sys.stdout by default.\n\ +Optional keyword arguments:\n\ +file: a file-like object (stream); defaults to the current sys.stdout.\n\ +sep: string inserted between values, default a space.\n\ +end: string appended after the last value, default a newline."); + + +static PyObject * +builtin_input(PyObject *self, PyObject *args) +{ + PyObject *promptarg = NULL; + PyObject *fin = PySys_GetObject("stdin"); + PyObject *fout = PySys_GetObject("stdout"); + PyObject *ferr = PySys_GetObject("stderr"); + PyObject *tmp; + long fd; + int tty; + + /* Parse arguments */ + if (!PyArg_UnpackTuple(args, "input", 0, 1, &promptarg)) + return NULL; + + /* Check that stdin/out/err are intact */ + if (fin == NULL || fin == Py_None) { + PyErr_SetString(PyExc_RuntimeError, + "input(): lost sys.stdin"); + return NULL; + } + if (fout == NULL || fout == Py_None) { + PyErr_SetString(PyExc_RuntimeError, + "input(): lost sys.stdout"); + return NULL; + } + if (ferr == NULL || ferr == Py_None) { + PyErr_SetString(PyExc_RuntimeError, + "input(): lost sys.stderr"); + return NULL; + } + + /* First of all, flush stderr */ + tmp = _PyObject_CallMethodId(ferr, &PyId_flush, ""); + if (tmp == NULL) + PyErr_Clear(); + else + Py_DECREF(tmp); + + /* We should only use (GNU) readline if Python's sys.stdin and + sys.stdout are the same as C's stdin and stdout, because we + need to pass it those. */ + tmp = _PyObject_CallMethodId(fin, &PyId_fileno, ""); + if (tmp == NULL) { + PyErr_Clear(); + tty = 0; + } + else { + fd = PyLong_AsLong(tmp); + Py_DECREF(tmp); + if (fd < 0 && PyErr_Occurred()) + return NULL; + tty = fd == fileno(stdin) && isatty(fd); + } + if (tty) { + tmp = _PyObject_CallMethodId(fout, &PyId_fileno, ""); + if (tmp == NULL) + PyErr_Clear(); + else { + fd = PyLong_AsLong(tmp); + Py_DECREF(tmp); + if (fd < 0 && PyErr_Occurred()) + return NULL; + tty = fd == fileno(stdout) && isatty(fd); + } + } + + /* If we're interactive, use (GNU) readline */ + if (tty) { + PyObject *po = NULL; + char *prompt; + char *s = NULL; + PyObject *stdin_encoding = NULL, *stdin_errors = NULL; + PyObject *stdout_encoding = NULL, *stdout_errors = NULL; + char *stdin_encoding_str, *stdin_errors_str; + PyObject *result; + size_t len; + _Py_IDENTIFIER(encoding); + _Py_IDENTIFIER(errors); + + stdin_encoding = _PyObject_GetAttrId(fin, &PyId_encoding); + stdin_errors = _PyObject_GetAttrId(fin, &PyId_errors); + if (!stdin_encoding || !stdin_errors) + /* stdin is a text stream, so it must have an + encoding. */ + goto _readline_errors; + stdin_encoding_str = _PyUnicode_AsString(stdin_encoding); + stdin_errors_str = _PyUnicode_AsString(stdin_errors); + if (!stdin_encoding_str || !stdin_errors_str) + goto _readline_errors; + tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); + if (tmp == NULL) + PyErr_Clear(); + else + Py_DECREF(tmp); + if (promptarg != NULL) { + /* We have a prompt, encode it as stdout would */ + char *stdout_encoding_str, *stdout_errors_str; + PyObject *stringpo; + stdout_encoding = _PyObject_GetAttrId(fout, &PyId_encoding); + stdout_errors = _PyObject_GetAttrId(fout, &PyId_errors); + if (!stdout_encoding || !stdout_errors) + goto _readline_errors; + stdout_encoding_str = _PyUnicode_AsString(stdout_encoding); + stdout_errors_str = _PyUnicode_AsString(stdout_errors); + if (!stdout_encoding_str || !stdout_errors_str) + goto _readline_errors; + stringpo = PyObject_Str(promptarg); + if (stringpo == NULL) + goto _readline_errors; + po = PyUnicode_AsEncodedString(stringpo, + stdout_encoding_str, stdout_errors_str); + Py_CLEAR(stdout_encoding); + Py_CLEAR(stdout_errors); + Py_CLEAR(stringpo); + if (po == NULL) + goto _readline_errors; + prompt = PyBytes_AsString(po); + if (prompt == NULL) + goto _readline_errors; + } + else { + po = NULL; + prompt = ""; + } + s = PyOS_Readline(stdin, stdout, prompt); + if (s == NULL) { + if (!PyErr_Occurred()) + PyErr_SetNone(PyExc_KeyboardInterrupt); + goto _readline_errors; + } + + len = strlen(s); + if (len == 0) { + PyErr_SetNone(PyExc_EOFError); + result = NULL; + } + else { + if (len > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, + "input: input too long"); + result = NULL; + } + else { + len--; /* strip trailing '\n' */ + if (len != 0 && s[len-1] == '\r') + len--; /* strip trailing '\r' */ + result = PyUnicode_Decode(s, len, stdin_encoding_str, + stdin_errors_str); + } + } + Py_DECREF(stdin_encoding); + Py_DECREF(stdin_errors); + Py_XDECREF(po); + PyMem_FREE(s); + return result; + _readline_errors: + Py_XDECREF(stdin_encoding); + Py_XDECREF(stdout_encoding); + Py_XDECREF(stdin_errors); + Py_XDECREF(stdout_errors); + Py_XDECREF(po); + return NULL; + } + + /* Fallback if we're not interactive */ + if (promptarg != NULL) { + if (PyFile_WriteObject(promptarg, fout, Py_PRINT_RAW) != 0) + return NULL; + } + tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); + if (tmp == NULL) + PyErr_Clear(); + else + Py_DECREF(tmp); + return PyFile_GetLine(fin, -1); +} + +PyDoc_STRVAR(input_doc, +"input([prompt]) -> string\n\ +\n\ +Read a string from standard input. The trailing newline is stripped.\n\ +If the user hits EOF (Unix: Ctl-D, Windows: Ctl-Z+Return), raise EOFError.\n\ +On Unix, GNU readline is used if enabled. The prompt string, if given,\n\ +is printed without a trailing newline before reading."); + + +static PyObject * +builtin_repr(PyObject *self, PyObject *v) +{ + return PyObject_Repr(v); +} + +PyDoc_STRVAR(repr_doc, +"repr(object) -> string\n\ +\n\ +Return the canonical string representation of the object.\n\ +For most object types, eval(repr(object)) == object."); + + +static PyObject * +builtin_round(PyObject *self, PyObject *args, PyObject *kwds) +{ + static PyObject *round_str = NULL; + PyObject *ndigits = NULL; + static char *kwlist[] = {"number", "ndigits", 0}; + PyObject *number, *round; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:round", + kwlist, &number, &ndigits)) + return NULL; + + if (Py_TYPE(number)->tp_dict == NULL) { + if (PyType_Ready(Py_TYPE(number)) < 0) + return NULL; + } + + if (round_str == NULL) { + round_str = PyUnicode_InternFromString("__round__"); + if (round_str == NULL) + return NULL; + } + + round = _PyType_Lookup(Py_TYPE(number), round_str); + if (round == NULL) { + PyErr_Format(PyExc_TypeError, + "type %.100s doesn't define __round__ method", + Py_TYPE(number)->tp_name); + return NULL; + } + + if (ndigits == NULL) + return PyObject_CallFunction(round, "O", number); + else + return PyObject_CallFunction(round, "OO", number, ndigits); +} + +PyDoc_STRVAR(round_doc, +"round(number[, ndigits]) -> number\n\ +\n\ +Round a number to a given precision in decimal digits (default 0 digits).\n\ +This returns an int when called with one argument, otherwise the\n\ +same type as the number. ndigits may be negative."); + + +static PyObject * +builtin_sorted(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *newlist, *v, *seq, *keyfunc=NULL, *newargs; + PyObject *callable; + static char *kwlist[] = {"iterable", "key", "reverse", 0}; + int reverse; + _Py_IDENTIFIER(sort); + + /* args 1-3 should match listsort in Objects/listobject.c */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:sorted", + kwlist, &seq, &keyfunc, &reverse)) + return NULL; + + newlist = PySequence_List(seq); + if (newlist == NULL) + return NULL; + + callable = _PyObject_GetAttrId(newlist, &PyId_sort); + if (callable == NULL) { + Py_DECREF(newlist); + return NULL; + } + + newargs = PyTuple_GetSlice(args, 1, 4); + if (newargs == NULL) { + Py_DECREF(newlist); + Py_DECREF(callable); + return NULL; + } + + v = PyObject_Call(callable, newargs, kwds); + Py_DECREF(newargs); + Py_DECREF(callable); + if (v == NULL) { + Py_DECREF(newlist); + return NULL; + } + Py_DECREF(v); + return newlist; +} + +PyDoc_STRVAR(sorted_doc, +"sorted(iterable, key=None, reverse=False) --> new sorted list"); + +static PyObject * +builtin_vars(PyObject *self, PyObject *args) +{ + PyObject *v = NULL; + PyObject *d; + + if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) + return NULL; + if (v == NULL) { + d = PyEval_GetLocals(); + if (d == NULL) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_SystemError, + "vars(): no locals!?"); + } + else + Py_INCREF(d); + } + else { + _Py_IDENTIFIER(__dict__); + d = _PyObject_GetAttrId(v, &PyId___dict__); + if (d == NULL) { + PyErr_SetString(PyExc_TypeError, + "vars() argument must have __dict__ attribute"); + return NULL; + } + } + return d; +} + +PyDoc_STRVAR(vars_doc, +"vars([object]) -> dictionary\n\ +\n\ +Without arguments, equivalent to locals().\n\ +With an argument, equivalent to object.__dict__."); + +static PyObject* +builtin_sum(PyObject *self, PyObject *args) +{ + PyObject *seq; + PyObject *result = NULL; + PyObject *temp, *item, *iter; + + if (!PyArg_UnpackTuple(args, "sum", 1, 2, &seq, &result)) + return NULL; + + iter = PyObject_GetIter(seq); + if (iter == NULL) + return NULL; + + if (result == NULL) { + result = PyLong_FromLong(0); + if (result == NULL) { + Py_DECREF(iter); + return NULL; + } + } else { + /* reject string values for 'start' parameter */ + if (PyUnicode_Check(result)) { + PyErr_SetString(PyExc_TypeError, + "sum() can't sum strings [use ''.join(seq) instead]"); + Py_DECREF(iter); + return NULL; + } + if (PyBytes_Check(result)) { + PyErr_SetString(PyExc_TypeError, + "sum() can't sum bytes [use b''.join(seq) instead]"); + Py_DECREF(iter); + return NULL; + } + if (PyByteArray_Check(result)) { + PyErr_SetString(PyExc_TypeError, + "sum() can't sum bytearray [use b''.join(seq) instead]"); + Py_DECREF(iter); + return NULL; + } + + Py_INCREF(result); + } + +#ifndef SLOW_SUM + /* Fast addition by keeping temporary sums in C instead of new Python objects. + Assumes all inputs are the same type. If the assumption fails, default + to the more general routine. + */ + if (PyLong_CheckExact(result)) { + int overflow; + long i_result = PyLong_AsLongAndOverflow(result, &overflow); + /* If this already overflowed, don't even enter the loop. */ + if (overflow == 0) { + Py_DECREF(result); + result = NULL; + } + while(result == NULL) { + item = PyIter_Next(iter); + if (item == NULL) { + Py_DECREF(iter); + if (PyErr_Occurred()) + return NULL; + return PyLong_FromLong(i_result); + } + if (PyLong_CheckExact(item)) { + long b = PyLong_AsLongAndOverflow(item, &overflow); + long x = i_result + b; + if (overflow == 0 && ((x^i_result) >= 0 || (x^b) >= 0)) { + i_result = x; + Py_DECREF(item); + continue; + } + } + /* Either overflowed or is not an int. Restore real objects and process normally */ + result = PyLong_FromLong(i_result); + temp = PyNumber_Add(result, item); + Py_DECREF(result); + Py_DECREF(item); + result = temp; + if (result == NULL) { + Py_DECREF(iter); + return NULL; + } + } + } + + if (PyFloat_CheckExact(result)) { + double f_result = PyFloat_AS_DOUBLE(result); + Py_DECREF(result); + result = NULL; + while(result == NULL) { + item = PyIter_Next(iter); + if (item == NULL) { + Py_DECREF(iter); + if (PyErr_Occurred()) + return NULL; + return PyFloat_FromDouble(f_result); + } + if (PyFloat_CheckExact(item)) { + PyFPE_START_PROTECT("add", Py_DECREF(item); Py_DECREF(iter); return 0) + f_result += PyFloat_AS_DOUBLE(item); + PyFPE_END_PROTECT(f_result) + Py_DECREF(item); + continue; + } + if (PyLong_CheckExact(item)) { + long value; + int overflow; + value = PyLong_AsLongAndOverflow(item, &overflow); + if (!overflow) { + PyFPE_START_PROTECT("add", Py_DECREF(item); Py_DECREF(iter); return 0) + f_result += (double)value; + PyFPE_END_PROTECT(f_result) + Py_DECREF(item); + continue; + } + } + result = PyFloat_FromDouble(f_result); + temp = PyNumber_Add(result, item); + Py_DECREF(result); + Py_DECREF(item); + result = temp; + if (result == NULL) { + Py_DECREF(iter); + return NULL; + } + } + } +#endif + + for(;;) { + item = PyIter_Next(iter); + if (item == NULL) { + /* error, or end-of-sequence */ + if (PyErr_Occurred()) { + Py_DECREF(result); + result = NULL; + } + break; + } + /* It's tempting to use PyNumber_InPlaceAdd instead of + PyNumber_Add here, to avoid quadratic running time + when doing 'sum(list_of_lists, [])'. However, this + would produce a change in behaviour: a snippet like + + empty = [] + sum([[x] for x in range(10)], empty) + + would change the value of empty. */ + temp = PyNumber_Add(result, item); + Py_DECREF(result); + Py_DECREF(item); + result = temp; + if (result == NULL) + break; + } + Py_DECREF(iter); + return result; +} + +PyDoc_STRVAR(sum_doc, +"sum(iterable[, start]) -> value\n\ +\n\ +Returns the sum of an iterable of numbers (NOT strings) plus the value\n\ +of parameter 'start' (which defaults to 0). When the iterable is\n\ +empty, returns start."); + + +static PyObject * +builtin_isinstance(PyObject *self, PyObject *args) +{ + PyObject *inst; + PyObject *cls; + int retval; + + if (!PyArg_UnpackTuple(args, "isinstance", 2, 2, &inst, &cls)) + return NULL; + + retval = PyObject_IsInstance(inst, cls); + if (retval < 0) + return NULL; + return PyBool_FromLong(retval); +} + +PyDoc_STRVAR(isinstance_doc, +"isinstance(object, class-or-type-or-tuple) -> bool\n\ +\n\ +Return whether an object is an instance of a class or of a subclass thereof.\n\ +With a type as second argument, return whether that is the object's type.\n\ +The form using a tuple, isinstance(x, (A, B, ...)), is a shortcut for\n\ +isinstance(x, A) or isinstance(x, B) or ... (etc.)."); + + +static PyObject * +builtin_issubclass(PyObject *self, PyObject *args) +{ + PyObject *derived; + PyObject *cls; + int retval; + + if (!PyArg_UnpackTuple(args, "issubclass", 2, 2, &derived, &cls)) + return NULL; + + retval = PyObject_IsSubclass(derived, cls); + if (retval < 0) + return NULL; + return PyBool_FromLong(retval); +} + +PyDoc_STRVAR(issubclass_doc, +"issubclass(C, B) -> bool\n\ +\n\ +Return whether class C is a subclass (i.e., a derived class) of class B.\n\ +When using a tuple as the second argument issubclass(X, (A, B, ...)),\n\ +is a shortcut for issubclass(X, A) or issubclass(X, B) or ... (etc.)."); + + +typedef struct { + PyObject_HEAD + Py_ssize_t tuplesize; + PyObject *ittuple; /* tuple of iterators */ + PyObject *result; +} zipobject; + +static PyObject * +zip_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + zipobject *lz; + Py_ssize_t i; + PyObject *ittuple; /* tuple of iterators */ + PyObject *result; + Py_ssize_t tuplesize = PySequence_Length(args); + + if (type == &PyZip_Type && !_PyArg_NoKeywords("zip()", kwds)) + return NULL; + + /* args must be a tuple */ + assert(PyTuple_Check(args)); + + /* obtain iterators */ + ittuple = PyTuple_New(tuplesize); + if (ittuple == NULL) + return NULL; + for (i=0; i < tuplesize; ++i) { + PyObject *item = PyTuple_GET_ITEM(args, i); + PyObject *it = PyObject_GetIter(item); + if (it == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "zip argument #%zd must support iteration", + i+1); + Py_DECREF(ittuple); + return NULL; + } + PyTuple_SET_ITEM(ittuple, i, it); + } + + /* create a result holder */ + result = PyTuple_New(tuplesize); + if (result == NULL) { + Py_DECREF(ittuple); + return NULL; + } + for (i=0 ; i < tuplesize ; i++) { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(result, i, Py_None); + } + + /* create zipobject structure */ + lz = (zipobject *)type->tp_alloc(type, 0); + if (lz == NULL) { + Py_DECREF(ittuple); + Py_DECREF(result); + return NULL; + } + lz->ittuple = ittuple; + lz->tuplesize = tuplesize; + lz->result = result; + + return (PyObject *)lz; +} + +static void +zip_dealloc(zipobject *lz) +{ + PyObject_GC_UnTrack(lz); + Py_XDECREF(lz->ittuple); + Py_XDECREF(lz->result); + Py_TYPE(lz)->tp_free(lz); +} + +static int +zip_traverse(zipobject *lz, visitproc visit, void *arg) +{ + Py_VISIT(lz->ittuple); + Py_VISIT(lz->result); + return 0; +} + +static PyObject * +zip_next(zipobject *lz) +{ + Py_ssize_t i; + Py_ssize_t tuplesize = lz->tuplesize; + PyObject *result = lz->result; + PyObject *it; + PyObject *item; + PyObject *olditem; + + if (tuplesize == 0) + return NULL; + if (Py_REFCNT(result) == 1) { + Py_INCREF(result); + for (i=0 ; i < tuplesize ; i++) { + it = PyTuple_GET_ITEM(lz->ittuple, i); + item = (*Py_TYPE(it)->tp_iternext)(it); + if (item == NULL) { + Py_DECREF(result); + return NULL; + } + olditem = PyTuple_GET_ITEM(result, i); + PyTuple_SET_ITEM(result, i, item); + Py_DECREF(olditem); + } + } else { + result = PyTuple_New(tuplesize); + if (result == NULL) + return NULL; + for (i=0 ; i < tuplesize ; i++) { + it = PyTuple_GET_ITEM(lz->ittuple, i); + item = (*Py_TYPE(it)->tp_iternext)(it); + if (item == NULL) { + Py_DECREF(result); + return NULL; + } + PyTuple_SET_ITEM(result, i, item); + } + } + return result; +} + +PyDoc_STRVAR(zip_doc, +"zip(iter1 [,iter2 [...]]) --> zip object\n\ +\n\ +Return a zip object whose .__next__() method returns a tuple where\n\ +the i-th element comes from the i-th iterable argument. The .__next__()\n\ +method continues until the shortest iterable in the argument sequence\n\ +is exhausted and then it raises StopIteration."); + +PyTypeObject PyZip_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "zip", /* tp_name */ + sizeof(zipobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)zip_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + zip_doc, /* tp_doc */ + (traverseproc)zip_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)zip_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + zip_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + +static PyMethodDef builtin_methods[] = { + {"__build_class__", (PyCFunction)builtin___build_class__, + METH_VARARGS | METH_KEYWORDS, build_class_doc}, + {"__import__", (PyCFunction)builtin___import__, METH_VARARGS | METH_KEYWORDS, import_doc}, + {"abs", builtin_abs, METH_O, abs_doc}, + {"all", builtin_all, METH_O, all_doc}, + {"any", builtin_any, METH_O, any_doc}, + {"ascii", builtin_ascii, METH_O, ascii_doc}, + {"bin", builtin_bin, METH_O, bin_doc}, + {"callable", builtin_callable, METH_O, callable_doc}, + {"chr", builtin_chr, METH_VARARGS, chr_doc}, + {"compile", (PyCFunction)builtin_compile, METH_VARARGS | METH_KEYWORDS, compile_doc}, + {"delattr", builtin_delattr, METH_VARARGS, delattr_doc}, + {"dir", builtin_dir, METH_VARARGS, dir_doc}, + {"divmod", builtin_divmod, METH_VARARGS, divmod_doc}, + {"eval", builtin_eval, METH_VARARGS, eval_doc}, + {"exec", builtin_exec, METH_VARARGS, exec_doc}, + {"format", builtin_format, METH_VARARGS, format_doc}, + {"getattr", builtin_getattr, METH_VARARGS, getattr_doc}, + {"globals", (PyCFunction)builtin_globals, METH_NOARGS, globals_doc}, + {"hasattr", builtin_hasattr, METH_VARARGS, hasattr_doc}, + {"hash", builtin_hash, METH_O, hash_doc}, + {"hex", builtin_hex, METH_O, hex_doc}, + {"id", builtin_id, METH_O, id_doc}, + {"input", builtin_input, METH_VARARGS, input_doc}, + {"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc}, + {"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc}, + {"iter", builtin_iter, METH_VARARGS, iter_doc}, + {"len", builtin_len, METH_O, len_doc}, + {"locals", (PyCFunction)builtin_locals, METH_NOARGS, locals_doc}, + {"max", (PyCFunction)builtin_max, METH_VARARGS | METH_KEYWORDS, max_doc}, + {"min", (PyCFunction)builtin_min, METH_VARARGS | METH_KEYWORDS, min_doc}, + {"next", (PyCFunction)builtin_next, METH_VARARGS, next_doc}, + {"oct", builtin_oct, METH_O, oct_doc}, + {"ord", builtin_ord, METH_O, ord_doc}, + {"pow", builtin_pow, METH_VARARGS, pow_doc}, + {"print", (PyCFunction)builtin_print, METH_VARARGS | METH_KEYWORDS, print_doc}, + {"repr", builtin_repr, METH_O, repr_doc}, + {"round", (PyCFunction)builtin_round, METH_VARARGS | METH_KEYWORDS, round_doc}, + {"setattr", builtin_setattr, METH_VARARGS, setattr_doc}, + {"sorted", (PyCFunction)builtin_sorted, METH_VARARGS | METH_KEYWORDS, sorted_doc}, + {"sum", builtin_sum, METH_VARARGS, sum_doc}, + {"vars", builtin_vars, METH_VARARGS, vars_doc}, + {NULL, NULL}, +}; + +PyDoc_STRVAR(builtin_doc, +"Built-in functions, exceptions, and other objects.\n\ +\n\ +Noteworthy: None is the `nil' object; Ellipsis represents `...' in slices."); + +static struct PyModuleDef builtinsmodule = { + PyModuleDef_HEAD_INIT, + "builtins", + builtin_doc, + -1, /* multiple "initialization" just copies the module dict. */ + builtin_methods, + NULL, + NULL, + NULL, + NULL +}; + + +PyObject * +_PyBuiltin_Init(void) +{ + PyObject *mod, *dict, *debug; + mod = PyModule_Create(&builtinsmodule); + if (mod == NULL) + return NULL; + dict = PyModule_GetDict(mod); + +#ifdef Py_TRACE_REFS + /* "builtins" exposes a number of statically allocated objects + * that, before this code was added in 2.3, never showed up in + * the list of "all objects" maintained by Py_TRACE_REFS. As a + * result, programs leaking references to None and False (etc) + * couldn't be diagnosed by examining sys.getobjects(0). + */ +#define ADD_TO_ALL(OBJECT) _Py_AddToAllObjects((PyObject *)(OBJECT), 0) +#else +#define ADD_TO_ALL(OBJECT) (void)0 +#endif + +#define SETBUILTIN(NAME, OBJECT) \ + if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0) \ + return NULL; \ + ADD_TO_ALL(OBJECT) + + SETBUILTIN("None", Py_None); + SETBUILTIN("Ellipsis", Py_Ellipsis); + SETBUILTIN("NotImplemented", Py_NotImplemented); + SETBUILTIN("False", Py_False); + SETBUILTIN("True", Py_True); + SETBUILTIN("bool", &PyBool_Type); + SETBUILTIN("memoryview", &PyMemoryView_Type); + SETBUILTIN("bytearray", &PyByteArray_Type); + SETBUILTIN("bytes", &PyBytes_Type); + SETBUILTIN("classmethod", &PyClassMethod_Type); + SETBUILTIN("complex", &PyComplex_Type); + SETBUILTIN("dict", &PyDict_Type); + SETBUILTIN("enumerate", &PyEnum_Type); + SETBUILTIN("filter", &PyFilter_Type); + SETBUILTIN("float", &PyFloat_Type); + SETBUILTIN("frozenset", &PyFrozenSet_Type); + SETBUILTIN("property", &PyProperty_Type); + SETBUILTIN("int", &PyLong_Type); + SETBUILTIN("list", &PyList_Type); + SETBUILTIN("map", &PyMap_Type); + SETBUILTIN("object", &PyBaseObject_Type); + SETBUILTIN("range", &PyRange_Type); + SETBUILTIN("reversed", &PyReversed_Type); + SETBUILTIN("set", &PySet_Type); + SETBUILTIN("slice", &PySlice_Type); + SETBUILTIN("staticmethod", &PyStaticMethod_Type); + SETBUILTIN("str", &PyUnicode_Type); + SETBUILTIN("super", &PySuper_Type); + SETBUILTIN("tuple", &PyTuple_Type); + SETBUILTIN("type", &PyType_Type); + SETBUILTIN("zip", &PyZip_Type); + debug = PyBool_FromLong(Py_OptimizeFlag == 0); + if (PyDict_SetItemString(dict, "__debug__", debug) < 0) { + Py_XDECREF(debug); + return NULL; + } + Py_XDECREF(debug); + + return mod; +#undef ADD_TO_ALL +#undef SETBUILTIN +} diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/ceval.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/ceval.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,4327 @@ + +/* Execute compiled code */ + +/* XXX TO DO: + XXX speed up searching for keywords by using a dictionary + XXX document it! + */ + +/* enable more aggressive intra-module optimizations, where available */ +#define PY_LOCAL_AGGRESSIVE + +#include "Python.h" + +#include "code.h" +#include "frameobject.h" +#include "opcode.h" +#include "structmember.h" + +#include + +typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); + +/* Forward declarations */ +static PyObject * call_function(PyObject ***, int); +static PyObject * fast_function(PyObject *, PyObject ***, int, int, int); +static PyObject * do_call(PyObject *, PyObject ***, int, int); +static PyObject * ext_do_call(PyObject *, PyObject ***, int, int, int); +static PyObject * update_keyword_args(PyObject *, int, PyObject ***, + PyObject *); +static PyObject * update_star_args(int, int, PyObject *, PyObject ***); +static PyObject * load_args(PyObject ***, int); +#define CALL_FLAG_VAR 1 +#define CALL_FLAG_KW 2 + +static int call_trace(Py_tracefunc, PyObject *, PyFrameObject *, + int, PyObject *); +static int call_trace_protected(Py_tracefunc, PyObject *, + PyFrameObject *, int, PyObject *); +static void call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *); +static int maybe_call_line_trace(Py_tracefunc, PyObject *, + PyFrameObject *, int *, int *, int *); + +static PyObject * cmp_outcome(int, PyObject *, PyObject *); +static PyObject * import_from(PyObject *, PyObject *); +static int import_all_from(PyObject *, PyObject *); +static void format_exc_check_arg(PyObject *, const char *, PyObject *); +static void format_exc_unbound(PyCodeObject *co, int oparg); +static PyObject * unicode_concatenate(PyObject *, PyObject *, + PyFrameObject *, unsigned char *); +static PyObject * special_lookup(PyObject *, char *, PyObject **); + +#define NAME_ERROR_MSG \ + "name '%.200s' is not defined" +#define GLOBAL_NAME_ERROR_MSG \ + "global name '%.200s' is not defined" +#define UNBOUNDLOCAL_ERROR_MSG \ + "local variable '%.200s' referenced before assignment" +#define UNBOUNDFREE_ERROR_MSG \ + "free variable '%.200s' referenced before assignment" \ + " in enclosing scope" + +#define PCALL(O) + +PyObject * +PyEval_GetCallStats(PyObject *self) +{ + Py_INCREF(Py_None); + return Py_None; +} + + +#ifdef WITH_THREAD +#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request) +#else +#define GIL_REQUEST 0 +#endif + +/* This can set eval_breaker to 0 even though gil_drop_request became + 1. We believe this is all right because the eval loop will release + the GIL eventually anyway. */ +#define COMPUTE_EVAL_BREAKER() \ + _Py_atomic_store_relaxed( \ + &eval_breaker, \ + GIL_REQUEST | \ + _Py_atomic_load_relaxed(&pendingcalls_to_do) | \ + pending_async_exc) + +#ifdef WITH_THREAD + +#define SET_GIL_DROP_REQUEST() \ + do { \ + _Py_atomic_store_relaxed(&gil_drop_request, 1); \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ + } while (0) + +#define RESET_GIL_DROP_REQUEST() \ + do { \ + _Py_atomic_store_relaxed(&gil_drop_request, 0); \ + COMPUTE_EVAL_BREAKER(); \ + } while (0) + +#endif + +/* Pending calls are only modified under pending_lock */ +#define SIGNAL_PENDING_CALLS() \ + do { \ + _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ + } while (0) + +#define UNSIGNAL_PENDING_CALLS() \ + do { \ + _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \ + COMPUTE_EVAL_BREAKER(); \ + } while (0) + +#define SIGNAL_ASYNC_EXC() \ + do { \ + pending_async_exc = 1; \ + _Py_atomic_store_relaxed(&eval_breaker, 1); \ + } while (0) + +#define UNSIGNAL_ASYNC_EXC() \ + do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) + + +#ifdef WITH_THREAD + +#include "pythread.h" + +static PyThread_type_lock pending_lock = 0; /* for pending calls */ +static long main_thread = 0; +/* This single variable consolidates all requests to break out of the fast path + in the eval loop. */ +static _Py_atomic_int eval_breaker = {0}; +/* Request for dropping the GIL */ +static _Py_atomic_int gil_drop_request = {0}; +/* Request for running pending calls. */ +static _Py_atomic_int pendingcalls_to_do = {0}; +/* Request for looking at the `async_exc` field of the current thread state. + Guarded by the GIL. */ +static int pending_async_exc = 0; + +#include "ceval_gil.h" + +int +PyEval_ThreadsInitialized(void) +{ + return gil_created(); +} + +void +PyEval_InitThreads(void) +{ + if (gil_created()) + return; + create_gil(); + take_gil(PyThreadState_GET()); + main_thread = PyThread_get_thread_ident(); + if (!pending_lock) + pending_lock = PyThread_allocate_lock(); +} + +void +_PyEval_FiniThreads(void) +{ + if (!gil_created()) + return; + destroy_gil(); + assert(!gil_created()); +} + +void +PyEval_AcquireLock(void) +{ + PyThreadState *tstate = PyThreadState_GET(); + if (tstate == NULL) + Py_FatalError("PyEval_AcquireLock: current thread state is NULL"); + take_gil(tstate); +} + +void +PyEval_ReleaseLock(void) +{ + /* This function must succeed when the current thread state is NULL. + We therefore avoid PyThreadState_GET() which dumps a fatal error + in debug mode. + */ + drop_gil((PyThreadState*)_Py_atomic_load_relaxed( + &_PyThreadState_Current)); +} + +void +PyEval_AcquireThread(PyThreadState *tstate) +{ + if (tstate == NULL) + Py_FatalError("PyEval_AcquireThread: NULL new thread state"); + /* Check someone has called PyEval_InitThreads() to create the lock */ + assert(gil_created()); + take_gil(tstate); + if (PyThreadState_Swap(tstate) != NULL) + Py_FatalError( + "PyEval_AcquireThread: non-NULL old thread state"); +} + +void +PyEval_ReleaseThread(PyThreadState *tstate) +{ + if (tstate == NULL) + Py_FatalError("PyEval_ReleaseThread: NULL thread state"); + if (PyThreadState_Swap(NULL) != tstate) + Py_FatalError("PyEval_ReleaseThread: wrong thread state"); + drop_gil(tstate); +} + +/* This function is called from PyOS_AfterFork to ensure that newly + created child processes don't hold locks referring to threads which + are not running in the child process. (This could also be done using + pthread_atfork mechanism, at least for the pthreads implementation.) */ + +void +PyEval_ReInitThreads(void) +{ + _Py_IDENTIFIER(_after_fork); + PyObject *threading, *result; + PyThreadState *tstate = PyThreadState_GET(); + + if (!gil_created()) + return; + recreate_gil(); + pending_lock = PyThread_allocate_lock(); + take_gil(tstate); + main_thread = PyThread_get_thread_ident(); + + /* Update the threading module with the new state. + */ + tstate = PyThreadState_GET(); + threading = PyMapping_GetItemString(tstate->interp->modules, + "threading"); + if (threading == NULL) { + /* threading not imported */ + PyErr_Clear(); + return; + } + result = _PyObject_CallMethodId(threading, &PyId__after_fork, NULL); + if (result == NULL) + PyErr_WriteUnraisable(threading); + else + Py_DECREF(result); + Py_DECREF(threading); +} + +#else +static _Py_atomic_int eval_breaker = {0}; +static int pending_async_exc = 0; +#endif /* WITH_THREAD */ + +/* This function is used to signal that async exceptions are waiting to be + raised, therefore it is also useful in non-threaded builds. */ + +void +_PyEval_SignalAsyncExc(void) +{ + SIGNAL_ASYNC_EXC(); +} + +/* Functions save_thread and restore_thread are always defined so + dynamically loaded modules needn't be compiled separately for use + with and without threads: */ + +PyThreadState * +PyEval_SaveThread(void) +{ + PyThreadState *tstate = PyThreadState_Swap(NULL); + if (tstate == NULL) + Py_FatalError("PyEval_SaveThread: NULL tstate"); +#ifdef WITH_THREAD + if (gil_created()) + drop_gil(tstate); +#endif + return tstate; +} + +void +PyEval_RestoreThread(PyThreadState *tstate) +{ + if (tstate == NULL) + Py_FatalError("PyEval_RestoreThread: NULL tstate"); +#ifdef WITH_THREAD + if (gil_created()) { + int err = errno; + take_gil(tstate); + /* _Py_Finalizing is protected by the GIL */ + if (_Py_Finalizing && tstate != _Py_Finalizing) { + drop_gil(tstate); + PyThread_exit_thread(); + assert(0); /* unreachable */ + } + errno = err; + } +#endif + PyThreadState_Swap(tstate); +} + + +/* Mechanism whereby asynchronously executing callbacks (e.g. UNIX + signal handlers or Mac I/O completion routines) can schedule calls + to a function to be called synchronously. + The synchronous function is called with one void* argument. + It should return 0 for success or -1 for failure -- failure should + be accompanied by an exception. + + If registry succeeds, the registry function returns 0; if it fails + (e.g. due to too many pending calls) it returns -1 (without setting + an exception condition). + + Note that because registry may occur from within signal handlers, + or other asynchronous events, calling malloc() is unsafe! + +#ifdef WITH_THREAD + Any thread can schedule pending calls, but only the main thread + will execute them. + There is no facility to schedule calls to a particular thread, but + that should be easy to change, should that ever be required. In + that case, the static variables here should go into the python + threadstate. +#endif +*/ + +#ifdef WITH_THREAD + +/* The WITH_THREAD implementation is thread-safe. It allows + scheduling to be made from any thread, and even from an executing + callback. + */ + +#define NPENDINGCALLS 32 +static struct { + int (*func)(void *); + void *arg; +} pendingcalls[NPENDINGCALLS]; +static int pendingfirst = 0; +static int pendinglast = 0; + +int +Py_AddPendingCall(int (*func)(void *), void *arg) +{ + int i, j, result=0; + PyThread_type_lock lock = pending_lock; + + /* try a few times for the lock. Since this mechanism is used + * for signal handling (on the main thread), there is a (slim) + * chance that a signal is delivered on the same thread while we + * hold the lock during the Py_MakePendingCalls() function. + * This avoids a deadlock in that case. + * Note that signals can be delivered on any thread. In particular, + * on Windows, a SIGINT is delivered on a system-created worker + * thread. + * We also check for lock being NULL, in the unlikely case that + * this function is called before any bytecode evaluation takes place. + */ + if (lock != NULL) { + for (i = 0; i<100; i++) { + if (PyThread_acquire_lock(lock, NOWAIT_LOCK)) + break; + } + if (i == 100) + return -1; + } + + i = pendinglast; + j = (i + 1) % NPENDINGCALLS; + if (j == pendingfirst) { + result = -1; /* Queue full */ + } else { + pendingcalls[i].func = func; + pendingcalls[i].arg = arg; + pendinglast = j; + } + /* signal main loop */ + SIGNAL_PENDING_CALLS(); + if (lock != NULL) + PyThread_release_lock(lock); + return result; +} + +int +Py_MakePendingCalls(void) +{ + static int busy = 0; + int i; + int r = 0; + + if (!pending_lock) { + /* initial allocation of the lock */ + pending_lock = PyThread_allocate_lock(); + if (pending_lock == NULL) + return -1; + } + + /* only service pending calls on main thread */ + if (main_thread && PyThread_get_thread_ident() != main_thread) + return 0; + /* don't perform recursive pending calls */ + if (busy) + return 0; + busy = 1; + /* perform a bounded number of calls, in case of recursion */ + for (i=0; irecursion_depth; + PyErr_SetString(PyExc_MemoryError, "Stack overflow"); + return -1; + } +#endif + _Py_CheckRecursionLimit = recursion_limit; + if (tstate->recursion_critical) + /* Somebody asked that we don't check for recursion. */ + return 0; + if (tstate->overflowed) { + if (tstate->recursion_depth > recursion_limit + 50) { + /* Overflowing while handling an overflow. Give up. */ + Py_FatalError("Cannot recover from stack overflow."); + } + return 0; + } + if (tstate->recursion_depth > recursion_limit) { + --tstate->recursion_depth; + tstate->overflowed = 1; + PyErr_Format(PyExc_RuntimeError, + "maximum recursion depth exceeded%s", + where); + return -1; + } + return 0; +} + +/* Status code for main loop (reason for stack unwind) */ +enum why_code { + WHY_NOT = 0x0001, /* No error */ + WHY_EXCEPTION = 0x0002, /* Exception occurred */ + WHY_RERAISE = 0x0004, /* Exception re-raised by 'finally' */ + WHY_RETURN = 0x0008, /* 'return' statement */ + WHY_BREAK = 0x0010, /* 'break' statement */ + WHY_CONTINUE = 0x0020, /* 'continue' statement */ + WHY_YIELD = 0x0040, /* 'yield' operator */ + WHY_SILENCED = 0x0080 /* Exception silenced by 'with' */ +}; + +static void save_exc_state(PyThreadState *, PyFrameObject *); +static void swap_exc_state(PyThreadState *, PyFrameObject *); +static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *); +static enum why_code do_raise(PyObject *, PyObject *); +static int unpack_iterable(PyObject *, int, int, PyObject **); + +/* Records whether tracing is on for any thread. Counts the number of + threads for which tstate->c_tracefunc is non-NULL, so if the value + is 0, we know we don't have to check this thread's c_tracefunc. + This speeds up the if statement in PyEval_EvalFrameEx() after + fast_next_opcode*/ +static int _Py_TracingPossible = 0; + + + +PyObject * +PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) +{ + return PyEval_EvalCodeEx(co, + globals, locals, + (PyObject **)NULL, 0, + (PyObject **)NULL, 0, + (PyObject **)NULL, 0, + NULL, NULL); +} + + +/* Interpreter main loop */ + +PyObject * +PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) +{ + register PyObject **stack_pointer; /* Next free slot in value stack */ + register unsigned char *next_instr; + register int opcode; /* Current opcode */ + register int oparg; /* Current opcode argument, if any */ + register enum why_code why; /* Reason for block stack unwind */ + register int err; /* Error status -- nonzero if error */ + register PyObject *x; /* Result object -- NULL if error */ + register PyObject *v; /* Temporary objects popped off stack */ + register PyObject *w; + register PyObject *u; + register PyObject *t; + register PyObject **fastlocals, **freevars; + PyObject *retval = NULL; /* Return value */ + PyThreadState *tstate = PyThreadState_GET(); + PyCodeObject *co; + + /* when tracing we set things up so that + + not (instr_lb <= current_bytecode_offset < instr_ub) + + is true when the line being executed has changed. The + initial values are such as to make this false the first + time it is tested. */ + int instr_ub = -1, instr_lb = 0, instr_prev = -1; + + unsigned char *first_instr; + PyObject *names; + PyObject *consts; + +/* Computed GOTOs, or + the-optimization-commonly-but-improperly-known-as-"threaded code" + using gcc's labels-as-values extension + (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html). + + The traditional bytecode evaluation loop uses a "switch" statement, which + decent compilers will optimize as a single indirect branch instruction + combined with a lookup table of jump addresses. However, since the + indirect jump instruction is shared by all opcodes, the CPU will have a + hard time making the right prediction for where to jump next (actually, + it will be always wrong except in the uncommon case of a sequence of + several identical opcodes). + + "Threaded code" in contrast, uses an explicit jump table and an explicit + indirect jump instruction at the end of each opcode. Since the jump + instruction is at a different address for each opcode, the CPU will make a + separate prediction for each of these instructions, which is equivalent to + predicting the second opcode of each opcode pair. These predictions have + a much better chance to turn out valid, especially in small bytecode loops. + + A mispredicted branch on a modern CPU flushes the whole pipeline and + can cost several CPU cycles (depending on the pipeline depth), + and potentially many more instructions (depending on the pipeline width). + A correctly predicted branch, however, is nearly free. + + At the time of this writing, the "threaded code" version is up to 15-20% + faster than the normal "switch" version, depending on the compiler and the + CPU architecture. + + We disable the optimization if DYNAMIC_EXECUTION_PROFILE is defined, + because it would render the measurements invalid. + + + NOTE: care must be taken that the compiler doesn't try to "optimize" the + indirect jumps by sharing them between all opcodes. Such optimizations + can be disabled on gcc by using the -fno-gcse flag (or possibly + -fno-crossjumping). +*/ + +#ifdef DYNAMIC_EXECUTION_PROFILE +#undef USE_COMPUTED_GOTOS +#define USE_COMPUTED_GOTOS 0 +#endif + +#ifdef HAVE_COMPUTED_GOTOS + #ifndef USE_COMPUTED_GOTOS + #define USE_COMPUTED_GOTOS 1 + #endif +#else + #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS + #error "Computed gotos are not supported on this compiler." + #endif + #undef USE_COMPUTED_GOTOS + #define USE_COMPUTED_GOTOS 0 +#endif + +#if USE_COMPUTED_GOTOS +/* Import the static jump table */ +#include "opcode_targets.h" + +/* This macro is used when several opcodes defer to the same implementation + (e.g. SETUP_LOOP, SETUP_FINALLY) */ +#define TARGET_WITH_IMPL(op, impl) \ + TARGET_##op: \ + opcode = op; \ + if (HAS_ARG(op)) \ + oparg = NEXTARG(); \ + case op: \ + goto impl; \ + +#define TARGET(op) \ + TARGET_##op: \ + opcode = op; \ + if (HAS_ARG(op)) \ + oparg = NEXTARG(); \ + case op: + + +#define DISPATCH() \ + { \ + if (!_Py_atomic_load_relaxed(&eval_breaker)) { \ + FAST_DISPATCH(); \ + } \ + continue; \ + } + +#ifdef LLTRACE +#define FAST_DISPATCH() \ + { \ + if (!lltrace && !_Py_TracingPossible) { \ + f->f_lasti = INSTR_OFFSET(); \ + goto *opcode_targets[*next_instr++]; \ + } \ + goto fast_next_opcode; \ + } +#else +#define FAST_DISPATCH() \ + { \ + if (!_Py_TracingPossible) { \ + f->f_lasti = INSTR_OFFSET(); \ + goto *opcode_targets[*next_instr++]; \ + } \ + goto fast_next_opcode; \ + } +#endif + +#else +#define TARGET(op) \ + case op: +#define TARGET_WITH_IMPL(op, impl) \ + /* silence compiler warnings about `impl` unused */ \ + if (0) goto impl; \ + case op: +#define DISPATCH() continue +#define FAST_DISPATCH() goto fast_next_opcode +#endif + + +/* Tuple access macros */ + +#ifndef Py_DEBUG +#define GETITEM(v, i) PyTuple_GET_ITEM((PyTupleObject *)(v), (i)) +#else +#define GETITEM(v, i) PyTuple_GetItem((v), (i)) +#endif + +#ifdef WITH_TSC +/* Use Pentium timestamp counter to mark certain events: + inst0 -- beginning of switch statement for opcode dispatch + inst1 -- end of switch statement (may be skipped) + loop0 -- the top of the mainloop + loop1 -- place where control returns again to top of mainloop + (may be skipped) + intr1 -- beginning of long interruption + intr2 -- end of long interruption + + Many opcodes call out to helper C functions. In some cases, the + time in those functions should be counted towards the time for the + opcode, but not in all cases. For example, a CALL_FUNCTION opcode + calls another Python function; there's no point in charge all the + bytecode executed by the called function to the caller. + + It's hard to make a useful judgement statically. In the presence + of operator overloading, it's impossible to tell if a call will + execute new Python code or not. + + It's a case-by-case judgement. I'll use intr1 for the following + cases: + + IMPORT_STAR + IMPORT_FROM + CALL_FUNCTION (and friends) + + */ + uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0; + int ticked = 0; + + READ_TIMESTAMP(inst0); + READ_TIMESTAMP(inst1); + READ_TIMESTAMP(loop0); + READ_TIMESTAMP(loop1); + + /* shut up the compiler */ + opcode = 0; +#endif + +/* Code access macros */ + +#define INSTR_OFFSET() ((int)(next_instr - first_instr)) +#define NEXTOP() (*next_instr++) +#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) +#define PEEKARG() ((next_instr[2]<<8) + next_instr[1]) +#define JUMPTO(x) (next_instr = first_instr + (x)) +#define JUMPBY(x) (next_instr += (x)) + +/* OpCode prediction macros + Some opcodes tend to come in pairs thus making it possible to + predict the second code when the first is run. For example, + COMPARE_OP is often followed by JUMP_IF_FALSE or JUMP_IF_TRUE. And, + those opcodes are often followed by a POP_TOP. + + Verifying the prediction costs a single high-speed test of a register + variable against a constant. If the pairing was good, then the + processor's own internal branch predication has a high likelihood of + success, resulting in a nearly zero-overhead transition to the + next opcode. A successful prediction saves a trip through the eval-loop + including its two unpredictable branches, the HAS_ARG test and the + switch-case. Combined with the processor's internal branch prediction, + a successful PREDICT has the effect of making the two opcodes run as if + they were a single new opcode with the bodies combined. + + If collecting opcode statistics, your choices are to either keep the + predictions turned-on and interpret the results as if some opcodes + had been combined or turn-off predictions so that the opcode frequency + counter updates for both opcodes. + + Opcode prediction is disabled with threaded code, since the latter allows + the CPU to record separate branch prediction information for each + opcode. + +*/ + +#if defined(DYNAMIC_EXECUTION_PROFILE) || USE_COMPUTED_GOTOS +#define PREDICT(op) if (0) goto PRED_##op +#define PREDICTED(op) PRED_##op: +#define PREDICTED_WITH_ARG(op) PRED_##op: +#else +#define PREDICT(op) if (*next_instr == op) goto PRED_##op +#define PREDICTED(op) PRED_##op: next_instr++ +#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3 +#endif + + +/* Stack manipulation macros */ + +/* The stack can grow at most MAXINT deep, as co_nlocals and + co_stacksize are ints. */ +#define STACK_LEVEL() ((int)(stack_pointer - f->f_valuestack)) +#define EMPTY() (STACK_LEVEL() == 0) +#define TOP() (stack_pointer[-1]) +#define SECOND() (stack_pointer[-2]) +#define THIRD() (stack_pointer[-3]) +#define FOURTH() (stack_pointer[-4]) +#define PEEK(n) (stack_pointer[-(n)]) +#define SET_TOP(v) (stack_pointer[-1] = (v)) +#define SET_SECOND(v) (stack_pointer[-2] = (v)) +#define SET_THIRD(v) (stack_pointer[-3] = (v)) +#define SET_FOURTH(v) (stack_pointer[-4] = (v)) +#define SET_VALUE(n, v) (stack_pointer[-(n)] = (v)) +#define BASIC_STACKADJ(n) (stack_pointer += n) +#define BASIC_PUSH(v) (*stack_pointer++ = (v)) +#define BASIC_POP() (*--stack_pointer) + +#define PUSH(v) BASIC_PUSH(v) +#define POP() BASIC_POP() +#define STACKADJ(n) BASIC_STACKADJ(n) +#define EXT_POP(STACK_POINTER) (*--(STACK_POINTER)) + +/* Local variable macros */ + +#define GETLOCAL(i) (fastlocals[i]) + +/* The SETLOCAL() macro must not DECREF the local variable in-place and + then store the new value; it must copy the old value to a temporary + value, then store the new value, and then DECREF the temporary value. + This is because it is possible that during the DECREF the frame is + accessed by other code (e.g. a __del__ method or gc.collect()) and the + variable would be pointing to already-freed memory. */ +#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \ + GETLOCAL(i) = value; \ + Py_XDECREF(tmp); } while (0) + + +#define UNWIND_BLOCK(b) \ + while (STACK_LEVEL() > (b)->b_level) { \ + PyObject *v = POP(); \ + Py_XDECREF(v); \ + } + +#define UNWIND_EXCEPT_HANDLER(b) \ + { \ + PyObject *type, *value, *traceback; \ + assert(STACK_LEVEL() >= (b)->b_level + 3); \ + while (STACK_LEVEL() > (b)->b_level + 3) { \ + value = POP(); \ + Py_XDECREF(value); \ + } \ + type = tstate->exc_type; \ + value = tstate->exc_value; \ + traceback = tstate->exc_traceback; \ + tstate->exc_type = POP(); \ + tstate->exc_value = POP(); \ + tstate->exc_traceback = POP(); \ + Py_XDECREF(type); \ + Py_XDECREF(value); \ + Py_XDECREF(traceback); \ + } + +/* Start of code */ + + /* push frame */ + if (Py_EnterRecursiveCall("")) + return NULL; + + tstate->frame = f; + + if (tstate->use_tracing) { + if (tstate->c_tracefunc != NULL) { + /* tstate->c_tracefunc, if defined, is a + function that will be called on *every* entry + to a code block. Its return value, if not + None, is a function that will be called at + the start of each executed line of code. + (Actually, the function must return itself + in order to continue tracing.) The trace + functions are called with three arguments: + a pointer to the current frame, a string + indicating why the function is called, and + an argument which depends on the situation. + The global trace function is also called + whenever an exception is detected. */ + if (call_trace_protected(tstate->c_tracefunc, + tstate->c_traceobj, + f, PyTrace_CALL, Py_None)) { + /* Trace function raised an error */ + goto exit_eval_frame; + } + } + if (tstate->c_profilefunc != NULL) { + /* Similar for c_profilefunc, except it needn't + return itself and isn't called for "line" events */ + if (call_trace_protected(tstate->c_profilefunc, + tstate->c_profileobj, + f, PyTrace_CALL, Py_None)) { + /* Profile function raised an error */ + goto exit_eval_frame; + } + } + } + + co = f->f_code; + names = co->co_names; + consts = co->co_consts; + fastlocals = f->f_localsplus; + freevars = f->f_localsplus + co->co_nlocals; + first_instr = (unsigned char*) PyBytes_AS_STRING(co->co_code); + /* An explanation is in order for the next line. + + f->f_lasti now refers to the index of the last instruction + executed. You might think this was obvious from the name, but + this wasn't always true before 2.3! PyFrame_New now sets + f->f_lasti to -1 (i.e. the index *before* the first instruction) + and YIELD_VALUE doesn't fiddle with f_lasti any more. So this + does work. Promise. + + When the PREDICT() macros are enabled, some opcode pairs follow in + direct succession without updating f->f_lasti. A successful + prediction effectively links the two codes together as if they + were a single new opcode; accordingly,f->f_lasti will point to + the first code in the pair (for instance, GET_ITER followed by + FOR_ITER is effectively a single opcode and f->f_lasti will point + at to the beginning of the combined pair.) + */ + next_instr = first_instr + f->f_lasti + 1; + stack_pointer = f->f_stacktop; + assert(stack_pointer != NULL); + f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */ + + if (co->co_flags & CO_GENERATOR && !throwflag) { + if (f->f_exc_type != NULL && f->f_exc_type != Py_None) { + /* We were in an except handler when we left, + restore the exception state which was put aside + (see YIELD_VALUE). */ + swap_exc_state(tstate, f); + } + else + save_exc_state(tstate, f); + } + + why = WHY_NOT; + err = 0; + x = Py_None; /* Not a reference, just anything non-NULL */ + w = NULL; + + if (throwflag) { /* support for generator.throw() */ + why = WHY_EXCEPTION; + goto on_error; + } + + for (;;) { + assert(stack_pointer >= f->f_valuestack); /* else underflow */ + assert(STACK_LEVEL() <= co->co_stacksize); /* else overflow */ + + /* Do periodic things. Doing this every time through + the loop would add too much overhead, so we do it + only every Nth instruction. We also do it if + ``pendingcalls_to_do'' is set, i.e. when an asynchronous + event needs attention (e.g. a signal handler or + async I/O handler); see Py_AddPendingCall() and + Py_MakePendingCalls() above. */ + + if (_Py_atomic_load_relaxed(&eval_breaker)) { + if (*next_instr == SETUP_FINALLY) { + /* Make the last opcode before + a try: finally: block uninterruptible. */ + goto fast_next_opcode; + } + tstate->tick_counter++; + if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) { + if (Py_MakePendingCalls() < 0) { + why = WHY_EXCEPTION; + goto on_error; + } + } +#ifdef WITH_THREAD + if (_Py_atomic_load_relaxed(&gil_drop_request)) { + /* Give another thread a chance */ + if (PyThreadState_Swap(NULL) != tstate) + Py_FatalError("ceval: tstate mix-up"); + drop_gil(tstate); + + /* Other threads may run now */ + + take_gil(tstate); + if (PyThreadState_Swap(tstate) != NULL) + Py_FatalError("ceval: orphan tstate"); + } +#endif + /* Check for asynchronous exceptions. */ + if (tstate->async_exc != NULL) { + x = tstate->async_exc; + tstate->async_exc = NULL; + UNSIGNAL_ASYNC_EXC(); + PyErr_SetNone(x); + Py_DECREF(x); + why = WHY_EXCEPTION; + goto on_error; + } + } + + fast_next_opcode: + f->f_lasti = INSTR_OFFSET(); + + /* line-by-line tracing support */ + + if (_Py_TracingPossible && + tstate->c_tracefunc != NULL && !tstate->tracing) { + /* see maybe_call_line_trace + for expository comments */ + f->f_stacktop = stack_pointer; + + err = maybe_call_line_trace(tstate->c_tracefunc, + tstate->c_traceobj, + f, &instr_lb, &instr_ub, + &instr_prev); + /* Reload possibly changed frame fields */ + JUMPTO(f->f_lasti); + if (f->f_stacktop != NULL) { + stack_pointer = f->f_stacktop; + f->f_stacktop = NULL; + } + if (err) { + /* trace function raised an exception */ + goto on_error; + } + } + + /* Extract opcode and argument */ + + opcode = NEXTOP(); + oparg = 0; /* allows oparg to be stored in a register because + it doesn't have to be remembered across a full loop */ + if (HAS_ARG(opcode)) + oparg = NEXTARG(); + dispatch_opcode: + + /* Main switch on opcode */ + + switch (opcode) { + + /* BEWARE! + It is essential that any operation that fails sets either + x to NULL, err to nonzero, or why to anything but WHY_NOT, + and that no operation that succeeds does this! */ + + TARGET(NOP) + FAST_DISPATCH(); + + TARGET(LOAD_FAST) + x = GETLOCAL(oparg); + if (x != NULL) { + Py_INCREF(x); + PUSH(x); + FAST_DISPATCH(); + } + format_exc_check_arg(PyExc_UnboundLocalError, + UNBOUNDLOCAL_ERROR_MSG, + PyTuple_GetItem(co->co_varnames, oparg)); + break; + + TARGET(LOAD_CONST) + x = GETITEM(consts, oparg); + Py_INCREF(x); + PUSH(x); + FAST_DISPATCH(); + + PREDICTED_WITH_ARG(STORE_FAST); + TARGET(STORE_FAST) + v = POP(); + SETLOCAL(oparg, v); + FAST_DISPATCH(); + + TARGET(POP_TOP) + v = POP(); + Py_DECREF(v); + FAST_DISPATCH(); + + TARGET(ROT_TWO) + v = TOP(); + w = SECOND(); + SET_TOP(w); + SET_SECOND(v); + FAST_DISPATCH(); + + TARGET(ROT_THREE) + v = TOP(); + w = SECOND(); + x = THIRD(); + SET_TOP(w); + SET_SECOND(x); + SET_THIRD(v); + FAST_DISPATCH(); + + TARGET(DUP_TOP) + v = TOP(); + Py_INCREF(v); + PUSH(v); + FAST_DISPATCH(); + + TARGET(DUP_TOP_TWO) + x = TOP(); + Py_INCREF(x); + w = SECOND(); + Py_INCREF(w); + STACKADJ(2); + SET_TOP(x); + SET_SECOND(w); + FAST_DISPATCH(); + + TARGET(UNARY_POSITIVE) + v = TOP(); + x = PyNumber_Positive(v); + Py_DECREF(v); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(UNARY_NEGATIVE) + v = TOP(); + x = PyNumber_Negative(v); + Py_DECREF(v); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(UNARY_NOT) + v = TOP(); + err = PyObject_IsTrue(v); + Py_DECREF(v); + if (err == 0) { + Py_INCREF(Py_True); + SET_TOP(Py_True); + DISPATCH(); + } + else if (err > 0) { + Py_INCREF(Py_False); + SET_TOP(Py_False); + err = 0; + DISPATCH(); + } + STACKADJ(-1); + break; + + TARGET(UNARY_INVERT) + v = TOP(); + x = PyNumber_Invert(v); + Py_DECREF(v); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_POWER) + w = POP(); + v = TOP(); + x = PyNumber_Power(v, w, Py_None); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_MULTIPLY) + w = POP(); + v = TOP(); + x = PyNumber_Multiply(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_TRUE_DIVIDE) + w = POP(); + v = TOP(); + x = PyNumber_TrueDivide(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_FLOOR_DIVIDE) + w = POP(); + v = TOP(); + x = PyNumber_FloorDivide(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_MODULO) + w = POP(); + v = TOP(); + if (PyUnicode_CheckExact(v)) + x = PyUnicode_Format(v, w); + else + x = PyNumber_Remainder(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_ADD) + w = POP(); + v = TOP(); + if (PyUnicode_CheckExact(v) && + PyUnicode_CheckExact(w)) { + x = unicode_concatenate(v, w, f, next_instr); + /* unicode_concatenate consumed the ref to v */ + goto skip_decref_vx; + } + else { + x = PyNumber_Add(v, w); + } + Py_DECREF(v); + skip_decref_vx: + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_SUBTRACT) + w = POP(); + v = TOP(); + x = PyNumber_Subtract(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_SUBSCR) + w = POP(); + v = TOP(); + x = PyObject_GetItem(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_LSHIFT) + w = POP(); + v = TOP(); + x = PyNumber_Lshift(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_RSHIFT) + w = POP(); + v = TOP(); + x = PyNumber_Rshift(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_AND) + w = POP(); + v = TOP(); + x = PyNumber_And(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_XOR) + w = POP(); + v = TOP(); + x = PyNumber_Xor(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(BINARY_OR) + w = POP(); + v = TOP(); + x = PyNumber_Or(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(LIST_APPEND) + w = POP(); + v = PEEK(oparg); + err = PyList_Append(v, w); + Py_DECREF(w); + if (err == 0) { + PREDICT(JUMP_ABSOLUTE); + DISPATCH(); + } + break; + + TARGET(SET_ADD) + w = POP(); + v = stack_pointer[-oparg]; + err = PySet_Add(v, w); + Py_DECREF(w); + if (err == 0) { + PREDICT(JUMP_ABSOLUTE); + DISPATCH(); + } + break; + + TARGET(INPLACE_POWER) + w = POP(); + v = TOP(); + x = PyNumber_InPlacePower(v, w, Py_None); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_MULTIPLY) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceMultiply(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_TRUE_DIVIDE) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceTrueDivide(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_FLOOR_DIVIDE) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceFloorDivide(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_MODULO) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceRemainder(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_ADD) + w = POP(); + v = TOP(); + if (PyUnicode_CheckExact(v) && + PyUnicode_CheckExact(w)) { + x = unicode_concatenate(v, w, f, next_instr); + /* unicode_concatenate consumed the ref to v */ + goto skip_decref_v; + } + else { + x = PyNumber_InPlaceAdd(v, w); + } + Py_DECREF(v); + skip_decref_v: + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_SUBTRACT) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceSubtract(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_LSHIFT) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceLshift(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_RSHIFT) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceRshift(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_AND) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceAnd(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_XOR) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceXor(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(INPLACE_OR) + w = POP(); + v = TOP(); + x = PyNumber_InPlaceOr(v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(STORE_SUBSCR) + w = TOP(); + v = SECOND(); + u = THIRD(); + STACKADJ(-3); + /* v[w] = u */ + err = PyObject_SetItem(v, w, u); + Py_DECREF(u); + Py_DECREF(v); + Py_DECREF(w); + if (err == 0) DISPATCH(); + break; + + TARGET(DELETE_SUBSCR) + w = TOP(); + v = SECOND(); + STACKADJ(-2); + /* del v[w] */ + err = PyObject_DelItem(v, w); + Py_DECREF(v); + Py_DECREF(w); + if (err == 0) DISPATCH(); + break; + + TARGET(PRINT_EXPR) + v = POP(); + w = PySys_GetObject("displayhook"); + if (w == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "lost sys.displayhook"); + err = -1; + x = NULL; + } + if (err == 0) { + x = PyTuple_Pack(1, v); + if (x == NULL) + err = -1; + } + if (err == 0) { + w = PyEval_CallObject(w, x); + Py_XDECREF(w); + if (w == NULL) + err = -1; + } + Py_DECREF(v); + Py_XDECREF(x); + break; + + TARGET(RAISE_VARARGS) + v = w = NULL; + switch (oparg) { + case 2: + v = POP(); /* cause */ + case 1: + w = POP(); /* exc */ + case 0: /* Fallthrough */ + why = do_raise(w, v); + break; + default: + PyErr_SetString(PyExc_SystemError, + "bad RAISE_VARARGS oparg"); + why = WHY_EXCEPTION; + break; + } + break; + + TARGET(STORE_LOCALS) + x = POP(); + v = f->f_locals; + Py_XDECREF(v); + f->f_locals = x; + DISPATCH(); + + TARGET(RETURN_VALUE) + retval = POP(); + why = WHY_RETURN; + goto fast_block_end; + + TARGET(YIELD_VALUE) + retval = POP(); + f->f_stacktop = stack_pointer; + why = WHY_YIELD; + goto fast_yield; + + TARGET(POP_EXCEPT) + { + PyTryBlock *b = PyFrame_BlockPop(f); + if (b->b_type != EXCEPT_HANDLER) { + PyErr_SetString(PyExc_SystemError, + "popped block is not an except handler"); + why = WHY_EXCEPTION; + break; + } + UNWIND_EXCEPT_HANDLER(b); + } + DISPATCH(); + + TARGET(POP_BLOCK) + { + PyTryBlock *b = PyFrame_BlockPop(f); + UNWIND_BLOCK(b); + } + DISPATCH(); + + PREDICTED(END_FINALLY); + TARGET(END_FINALLY) + v = POP(); + if (PyLong_Check(v)) { + why = (enum why_code) PyLong_AS_LONG(v); + assert(why != WHY_YIELD); + if (why == WHY_RETURN || + why == WHY_CONTINUE) + retval = POP(); + if (why == WHY_SILENCED) { + /* An exception was silenced by 'with', we must + manually unwind the EXCEPT_HANDLER block which was + created when the exception was caught, otherwise + the stack will be in an inconsistent state. */ + PyTryBlock *b = PyFrame_BlockPop(f); + assert(b->b_type == EXCEPT_HANDLER); + UNWIND_EXCEPT_HANDLER(b); + why = WHY_NOT; + } + } + else if (PyExceptionClass_Check(v)) { + w = POP(); + u = POP(); + PyErr_Restore(v, w, u); + why = WHY_RERAISE; + break; + } + else if (v != Py_None) { + PyErr_SetString(PyExc_SystemError, + "'finally' pops bad exception"); + why = WHY_EXCEPTION; + } + Py_DECREF(v); + break; + + TARGET(LOAD_BUILD_CLASS) + x = PyDict_GetItemString(f->f_builtins, + "__build_class__"); + if (x == NULL) { + PyErr_SetString(PyExc_ImportError, + "__build_class__ not found"); + break; + } + Py_INCREF(x); + PUSH(x); + break; + + TARGET(STORE_NAME) + w = GETITEM(names, oparg); + v = POP(); + if ((x = f->f_locals) != NULL) { + if (PyDict_CheckExact(x)) + err = PyDict_SetItem(x, w, v); + else + err = PyObject_SetItem(x, w, v); + Py_DECREF(v); + if (err == 0) DISPATCH(); + break; + } + PyErr_Format(PyExc_SystemError, + "no locals found when storing %R", w); + break; + + TARGET(DELETE_NAME) + w = GETITEM(names, oparg); + if ((x = f->f_locals) != NULL) { + if ((err = PyObject_DelItem(x, w)) != 0) + format_exc_check_arg(PyExc_NameError, + NAME_ERROR_MSG, + w); + break; + } + PyErr_Format(PyExc_SystemError, + "no locals when deleting %R", w); + break; + + PREDICTED_WITH_ARG(UNPACK_SEQUENCE); + TARGET(UNPACK_SEQUENCE) + v = POP(); + if (PyTuple_CheckExact(v) && + PyTuple_GET_SIZE(v) == oparg) { + PyObject **items = \ + ((PyTupleObject *)v)->ob_item; + while (oparg--) { + w = items[oparg]; + Py_INCREF(w); + PUSH(w); + } + Py_DECREF(v); + DISPATCH(); + } else if (PyList_CheckExact(v) && + PyList_GET_SIZE(v) == oparg) { + PyObject **items = \ + ((PyListObject *)v)->ob_item; + while (oparg--) { + w = items[oparg]; + Py_INCREF(w); + PUSH(w); + } + } else if (unpack_iterable(v, oparg, -1, + stack_pointer + oparg)) { + STACKADJ(oparg); + } else { + /* unpack_iterable() raised an exception */ + why = WHY_EXCEPTION; + } + Py_DECREF(v); + break; + + TARGET(UNPACK_EX) + { + int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); + v = POP(); + + if (unpack_iterable(v, oparg & 0xFF, oparg >> 8, + stack_pointer + totalargs)) { + stack_pointer += totalargs; + } else { + why = WHY_EXCEPTION; + } + Py_DECREF(v); + break; + } + + TARGET(STORE_ATTR) + w = GETITEM(names, oparg); + v = TOP(); + u = SECOND(); + STACKADJ(-2); + err = PyObject_SetAttr(v, w, u); /* v.w = u */ + Py_DECREF(v); + Py_DECREF(u); + if (err == 0) DISPATCH(); + break; + + TARGET(DELETE_ATTR) + w = GETITEM(names, oparg); + v = POP(); + err = PyObject_SetAttr(v, w, (PyObject *)NULL); + /* del v.w */ + Py_DECREF(v); + break; + + TARGET(STORE_GLOBAL) + w = GETITEM(names, oparg); + v = POP(); + err = PyDict_SetItem(f->f_globals, w, v); + Py_DECREF(v); + if (err == 0) DISPATCH(); + break; + + TARGET(DELETE_GLOBAL) + w = GETITEM(names, oparg); + if ((err = PyDict_DelItem(f->f_globals, w)) != 0) + format_exc_check_arg( + PyExc_NameError, GLOBAL_NAME_ERROR_MSG, w); + break; + + TARGET(LOAD_NAME) + w = GETITEM(names, oparg); + if ((v = f->f_locals) == NULL) { + PyErr_Format(PyExc_SystemError, + "no locals when loading %R", w); + why = WHY_EXCEPTION; + break; + } + if (PyDict_CheckExact(v)) { + x = PyDict_GetItem(v, w); + Py_XINCREF(x); + } + else { + x = PyObject_GetItem(v, w); + if (x == NULL && PyErr_Occurred()) { + if (!PyErr_ExceptionMatches( + PyExc_KeyError)) + break; + PyErr_Clear(); + } + } + if (x == NULL) { + x = PyDict_GetItem(f->f_globals, w); + if (x == NULL) { + x = PyDict_GetItem(f->f_builtins, w); + if (x == NULL) { + format_exc_check_arg( + PyExc_NameError, + NAME_ERROR_MSG, w); + break; + } + } + Py_INCREF(x); + } + PUSH(x); + DISPATCH(); + + TARGET(LOAD_GLOBAL) + w = GETITEM(names, oparg); + if (PyUnicode_CheckExact(w)) { + /* Inline the PyDict_GetItem() calls. + WARNING: this is an extreme speed hack. + Do not try this at home. */ + Py_hash_t hash = ((PyASCIIObject *)w)->hash; + if (hash != -1) { + PyDictObject *d; + PyDictEntry *e; + d = (PyDictObject *)(f->f_globals); + e = d->ma_lookup(d, w, hash); + if (e == NULL) { + x = NULL; + break; + } + x = e->me_value; + if (x != NULL) { + Py_INCREF(x); + PUSH(x); + DISPATCH(); + } + d = (PyDictObject *)(f->f_builtins); + e = d->ma_lookup(d, w, hash); + if (e == NULL) { + x = NULL; + break; + } + x = e->me_value; + if (x != NULL) { + Py_INCREF(x); + PUSH(x); + DISPATCH(); + } + goto load_global_error; + } + } + /* This is the un-inlined version of the code above */ + x = PyDict_GetItem(f->f_globals, w); + if (x == NULL) { + x = PyDict_GetItem(f->f_builtins, w); + if (x == NULL) { + load_global_error: + format_exc_check_arg( + PyExc_NameError, + GLOBAL_NAME_ERROR_MSG, w); + break; + } + } + Py_INCREF(x); + PUSH(x); + DISPATCH(); + + TARGET(DELETE_FAST) + x = GETLOCAL(oparg); + if (x != NULL) { + SETLOCAL(oparg, NULL); + DISPATCH(); + } + format_exc_check_arg( + PyExc_UnboundLocalError, + UNBOUNDLOCAL_ERROR_MSG, + PyTuple_GetItem(co->co_varnames, oparg) + ); + break; + + TARGET(DELETE_DEREF) + x = freevars[oparg]; + if (PyCell_GET(x) != NULL) { + PyCell_Set(x, NULL); + DISPATCH(); + } + err = -1; + format_exc_unbound(co, oparg); + break; + + TARGET(LOAD_CLOSURE) + x = freevars[oparg]; + Py_INCREF(x); + PUSH(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(LOAD_DEREF) + x = freevars[oparg]; + w = PyCell_Get(x); + if (w != NULL) { + PUSH(w); + DISPATCH(); + } + err = -1; + format_exc_unbound(co, oparg); + break; + + TARGET(STORE_DEREF) + w = POP(); + x = freevars[oparg]; + PyCell_Set(x, w); + Py_DECREF(w); + DISPATCH(); + + TARGET(BUILD_TUPLE) + x = PyTuple_New(oparg); + if (x != NULL) { + for (; --oparg >= 0;) { + w = POP(); + PyTuple_SET_ITEM(x, oparg, w); + } + PUSH(x); + DISPATCH(); + } + break; + + TARGET(BUILD_LIST) + x = PyList_New(oparg); + if (x != NULL) { + for (; --oparg >= 0;) { + w = POP(); + PyList_SET_ITEM(x, oparg, w); + } + PUSH(x); + DISPATCH(); + } + break; + + TARGET(BUILD_SET) + x = PySet_New(NULL); + if (x != NULL) { + for (; --oparg >= 0;) { + w = POP(); + if (err == 0) + err = PySet_Add(x, w); + Py_DECREF(w); + } + if (err != 0) { + Py_DECREF(x); + break; + } + PUSH(x); + DISPATCH(); + } + break; + + TARGET(BUILD_MAP) + x = _PyDict_NewPresized((Py_ssize_t)oparg); + PUSH(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(STORE_MAP) + w = TOP(); /* key */ + u = SECOND(); /* value */ + v = THIRD(); /* dict */ + STACKADJ(-2); + assert (PyDict_CheckExact(v)); + err = PyDict_SetItem(v, w, u); /* v[w] = u */ + Py_DECREF(u); + Py_DECREF(w); + if (err == 0) DISPATCH(); + break; + + TARGET(MAP_ADD) + w = TOP(); /* key */ + u = SECOND(); /* value */ + STACKADJ(-2); + v = stack_pointer[-oparg]; /* dict */ + assert (PyDict_CheckExact(v)); + err = PyDict_SetItem(v, w, u); /* v[w] = u */ + Py_DECREF(u); + Py_DECREF(w); + if (err == 0) { + PREDICT(JUMP_ABSOLUTE); + DISPATCH(); + } + break; + + TARGET(LOAD_ATTR) + w = GETITEM(names, oparg); + v = TOP(); + x = PyObject_GetAttr(v, w); + Py_DECREF(v); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(COMPARE_OP) + w = POP(); + v = TOP(); + x = cmp_outcome(oparg, v, w); + Py_DECREF(v); + Py_DECREF(w); + SET_TOP(x); + if (x == NULL) break; + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); + DISPATCH(); + + TARGET(IMPORT_NAME) + w = GETITEM(names, oparg); + x = PyDict_GetItemString(f->f_builtins, "__import__"); + if (x == NULL) { + PyErr_SetString(PyExc_ImportError, + "__import__ not found"); + break; + } + Py_INCREF(x); + v = POP(); + u = TOP(); + if (PyLong_AsLong(u) != -1 || PyErr_Occurred()) + w = PyTuple_Pack(5, + w, + f->f_globals, + f->f_locals == NULL ? + Py_None : f->f_locals, + v, + u); + else + w = PyTuple_Pack(4, + w, + f->f_globals, + f->f_locals == NULL ? + Py_None : f->f_locals, + v); + Py_DECREF(v); + Py_DECREF(u); + if (w == NULL) { + u = POP(); + Py_DECREF(x); + x = NULL; + break; + } + READ_TIMESTAMP(intr0); + v = x; + x = PyEval_CallObject(v, w); + Py_DECREF(v); + READ_TIMESTAMP(intr1); + Py_DECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(IMPORT_STAR) + v = POP(); + PyFrame_FastToLocals(f); + if ((x = f->f_locals) == NULL) { + PyErr_SetString(PyExc_SystemError, + "no locals found during 'import *'"); + break; + } + READ_TIMESTAMP(intr0); + err = import_all_from(x, v); + READ_TIMESTAMP(intr1); + PyFrame_LocalsToFast(f, 0); + Py_DECREF(v); + if (err == 0) DISPATCH(); + break; + + TARGET(IMPORT_FROM) + w = GETITEM(names, oparg); + v = TOP(); + READ_TIMESTAMP(intr0); + x = import_from(v, w); + READ_TIMESTAMP(intr1); + PUSH(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(JUMP_FORWARD) + JUMPBY(oparg); + FAST_DISPATCH(); + + PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); + TARGET(POP_JUMP_IF_FALSE) + w = POP(); + if (w == Py_True) { + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_False) { + Py_DECREF(w); + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + Py_DECREF(w); + if (err > 0) + err = 0; + else if (err == 0) + JUMPTO(oparg); + else + break; + DISPATCH(); + + PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); + TARGET(POP_JUMP_IF_TRUE) + w = POP(); + if (w == Py_False) { + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_True) { + Py_DECREF(w); + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + Py_DECREF(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) + ; + else + break; + DISPATCH(); + + TARGET(JUMP_IF_FALSE_OR_POP) + w = TOP(); + if (w == Py_True) { + STACKADJ(-1); + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_False) { + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + if (err > 0) { + STACKADJ(-1); + Py_DECREF(w); + err = 0; + } + else if (err == 0) + JUMPTO(oparg); + else + break; + DISPATCH(); + + TARGET(JUMP_IF_TRUE_OR_POP) + w = TOP(); + if (w == Py_False) { + STACKADJ(-1); + Py_DECREF(w); + FAST_DISPATCH(); + } + if (w == Py_True) { + JUMPTO(oparg); + FAST_DISPATCH(); + } + err = PyObject_IsTrue(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) { + STACKADJ(-1); + Py_DECREF(w); + } + else + break; + DISPATCH(); + + PREDICTED_WITH_ARG(JUMP_ABSOLUTE); + TARGET(JUMP_ABSOLUTE) + JUMPTO(oparg); +#if FAST_LOOPS + /* Enabling this path speeds-up all while and for-loops by bypassing + the per-loop checks for signals. By default, this should be turned-off + because it prevents detection of a control-break in tight loops like + "while 1: pass". Compile with this option turned-on when you need + the speed-up and do not need break checking inside tight loops (ones + that contain only instructions ending with FAST_DISPATCH). + */ + FAST_DISPATCH(); +#else + DISPATCH(); +#endif + + TARGET(GET_ITER) + /* before: [obj]; after [getiter(obj)] */ + v = TOP(); + x = PyObject_GetIter(v); + Py_DECREF(v); + if (x != NULL) { + SET_TOP(x); + PREDICT(FOR_ITER); + DISPATCH(); + } + STACKADJ(-1); + break; + + PREDICTED_WITH_ARG(FOR_ITER); + TARGET(FOR_ITER) + /* before: [iter]; after: [iter, iter()] *or* [] */ + v = TOP(); + x = (*v->ob_type->tp_iternext)(v); + if (x != NULL) { + PUSH(x); + PREDICT(STORE_FAST); + PREDICT(UNPACK_SEQUENCE); + DISPATCH(); + } + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches( + PyExc_StopIteration)) + break; + PyErr_Clear(); + } + /* iterator ended normally */ + x = v = POP(); + Py_DECREF(v); + JUMPBY(oparg); + DISPATCH(); + + TARGET(BREAK_LOOP) + why = WHY_BREAK; + goto fast_block_end; + + TARGET(CONTINUE_LOOP) + retval = PyLong_FromLong(oparg); + if (!retval) { + x = NULL; + break; + } + why = WHY_CONTINUE; + goto fast_block_end; + + TARGET_WITH_IMPL(SETUP_LOOP, _setup_finally) + TARGET_WITH_IMPL(SETUP_EXCEPT, _setup_finally) + TARGET(SETUP_FINALLY) + _setup_finally: + /* NOTE: If you add any new block-setup opcodes that + are not try/except/finally handlers, you may need + to update the PyGen_NeedsFinalizing() function. + */ + + PyFrame_BlockSetup(f, opcode, INSTR_OFFSET() + oparg, + STACK_LEVEL()); + DISPATCH(); + + TARGET(SETUP_WITH) + { + static PyObject *exit, *enter; + w = TOP(); + x = special_lookup(w, "__exit__", &exit); + if (!x) + break; + SET_TOP(x); + u = special_lookup(w, "__enter__", &enter); + Py_DECREF(w); + if (!u) { + x = NULL; + break; + } + x = PyObject_CallFunctionObjArgs(u, NULL); + Py_DECREF(u); + if (!x) + break; + /* Setup the finally block before pushing the result + of __enter__ on the stack. */ + PyFrame_BlockSetup(f, SETUP_FINALLY, INSTR_OFFSET() + oparg, + STACK_LEVEL()); + + PUSH(x); + DISPATCH(); + } + + TARGET(WITH_CLEANUP) + { + /* At the top of the stack are 1-3 values indicating + how/why we entered the finally clause: + - TOP = None + - (TOP, SECOND) = (WHY_{RETURN,CONTINUE}), retval + - TOP = WHY_*; no retval below it + - (TOP, SECOND, THIRD) = exc_info() + (FOURTH, FITH, SIXTH) = previous exception for EXCEPT_HANDLER + Below them is EXIT, the context.__exit__ bound method. + In the last case, we must call + EXIT(TOP, SECOND, THIRD) + otherwise we must call + EXIT(None, None, None) + + In the first two cases, we remove EXIT from the + stack, leaving the rest in the same order. In the + third case, we shift the bottom 3 values of the + stack down, and replace the empty spot with NULL. + + In addition, if the stack represents an exception, + *and* the function call returns a 'true' value, we + push WHY_SILENCED onto the stack. END_FINALLY will + then not re-raise the exception. (But non-local + gotos should still be resumed.) + */ + + PyObject *exit_func; + u = TOP(); + if (u == Py_None) { + (void)POP(); + exit_func = TOP(); + SET_TOP(u); + v = w = Py_None; + } + else if (PyLong_Check(u)) { + (void)POP(); + switch(PyLong_AsLong(u)) { + case WHY_RETURN: + case WHY_CONTINUE: + /* Retval in TOP. */ + exit_func = SECOND(); + SET_SECOND(TOP()); + SET_TOP(u); + break; + default: + exit_func = TOP(); + SET_TOP(u); + break; + } + u = v = w = Py_None; + } + else { + PyObject *tp, *exc, *tb; + PyTryBlock *block; + v = SECOND(); + w = THIRD(); + tp = FOURTH(); + exc = PEEK(5); + tb = PEEK(6); + exit_func = PEEK(7); + SET_VALUE(7, tb); + SET_VALUE(6, exc); + SET_VALUE(5, tp); + /* UNWIND_EXCEPT_HANDLER will pop this off. */ + SET_FOURTH(NULL); + /* We just shifted the stack down, so we have + to tell the except handler block that the + values are lower than it expects. */ + block = &f->f_blockstack[f->f_iblock - 1]; + assert(block->b_type == EXCEPT_HANDLER); + block->b_level--; + } + /* XXX Not the fastest way to call it... */ + x = PyObject_CallFunctionObjArgs(exit_func, u, v, w, + NULL); + Py_DECREF(exit_func); + if (x == NULL) + break; /* Go to error exit */ + + if (u != Py_None) + err = PyObject_IsTrue(x); + else + err = 0; + Py_DECREF(x); + + if (err < 0) + break; /* Go to error exit */ + else if (err > 0) { + err = 0; + /* There was an exception and a True return */ + PUSH(PyLong_FromLong((long) WHY_SILENCED)); + } + PREDICT(END_FINALLY); + break; + } + + TARGET(CALL_FUNCTION) + { + PyObject **sp; + PCALL(PCALL_ALL); + sp = stack_pointer; + x = call_function(&sp, oparg); + stack_pointer = sp; + PUSH(x); + if (x != NULL) + DISPATCH(); + break; + } + + TARGET_WITH_IMPL(CALL_FUNCTION_VAR, _call_function_var_kw) + TARGET_WITH_IMPL(CALL_FUNCTION_KW, _call_function_var_kw) + TARGET(CALL_FUNCTION_VAR_KW) + _call_function_var_kw: + { + int na = oparg & 0xff; + int nk = (oparg>>8) & 0xff; + int flags = (opcode - CALL_FUNCTION) & 3; + int n = na + 2 * nk; + PyObject **pfunc, *func, **sp; + PCALL(PCALL_ALL); + if (flags & CALL_FLAG_VAR) + n++; + if (flags & CALL_FLAG_KW) + n++; + pfunc = stack_pointer - n - 1; + func = *pfunc; + + if (PyMethod_Check(func) + && PyMethod_GET_SELF(func) != NULL) { + PyObject *self = PyMethod_GET_SELF(func); + Py_INCREF(self); + func = PyMethod_GET_FUNCTION(func); + Py_INCREF(func); + Py_DECREF(*pfunc); + *pfunc = self; + na++; + /* n++; */ + } else + Py_INCREF(func); + sp = stack_pointer; + READ_TIMESTAMP(intr0); + x = ext_do_call(func, &sp, flags, na, nk); + READ_TIMESTAMP(intr1); + stack_pointer = sp; + Py_DECREF(func); + + while (stack_pointer > pfunc) { + w = POP(); + Py_DECREF(w); + } + PUSH(x); + if (x != NULL) + DISPATCH(); + break; + } + + TARGET_WITH_IMPL(MAKE_CLOSURE, _make_function) + TARGET(MAKE_FUNCTION) + _make_function: + { + int posdefaults = oparg & 0xff; + int kwdefaults = (oparg>>8) & 0xff; + int num_annotations = (oparg >> 16) & 0x7fff; + + w = POP(); /* qualname */ + v = POP(); /* code object */ + x = PyFunction_NewWithQualName(v, f->f_globals, w); + Py_DECREF(v); + Py_DECREF(w); + + if (x != NULL && opcode == MAKE_CLOSURE) { + v = POP(); + if (PyFunction_SetClosure(x, v) != 0) { + /* Can't happen unless bytecode is corrupt. */ + why = WHY_EXCEPTION; + } + Py_DECREF(v); + } + + if (x != NULL && num_annotations > 0) { + Py_ssize_t name_ix; + u = POP(); /* names of args with annotations */ + v = PyDict_New(); + if (v == NULL) { + Py_DECREF(x); + x = NULL; + break; + } + name_ix = PyTuple_Size(u); + assert(num_annotations == name_ix+1); + while (name_ix > 0) { + --name_ix; + t = PyTuple_GET_ITEM(u, name_ix); + w = POP(); + /* XXX(nnorwitz): check for errors */ + PyDict_SetItem(v, t, w); + Py_DECREF(w); + } + + if (PyFunction_SetAnnotations(x, v) != 0) { + /* Can't happen unless + PyFunction_SetAnnotations changes. */ + why = WHY_EXCEPTION; + } + Py_DECREF(v); + Py_DECREF(u); + } + + /* XXX Maybe this should be a separate opcode? */ + if (x != NULL && posdefaults > 0) { + v = PyTuple_New(posdefaults); + if (v == NULL) { + Py_DECREF(x); + x = NULL; + break; + } + while (--posdefaults >= 0) { + w = POP(); + PyTuple_SET_ITEM(v, posdefaults, w); + } + if (PyFunction_SetDefaults(x, v) != 0) { + /* Can't happen unless + PyFunction_SetDefaults changes. */ + why = WHY_EXCEPTION; + } + Py_DECREF(v); + } + if (x != NULL && kwdefaults > 0) { + v = PyDict_New(); + if (v == NULL) { + Py_DECREF(x); + x = NULL; + break; + } + while (--kwdefaults >= 0) { + w = POP(); /* default value */ + u = POP(); /* kw only arg name */ + /* XXX(nnorwitz): check for errors */ + PyDict_SetItem(v, u, w); + Py_DECREF(w); + Py_DECREF(u); + } + if (PyFunction_SetKwDefaults(x, v) != 0) { + /* Can't happen unless + PyFunction_SetKwDefaults changes. */ + why = WHY_EXCEPTION; + } + Py_DECREF(v); + } + PUSH(x); + break; + } + + TARGET(BUILD_SLICE) + if (oparg == 3) + w = POP(); + else + w = NULL; + v = POP(); + u = TOP(); + x = PySlice_New(u, v, w); + Py_DECREF(u); + Py_DECREF(v); + Py_XDECREF(w); + SET_TOP(x); + if (x != NULL) DISPATCH(); + break; + + TARGET(EXTENDED_ARG) + opcode = NEXTOP(); + oparg = oparg<<16 | NEXTARG(); + goto dispatch_opcode; + +#if USE_COMPUTED_GOTOS + _unknown_opcode: +#endif + default: + fprintf(stderr, + "XXX lineno: %d, opcode: %d\n", + PyFrame_GetLineNumber(f), + opcode); + PyErr_SetString(PyExc_SystemError, "unknown opcode"); + why = WHY_EXCEPTION; + break; + + + } /* switch */ + + on_error: + + READ_TIMESTAMP(inst1); + + /* Quickly continue if no error occurred */ + + if (why == WHY_NOT) { + if (err == 0 && x != NULL) { +#ifdef CHECKEXC + /* This check is expensive! */ + if (PyErr_Occurred()) + fprintf(stderr, + "XXX undetected error\n"); + else { +#endif + READ_TIMESTAMP(loop1); + continue; /* Normal, fast path */ +#ifdef CHECKEXC + } +#endif + } + why = WHY_EXCEPTION; + x = Py_None; + err = 0; + } + + /* Double-check exception status */ + + if (why == WHY_EXCEPTION || why == WHY_RERAISE) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_SystemError, + "error return without exception set"); + why = WHY_EXCEPTION; + } + } +#ifdef CHECKEXC + else { + /* This check is expensive! */ + if (PyErr_Occurred()) { + char buf[128]; + sprintf(buf, "Stack unwind with exception " + "set and why=%d", why); + Py_FatalError(buf); + } + } +#endif + + /* Log traceback info if this is a real exception */ + + if (why == WHY_EXCEPTION) { + PyTraceBack_Here(f); + + if (tstate->c_tracefunc != NULL) + call_exc_trace(tstate->c_tracefunc, + tstate->c_traceobj, f); + } + + /* For the rest, treat WHY_RERAISE as WHY_EXCEPTION */ + + if (why == WHY_RERAISE) + why = WHY_EXCEPTION; + + /* Unwind stacks if a (pseudo) exception occurred */ + +fast_block_end: + while (why != WHY_NOT && f->f_iblock > 0) { + /* Peek at the current block. */ + PyTryBlock *b = &f->f_blockstack[f->f_iblock - 1]; + + assert(why != WHY_YIELD); + if (b->b_type == SETUP_LOOP && why == WHY_CONTINUE) { + why = WHY_NOT; + JUMPTO(PyLong_AS_LONG(retval)); + Py_DECREF(retval); + break; + } + /* Now we have to pop the block. */ + f->f_iblock--; + + if (b->b_type == EXCEPT_HANDLER) { + UNWIND_EXCEPT_HANDLER(b); + continue; + } + UNWIND_BLOCK(b); + if (b->b_type == SETUP_LOOP && why == WHY_BREAK) { + why = WHY_NOT; + JUMPTO(b->b_handler); + break; + } + if (why == WHY_EXCEPTION && (b->b_type == SETUP_EXCEPT + || b->b_type == SETUP_FINALLY)) { + PyObject *exc, *val, *tb; + int handler = b->b_handler; + /* Beware, this invalidates all b->b_* fields */ + PyFrame_BlockSetup(f, EXCEPT_HANDLER, -1, STACK_LEVEL()); + PUSH(tstate->exc_traceback); + PUSH(tstate->exc_value); + if (tstate->exc_type != NULL) { + PUSH(tstate->exc_type); + } + else { + Py_INCREF(Py_None); + PUSH(Py_None); + } + PyErr_Fetch(&exc, &val, &tb); + /* Make the raw exception data + available to the handler, + so a program can emulate the + Python main loop. */ + PyErr_NormalizeException( + &exc, &val, &tb); + PyException_SetTraceback(val, tb); + Py_INCREF(exc); + tstate->exc_type = exc; + Py_INCREF(val); + tstate->exc_value = val; + tstate->exc_traceback = tb; + if (tb == NULL) + tb = Py_None; + Py_INCREF(tb); + PUSH(tb); + PUSH(val); + PUSH(exc); + why = WHY_NOT; + JUMPTO(handler); + break; + } + if (b->b_type == SETUP_FINALLY) { + if (why & (WHY_RETURN | WHY_CONTINUE)) + PUSH(retval); + PUSH(PyLong_FromLong((long)why)); + why = WHY_NOT; + JUMPTO(b->b_handler); + break; + } + } /* unwind stack */ + + /* End the loop if we still have an error (or return) */ + + if (why != WHY_NOT) + break; + READ_TIMESTAMP(loop1); + + } /* main loop */ + + assert(why != WHY_YIELD); + /* Pop remaining stack entries. */ + while (!EMPTY()) { + v = POP(); + Py_XDECREF(v); + } + + if (why != WHY_RETURN) + retval = NULL; + +fast_yield: + if (co->co_flags & CO_GENERATOR && (why == WHY_YIELD || why == WHY_RETURN)) { + /* The purpose of this block is to put aside the generator's exception + state and restore that of the calling frame. If the current + exception state is from the caller, we clear the exception values + on the generator frame, so they are not swapped back in latter. The + origin of the current exception state is determined by checking for + except handler blocks, which we must be in iff a new exception + state came into existence in this frame. (An uncaught exception + would have why == WHY_EXCEPTION, and we wouldn't be here). */ + int i; + for (i = 0; i < f->f_iblock; i++) + if (f->f_blockstack[i].b_type == EXCEPT_HANDLER) + break; + if (i == f->f_iblock) + /* We did not create this exception. */ + restore_and_clear_exc_state(tstate, f); + else + swap_exc_state(tstate, f); + } + + if (tstate->use_tracing) { + if (tstate->c_tracefunc) { + if (why == WHY_RETURN || why == WHY_YIELD) { + if (call_trace(tstate->c_tracefunc, + tstate->c_traceobj, f, + PyTrace_RETURN, retval)) { + Py_XDECREF(retval); + retval = NULL; + why = WHY_EXCEPTION; + } + } + else if (why == WHY_EXCEPTION) { + call_trace_protected(tstate->c_tracefunc, + tstate->c_traceobj, f, + PyTrace_RETURN, NULL); + } + } + if (tstate->c_profilefunc) { + if (why == WHY_EXCEPTION) + call_trace_protected(tstate->c_profilefunc, + tstate->c_profileobj, f, + PyTrace_RETURN, NULL); + else if (call_trace(tstate->c_profilefunc, + tstate->c_profileobj, f, + PyTrace_RETURN, retval)) { + Py_XDECREF(retval); + retval = NULL; + /* why = WHY_EXCEPTION; */ + } + } + } + + /* pop frame */ +exit_eval_frame: + Py_LeaveRecursiveCall(); + tstate->frame = f->f_back; + + return retval; +} + +static void +format_missing(const char *kind, PyCodeObject *co, PyObject *names) +{ + int err; + Py_ssize_t len = PyList_GET_SIZE(names); + PyObject *name_str, *comma, *tail, *tmp; + + assert(PyList_CheckExact(names)); + assert(len >= 1); + /* Deal with the joys of natural language. */ + switch (len) { + case 1: + name_str = PyList_GET_ITEM(names, 0); + Py_INCREF(name_str); + break; + case 2: + name_str = PyUnicode_FromFormat("%U and %U", + PyList_GET_ITEM(names, len - 2), + PyList_GET_ITEM(names, len - 1)); + break; + default: + tail = PyUnicode_FromFormat(", %U, and %U", + PyList_GET_ITEM(names, len - 2), + PyList_GET_ITEM(names, len - 1)); + /* Chop off the last two objects in the list. This shouldn't actually + fail, but we can't be too careful. */ + err = PyList_SetSlice(names, len - 2, len, NULL); + if (err == -1) { + Py_DECREF(tail); + return; + } + /* Stitch everything up into a nice comma-separated list. */ + comma = PyUnicode_FromString(", "); + if (comma == NULL) { + Py_DECREF(tail); + return; + } + tmp = PyUnicode_Join(comma, names); + Py_DECREF(comma); + if (tmp == NULL) { + Py_DECREF(tail); + return; + } + name_str = PyUnicode_Concat(tmp, tail); + Py_DECREF(tmp); + Py_DECREF(tail); + break; + } + if (name_str == NULL) + return; + PyErr_Format(PyExc_TypeError, + "%U() missing %i required %s argument%s: %U", + co->co_name, + len, + kind, + len == 1 ? "" : "s", + name_str); + Py_DECREF(name_str); +} + +static void +missing_arguments(PyCodeObject *co, int missing, int defcount, + PyObject **fastlocals) +{ + int i, j = 0; + int start, end; + int positional = defcount != -1; + const char *kind = positional ? "positional" : "keyword-only"; + PyObject *missing_names; + + /* Compute the names of the arguments that are missing. */ + missing_names = PyList_New(missing); + if (missing_names == NULL) + return; + if (positional) { + start = 0; + end = co->co_argcount - defcount; + } + else { + start = co->co_argcount; + end = start + co->co_kwonlyargcount; + } + for (i = start; i < end; i++) { + if (GETLOCAL(i) == NULL) { + PyObject *raw = PyTuple_GET_ITEM(co->co_varnames, i); + PyObject *name = PyObject_Repr(raw); + if (name == NULL) { + Py_DECREF(missing_names); + return; + } + PyList_SET_ITEM(missing_names, j++, name); + } + } + assert(j == missing); + format_missing(kind, co, missing_names); + Py_DECREF(missing_names); +} + +static void +too_many_positional(PyCodeObject *co, int given, int defcount, PyObject **fastlocals) +{ + int plural; + int kwonly_given = 0; + int i; + PyObject *sig, *kwonly_sig; + + assert((co->co_flags & CO_VARARGS) == 0); + /* Count missing keyword-only args. */ + for (i = co->co_argcount; i < co->co_argcount + co->co_kwonlyargcount; i++) + if (GETLOCAL(i) != NULL) + kwonly_given++; + if (defcount) { + int atleast = co->co_argcount - defcount; + plural = 1; + sig = PyUnicode_FromFormat("from %d to %d", atleast, co->co_argcount); + } + else { + plural = co->co_argcount != 1; + sig = PyUnicode_FromFormat("%d", co->co_argcount); + } + if (sig == NULL) + return; + if (kwonly_given) { + const char *format = " positional argument%s (and %d keyword-only argument%s)"; + kwonly_sig = PyUnicode_FromFormat(format, given != 1 ? "s" : "", kwonly_given, + kwonly_given != 1 ? "s" : ""); + if (kwonly_sig == NULL) { + Py_DECREF(sig); + return; + } + } + else { + /* This will not fail. */ + kwonly_sig = PyUnicode_FromString(""); + assert(kwonly_sig != NULL); + } + PyErr_Format(PyExc_TypeError, + "%U() takes %U positional argument%s but %d%U %s given", + co->co_name, + sig, + plural ? "s" : "", + given, + kwonly_sig, + given == 1 && !kwonly_given ? "was" : "were"); + Py_DECREF(sig); + Py_DECREF(kwonly_sig); +} + +/* This is gonna seem *real weird*, but if you put some other code between + PyEval_EvalFrame() and PyEval_EvalCodeEx() you will need to adjust + the test in the if statements in Misc/gdbinit (pystack and pystackv). */ + +PyObject * +PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, + PyObject **args, int argcount, PyObject **kws, int kwcount, + PyObject **defs, int defcount, PyObject *kwdefs, PyObject *closure) +{ + PyCodeObject* co = (PyCodeObject*)_co; + register PyFrameObject *f; + register PyObject *retval = NULL; + register PyObject **fastlocals, **freevars; + PyThreadState *tstate = PyThreadState_GET(); + PyObject *x, *u; + int total_args = co->co_argcount + co->co_kwonlyargcount; + int i; + int n = argcount; + PyObject *kwdict = NULL; + + if (globals == NULL) { + PyErr_SetString(PyExc_SystemError, + "PyEval_EvalCodeEx: NULL globals"); + return NULL; + } + + assert(tstate != NULL); + assert(globals != NULL); + f = PyFrame_New(tstate, co, globals, locals); + if (f == NULL) + return NULL; + + fastlocals = f->f_localsplus; + freevars = f->f_localsplus + co->co_nlocals; + + /* Parse arguments. */ + if (co->co_flags & CO_VARKEYWORDS) { + kwdict = PyDict_New(); + if (kwdict == NULL) + goto fail; + i = total_args; + if (co->co_flags & CO_VARARGS) + i++; + SETLOCAL(i, kwdict); + } + if (argcount > co->co_argcount) + n = co->co_argcount; + for (i = 0; i < n; i++) { + x = args[i]; + Py_INCREF(x); + SETLOCAL(i, x); + } + if (co->co_flags & CO_VARARGS) { + u = PyTuple_New(argcount - n); + if (u == NULL) + goto fail; + SETLOCAL(total_args, u); + for (i = n; i < argcount; i++) { + x = args[i]; + Py_INCREF(x); + PyTuple_SET_ITEM(u, i-n, x); + } + } + for (i = 0; i < kwcount; i++) { + PyObject **co_varnames; + PyObject *keyword = kws[2*i]; + PyObject *value = kws[2*i + 1]; + int j; + if (keyword == NULL || !PyUnicode_Check(keyword)) { + PyErr_Format(PyExc_TypeError, + "%U() keywords must be strings", + co->co_name); + goto fail; + } + /* Speed hack: do raw pointer compares. As names are + normally interned this should almost always hit. */ + co_varnames = ((PyTupleObject *)(co->co_varnames))->ob_item; + for (j = 0; j < total_args; j++) { + PyObject *nm = co_varnames[j]; + if (nm == keyword) + goto kw_found; + } + /* Slow fallback, just in case */ + for (j = 0; j < total_args; j++) { + PyObject *nm = co_varnames[j]; + int cmp = PyObject_RichCompareBool( + keyword, nm, Py_EQ); + if (cmp > 0) + goto kw_found; + else if (cmp < 0) + goto fail; + } + if (j >= total_args && kwdict == NULL) { + PyErr_Format(PyExc_TypeError, + "%U() got an unexpected " + "keyword argument '%S'", + co->co_name, + keyword); + goto fail; + } + PyDict_SetItem(kwdict, keyword, value); + continue; + kw_found: + if (GETLOCAL(j) != NULL) { + PyErr_Format(PyExc_TypeError, + "%U() got multiple " + "values for argument '%S'", + co->co_name, + keyword); + goto fail; + } + Py_INCREF(value); + SETLOCAL(j, value); + } + if (argcount > co->co_argcount && !(co->co_flags & CO_VARARGS)) { + too_many_positional(co, argcount, defcount, fastlocals); + goto fail; + } + if (argcount < co->co_argcount) { + int m = co->co_argcount - defcount; + int missing = 0; + for (i = argcount; i < m; i++) + if (GETLOCAL(i) == NULL) + missing++; + if (missing) { + missing_arguments(co, missing, defcount, fastlocals); + goto fail; + } + if (n > m) + i = n - m; + else + i = 0; + for (; i < defcount; i++) { + if (GETLOCAL(m+i) == NULL) { + PyObject *def = defs[i]; + Py_INCREF(def); + SETLOCAL(m+i, def); + } + } + } + if (co->co_kwonlyargcount > 0) { + int missing = 0; + for (i = co->co_argcount; i < total_args; i++) { + PyObject *name; + if (GETLOCAL(i) != NULL) + continue; + name = PyTuple_GET_ITEM(co->co_varnames, i); + if (kwdefs != NULL) { + PyObject *def = PyDict_GetItem(kwdefs, name); + if (def) { + Py_INCREF(def); + SETLOCAL(i, def); + continue; + } + } + missing++; + } + if (missing) { + missing_arguments(co, missing, -1, fastlocals); + goto fail; + } + } + + /* Allocate and initialize storage for cell vars, and copy free + vars into frame. */ + for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) { + PyObject *c; + int arg; + /* Possibly account for the cell variable being an argument. */ + if (co->co_cell2arg != NULL && + (arg = co->co_cell2arg[i]) != CO_CELL_NOT_AN_ARG) + c = PyCell_New(GETLOCAL(arg)); + else + c = PyCell_New(NULL); + if (c == NULL) + goto fail; + SETLOCAL(co->co_nlocals + i, c); + } + for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { + PyObject *o = PyTuple_GET_ITEM(closure, i); + Py_INCREF(o); + freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; + } + + if (co->co_flags & CO_GENERATOR) { + /* Don't need to keep the reference to f_back, it will be set + * when the generator is resumed. */ + Py_XDECREF(f->f_back); + f->f_back = NULL; + + PCALL(PCALL_GENERATOR); + + /* Create a new generator that owns the ready to run frame + * and return that as the value. */ + return PyGen_New(f); + } + + retval = PyEval_EvalFrameEx(f,0); + +fail: /* Jump here from prelude on failure */ + + /* decref'ing the frame can cause __del__ methods to get invoked, + which can call back into Python. While we're done with the + current Python frame (f), the associated C stack is still in use, + so recursion_depth must be boosted for the duration. + */ + assert(tstate != NULL); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return retval; +} + + +static PyObject * +special_lookup(PyObject *o, char *meth, PyObject **cache) +{ + PyObject *res; + res = _PyObject_LookupSpecial(o, meth, cache); + if (res == NULL && !PyErr_Occurred()) { + PyErr_SetObject(PyExc_AttributeError, *cache); + return NULL; + } + return res; +} + + +/* These 3 functions deal with the exception state of generators. */ + +static void +save_exc_state(PyThreadState *tstate, PyFrameObject *f) +{ + PyObject *type, *value, *traceback; + Py_XINCREF(tstate->exc_type); + Py_XINCREF(tstate->exc_value); + Py_XINCREF(tstate->exc_traceback); + type = f->f_exc_type; + value = f->f_exc_value; + traceback = f->f_exc_traceback; + f->f_exc_type = tstate->exc_type; + f->f_exc_value = tstate->exc_value; + f->f_exc_traceback = tstate->exc_traceback; + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); +} + +static void +swap_exc_state(PyThreadState *tstate, PyFrameObject *f) +{ + PyObject *tmp; + tmp = tstate->exc_type; + tstate->exc_type = f->f_exc_type; + f->f_exc_type = tmp; + tmp = tstate->exc_value; + tstate->exc_value = f->f_exc_value; + f->f_exc_value = tmp; + tmp = tstate->exc_traceback; + tstate->exc_traceback = f->f_exc_traceback; + f->f_exc_traceback = tmp; +} + +static void +restore_and_clear_exc_state(PyThreadState *tstate, PyFrameObject *f) +{ + PyObject *type, *value, *tb; + type = tstate->exc_type; + value = tstate->exc_value; + tb = tstate->exc_traceback; + tstate->exc_type = f->f_exc_type; + tstate->exc_value = f->f_exc_value; + tstate->exc_traceback = f->f_exc_traceback; + f->f_exc_type = NULL; + f->f_exc_value = NULL; + f->f_exc_traceback = NULL; + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(tb); +} + + +/* Logic for the raise statement (too complicated for inlining). + This *consumes* a reference count to each of its arguments. */ +static enum why_code +do_raise(PyObject *exc, PyObject *cause) +{ + PyObject *type = NULL, *value = NULL; + + if (exc == NULL) { + /* Reraise */ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *tb; + type = tstate->exc_type; + value = tstate->exc_value; + tb = tstate->exc_traceback; + if (type == Py_None) { + PyErr_SetString(PyExc_RuntimeError, + "No active exception to reraise"); + return WHY_EXCEPTION; + } + Py_XINCREF(type); + Py_XINCREF(value); + Py_XINCREF(tb); + PyErr_Restore(type, value, tb); + return WHY_RERAISE; + } + + /* We support the following forms of raise: + raise + raise + raise */ + + if (PyExceptionClass_Check(exc)) { + type = exc; + value = PyObject_CallObject(exc, NULL); + if (value == NULL) + goto raise_error; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto raise_error; + } + } + else if (PyExceptionInstance_Check(exc)) { + value = exc; + type = PyExceptionInstance_Class(exc); + Py_INCREF(type); + } + else { + /* Not something you can raise. You get an exception + anyway, just not what you specified :-) */ + Py_DECREF(exc); + PyErr_SetString(PyExc_TypeError, + "exceptions must derive from BaseException"); + goto raise_error; + } + + if (cause) { + PyObject *fixed_cause; + if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto raise_error; + Py_DECREF(cause); + } + else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + } + else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto raise_error; + } + PyException_SetCause(value, fixed_cause); + } + + PyErr_SetObject(type, value); + /* PyErr_SetObject incref's its arguments */ + Py_XDECREF(value); + Py_XDECREF(type); + return WHY_EXCEPTION; + +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(cause); + return WHY_EXCEPTION; +} + +/* Iterate v argcnt times and store the results on the stack (via decreasing + sp). Return 1 for success, 0 if error. + + If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack + with a variable target. +*/ + +static int +unpack_iterable(PyObject *v, int argcnt, int argcntafter, PyObject **sp) +{ + int i = 0, j = 0; + Py_ssize_t ll = 0; + PyObject *it; /* iter(v) */ + PyObject *w; + PyObject *l = NULL; /* variable list */ + + assert(v != NULL); + + it = PyObject_GetIter(v); + if (it == NULL) + goto Error; + + for (; i < argcnt; i++) { + w = PyIter_Next(it); + if (w == NULL) { + /* Iterator done, via error or exhaustion. */ + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "need more than %d value%s to unpack", + i, i == 1 ? "" : "s"); + } + goto Error; + } + *--sp = w; + } + + if (argcntafter == -1) { + /* We better have exhausted the iterator now. */ + w = PyIter_Next(it); + if (w == NULL) { + if (PyErr_Occurred()) + goto Error; + Py_DECREF(it); + return 1; + } + Py_DECREF(w); + PyErr_Format(PyExc_ValueError, "too many values to unpack " + "(expected %d)", argcnt); + goto Error; + } + + l = PySequence_List(it); + if (l == NULL) + goto Error; + *--sp = l; + i++; + + ll = PyList_GET_SIZE(l); + if (ll < argcntafter) { + PyErr_Format(PyExc_ValueError, "need more than %zd values to unpack", + argcnt + ll); + goto Error; + } + + /* Pop the "after-variable" args off the list. */ + for (j = argcntafter; j > 0; j--, i++) { + *--sp = PyList_GET_ITEM(l, ll - j); + } + /* Resize the list. */ + Py_SIZE(l) = ll - argcntafter; + Py_DECREF(it); + return 1; + +Error: + for (; i > 0; i--, sp++) + Py_DECREF(*sp); + Py_XDECREF(it); + return 0; +} + + +#ifdef LLTRACE +static int +prtrace(PyObject *v, char *str) +{ + printf("%s ", str); + if (PyObject_Print(v, stdout, 0) != 0) + PyErr_Clear(); /* Don't know what else to do */ + printf("\n"); + return 1; +} +#endif + +static void +call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f) +{ + PyObject *type, *value, *traceback, *arg; + int err; + PyErr_Fetch(&type, &value, &traceback); + if (value == NULL) { + value = Py_None; + Py_INCREF(value); + } + arg = PyTuple_Pack(3, type, value, traceback); + if (arg == NULL) { + PyErr_Restore(type, value, traceback); + return; + } + err = call_trace(func, self, f, PyTrace_EXCEPTION, arg); + Py_DECREF(arg); + if (err == 0) + PyErr_Restore(type, value, traceback); + else { + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + } +} + +static int +call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, + int what, PyObject *arg) +{ + PyObject *type, *value, *traceback; + int err; + PyErr_Fetch(&type, &value, &traceback); + err = call_trace(func, obj, frame, what, arg); + if (err == 0) + { + PyErr_Restore(type, value, traceback); + return 0; + } + else { + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + return -1; + } +} + +static int +call_trace(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, + int what, PyObject *arg) +{ + register PyThreadState *tstate = frame->f_tstate; + int result; + if (tstate->tracing) + return 0; + tstate->tracing++; + tstate->use_tracing = 0; + result = func(obj, frame, what, arg); + tstate->use_tracing = ((tstate->c_tracefunc != NULL) + || (tstate->c_profilefunc != NULL)); + tstate->tracing--; + return result; +} + +PyObject * +_PyEval_CallTracing(PyObject *func, PyObject *args) +{ + PyFrameObject *frame = PyEval_GetFrame(); + PyThreadState *tstate = frame->f_tstate; + int save_tracing = tstate->tracing; + int save_use_tracing = tstate->use_tracing; + PyObject *result; + + tstate->tracing = 0; + tstate->use_tracing = ((tstate->c_tracefunc != NULL) + || (tstate->c_profilefunc != NULL)); + result = PyObject_Call(func, args, NULL); + tstate->tracing = save_tracing; + tstate->use_tracing = save_use_tracing; + return result; +} + +/* See Objects/lnotab_notes.txt for a description of how tracing works. */ +static int +maybe_call_line_trace(Py_tracefunc func, PyObject *obj, + PyFrameObject *frame, int *instr_lb, int *instr_ub, + int *instr_prev) +{ + int result = 0; + int line = frame->f_lineno; + + /* If the last instruction executed isn't in the current + instruction window, reset the window. + */ + if (frame->f_lasti < *instr_lb || frame->f_lasti >= *instr_ub) { + PyAddrPair bounds; + line = _PyCode_CheckLineNumber(frame->f_code, frame->f_lasti, + &bounds); + *instr_lb = bounds.ap_lower; + *instr_ub = bounds.ap_upper; + } + /* If the last instruction falls at the start of a line or if + it represents a jump backwards, update the frame's line + number and call the trace function. */ + if (frame->f_lasti == *instr_lb || frame->f_lasti < *instr_prev) { + frame->f_lineno = line; + result = call_trace(func, obj, frame, PyTrace_LINE, Py_None); + } + *instr_prev = frame->f_lasti; + return result; +} + +void +PyEval_SetProfile(Py_tracefunc func, PyObject *arg) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *temp = tstate->c_profileobj; + Py_XINCREF(arg); + tstate->c_profilefunc = NULL; + tstate->c_profileobj = NULL; + /* Must make sure that tracing is not ignored if 'temp' is freed */ + tstate->use_tracing = tstate->c_tracefunc != NULL; + Py_XDECREF(temp); + tstate->c_profilefunc = func; + tstate->c_profileobj = arg; + /* Flag that tracing or profiling is turned on */ + tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); +} + +void +PyEval_SetTrace(Py_tracefunc func, PyObject *arg) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *temp = tstate->c_traceobj; + _Py_TracingPossible += (func != NULL) - (tstate->c_tracefunc != NULL); + Py_XINCREF(arg); + tstate->c_tracefunc = NULL; + tstate->c_traceobj = NULL; + /* Must make sure that profiling is not ignored if 'temp' is freed */ + tstate->use_tracing = tstate->c_profilefunc != NULL; + Py_XDECREF(temp); + tstate->c_tracefunc = func; + tstate->c_traceobj = arg; + /* Flag that tracing or profiling is turned on */ + tstate->use_tracing = ((func != NULL) + || (tstate->c_profilefunc != NULL)); +} + +PyObject * +PyEval_GetBuiltins(void) +{ + PyFrameObject *current_frame = PyEval_GetFrame(); + if (current_frame == NULL) + return PyThreadState_GET()->interp->builtins; + else + return current_frame->f_builtins; +} + +PyObject * +PyEval_GetLocals(void) +{ + PyFrameObject *current_frame = PyEval_GetFrame(); + if (current_frame == NULL) + return NULL; + PyFrame_FastToLocals(current_frame); + return current_frame->f_locals; +} + +PyObject * +PyEval_GetGlobals(void) +{ + PyFrameObject *current_frame = PyEval_GetFrame(); + if (current_frame == NULL) + return NULL; + else + return current_frame->f_globals; +} + +PyFrameObject * +PyEval_GetFrame(void) +{ + PyThreadState *tstate = PyThreadState_GET(); + return _PyThreadState_GetFrame(tstate); +} + +int +PyEval_MergeCompilerFlags(PyCompilerFlags *cf) +{ + PyFrameObject *current_frame = PyEval_GetFrame(); + int result = cf->cf_flags != 0; + + if (current_frame != NULL) { + const int codeflags = current_frame->f_code->co_flags; + const int compilerflags = codeflags & PyCF_MASK; + if (compilerflags) { + result = 1; + cf->cf_flags |= compilerflags; + } +#if 0 /* future keyword */ + if (codeflags & CO_GENERATOR_ALLOWED) { + result = 1; + cf->cf_flags |= CO_GENERATOR_ALLOWED; + } +#endif + } + return result; +} + + +/* External interface to call any callable object. + The arg must be a tuple or NULL. The kw must be a dict or NULL. */ + +PyObject * +PyEval_CallObjectWithKeywords(PyObject *func, PyObject *arg, PyObject *kw) +{ + PyObject *result; + + if (arg == NULL) { + arg = PyTuple_New(0); + if (arg == NULL) + return NULL; + } + else if (!PyTuple_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "argument list must be a tuple"); + return NULL; + } + else + Py_INCREF(arg); + + if (kw != NULL && !PyDict_Check(kw)) { + PyErr_SetString(PyExc_TypeError, + "keyword list must be a dictionary"); + Py_DECREF(arg); + return NULL; + } + + result = PyObject_Call(func, arg, kw); + Py_DECREF(arg); + return result; +} + +const char * +PyEval_GetFuncName(PyObject *func) +{ + if (PyMethod_Check(func)) + return PyEval_GetFuncName(PyMethod_GET_FUNCTION(func)); + else if (PyFunction_Check(func)) + return _PyUnicode_AsString(((PyFunctionObject*)func)->func_name); + else if (PyCFunction_Check(func)) + return ((PyCFunctionObject*)func)->m_ml->ml_name; + else + return func->ob_type->tp_name; +} + +const char * +PyEval_GetFuncDesc(PyObject *func) +{ + if (PyMethod_Check(func)) + return "()"; + else if (PyFunction_Check(func)) + return "()"; + else if (PyCFunction_Check(func)) + return "()"; + else + return " object"; +} + +static void +err_args(PyObject *func, int flags, int nargs) +{ + if (flags & METH_NOARGS) + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%d given)", + ((PyCFunctionObject *)func)->m_ml->ml_name, + nargs); + else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%d given)", + ((PyCFunctionObject *)func)->m_ml->ml_name, + nargs); +} + +#define C_TRACE(x, call) \ +if (tstate->use_tracing && tstate->c_profilefunc) { \ + if (call_trace(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate->frame, PyTrace_C_CALL, \ + func)) { \ + x = NULL; \ + } \ + else { \ + x = call; \ + if (tstate->c_profilefunc != NULL) { \ + if (x == NULL) { \ + call_trace_protected(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate->frame, PyTrace_C_EXCEPTION, \ + func); \ + /* XXX should pass (type, value, tb) */ \ + } else { \ + if (call_trace(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate->frame, PyTrace_C_RETURN, \ + func)) { \ + Py_DECREF(x); \ + x = NULL; \ + } \ + } \ + } \ + } \ +} else { \ + x = call; \ + } + +static PyObject * +call_function(PyObject ***pp_stack, int oparg +#ifdef WITH_TSC + , uint64* pintr0, uint64* pintr1 +#endif + ) +{ + int na = oparg & 0xff; + int nk = (oparg>>8) & 0xff; + int n = na + 2 * nk; + PyObject **pfunc = (*pp_stack) - n - 1; + PyObject *func = *pfunc; + PyObject *x, *w; + + /* Always dispatch PyCFunction first, because these are + presumed to be the most frequent callable object. + */ + if (PyCFunction_Check(func) && nk == 0) { + int flags = PyCFunction_GET_FLAGS(func); + PyThreadState *tstate = PyThreadState_GET(); + + PCALL(PCALL_CFUNCTION); + if (flags & (METH_NOARGS | METH_O)) { + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); + if (flags & METH_NOARGS && na == 0) { + C_TRACE(x, (*meth)(self,NULL)); + } + else if (flags & METH_O && na == 1) { + PyObject *arg = EXT_POP(*pp_stack); + C_TRACE(x, (*meth)(self,arg)); + Py_DECREF(arg); + } + else { + err_args(func, flags, na); + x = NULL; + } + } + else { + PyObject *callargs; + callargs = load_args(pp_stack, na); + READ_TIMESTAMP(*pintr0); + C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); + READ_TIMESTAMP(*pintr1); + Py_XDECREF(callargs); + } + } else { + if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) { + /* optimize access to bound methods */ + PyObject *self = PyMethod_GET_SELF(func); + PCALL(PCALL_METHOD); + PCALL(PCALL_BOUND_METHOD); + Py_INCREF(self); + func = PyMethod_GET_FUNCTION(func); + Py_INCREF(func); + Py_DECREF(*pfunc); + *pfunc = self; + na++; + n++; + } else + Py_INCREF(func); + READ_TIMESTAMP(*pintr0); + if (PyFunction_Check(func)) + x = fast_function(func, pp_stack, n, na, nk); + else + x = do_call(func, pp_stack, na, nk); + READ_TIMESTAMP(*pintr1); + Py_DECREF(func); + } + + /* Clear the stack of the function object. Also removes + the arguments in case they weren't consumed already + (fast_function() and err_args() leave them on the stack). + */ + while ((*pp_stack) > pfunc) { + w = EXT_POP(*pp_stack); + Py_DECREF(w); + PCALL(PCALL_POP); + } + return x; +} + +/* The fast_function() function optimize calls for which no argument + tuple is necessary; the objects are passed directly from the stack. + For the simplest case -- a function that takes only positional + arguments and is called with only positional arguments -- it + inlines the most primitive frame setup code from + PyEval_EvalCodeEx(), which vastly reduces the checks that must be + done before evaluating the frame. +*/ + +static PyObject * +fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk) +{ + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *kwdefs = PyFunction_GET_KW_DEFAULTS(func); + PyObject **d = NULL; + int nd = 0; + + PCALL(PCALL_FUNCTION); + PCALL(PCALL_FAST_FUNCTION); + if (argdefs == NULL && co->co_argcount == n && + co->co_kwonlyargcount == 0 && nk==0 && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + PyFrameObject *f; + PyObject *retval = NULL; + PyThreadState *tstate = PyThreadState_GET(); + PyObject **fastlocals, **stack; + int i; + + PCALL(PCALL_FASTER_FUNCTION); + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. + */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) + return NULL; + + fastlocals = f->f_localsplus; + stack = (*pp_stack) - n; + + for (i = 0; i < n; i++) { + Py_INCREF(*stack); + fastlocals[i] = *stack++; + } + retval = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return retval; + } + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + return PyEval_EvalCodeEx((PyObject*)co, globals, + (PyObject *)NULL, (*pp_stack)-n, na, + (*pp_stack)-2*nk, nk, d, nd, kwdefs, + PyFunction_GET_CLOSURE(func)); +} + +static PyObject * +update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack, + PyObject *func) +{ + PyObject *kwdict = NULL; + if (orig_kwdict == NULL) + kwdict = PyDict_New(); + else { + kwdict = PyDict_Copy(orig_kwdict); + Py_DECREF(orig_kwdict); + } + if (kwdict == NULL) + return NULL; + while (--nk >= 0) { + int err; + PyObject *value = EXT_POP(*pp_stack); + PyObject *key = EXT_POP(*pp_stack); + if (PyDict_GetItem(kwdict, key) != NULL) { + PyErr_Format(PyExc_TypeError, + "%.200s%s got multiple values " + "for keyword argument '%U'", + PyEval_GetFuncName(func), + PyEval_GetFuncDesc(func), + key); + Py_DECREF(key); + Py_DECREF(value); + Py_DECREF(kwdict); + return NULL; + } + err = PyDict_SetItem(kwdict, key, value); + Py_DECREF(key); + Py_DECREF(value); + if (err) { + Py_DECREF(kwdict); + return NULL; + } + } + return kwdict; +} + +static PyObject * +update_star_args(int nstack, int nstar, PyObject *stararg, + PyObject ***pp_stack) +{ + PyObject *callargs, *w; + + callargs = PyTuple_New(nstack + nstar); + if (callargs == NULL) { + return NULL; + } + if (nstar) { + int i; + for (i = 0; i < nstar; i++) { + PyObject *a = PyTuple_GET_ITEM(stararg, i); + Py_INCREF(a); + PyTuple_SET_ITEM(callargs, nstack + i, a); + } + } + while (--nstack >= 0) { + w = EXT_POP(*pp_stack); + PyTuple_SET_ITEM(callargs, nstack, w); + } + return callargs; +} + +static PyObject * +load_args(PyObject ***pp_stack, int na) +{ + PyObject *args = PyTuple_New(na); + PyObject *w; + + if (args == NULL) + return NULL; + while (--na >= 0) { + w = EXT_POP(*pp_stack); + PyTuple_SET_ITEM(args, na, w); + } + return args; +} + +static PyObject * +do_call(PyObject *func, PyObject ***pp_stack, int na, int nk) +{ + PyObject *callargs = NULL; + PyObject *kwdict = NULL; + PyObject *result = NULL; + + if (nk > 0) { + kwdict = update_keyword_args(NULL, nk, pp_stack, func); + if (kwdict == NULL) + goto call_fail; + } + callargs = load_args(pp_stack, na); + if (callargs == NULL) + goto call_fail; +#ifdef CALL_PROFILE + /* At this point, we have to look at the type of func to + update the call stats properly. Do it here so as to avoid + exposing the call stats machinery outside ceval.c + */ + if (PyFunction_Check(func)) + PCALL(PCALL_FUNCTION); + else if (PyMethod_Check(func)) + PCALL(PCALL_METHOD); + else if (PyType_Check(func)) + PCALL(PCALL_TYPE); + else if (PyCFunction_Check(func)) + PCALL(PCALL_CFUNCTION); + else + PCALL(PCALL_OTHER); +#endif + if (PyCFunction_Check(func)) { + PyThreadState *tstate = PyThreadState_GET(); + C_TRACE(result, PyCFunction_Call(func, callargs, kwdict)); + } + else + result = PyObject_Call(func, callargs, kwdict); +call_fail: + Py_XDECREF(callargs); + Py_XDECREF(kwdict); + return result; +} + +static PyObject * +ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk) +{ + int nstar = 0; + PyObject *callargs = NULL; + PyObject *stararg = NULL; + PyObject *kwdict = NULL; + PyObject *result = NULL; + + if (flags & CALL_FLAG_KW) { + kwdict = EXT_POP(*pp_stack); + if (!PyDict_Check(kwdict)) { + PyObject *d; + d = PyDict_New(); + if (d == NULL) + goto ext_call_fail; + if (PyDict_Update(d, kwdict) != 0) { + Py_DECREF(d); + /* PyDict_Update raises attribute + * error (percolated from an attempt + * to get 'keys' attribute) instead of + * a type error if its second argument + * is not a mapping. + */ + if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Format(PyExc_TypeError, + "%.200s%.200s argument after ** " + "must be a mapping, not %.200s", + PyEval_GetFuncName(func), + PyEval_GetFuncDesc(func), + kwdict->ob_type->tp_name); + } + goto ext_call_fail; + } + Py_DECREF(kwdict); + kwdict = d; + } + } + if (flags & CALL_FLAG_VAR) { + stararg = EXT_POP(*pp_stack); + if (!PyTuple_Check(stararg)) { + PyObject *t = NULL; + t = PySequence_Tuple(stararg); + if (t == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Format(PyExc_TypeError, + "%.200s%.200s argument after * " + "must be a sequence, not %.200s", + PyEval_GetFuncName(func), + PyEval_GetFuncDesc(func), + stararg->ob_type->tp_name); + } + goto ext_call_fail; + } + Py_DECREF(stararg); + stararg = t; + } + nstar = PyTuple_GET_SIZE(stararg); + } + if (nk > 0) { + kwdict = update_keyword_args(kwdict, nk, pp_stack, func); + if (kwdict == NULL) + goto ext_call_fail; + } + callargs = update_star_args(na, nstar, stararg, pp_stack); + if (callargs == NULL) + goto ext_call_fail; +#ifdef CALL_PROFILE + /* At this point, we have to look at the type of func to + update the call stats properly. Do it here so as to avoid + exposing the call stats machinery outside ceval.c + */ + if (PyFunction_Check(func)) + PCALL(PCALL_FUNCTION); + else if (PyMethod_Check(func)) + PCALL(PCALL_METHOD); + else if (PyType_Check(func)) + PCALL(PCALL_TYPE); + else if (PyCFunction_Check(func)) + PCALL(PCALL_CFUNCTION); + else + PCALL(PCALL_OTHER); +#endif + if (PyCFunction_Check(func)) { + PyThreadState *tstate = PyThreadState_GET(); + C_TRACE(result, PyCFunction_Call(func, callargs, kwdict)); + } + else + result = PyObject_Call(func, callargs, kwdict); +ext_call_fail: + Py_XDECREF(callargs); + Py_XDECREF(kwdict); + Py_XDECREF(stararg); + return result; +} + +/* Extract a slice index from a PyInt or PyLong or an object with the + nb_index slot defined, and store in *pi. + Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX, + and silently boost values less than -PY_SSIZE_T_MAX-1 to -PY_SSIZE_T_MAX-1. + Return 0 on error, 1 on success. +*/ +/* Note: If v is NULL, return success without storing into *pi. This + is because_PyEval_SliceIndex() is called by apply_slice(), which can be + called by the SLICE opcode with v and/or w equal to NULL. +*/ +int +_PyEval_SliceIndex(PyObject *v, Py_ssize_t *pi) +{ + if (v != NULL) { + Py_ssize_t x; + if (PyIndex_Check(v)) { + x = PyNumber_AsSsize_t(v, NULL); + if (x == -1 && PyErr_Occurred()) + return 0; + } + else { + PyErr_SetString(PyExc_TypeError, + "slice indices must be integers or " + "None or have an __index__ method"); + return 0; + } + *pi = x; + } + return 1; +} + +#define CANNOT_CATCH_MSG "catching classes that do not inherit from "\ + "BaseException is not allowed" + +static PyObject * +cmp_outcome(int op, register PyObject *v, register PyObject *w) +{ + int res = 0; + switch (op) { + case PyCmp_IS: + res = (v == w); + break; + case PyCmp_IS_NOT: + res = (v != w); + break; + case PyCmp_IN: + res = PySequence_Contains(w, v); + if (res < 0) + return NULL; + break; + case PyCmp_NOT_IN: + res = PySequence_Contains(w, v); + if (res < 0) + return NULL; + res = !res; + break; + case PyCmp_EXC_MATCH: + if (PyTuple_Check(w)) { + Py_ssize_t i, length; + length = PyTuple_Size(w); + for (i = 0; i < length; i += 1) { + PyObject *exc = PyTuple_GET_ITEM(w, i); + if (!PyExceptionClass_Check(exc)) { + PyErr_SetString(PyExc_TypeError, + CANNOT_CATCH_MSG); + return NULL; + } + } + } + else { + if (!PyExceptionClass_Check(w)) { + PyErr_SetString(PyExc_TypeError, + CANNOT_CATCH_MSG); + return NULL; + } + } + res = PyErr_GivenExceptionMatches(v, w); + break; + default: + return PyObject_RichCompare(v, w, op); + } + v = res ? Py_True : Py_False; + Py_INCREF(v); + return v; +} + +static PyObject * +import_from(PyObject *v, PyObject *name) +{ + PyObject *x; + + x = PyObject_GetAttr(v, name); + if (x == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Format(PyExc_ImportError, "cannot import name %S", name); + } + return x; +} + +static int +import_all_from(PyObject *locals, PyObject *v) +{ + _Py_IDENTIFIER(__all__); + _Py_IDENTIFIER(__dict__); + PyObject *all = _PyObject_GetAttrId(v, &PyId___all__); + PyObject *dict, *name, *value; + int skip_leading_underscores = 0; + int pos, err; + + if (all == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + return -1; /* Unexpected error */ + PyErr_Clear(); + dict = _PyObject_GetAttrId(v, &PyId___dict__); + if (dict == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) + return -1; + PyErr_SetString(PyExc_ImportError, + "from-import-* object has no __dict__ and no __all__"); + return -1; + } + all = PyMapping_Keys(dict); + Py_DECREF(dict); + if (all == NULL) + return -1; + skip_leading_underscores = 1; + } + + for (pos = 0, err = 0; ; pos++) { + name = PySequence_GetItem(all, pos); + if (name == NULL) { + if (!PyErr_ExceptionMatches(PyExc_IndexError)) + err = -1; + else + PyErr_Clear(); + break; + } + if (skip_leading_underscores && + PyUnicode_Check(name) && + PyUnicode_READY(name) != -1 && + PyUnicode_READ_CHAR(name, 0) == '_') + { + Py_DECREF(name); + continue; + } + value = PyObject_GetAttr(v, name); + if (value == NULL) + err = -1; + else if (PyDict_CheckExact(locals)) + err = PyDict_SetItem(locals, name, value); + else + err = PyObject_SetItem(locals, name, value); + Py_DECREF(name); + Py_XDECREF(value); + if (err != 0) + break; + } + Py_DECREF(all); + return err; +} + +static void +format_exc_check_arg(PyObject *exc, const char *format_str, PyObject *obj) +{ + const char *obj_str; + + if (!obj) + return; + + obj_str = _PyUnicode_AsString(obj); + if (!obj_str) + return; + + PyErr_Format(exc, format_str, obj_str); +} + +static void +format_exc_unbound(PyCodeObject *co, int oparg) +{ + PyObject *name; + /* Don't stomp existing exception */ + if (PyErr_Occurred()) + return; + if (oparg < PyTuple_GET_SIZE(co->co_cellvars)) { + name = PyTuple_GET_ITEM(co->co_cellvars, + oparg); + format_exc_check_arg( + PyExc_UnboundLocalError, + UNBOUNDLOCAL_ERROR_MSG, + name); + } else { + name = PyTuple_GET_ITEM(co->co_freevars, oparg - + PyTuple_GET_SIZE(co->co_cellvars)); + format_exc_check_arg(PyExc_NameError, + UNBOUNDFREE_ERROR_MSG, name); + } +} + +static PyObject * +unicode_concatenate(PyObject *v, PyObject *w, + PyFrameObject *f, unsigned char *next_instr) +{ + PyObject *res; + if (Py_REFCNT(v) == 2) { + /* In the common case, there are 2 references to the value + * stored in 'variable' when the += is performed: one on the + * value stack (in 'v') and one still stored in the + * 'variable'. We try to delete the variable now to reduce + * the refcnt to 1. + */ + switch (*next_instr) { + case STORE_FAST: + { + int oparg = PEEKARG(); + PyObject **fastlocals = f->f_localsplus; + if (GETLOCAL(oparg) == v) + SETLOCAL(oparg, NULL); + break; + } + case STORE_DEREF: + { + PyObject **freevars = (f->f_localsplus + + f->f_code->co_nlocals); + PyObject *c = freevars[PEEKARG()]; + if (PyCell_GET(c) == v) + PyCell_Set(c, NULL); + break; + } + case STORE_NAME: + { + PyObject *names = f->f_code->co_names; + PyObject *name = GETITEM(names, PEEKARG()); + PyObject *locals = f->f_locals; + if (PyDict_CheckExact(locals) && + PyDict_GetItem(locals, name) == v) { + if (PyDict_DelItem(locals, name) != 0) { + PyErr_Clear(); + } + } + break; + } + } + } + res = v; + PyUnicode_Append(&res, w); + return res; +} + + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/errors.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/errors.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,906 @@ + +/* Error handling */ + +#include "Python.h" + +#ifndef __STDC__ +#ifndef MS_WINDOWS +extern char *strerror(int); +#endif +#endif + +#ifdef MS_WINDOWS +#include +#include +#endif + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +void +PyErr_Restore(PyObject *type, PyObject *value, PyObject *traceback) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *oldtype, *oldvalue, *oldtraceback; + + if (traceback != NULL && !PyTraceBack_Check(traceback)) { + /* XXX Should never happen -- fatal error instead? */ + /* Well, it could be None. */ + Py_DECREF(traceback); + traceback = NULL; + } + + /* Save these in locals to safeguard against recursive + invocation through Py_XDECREF */ + oldtype = tstate->curexc_type; + oldvalue = tstate->curexc_value; + oldtraceback = tstate->curexc_traceback; + + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = traceback; + + Py_XDECREF(oldtype); + Py_XDECREF(oldvalue); + Py_XDECREF(oldtraceback); +} + +void +PyErr_SetObject(PyObject *exception, PyObject *value) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *exc_value; + PyObject *tb = NULL; + + if (exception != NULL && + !PyExceptionClass_Check(exception)) { + PyErr_Format(PyExc_SystemError, + "exception %R not a BaseException subclass", + exception); + return; + } + Py_XINCREF(value); + exc_value = tstate->exc_value; + if (exc_value != NULL && exc_value != Py_None) { + /* Implicit exception chaining */ + Py_INCREF(exc_value); + if (value == NULL || !PyExceptionInstance_Check(value)) { + /* We must normalize the value right now */ + PyObject *args, *fixed_value; + if (value == NULL || value == Py_None) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } + else + args = PyTuple_Pack(1, value); + fixed_value = args ? + PyEval_CallObject(exception, args) : NULL; + Py_XDECREF(args); + Py_XDECREF(value); + if (fixed_value == NULL) + return; + value = fixed_value; + } + /* Avoid reference cycles through the context chain. + This is O(chain length) but context chains are + usually very short. Sensitive readers may try + to inline the call to PyException_GetContext. */ + if (exc_value != value) { + PyObject *o = exc_value, *context; + while ((context = PyException_GetContext(o))) { + Py_DECREF(context); + if (context == value) { + PyException_SetContext(o, NULL); + break; + } + o = context; + } + PyException_SetContext(value, exc_value); + } else { + Py_DECREF(exc_value); + } + } + if (value != NULL && PyExceptionInstance_Check(value)) + tb = PyException_GetTraceback(value); + Py_XINCREF(exception); + PyErr_Restore(exception, value, tb); +} + +void +PyErr_SetNone(PyObject *exception) +{ + PyErr_SetObject(exception, (PyObject *)NULL); +} + +void +PyErr_SetString(PyObject *exception, const char *string) +{ + PyObject *value = PyUnicode_FromString(string); + PyErr_SetObject(exception, value); + Py_XDECREF(value); +} + + +PyObject * +PyErr_Occurred(void) +{ + /* If there is no thread state, PyThreadState_GET calls + Py_FatalError, which calls PyErr_Occurred. To avoid the + resulting infinite loop, we inline PyThreadState_GET here and + treat no thread as no error. */ + PyThreadState *tstate = + ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current)); + + return tstate == NULL ? NULL : tstate->curexc_type; +} + + +int +PyErr_GivenExceptionMatches(PyObject *err, PyObject *exc) +{ + if (err == NULL || exc == NULL) { + /* maybe caused by "import exceptions" that failed early on */ + return 0; + } + if (PyTuple_Check(exc)) { + Py_ssize_t i, n; + n = PyTuple_Size(exc); + for (i = 0; i < n; i++) { + /* Test recursively */ + if (PyErr_GivenExceptionMatches( + err, PyTuple_GET_ITEM(exc, i))) + { + return 1; + } + } + return 0; + } + /* err might be an instance, so check its class. */ + if (PyExceptionInstance_Check(err)) + err = PyExceptionInstance_Class(err); + + if (PyExceptionClass_Check(err) && PyExceptionClass_Check(exc)) { + int res = 0; + PyObject *exception, *value, *tb; + PyErr_Fetch(&exception, &value, &tb); + /* PyObject_IsSubclass() can recurse and therefore is + not safe (see test_bad_getattr in test.pickletester). */ + res = PyType_IsSubtype((PyTypeObject *)err, (PyTypeObject *)exc); + /* This function must not fail, so print the error here */ + if (res == -1) { + PyErr_WriteUnraisable(err); + res = 0; + } + PyErr_Restore(exception, value, tb); + return res; + } + + return err == exc; +} + + +int +PyErr_ExceptionMatches(PyObject *exc) +{ + return PyErr_GivenExceptionMatches(PyErr_Occurred(), exc); +} + + +/* Used in many places to normalize a raised exception, including in + eval_code2(), do_raise(), and PyErr_Print() + + XXX: should PyErr_NormalizeException() also call + PyException_SetTraceback() with the resulting value and tb? +*/ +void +PyErr_NormalizeException(PyObject **exc, PyObject **val, PyObject **tb) +{ + PyObject *type = *exc; + PyObject *value = *val; + PyObject *inclass = NULL; + PyObject *initial_tb = NULL; + PyThreadState *tstate = NULL; + + if (type == NULL) { + /* There was no exception, so nothing to do. */ + return; + } + + /* If PyErr_SetNone() was used, the value will have been actually + set to NULL. + */ + if (!value) { + value = Py_None; + Py_INCREF(value); + } + + if (PyExceptionInstance_Check(value)) + inclass = PyExceptionInstance_Class(value); + + /* Normalize the exception so that if the type is a class, the + value will be an instance. + */ + if (PyExceptionClass_Check(type)) { + /* if the value was not an instance, or is not an instance + whose class is (or is derived from) type, then use the + value as an argument to instantiation of the type + class. + */ + if (!inclass || !PyObject_IsSubclass(inclass, type)) { + PyObject *args, *res; + + if (value == Py_None) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } + else + args = PyTuple_Pack(1, value); + + if (args == NULL) + goto finally; + res = PyEval_CallObject(type, args); + Py_DECREF(args); + if (res == NULL) + goto finally; + Py_DECREF(value); + value = res; + } + /* if the class of the instance doesn't exactly match the + class of the type, believe the instance + */ + else if (inclass != type) { + Py_DECREF(type); + type = inclass; + Py_INCREF(type); + } + } + *exc = type; + *val = value; + return; +finally: + Py_DECREF(type); + Py_DECREF(value); + /* If the new exception doesn't set a traceback and the old + exception had a traceback, use the old traceback for the + new exception. It's better than nothing. + */ + initial_tb = *tb; + PyErr_Fetch(exc, val, tb); + if (initial_tb != NULL) { + if (*tb == NULL) + *tb = initial_tb; + else + Py_DECREF(initial_tb); + } + /* normalize recursively */ + tstate = PyThreadState_GET(); + if (++tstate->recursion_depth > Py_GetRecursionLimit()) { + --tstate->recursion_depth; + /* throw away the old exception... */ + Py_DECREF(*exc); + Py_DECREF(*val); + /* ... and use the recursion error instead */ + *exc = PyExc_RuntimeError; + *val = PyExc_RecursionErrorInst; + Py_INCREF(*exc); + Py_INCREF(*val); + /* just keeping the old traceback */ + return; + } + PyErr_NormalizeException(exc, val, tb); + --tstate->recursion_depth; +} + + +void +PyErr_Fetch(PyObject **p_type, PyObject **p_value, PyObject **p_traceback) +{ + PyThreadState *tstate = PyThreadState_GET(); + + *p_type = tstate->curexc_type; + *p_value = tstate->curexc_value; + *p_traceback = tstate->curexc_traceback; + + tstate->curexc_type = NULL; + tstate->curexc_value = NULL; + tstate->curexc_traceback = NULL; +} + +void +PyErr_Clear(void) +{ + PyErr_Restore(NULL, NULL, NULL); +} + +/* Convenience functions to set a type error exception and return 0 */ + +int +PyErr_BadArgument(void) +{ + PyErr_SetString(PyExc_TypeError, + "bad argument type for built-in operation"); + return 0; +} + +PyObject * +PyErr_NoMemory(void) +{ + PyErr_SetNone(PyExc_MemoryError); + return NULL; +} + +PyObject * +PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject) +{ + PyObject *message; + PyObject *v, *args; + int i = errno; +#ifdef MS_WINDOWS + WCHAR *s_buf = NULL; +#endif /* Unix/Windows */ + +#ifdef EINTR + if (i == EINTR && PyErr_CheckSignals()) + return NULL; +#endif + +#ifndef MS_WINDOWS + if (i != 0) { + char *s = strerror(i); + message = PyUnicode_DecodeLocale(s, "surrogateescape"); + } + else { + /* Sometimes errno didn't get set */ + message = PyUnicode_FromString("Error"); + } +#else + if (i == 0) + message = PyUnicode_FromString("Error"); /* Sometimes errno didn't get set */ + else + { + /* Note that the Win32 errors do not lineup with the + errno error. So if the error is in the MSVC error + table, we use it, otherwise we assume it really _is_ + a Win32 error code + */ + if (i > 0 && i < _sys_nerr) { + message = PyUnicode_FromString(_sys_errlist[i]); + } + else { + int len = FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, /* no message source */ + i, + MAKELANGID(LANG_NEUTRAL, + SUBLANG_DEFAULT), + /* Default language */ + (LPWSTR) &s_buf, + 0, /* size not used */ + NULL); /* no args */ + if (len==0) { + /* Only ever seen this in out-of-mem + situations */ + s_buf = NULL; + message = PyUnicode_FromFormat("Windows Error 0x%X", i); + } else { + /* remove trailing cr/lf and dots */ + while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.')) + s_buf[--len] = L'\0'; + message = PyUnicode_FromWideChar(s_buf, len); + } + } + } +#endif /* Unix/Windows */ + + if (message == NULL) + { +#ifdef MS_WINDOWS + LocalFree(s_buf); +#endif + return NULL; + } + + if (filenameObject != NULL) + args = Py_BuildValue("(iOO)", i, message, filenameObject); + else + args = Py_BuildValue("(iO)", i, message); + Py_DECREF(message); + + if (args != NULL) { + v = PyObject_Call(exc, args, NULL); + Py_DECREF(args); + if (v != NULL) { + PyErr_SetObject((PyObject *) Py_TYPE(v), v); + Py_DECREF(v); + } + } +#ifdef MS_WINDOWS + LocalFree(s_buf); +#endif + return NULL; +} + + +PyObject * +PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename) +{ + PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; + PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); + Py_XDECREF(name); + return result; +} + +#ifdef MS_WINDOWS +PyObject * +PyErr_SetFromErrnoWithUnicodeFilename(PyObject *exc, const Py_UNICODE *filename) +{ + PyObject *name = filename ? + PyUnicode_FromUnicode(filename, wcslen(filename)) : + NULL; + PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); + Py_XDECREF(name); + return result; +} +#endif /* MS_WINDOWS */ + +PyObject * +PyErr_SetFromErrno(PyObject *exc) +{ + return PyErr_SetFromErrnoWithFilenameObject(exc, NULL); +} + +#ifdef MS_WINDOWS +/* Windows specific error code handling */ +PyObject *PyErr_SetExcFromWindowsErrWithFilenameObject( + PyObject *exc, + int ierr, + PyObject *filenameObject) +{ + int len; + WCHAR *s_buf = NULL; /* Free via LocalFree */ + PyObject *message; + PyObject *args, *v; + DWORD err = (DWORD)ierr; + if (err==0) err = GetLastError(); + len = FormatMessageW( + /* Error API error */ + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, /* no message source */ + err, + MAKELANGID(LANG_NEUTRAL, + SUBLANG_DEFAULT), /* Default language */ + (LPWSTR) &s_buf, + 0, /* size not used */ + NULL); /* no args */ + if (len==0) { + /* Only seen this in out of mem situations */ + message = PyUnicode_FromFormat("Windows Error 0x%X", err); + s_buf = NULL; + } else { + /* remove trailing cr/lf and dots */ + while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.')) + s_buf[--len] = L'\0'; + message = PyUnicode_FromWideChar(s_buf, len); + } + + if (message == NULL) + { + LocalFree(s_buf); + return NULL; + } + + if (filenameObject == NULL) + filenameObject = Py_None; + /* This is the constructor signature for passing a Windows error code. + The POSIX translation will be figured out by the constructor. */ + args = Py_BuildValue("(iOOi)", 0, message, filenameObject, err); + Py_DECREF(message); + + if (args != NULL) { + v = PyObject_Call(exc, args, NULL); + Py_DECREF(args); + if (v != NULL) { + PyErr_SetObject((PyObject *) Py_TYPE(v), v); + Py_DECREF(v); + } + } + LocalFree(s_buf); + return NULL; +} + +PyObject *PyErr_SetExcFromWindowsErrWithFilename( + PyObject *exc, + int ierr, + const char *filename) +{ + PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; + PyObject *ret = PyErr_SetExcFromWindowsErrWithFilenameObject(exc, + ierr, + name); + Py_XDECREF(name); + return ret; +} + +PyObject *PyErr_SetExcFromWindowsErrWithUnicodeFilename( + PyObject *exc, + int ierr, + const Py_UNICODE *filename) +{ + PyObject *name = filename ? + PyUnicode_FromUnicode(filename, wcslen(filename)) : + NULL; + PyObject *ret = PyErr_SetExcFromWindowsErrWithFilenameObject(exc, + ierr, + name); + Py_XDECREF(name); + return ret; +} + +PyObject *PyErr_SetExcFromWindowsErr(PyObject *exc, int ierr) +{ + return PyErr_SetExcFromWindowsErrWithFilename(exc, ierr, NULL); +} + +PyObject *PyErr_SetFromWindowsErr(int ierr) +{ + return PyErr_SetExcFromWindowsErrWithFilename(PyExc_WindowsError, + ierr, NULL); +} +PyObject *PyErr_SetFromWindowsErrWithFilename( + int ierr, + const char *filename) +{ + PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; + PyObject *result = PyErr_SetExcFromWindowsErrWithFilenameObject( + PyExc_WindowsError, + ierr, name); + Py_XDECREF(name); + return result; +} + +PyObject *PyErr_SetFromWindowsErrWithUnicodeFilename( + int ierr, + const Py_UNICODE *filename) +{ + PyObject *name = filename ? + PyUnicode_FromUnicode(filename, wcslen(filename)) : + NULL; + PyObject *result = PyErr_SetExcFromWindowsErrWithFilenameObject( + PyExc_WindowsError, + ierr, name); + Py_XDECREF(name); + return result; +} +#endif /* MS_WINDOWS */ + +void +_PyErr_BadInternalCall(const char *filename, int lineno) +{ + PyErr_Format(PyExc_SystemError, + "%s:%d: bad argument to internal function", + filename, lineno); +} + +/* Remove the preprocessor macro for PyErr_BadInternalCall() so that we can + export the entry point for existing object code: */ +#undef PyErr_BadInternalCall +void +PyErr_BadInternalCall(void) +{ + PyErr_Format(PyExc_SystemError, + "bad argument to internal function"); +} +#define PyErr_BadInternalCall() _PyErr_BadInternalCall(__FILE__, __LINE__) + + + +PyObject * +PyErr_Format(PyObject *exception, const char *format, ...) +{ + va_list vargs; + PyObject* string; + +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + + string = PyUnicode_FromFormatV(format, vargs); + PyErr_SetObject(exception, string); + Py_XDECREF(string); + va_end(vargs); + return NULL; +} + + + +PyObject * +PyErr_NewException(const char *name, PyObject *base, PyObject *dict) +{ + const char *dot; + PyObject *modulename = NULL; + PyObject *classname = NULL; + PyObject *mydict = NULL; + PyObject *bases = NULL; + PyObject *result = NULL; + dot = strrchr(name, '.'); + if (dot == NULL) { + PyErr_SetString(PyExc_SystemError, + "PyErr_NewException: name must be module.class"); + return NULL; + } + if (base == NULL) + base = PyExc_Exception; + if (dict == NULL) { + dict = mydict = PyDict_New(); + if (dict == NULL) + goto failure; + } + if (PyDict_GetItemString(dict, "__module__") == NULL) { + modulename = PyUnicode_FromStringAndSize(name, + (Py_ssize_t)(dot-name)); + if (modulename == NULL) + goto failure; + if (PyDict_SetItemString(dict, "__module__", modulename) != 0) + goto failure; + } + if (PyTuple_Check(base)) { + bases = base; + /* INCREF as we create a new ref in the else branch */ + Py_INCREF(bases); + } else { + bases = PyTuple_Pack(1, base); + if (bases == NULL) + goto failure; + } + /* Create a real class. */ + result = PyObject_CallFunction((PyObject *)&PyType_Type, "sOO", + dot+1, bases, dict); + failure: + Py_XDECREF(bases); + Py_XDECREF(mydict); + Py_XDECREF(classname); + Py_XDECREF(modulename); + return result; +} + + +/* Create an exception with docstring */ +PyObject * +PyErr_NewExceptionWithDoc(const char *name, const char *doc, + PyObject *base, PyObject *dict) +{ + int result; + PyObject *ret = NULL; + PyObject *mydict = NULL; /* points to the dict only if we create it */ + PyObject *docobj; + + if (dict == NULL) { + dict = mydict = PyDict_New(); + if (dict == NULL) { + return NULL; + } + } + + if (doc != NULL) { + docobj = PyUnicode_FromString(doc); + if (docobj == NULL) + goto failure; + result = PyDict_SetItemString(dict, "__doc__", docobj); + Py_DECREF(docobj); + if (result < 0) + goto failure; + } + + ret = PyErr_NewException(name, base, dict); + failure: + Py_XDECREF(mydict); + return ret; +} + + +/* Call when an exception has occurred but there is no way for Python + to handle it. Examples: exception in __del__ or during GC. */ +void +PyErr_WriteUnraisable(PyObject *obj) +{ + _Py_IDENTIFIER(__module__); + PyObject *f, *t, *v, *tb; + PyErr_Fetch(&t, &v, &tb); + f = PySys_GetObject("stderr"); + if (f != NULL && f != Py_None) { + PyFile_WriteString("Exception ", f); + if (t) { + PyObject* moduleName; + char* className; + assert(PyExceptionClass_Check(t)); + className = PyExceptionClass_Name(t); + if (className != NULL) { + char *dot = strrchr(className, '.'); + if (dot != NULL) + className = dot+1; + } + + moduleName = _PyObject_GetAttrId(t, &PyId___module__); + if (moduleName == NULL) + PyFile_WriteString("", f); + else { + char* modstr = _PyUnicode_AsString(moduleName); + if (modstr && + strcmp(modstr, "builtins") != 0) + { + PyFile_WriteString(modstr, f); + PyFile_WriteString(".", f); + } + } + if (className == NULL) + PyFile_WriteString("", f); + else + PyFile_WriteString(className, f); + if (v && v != Py_None) { + PyFile_WriteString(": ", f); + PyFile_WriteObject(v, f, 0); + } + Py_XDECREF(moduleName); + } + if (obj) { + PyFile_WriteString(" in ", f); + PyFile_WriteObject(obj, f, 0); + } + PyFile_WriteString(" ignored\n", f); + PyErr_Clear(); /* Just in case */ + } + Py_XDECREF(t); + Py_XDECREF(v); + Py_XDECREF(tb); +} + +extern PyObject *PyModule_GetWarningsModule(void); + + +void +PyErr_SyntaxLocation(const char *filename, int lineno) { + PyErr_SyntaxLocationEx(filename, lineno, -1); +} + + +/* Set file and line information for the current exception. + If the exception is not a SyntaxError, also sets additional attributes + to make printing of exceptions believe it is a syntax error. */ + +void +PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) +{ + PyObject *exc, *v, *tb, *tmp; + _Py_IDENTIFIER(filename); + _Py_IDENTIFIER(lineno); + _Py_IDENTIFIER(msg); + _Py_IDENTIFIER(offset); + _Py_IDENTIFIER(print_file_and_line); + _Py_IDENTIFIER(text); + + /* add attributes for the line number and filename for the error */ + PyErr_Fetch(&exc, &v, &tb); + PyErr_NormalizeException(&exc, &v, &tb); + /* XXX check that it is, indeed, a syntax error. It might not + * be, though. */ + tmp = PyLong_FromLong(lineno); + if (tmp == NULL) + PyErr_Clear(); + else { + if (_PyObject_SetAttrId(v, &PyId_lineno, tmp)) + PyErr_Clear(); + Py_DECREF(tmp); + } + if (col_offset >= 0) { + tmp = PyLong_FromLong(col_offset); + if (tmp == NULL) + PyErr_Clear(); + else { + if (_PyObject_SetAttrId(v, &PyId_offset, tmp)) + PyErr_Clear(); + Py_DECREF(tmp); + } + } + if (filename != NULL) { + tmp = PyUnicode_DecodeFSDefault(filename); + if (tmp == NULL) + PyErr_Clear(); + else { + if (_PyObject_SetAttrId(v, &PyId_filename, tmp)) + PyErr_Clear(); + Py_DECREF(tmp); + } + + tmp = PyErr_ProgramText(filename, lineno); + if (tmp) { + if (_PyObject_SetAttrId(v, &PyId_text, tmp)) + PyErr_Clear(); + Py_DECREF(tmp); + } + } + if (_PyObject_SetAttrId(v, &PyId_offset, Py_None)) { + PyErr_Clear(); + } + if (exc != PyExc_SyntaxError) { + if (!_PyObject_HasAttrId(v, &PyId_msg)) { + tmp = PyObject_Str(v); + if (tmp) { + if (_PyObject_SetAttrId(v, &PyId_msg, tmp)) + PyErr_Clear(); + Py_DECREF(tmp); + } else { + PyErr_Clear(); + } + } + if (!_PyObject_HasAttrId(v, &PyId_print_file_and_line)) { + if (_PyObject_SetAttrId(v, &PyId_print_file_and_line, + Py_None)) + PyErr_Clear(); + } + } + PyErr_Restore(exc, v, tb); +} + +/* Attempt to load the line of text that the exception refers to. If it + fails, it will return NULL but will not set an exception. + + XXX The functionality of this function is quite similar to the + functionality in tb_displayline() in traceback.c. */ + +PyObject * +PyErr_ProgramText(const char *filename, int lineno) +{ + FILE *fp; + int i; + char linebuf[1000]; + + if (filename == NULL || *filename == '\0' || lineno <= 0) + return NULL; + fp = fopen(filename, "r" PY_STDIOTEXTMODE); + if (fp == NULL) + return NULL; + for (i = 0; i < lineno; i++) { + char *pLastChar = &linebuf[sizeof(linebuf) - 2]; + do { + *pLastChar = '\0'; + if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, + fp, NULL) == NULL) + break; + /* fgets read *something*; if it didn't get as + far as pLastChar, it must have found a newline + or hit the end of the file; if pLastChar is \n, + it obviously found a newline; else we haven't + yet seen a newline, so must continue */ + } while (*pLastChar != '\0' && *pLastChar != '\n'); + } + fclose(fp); + if (i == lineno) { + char *p = linebuf; + PyObject *res; + while (*p == ' ' || *p == '\t' || *p == '\014') + p++; + res = PyUnicode_FromString(p); + if (res == NULL) + PyErr_Clear(); + return res; + } + return NULL; +} + +#ifdef __cplusplus +} +#endif diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/import.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/import.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,4190 @@ + +/* Module definition and import implementation */ + +#include "Python.h" + +#include "Python-ast.h" +#undef Yield /* undefine macro conflicting with winbase.h */ +#include "errcode.h" +#include "marshal.h" +#include "code.h" +#include "osdefs.h" +#include "importdl.h" + +#ifdef HAVE_FCNTL_H +#include +#endif + + +/* Magic word to reject .pyc files generated by other Python versions. + It should change for each incompatible change to the bytecode. + + The value of CR and LF is incorporated so if you ever read or write + a .pyc file in text mode the magic number will be wrong; also, the + Apple MPW compiler swaps their values, botching string constants. + + The magic numbers must be spaced apart at least 2 values, as the + -U interpeter flag will cause MAGIC+1 being used. They have been + odd numbers for some time now. + + There were a variety of old schemes for setting the magic number. + The current working scheme is to increment the previous value by + 10. + + Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic + number also includes a new "magic tag", i.e. a human readable string used + to represent the magic number in __pycache__ directories. When you change + the magic number, you must also set a new unique magic tag. Generally this + can be named after the Python major version of the magic number bump, but + it can really be anything, as long as it's different than anything else + that's come before. The tags are included in the following table, starting + with Python 3.2a0. + + Known values: + Python 1.5: 20121 + Python 1.5.1: 20121 + Python 1.5.2: 20121 + Python 1.6: 50428 + Python 2.0: 50823 + Python 2.0.1: 50823 + Python 2.1: 60202 + Python 2.1.1: 60202 + Python 2.1.2: 60202 + Python 2.2: 60717 + Python 2.3a0: 62011 + Python 2.3a0: 62021 + Python 2.3a0: 62011 (!) + Python 2.4a0: 62041 + Python 2.4a3: 62051 + Python 2.4b1: 62061 + Python 2.5a0: 62071 + Python 2.5a0: 62081 (ast-branch) + Python 2.5a0: 62091 (with) + Python 2.5a0: 62092 (changed WITH_CLEANUP opcode) + Python 2.5b3: 62101 (fix wrong code: for x, in ...) + Python 2.5b3: 62111 (fix wrong code: x += yield) + Python 2.5c1: 62121 (fix wrong lnotab with for loops and + storing constants that should have been removed) + Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp) + Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) + Python 2.6a1: 62161 (WITH_CLEANUP optimization) + Python 3000: 3000 + 3010 (removed UNARY_CONVERT) + 3020 (added BUILD_SET) + 3030 (added keyword-only parameters) + 3040 (added signature annotations) + 3050 (print becomes a function) + 3060 (PEP 3115 metaclass syntax) + 3061 (string literals become unicode) + 3071 (PEP 3109 raise changes) + 3081 (PEP 3137 make __file__ and __name__ unicode) + 3091 (kill str8 interning) + 3101 (merge from 2.6a0, see 62151) + 3103 (__file__ points to source file) + Python 3.0a4: 3111 (WITH_CLEANUP optimization). + Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT) + Python 3.1a0: 3141 (optimize list, set and dict comprehensions: + change LIST_APPEND and SET_ADD, add MAP_ADD) + Python 3.1a0: 3151 (optimize conditional branches: + introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) + Python 3.2a0: 3160 (add SETUP_WITH) + tag: cpython-32 + Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR) + tag: cpython-32 + Python 3.2a2 3180 (add DELETE_DEREF) + Python 3.3a0 3190 __class__ super closure changed + Python 3.3a0 3200 (__qualname__ added) +*/ + +/* MAGIC must change whenever the bytecode emitted by the compiler may no + longer be understood by older implementations of the eval loop (usually + due to the addition of new opcodes) + TAG must change for each major Python release. The magic number will take + care of any bytecode changes that occur during development. +*/ +#define QUOTE(arg) #arg +#define STRIFY(name) QUOTE(name) +#define MAJOR STRIFY(PY_MAJOR_VERSION) +#define MINOR STRIFY(PY_MINOR_VERSION) +#define MAGIC (3200 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define TAG "cpython-" MAJOR MINOR; +#define CACHEDIR "__pycache__" +/* Current magic word and string tag as globals. */ +static long pyc_magic = MAGIC; +static const char *pyc_tag = TAG; +#undef QUOTE +#undef STRIFY +#undef MAJOR +#undef MINOR + +/* See _PyImport_FixupExtensionObject() below */ +static PyObject *extensions = NULL; + +/* Function from Parser/tokenizer.c */ +extern char * PyTokenizer_FindEncodingFilename(int, PyObject *); + +/* This table is defined in config.c: */ +extern struct _inittab _PyImport_Inittab[]; + +struct _inittab *PyImport_Inittab = _PyImport_Inittab; + +/* these tables define the module suffixes that Python recognizes */ +struct filedescr * _PyImport_Filetab = NULL; + +static const struct filedescr _PyImport_StandardFiletab[] = { + {".py", "U", PY_SOURCE}, + {".pyc", "rb", PY_COMPILED}, + {0, 0} +}; + +static PyObject *initstr = NULL; +_Py_IDENTIFIER(__path__); + +/* Initialize things */ + +void +_PyImport_Init(void) +{ + const struct filedescr *scan; + struct filedescr *filetab; + int countD = 0; + int countS = 0; + + initstr = PyUnicode_InternFromString("__init__"); + if (initstr == NULL) + Py_FatalError("Can't initialize import variables"); + + /* prepare _PyImport_Filetab: copy entries from + _PyImport_DynLoadFiletab and _PyImport_StandardFiletab. + */ +#ifdef HAVE_DYNAMIC_LOADING + for (scan = _PyImport_DynLoadFiletab; scan->suffix != NULL; ++scan) + ++countD; +#endif + for (scan = _PyImport_StandardFiletab; scan->suffix != NULL; ++scan) + ++countS; + filetab = PyMem_NEW(struct filedescr, countD + countS + 1); + if (filetab == NULL) + Py_FatalError("Can't initialize import file table."); +#ifdef HAVE_DYNAMIC_LOADING + memcpy(filetab, _PyImport_DynLoadFiletab, + countD * sizeof(struct filedescr)); +#endif + memcpy(filetab + countD, _PyImport_StandardFiletab, + countS * sizeof(struct filedescr)); + filetab[countD + countS].suffix = NULL; + + _PyImport_Filetab = filetab; + + if (Py_OptimizeFlag) { + /* Replace ".pyc" with ".pyo" in _PyImport_Filetab */ + for (; filetab->suffix != NULL; filetab++) { + if (strcmp(filetab->suffix, ".pyc") == 0) + filetab->suffix = ".pyo"; + } + } +} + +void +_PyImportHooks_Init(void) +{ + PyObject *v, *path_hooks = NULL, *zimpimport; + int err = 0; + + /* adding sys.path_hooks and sys.path_importer_cache, setting up + zipimport */ + if (PyType_Ready(&PyNullImporter_Type) < 0) + goto error; + + if (Py_VerboseFlag) + PySys_WriteStderr("# installing zipimport hook\n"); + + v = PyList_New(0); + if (v == NULL) + goto error; + err = PySys_SetObject("meta_path", v); + Py_DECREF(v); + if (err) + goto error; + v = PyDict_New(); + if (v == NULL) + goto error; + err = PySys_SetObject("path_importer_cache", v); + Py_DECREF(v); + if (err) + goto error; + path_hooks = PyList_New(0); + if (path_hooks == NULL) + goto error; + err = PySys_SetObject("path_hooks", path_hooks); + if (err) { + error: + PyErr_Print(); + Py_FatalError("initializing sys.meta_path, sys.path_hooks, " + "path_importer_cache, or NullImporter failed" + ); + } + + zimpimport = PyImport_ImportModule("zipimport"); + if (zimpimport == NULL) { + PyErr_Clear(); /* No zip import module -- okay */ + if (Py_VerboseFlag) + PySys_WriteStderr("# can't import zipimport\n"); + } + else { + _Py_IDENTIFIER(zipimporter); + PyObject *zipimporter = _PyObject_GetAttrId(zimpimport, + &PyId_zipimporter); + Py_DECREF(zimpimport); + if (zipimporter == NULL) { + PyErr_Clear(); /* No zipimporter object -- okay */ + if (Py_VerboseFlag) + PySys_WriteStderr( + "# can't import zipimport.zipimporter\n"); + } + else { + /* sys.path_hooks.append(zipimporter) */ + err = PyList_Append(path_hooks, zipimporter); + Py_DECREF(zipimporter); + if (err) + goto error; + if (Py_VerboseFlag) + PySys_WriteStderr( + "# installed zipimport hook\n"); + } + } + Py_DECREF(path_hooks); +} + +/* Locking primitives to prevent parallel imports of the same module + in different threads to return with a partially loaded module. + These calls are serialized by the global interpreter lock. */ + +#ifdef WITH_THREAD + +#include "pythread.h" + +static PyThread_type_lock import_lock = 0; +static long import_lock_thread = -1; +static int import_lock_level = 0; + +void +_PyImport_AcquireLock(void) +{ + long me = PyThread_get_thread_ident(); + if (me == -1) + return; /* Too bad */ + if (import_lock == NULL) { + import_lock = PyThread_allocate_lock(); + if (import_lock == NULL) + return; /* Nothing much we can do. */ + } + if (import_lock_thread == me) { + import_lock_level++; + return; + } + if (import_lock_thread != -1 || !PyThread_acquire_lock(import_lock, 0)) + { + PyThreadState *tstate = PyEval_SaveThread(); + PyThread_acquire_lock(import_lock, 1); + PyEval_RestoreThread(tstate); + } + import_lock_thread = me; + import_lock_level = 1; +} + +int +_PyImport_ReleaseLock(void) +{ + long me = PyThread_get_thread_ident(); + if (me == -1 || import_lock == NULL) + return 0; /* Too bad */ + if (import_lock_thread != me) + return -1; + import_lock_level--; + if (import_lock_level == 0) { + import_lock_thread = -1; + PyThread_release_lock(import_lock); + } + return 1; +} + +/* This function is called from PyOS_AfterFork to ensure that newly + created child processes do not share locks with the parent. + We now acquire the import lock around fork() calls but on some platforms + (Solaris 9 and earlier? see isue7242) that still left us with problems. */ + +void +_PyImport_ReInitLock(void) +{ + if (import_lock != NULL) + import_lock = PyThread_allocate_lock(); + if (import_lock_level > 1) { + /* Forked as a side effect of import */ + long me = PyThread_get_thread_ident(); + PyThread_acquire_lock(import_lock, 0); + /* XXX: can the previous line fail? */ + import_lock_thread = me; + import_lock_level--; + } else { + import_lock_thread = -1; + import_lock_level = 0; + } +} + +#endif + +static PyObject * +imp_lock_held(PyObject *self, PyObject *noargs) +{ +#ifdef WITH_THREAD + return PyBool_FromLong(import_lock_thread != -1); +#else + return PyBool_FromLong(0); +#endif +} + +static PyObject * +imp_acquire_lock(PyObject *self, PyObject *noargs) +{ +#ifdef WITH_THREAD + _PyImport_AcquireLock(); +#endif + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +imp_release_lock(PyObject *self, PyObject *noargs) +{ +#ifdef WITH_THREAD + if (_PyImport_ReleaseLock() < 0) { + PyErr_SetString(PyExc_RuntimeError, + "not holding the import lock"); + return NULL; + } +#endif + Py_INCREF(Py_None); + return Py_None; +} + +void +_PyImport_Fini(void) +{ + Py_XDECREF(extensions); + extensions = NULL; + PyMem_DEL(_PyImport_Filetab); + _PyImport_Filetab = NULL; +#ifdef WITH_THREAD + if (import_lock != NULL) { + PyThread_free_lock(import_lock); + import_lock = NULL; + } +#endif +} + +static void +imp_modules_reloading_clear(void) +{ + PyInterpreterState *interp = PyThreadState_Get()->interp; + if (interp->modules_reloading != NULL) + PyDict_Clear(interp->modules_reloading); +} + +/* Helper for sys */ + +PyObject * +PyImport_GetModuleDict(void) +{ + PyInterpreterState *interp = PyThreadState_GET()->interp; + if (interp->modules == NULL) + Py_FatalError("PyImport_GetModuleDict: no module dictionary!"); + return interp->modules; +} + + +/* List of names to clear in sys */ +static char* sys_deletes[] = { + "path", "argv", "ps1", "ps2", + "last_type", "last_value", "last_traceback", + "path_hooks", "path_importer_cache", "meta_path", + /* misc stuff */ + "flags", "float_info", + NULL +}; + +static char* sys_files[] = { + "stdin", "__stdin__", + "stdout", "__stdout__", + "stderr", "__stderr__", + NULL +}; + + +/* Un-initialize things, as good as we can */ + +void +PyImport_Cleanup(void) +{ + Py_ssize_t pos, ndone; + PyObject *key, *value, *dict; + PyInterpreterState *interp = PyThreadState_GET()->interp; + PyObject *modules = interp->modules; + + if (modules == NULL) + return; /* Already done */ + + /* Delete some special variables first. These are common + places where user values hide and people complain when their + destructors fail. Since the modules containing them are + deleted *last* of all, they would come too late in the normal + destruction order. Sigh. */ + + value = PyDict_GetItemString(modules, "builtins"); + if (value != NULL && PyModule_Check(value)) { + dict = PyModule_GetDict(value); + if (Py_VerboseFlag) + PySys_WriteStderr("# clear builtins._\n"); + PyDict_SetItemString(dict, "_", Py_None); + } + value = PyDict_GetItemString(modules, "sys"); + if (value != NULL && PyModule_Check(value)) { + char **p; + PyObject *v; + dict = PyModule_GetDict(value); + for (p = sys_deletes; *p != NULL; p++) { + if (Py_VerboseFlag) + PySys_WriteStderr("# clear sys.%s\n", *p); + PyDict_SetItemString(dict, *p, Py_None); + } + for (p = sys_files; *p != NULL; p+=2) { + if (Py_VerboseFlag) + PySys_WriteStderr("# restore sys.%s\n", *p); + v = PyDict_GetItemString(dict, *(p+1)); + if (v == NULL) + v = Py_None; + PyDict_SetItemString(dict, *p, v); + } + } + + /* First, delete __main__ */ + value = PyDict_GetItemString(modules, "__main__"); + if (value != NULL && PyModule_Check(value)) { + if (Py_VerboseFlag) + PySys_WriteStderr("# cleanup __main__\n"); + _PyModule_Clear(value); + PyDict_SetItemString(modules, "__main__", Py_None); + } + + /* The special treatment of "builtins" here is because even + when it's not referenced as a module, its dictionary is + referenced by almost every module's __builtins__. Since + deleting a module clears its dictionary (even if there are + references left to it), we need to delete the "builtins" + module last. Likewise, we don't delete sys until the very + end because it is implicitly referenced (e.g. by print). + + Also note that we 'delete' modules by replacing their entry + in the modules dict with None, rather than really deleting + them; this avoids a rehash of the modules dictionary and + also marks them as "non existent" so they won't be + re-imported. */ + + /* Next, repeatedly delete modules with a reference count of + one (skipping builtins and sys) and delete them */ + do { + ndone = 0; + pos = 0; + while (PyDict_Next(modules, &pos, &key, &value)) { + if (value->ob_refcnt != 1) + continue; + if (PyUnicode_Check(key) && PyModule_Check(value)) { + if (PyUnicode_CompareWithASCIIString(key, "builtins") == 0) + continue; + if (PyUnicode_CompareWithASCIIString(key, "sys") == 0) + continue; + if (Py_VerboseFlag) + PySys_FormatStderr( + "# cleanup[1] %U\n", key); + _PyModule_Clear(value); + PyDict_SetItem(modules, key, Py_None); + ndone++; + } + } + } while (ndone > 0); + + /* Next, delete all modules (still skipping builtins and sys) */ + pos = 0; + while (PyDict_Next(modules, &pos, &key, &value)) { + if (PyUnicode_Check(key) && PyModule_Check(value)) { + if (PyUnicode_CompareWithASCIIString(key, "builtins") == 0) + continue; + if (PyUnicode_CompareWithASCIIString(key, "sys") == 0) + continue; + if (Py_VerboseFlag) + PySys_FormatStderr("# cleanup[2] %U\n", key); + _PyModule_Clear(value); + PyDict_SetItem(modules, key, Py_None); + } + } + + /* Next, delete sys and builtins (in that order) */ + value = PyDict_GetItemString(modules, "sys"); + if (value != NULL && PyModule_Check(value)) { + if (Py_VerboseFlag) + PySys_WriteStderr("# cleanup sys\n"); + _PyModule_Clear(value); + PyDict_SetItemString(modules, "sys", Py_None); + } + value = PyDict_GetItemString(modules, "builtins"); + if (value != NULL && PyModule_Check(value)) { + if (Py_VerboseFlag) + PySys_WriteStderr("# cleanup builtins\n"); + _PyModule_Clear(value); + PyDict_SetItemString(modules, "builtins", Py_None); + } + + /* Finally, clear and delete the modules directory */ + PyDict_Clear(modules); + interp->modules = NULL; + Py_DECREF(modules); + Py_CLEAR(interp->modules_reloading); +} + + +/* Helper for pythonrun.c -- return magic number and tag. */ + +long +PyImport_GetMagicNumber(void) +{ + return pyc_magic; +} + + +const char * +PyImport_GetMagicTag(void) +{ + return pyc_tag; +} + +/* Magic for extension modules (built-in as well as dynamically + loaded). To prevent initializing an extension module more than + once, we keep a static dictionary 'extensions' keyed by module name + (for built-in modules) or by filename (for dynamically loaded + modules), containing these modules. A copy of the module's + dictionary is stored by calling _PyImport_FixupExtensionObject() + immediately after the module initialization function succeeds. A + copy can be retrieved from there by calling + _PyImport_FindExtensionObject(). + + Modules which do support multiple initialization set their m_size + field to a non-negative number (indicating the size of the + module-specific state). They are still recorded in the extensions + dictionary, to avoid loading shared libraries twice. +*/ + +int +_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, + PyObject *filename) +{ + PyObject *modules, *dict; + struct PyModuleDef *def; + if (extensions == NULL) { + extensions = PyDict_New(); + if (extensions == NULL) + return -1; + } + if (mod == NULL || !PyModule_Check(mod)) { + PyErr_BadInternalCall(); + return -1; + } + def = PyModule_GetDef(mod); + if (!def) { + PyErr_BadInternalCall(); + return -1; + } + modules = PyImport_GetModuleDict(); + if (PyDict_SetItem(modules, name, mod) < 0) + return -1; + if (_PyState_AddModule(mod, def) < 0) { + PyDict_DelItem(modules, name); + return -1; + } + if (def->m_size == -1) { + if (def->m_base.m_copy) { + /* Somebody already imported the module, + likely under a different name. + XXX this should really not happen. */ + Py_DECREF(def->m_base.m_copy); + def->m_base.m_copy = NULL; + } + dict = PyModule_GetDict(mod); + if (dict == NULL) + return -1; + def->m_base.m_copy = PyDict_Copy(dict); + if (def->m_base.m_copy == NULL) + return -1; + } + PyDict_SetItem(extensions, filename, (PyObject*)def); + return 0; +} + +int +_PyImport_FixupBuiltin(PyObject *mod, char *name) +{ + int res; + PyObject *nameobj; + nameobj = PyUnicode_InternFromString(name); + if (nameobj == NULL) + return -1; + res = _PyImport_FixupExtensionObject(mod, nameobj, nameobj); + Py_DECREF(nameobj); + return res; +} + +PyObject * +_PyImport_FindExtensionObject(PyObject *name, PyObject *filename) +{ + PyObject *mod, *mdict; + PyModuleDef* def; + if (extensions == NULL) + return NULL; + def = (PyModuleDef*)PyDict_GetItem(extensions, filename); + if (def == NULL) + return NULL; + if (def->m_size == -1) { + /* Module does not support repeated initialization */ + if (def->m_base.m_copy == NULL) + return NULL; + mod = PyImport_AddModuleObject(name); + if (mod == NULL) + return NULL; + mdict = PyModule_GetDict(mod); + if (mdict == NULL) + return NULL; + if (PyDict_Update(mdict, def->m_base.m_copy)) + return NULL; + } + else { + if (def->m_base.m_init == NULL) + return NULL; + mod = def->m_base.m_init(); + if (mod == NULL) + return NULL; + PyDict_SetItem(PyImport_GetModuleDict(), name, mod); + Py_DECREF(mod); + } + if (_PyState_AddModule(mod, def) < 0) { + PyDict_DelItem(PyImport_GetModuleDict(), name); + Py_DECREF(mod); + return NULL; + } + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # previously loaded (%R)\n", + name, filename); + return mod; + +} + +PyObject * +_PyImport_FindBuiltin(const char *name) +{ + PyObject *res, *nameobj; + nameobj = PyUnicode_InternFromString(name); + if (nameobj == NULL) + return NULL; + res = _PyImport_FindExtensionObject(nameobj, nameobj); + Py_DECREF(nameobj); + return res; +} + +/* Get the module object corresponding to a module name. + First check the modules dictionary if there's one there, + if not, create a new one and insert it in the modules dictionary. + Because the former action is most common, THIS DOES NOT RETURN A + 'NEW' REFERENCE! */ + +PyObject * +PyImport_AddModuleObject(PyObject *name) +{ + PyObject *modules = PyImport_GetModuleDict(); + PyObject *m; + + if ((m = PyDict_GetItem(modules, name)) != NULL && + PyModule_Check(m)) + return m; + m = PyModule_NewObject(name); + if (m == NULL) + return NULL; + if (PyDict_SetItem(modules, name, m) != 0) { + Py_DECREF(m); + return NULL; + } + Py_DECREF(m); /* Yes, it still exists, in modules! */ + + return m; +} + +PyObject * +PyImport_AddModule(const char *name) +{ + PyObject *nameobj, *module; + nameobj = PyUnicode_FromString(name); + if (nameobj == NULL) + return NULL; + module = PyImport_AddModuleObject(nameobj); + Py_DECREF(nameobj); + return module; +} + + +/* Remove name from sys.modules, if it's there. */ +static void +remove_module(PyObject *name) +{ + PyObject *modules = PyImport_GetModuleDict(); + if (PyDict_GetItem(modules, name) == NULL) + return; + if (PyDict_DelItem(modules, name) < 0) + Py_FatalError("import: deleting existing key in" + "sys.modules failed"); +} + +static PyObject * get_sourcefile(PyObject *filename); +static PyObject *make_source_pathname(PyObject *pathname); +static PyObject* make_compiled_pathname(PyObject *pathname, int debug); + +/* Execute a code object in a module and return the module object + * WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is + * removed from sys.modules, to avoid leaving damaged module objects + * in sys.modules. The caller may wish to restore the original + * module object (if any) in this case; PyImport_ReloadModule is an + * example. + * + * Note that PyImport_ExecCodeModuleWithPathnames() is the preferred, richer + * interface. The other two exist primarily for backward compatibility. + */ +PyObject * +PyImport_ExecCodeModule(char *name, PyObject *co) +{ + return PyImport_ExecCodeModuleWithPathnames( + name, co, (char *)NULL, (char *)NULL); +} + +PyObject * +PyImport_ExecCodeModuleEx(char *name, PyObject *co, char *pathname) +{ + return PyImport_ExecCodeModuleWithPathnames( + name, co, pathname, (char *)NULL); +} + +PyObject * +PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, + char *cpathname) +{ + PyObject *m = NULL; + PyObject *nameobj, *pathobj = NULL, *cpathobj = NULL; + + nameobj = PyUnicode_FromString(name); + if (nameobj == NULL) + return NULL; + + if (pathname != NULL) { + pathobj = PyUnicode_DecodeFSDefault(pathname); + if (pathobj == NULL) + goto error; + } else + pathobj = NULL; + if (cpathname != NULL) { + cpathobj = PyUnicode_DecodeFSDefault(cpathname); + if (cpathobj == NULL) + goto error; + } else + cpathobj = NULL; + m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj); +error: + Py_DECREF(nameobj); + Py_XDECREF(pathobj); + Py_XDECREF(cpathobj); + return m; +} + +PyObject* +PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, + PyObject *cpathname) +{ + PyObject *modules = PyImport_GetModuleDict(); + PyObject *m, *d, *v; + + m = PyImport_AddModuleObject(name); + if (m == NULL) + return NULL; + /* If the module is being reloaded, we get the old module back + and re-use its dict to exec the new code. */ + d = PyModule_GetDict(m); + if (PyDict_GetItemString(d, "__builtins__") == NULL) { + if (PyDict_SetItemString(d, "__builtins__", + PyEval_GetBuiltins()) != 0) + goto error; + } + /* Remember the filename as the __file__ attribute */ + if (pathname != NULL) { + v = get_sourcefile(pathname); + if (v == NULL) + PyErr_Clear(); + } + else + v = NULL; + if (v == NULL) { + v = ((PyCodeObject *)co)->co_filename; + Py_INCREF(v); + } + if (PyDict_SetItemString(d, "__file__", v) != 0) + PyErr_Clear(); /* Not important enough to report */ + Py_DECREF(v); + + /* Remember the pyc path name as the __cached__ attribute. */ + if (cpathname != NULL) + v = cpathname; + else + v = Py_None; + if (PyDict_SetItemString(d, "__cached__", v) != 0) + PyErr_Clear(); /* Not important enough to report */ + + v = PyEval_EvalCode(co, d, d); + if (v == NULL) + goto error; + Py_DECREF(v); + + if ((m = PyDict_GetItem(modules, name)) == NULL) { + PyErr_Format(PyExc_ImportError, + "Loaded module %R not found in sys.modules", + name); + return NULL; + } + + Py_INCREF(m); + + return m; + + error: + remove_module(name); + return NULL; +} + + +/* Like strrchr(string, '/') but searches for the rightmost of either SEP + or ALTSEP, if the latter is defined. +*/ +static Py_UCS4* +rightmost_sep(Py_UCS4 *s) +{ + Py_UCS4 *found, c; + for (found = NULL; (c = *s); s++) { + if (c == SEP +#ifdef ALTSEP + || c == ALTSEP +#endif + ) + { + found = s; + } + } + return found; +} + +/* Like rightmost_sep, but operate on unicode objects. */ +static Py_ssize_t +rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end) +{ + Py_ssize_t found, i; + Py_UCS4 c; + for (found = -1, i = start; i < end; i++) { + c = PyUnicode_READ_CHAR(o, i); + if (c == SEP +#ifdef ALTSEP + || c == ALTSEP +#endif + ) + { + found = i; + } + } + return found; +} + +/* Given a pathname for a Python source file, fill a buffer with the + pathname for the corresponding compiled file. Return the pathname + for the compiled file, or NULL if there's no space in the buffer. + Doesn't set an exception. + + foo.py -> __pycache__/foo..pyc + + pathstr is assumed to be "ready". +*/ + +static PyObject* +make_compiled_pathname(PyObject *pathstr, int debug) +{ + PyObject *result; + Py_ssize_t fname, ext, len, i, pos, taglen; + Py_ssize_t pycache_len = sizeof(CACHEDIR) - 1; + int kind; + void *data; + Py_UCS4 lastsep; + + /* Compute the output string size. */ + len = PyUnicode_GET_LENGTH(pathstr); + /* If there is no separator, this returns -1, so + fname will be 0. */ + fname = rightmost_sep_obj(pathstr, 0, len) + 1; + /* Windows: re-use the last separator character (/ or \\) when + appending the __pycache__ path. */ + if (fname > 0) + lastsep = PyUnicode_READ_CHAR(pathstr, fname -1); + else + lastsep = SEP; + ext = fname - 1; + for(i = fname; i < len; i++) + if (PyUnicode_READ_CHAR(pathstr, i) == '.') + ext = i + 1; + if (ext < fname) + /* No dot in filename; use entire filename */ + ext = len; + + /* result = pathstr[:fname] + "__pycache__" + SEP + + pathstr[fname:ext] + tag + ".py[co]" */ + taglen = strlen(pyc_tag); + result = PyUnicode_New(ext + pycache_len + 1 + taglen + 4, + PyUnicode_MAX_CHAR_VALUE(pathstr)); + if (!result) + return NULL; + kind = PyUnicode_KIND(result); + data = PyUnicode_DATA(result); + PyUnicode_CopyCharacters(result, 0, pathstr, 0, fname); + pos = fname; + for (i = 0; i < pycache_len; i++) + PyUnicode_WRITE(kind, data, pos++, CACHEDIR[i]); + PyUnicode_WRITE(kind, data, pos++, lastsep); + PyUnicode_CopyCharacters(result, pos, pathstr, + fname, ext - fname); + pos += ext - fname; + for (i = 0; pyc_tag[i]; i++) + PyUnicode_WRITE(kind, data, pos++, pyc_tag[i]); + PyUnicode_WRITE(kind, data, pos++, '.'); + PyUnicode_WRITE(kind, data, pos++, 'p'); + PyUnicode_WRITE(kind, data, pos++, 'y'); + PyUnicode_WRITE(kind, data, pos++, debug ? 'c' : 'o'); + return result; +} + + +/* Given a pathname to a Python byte compiled file, return the path to the + source file, if the path matches the PEP 3147 format. This does not check + for any file existence, however, if the pyc file name does not match PEP + 3147 style, NULL is returned. buf must be at least as big as pathname; + the resulting path will always be shorter. + + (...)/__pycache__/foo..pyc -> (...)/foo.py */ + +static PyObject* +make_source_pathname(PyObject *path) +{ + Py_ssize_t left, right, dot0, dot1, len; + Py_ssize_t i, j; + PyObject *result; + int kind; + void *data; + + len = PyUnicode_GET_LENGTH(path); + if (len > MAXPATHLEN) + return NULL; + + /* Look back two slashes from the end. In between these two slashes + must be the string __pycache__ or this is not a PEP 3147 style + path. It's possible for there to be only one slash. + */ + right = rightmost_sep_obj(path, 0, len); + if (right == -1) + return NULL; + left = rightmost_sep_obj(path, 0, right); + if (left == -1) + left = 0; + else + left++; + if (right-left != sizeof(CACHEDIR)-1) + return NULL; + for (i = 0; i < sizeof(CACHEDIR)-1; i++) + if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i]) + return NULL; + + /* Now verify that the path component to the right of the last slash + has two dots in it. + */ + dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1); + if (dot0 < 0) + return NULL; + dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1); + if (dot1 < 0) + return NULL; + /* Too many dots? */ + if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1) + return NULL; + + /* This is a PEP 3147 path. Start by copying everything from the + start of pathname up to and including the leftmost slash. Then + copy the file's basename, removing the magic tag and adding a .py + suffix. + */ + result = PyUnicode_New(left + (dot0-right) + 2, + PyUnicode_MAX_CHAR_VALUE(path)); + if (!result) + return NULL; + kind = PyUnicode_KIND(result); + data = PyUnicode_DATA(result); + PyUnicode_CopyCharacters(result, 0, path, 0, (i = left)); + PyUnicode_CopyCharacters(result, left, path, right+1, + (j = dot0-right)); + PyUnicode_WRITE(kind, data, i+j, 'p'); + PyUnicode_WRITE(kind, data, i+j+1, 'y'); + return result; +} + +/* Given a pathname for a Python source file, its time of last + modification, and a pathname for a compiled file, check whether the + compiled file represents the same version of the source. If so, + return a FILE pointer for the compiled file, positioned just after + the header; if not, return NULL. + Doesn't set an exception. */ + +static FILE * +check_compiled_module(PyObject *pathname, time_t mtime, PyObject *cpathname) +{ + FILE *fp; + long magic; + long pyc_mtime; + + fp = _Py_fopen(cpathname, "rb"); + if (fp == NULL) + return NULL; + magic = PyMarshal_ReadLongFromFile(fp); + if (magic != pyc_magic) { + if (Py_VerboseFlag) + PySys_FormatStderr("# %R has bad magic\n", cpathname); + fclose(fp); + return NULL; + } + pyc_mtime = PyMarshal_ReadLongFromFile(fp); + if (pyc_mtime != mtime) { + if (Py_VerboseFlag) + PySys_FormatStderr("# %R has bad mtime\n", cpathname); + fclose(fp); + return NULL; + } + if (Py_VerboseFlag) + PySys_FormatStderr("# %R matches %R\n", cpathname, pathname); + return fp; +} + + +/* Read a code object from a file and check it for validity */ + +static PyCodeObject * +read_compiled_module(PyObject *cpathname, FILE *fp) +{ + PyObject *co; + + co = PyMarshal_ReadLastObjectFromFile(fp); + if (co == NULL) + return NULL; + if (!PyCode_Check(co)) { + PyErr_Format(PyExc_ImportError, + "Non-code object in %R", cpathname); + Py_DECREF(co); + return NULL; + } + return (PyCodeObject *)co; +} + + +/* Load a module from a compiled file, execute it, and return its + module object WITH INCREMENTED REFERENCE COUNT */ + +static PyObject * +load_compiled_module(PyObject *name, PyObject *cpathname, FILE *fp) +{ + long magic; + PyCodeObject *co; + PyObject *m; + + magic = PyMarshal_ReadLongFromFile(fp); + if (magic != pyc_magic) { + PyErr_Format(PyExc_ImportError, + "Bad magic number in %R", cpathname); + return NULL; + } + (void) PyMarshal_ReadLongFromFile(fp); + co = read_compiled_module(cpathname, fp); + if (co == NULL) + return NULL; + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # precompiled from %R\n", + name, cpathname); + m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, + cpathname, cpathname); + Py_DECREF(co); + + return m; +} + +/* Parse a source file and return the corresponding code object */ + +static PyCodeObject * +parse_source_module(PyObject *pathname, FILE *fp) +{ + PyCodeObject *co; + PyObject *pathbytes; + mod_ty mod; + PyCompilerFlags flags; + PyArena *arena; + + pathbytes = PyUnicode_EncodeFSDefault(pathname); + if (pathbytes == NULL) + return NULL; + + arena = PyArena_New(); + if (arena == NULL) { + Py_DECREF(pathbytes); + return NULL; + } + + flags.cf_flags = 0; + mod = PyParser_ASTFromFile(fp, PyBytes_AS_STRING(pathbytes), NULL, + Py_file_input, 0, 0, &flags, + NULL, arena); + if (mod != NULL) + co = PyAST_Compile(mod, PyBytes_AS_STRING(pathbytes), NULL, arena); + else + co = NULL; + Py_DECREF(pathbytes); + PyArena_Free(arena); + return co; +} + +/* Write a compiled module to a file, placing the time of last + modification of its source into the header. + Errors are ignored, if a write error occurs an attempt is made to + remove the file. */ + +static void +write_compiled_module(PyCodeObject *co, PyObject *cpathname, + struct stat *srcstat) +{ + Py_UCS4 *cpathname_ucs4; + FILE *fp; + time_t mtime = srcstat->st_mtime; + PyObject *cpathname_tmp; + mode_t dirmode = (srcstat->st_mode | + S_IXUSR | S_IXGRP | S_IXOTH | + S_IWUSR | S_IWGRP | S_IWOTH); + PyObject *dirbytes; + PyObject *cpathbytes, *cpathbytes_tmp; + int fd; + PyObject *dirname; + Py_UCS4 *dirsep; + int res, ok; + + /* Ensure that the __pycache__ directory exists. */ + cpathname_ucs4 = PyUnicode_AsUCS4Copy(cpathname); + if (!cpathname_ucs4) + return; + dirsep = rightmost_sep(cpathname_ucs4); + if (dirsep == NULL) { + if (Py_VerboseFlag) + PySys_FormatStderr("# no %s path found %R\n", CACHEDIR, cpathname); + return; + } + dirname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, + cpathname_ucs4, + dirsep - cpathname_ucs4); + PyMem_Free(cpathname_ucs4); + if (dirname == NULL) { + PyErr_Clear(); + return; + } + + dirbytes = PyUnicode_EncodeFSDefault(dirname); + if (dirbytes == NULL) { + PyErr_Clear(); + return; + } + res = mkdir(PyBytes_AS_STRING(dirbytes), dirmode); + Py_DECREF(dirbytes); + if (0 <= res) + ok = 1; + else + ok = (errno == EEXIST); + if (!ok) { + if (Py_VerboseFlag) + PySys_FormatStderr("# cannot create cache dir %R\n", dirname); + Py_DECREF(dirname); + return; + } + Py_DECREF(dirname); + + /* We first write to a tmp file and then take advantage + of atomic renaming (which *should* be true even under Windows). + As in importlib, we use id(something) to generate a pseudo-random + filename. mkstemp() can't be used since it doesn't allow specifying + the file access permissions. + */ + cpathname_tmp = PyUnicode_FromFormat("%U.%zd", + cpathname, (Py_ssize_t) co); + if (cpathname_tmp == NULL) { + PyErr_Clear(); + return; + } + cpathbytes_tmp = PyUnicode_EncodeFSDefault(cpathname_tmp); + Py_DECREF(cpathname_tmp); + if (cpathbytes_tmp == NULL) { + PyErr_Clear(); + return; + } + cpathbytes = PyUnicode_EncodeFSDefault(cpathname); + if (cpathbytes == NULL) { + PyErr_Clear(); + return; + } + fd = open(PyBytes_AS_STRING(cpathbytes_tmp), + O_CREAT | O_EXCL | O_WRONLY, 0666); + if (0 <= fd) + fp = fdopen(fd, "wb"); + else + fp = NULL; + if (fp == NULL) { + if (Py_VerboseFlag) + PySys_FormatStderr( + "# can't create %R\n", cpathname); + Py_DECREF(cpathbytes); + Py_DECREF(cpathbytes_tmp); + return; + } + PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); + /* First write a 0 for mtime */ + PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); + PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); + fflush(fp); + /* Now write the true mtime */ + fseek(fp, 4L, 0); + assert(mtime < LONG_MAX); + PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION); + if (fflush(fp) != 0 || ferror(fp)) { + if (Py_VerboseFlag) + PySys_FormatStderr("# can't write %R\n", cpathname); + /* Don't keep partial file */ + fclose(fp); + (void) unlink(PyBytes_AS_STRING(cpathbytes_tmp)); + Py_DECREF(cpathbytes); + Py_DECREF(cpathbytes_tmp); + return; + } + fclose(fp); + /* Do a (hopefully) atomic rename */ + if (rename(PyBytes_AS_STRING(cpathbytes_tmp), + PyBytes_AS_STRING(cpathbytes))) { + if (Py_VerboseFlag) + PySys_FormatStderr("# can't write %R\n", cpathname); + /* Don't keep tmp file */ + unlink(PyBytes_AS_STRING(cpathbytes_tmp)); + Py_DECREF(cpathbytes); + Py_DECREF(cpathbytes_tmp); + return; + } + Py_DECREF(cpathbytes); + Py_DECREF(cpathbytes_tmp); + if (Py_VerboseFlag) + PySys_FormatStderr("# wrote %R\n", cpathname); +} + +static void +update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname) +{ + PyObject *constants, *tmp; + Py_ssize_t i, n; + + if (PyUnicode_Compare(co->co_filename, oldname)) + return; + + tmp = co->co_filename; + co->co_filename = newname; + Py_INCREF(co->co_filename); + Py_DECREF(tmp); + + constants = co->co_consts; + n = PyTuple_GET_SIZE(constants); + for (i = 0; i < n; i++) { + tmp = PyTuple_GET_ITEM(constants, i); + if (PyCode_Check(tmp)) + update_code_filenames((PyCodeObject *)tmp, + oldname, newname); + } +} + +static void +update_compiled_module(PyCodeObject *co, PyObject *newname) +{ + PyObject *oldname; + + if (PyUnicode_Compare(co->co_filename, newname) == 0) + return; + + oldname = co->co_filename; + Py_INCREF(oldname); + update_code_filenames(co, oldname, newname); + Py_DECREF(oldname); +} + +static PyObject * +imp_fix_co_filename(PyObject *self, PyObject *args) +{ + PyObject *co; + PyObject *file_path; + + if (!PyArg_ParseTuple(args, "OO:_fix_co_filename", &co, &file_path)) + return NULL; + + if (!PyCode_Check(co)) { + PyErr_SetString(PyExc_TypeError, + "first argument must be a code object"); + return NULL; + } + + if (!PyUnicode_Check(file_path)) { + PyErr_SetString(PyExc_TypeError, + "second argument must be a string"); + return NULL; + } + + update_compiled_module((PyCodeObject*)co, file_path); + + Py_RETURN_NONE; +} + +/* Load a source module from a given file and return its module + object WITH INCREMENTED REFERENCE COUNT. If there's a matching + byte-compiled file, use that instead. */ + +static PyObject * +load_source_module(PyObject *name, PyObject *pathname, FILE *fp) +{ + struct stat st; + FILE *fpc; + PyObject *cpathname = NULL, *cpathbytes = NULL; + PyCodeObject *co; + PyObject *m = NULL; + + if (fstat(fileno(fp), &st) != 0) { + PyErr_Format(PyExc_RuntimeError, + "unable to get file status from %R", + pathname); + goto error; + } +#if SIZEOF_TIME_T > 4 + /* Python's .pyc timestamp handling presumes that the timestamp fits + in 4 bytes. This will be fine until sometime in the year 2038, + when a 4-byte signed time_t will overflow. + */ + if (st.st_mtime >> 32) { + PyErr_SetString(PyExc_OverflowError, + "modification time overflows a 4 byte field"); + goto error; + } +#endif + if (PyUnicode_READY(pathname) < 0) + return NULL; + cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag); + + if (cpathname != NULL) + fpc = check_compiled_module(pathname, st.st_mtime, cpathname); + else + fpc = NULL; + + if (fpc) { + co = read_compiled_module(cpathname, fpc); + fclose(fpc); + if (co == NULL) + goto error; + update_compiled_module(co, pathname); + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # precompiled from %R\n", + name, cpathname); + m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, + cpathname, cpathname); + } + else { + co = parse_source_module(pathname, fp); + if (co == NULL) + goto error; + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # from %R\n", + name, pathname); + if (cpathname != NULL) { + PyObject *ro = PySys_GetObject("dont_write_bytecode"); + if (ro == NULL || !PyObject_IsTrue(ro)) + write_compiled_module(co, cpathname, &st); + } + m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, + pathname, cpathname); + } + Py_DECREF(co); + +error: + Py_XDECREF(cpathbytes); + Py_XDECREF(cpathname); + return m; +} + +/* Get source file -> unicode or None + * Returns the path to the py file if available, else the given path + */ +static PyObject * +get_sourcefile(PyObject *filename) +{ + Py_ssize_t len; + Py_UCS4 *fileuni; + PyObject *py; + struct stat statbuf; + + len = PyUnicode_GET_LENGTH(filename); + if (len == 0) + Py_RETURN_NONE; + + /* don't match *.pyc or *.pyo? */ + fileuni = PyUnicode_AsUCS4Copy(filename); + if (!fileuni) + return NULL; + if (len < 5 + || fileuni[len-4] != '.' + || (fileuni[len-3] != 'p' && fileuni[len-3] != 'P') + || (fileuni[len-2] != 'y' && fileuni[len-2] != 'Y')) + goto unchanged; + + /* Start by trying to turn PEP 3147 path into source path. If that + * fails, just chop off the trailing character, i.e. legacy pyc path + * to py. + */ + py = make_source_pathname(filename); + if (py == NULL) { + PyErr_Clear(); + py = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, fileuni, len - 1); + } + if (py == NULL) + goto error; + + if (_Py_stat(py, &statbuf) == 0 && S_ISREG(statbuf.st_mode)) { + PyMem_Free(fileuni); + return py; + } + Py_DECREF(py); + goto unchanged; + +error: + PyErr_Clear(); +unchanged: + PyMem_Free(fileuni); + Py_INCREF(filename); + return filename; +} + +/* Forward */ +static PyObject *load_module(PyObject *, FILE *, PyObject *, int, PyObject *); +static struct filedescr *find_module(PyObject *, PyObject *, PyObject *, + PyObject **, FILE **, PyObject **); +static struct _frozen * find_frozen(PyObject *); + +/* Load a package and return its module object WITH INCREMENTED + REFERENCE COUNT */ + +static PyObject * +load_package(PyObject *name, PyObject *pathname) +{ + PyObject *m, *d, *bufobj; + PyObject *file = NULL, *path_list = NULL; + int err; + FILE *fp = NULL; + struct filedescr *fdp; + + m = PyImport_AddModuleObject(name); + if (m == NULL) + return NULL; + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # directory %R\n", + name, pathname); + file = get_sourcefile(pathname); + if (file == NULL) + return NULL; + path_list = Py_BuildValue("[O]", file); + if (path_list == NULL) { + Py_DECREF(file); + return NULL; + } + d = PyModule_GetDict(m); + err = PyDict_SetItemString(d, "__file__", file); + Py_DECREF(file); + if (err == 0) + err = PyDict_SetItemString(d, "__path__", path_list); + if (err != 0) { + Py_DECREF(path_list); + return NULL; + } + fdp = find_module(name, initstr, path_list, + &bufobj, &fp, NULL); + Py_DECREF(path_list); + if (fdp == NULL) { + if (PyErr_ExceptionMatches(PyExc_ImportError)) { + PyErr_Clear(); + Py_INCREF(m); + return m; + } + else + return NULL; + } + m = load_module(name, fp, bufobj, fdp->type, NULL); + Py_XDECREF(bufobj); + if (fp != NULL) + fclose(fp); + return m; +} + + +/* Helper to test for built-in module */ + +static int +is_builtin(PyObject *name) +{ + int i, cmp; + for (i = 0; PyImport_Inittab[i].name != NULL; i++) { + cmp = PyUnicode_CompareWithASCIIString(name, PyImport_Inittab[i].name); + if (cmp == 0) { + if (PyImport_Inittab[i].initfunc == NULL) + return -1; + else + return 1; + } + } + return 0; +} + + +/* Return an importer object for a sys.path/pkg.__path__ item 'p', + possibly by fetching it from the path_importer_cache dict. If it + wasn't yet cached, traverse path_hooks until a hook is found + that can handle the path item. Return None if no hook could; + this tells our caller it should fall back to the builtin + import mechanism. Cache the result in path_importer_cache. + Returns a borrowed reference. */ + +static PyObject * +get_path_importer(PyObject *path_importer_cache, PyObject *path_hooks, + PyObject *p) +{ + PyObject *importer; + Py_ssize_t j, nhooks; + + /* These conditions are the caller's responsibility: */ + assert(PyList_Check(path_hooks)); + assert(PyDict_Check(path_importer_cache)); + + nhooks = PyList_Size(path_hooks); + if (nhooks < 0) + return NULL; /* Shouldn't happen */ + + importer = PyDict_GetItem(path_importer_cache, p); + if (importer != NULL) + return importer; + + /* set path_importer_cache[p] to None to avoid recursion */ + if (PyDict_SetItem(path_importer_cache, p, Py_None) != 0) + return NULL; + + for (j = 0; j < nhooks; j++) { + PyObject *hook = PyList_GetItem(path_hooks, j); + if (hook == NULL) + return NULL; + importer = PyObject_CallFunctionObjArgs(hook, p, NULL); + if (importer != NULL) + break; + + if (!PyErr_ExceptionMatches(PyExc_ImportError)) { + return NULL; + } + PyErr_Clear(); + } + if (importer == NULL) { + importer = PyObject_CallFunctionObjArgs( + (PyObject *)&PyNullImporter_Type, p, NULL + ); + if (importer == NULL) { + if (PyErr_ExceptionMatches(PyExc_ImportError)) { + PyErr_Clear(); + return Py_None; + } + } + } + if (importer != NULL) { + int err = PyDict_SetItem(path_importer_cache, p, importer); + Py_DECREF(importer); + if (err != 0) + return NULL; + } + return importer; +} + +PyAPI_FUNC(PyObject *) +PyImport_GetImporter(PyObject *path) { + PyObject *importer=NULL, *path_importer_cache=NULL, *path_hooks=NULL; + + if ((path_importer_cache = PySys_GetObject("path_importer_cache"))) { + if ((path_hooks = PySys_GetObject("path_hooks"))) { + importer = get_path_importer(path_importer_cache, + path_hooks, path); + } + } + Py_XINCREF(importer); /* get_path_importer returns a borrowed reference */ + return importer; +} + +/* Search the path (default sys.path) for a module. Return the + corresponding filedescr struct, and (via return arguments) the + pathname and an open file. Return NULL if the module is not found. */ + +#ifdef MS_COREDLL +extern FILE *_PyWin_FindRegisteredModule(PyObject *, struct filedescr **, + PyObject **p_path); +#endif + +/* Forward */ +static int case_ok(PyObject *, Py_ssize_t, PyObject *); +static int find_init_module(PyObject *); +static struct filedescr importhookdescr = {"", "", IMP_HOOK}; + +/* Get the path of a module: get its importer and call importer.find_module() + hook, or check if the module if a package (if path/__init__.py exists). + + -1: error: a Python error occurred + 0: ignore: an error occurred because of invalid data, but the error is not + important enough to be reported. + 1: get path: module not found, but *buf contains its path + 2: found: *p_fd is the file descriptor (IMP_HOOK or PKG_DIRECTORY) + and *buf is the path */ + +static int +find_module_path(PyObject *fullname, PyObject *name, PyObject *path, + PyObject *path_hooks, PyObject *path_importer_cache, + PyObject **p_path, PyObject **p_loader, struct filedescr **p_fd) +{ + PyObject *path_unicode, *filename = NULL; + Py_ssize_t len, pos; + struct stat statbuf; + static struct filedescr fd_package = {"", "", PKG_DIRECTORY}; + int result, addsep; + + if (PyUnicode_Check(path)) { + Py_INCREF(path); + path_unicode = path; + } + else if (PyBytes_Check(path)) { + path_unicode = PyUnicode_DecodeFSDefaultAndSize( + PyBytes_AS_STRING(path), PyBytes_GET_SIZE(path)); + if (path_unicode == NULL) + return -1; + } + else + return 0; + + if (PyUnicode_READY(path_unicode)) + return -1; + + len = PyUnicode_GET_LENGTH(path_unicode); + if (PyUnicode_FindChar(path_unicode, 0, 0, len, 1) != -1) { + result = 0; + goto out; /* path contains '\0' */ + } + + /* sys.path_hooks import hook */ + if (p_loader != NULL) { + _Py_IDENTIFIER(find_module); + PyObject *importer; + + importer = get_path_importer(path_importer_cache, + path_hooks, path); + if (importer == NULL) { + result = -1; + goto out; + } + /* Note: importer is a borrowed reference */ + if (importer != Py_None) { + PyObject *loader; + loader = _PyObject_CallMethodId(importer, + &PyId_find_module, "O", fullname); + if (loader == NULL) { + result = -1; /* error */ + goto out; + } + if (loader != Py_None) { + /* a loader was found */ + *p_loader = loader; + *p_fd = &importhookdescr; + result = 2; + goto out; + } + Py_DECREF(loader); + result = 0; + goto out; + } + } + /* no hook was found, use builtin import */ + + addsep = 0; + if (len > 0 && PyUnicode_READ_CHAR(path_unicode, len-1) != SEP +#ifdef ALTSEP + && PyUnicode_READ_CHAR(path_unicode, len-1) != ALTSEP +#endif + ) + addsep = 1; + filename = PyUnicode_New(len + PyUnicode_GET_LENGTH(name) + addsep, + Py_MAX(PyUnicode_MAX_CHAR_VALUE(path_unicode), + PyUnicode_MAX_CHAR_VALUE(name))); + if (filename == NULL) { + result = -1; + goto out; + } + PyUnicode_CopyCharacters(filename, 0, path_unicode, 0, len); + pos = len; + if (addsep) + PyUnicode_WRITE(PyUnicode_KIND(filename), + PyUnicode_DATA(filename), + pos++, SEP); + PyUnicode_CopyCharacters(filename, pos, name, 0, + PyUnicode_GET_LENGTH(name)); + + /* Check for package import (buf holds a directory name, + and there's an __init__ module in that directory */ +#ifdef HAVE_STAT + if (_Py_stat(filename, &statbuf) == 0 && /* it exists */ + S_ISDIR(statbuf.st_mode)) /* it's a directory */ + { + int match; + + match = case_ok(filename, 0, name); + if (match < 0) { + result = -1; + goto out; + } + if (match) { /* case matches */ + if (find_init_module(filename)) { /* and has __init__.py */ + *p_path = filename; + filename = NULL; + *p_fd = &fd_package; + result = 2; + goto out; + } + else { + int err; + err = PyErr_WarnFormat(PyExc_ImportWarning, 1, + "Not importing directory %R: missing __init__.py", + filename); + if (err) { + result = -1; + goto out; + } + } + } + } +#endif + *p_path = filename; + filename = NULL; + result = 1; + out: + Py_DECREF(path_unicode); + Py_XDECREF(filename); + return result; +} + +/* Find a module in search_path_list. For each path, try + find_module_path() or try each _PyImport_Filetab suffix. + + If the module is found, return a file descriptor, write the path in + *p_filename, write the pointer to the file object into *p_fp, and (if + p_loader is not NULL) the loader into *p_loader. + + Otherwise, raise an exception and return NULL. */ + +static struct filedescr* +find_module_path_list(PyObject *fullname, PyObject *name, + PyObject *search_path_list, PyObject *path_hooks, + PyObject *path_importer_cache, + PyObject **p_path, FILE **p_fp, PyObject **p_loader) +{ + Py_ssize_t i, npath; + struct filedescr *fdp = NULL; + char *filemode; + FILE *fp = NULL; + PyObject *prefix, *filename; + int match; + + npath = PyList_Size(search_path_list); + for (i = 0; i < npath; i++) { + PyObject *path; + int ok; + + path = PyList_GetItem(search_path_list, i); + if (path == NULL) + return NULL; + + prefix = NULL; + ok = find_module_path(fullname, name, path, + path_hooks, path_importer_cache, + &prefix, p_loader, &fdp); + if (ok < 0) + return NULL; + if (ok == 0) + continue; + if (ok == 2) { + *p_path = prefix; + return fdp; + } + + for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) { + struct stat statbuf; + + filemode = fdp->mode; + if (filemode[0] == 'U') + filemode = "r" PY_STDIOTEXTMODE; + + filename = PyUnicode_FromFormat("%U%s", prefix, fdp->suffix); + if (filename == NULL) { + Py_DECREF(prefix); + return NULL; + } + + if (Py_VerboseFlag > 1) + PySys_FormatStderr("# trying %R\n", filename); + + if (_Py_stat(filename, &statbuf) != 0 || S_ISDIR(statbuf.st_mode)) + { + /* it doesn't exist, or it's a directory */ + Py_DECREF(filename); + continue; + } + + fp = _Py_fopen(filename, filemode); + if (fp == NULL) { + Py_DECREF(filename); + if (PyErr_Occurred()) { + Py_DECREF(prefix); + return NULL; + } + continue; + } + match = case_ok(filename, -(Py_ssize_t)strlen(fdp->suffix), name); + if (match < 0) { + Py_DECREF(prefix); + Py_DECREF(filename); + return NULL; + } + if (match) { + Py_DECREF(prefix); + *p_path = filename; + *p_fp = fp; + return fdp; + } + Py_DECREF(filename); + + fclose(fp); + fp = NULL; + } + Py_DECREF(prefix); + } + PyErr_Format(PyExc_ImportError, + "No module named %R", name); + return NULL; +} + +/* Find a module: + + - try find_module() of each sys.meta_path hook + - try find_frozen() + - try is_builtin() + - try _PyWin_FindRegisteredModule() (Windows only) + - otherwise, call find_module_path_list() with search_path_list (if not + NULL) or sys.path + + fullname can be NULL, but only if p_loader is NULL. + + Return: + + - &fd_builtin (C_BUILTIN) if it is a builtin + - &fd_frozen (PY_FROZEN) if it is frozen + - &fd_package (PKG_DIRECTORY) and write the filename into *p_path + if it is a package + - &importhookdescr (IMP_HOOK) and write the loader into *p_loader if a + importer loader was found + - a file descriptor (PY_SOURCE, PY_COMPILED, C_EXTENSION, PY_RESOURCE or + PY_CODERESOURCE: see _PyImport_Filetab), write the filename into + *p_path and the pointer to the open file into *p_fp + - NULL on error + + By default, *p_path, *p_fp and *p_loader (if set) are set to NULL. + Eg. *p_path is set to NULL for a builtin package. +*/ + +static struct filedescr * +find_module(PyObject *fullname, PyObject *name, PyObject *search_path_list, + PyObject **p_path, FILE **p_fp, PyObject **p_loader) +{ + Py_ssize_t i, npath; + static struct filedescr fd_frozen = {"", "", PY_FROZEN}; + static struct filedescr fd_builtin = {"", "", C_BUILTIN}; + PyObject *path_hooks, *path_importer_cache; + + *p_path = NULL; + *p_fp = NULL; + if (p_loader != NULL) + *p_loader = NULL; + + if (PyUnicode_GET_LENGTH(name) > MAXPATHLEN) { + PyErr_SetString(PyExc_OverflowError, + "module name is too long"); + return NULL; + } + + /* sys.meta_path import hook */ + if (p_loader != NULL) { + _Py_IDENTIFIER(find_module); + PyObject *meta_path; + + meta_path = PySys_GetObject("meta_path"); + if (meta_path == NULL || !PyList_Check(meta_path)) { + PyErr_SetString(PyExc_RuntimeError, + "sys.meta_path must be a list of " + "import hooks"); + return NULL; + } + Py_INCREF(meta_path); /* zap guard */ + npath = PyList_Size(meta_path); + for (i = 0; i < npath; i++) { + PyObject *loader; + PyObject *hook = PyList_GetItem(meta_path, i); + loader = _PyObject_CallMethodId(hook, &PyId_find_module, + "OO", fullname, + search_path_list != NULL ? + search_path_list : Py_None); + if (loader == NULL) { + Py_DECREF(meta_path); + return NULL; /* true error */ + } + if (loader != Py_None) { + /* a loader was found */ + *p_loader = loader; + Py_DECREF(meta_path); + return &importhookdescr; + } + Py_DECREF(loader); + } + Py_DECREF(meta_path); + } + + if (find_frozen(fullname) != NULL) + return &fd_frozen; + + if (search_path_list == NULL) { +#ifdef MS_COREDLL + FILE *fp; + struct filedescr *fdp; +#endif + if (is_builtin(name)) + return &fd_builtin; +#ifdef MS_COREDLL + fp = _PyWin_FindRegisteredModule(name, &fdp, p_path); + if (fp != NULL) { + *p_fp = fp; + return fdp; + } + else if (PyErr_Occurred()) + return NULL; +#endif + search_path_list = PySys_GetObject("path"); + } + + if (search_path_list == NULL || !PyList_Check(search_path_list)) { + PyErr_SetString(PyExc_RuntimeError, + "sys.path must be a list of directory names"); + return NULL; + } + + path_hooks = PySys_GetObject("path_hooks"); + if (path_hooks == NULL || !PyList_Check(path_hooks)) { + PyErr_SetString(PyExc_RuntimeError, + "sys.path_hooks must be a list of " + "import hooks"); + return NULL; + } + path_importer_cache = PySys_GetObject("path_importer_cache"); + if (path_importer_cache == NULL || + !PyDict_Check(path_importer_cache)) { + PyErr_SetString(PyExc_RuntimeError, + "sys.path_importer_cache must be a dict"); + return NULL; + } + + return find_module_path_list(fullname, name, + search_path_list, path_hooks, + path_importer_cache, + p_path, p_fp, p_loader); +} + +/* case_bytes(char* buf, Py_ssize_t len, Py_ssize_t namelen, char* name) + * The arguments here are tricky, best shown by example: + * /a/b/c/d/e/f/g/h/i/j/k/some_long_module_name.py\0 + * ^ ^ ^ ^ + * |--------------------- buf ---------------------| + * |------------------- len ------------------| + * |------ name -------| + * |----- namelen -----| + * buf is the full path, but len only counts up to (& exclusive of) the + * extension. name is the module name, also exclusive of extension. + * + * We've already done a successful stat() or fopen() on buf, so know that + * there's some match, possibly case-insensitive. + * + * case_bytes() is to return 1 if there's a case-sensitive match for + * name, else 0. case_bytes() is also to return 1 if envar PYTHONCASEOK + * exists. + * + * case_bytes() is used to implement case-sensitive import semantics even + * on platforms with case-insensitive filesystems. It's trivial to implement + * for case-sensitive filesystems. It's pretty much a cross-platform + * nightmare for systems with case-insensitive filesystems. + */ + +/* First we may need a pile of platform-specific header files; the sequence + * of #if's here should match the sequence in the body of case_bytes(). + */ +#if defined(MS_WINDOWS) +#include + +#elif defined(DJGPP) +#include + +#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) +#include +#include + +#elif defined(PYOS_OS2) +#define INCL_DOS +#define INCL_DOSERRORS +#define INCL_NOPMAPI +#include +#endif + +#if defined(DJGPP) \ + || ((defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) \ + && defined(HAVE_DIRENT_H)) \ + || defined(PYOS_OS2) +# define USE_CASE_OK_BYTES +#endif + + +#ifdef USE_CASE_OK_BYTES +static int +case_bytes(char *buf, Py_ssize_t len, Py_ssize_t namelen, const char *name) +{ +/* Pick a platform-specific implementation; the sequence of #if's here should + * match the sequence just above. + */ + +/* DJGPP */ +#if defined(DJGPP) + struct ffblk ffblk; + int done; + + if (Py_GETENV("PYTHONCASEOK") != NULL) + return 1; + + done = findfirst(buf, &ffblk, FA_ARCH|FA_RDONLY|FA_HIDDEN|FA_DIREC); + if (done) { + PyErr_Format(PyExc_NameError, + "Can't find file for module %.100s\n(filename %.300s)", + name, buf); + return -1; + } + return strncmp(ffblk.ff_name, name, namelen) == 0; + +/* new-fangled macintosh (macosx) or Cygwin */ +#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) + DIR *dirp; + struct dirent *dp; + char dirname[MAXPATHLEN + 1]; + const int dirlen = len - namelen - 1; /* don't want trailing SEP */ + + if (Py_GETENV("PYTHONCASEOK") != NULL) + return 1; + + /* Copy the dir component into dirname; substitute "." if empty */ + if (dirlen <= 0) { + dirname[0] = '.'; + dirname[1] = '\0'; + } + else { + assert(dirlen <= MAXPATHLEN); + memcpy(dirname, buf, dirlen); + dirname[dirlen] = '\0'; + } + /* Open the directory and search the entries for an exact match. */ + dirp = opendir(dirname); + if (dirp) { + char *nameWithExt = buf + len - namelen; + while ((dp = readdir(dirp)) != NULL) { + const int thislen = +#ifdef _DIRENT_HAVE_D_NAMELEN + dp->d_namlen; +#else + strlen(dp->d_name); +#endif + if (thislen >= namelen && + strcmp(dp->d_name, nameWithExt) == 0) { + (void)closedir(dirp); + return 1; /* Found */ + } + } + (void)closedir(dirp); + } + return 0 ; /* Not found */ + +/* OS/2 */ +#elif defined(PYOS_OS2) + HDIR hdir = 1; + ULONG srchcnt = 1; + FILEFINDBUF3 ffbuf; + APIRET rc; + + if (Py_GETENV("PYTHONCASEOK") != NULL) + return 1; + + rc = DosFindFirst(buf, + &hdir, + FILE_READONLY | FILE_HIDDEN | FILE_SYSTEM | FILE_DIRECTORY, + &ffbuf, sizeof(ffbuf), + &srchcnt, + FIL_STANDARD); + if (rc != NO_ERROR) + return 0; + return strncmp(ffbuf.achName, name, namelen) == 0; + +/* assuming it's a case-sensitive filesystem, so there's nothing to do! */ +#else +# error "USE_CASE_OK_BYTES is not correctly defined" +#endif +} +#endif + +/* + * Check if a filename case matchs the name case. We've already done a + * successful stat() or fopen() on buf, so know that there's some match, + * possibly case-insensitive. + * + * case_ok() is to return 1 if there's a case-sensitive match for name, 0 if it + * the filename doesn't match, or -1 on error. case_ok() is also to return 1 + * if envar PYTHONCASEOK exists. + * + * case_ok() is used to implement case-sensitive import semantics even + * on platforms with case-insensitive filesystems. It's trivial to implement + * for case-sensitive filesystems. It's pretty much a cross-platform + * nightmare for systems with case-insensitive filesystems. + */ + +static int +case_ok(PyObject *filename, Py_ssize_t prefix_delta, PyObject *name) +{ + /* assuming it's a case-sensitive filesystem, so there's nothing to do! */ + return 1; + +} + +#ifdef HAVE_STAT + +/* Helper to look for __init__.py or __init__.py[co] in potential package. + Return 1 if __init__ was found, 0 if not, or -1 on error. */ +static int +find_init_module(PyObject *directory) +{ + struct stat statbuf; + PyObject *filename; + int match; + + filename = PyUnicode_FromFormat("%U%c__init__.py", directory, SEP); + if (filename == NULL) + return -1; + if (_Py_stat(filename, &statbuf) == 0) { + /* 3=len(".py") */ + match = case_ok(filename, -3, initstr); + if (match < 0) { + Py_DECREF(filename); + return -1; + } + if (match) { + Py_DECREF(filename); + return 1; + } + } + Py_DECREF(filename); + + filename = PyUnicode_FromFormat("%U%c__init__.py%c", + directory, SEP, Py_OptimizeFlag ? 'o' : 'c'); + if (filename == NULL) + return -1; + if (_Py_stat(filename, &statbuf) == 0) { + /* 4=len(".pyc") */ + match = case_ok(filename, -4, initstr); + if (match < 0) { + Py_DECREF(filename); + return -1; + } + if (match) { + Py_DECREF(filename); + return 1; + } + } + Py_DECREF(filename); + return 0; +} + +#endif /* HAVE_STAT */ + + +static int init_builtin(PyObject *); /* Forward */ + +static PyObject* +load_builtin(PyObject *name, int type) +{ + PyObject *m, *modules; + int err; + + if (type == C_BUILTIN) + err = init_builtin(name); + else + err = PyImport_ImportFrozenModuleObject(name); + if (err < 0) + return NULL; + if (err == 0) { + PyErr_Format(PyExc_ImportError, + "Purported %s module %R not found", + type == C_BUILTIN ? "builtin" : "frozen", + name); + return NULL; + } + + modules = PyImport_GetModuleDict(); + m = PyDict_GetItem(modules, name); + if (m == NULL) { + PyErr_Format( + PyExc_ImportError, + "%s module %R not properly initialized", + type == C_BUILTIN ? "builtin" : "frozen", + name); + return NULL; + } + Py_INCREF(m); + return m; +} + +/* Load an external module using the default search path and return + its module object WITH INCREMENTED REFERENCE COUNT */ + +static PyObject * +load_module(PyObject *name, FILE *fp, PyObject *pathname, int type, PyObject *loader) +{ + PyObject *m; + + /* First check that there's an open file (if we need one) */ + switch (type) { + case PY_SOURCE: + case PY_COMPILED: + if (fp == NULL) { + PyErr_Format(PyExc_ValueError, + "file object required for import (type code %d)", + type); + return NULL; + } + } + + switch (type) { + + case PY_SOURCE: + m = load_source_module(name, pathname, fp); + break; + + case PY_COMPILED: + m = load_compiled_module(name, pathname, fp); + break; + +#ifdef HAVE_DYNAMIC_LOADING + case C_EXTENSION: + m = _PyImport_LoadDynamicModule(name, pathname, fp); + break; +#endif + + case PKG_DIRECTORY: + m = load_package(name, pathname); + break; + + case C_BUILTIN: + case PY_FROZEN: + m = load_builtin(name, type); + break; + + case IMP_HOOK: { + _Py_IDENTIFIER(load_module); + if (loader == NULL) { + PyErr_SetString(PyExc_ImportError, + "import hook without loader"); + return NULL; + } + m = _PyObject_CallMethodId(loader, &PyId_load_module, "O", name); + break; + } + + default: + PyErr_Format(PyExc_ImportError, + "Don't know how to import %R (type code %d)", + name, type); + m = NULL; + + } + + return m; +} + + +/* Initialize a built-in module. + Return 1 for success, 0 if the module is not found, and -1 with + an exception set if the initialization failed. */ + +static int +init_builtin(PyObject *name) +{ + struct _inittab *p; + + if (_PyImport_FindExtensionObject(name, name) != NULL) + return 1; + + for (p = PyImport_Inittab; p->name != NULL; p++) { + PyObject *mod; + if (PyUnicode_CompareWithASCIIString(name, p->name) == 0) { + if (p->initfunc == NULL) { + PyErr_Format(PyExc_ImportError, + "Cannot re-init internal module %R", + name); + return -1; + } + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # builtin\n", name); + mod = (*p->initfunc)(); + if (mod == 0) + return -1; + if (_PyImport_FixupExtensionObject(mod, name, name) < 0) + return -1; + /* FixupExtension has put the module into sys.modules, + so we can release our own reference. */ + Py_DECREF(mod); + return 1; + } + } + return 0; +} + + +/* Frozen modules */ + +static struct _frozen * +find_frozen(PyObject *name) +{ + struct _frozen *p; + + if (name == NULL) + return NULL; + + for (p = PyImport_FrozenModules; ; p++) { + if (p->name == NULL) + return NULL; + if (PyUnicode_CompareWithASCIIString(name, p->name) == 0) + break; + } + return p; +} + +static PyObject * +get_frozen_object(PyObject *name) +{ + struct _frozen *p = find_frozen(name); + int size; + + if (p == NULL) { + PyErr_Format(PyExc_ImportError, + "No such frozen object named %R", + name); + return NULL; + } + if (p->code == NULL) { + PyErr_Format(PyExc_ImportError, + "Excluded frozen object named %R", + name); + return NULL; + } + size = p->size; + if (size < 0) + size = -size; + return PyMarshal_ReadObjectFromString((char *)p->code, size); +} + +static PyObject * +is_frozen_package(PyObject *name) +{ + struct _frozen *p = find_frozen(name); + int size; + + if (p == NULL) { + PyErr_Format(PyExc_ImportError, + "No such frozen object named %R", + name); + return NULL; + } + + size = p->size; + + if (size < 0) + Py_RETURN_TRUE; + else + Py_RETURN_FALSE; +} + + +/* Initialize a frozen module. + Return 1 for success, 0 if the module is not found, and -1 with + an exception set if the initialization failed. + This function is also used from frozenmain.c */ + +int +PyImport_ImportFrozenModuleObject(PyObject *name) +{ + struct _frozen *p; + PyObject *co, *m, *path; + int ispackage; + int size; + + p = find_frozen(name); + + if (p == NULL) + return 0; + if (p->code == NULL) { + PyErr_Format(PyExc_ImportError, + "Excluded frozen object named %R", + name); + return -1; + } + size = p->size; + ispackage = (size < 0); + if (ispackage) + size = -size; + if (Py_VerboseFlag) + PySys_FormatStderr("import %U # frozen%s\n", + name, ispackage ? " package" : ""); + co = PyMarshal_ReadObjectFromString((char *)p->code, size); + if (co == NULL) + return -1; + if (!PyCode_Check(co)) { + PyErr_Format(PyExc_TypeError, + "frozen object %R is not a code object", + name); + goto err_return; + } + if (ispackage) { + /* Set __path__ to the package name */ + PyObject *d, *l; + int err; + m = PyImport_AddModuleObject(name); + if (m == NULL) + goto err_return; + d = PyModule_GetDict(m); + l = PyList_New(1); + if (l == NULL) { + goto err_return; + } + Py_INCREF(name); + PyList_SET_ITEM(l, 0, name); + err = PyDict_SetItemString(d, "__path__", l); + Py_DECREF(l); + if (err != 0) + goto err_return; + } + path = PyUnicode_FromString(""); + if (path == NULL) + goto err_return; + m = PyImport_ExecCodeModuleObject(name, co, path, NULL); + Py_DECREF(path); + if (m == NULL) + goto err_return; + Py_DECREF(co); + Py_DECREF(m); + return 1; +err_return: + Py_DECREF(co); + return -1; +} + +int +PyImport_ImportFrozenModule(char *name) +{ + PyObject *nameobj; + int ret; + nameobj = PyUnicode_InternFromString(name); + if (nameobj == NULL) + return -1; + ret = PyImport_ImportFrozenModuleObject(nameobj); + Py_DECREF(nameobj); + return ret; +} + + +/* Import a module, either built-in, frozen, or external, and return + its module object WITH INCREMENTED REFERENCE COUNT */ + +PyObject * +PyImport_ImportModule(const char *name) +{ + PyObject *pname; + PyObject *result; + + pname = PyUnicode_FromString(name); + if (pname == NULL) + return NULL; + result = PyImport_Import(pname); + Py_DECREF(pname); + return result; +} + +/* Import a module without blocking + * + * At first it tries to fetch the module from sys.modules. If the module was + * never loaded before it loads it with PyImport_ImportModule() unless another + * thread holds the import lock. In the latter case the function raises an + * ImportError instead of blocking. + * + * Returns the module object with incremented ref count. + */ +PyObject * +PyImport_ImportModuleNoBlock(const char *name) +{ + PyObject *nameobj, *modules, *result; +#ifdef WITH_THREAD + long me; +#endif + + /* Try to get the module from sys.modules[name] */ + modules = PyImport_GetModuleDict(); + if (modules == NULL) + return NULL; + + nameobj = PyUnicode_FromString(name); + if (nameobj == NULL) + return NULL; + result = PyDict_GetItem(modules, nameobj); + if (result != NULL) { + Py_DECREF(nameobj); + Py_INCREF(result); + return result; + } + PyErr_Clear(); +#ifdef WITH_THREAD + /* check the import lock + * me might be -1 but I ignore the error here, the lock function + * takes care of the problem */ + me = PyThread_get_thread_ident(); + if (import_lock_thread == -1 || import_lock_thread == me) { + /* no thread or me is holding the lock */ + result = PyImport_Import(nameobj); + } + else { + PyErr_Format(PyExc_ImportError, + "Failed to import %R because the import lock" + "is held by another thread.", + nameobj); + result = NULL; + } +#else + result = PyImport_Import(nameobj); +#endif + Py_DECREF(nameobj); + return result; +} + +/* Forward declarations for helper routines */ +static PyObject *get_parent(PyObject *globals, + PyObject **p_name, + int level); +static PyObject *load_next(PyObject *mod, PyObject *altmod, + PyObject *inputname, PyObject **p_outputname, + PyObject **p_prefix); +static int mark_miss(PyObject *name); +static int ensure_fromlist(PyObject *mod, PyObject *fromlist, + PyObject *buf, int recursive); +static PyObject * import_submodule(PyObject *mod, PyObject *name, + PyObject *fullname); + +/* The Magnum Opus of dotted-name import :-) */ + +static PyObject * +import_module_level(PyObject *name, PyObject *globals, PyObject *locals, + PyObject *fromlist, int level) +{ + PyObject *parent, *next, *inputname, *outputname; + PyObject *head = NULL; + PyObject *tail = NULL; + PyObject *prefix = NULL; + PyObject *result = NULL; + Py_ssize_t sep, altsep; + + if (PyUnicode_READY(name)) + return NULL; + + sep = PyUnicode_FindChar(name, SEP, 0, PyUnicode_GET_LENGTH(name), 1); + if (sep == -2) + return NULL; +#ifdef ALTSEP + altsep = PyUnicode_FindChar(name, ALTSEP, 0, PyUnicode_GET_LENGTH(name), 1); + if (altsep == -2) + return NULL; +#else + altsep = -1; +#endif + if (sep != -1 || altsep != -1) + { + PyErr_SetString(PyExc_ImportError, + "Import by filename is not supported."); + return NULL; + } + + parent = get_parent(globals, &prefix, level); + if (parent == NULL) { + return NULL; + } + + if (PyUnicode_READY(prefix)) + return NULL; + + head = load_next(parent, level < 0 ? Py_None : parent, name, &outputname, + &prefix); + if (head == NULL) + goto out; + + tail = head; + Py_INCREF(tail); + + if (outputname != NULL) { + while (1) { + inputname = outputname; + next = load_next(tail, tail, inputname, &outputname, + &prefix); + Py_CLEAR(tail); + Py_CLEAR(inputname); + if (next == NULL) + goto out; + tail = next; + + if (outputname == NULL) { + break; + } + } + } + if (tail == Py_None) { + /* If tail is Py_None, both get_parent and load_next found + an empty module name: someone called __import__("") or + doctored faulty bytecode */ + PyErr_SetString(PyExc_ValueError, "Empty module name"); + goto out; + } + + if (fromlist != NULL) { + if (fromlist == Py_None || !PyObject_IsTrue(fromlist)) + fromlist = NULL; + } + + if (fromlist == NULL) { + result = head; + Py_INCREF(result); + goto out; + } + + if (!ensure_fromlist(tail, fromlist, prefix, 0)) + goto out; + + result = tail; + Py_INCREF(result); + out: + Py_XDECREF(head); + Py_XDECREF(tail); + Py_XDECREF(prefix); + return result; +} + +PyObject * +PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, + PyObject *locals, PyObject *fromlist, + int level) +{ + PyObject *mod; + _PyImport_AcquireLock(); + mod = import_module_level(name, globals, locals, fromlist, level); + if (_PyImport_ReleaseLock() < 0) { + Py_XDECREF(mod); + PyErr_SetString(PyExc_RuntimeError, + "not holding the import lock"); + return NULL; + } + return mod; +} + +PyObject * +PyImport_ImportModuleLevel(const char *name, PyObject *globals, PyObject *locals, + PyObject *fromlist, int level) +{ + PyObject *nameobj, *mod; + nameobj = PyUnicode_FromString(name); + if (nameobj == NULL) + return NULL; + mod = PyImport_ImportModuleLevelObject(nameobj, globals, locals, + fromlist, level); + Py_DECREF(nameobj); + return mod; +} + + +/* Return the package that an import is being performed in. If globals comes + from the module foo.bar.bat (not itself a package), this returns the + sys.modules entry for foo.bar. If globals is from a package's __init__.py, + the package's entry in sys.modules is returned, as a borrowed reference. + + The name of the returned package is returned in *p_name. + + If globals doesn't come from a package or a module in a package, or a + corresponding entry is not found in sys.modules, Py_None is returned. +*/ +static PyObject * +get_parent(PyObject *globals, PyObject **p_name, int level) +{ + PyObject *nameobj; + + static PyObject *namestr = NULL; + static PyObject *pathstr = NULL; + static PyObject *pkgstr = NULL; + PyObject *pkgname, *modname, *modpath, *modules, *parent; + int orig_level = level; + + if (globals == NULL || !PyDict_Check(globals) || !level) + goto return_none; + + if (namestr == NULL) { + namestr = PyUnicode_InternFromString("__name__"); + if (namestr == NULL) + return NULL; + } + if (pathstr == NULL) { + pathstr = PyUnicode_InternFromString("__path__"); + if (pathstr == NULL) + return NULL; + } + if (pkgstr == NULL) { + pkgstr = PyUnicode_InternFromString("__package__"); + if (pkgstr == NULL) + return NULL; + } + + pkgname = PyDict_GetItem(globals, pkgstr); + + if ((pkgname != NULL) && (pkgname != Py_None)) { + /* __package__ is set, so use it */ + if (!PyUnicode_Check(pkgname)) { + PyErr_SetString(PyExc_ValueError, + "__package__ set to non-string"); + return NULL; + } + if (PyUnicode_GET_LENGTH(pkgname) == 0) { + if (level > 0) { + PyErr_SetString(PyExc_ValueError, + "Attempted relative import in non-package"); + return NULL; + } + goto return_none; + } + Py_INCREF(pkgname); + nameobj = pkgname; + } else { + /* __package__ not set, so figure it out and set it */ + modname = PyDict_GetItem(globals, namestr); + if (modname == NULL || !PyUnicode_Check(modname)) + goto return_none; + + modpath = PyDict_GetItem(globals, pathstr); + if (modpath != NULL) { + /* __path__ is set, so modname is already the package name */ + int error; + + error = PyDict_SetItem(globals, pkgstr, modname); + if (error) { + PyErr_SetString(PyExc_ValueError, + "Could not set __package__"); + return NULL; + } + Py_INCREF(modname); + nameobj = modname; + } else { + /* Normal module, so work out the package name if any */ + Py_ssize_t len; + len = PyUnicode_FindChar(modname, '.', + 0, PyUnicode_GET_LENGTH(modname), -1); + if (len == -2) + return NULL; + if (len < 0) { + if (level > 0) { + PyErr_SetString(PyExc_ValueError, + "Attempted relative import in non-package"); + return NULL; + } + if (PyDict_SetItem(globals, pkgstr, Py_None)) { + PyErr_SetString(PyExc_ValueError, + "Could not set __package__"); + return NULL; + } + goto return_none; + } + pkgname = PyUnicode_Substring(modname, 0, len); + if (pkgname == NULL) + return NULL; + if (PyDict_SetItem(globals, pkgstr, pkgname)) { + Py_DECREF(pkgname); + PyErr_SetString(PyExc_ValueError, + "Could not set __package__"); + return NULL; + } + nameobj = pkgname; + } + } + if (level > 1) { + Py_ssize_t dot, end = PyUnicode_GET_LENGTH(nameobj); + PyObject *newname; + while (--level > 0) { + dot = PyUnicode_FindChar(nameobj, '.', 0, end, -1); + if (dot == -2) { + Py_DECREF(nameobj); + return NULL; + } + if (dot < 0) { + Py_DECREF(nameobj); + PyErr_SetString(PyExc_ValueError, + "Attempted relative import beyond " + "toplevel package"); + return NULL; + } + end = dot; + } + newname = PyUnicode_Substring(nameobj, 0, end); + Py_DECREF(nameobj); + if (newname == NULL) + return NULL; + nameobj = newname; + } + + modules = PyImport_GetModuleDict(); + parent = PyDict_GetItem(modules, nameobj); + if (parent == NULL) { + int err; + + if (orig_level >= 1) { + PyErr_Format(PyExc_SystemError, + "Parent module %R not loaded, " + "cannot perform relative import", nameobj); + Py_DECREF(nameobj); + return NULL; + } + + err = PyErr_WarnFormat( + PyExc_RuntimeWarning, 1, + "Parent module %R not found while handling absolute import", + nameobj); + Py_DECREF(nameobj); + if (err) + return NULL; + + goto return_none; + } + *p_name = nameobj; + return parent; + /* We expect, but can't guarantee, if parent != None, that: + - parent.__name__ == name + - parent.__dict__ is globals + If this is violated... Who cares? */ + +return_none: + nameobj = PyUnicode_New(0, 0); + if (nameobj == NULL) + return NULL; + *p_name = nameobj; + return Py_None; +} + +/* altmod is either None or same as mod */ +static PyObject * +load_next(PyObject *mod, PyObject *altmod, + PyObject *inputname, PyObject **p_outputname, + PyObject **p_prefix) +{ + Py_ssize_t dot; + Py_ssize_t len; + PyObject *fullname, *name = NULL, *result; + + *p_outputname = NULL; + + len = PyUnicode_GET_LENGTH(inputname); + if (len == 0) { + /* completely empty module name should only happen in + 'from . import' (or '__import__("")')*/ + Py_INCREF(mod); + return mod; + } + + + dot = PyUnicode_FindChar(inputname, '.', 0, len, 1); + if (dot >= 0) { + len = dot; + if (len == 0) { + PyErr_SetString(PyExc_ValueError, + "Empty module name"); + goto error; + } + } + + /* name = inputname[:len] */ + name = PyUnicode_Substring(inputname, 0, len); + if (name == NULL) + goto error; + + if (PyUnicode_GET_LENGTH(*p_prefix)) { + /* fullname = prefix + "." + name */ + fullname = PyUnicode_FromFormat("%U.%U", *p_prefix, name); + if (fullname == NULL) + goto error; + } + else { + fullname = name; + Py_INCREF(fullname); + } + + result = import_submodule(mod, name, fullname); + Py_DECREF(*p_prefix); + /* Transfer reference. */ + *p_prefix = fullname; + if (result == Py_None && altmod != mod) { + Py_DECREF(result); + /* Here, altmod must be None and mod must not be None */ + result = import_submodule(altmod, name, name); + if (result != NULL && result != Py_None) { + if (mark_miss(*p_prefix) != 0) { + Py_DECREF(result); + goto error; + } + Py_DECREF(*p_prefix); + *p_prefix = name; + Py_INCREF(*p_prefix); + } + } + if (result == NULL) + goto error; + + if (result == Py_None) { + Py_DECREF(result); + PyErr_Format(PyExc_ImportError, + "No module named %R", inputname); + goto error; + } + + if (dot >= 0) { + *p_outputname = PyUnicode_Substring(inputname, dot+1, + PyUnicode_GET_LENGTH(inputname)); + if (*p_outputname == NULL) { + Py_DECREF(result); + goto error; + } + } + + Py_DECREF(name); + return result; + +error: + Py_XDECREF(name); + return NULL; +} + +static int +mark_miss(PyObject *name) +{ + PyObject *modules = PyImport_GetModuleDict(); + return PyDict_SetItem(modules, name, Py_None); +} + +static int +ensure_fromlist(PyObject *mod, PyObject *fromlist, PyObject *name, + int recursive) +{ + int i; + PyObject *fullname; + Py_ssize_t fromlist_len; + + if (!_PyObject_HasAttrId(mod, &PyId___path__)) + return 1; + + fromlist_len = PySequence_Size(fromlist); + + for (i = 0; i < fromlist_len; i++) { + PyObject *item = PySequence_GetItem(fromlist, i); + int hasit; + if (item == NULL) + return 0; + if (!PyUnicode_Check(item)) { + PyErr_SetString(PyExc_TypeError, + "Item in ``from list'' not a string"); + Py_DECREF(item); + return 0; + } + if (PyUnicode_READ_CHAR(item, 0) == '*') { + PyObject *all; + _Py_IDENTIFIER(__all__); + Py_DECREF(item); + /* See if the package defines __all__ */ + if (recursive) + continue; /* Avoid endless recursion */ + all = _PyObject_GetAttrId(mod, &PyId___all__); + if (all == NULL) + PyErr_Clear(); + else { + int ret = ensure_fromlist(mod, all, name, 1); + Py_DECREF(all); + if (!ret) + return 0; + } + continue; + } + hasit = PyObject_HasAttr(mod, item); + if (!hasit) { + PyObject *submod; + fullname = PyUnicode_FromFormat("%U.%U", name, item); + if (fullname != NULL) { + submod = import_submodule(mod, item, fullname); + Py_DECREF(fullname); + } + else + submod = NULL; + Py_XDECREF(submod); + if (submod == NULL) { + Py_DECREF(item); + return 0; + } + } + Py_DECREF(item); + } + + return 1; +} + +static int +add_submodule(PyObject *mod, PyObject *submod, PyObject *fullname, + PyObject *subname, PyObject *modules) +{ + if (mod == Py_None) + return 1; + /* Irrespective of the success of this load, make a + reference to it in the parent package module. A copy gets + saved in the modules dictionary under the full name, so get a + reference from there, if need be. (The exception is when the + load failed with a SyntaxError -- then there's no trace in + sys.modules. In that case, of course, do nothing extra.) */ + if (submod == NULL) { + submod = PyDict_GetItem(modules, fullname); + if (submod == NULL) + return 1; + } + if (PyModule_Check(mod)) { + /* We can't use setattr here since it can give a + * spurious warning if the submodule name shadows a + * builtin name */ + PyObject *dict = PyModule_GetDict(mod); + if (!dict) + return 0; + if (PyDict_SetItem(dict, subname, submod) < 0) + return 0; + } + else { + if (PyObject_SetAttr(mod, subname, submod) < 0) + return 0; + } + return 1; +} + +static PyObject * +import_submodule(PyObject *mod, PyObject *subname, PyObject *fullname) +{ + PyObject *modules = PyImport_GetModuleDict(); + PyObject *m = NULL, *bufobj, *path_list, *loader; + struct filedescr *fdp; + FILE *fp; + + /* Require: + if mod == None: subname == fullname + else: mod.__name__ + "." + subname == fullname + */ + + if ((m = PyDict_GetItem(modules, fullname)) != NULL) { + Py_INCREF(m); + return m; + } + + if (mod == Py_None) + path_list = NULL; + else { + path_list = _PyObject_GetAttrId(mod, &PyId___path__); + if (path_list == NULL) { + PyErr_Clear(); + Py_INCREF(Py_None); + return Py_None; + } + } + + fdp = find_module(fullname, subname, path_list, + &bufobj, &fp, &loader); + Py_XDECREF(path_list); + if (fdp == NULL) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + return NULL; + PyErr_Clear(); + Py_INCREF(Py_None); + return Py_None; + } + m = load_module(fullname, fp, bufobj, fdp->type, loader); + Py_XDECREF(bufobj); + Py_XDECREF(loader); + if (fp) + fclose(fp); + if (m == NULL) + return NULL; + if (!add_submodule(mod, m, fullname, subname, modules)) { + Py_XDECREF(m); + return NULL; + } + return m; +} + + +/* Re-import a module of any kind and return its module object, WITH + INCREMENTED REFERENCE COUNT */ + +PyObject * +PyImport_ReloadModule(PyObject *m) +{ + PyInterpreterState *interp = PyThreadState_Get()->interp; + PyObject *modules_reloading = interp->modules_reloading; + PyObject *modules = PyImport_GetModuleDict(); + PyObject *path_list = NULL, *loader = NULL, *existing_m = NULL; + PyObject *name, *bufobj, *subname; + Py_ssize_t subname_start; + struct filedescr *fdp; + FILE *fp = NULL; + PyObject *newm = NULL; + + if (modules_reloading == NULL) { + Py_FatalError("PyImport_ReloadModule: " + "no modules_reloading dictionary!"); + return NULL; + } + + if (m == NULL || !PyModule_Check(m)) { + PyErr_SetString(PyExc_TypeError, + "reload() argument must be module"); + return NULL; + } + name = PyModule_GetNameObject(m); + if (name == NULL || PyUnicode_READY(name) == -1) + return NULL; + if (m != PyDict_GetItem(modules, name)) { + PyErr_Format(PyExc_ImportError, + "reload(): module %R not in sys.modules", + name); + Py_DECREF(name); + return NULL; + } + existing_m = PyDict_GetItem(modules_reloading, name); + if (existing_m != NULL) { + /* Due to a recursive reload, this module is already + being reloaded. */ + Py_DECREF(name); + Py_INCREF(existing_m); + return existing_m; + } + if (PyDict_SetItem(modules_reloading, name, m) < 0) { + Py_DECREF(name); + return NULL; + } + + subname_start = PyUnicode_FindChar(name, '.', 0, + PyUnicode_GET_LENGTH(name), -1); + if (subname_start == -1) { + Py_INCREF(name); + subname = name; + } + else { + PyObject *parentname, *parent; + Py_ssize_t len; + parentname = PyUnicode_Substring(name, 0, subname_start); + if (parentname == NULL) { + goto error; + } + parent = PyDict_GetItem(modules, parentname); + if (parent == NULL) { + PyErr_Format(PyExc_ImportError, + "reload(): parent %R not in sys.modules", + parentname); + Py_DECREF(parentname); + goto error; + } + Py_DECREF(parentname); + path_list = _PyObject_GetAttrId(parent, &PyId___path__); + if (path_list == NULL) + PyErr_Clear(); + subname_start++; + len = PyUnicode_GET_LENGTH(name) - (subname_start + 1); + subname = PyUnicode_Substring(name, subname_start, len); + } + if (subname == NULL) + goto error; + fdp = find_module(name, subname, path_list, + &bufobj, &fp, &loader); + Py_DECREF(subname); + Py_XDECREF(path_list); + + if (fdp == NULL) { + Py_XDECREF(loader); + goto error; + } + + newm = load_module(name, fp, bufobj, fdp->type, loader); + Py_XDECREF(bufobj); + Py_XDECREF(loader); + + if (fp) + fclose(fp); + if (newm == NULL) { + /* load_module probably removed name from modules because of + * the error. Put back the original module object. We're + * going to return NULL in this case regardless of whether + * replacing name succeeds, so the return value is ignored. + */ + PyDict_SetItem(modules, name, m); + } + +error: + imp_modules_reloading_clear(); + Py_DECREF(name); + return newm; +} + + +/* Higher-level import emulator which emulates the "import" statement + more accurately -- it invokes the __import__() function from the + builtins of the current globals. This means that the import is + done using whatever import hooks are installed in the current + environment. + A dummy list ["__doc__"] is passed as the 4th argument so that + e.g. PyImport_Import(PyUnicode_FromString("win32com.client.gencache")) + will return instead of . */ + +PyObject * +PyImport_Import(PyObject *module_name) +{ + static PyObject *silly_list = NULL; + static PyObject *builtins_str = NULL; + static PyObject *import_str = NULL; + PyObject *globals = NULL; + PyObject *import = NULL; + PyObject *builtins = NULL; + PyObject *modules = NULL; + PyObject *r = NULL; + + /* Initialize constant string objects */ + if (silly_list == NULL) { + import_str = PyUnicode_InternFromString("__import__"); + if (import_str == NULL) + return NULL; + builtins_str = PyUnicode_InternFromString("__builtins__"); + if (builtins_str == NULL) + return NULL; + silly_list = PyList_New(0); + if (silly_list == NULL) + return NULL; + } + + /* Get the builtins from current globals */ + globals = PyEval_GetGlobals(); + if (globals != NULL) { + Py_INCREF(globals); + builtins = PyObject_GetItem(globals, builtins_str); + if (builtins == NULL) + goto err; + } + else { + /* No globals -- use standard builtins, and fake globals */ + builtins = PyImport_ImportModuleLevel("builtins", + NULL, NULL, NULL, 0); + if (builtins == NULL) + return NULL; + globals = Py_BuildValue("{OO}", builtins_str, builtins); + if (globals == NULL) + goto err; + } + + /* Get the __import__ function from the builtins */ + if (PyDict_Check(builtins)) { + import = PyObject_GetItem(builtins, import_str); + if (import == NULL) + PyErr_SetObject(PyExc_KeyError, import_str); + } + else + import = PyObject_GetAttr(builtins, import_str); + if (import == NULL) + goto err; + + /* Call the __import__ function with the proper argument list + Always use absolute import here. + Calling for side-effect of import. */ + r = PyObject_CallFunction(import, "OOOOi", module_name, globals, + globals, silly_list, 0, NULL); + if (r == NULL) + goto err; + Py_DECREF(r); + + modules = PyImport_GetModuleDict(); + r = PyDict_GetItem(modules, module_name); + if (r != NULL) + Py_INCREF(r); + + err: + Py_XDECREF(globals); + Py_XDECREF(builtins); + Py_XDECREF(import); + + return r; +} + + +/* Module 'imp' provides Python access to the primitives used for + importing modules. +*/ + +static PyObject * +imp_make_magic(long magic) +{ + char buf[4]; + + buf[0] = (char) ((magic >> 0) & 0xff); + buf[1] = (char) ((magic >> 8) & 0xff); + buf[2] = (char) ((magic >> 16) & 0xff); + buf[3] = (char) ((magic >> 24) & 0xff); + + return PyBytes_FromStringAndSize(buf, 4); +} + +static PyObject * +imp_get_magic(PyObject *self, PyObject *noargs) +{ + return imp_make_magic(pyc_magic); +} + +static PyObject * +imp_get_tag(PyObject *self, PyObject *noargs) +{ + return PyUnicode_FromString(pyc_tag); +} + +static PyObject * +imp_get_suffixes(PyObject *self, PyObject *noargs) +{ + PyObject *list; + struct filedescr *fdp; + + list = PyList_New(0); + if (list == NULL) + return NULL; + for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) { + PyObject *item = Py_BuildValue("ssi", + fdp->suffix, fdp->mode, fdp->type); + if (item == NULL) { + Py_DECREF(list); + return NULL; + } + if (PyList_Append(list, item) < 0) { + Py_DECREF(list); + Py_DECREF(item); + return NULL; + } + Py_DECREF(item); + } + return list; +} + +static PyObject * +call_find_module(PyObject *name, PyObject *path_list) +{ + extern int fclose(FILE *); + PyObject *fob, *ret; + PyObject *pathobj; + struct filedescr *fdp; + FILE *fp; + int fd = -1; + char *found_encoding = NULL; + char *encoding = NULL; + + if (path_list == Py_None) + path_list = NULL; + fdp = find_module(NULL, name, path_list, + &pathobj, &fp, NULL); + if (fdp == NULL) + return NULL; + if (fp != NULL) { + fd = fileno(fp); + if (fd != -1) + fd = dup(fd); + fclose(fp); + if (fd == -1) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + fp = NULL; + } + if (fd != -1) { + if (strchr(fdp->mode, 'b') == NULL) { + /* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed + memory. */ + found_encoding = PyTokenizer_FindEncodingFilename(fd, pathobj); + lseek(fd, 0, 0); /* Reset position */ + if (found_encoding == NULL && PyErr_Occurred()) { + Py_XDECREF(pathobj); + return NULL; + } + encoding = (found_encoding != NULL) ? found_encoding : + (char*)PyUnicode_GetDefaultEncoding(); + } + fob = PyFile_FromFd(fd, NULL, fdp->mode, -1, + (char*)encoding, NULL, NULL, 1); + if (fob == NULL) { + Py_XDECREF(pathobj); + close(fd); + PyMem_FREE(found_encoding); + return NULL; + } + } + else { + fob = Py_None; + Py_INCREF(fob); + } + if (pathobj == NULL) { + Py_INCREF(Py_None); + pathobj = Py_None; + } + ret = Py_BuildValue("NN(ssi)", + fob, pathobj, fdp->suffix, fdp->mode, fdp->type); + PyMem_FREE(found_encoding); + + return ret; +} + +static PyObject * +imp_find_module(PyObject *self, PyObject *args) +{ + PyObject *name, *path_list = NULL; + if (!PyArg_ParseTuple(args, "U|O:find_module", + &name, &path_list)) + return NULL; + return call_find_module(name, path_list); +} + +static PyObject * +imp_init_builtin(PyObject *self, PyObject *args) +{ + PyObject *name; + int ret; + PyObject *m; + if (!PyArg_ParseTuple(args, "U:init_builtin", &name)) + return NULL; + ret = init_builtin(name); + if (ret < 0) + return NULL; + if (ret == 0) { + Py_INCREF(Py_None); + return Py_None; + } + m = PyImport_AddModuleObject(name); + Py_XINCREF(m); + return m; +} + +static PyObject * +imp_init_frozen(PyObject *self, PyObject *args) +{ + PyObject *name; + int ret; + PyObject *m; + if (!PyArg_ParseTuple(args, "U:init_frozen", &name)) + return NULL; + ret = PyImport_ImportFrozenModuleObject(name); + if (ret < 0) + return NULL; + if (ret == 0) { + Py_INCREF(Py_None); + return Py_None; + } + m = PyImport_AddModuleObject(name); + Py_XINCREF(m); + return m; +} + +static PyObject * +imp_get_frozen_object(PyObject *self, PyObject *args) +{ + PyObject *name; + + if (!PyArg_ParseTuple(args, "U:get_frozen_object", &name)) + return NULL; + return get_frozen_object(name); +} + +static PyObject * +imp_is_frozen_package(PyObject *self, PyObject *args) +{ + PyObject *name; + + if (!PyArg_ParseTuple(args, "U:is_frozen_package", &name)) + return NULL; + return is_frozen_package(name); +} + +static PyObject * +imp_is_builtin(PyObject *self, PyObject *args) +{ + PyObject *name; + if (!PyArg_ParseTuple(args, "U:is_builtin", &name)) + return NULL; + return PyLong_FromLong(is_builtin(name)); +} + +static PyObject * +imp_is_frozen(PyObject *self, PyObject *args) +{ + PyObject *name; + struct _frozen *p; + if (!PyArg_ParseTuple(args, "U:is_frozen", &name)) + return NULL; + p = find_frozen(name); + return PyBool_FromLong((long) (p == NULL ? 0 : p->size)); +} + +static FILE * +get_file(PyObject *pathname, PyObject *fob, char *mode) +{ + FILE *fp; + if (mode[0] == 'U') + mode = "r" PY_STDIOTEXTMODE; + if (fob == NULL) { + fp = _Py_fopen(pathname, mode); + } + else { + int fd = PyObject_AsFileDescriptor(fob); + if (fd == -1) + return NULL; + if (!_PyVerify_fd(fd)) + goto error; + /* the FILE struct gets a new fd, so that it can be closed + * independently of the file descriptor given + */ + fd = dup(fd); + if (fd == -1) + goto error; + fp = fdopen(fd, mode); + } + if (fp) + return fp; +error: + PyErr_SetFromErrno(PyExc_IOError); + return NULL; +} + +static PyObject * +imp_load_compiled(PyObject *self, PyObject *args) +{ + PyObject *name, *pathname; + PyObject *fob = NULL; + PyObject *m; + FILE *fp; + if (!PyArg_ParseTuple(args, "UO&|O:load_compiled", + &name, + PyUnicode_FSDecoder, &pathname, + &fob)) + return NULL; + fp = get_file(pathname, fob, "rb"); + if (fp == NULL) { + Py_DECREF(pathname); + return NULL; + } + m = load_compiled_module(name, pathname, fp); + fclose(fp); + Py_DECREF(pathname); + return m; +} + +#ifdef HAVE_DYNAMIC_LOADING + +static PyObject * +imp_load_dynamic(PyObject *self, PyObject *args) +{ + PyObject *name, *pathname, *fob = NULL, *mod; + FILE *fp; + + if (!PyArg_ParseTuple(args, "UO&|O:load_dynamic", + &name, PyUnicode_FSDecoder, &pathname, &fob)) + return NULL; + if (fob != NULL) { + fp = get_file(NULL, fob, "r"); + if (fp == NULL) { + Py_DECREF(pathname); + return NULL; + } + } + else + fp = NULL; + mod = _PyImport_LoadDynamicModule(name, pathname, fp); + Py_DECREF(pathname); + if (fp) + fclose(fp); + return mod; +} + +#endif /* HAVE_DYNAMIC_LOADING */ + +static PyObject * +imp_load_source(PyObject *self, PyObject *args) +{ + PyObject *name, *pathname; + PyObject *fob = NULL; + PyObject *m; + FILE *fp; + if (!PyArg_ParseTuple(args, "UO&|O:load_source", + &name, + PyUnicode_FSDecoder, &pathname, + &fob)) + return NULL; + fp = get_file(pathname, fob, "r"); + if (fp == NULL) { + Py_DECREF(pathname); + return NULL; + } + m = load_source_module(name, pathname, fp); + Py_DECREF(pathname); + fclose(fp); + return m; +} + +static PyObject * +imp_load_module(PyObject *self, PyObject *args) +{ + PyObject *name, *fob, *pathname, *pathname_obj, *ret; + char *suffix; /* Unused */ + char *mode; + int type; + FILE *fp; + + if (!PyArg_ParseTuple(args, "UOO(ssi):load_module", + &name, &fob, &pathname_obj, &suffix, &mode, &type)) + return NULL; + if (pathname_obj != Py_None) { + if (!PyUnicode_FSDecoder(pathname_obj, &pathname)) + return NULL; + } + else + pathname = NULL; + + if (*mode) { + /* Mode must start with 'r' or 'U' and must not contain '+'. + Implicit in this test is the assumption that the mode + may contain other modifiers like 'b' or 't'. */ + + if (!(*mode == 'r' || *mode == 'U') || strchr(mode, '+')) { + PyErr_Format(PyExc_ValueError, + "invalid file open mode %.200s", mode); + Py_XDECREF(pathname); + return NULL; + } + } + if (fob == Py_None) + fp = NULL; + else { + fp = get_file(NULL, fob, mode); + if (fp == NULL) { + Py_XDECREF(pathname); + return NULL; + } + } + ret = load_module(name, fp, pathname, type, NULL); + Py_XDECREF(pathname); + if (fp) + fclose(fp); + return ret; +} + +static PyObject * +imp_load_package(PyObject *self, PyObject *args) +{ + PyObject *name, *pathname; + PyObject * ret; + if (!PyArg_ParseTuple(args, "UO&:load_package", + &name, PyUnicode_FSDecoder, &pathname)) + return NULL; + ret = load_package(name, pathname); + Py_DECREF(pathname); + return ret; +} + +static PyObject * +imp_new_module(PyObject *self, PyObject *args) +{ + PyObject *name; + if (!PyArg_ParseTuple(args, "U:new_module", &name)) + return NULL; + return PyModule_NewObject(name); +} + +static PyObject * +imp_reload(PyObject *self, PyObject *v) +{ + return PyImport_ReloadModule(v); +} + +PyDoc_STRVAR(doc_reload, +"reload(module) -> module\n\ +\n\ +Reload the module. The module must have been successfully imported before."); + +static PyObject * +imp_cache_from_source(PyObject *self, PyObject *args, PyObject *kws) +{ + static char *kwlist[] = {"path", "debug_override", NULL}; + + PyObject *pathname, *cpathname; + PyObject *debug_override = NULL; + int debug = !Py_OptimizeFlag; + + if (!PyArg_ParseTupleAndKeywords( + args, kws, "O&|O", kwlist, + PyUnicode_FSDecoder, &pathname, &debug_override)) + return NULL; + + if (debug_override != NULL && + (debug = PyObject_IsTrue(debug_override)) < 0) { + Py_DECREF(pathname); + return NULL; + } + + if (PyUnicode_READY(pathname) < 0) + return NULL; + + cpathname = make_compiled_pathname(pathname, debug); + Py_DECREF(pathname); + + if (cpathname == NULL) { + PyErr_Format(PyExc_SystemError, "path buffer too short"); + return NULL; + } + return cpathname; +} + +PyDoc_STRVAR(doc_cache_from_source, +"cache_from_source(path, [debug_override]) -> path\n\ +Given the path to a .py file, return the path to its .pyc/.pyo file.\n\ +\n\ +The .py file does not need to exist; this simply returns the path to the\n\ +.pyc/.pyo file calculated as if the .py file were imported. The extension\n\ +will be .pyc unless __debug__ is not defined, then it will be .pyo.\n\ +\n\ +If debug_override is not None, then it must be a boolean and is taken as\n\ +the value of __debug__ instead."); + +static PyObject * +imp_source_from_cache(PyObject *self, PyObject *args, PyObject *kws) +{ + static char *kwlist[] = {"path", NULL}; + PyObject *pathname, *source; + + if (!PyArg_ParseTupleAndKeywords( + args, kws, "O&", kwlist, + PyUnicode_FSDecoder, &pathname)) + return NULL; + + source = make_source_pathname(pathname); + if (source == NULL) { + PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %R", + pathname); + Py_DECREF(pathname); + return NULL; + } + Py_DECREF(pathname); + return source; +} + +PyDoc_STRVAR(doc_source_from_cache, +"source_from_cache(path) -> path\n\ +Given the path to a .pyc./.pyo file, return the path to its .py file.\n\ +\n\ +The .pyc/.pyo file does not need to exist; this simply returns the path to\n\ +the .py file calculated to correspond to the .pyc/.pyo file. If path\n\ +does not conform to PEP 3147 format, ValueError will be raised."); + +/* Doc strings */ + +PyDoc_STRVAR(doc_imp, +"This module provides the components needed to build your own\n\ +__import__ function. Undocumented functions are obsolete."); + +PyDoc_STRVAR(doc_find_module, +"find_module(name, [path]) -> (file, filename, (suffix, mode, type))\n\ +Search for a module. If path is omitted or None, search for a\n\ +built-in, frozen or special module and continue search in sys.path.\n\ +The module name cannot contain '.'; to search for a submodule of a\n\ +package, pass the submodule name and the package's __path__."); + +PyDoc_STRVAR(doc_load_module, +"load_module(name, file, filename, (suffix, mode, type)) -> module\n\ +Load a module, given information returned by find_module().\n\ +The module name must include the full package name, if any."); + +PyDoc_STRVAR(doc_get_magic, +"get_magic() -> string\n\ +Return the magic number for .pyc or .pyo files."); + +PyDoc_STRVAR(doc_get_tag, +"get_tag() -> string\n\ +Return the magic tag for .pyc or .pyo files."); + +PyDoc_STRVAR(doc_get_suffixes, +"get_suffixes() -> [(suffix, mode, type), ...]\n\ +Return a list of (suffix, mode, type) tuples describing the files\n\ +that find_module() looks for."); + +PyDoc_STRVAR(doc_new_module, +"new_module(name) -> module\n\ +Create a new module. Do not enter it in sys.modules.\n\ +The module name must include the full package name, if any."); + +PyDoc_STRVAR(doc_lock_held, +"lock_held() -> boolean\n\ +Return True if the import lock is currently held, else False.\n\ +On platforms without threads, return False."); + +PyDoc_STRVAR(doc_acquire_lock, +"acquire_lock() -> None\n\ +Acquires the interpreter's import lock for the current thread.\n\ +This lock should be used by import hooks to ensure thread-safety\n\ +when importing modules.\n\ +On platforms without threads, this function does nothing."); + +PyDoc_STRVAR(doc_release_lock, +"release_lock() -> None\n\ +Release the interpreter's import lock.\n\ +On platforms without threads, this function does nothing."); + +static PyMethodDef imp_methods[] = { + {"find_module", imp_find_module, METH_VARARGS, doc_find_module}, + {"get_magic", imp_get_magic, METH_NOARGS, doc_get_magic}, + {"get_tag", imp_get_tag, METH_NOARGS, doc_get_tag}, + {"get_suffixes", imp_get_suffixes, METH_NOARGS, doc_get_suffixes}, + {"load_module", imp_load_module, METH_VARARGS, doc_load_module}, + {"new_module", imp_new_module, METH_VARARGS, doc_new_module}, + {"lock_held", imp_lock_held, METH_NOARGS, doc_lock_held}, + {"acquire_lock", imp_acquire_lock, METH_NOARGS, doc_acquire_lock}, + {"release_lock", imp_release_lock, METH_NOARGS, doc_release_lock}, + {"reload", imp_reload, METH_O, doc_reload}, + {"cache_from_source", (PyCFunction)imp_cache_from_source, + METH_VARARGS | METH_KEYWORDS, doc_cache_from_source}, + {"source_from_cache", (PyCFunction)imp_source_from_cache, + METH_VARARGS | METH_KEYWORDS, doc_source_from_cache}, + /* The rest are obsolete */ + {"get_frozen_object", imp_get_frozen_object, METH_VARARGS}, + {"is_frozen_package", imp_is_frozen_package, METH_VARARGS}, + {"init_builtin", imp_init_builtin, METH_VARARGS}, + {"init_frozen", imp_init_frozen, METH_VARARGS}, + {"is_builtin", imp_is_builtin, METH_VARARGS}, + {"is_frozen", imp_is_frozen, METH_VARARGS}, + {"load_compiled", imp_load_compiled, METH_VARARGS}, +#ifdef HAVE_DYNAMIC_LOADING + {"load_dynamic", imp_load_dynamic, METH_VARARGS}, +#endif + {"load_package", imp_load_package, METH_VARARGS}, + {"load_source", imp_load_source, METH_VARARGS}, + {"_fix_co_filename", imp_fix_co_filename, METH_VARARGS}, + {NULL, NULL} /* sentinel */ +}; + +static int +setint(PyObject *d, char *name, int value) +{ + PyObject *v; + int err; + + v = PyLong_FromLong((long)value); + err = PyDict_SetItemString(d, name, v); + Py_XDECREF(v); + return err; +} + +typedef struct { + PyObject_HEAD +} NullImporter; + +static int +NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds) +{ +#ifndef MS_WINDOWS + PyObject *path; + struct stat statbuf; + int rv; + + if (!_PyArg_NoKeywords("NullImporter()", kwds)) + return -1; + + if (!PyArg_ParseTuple(args, "O&:NullImporter", + PyUnicode_FSConverter, &path)) + return -1; + + if (PyBytes_GET_SIZE(path) == 0) { + Py_DECREF(path); + PyErr_SetString(PyExc_ImportError, "empty pathname"); + return -1; + } + + rv = stat(PyBytes_AS_STRING(path), &statbuf); + Py_DECREF(path); + if (rv == 0) { + /* it exists */ + if (S_ISDIR(statbuf.st_mode)) { + /* it's a directory */ + PyErr_SetString(PyExc_ImportError, "existing directory"); + return -1; + } + } +#else /* MS_WINDOWS */ + PyObject *pathobj; + DWORD rv; + wchar_t *path; + + if (!_PyArg_NoKeywords("NullImporter()", kwds)) + return -1; + + if (!PyArg_ParseTuple(args, "U:NullImporter", + &pathobj)) + return -1; + + if (PyUnicode_GET_LENGTH(pathobj) == 0) { + PyErr_SetString(PyExc_ImportError, "empty pathname"); + return -1; + } + + path = PyUnicode_AsWideCharString(pathobj, NULL); + if (path == NULL) + return -1; + /* see issue1293 and issue3677: + * stat() on Windows doesn't recognise paths like + * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs. + */ + rv = GetFileAttributesW(path); + PyMem_Free(path); + if (rv != INVALID_FILE_ATTRIBUTES) { + /* it exists */ + if (rv & FILE_ATTRIBUTE_DIRECTORY) { + /* it's a directory */ + PyErr_SetString(PyExc_ImportError, "existing directory"); + return -1; + } + } +#endif + return 0; +} + +static PyObject * +NullImporter_find_module(NullImporter *self, PyObject *args) +{ + Py_RETURN_NONE; +} + +static PyMethodDef NullImporter_methods[] = { + {"find_module", (PyCFunction)NullImporter_find_module, METH_VARARGS, + "Always return None" + }, + {NULL} /* Sentinel */ +}; + + +PyTypeObject PyNullImporter_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + "imp.NullImporter", /*tp_name*/ + sizeof(NullImporter), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "Null importer object", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + NullImporter_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)NullImporter_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew /* tp_new */ +}; + +static struct PyModuleDef impmodule = { + PyModuleDef_HEAD_INIT, + "imp", + doc_imp, + 0, + imp_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit_imp(void) +{ + PyObject *m, *d; + + if (PyType_Ready(&PyNullImporter_Type) < 0) + return NULL; + + m = PyModule_Create(&impmodule); + if (m == NULL) + goto failure; + d = PyModule_GetDict(m); + if (d == NULL) + goto failure; + + if (setint(d, "SEARCH_ERROR", SEARCH_ERROR) < 0) goto failure; + if (setint(d, "PY_SOURCE", PY_SOURCE) < 0) goto failure; + if (setint(d, "PY_COMPILED", PY_COMPILED) < 0) goto failure; + if (setint(d, "C_EXTENSION", C_EXTENSION) < 0) goto failure; + if (setint(d, "PY_RESOURCE", PY_RESOURCE) < 0) goto failure; + if (setint(d, "PKG_DIRECTORY", PKG_DIRECTORY) < 0) goto failure; + if (setint(d, "C_BUILTIN", C_BUILTIN) < 0) goto failure; + if (setint(d, "PY_FROZEN", PY_FROZEN) < 0) goto failure; + if (setint(d, "PY_CODERESOURCE", PY_CODERESOURCE) < 0) goto failure; + if (setint(d, "IMP_HOOK", IMP_HOOK) < 0) goto failure; + + Py_INCREF(&PyNullImporter_Type); + PyModule_AddObject(m, "NullImporter", (PyObject *)&PyNullImporter_Type); + return m; + failure: + Py_XDECREF(m); + return NULL; +} + + +/* API for embedding applications that want to add their own entries + to the table of built-in modules. This should normally be called + *before* Py_Initialize(). When the table resize fails, -1 is + returned and the existing table is unchanged. + + After a similar function by Just van Rossum. */ + +int +PyImport_ExtendInittab(struct _inittab *newtab) +{ + static struct _inittab *our_copy = NULL; + struct _inittab *p; + int i, n; + + /* Count the number of entries in both tables */ + for (n = 0; newtab[n].name != NULL; n++) + ; + if (n == 0) + return 0; /* Nothing to do */ + for (i = 0; PyImport_Inittab[i].name != NULL; i++) + ; + + /* Allocate new memory for the combined table */ + p = our_copy; + PyMem_RESIZE(p, struct _inittab, i+n+1); + if (p == NULL) + return -1; + + /* Copy the tables into the new memory */ + if (our_copy != PyImport_Inittab) + memcpy(p, PyImport_Inittab, (i+1) * sizeof(struct _inittab)); + PyImport_Inittab = our_copy = p; + memcpy(p+i, newtab, (n+1) * sizeof(struct _inittab)); + + return 0; +} + +/* Shorthand to add a single entry given a name and a function */ + +int +PyImport_AppendInittab(const char *name, PyObject* (*initfunc)(void)) +{ + struct _inittab newtab[2]; + + memset(newtab, '\0', sizeof newtab); + + newtab[0].name = (char *)name; + newtab[0].initfunc = initfunc; + + return PyImport_ExtendInittab(newtab); +} + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/pystate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/pystate.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,729 @@ + +/* Thread and interpreter state structures and their interfaces */ + +#include "Python.h" + +/* -------------------------------------------------------------------------- +CAUTION + +Always use malloc() and free() directly in this file. A number of these +functions are advertised as safe to call when the GIL isn't held, and in +a debug build Python redirects (e.g.) PyMem_NEW (etc) to Python's debugging +obmalloc functions. Those aren't thread-safe (they rely on the GIL to avoid +the expense of doing their own locking). +-------------------------------------------------------------------------- */ + +#ifdef HAVE_DLOPEN +#ifdef HAVE_DLFCN_H +#include +#endif +#ifndef RTLD_LAZY +#define RTLD_LAZY 1 +#endif +#endif + + +#ifdef WITH_THREAD +#include "pythread.h" +static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */ +#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) +#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK) +#define HEAD_UNLOCK() PyThread_release_lock(head_mutex) + + +/* The single PyInterpreterState used by this process' + GILState implementation +*/ +static PyInterpreterState *autoInterpreterState = NULL; +static int autoTLSkey = 0; +#else +#define HEAD_INIT() /* Nothing */ +#define HEAD_LOCK() /* Nothing */ +#define HEAD_UNLOCK() /* Nothing */ +#endif + +static PyInterpreterState *interp_head = NULL; + +/* Assuming the current thread holds the GIL, this is the + PyThreadState for the current thread. */ +_Py_atomic_address _PyThreadState_Current = {NULL}; +PyThreadFrameGetter _PyThreadState_GetFrame = NULL; + +#ifdef WITH_THREAD +static void _PyGILState_NoteThreadState(PyThreadState* tstate); +#endif + + +PyInterpreterState * +PyInterpreterState_New(void) +{ + PyInterpreterState *interp = (PyInterpreterState *) + malloc(sizeof(PyInterpreterState)); + + if (interp != NULL) { + HEAD_INIT(); +#ifdef WITH_THREAD + if (head_mutex == NULL) + Py_FatalError("Can't initialize threads for interpreter"); +#endif + interp->modules = NULL; + interp->modules_reloading = NULL; + interp->modules_by_index = NULL; + interp->sysdict = NULL; + interp->builtins = NULL; + interp->tstate_head = NULL; + interp->codec_search_path = NULL; + interp->codec_search_cache = NULL; + interp->codec_error_registry = NULL; + interp->codecs_initialized = 0; + interp->fscodec_initialized = 0; +#ifdef HAVE_DLOPEN +#ifdef RTLD_NOW + interp->dlopenflags = RTLD_NOW; +#else + interp->dlopenflags = RTLD_LAZY; +#endif +#endif +#ifdef WITH_TSC + interp->tscdump = 0; +#endif + + HEAD_LOCK(); + interp->next = interp_head; + interp_head = interp; + HEAD_UNLOCK(); + } + + return interp; +} + + +void +PyInterpreterState_Clear(PyInterpreterState *interp) +{ + PyThreadState *p; + HEAD_LOCK(); + for (p = interp->tstate_head; p != NULL; p = p->next) + PyThreadState_Clear(p); + HEAD_UNLOCK(); + Py_CLEAR(interp->codec_search_path); + Py_CLEAR(interp->codec_search_cache); + Py_CLEAR(interp->codec_error_registry); + Py_CLEAR(interp->modules); + Py_CLEAR(interp->modules_by_index); + Py_CLEAR(interp->modules_reloading); + Py_CLEAR(interp->sysdict); + Py_CLEAR(interp->builtins); +} + + +static void +zapthreads(PyInterpreterState *interp) +{ + PyThreadState *p; + /* No need to lock the mutex here because this should only happen + when the threads are all really dead (XXX famous last words). */ + while ((p = interp->tstate_head) != NULL) { + PyThreadState_Delete(p); + } +} + + +void +PyInterpreterState_Delete(PyInterpreterState *interp) +{ + PyInterpreterState **p; + zapthreads(interp); + HEAD_LOCK(); + for (p = &interp_head; ; p = &(*p)->next) { + if (*p == NULL) + Py_FatalError( + "PyInterpreterState_Delete: invalid interp"); + if (*p == interp) + break; + } + if (interp->tstate_head != NULL) + Py_FatalError("PyInterpreterState_Delete: remaining threads"); + *p = interp->next; + HEAD_UNLOCK(); + free(interp); +#ifdef WITH_THREAD + if (interp_head == NULL && head_mutex != NULL) { + PyThread_free_lock(head_mutex); + head_mutex = NULL; + } +#endif +} + + +/* Default implementation for _PyThreadState_GetFrame */ +static struct _frame * +threadstate_getframe(PyThreadState *self) +{ + return self->frame; +} + +static PyThreadState * +new_threadstate(PyInterpreterState *interp, int init) +{ + PyThreadState *tstate = (PyThreadState *)malloc(sizeof(PyThreadState)); + + if (_PyThreadState_GetFrame == NULL) + _PyThreadState_GetFrame = threadstate_getframe; + + if (tstate != NULL) { + tstate->interp = interp; + + tstate->frame = NULL; + tstate->recursion_depth = 0; + tstate->overflowed = 0; + tstate->recursion_critical = 0; + tstate->tracing = 0; + tstate->use_tracing = 0; + tstate->tick_counter = 0; + tstate->gilstate_counter = 0; + tstate->async_exc = NULL; +#ifdef WITH_THREAD + tstate->thread_id = PyThread_get_thread_ident(); +#else + tstate->thread_id = 0; +#endif + + tstate->dict = NULL; + + tstate->curexc_type = NULL; + tstate->curexc_value = NULL; + tstate->curexc_traceback = NULL; + + tstate->exc_type = NULL; + tstate->exc_value = NULL; + tstate->exc_traceback = NULL; + + tstate->c_profilefunc = NULL; + tstate->c_tracefunc = NULL; + tstate->c_profileobj = NULL; + tstate->c_traceobj = NULL; + + if (init) + _PyThreadState_Init(tstate); + + HEAD_LOCK(); + tstate->next = interp->tstate_head; + interp->tstate_head = tstate; + HEAD_UNLOCK(); + } + + return tstate; +} + +PyThreadState * +PyThreadState_New(PyInterpreterState *interp) +{ + return new_threadstate(interp, 1); +} + +PyThreadState * +_PyThreadState_Prealloc(PyInterpreterState *interp) +{ + return new_threadstate(interp, 0); +} + +void +_PyThreadState_Init(PyThreadState *tstate) +{ +#ifdef WITH_THREAD + _PyGILState_NoteThreadState(tstate); +#endif +} + +PyObject* +PyState_FindModule(struct PyModuleDef* m) +{ + Py_ssize_t index = m->m_base.m_index; + PyInterpreterState *state = PyThreadState_GET()->interp; + PyObject *res; + if (index == 0) + return NULL; + if (state->modules_by_index == NULL) + return NULL; + if (index > PyList_GET_SIZE(state->modules_by_index)) + return NULL; + res = PyList_GET_ITEM(state->modules_by_index, index); + return res==Py_None ? NULL : res; +} + +int +_PyState_AddModule(PyObject* module, struct PyModuleDef* def) +{ + PyInterpreterState *state = PyThreadState_GET()->interp; + if (!def) + return -1; + if (!state->modules_by_index) { + state->modules_by_index = PyList_New(0); + if (!state->modules_by_index) + return -1; + } + while(PyList_GET_SIZE(state->modules_by_index) <= def->m_base.m_index) + if (PyList_Append(state->modules_by_index, Py_None) < 0) + return -1; + Py_INCREF(module); + return PyList_SetItem(state->modules_by_index, + def->m_base.m_index, module); +} + +void +PyThreadState_Clear(PyThreadState *tstate) +{ + if (Py_VerboseFlag && tstate->frame != NULL) + fprintf(stderr, + "PyThreadState_Clear: warning: thread still has a frame\n"); + + Py_CLEAR(tstate->frame); + + Py_CLEAR(tstate->dict); + Py_CLEAR(tstate->async_exc); + + Py_CLEAR(tstate->curexc_type); + Py_CLEAR(tstate->curexc_value); + Py_CLEAR(tstate->curexc_traceback); + + Py_CLEAR(tstate->exc_type); + Py_CLEAR(tstate->exc_value); + Py_CLEAR(tstate->exc_traceback); + + tstate->c_profilefunc = NULL; + tstate->c_tracefunc = NULL; + Py_CLEAR(tstate->c_profileobj); + Py_CLEAR(tstate->c_traceobj); +} + + +/* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ +static void +tstate_delete_common(PyThreadState *tstate) +{ + PyInterpreterState *interp; + PyThreadState **p; + PyThreadState *prev_p = NULL; + if (tstate == NULL) + Py_FatalError("PyThreadState_Delete: NULL tstate"); + interp = tstate->interp; + if (interp == NULL) + Py_FatalError("PyThreadState_Delete: NULL interp"); + HEAD_LOCK(); + for (p = &interp->tstate_head; ; p = &(*p)->next) { + if (*p == NULL) + Py_FatalError( + "PyThreadState_Delete: invalid tstate"); + if (*p == tstate) + break; + /* Sanity check. These states should never happen but if + * they do we must abort. Otherwise we'll end up spinning in + * in a tight loop with the lock held. A similar check is done + * in thread.c find_key(). */ + if (*p == prev_p) + Py_FatalError( + "PyThreadState_Delete: small circular list(!)" + " and tstate not found."); + prev_p = *p; + if ((*p)->next == interp->tstate_head) + Py_FatalError( + "PyThreadState_Delete: circular list(!) and" + " tstate not found."); + } + *p = tstate->next; + HEAD_UNLOCK(); + free(tstate); +} + + +void +PyThreadState_Delete(PyThreadState *tstate) +{ + if (tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current)) + Py_FatalError("PyThreadState_Delete: tstate is still current"); + tstate_delete_common(tstate); +#ifdef WITH_THREAD + if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) + PyThread_delete_key_value(autoTLSkey); +#endif /* WITH_THREAD */ +} + + +#ifdef WITH_THREAD +void +PyThreadState_DeleteCurrent() +{ + PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( + &_PyThreadState_Current); + if (tstate == NULL) + Py_FatalError( + "PyThreadState_DeleteCurrent: no current tstate"); + _Py_atomic_store_relaxed(&_PyThreadState_Current, NULL); + tstate_delete_common(tstate); + if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) + PyThread_delete_key_value(autoTLSkey); + PyEval_ReleaseLock(); +} +#endif /* WITH_THREAD */ + + +PyThreadState * +PyThreadState_Get(void) +{ + PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( + &_PyThreadState_Current); + if (tstate == NULL) + Py_FatalError("PyThreadState_Get: no current thread"); + + return tstate; +} + + +PyThreadState * +PyThreadState_Swap(PyThreadState *newts) +{ + PyThreadState *oldts = (PyThreadState*)_Py_atomic_load_relaxed( + &_PyThreadState_Current); + + _Py_atomic_store_relaxed(&_PyThreadState_Current, newts); + /* It should not be possible for more than one thread state + to be used for a thread. Check this the best we can in debug + builds. + */ +#if defined(Py_DEBUG) && defined(WITH_THREAD) + if (newts) { + /* This can be called from PyEval_RestoreThread(). Similar + to it, we need to ensure errno doesn't change. + */ + int err = errno; + PyThreadState *check = PyGILState_GetThisThreadState(); + if (check && check->interp == newts->interp && check != newts) + Py_FatalError("Invalid thread state for this thread"); + errno = err; + } +#endif + return oldts; +} + +/* An extension mechanism to store arbitrary additional per-thread state. + PyThreadState_GetDict() returns a dictionary that can be used to hold such + state; the caller should pick a unique key and store its state there. If + PyThreadState_GetDict() returns NULL, an exception has *not* been raised + and the caller should assume no per-thread state is available. */ + +PyObject * +PyThreadState_GetDict(void) +{ + PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( + &_PyThreadState_Current); + if (tstate == NULL) + return NULL; + + if (tstate->dict == NULL) { + PyObject *d; + tstate->dict = d = PyDict_New(); + if (d == NULL) + PyErr_Clear(); + } + return tstate->dict; +} + + +/* Asynchronously raise an exception in a thread. + Requested by Just van Rossum and Alex Martelli. + To prevent naive misuse, you must write your own extension + to call this, or use ctypes. Must be called with the GIL held. + Returns the number of tstates modified (normally 1, but 0 if `id` didn't + match any known thread id). Can be called with exc=NULL to clear an + existing async exception. This raises no exceptions. */ + +int +PyThreadState_SetAsyncExc(long id, PyObject *exc) { + PyThreadState *tstate = PyThreadState_GET(); + PyInterpreterState *interp = tstate->interp; + PyThreadState *p; + + /* Although the GIL is held, a few C API functions can be called + * without the GIL held, and in particular some that create and + * destroy thread and interpreter states. Those can mutate the + * list of thread states we're traversing, so to prevent that we lock + * head_mutex for the duration. + */ + HEAD_LOCK(); + for (p = interp->tstate_head; p != NULL; p = p->next) { + if (p->thread_id == id) { + /* Tricky: we need to decref the current value + * (if any) in p->async_exc, but that can in turn + * allow arbitrary Python code to run, including + * perhaps calls to this function. To prevent + * deadlock, we need to release head_mutex before + * the decref. + */ + PyObject *old_exc = p->async_exc; + Py_XINCREF(exc); + p->async_exc = exc; + HEAD_UNLOCK(); + Py_XDECREF(old_exc); + _PyEval_SignalAsyncExc(); + return 1; + } + } + HEAD_UNLOCK(); + return 0; +} + + +/* Routines for advanced debuggers, requested by David Beazley. + Don't use unless you know what you are doing! */ + +PyInterpreterState * +PyInterpreterState_Head(void) +{ + return interp_head; +} + +PyInterpreterState * +PyInterpreterState_Next(PyInterpreterState *interp) { + return interp->next; +} + +PyThreadState * +PyInterpreterState_ThreadHead(PyInterpreterState *interp) { + return interp->tstate_head; +} + +PyThreadState * +PyThreadState_Next(PyThreadState *tstate) { + return tstate->next; +} + +/* The implementation of sys._current_frames(). This is intended to be + called with the GIL held, as it will be when called via + sys._current_frames(). It's possible it would work fine even without + the GIL held, but haven't thought enough about that. +*/ +PyObject * +_PyThread_CurrentFrames(void) +{ + PyObject *result; + PyInterpreterState *i; + + result = PyDict_New(); + if (result == NULL) + return NULL; + + /* for i in all interpreters: + * for t in all of i's thread states: + * if t's frame isn't NULL, map t's id to its frame + * Because these lists can mutate even when the GIL is held, we + * need to grab head_mutex for the duration. + */ + HEAD_LOCK(); + for (i = interp_head; i != NULL; i = i->next) { + PyThreadState *t; + for (t = i->tstate_head; t != NULL; t = t->next) { + PyObject *id; + int stat; + struct _frame *frame = t->frame; + if (frame == NULL) + continue; + id = PyLong_FromLong(t->thread_id); + if (id == NULL) + goto Fail; + stat = PyDict_SetItem(result, id, (PyObject *)frame); + Py_DECREF(id); + if (stat < 0) + goto Fail; + } + } + HEAD_UNLOCK(); + return result; + + Fail: + HEAD_UNLOCK(); + Py_DECREF(result); + return NULL; +} + +/* Python "auto thread state" API. */ +#ifdef WITH_THREAD + +/* Keep this as a static, as it is not reliable! It can only + ever be compared to the state for the *current* thread. + * If not equal, then it doesn't matter that the actual + value may change immediately after comparison, as it can't + possibly change to the current thread's state. + * If equal, then the current thread holds the lock, so the value can't + change until we yield the lock. +*/ +static int +PyThreadState_IsCurrent(PyThreadState *tstate) +{ + /* Must be the tstate for this thread */ + assert(PyGILState_GetThisThreadState()==tstate); + return tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current); +} + +/* Internal initialization/finalization functions called by + Py_Initialize/Py_Finalize +*/ +void +_PyGILState_Init(PyInterpreterState *i, PyThreadState *t) +{ + assert(i && t); /* must init with valid states */ + autoTLSkey = PyThread_create_key(); + if (autoTLSkey == -1) + Py_FatalError("Could not allocate TLS entry"); + autoInterpreterState = i; + assert(PyThread_get_key_value(autoTLSkey) == NULL); + assert(t->gilstate_counter == 0); + + _PyGILState_NoteThreadState(t); +} + +void +_PyGILState_Fini(void) +{ + PyThread_delete_key(autoTLSkey); + autoInterpreterState = NULL; +} + +/* Reset the TLS key - called by PyOS_AfterFork(). + * This should not be necessary, but some - buggy - pthread implementations + * don't reset TLS upon fork(), see issue #10517. + */ +void +_PyGILState_Reinit(void) +{ + PyThreadState *tstate = PyGILState_GetThisThreadState(); + PyThread_delete_key(autoTLSkey); + if ((autoTLSkey = PyThread_create_key()) == -1) + Py_FatalError("Could not allocate TLS entry"); + + /* If the thread had an associated auto thread state, reassociate it with + * the new key. */ + if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) + Py_FatalError("Couldn't create autoTLSkey mapping"); +} + +/* When a thread state is created for a thread by some mechanism other than + PyGILState_Ensure, it's important that the GILState machinery knows about + it so it doesn't try to create another thread state for the thread (this is + a better fix for SF bug #1010677 than the first one attempted). +*/ +static void +_PyGILState_NoteThreadState(PyThreadState* tstate) +{ + /* If autoTLSkey isn't initialized, this must be the very first + threadstate created in Py_Initialize(). Don't do anything for now + (we'll be back here when _PyGILState_Init is called). */ + if (!autoInterpreterState) + return; + + /* Stick the thread state for this thread in thread local storage. + + The only situation where you can legitimately have more than one + thread state for an OS level thread is when there are multiple + interpreters, when: + + a) You shouldn't really be using the PyGILState_ APIs anyway, + and: + + b) The slightly odd way PyThread_set_key_value works (see + comments by its implementation) means that the first thread + state created for that given OS level thread will "win", + which seems reasonable behaviour. + */ + if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) + Py_FatalError("Couldn't create autoTLSkey mapping"); + + /* PyGILState_Release must not try to delete this thread state. */ + tstate->gilstate_counter = 1; +} + +/* The public functions */ +PyThreadState * +PyGILState_GetThisThreadState(void) +{ + if (autoInterpreterState == NULL) + return NULL; + return (PyThreadState *)PyThread_get_key_value(autoTLSkey); +} + +PyGILState_STATE +PyGILState_Ensure(void) +{ + int current; + PyThreadState *tcur; + /* Note that we do not auto-init Python here - apart from + potential races with 2 threads auto-initializing, pep-311 + spells out other issues. Embedders are expected to have + called Py_Initialize() and usually PyEval_InitThreads(). + */ + assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */ + tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey); + if (tcur == NULL) { + /* Create a new thread state for this thread */ + tcur = PyThreadState_New(autoInterpreterState); + if (tcur == NULL) + Py_FatalError("Couldn't create thread-state for new thread"); + /* This is our thread state! We'll need to delete it in the + matching call to PyGILState_Release(). */ + tcur->gilstate_counter = 0; + current = 0; /* new thread state is never current */ + } + else + current = PyThreadState_IsCurrent(tcur); + if (current == 0) + PyEval_RestoreThread(tcur); + /* Update our counter in the thread-state - no need for locks: + - tcur will remain valid as we hold the GIL. + - the counter is safe as we are the only thread "allowed" + to modify this value + */ + ++tcur->gilstate_counter; + return current ? PyGILState_LOCKED : PyGILState_UNLOCKED; +} + +void +PyGILState_Release(PyGILState_STATE oldstate) +{ + PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value( + autoTLSkey); + if (tcur == NULL) + Py_FatalError("auto-releasing thread-state, " + "but no thread-state for this thread"); + /* We must hold the GIL and have our thread state current */ + /* XXX - remove the check - the assert should be fine, + but while this is very new (April 2003), the extra check + by release-only users can't hurt. + */ + if (! PyThreadState_IsCurrent(tcur)) + Py_FatalError("This thread state must be current when releasing"); + assert(PyThreadState_IsCurrent(tcur)); + --tcur->gilstate_counter; + assert(tcur->gilstate_counter >= 0); /* illegal counter value */ + + /* If we're going to destroy this thread-state, we must + * clear it while the GIL is held, as destructors may run. + */ + if (tcur->gilstate_counter == 0) { + /* can't have been locked when we created it */ + assert(oldstate == PyGILState_UNLOCKED); + PyThreadState_Clear(tcur); + /* Delete the thread-state. Note this releases the GIL too! + * It's vital that the GIL be held here, to avoid shutdown + * races; see bugs 225673 and 1061968 (that nasty bug has a + * habit of coming back). + */ + PyThreadState_DeleteCurrent(); + } + /* Release the lock if necessary */ + else if (oldstate == PyGILState_UNLOCKED) + PyEval_SaveThread(); +} + +#endif /* WITH_THREAD */ + + diff -r dcce92b1efbc -r 7f74363f4c82 cos/python/Python/pythonrun.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cos/python/Python/pythonrun.c Tue Dec 27 18:59:02 2011 +0100 @@ -0,0 +1,2270 @@ + +/* Python interpreter top-level routines, including init/exit */ + +#include "Python.h" + +#include "Python-ast.h" +#undef Yield /* undefine macro conflicting with winbase.h */ +#include "grammar.h" +#include "node.h" +#include "token.h" +#include "parsetok.h" +#include "errcode.h" +#include "code.h" +#include "symtable.h" +#include "ast.h" +#include "marshal.h" +#include "osdefs.h" + +#define PRINT_TOTAL_REFS() + +extern wchar_t *Py_GetPath(void); + +extern grammar _PyParser_Grammar; /* From graminit.c */ + +/* Forward */ +static void initmain(void); +static int initfsencoding(PyInterpreterState *interp); +static void initsite(void); +static int initstdio(void); +static void flush_io(void); +static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *, + PyCompilerFlags *, PyArena *); +static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, + PyCompilerFlags *); +static void err_input(perrdetail *); +static void err_free(perrdetail *); +static void initsigs(void); +static void call_py_exitfuncs(void); +static void wait_for_thread_shutdown(void); +static void call_ll_exitfuncs(void); +extern int _PyUnicode_Init(void); +extern void _PyUnicode_Fini(void); +extern int _PyLong_Init(void); +extern void PyLong_Fini(void); +extern int _PyFaulthandler_Init(void); +extern void _PyFaulthandler_Fini(void); + +#ifdef WITH_THREAD +extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); +extern void _PyGILState_Fini(void); +#endif /* WITH_THREAD */ + +int Py_DebugFlag; /* Needed by parser.c */ +int Py_VerboseFlag; /* Needed by import.c */ +int Py_QuietFlag; /* Needed by sysmodule.c */ +int Py_InteractiveFlag; /* Needed by Py_FdIsInteractive() below */ +int Py_InspectFlag; /* Needed to determine whether to exit at SystemExit */ +int Py_NoSiteFlag; /* Suppress 'import site' */ +int Py_BytesWarningFlag; /* Warn on str(bytes) and str(buffer) */ +int Py_DontWriteBytecodeFlag; /* Suppress writing bytecode files (*.py[co]) */ +int Py_UseClassExceptionsFlag = 1; /* Needed by bltinmodule.c: deprecated */ +int Py_FrozenFlag; /* Needed by getpath.c */ +int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ +int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ +int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ + +PyThreadState *_Py_Finalizing = NULL; + +/* PyModule_GetWarningsModule is no longer necessary as of 2.6 +since _warnings is builtin. This API should not be used. */ +PyObject * +PyModule_GetWarningsModule(void) +{ + return PyImport_ImportModule("warnings"); +} + +static int initialized = 0; + +/* API to access the initialized flag -- useful for esoteric use */ + +int +Py_IsInitialized(void) +{ + return initialized; +} + +/* Global initializations. Can be undone by Py_Finalize(). Don't + call this twice without an intervening Py_Finalize() call. When + initializations fail, a fatal error is issued and the function does + not return. On return, the first thread and interpreter state have + been created. + + Locking: you must hold the interpreter lock while calling this. + (If the lock has not yet been initialized, that's equivalent to + having the lock, but you cannot use multiple threads.) + +*/ + +static int +add_flag(int flag, const char *envs) +{ + int env = atoi(envs); + if (flag < env) + flag = env; + if (flag < 1) + flag = 1; + return flag; +} + +static char* +get_codec_name(const char *encoding) +{ + char *name_utf8, *name_str; + PyObject *codec, *name = NULL; + _Py_IDENTIFIER(name); + + codec = _PyCodec_Lookup(encoding); + if (!codec) + goto error; + + name = _PyObject_GetAttrId(codec, &PyId_name); + Py_CLEAR(codec); + if (!name) + goto error; + + name_utf8 = _PyUnicode_AsString(name); + if (name_utf8 == NULL) + goto error; + name_str = strdup(name_utf8); + Py_DECREF(name); + if (name_str == NULL) { + PyErr_NoMemory(); + return NULL; + } + return name_str; + +error: + Py_XDECREF(codec); + Py_XDECREF(name); + return NULL; +} + +static char* +get_locale_encoding(void) +{ + PyErr_SetNone(PyExc_NotImplementedError); + return NULL; +} + +void +Py_InitializeEx(int install_sigs) +{ + PyInterpreterState *interp; + PyThreadState *tstate; + PyObject *bimod, *sysmod, *pstderr; + char *p; + extern void _Py_ReadyTypes(void); + + if (initialized) + return; + initialized = 1; + _Py_Finalizing = NULL; + + if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') + Py_DebugFlag = add_flag(Py_DebugFlag, p); + if ((p = Py_GETENV("PYTHONVERBOSE")) && *p != '\0') + Py_VerboseFlag = add_flag(Py_VerboseFlag, p); + if ((p = Py_GETENV("PYTHONOPTIMIZE")) && *p != '\0') + Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); + if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') + Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); + + interp = PyInterpreterState_New(); + if (interp == NULL) + Py_FatalError("Py_Initialize: can't make first interpreter"); + + tstate = PyThreadState_New(interp); + if (tstate == NULL) + Py_FatalError("Py_Initialize: can't make first thread"); + (void) PyThreadState_Swap(tstate); + +#ifdef WITH_THREAD + /* We can't call _PyEval_FiniThreads() in Py_Finalize because + destroying the GIL might fail when it is being referenced from + another running thread (see issue #9901). + Instead we destroy the previously created GIL here, which ensures + that we can call Py_Initialize / Py_Finalize multiple times. */ + _PyEval_FiniThreads(); + + /* Auto-thread-state API */ + _PyGILState_Init(interp, tstate); +#endif /* WITH_THREAD */ + + _Py_ReadyTypes(); + + if (!_PyFrame_Init()) + Py_FatalError("Py_Initialize: can't init frames"); + + if (!_PyLong_Init()) + Py_FatalError("Py_Initialize: can't init longs"); + + if (!PyByteArray_Init()) + Py_FatalError("Py_Initialize: can't init bytearray"); + + _PyFloat_Init(); + + interp->modules = PyDict_New(); + if (interp->modules == NULL) + Py_FatalError("Py_Initialize: can't make modules dictionary"); + interp->modules_reloading = PyDict_New(); + if (interp->modules_reloading == NULL) + Py_FatalError("Py_Initialize: can't make modules_reloading dictionary"); + + /* Init Unicode implementation; relies on the codec registry */ + if (_PyUnicode_Init() < 0) + Py_FatalError("Py_Initialize: can't initialize unicode"); + + bimod = _PyBuiltin_Init(); + if (bimod == NULL) + Py_FatalError("Py_Initialize: can't initialize builtins modules"); + _PyImport_FixupBuiltin(bimod, "builtins"); + interp->builtins = PyModule_GetDict(bimod); + if (interp->builtins == NULL) + Py_FatalError("Py_Initialize: can't initialize builtins dict"); + Py_INCREF(interp->builtins); + + /* initialize builtin exceptions */ + _PyExc_Init(); + + sysmod = _PySys_Init(); + if (sysmod == NULL) + Py_FatalError("Py_Initialize: can't initialize sys"); + interp->sysdict = PyModule_GetDict(sysmod); + if (interp->sysdict == NULL) + Py_FatalError("Py_Initialize: can't initialize sys dict"); + Py_INCREF(interp->sysdict); + _PyImport_FixupBuiltin(sysmod, "sys"); + PySys_SetPath(Py_GetPath()); + PyDict_SetItemString(interp->sysdict, "modules", + interp->modules); + + /* Set up a preliminary stderr printer until we have enough + infrastructure for the io module in place. */ + pstderr = PyFile_NewStdPrinter(fileno(stderr)); + if (pstderr == NULL) + Py_FatalError("Py_Initialize: can't set preliminary stderr"); + PySys_SetObject("stderr", pstderr); + PySys_SetObject("__stderr__", pstderr); + Py_DECREF(pstderr); + + _PyImport_Init(); + + _PyImportHooks_Init(); + + /* initialize the faulthandler module */ + if (_PyFaulthandler_Init()) + Py_FatalError("Py_Initialize: can't initialize faulthandler"); + + /* Initialize _warnings. */ + _PyWarnings_Init(); + + _PyTime_Init(); + + if (initfsencoding(interp) < 0) + Py_FatalError("Py_Initialize: unable to load the file system codec"); + + if (install_sigs) + initsigs(); /* Signal handling stuff, including initintr() */ + + initmain(); /* Module __main__ */ + if (initstdio() < 0) + Py_FatalError( + "Py_Initialize: can't initialize sys standard streams"); + + /* Initialize warnings. */ + if (PySys_HasWarnOptions()) { + PyObject *warnings_module = PyImport_ImportModule("warnings"); + if (warnings_module == NULL) { + fprintf(stderr, "'import warnings' failed; traceback:\n"); + PyErr_Print(); + } + Py_XDECREF(warnings_module); + } + + if (!Py_NoSiteFlag) + initsite(); /* Module site */ +} + +void +Py_Initialize(void) +{ + Py_InitializeEx(1); +} + +/* Flush stdout and stderr */ + +static int +file_is_closed(PyObject *fobj) +{ + int r; + PyObject *tmp = PyObject_GetAttrString(fobj, "closed"); + if (tmp == NULL) { + PyErr_Clear(); + return 0; + } + r = PyObject_IsTrue(tmp); + Py_DECREF(tmp); + if (r < 0) + PyErr_Clear(); + return r > 0; +} + +static void +flush_std_files(void) +{ + PyObject *fout = PySys_GetObject("stdout"); + PyObject *ferr = PySys_GetObject("stderr"); + PyObject *tmp; + _Py_IDENTIFIER(flush); + + if (fout != NULL && fout != Py_None && !file_is_closed(fout)) { + tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); + if (tmp == NULL) + PyErr_WriteUnraisable(fout); + else + Py_DECREF(tmp); + } + + if (ferr != NULL && ferr != Py_None && !file_is_closed(ferr)) { + tmp = _PyObject_CallMethodId(ferr, &PyId_flush, ""); + if (tmp == NULL) + PyErr_Clear(); + else + Py_DECREF(tmp); + } +} + +/* Undo the effect of Py_Initialize(). + + Beware: if multiple interpreter and/or thread states exist, these + are not wiped out; only the current thread and interpreter state + are deleted. But since everything else is deleted, those other + interpreter and thread states should no longer be used. + + (XXX We should do better, e.g. wipe out all interpreters and + threads.) + + Locking: as above. + +*/ + +void +Py_Finalize(void) +{ + PyInterpreterState *interp; + PyThreadState *tstate; + + if (!initialized) + return; + + wait_for_thread_shutdown(); + + /* The interpreter is still entirely intact at this point, and the + * exit funcs may be relying on that. In particular, if some thread + * or exit func is still waiting to do an import, the import machinery + * expects Py_IsInitialized() to return true. So don't say the + * interpreter is uninitialized until after the exit funcs have run. + * Note that Threading.py uses an exit func to do a join on all the + * threads created thru it, so this also protects pending imports in + * the threads created via Threading. + */ + call_py_exitfuncs(); + + /* Get current thread state and interpreter pointer */ + tstate = PyThreadState_GET(); + interp = tstate->interp; + + /* Remaining threads (e.g. daemon threads) will automatically exit + after taking the GIL (in PyEval_RestoreThread()). */ + _Py_Finalizing = tstate; + initialized = 0; + + /* Flush stdout+stderr */ + flush_std_files(); + + /* Disable signal handling */ + PyOS_FiniInterrupts(); + + /* Clear type lookup cache */ + PyType_ClearCache(); + + /* Collect garbage. This may call finalizers; it's nice to call these + * before all modules are destroyed. + * XXX If a __del__ or weakref callback is triggered here, and tries to + * XXX import a module, bad things can happen, because Python no + * XXX longer believes it's initialized. + * XXX Fatal Python error: Interpreter not initialized (version mismatch?) + * XXX is easy to provoke that way. I've also seen, e.g., + * XXX Exception exceptions.ImportError: 'No module named sha' + * XXX in ignored + * XXX but I'm unclear on exactly how that one happens. In any case, + * XXX I haven't seen a real-life report of either of these. + */ + PyGC_Collect(); + /* We run this while most interpreter state is still alive, so that + debug information can be printed out */ + _PyGC_Fini(); + + /* Destroy all modules */ + PyImport_Cleanup(); + + /* Flush stdout+stderr (again, in case more was printed) */ + flush_std_files(); + + /* Collect final garbage. This disposes of cycles created by + * new-style class definitions, for example. + * XXX This is disabled because it caused too many problems. If + * XXX a __del__ or weakref callback triggers here, Python code has + * XXX a hard time running, because even the sys module has been + * XXX cleared out (sys.stdout is gone, sys.excepthook is gone, etc). + * XXX One symptom is a sequence of information-free messages + * XXX coming from threads (if a __del__ or callback is invoked, + * XXX other threads can execute too, and any exception they encounter + * XXX triggers a comedy of errors as subsystem after subsystem + * XXX fails to find what it *expects* to find in sys to help report + * XXX the exception and consequent unexpected failures). I've also + * XXX seen segfaults then, after adding print statements to the + * XXX Python code getting called. + */ + + /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ + _PyImport_Fini(); + + /* unload faulthandler module */ + _PyFaulthandler_Fini(); + + /* Clear interpreter state */ + PyInterpreterState_Clear(interp); + + /* Now we decref the exception classes. After this point nothing + can raise an exception. That's okay, because each Fini() method + below has been checked to make sure no exceptions are ever + raised. + */ + + _PyExc_Fini(); + + /* Cleanup auto-thread-state */ +#ifdef WITH_THREAD + _PyGILState_Fini(); +#endif /* WITH_THREAD */ + + /* Delete current thread */ + PyThreadState_Swap(NULL); + PyInterpreterState_Delete(interp); + + /* Sundry finalizers */ + PyMethod_Fini(); + PyFrame_Fini(); + PyCFunction_Fini(); + PyTuple_Fini(); + PyList_Fini(); + PySet_Fini(); + PyBytes_Fini(); + PyByteArray_Fini(); + PyLong_Fini(); + PyFloat_Fini(); + PyDict_Fini(); + PySlice_Fini(); + + /* Cleanup Unicode implementation */ + _PyUnicode_Fini(); + + /* reset file system default encoding */ + if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) { + free((char*)Py_FileSystemDefaultEncoding); + Py_FileSystemDefaultEncoding = NULL; + } + + /* XXX Still allocated: + - various static ad-hoc pointers to interned strings + - int and float free list blocks + - whatever various modules and libraries allocate + */ + + PyGrammar_RemoveAccelerators(&_PyParser_Grammar); + + call_ll_exitfuncs(); +} + +/* Create and initialize a new interpreter and thread, and return the + new thread. This requires that Py_Initialize() has been called + first. + + Unsuccessful initialization yields a NULL pointer. Note that *no* + exception information is available even in this case -- the + exception information is held in the thread, and there is no + thread. + + Locking: as above. + +*/ + +PyThreadState * +Py_NewInterpreter(void) +{ + PyInterpreterState *interp; + PyThreadState *tstate, *save_tstate; + PyObject *bimod, *sysmod; + + if (!initialized) + Py_FatalError("Py_NewInterpreter: call Py_Initialize first"); + + interp = PyInterpreterState_New(); + if (interp == NULL) + return NULL; + + tstate = PyThreadState_New(interp); + if (tstate == NULL) { + PyInterpreterState_Delete(interp); + return NULL; + } + + save_tstate = PyThreadState_Swap(tstate); + + /* XXX The following is lax in error checking */ + + interp->modules = PyDict_New(); + interp->modules_reloading = PyDict_New(); + + bimod = _PyImport_FindBuiltin("builtins"); + if (bimod != NULL) { + interp->builtins = PyModule_GetDict(bimod); + if (interp->builtins == NULL) + goto handle_error; + Py_INCREF(interp->builtins); + } + + /* initialize builtin exceptions */ + _PyExc_Init(); + + sysmod = _PyImport_FindBuiltin("sys"); + if (bimod != NULL && sysmod != NULL) { + PyObject *pstderr; + interp->sysdict = PyModule_GetDict(sysmod); + if (interp->sysdict == NULL) + goto handle_error; + Py_INCREF(interp->sysdict); + PySys_SetPath(Py_GetPath()); + PyDict_SetItemString(interp->sysdict, "modules", + interp->modules); + /* Set up a preliminary stderr printer until we have enough + infrastructure for the io module in place. */ + pstderr = PyFile_NewStdPrinter(fileno(stderr)); + if (pstderr == NULL) + Py_FatalError("Py_Initialize: can't set preliminary stderr"); + PySys_SetObject("stderr", pstderr); + PySys_SetObject("__stderr__", pstderr); + Py_DECREF(pstderr); + + _PyImportHooks_Init(); + + if (initfsencoding(interp) < 0) + goto handle_error; + + if (initstdio() < 0) + Py_FatalError( + "Py_Initialize: can't initialize sys standard streams"); + initmain(); + if (!Py_NoSiteFlag) + initsite(); + } + + if (!PyErr_Occurred()) + return tstate; + +handle_error: + /* Oops, it didn't work. Undo it all. */ + + PyErr_PrintEx(0); + PyThreadState_Clear(tstate); + PyThreadState_Swap(save_tstate); + PyThreadState_Delete(tstate); + PyInterpreterState_Delete(interp); + + return NULL; +} + +/* Delete an interpreter and its last thread. This requires that the + given thread state is current, that the thread has no remaining + frames, and that it is its interpreter's only remaining thread. + It is a fatal error to violate these constraints. + + (Py_Finalize() doesn't have these constraints -- it zaps + everything, regardless.) + + Locking: as above. + +*/ + +void +Py_EndInterpreter(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + + if (tstate != PyThreadState_GET()) + Py_FatalError("Py_EndInterpreter: thread is not current"); + if (tstate->frame != NULL) + Py_FatalError("Py_EndInterpreter: thread still has a frame"); + if (tstate != interp->tstate_head || tstate->next != NULL) + Py_FatalError("Py_EndInterpreter: not the last thread"); + + PyImport_Cleanup(); + PyInterpreterState_Clear(interp); + PyThreadState_Swap(NULL); + PyInterpreterState_Delete(interp); +} + +static wchar_t *progname = L"python"; + +void +Py_SetProgramName(wchar_t *pn) +{ + if (pn && *pn) + progname = pn; +} + +wchar_t * +Py_GetProgramName(void) +{ + return progname; +} + +static wchar_t *default_home = NULL; +static wchar_t env_home[PATH_MAX+1]; + +void +Py_SetPythonHome(wchar_t *home) +{ + default_home = home; +} + +wchar_t * +Py_GetPythonHome(void) +{ + wchar_t *home = default_home; + if (home == NULL && !Py_IgnoreEnvironmentFlag) { + char* chome = Py_GETENV("PYTHONHOME"); + if (chome) { + size_t r = mbstowcs(env_home, chome, PATH_MAX+1); + if (r != (size_t)-1 && r <= PATH_MAX) + home = env_home; + } + + } + return home; +} + +/* Create __main__ module */ + +static void +initmain(void) +{ + PyObject *m, *d; + m = PyImport_AddModule("__main__"); + if (m == NULL) + Py_FatalError("can't create __main__ module"); + d = PyModule_GetDict(m); + if (PyDict_GetItemString(d, "__builtins__") == NULL) { + PyObject *bimod = PyImport_ImportModule("builtins"); + if (bimod == NULL || + PyDict_SetItemString(d, "__builtins__", bimod) != 0) + Py_FatalError("can't add __builtins__ to __main__"); + Py_DECREF(bimod); + } +} + +static int +initfsencoding(PyInterpreterState *interp) +{ + PyObject *codec; + + if (Py_FileSystemDefaultEncoding == NULL) + { + Py_FileSystemDefaultEncoding = get_locale_encoding(); + if (Py_FileSystemDefaultEncoding == NULL) + Py_FatalError("Py_Initialize: Unable to get the locale encoding"); + + Py_HasFileSystemDefaultEncoding = 0; + interp->fscodec_initialized = 1; + return 0; + } + + /* the encoding is mbcs, utf-8 or ascii */ + codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); + if (!codec) { + /* Such error can only occurs in critical situations: no more + * memory, import a module of the standard library failed, + * etc. */ + return -1; + } + Py_DECREF(codec); + interp->fscodec_initialized = 1; + return 0; +} + +/* Import the site module (not into __main__ though) */ + +static void +initsite(void) +{ + PyObject *m; + m = PyImport_ImportModule("site"); + if (m == NULL) { + PyErr_Print(); + Py_Finalize(); + exit(1); + } + else { + Py_DECREF(m); + } +} + +static PyObject* +create_stdio(PyObject* io, + int fd, int write_mode, char* name, + char* encoding, char* errors) +{ + PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res; + const char* mode; + const char* newline; + PyObject *line_buffering; + int buffering, isatty; + _Py_IDENTIFIER(open); + _Py_IDENTIFIER(isatty); + _Py_IDENTIFIER(TextIOWrapper); + _Py_IDENTIFIER(name); + _Py_IDENTIFIER(mode); + + /* stdin is always opened in buffered mode, first because it shouldn't + make a difference in common use cases, second because TextIOWrapper + depends on the presence of a read1() method which only exists on + buffered streams. + */ + if (Py_UnbufferedStdioFlag && write_mode) + buffering = 0; + else + buffering = -1; + if (write_mode) + mode = "wb"; + else + mode = "rb"; + buf = _PyObject_CallMethodId(io, &PyId_open, "isiOOOi", + fd, mode, buffering, + Py_None, Py_None, Py_None, 0); + if (buf == NULL) + goto error; + + if (buffering) { + _Py_IDENTIFIER(raw); + raw = _PyObject_GetAttrId(buf, &PyId_raw); + if (raw == NULL) + goto error; + } + else { + raw = buf; + Py_INCREF(raw); + } + + text = PyUnicode_FromString(name); + if (text == NULL || _PyObject_SetAttrId(raw, &PyId_name, text) < 0) + goto error; + res = _PyObject_CallMethodId(raw, &PyId_isatty, ""); + if (res == NULL) + goto error; + isatty = PyObject_IsTrue(res); + Py_DECREF(res); + if (isatty == -1) + goto error; + if (isatty || Py_UnbufferedStdioFlag) + line_buffering = Py_True; + else + line_buffering = Py_False; + + Py_CLEAR(raw); + Py_CLEAR(text); + + newline = "\n"; + + stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssO", + buf, encoding, errors, + newline, line_buffering); + Py_CLEAR(buf); + if (stream == NULL) + goto error; + + if (write_mode) + mode = "w"; + else + mode = "r"; + text = PyUnicode_FromString(mode); + if (!text || _PyObject_SetAttrId(stream, &PyId_mode, text) < 0) + goto error; + Py_CLEAR(text); + return stream; + +error: + Py_XDECREF(buf); + Py_XDECREF(stream); + Py_XDECREF(text); + Py_XDECREF(raw); + return NULL; +} + +static int +is_valid_fd(int fd) +{ + int dummy_fd; + if (fd < 0 || !_PyVerify_fd(fd)) + return 0; + dummy_fd = dup(fd); + if (dummy_fd < 0) + return 0; + close(dummy_fd); + return 1; +} + +/* Initialize sys.stdin, stdout, stderr and builtins.open */ +static int +initstdio(void) +{ + PyObject *iomod = NULL, *wrapper; + PyObject *bimod = NULL; + PyObject *m; + PyObject *std = NULL; + int status = 0, fd; + PyObject * encoding_attr; + char *encoding = NULL, *errors; + + /* Hack to avoid a nasty recursion issue when Python is invoked + in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ + if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) { + goto error; + } + Py_DECREF(m); + + if (!(m = PyImport_ImportModule("encodings.latin_1"))) { + goto error; + } + Py_DECREF(m); + + if (!(bimod = PyImport_ImportModule("builtins"))) { + goto error; + } + + if (!(iomod = PyImport_ImportModule("io"))) { + goto error; + } + if (!(wrapper = PyObject_GetAttrString(iomod, "OpenWrapper"))) { + goto error; + } + + /* Set builtins.open */ + if (PyObject_SetAttrString(bimod, "open", wrapper) == -1) { + Py_DECREF(wrapper); + goto error; + } + Py_DECREF(wrapper); + + encoding = Py_GETENV("PYTHONIOENCODING"); + errors = NULL; + if (encoding) { + encoding = strdup(encoding); + errors = strchr(encoding, ':'); + if (errors) { + *errors = '\0'; + errors++; + } + } + + /* Set sys.stdin */ + fd = fileno(stdin); + /* Under some conditions stdin, stdout and stderr may not be connected + * and fileno() may point to an invalid file descriptor. For example + * GUI apps don't have valid standard streams by default. + */ + if (!is_valid_fd(fd)) { + std = Py_None; + Py_INCREF(std); + } + else { + std = create_stdio(iomod, fd, 0, "", encoding, errors); + if (std == NULL) + goto error; + } /* if (fd < 0) */ + PySys_SetObject("__stdin__", std); + PySys_SetObject("stdin", std); + Py_DECREF(std); + + /* Set sys.stdout */ + fd = fileno(stdout); + if (!is_valid_fd(fd)) { + std = Py_None; + Py_INCREF(std); + } + else { + std = create_stdio(iomod, fd, 1, "", encoding, errors); + if (std == NULL) + goto error; + } /* if (fd < 0) */ + PySys_SetObject("__stdout__", std); + PySys_SetObject("stdout", std); + Py_DECREF(std); + +#if 1 /* Disable this if you have trouble debugging bootstrap stuff */ + /* Set sys.stderr, replaces the preliminary stderr */ + fd = fileno(stderr); + if (!is_valid_fd(fd)) { + std = Py_None; + Py_INCREF(std); + } + else { + std = create_stdio(iomod, fd, 1, "", encoding, "backslashreplace"); + if (std == NULL) + goto error; + } /* if (fd < 0) */ + + /* Same as hack above, pre-import stderr's codec to avoid recursion + when import.c tries to write to stderr in verbose mode. */ + encoding_attr = PyObject_GetAttrString(std, "encoding"); + if (encoding_attr != NULL) { + const char * encoding; + encoding = _PyUnicode_AsString(encoding_attr); + if (encoding != NULL) { + _PyCodec_Lookup(encoding); + } + Py_DECREF(encoding_attr); + } + PyErr_Clear(); /* Not a fatal error if codec isn't available */ + + PySys_SetObject("__stderr__", std); + PySys_SetObject("stderr", std); + Py_DECREF(std); +#endif + + if (0) { + error: + status = -1; + } + + if (encoding) + free(encoding); + Py_XDECREF(bimod); + Py_XDECREF(iomod); + return status; +} + +/* Parse input from a file and execute it */ + +int +PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit, + PyCompilerFlags *flags) +{ + if (filename == NULL) + filename = "???"; + if (Py_FdIsInteractive(fp, filename)) { + int err = PyRun_InteractiveLoopFlags(fp, filename, flags); + if (closeit) + fclose(fp); + return err; + } + else + return PyRun_SimpleFileExFlags(fp, filename, closeit, flags); +} + +int +PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) +{ + PyObject *v; + int ret; + PyCompilerFlags local_flags; + + if (flags == NULL) { + flags = &local_flags; + local_flags.cf_flags = 0; + } + v = PySys_GetObject("ps1"); + if (v == NULL) { + PySys_SetObject("ps1", v = PyUnicode_FromString(">>> ")); + Py_XDECREF(v); + } + v = PySys_GetObject("ps2"); + if (v == NULL) { + PySys_SetObject("ps2", v = PyUnicode_FromString("... ")); + Py_XDECREF(v); + } + for (;;) { + ret = PyRun_InteractiveOneFlags(fp, filename, flags); + PRINT_TOTAL_REFS(); + if (ret == E_EOF) + return 0; + /* + if (ret == E_NOMEM) + return -1; + */ + } +} + +/* compute parser flags based on compiler flags */ +static int PARSER_FLAGS(PyCompilerFlags *flags) +{ + int parser_flags = 0; + if (!flags) + return 0; + if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) + parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; + if (flags->cf_flags & PyCF_IGNORE_COOKIE) + parser_flags |= PyPARSE_IGNORE_COOKIE; + if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) + parser_flags |= PyPARSE_BARRY_AS_BDFL; + return parser_flags; +} + +int +PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) +{ + PyObject *m, *d, *v, *w, *oenc = NULL; + mod_ty mod; + PyArena *arena; + char *ps1 = "", *ps2 = "", *enc = NULL; + int errcode = 0; + _Py_IDENTIFIER(encoding); + + if (fp == stdin) { + /* Fetch encoding from sys.stdin */ + v = PySys_GetObject("stdin"); + if (v == NULL || v == Py_None) + return -1; + oenc = _PyObject_GetAttrId(v, &PyId_encoding); + if (!oenc) + return -1; + enc = _PyUnicode_AsString(oenc); + if (enc == NULL) + return -1; + } + v = PySys_GetObject("ps1"); + if (v != NULL) { + v = PyObject_Str(v); + if (v == NULL) + PyErr_Clear(); + else if (PyUnicode_Check(v)) { + ps1 = _PyUnicode_AsString(v); + if (ps1 == NULL) { + PyErr_Clear(); + ps1 = ""; + } + } + } + w = PySys_GetObject("ps2"); + if (w != NULL) { + w = PyObject_Str(w); + if (w == NULL) + PyErr_Clear(); + else if (PyUnicode_Check(w)) { + ps2 = _PyUnicode_AsString(w); + if (ps2 == NULL) { + PyErr_Clear(); + ps2 = ""; + } + } + } + arena = PyArena_New(); + if (arena == NULL) { + Py_XDECREF(v); + Py_XDECREF(w); + Py_XDECREF(oenc); + return -1; + } + mod = PyParser_ASTFromFile(fp, filename, enc, + Py_single_input, ps1, ps2, + flags, &errcode, arena); + Py_XDECREF(v); + Py_XDECREF(w); + Py_XDECREF(oenc); + if (mod == NULL) { + PyArena_Free(arena); + if (errcode == E_EOF) { + PyErr_Clear(); + return E_EOF; + } + PyErr_Print(); + return -1; + } + m = PyImport_AddModule("__main__"); + if (m == NULL) { + PyArena_Free(arena); + return -1; + } + d = PyModule_GetDict(m); + v = run_mod(mod, filename, d, d, flags, arena); + PyArena_Free(arena); + flush_io(); + if (v == NULL) { + PyErr_Print(); + return -1; + } + Py_DECREF(v); + return 0; +} + +/* Check whether a file maybe a pyc file: Look at the extension, + the file type, and, if we may close it, at the first few bytes. */ + +static int +maybe_pyc_file(FILE *fp, const char* filename, const char* ext, int closeit) +{ + if (strcmp(ext, ".pyc") == 0 || strcmp(ext, ".pyo") == 0) + return 1; + + /* Only look into the file if we are allowed to close it, since + it then should also be seekable. */ + if (closeit) { + /* Read only two bytes of the magic. If the file was opened in + text mode, the bytes 3 and 4 of the magic (\r\n) might not + be read as they are on disk. */ + unsigned int halfmagic = PyImport_GetMagicNumber() & 0xFFFF; + unsigned char buf[2]; + /* Mess: In case of -x, the stream is NOT at its start now, + and ungetc() was used to push back the first newline, + which makes the current stream position formally undefined, + and a x-platform nightmare. + Unfortunately, we have no direct way to know whether -x + was specified. So we use a terrible hack: if the current + stream position is not 0, we assume -x was specified, and + give up. Bug 132850 on SourceForge spells out the + hopelessness of trying anything else (fseek and ftell + don't work predictably x-platform for text-mode files). + */ + int ispyc = 0; + if (ftell(fp) == 0) { + if (fread(buf, 1, 2, fp) == 2 && + ((unsigned int)buf[1]<<8 | buf[0]) == halfmagic) + ispyc = 1; + rewind(fp); + } + return ispyc; + } + return 0; +} + +int +PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, + PyCompilerFlags *flags) +{ + PyObject *m, *d, *v; + const char *ext; + int set_file_name = 0, ret; + size_t len; + + m = PyImport_AddModule("__main__"); + if (m == NULL) + return -1; + d = PyModule_GetDict(m); + if (PyDict_GetItemString(d, "__file__") == NULL) { + PyObject *f; + f = PyUnicode_DecodeFSDefault(filename); + if (f == NULL) + return -1; + if (PyDict_SetItemString(d, "__file__", f) < 0) { + Py_DECREF(f); + return -1; + } + if (PyDict_SetItemString(d, "__cached__", Py_None) < 0) { + Py_DECREF(f); + return -1; + } + set_file_name = 1; + Py_DECREF(f); + } + len = strlen(filename); + ext = filename + len - (len > 4 ? 4 : 0); + if (maybe_pyc_file(fp, filename, ext, closeit)) { + /* Try to run a pyc file. First, re-open in binary */ + if (closeit) + fclose(fp); + if ((fp = fopen(filename, "rb")) == NULL) { + fprintf(stderr, "python: Can't reopen .pyc file\n"); + ret = -1; + goto done; + } + /* Turn on optimization if a .pyo file is given */ + if (strcmp(ext, ".pyo") == 0) + Py_OptimizeFlag = 1; + v = run_pyc_file(fp, filename, d, d, flags); + } else { + v = PyRun_FileExFlags(fp, filename, Py_file_input, d, d, + closeit, flags); + } + flush_io(); + if (v == NULL) { + PyErr_Print(); + ret = -1; + goto done; + } + Py_DECREF(v); + ret = 0; + done: + if (set_file_name && PyDict_DelItemString(d, "__file__")) + PyErr_Clear(); + return ret; +} + +int +PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) +{ + PyObject *m, *d, *v; + m = PyImport_AddModule("__main__"); + if (m == NULL) + return -1; + d = PyModule_GetDict(m); + v = PyRun_StringFlags(command, Py_file_input, d, d, flags); + if (v == NULL) { + PyErr_Print(); + return -1; + } + Py_DECREF(v); + return 0; +} + +static int +parse_syntax_error(PyObject *err, PyObject **message, const char **filename, + int *lineno, int *offset, const char **text) +{ + long hold; + PyObject *v; + _Py_IDENTIFIER(msg); + _Py_IDENTIFIER(filename); + _Py_IDENTIFIER(lineno); + _Py_IDENTIFIER(offset); + _Py_IDENTIFIER(text); + + /* old style errors */ + if (PyTuple_Check(err)) + return PyArg_ParseTuple(err, "O(ziiz)", message, filename, + lineno, offset, text); + + /* new style errors. `err' is an instance */ + + if (! (v = _PyObject_GetAttrId(err, &PyId_msg))) + goto finally; + *message = v; + + if (!(v = _PyObject_GetAttrId(err, &PyId_filename))) + goto finally; + if (v == Py_None) + *filename = NULL; + else if (! (*filename = _PyUnicode_AsString(v))) + goto finally; + + Py_DECREF(v); + if (!(v = _PyObject_GetAttrId(err, &PyId_lineno))) + goto finally; + hold = PyLong_AsLong(v); + Py_DECREF(v); + v = NULL; + if (hold < 0 && PyErr_Occurred()) + goto finally; + *lineno = (int)hold; + + if (!(v = _PyObject_GetAttrId(err, &PyId_offset))) + goto finally; + if (v == Py_None) { + *offset = -1; + Py_DECREF(v); + v = NULL; + } else { + hold = PyLong_AsLong(v); + Py_DECREF(v); + v = NULL; + if (hold < 0 && PyErr_Occurred()) + goto finally; + *offset = (int)hold; + } + + if (!(v = _PyObject_GetAttrId(err, &PyId_text))) + goto finally; + if (v == Py_None) + *text = NULL; + else if (!PyUnicode_Check(v) || + !(*text = _PyUnicode_AsString(v))) + goto finally; + Py_DECREF(v); + return 1; + +finally: + Py_XDECREF(v); + return 0; +} + +void +PyErr_Print(void) +{ + PyErr_PrintEx(1); +} + +static void +print_error_text(PyObject *f, int offset, const char *text) +{ + char *nl; + if (offset >= 0) { + if (offset > 0 && offset == strlen(text) && text[offset - 1] == '\n') + offset--; + for (;;) { + nl = strchr(text, '\n'); + if (nl == NULL || nl-text >= offset) + break; + offset -= (int)(nl+1-text); + text = nl+1; + } + while (*text == ' ' || *text == '\t') { + text++; + offset--; + } + } + PyFile_WriteString(" ", f); + PyFile_WriteString(text, f); + if (*text == '\0' || text[strlen(text)-1] != '\n') + PyFile_WriteString("\n", f); + if (offset == -1) + return; + PyFile_WriteString(" ", f); + while (--offset > 0) + PyFile_WriteString(" ", f); + PyFile_WriteString("^\n", f); +} + +static void +handle_system_exit(void) +{ + PyObject *exception, *value, *tb; + int exitcode = 0; + + if (Py_InspectFlag) + /* Don't exit if -i flag was given. This flag is set to 0 + * when entering interactive mode for inspecting. */ + return; + + PyErr_Fetch(&exception, &value, &tb); + fflush(stdout); + if (value == NULL || value == Py_None) + goto done; + if (PyExceptionInstance_Check(value)) { + /* The error code should be in the `code' attribute. */ + _Py_IDENTIFIER(code); + PyObject *code = _PyObject_GetAttrId(value, &PyId_code); + if (code) { + Py_DECREF(value); + value = code; + if (value == Py_None) + goto done; + } + /* If we failed to dig out the 'code' attribute, + just let the else clause below print the error. */ + } + if (PyLong_Check(value)) + exitcode = (int)PyLong_AsLong(value); + else { + PyObject *sys_stderr = PySys_GetObject("stderr"); + if (sys_stderr != NULL && sys_stderr != Py_None) { + PyFile_WriteObject(value, sys_stderr, Py_PRINT_RAW); + } else { + PyObject_Print(value, stderr, Py_PRINT_RAW); + fflush(stderr); + } + PySys_WriteStderr("\n"); + exitcode = 1; + } + done: + /* Restore and clear the exception info, in order to properly decref + * the exception, value, and traceback. If we just exit instead, + * these leak, which confuses PYTHONDUMPREFS output, and may prevent + * some finalizers from running. + */ + PyErr_Restore(exception, value, tb); + PyErr_Clear(); + Py_Exit(exitcode); + /* NOTREACHED */ +} + +void +PyErr_PrintEx(int set_sys_last_vars) +{ + PyObject *exception, *v, *tb, *hook; + + if (PyErr_ExceptionMatches(PyExc_SystemExit)) { + handle_system_exit(); + } + PyErr_Fetch(&exception, &v, &tb); + if (exception == NULL) + return; + PyErr_NormalizeException(&exception, &v, &tb); + if (tb == NULL) { + tb = Py_None; + Py_INCREF(tb); + } + PyException_SetTraceback(v, tb); + if (exception == NULL) + return; + /* Now we know v != NULL too */ + if (set_sys_last_vars) { + PySys_SetObject("last_type", exception); + PySys_SetObject("last_value", v); + PySys_SetObject("last_traceback", tb); + } + hook = PySys_GetObject("excepthook"); + if (hook) { + PyObject *args = PyTuple_Pack(3, exception, v, tb); + PyObject *result = PyEval_CallObject(hook, args); + if (result == NULL) { + PyObject *exception2, *v2, *tb2; + if (PyErr_ExceptionMatches(PyExc_SystemExit)) { + handle_system_exit(); + } + PyErr_Fetch(&exception2, &v2, &tb2); + PyErr_NormalizeException(&exception2, &v2, &tb2); + /* It should not be possible for exception2 or v2 + to be NULL. However PyErr_Display() can't + tolerate NULLs, so just be safe. */ + if (exception2 == NULL) { + exception2 = Py_None; + Py_INCREF(exception2); + } + if (v2 == NULL) { + v2 = Py_None; + Py_INCREF(v2); + } + fflush(stdout); + PySys_WriteStderr("Error in sys.excepthook:\n"); + PyErr_Display(exception2, v2, tb2); + PySys_WriteStderr("\nOriginal exception was:\n"); + PyErr_Display(exception, v, tb); + Py_DECREF(exception2); + Py_DECREF(v2); + Py_XDECREF(tb2); + } + Py_XDECREF(result); + Py_XDECREF(args); + } else { + PySys_WriteStderr("sys.excepthook is missing\n"); + PyErr_Display(exception, v, tb); + } + Py_XDECREF(exception); + Py_XDECREF(v); + Py_XDECREF(tb); +} + +static void +print_exception(PyObject *f, PyObject *value) +{ + int err = 0; + PyObject *type, *tb; + _Py_IDENTIFIER(print_file_and_line); + + if (!PyExceptionInstance_Check(value)) { + PyFile_WriteString("TypeError: print_exception(): Exception expected for value, ", f); + PyFile_WriteString(Py_TYPE(value)->tp_name, f); + PyFile_WriteString(" found\n", f); + return; + } + + Py_INCREF(value); + fflush(stdout); + type = (PyObject *) Py_TYPE(value); + tb = PyException_GetTraceback(value); + if (tb && tb != Py_None) + err = PyTraceBack_Print(tb, f); + if (err == 0 && + _PyObject_HasAttrId(value, &PyId_print_file_and_line)) + { + PyObject *message; + const char *filename, *text; + int lineno, offset; + if (!parse_syntax_error(value, &message, &filename, + &lineno, &offset, &text)) + PyErr_Clear(); + else { + char buf[10]; + PyFile_WriteString(" File \"", f); + if (filename == NULL) + PyFile_WriteString("", f); + else + PyFile_WriteString(filename, f); + PyFile_WriteString("\", line ", f); + PyOS_snprintf(buf, sizeof(buf), "%d", lineno); + PyFile_WriteString(buf, f); + PyFile_WriteString("\n", f); + if (text != NULL) + print_error_text(f, offset, text); + Py_DECREF(value); + value = message; + /* Can't be bothered to check all those + PyFile_WriteString() calls */ + if (PyErr_Occurred()) + err = -1; + } + } + if (err) { + /* Don't do anything else */ + } + else { + PyObject* moduleName; + char* className; + _Py_IDENTIFIER(__module__); + assert(PyExceptionClass_Check(type)); + className = PyExceptionClass_Name(type); + if (className != NULL) { + char *dot = strrchr(className, '.'); + if (dot != NULL) + className = dot+1; + } + + moduleName = _PyObject_GetAttrId(type, &PyId___module__); + if (moduleName == NULL || !PyUnicode_Check(moduleName)) + { + Py_XDECREF(moduleName); + err = PyFile_WriteString("", f); + } + else { + char* modstr = _PyUnicode_AsString(moduleName); + if (modstr && strcmp(modstr, "builtins")) + { + err = PyFile_WriteString(modstr, f); + err += PyFile_WriteString(".", f); + } + Py_DECREF(moduleName); + } + if (err == 0) { + if (className == NULL) + err = PyFile_WriteString("", f); + else + err = PyFile_WriteString(className, f); + } + } + if (err == 0 && (value != Py_None)) { + PyObject *s = PyObject_Str(value); + /* only print colon if the str() of the + object is not the empty string + */ + if (s == NULL) + err = -1; + else if (!PyUnicode_Check(s) || + PyUnicode_GetLength(s) != 0) + err = PyFile_WriteString(": ", f); + if (err == 0) + err = PyFile_WriteObject(s, f, Py_PRINT_RAW); + Py_XDECREF(s); + } + /* try to write a newline in any case */ + err += PyFile_WriteString("\n", f); + Py_XDECREF(tb); + Py_DECREF(value); + /* If an error happened here, don't show it. + XXX This is wrong, but too many callers rely on this behavior. */ + if (err != 0) + PyErr_Clear(); +} + +static const char *cause_message = + "\nThe above exception was the direct cause " + "of the following exception:\n\n"; + +static const char *context_message = + "\nDuring handling of the above exception, " + "another exception occurred:\n\n"; + +static void +print_exception_recursive(PyObject *f, PyObject *value, PyObject *seen) +{ + int err = 0, res; + PyObject *cause, *context; + + if (seen != NULL) { + /* Exception chaining */ + if (PySet_Add(seen, value) == -1) + PyErr_Clear(); + else if (PyExceptionInstance_Check(value)) { + cause = PyException_GetCause(value); + context = PyException_GetContext(value); + if (cause) { + res = PySet_Contains(seen, cause); + if (res == -1) + PyErr_Clear(); + if (res == 0) { + print_exception_recursive( + f, cause, seen); + err |= PyFile_WriteString( + cause_message, f); + } + } + else if (context) { + res = PySet_Contains(seen, context); + if (res == -1) + PyErr_Clear(); + if (res == 0) { + print_exception_recursive( + f, context, seen); + err |= PyFile_WriteString( + context_message, f); + } + } + Py_XDECREF(context); + Py_XDECREF(cause); + } + } + print_exception(f, value); + if (err != 0) + PyErr_Clear(); +} + +void +PyErr_Display(PyObject *exception, PyObject *value, PyObject *tb) +{ + PyObject *seen; + PyObject *f = PySys_GetObject("stderr"); + if (f == Py_None) { + /* pass */ + } + else if (f == NULL) { + _PyObject_Dump(value); + fprintf(stderr, "lost sys.stderr\n"); + } + else { + /* We choose to ignore seen being possibly NULL, and report + at least the main exception (it could be a MemoryError). + */ + seen = PySet_New(NULL); + if (seen == NULL) + PyErr_Clear(); + print_exception_recursive(f, value, seen); + Py_XDECREF(seen); + } +} + +PyObject * +PyRun_StringFlags(const char *str, int start, PyObject *globals, + PyObject *locals, PyCompilerFlags *flags) +{ + PyObject *ret = NULL; + mod_ty mod; + PyArena *arena = PyArena_New(); + if (arena == NULL) + return NULL; + + mod = PyParser_ASTFromString(str, "", start, flags, arena); + if (mod != NULL) + ret = run_mod(mod, "", globals, locals, flags, arena); + PyArena_Free(arena); + return ret; +} + +PyObject * +PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, + PyObject *locals, int closeit, PyCompilerFlags *flags) +{ + PyObject *ret; + mod_ty mod; + PyArena *arena = PyArena_New(); + if (arena == NULL) + return NULL; + + mod = PyParser_ASTFromFile(fp, filename, NULL, start, 0, 0, + flags, NULL, arena); + if (closeit) + fclose(fp); + if (mod == NULL) { + PyArena_Free(arena); + return NULL; + } + ret = run_mod(mod, filename, globals, locals, flags, arena); + PyArena_Free(arena); + return ret; +} + +static void +flush_io(void) +{ + PyObject *f, *r; + PyObject *type, *value, *traceback; + _Py_IDENTIFIER(flush); + + /* Save the current exception */ + PyErr_Fetch(&type, &value, &traceback); + + f = PySys_GetObject("stderr"); + if (f != NULL) { + r = _PyObject_CallMethodId(f, &PyId_flush, ""); + if (r) + Py_DECREF(r); + else + PyErr_Clear(); + } + f = PySys_GetObject("stdout"); + if (f != NULL) { + r = _PyObject_CallMethodId(f, &PyId_flush, ""); + if (r) + Py_DECREF(r); + else + PyErr_Clear(); + } + + PyErr_Restore(type, value, traceback); +} + +static PyObject * +run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals, + PyCompilerFlags *flags, PyArena *arena) +{ + PyCodeObject *co; + PyObject *v; + co = PyAST_Compile(mod, filename, flags, arena); + if (co == NULL) + return NULL; + v = PyEval_EvalCode((PyObject*)co, globals, locals); + Py_DECREF(co); + return v; +} + +static PyObject * +run_pyc_file(FILE *fp, const char *filename, PyObject *globals, + PyObject *locals, PyCompilerFlags *flags) +{ + PyCodeObject *co; + PyObject *v; + long magic; + long PyImport_GetMagicNumber(void); + + magic = PyMarshal_ReadLongFromFile(fp); + if (magic != PyImport_GetMagicNumber()) { + PyErr_SetString(PyExc_RuntimeError, + "Bad magic number in .pyc file"); + return NULL; + } + (void) PyMarshal_ReadLongFromFile(fp); + v = PyMarshal_ReadLastObjectFromFile(fp); + fclose(fp); + if (v == NULL || !PyCode_Check(v)) { + Py_XDECREF(v); + PyErr_SetString(PyExc_RuntimeError, + "Bad code object in .pyc file"); + return NULL; + } + co = (PyCodeObject *)v; + v = PyEval_EvalCode((PyObject*)co, globals, locals); + if (v && flags) + flags->cf_flags |= (co->co_flags & PyCF_MASK); + Py_DECREF(co); + return v; +} + +PyObject * +Py_CompileStringExFlags(const char *str, const char *filename, int start, + PyCompilerFlags *flags, int optimize) +{ + PyCodeObject *co; + mod_ty mod; + PyArena *arena = PyArena_New(); + if (arena == NULL) + return NULL; + + mod = PyParser_ASTFromString(str, filename, start, flags, arena); + if (mod == NULL) { + PyArena_Free(arena); + return NULL; + } + if (flags && (flags->cf_flags & PyCF_ONLY_AST)) { + PyObject *result = PyAST_mod2obj(mod); + PyArena_Free(arena); + return result; + } + co = PyAST_CompileEx(mod, filename, flags, optimize, arena); + PyArena_Free(arena); + return (PyObject *)co; +} + +/* For use in Py_LIMITED_API */ +#undef Py_CompileString +PyObject * +PyCompileString(const char *str, const char *filename, int start) +{ + return Py_CompileStringFlags(str, filename, start, NULL); +} + +struct symtable * +Py_SymtableString(const char *str, const char *filename, int start) +{ + struct symtable *st; + mod_ty mod; + PyCompilerFlags flags; + PyArena *arena = PyArena_New(); + if (arena == NULL) + return NULL; + + flags.cf_flags = 0; + mod = PyParser_ASTFromString(str, filename, start, &flags, arena); + if (mod == NULL) { + PyArena_Free(arena); + return NULL; + } + st = PySymtable_Build(mod, filename, 0); + PyArena_Free(arena); + return st; +} + +/* Preferred access to parser is through AST. */ +mod_ty +PyParser_ASTFromString(const char *s, const char *filename, int start, + PyCompilerFlags *flags, PyArena *arena) +{ + mod_ty mod; + PyCompilerFlags localflags; + perrdetail err; + int iflags = PARSER_FLAGS(flags); + + node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, + &_PyParser_Grammar, start, &err, + &iflags); + if (flags == NULL) { + localflags.cf_flags = 0; + flags = &localflags; + } + if (n) { + flags->cf_flags |= iflags & PyCF_MASK; + mod = PyAST_FromNode(n, flags, filename, arena); + PyNode_Free(n); + } + else { + err_input(&err); + mod = NULL; + } + err_free(&err); + return mod; +} + +mod_ty +PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc, + int start, char *ps1, + char *ps2, PyCompilerFlags *flags, int *errcode, + PyArena *arena) +{ + mod_ty mod; + PyCompilerFlags localflags; + perrdetail err; + int iflags = PARSER_FLAGS(flags); + + node *n = PyParser_ParseFileFlagsEx(fp, filename, enc, + &_PyParser_Grammar, + start, ps1, ps2, &err, &iflags); + if (flags == NULL) { + localflags.cf_flags = 0; + flags = &localflags; + } + if (n) { + flags->cf_flags |= iflags & PyCF_MASK; + mod = PyAST_FromNode(n, flags, filename, arena); + PyNode_Free(n); + } + else { + err_input(&err); + if (errcode) + *errcode = err.error; + mod = NULL; + } + err_free(&err); + return mod; +} + +/* Simplified interface to parsefile -- return node or set exception */ + +node * +PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags) +{ + perrdetail err; + node *n = PyParser_ParseFileFlags(fp, filename, NULL, + &_PyParser_Grammar, + start, NULL, NULL, &err, flags); + if (n == NULL) + err_input(&err); + err_free(&err); + + return n; +} + +/* Simplified interface to parsestring -- return node or set exception */ + +node * +PyParser_SimpleParseStringFlags(const char *str, int start, int flags) +{ + perrdetail err; + node *n = PyParser_ParseStringFlags(str, &_PyParser_Grammar, + start, &err, flags); + if (n == NULL) + err_input(&err); + err_free(&err); + return n; +} + +node * +PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename, + int start, int flags) +{ + perrdetail err; + node *n = PyParser_ParseStringFlagsFilename(str, filename, + &_PyParser_Grammar, start, &err, flags); + if (n == NULL) + err_input(&err); + err_free(&err); + return n; +} + +node * +PyParser_SimpleParseStringFilename(const char *str, const char *filename, int start) +{ + return PyParser_SimpleParseStringFlagsFilename(str, filename, start, 0); +} + +/* May want to move a more generalized form of this to parsetok.c or + even parser modules. */ + +void +PyParser_ClearError(perrdetail *err) +{ + err_free(err); +} + +void +PyParser_SetError(perrdetail *err) +{ + err_input(err); +} + +static void +err_free(perrdetail *err) +{ + Py_CLEAR(err->filename); +} + +/* Set the error appropriate to the given input error code (see errcode.h) */ + +static void +err_input(perrdetail *err) +{ + PyObject *v, *w, *errtype, *errtext; + PyObject *msg_obj = NULL; + char *msg = NULL; + + errtype = PyExc_SyntaxError; + switch (err->error) { + case E_ERROR: + return; + case E_SYNTAX: + errtype = PyExc_IndentationError; + if (err->expected == INDENT) + msg = "expected an indented block"; + else if (err->token == INDENT) + msg = "unexpected indent"; + else if (err->token == DEDENT) + msg = "unexpected unindent"; + else { + errtype = PyExc_SyntaxError; + msg = "invalid syntax"; + } + break; + case E_TOKEN: + msg = "invalid token"; + break; + case E_EOFS: + msg = "EOF while scanning triple-quoted string literal"; + break; + case E_EOLS: + msg = "EOL while scanning string literal"; + break; + case E_INTR: + if (!PyErr_Occurred()) + PyErr_SetNone(PyExc_KeyboardInterrupt); + goto cleanup; + case E_NOMEM: + PyErr_NoMemory(); + goto cleanup; + case E_EOF: + msg = "unexpected EOF while parsing"; + break; + case E_TABSPACE: + errtype = PyExc_TabError; + msg = "inconsistent use of tabs and spaces in indentation"; + break; + case E_OVERFLOW: + msg = "expression too long"; + break; + case E_DEDENT: + errtype = PyExc_IndentationError; + msg = "unindent does not match any outer indentation level"; + break; + case E_TOODEEP: + errtype = PyExc_IndentationError; + msg = "too many levels of indentation"; + break; + case E_DECODE: { + PyObject *type, *value, *tb; + PyErr_Fetch(&type, &value, &tb); + msg = "unknown decode error"; + if (value != NULL) + msg_obj = PyObject_Str(value); + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(tb); + break; + } + case E_LINECONT: + msg = "unexpected character after line continuation character"; + break; + + case E_IDENTIFIER: + msg = "invalid character in identifier"; + break; + default: + fprintf(stderr, "error=%d\n", err->error); + msg = "unknown parsing error"; + break; + } + /* err->text may not be UTF-8 in case of decoding errors. + Explicitly convert to an object. */ + if (!err->text) { + errtext = Py_None; + Py_INCREF(Py_None); + } else { + errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), + "replace"); + } + v = Py_BuildValue("(OiiN)", err->filename, + err->lineno, err->offset, errtext); + if (v != NULL) { + if (msg_obj) + w = Py_BuildValue("(OO)", msg_obj, v); + else + w = Py_BuildValue("(sO)", msg, v); + } else + w = NULL; + Py_XDECREF(v); + PyErr_SetObject(errtype, w); + Py_XDECREF(w); +cleanup: + Py_XDECREF(msg_obj); + if (err->text != NULL) { + PyObject_FREE(err->text); + err->text = NULL; + } +} + +/* Print fatal error message and abort */ + +void +Py_FatalError(const char *msg) +{ + const int fd = fileno(stderr); + PyThreadState *tstate; + + fprintf(stderr, "Fatal Python error: %s\n", msg); + fflush(stderr); /* it helps in Windows debug build */ + if (PyErr_Occurred()) { + PyErr_PrintEx(0); + } + else { + tstate = _Py_atomic_load_relaxed(&_PyThreadState_Current); + if (tstate != NULL) { + fputc('\n', stderr); + fflush(stderr); + _Py_DumpTracebackThreads(fd, tstate->interp, tstate); + } + _PyFaulthandler_Fini(); + } + + abort(); +} + +/* Clean up and exit */ + +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +static void (*pyexitfunc)(void) = NULL; +/* For the atexit module. */ +void _Py_PyAtExit(void (*func)(void)) +{ + pyexitfunc = func; +} + +static void +call_py_exitfuncs(void) +{ + if (pyexitfunc == NULL) + return; + + (*pyexitfunc)(); + PyErr_Clear(); +} + +/* Wait until threading._shutdown completes, provided + the threading module was imported in the first place. + The shutdown routine will wait until all non-daemon + "threading" threads have completed. */ +static void +wait_for_thread_shutdown(void) +{ +#ifdef WITH_THREAD + _Py_IDENTIFIER(_shutdown); + PyObject *result; + PyThreadState *tstate = PyThreadState_GET(); + PyObject *threading = PyMapping_GetItemString(tstate->interp->modules, + "threading"); + if (threading == NULL) { + /* threading not imported */ + PyErr_Clear(); + return; + } + result = _PyObject_CallMethodId(threading, &PyId__shutdown, ""); + if (result == NULL) { + PyErr_WriteUnraisable(threading); + } + else { + Py_DECREF(result); + } + Py_DECREF(threading); +#endif +} + +#define NEXITFUNCS 32 +static void (*exitfuncs[NEXITFUNCS])(void); +static int nexitfuncs = 0; + +int Py_AtExit(void (*func)(void)) +{ + if (nexitfuncs >= NEXITFUNCS) + return -1; + exitfuncs[nexitfuncs++] = func; + return 0; +} + +static void +call_ll_exitfuncs(void) +{ + while (nexitfuncs > 0) + (*exitfuncs[--nexitfuncs])(); + + fflush(stdout); + fflush(stderr); +} + +void +Py_Exit(int sts) +{ + Py_Finalize(); + + exit(sts); +} + +static void +initsigs(void) +{ +#ifdef SIGPIPE + PyOS_setsig(SIGPIPE, SIG_IGN); +#endif +#ifdef SIGXFZ + PyOS_setsig(SIGXFZ, SIG_IGN); +#endif +#ifdef SIGXFSZ + PyOS_setsig(SIGXFSZ, SIG_IGN); +#endif + PyOS_InitInterrupts(); /* May imply initsignal() */ +} + + +/* Restore signals that the interpreter has called SIG_IGN on to SIG_DFL. + * + * All of the code in this function must only use async-signal-safe functions, + * listed at `man 7 signal` or + * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. + */ +void +_Py_RestoreSignals(void) +{ +#ifdef SIGPIPE + PyOS_setsig(SIGPIPE, SIG_DFL); +#endif +#ifdef SIGXFZ + PyOS_setsig(SIGXFZ, SIG_DFL); +#endif +#ifdef SIGXFSZ + PyOS_setsig(SIGXFSZ, SIG_DFL); +#endif +} + + +/* + * The file descriptor fd is considered ``interactive'' if either + * a) isatty(fd) is TRUE, or + * b) the -i flag was given, and the filename associated with + * the descriptor is NULL or "" or "???". + */ +int +Py_FdIsInteractive(FILE *fp, const char *filename) +{ + if (isatty((int)fileno(fp))) + return 1; + if (!Py_InteractiveFlag) + return 0; + return (filename == NULL) || + (strcmp(filename, "") == 0) || + (strcmp(filename, "???") == 0); +} + +/* Wrappers around sigaction() or signal(). */ + +PyOS_sighandler_t +PyOS_getsig(int sig) +{ +#ifdef HAVE_SIGACTION + struct sigaction context; + if (sigaction(sig, NULL, &context) == -1) + return SIG_ERR; + return context.sa_handler; +#else + PyOS_sighandler_t handler; + handler = signal(sig, SIG_IGN); + if (handler != SIG_ERR) + signal(sig, handler); + return handler; +#endif +} + +/* + * All of the code in this function must only use async-signal-safe functions, + * listed at `man 7 signal` or + * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. + */ +PyOS_sighandler_t +PyOS_setsig(int sig, PyOS_sighandler_t handler) +{ +#ifdef HAVE_SIGACTION + /* Some code in Modules/signalmodule.c depends on sigaction() being + * used here if HAVE_SIGACTION is defined. Fix that if this code + * changes to invalidate that assumption. + */ + struct sigaction context, ocontext; + context.sa_handler = handler; + sigemptyset(&context.sa_mask); + context.sa_flags = 0; + if (sigaction(sig, &context, &ocontext) == -1) + return SIG_ERR; + return ocontext.sa_handler; +#else + PyOS_sighandler_t oldhandler; + oldhandler = signal(sig, handler); +#ifdef HAVE_SIGINTERRUPT + siginterrupt(sig, 1); +#endif + return oldhandler; +#endif +} +