Mercurial > lcfOS
changeset 217:8b2e5f3cd579
Removed some stale python source files
author | Windel Bouwman |
---|---|
date | Fri, 05 Jul 2013 14:13:59 +0200 |
parents | 57c032c5e753 |
children | 494828a7adf1 |
files | cos/Makefile cos/python/Include/Python.h cos/python/Include/boolobject.h cos/python/Include/ceval.h cos/python/Include/longobject.h cos/python/Include/object.h cos/python/Modules/gcmodule.c cos/python/Modules/main.c cos/python/Objects/boolobject.c cos/python/Objects/listobject.c cos/python/Objects/longobject.c cos/python/Objects/object.c cos/python/Objects/tupleobject.c cos/python/Objects/unicodeobject.c cos/python/Python/bltinmodule.c cos/python/Python/ceval.c cos/python/Python/errors.c cos/python/Python/import.c cos/python/Python/pystate.c cos/python/Python/pythonrun.c cos/utils/bin2c.py cos/utils/makeinitrd.py python/bin2c.py python/c3/analyse.py python/c3/astnodes.py python/c3/codegenerator.py python/c3/scope.py python/makeinitrd.py python/testc3.py |
diffstat | 29 files changed, 111 insertions(+), 42383 deletions(-) [+] |
line wrap: on
line diff
--- a/cos/Makefile Fri Jul 05 13:26:51 2013 +0200 +++ b/cos/Makefile Fri Jul 05 14:13:59 2013 +0200 @@ -3,7 +3,7 @@ all: make -C kernel make -C hello - python utils/makeinitrd.py lcfosinitrd.img hello/hello.bin + python ../python/makeinitrd.py lcfosinitrd.img hello/hello.bin cp grub/emptybootdisk.img bootdisk.img mcopy -D o -i bootdisk.img kernel/lcfos.bin :: mcopy -D o -i bootdisk.img lcfosinitrd.img ::
--- a/cos/python/Include/Python.h Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -#ifndef Py_PYTHON_H -#define Py_PYTHON_H - -/* Include nearly all Python header files */ - -// #include "pyatomic.h" - -//#include "pymath.h" -#include "pytime.h" -#include "pymem.h" - -#include "object.h" -#include "objimpl.h" -#include "typeslots.h" - -#include "pydebug.h" - -#include "bytearrayobject.h" -#include "bytesobject.h" -#include "unicodeobject.h" -#include "longobject.h" -#include "longintrepr.h" -#include "boolobject.h" -#include "floatobject.h" -#include "complexobject.h" -#include "rangeobject.h" -#include "memoryobject.h" -#include "tupleobject.h" -#include "listobject.h" -#include "dictobject.h" -#include "enumobject.h" -#include "setobject.h" -#include "methodobject.h" -#include "moduleobject.h" -#include "funcobject.h" -#include "classobject.h" -#include "fileobject.h" -#include "pycapsule.h" -#include "traceback.h" -#include "sliceobject.h" -#include "cellobject.h" -#include "iterobject.h" -#include "genobject.h" -#include "descrobject.h" -#include "warnings.h" -#include "weakrefobject.h" -#include "structseq.h" -#include "accu.h" - -#include "codecs.h" -#include "pyerrors.h" - -#include "pystate.h" - -#include "pyarena.h" -#include "modsupport.h" -#include "pythonrun.h" -#include "ceval.h" -#include "sysmodule.h" -#include "intrcheck.h" -#include "import.h" - -#include "abstract.h" -#include "bltinmodule.h" - -#include "compile.h" -#include "eval.h" - -#include "pyctype.h" -#include "pystrtod.h" -#include "pystrcmp.h" -#include "dtoa.h" -#include "fileutils.h" -#include "pyfpe.h" - -#endif /* !Py_PYTHON_H */
--- a/cos/python/Include/boolobject.h Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -/* Boolean object interface */ - -#ifndef Py_BOOLOBJECT_H -#define Py_BOOLOBJECT_H - -PyAPI_DATA(PyTypeObject) PyBool_Type; - -#define PyBool_Check(x) (Py_TYPE(x) == &PyBool_Type) - -/* Py_False and Py_True are the only two bools in existence. -Don't forget to apply Py_INCREF() when returning either!!! */ - -/* Don't use these directly */ -PyAPI_DATA(struct _longobject) _Py_FalseStruct, _Py_TrueStruct; - -/* Use these macros */ -#define Py_False ((PyObject *) &_Py_FalseStruct) -#define Py_True ((PyObject *) &_Py_TrueStruct) - -/* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True -#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False - -/* Function to return a bool from a C long */ -PyAPI_FUNC(PyObject *) PyBool_FromLong(long); - -#endif /* !Py_BOOLOBJECT_H */
--- a/cos/python/Include/ceval.h Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ -#ifndef Py_CEVAL_H -#define Py_CEVAL_H - -/* Interface to random parts in ceval.c */ - -PyAPI_FUNC(PyObject *) PyEval_CallObjectWithKeywords( - PyObject *, PyObject *, PyObject *); - -/* Inline this */ -#define PyEval_CallObject(func,arg) \ - PyEval_CallObjectWithKeywords(func, arg, (PyObject *)NULL) - -PyAPI_FUNC(PyObject *) PyEval_CallFunction(PyObject *obj, - const char *format, ...); -PyAPI_FUNC(PyObject *) PyEval_CallMethod(PyObject *obj, - const char *methodname, - const char *format, ...); - -PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); -PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); - -struct _frame; /* Avoid including frameobject.h */ - -PyAPI_FUNC(PyObject *) PyEval_GetBuiltins(void); -PyAPI_FUNC(PyObject *) PyEval_GetGlobals(void); -PyAPI_FUNC(PyObject *) PyEval_GetLocals(void); -PyAPI_FUNC(struct _frame *) PyEval_GetFrame(void); - -/* Look at the current frame's (if any) code's co_flags, and turn on - the corresponding compiler flags in cf->cf_flags. Return 1 if any - flag was set, else return 0. */ -PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); - -PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); -PyAPI_FUNC(int) Py_MakePendingCalls(void); - -/* Protection against deeply nested recursive calls - - In Python 3.0, this protection has two levels: - * normal anti-recursion protection is triggered when the recursion level - exceeds the current recursion limit. It raises a RuntimeError, and sets - the "overflowed" flag in the thread state structure. This flag - temporarily *disables* the normal protection; this allows cleanup code - to potentially outgrow the recursion limit while processing the - RuntimeError. - * "last chance" anti-recursion protection is triggered when the recursion - level exceeds "current recursion limit + 50". By construction, this - protection can only be triggered when the "overflowed" flag is set. It - means the cleanup code has itself gone into an infinite loop, or the - RuntimeError has been mistakingly ignored. When this protection is - triggered, the interpreter aborts with a Fatal Error. - - In addition, the "overflowed" flag is automatically reset when the - recursion level drops below "current recursion limit - 50". This heuristic - is meant to ensure that the normal anti-recursion protection doesn't get - disabled too long. - - Please note: this scheme has its own limitations. See: - http://mail.python.org/pipermail/python-dev/2008-August/082106.html - for some observations. -*/ -PyAPI_FUNC(void) Py_SetRecursionLimit(int); -PyAPI_FUNC(int) Py_GetRecursionLimit(void); - -#define Py_EnterRecursiveCall(where) \ - (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \ - _Py_CheckRecursiveCall(where)) -#define Py_LeaveRecursiveCall() \ - do{ if(_Py_MakeEndRecCheck(PyThreadState_GET()->recursion_depth)) \ - PyThreadState_GET()->overflowed = 0; \ - } while(0) -PyAPI_FUNC(int) _Py_CheckRecursiveCall(char *where); -PyAPI_DATA(int) _Py_CheckRecursionLimit; - -# define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit) - -#define _Py_MakeEndRecCheck(x) \ - (--(x) < ((_Py_CheckRecursionLimit > 100) \ - ? (_Py_CheckRecursionLimit - 50) \ - : (3 * (_Py_CheckRecursionLimit >> 2)))) - -#define Py_ALLOW_RECURSION \ - do { unsigned char _old = PyThreadState_GET()->recursion_critical;\ - PyThreadState_GET()->recursion_critical = 1; - -#define Py_END_ALLOW_RECURSION \ - PyThreadState_GET()->recursion_critical = _old; \ - } while(0); - -PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *); -PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *); - -PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *); -PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *); -PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc); - -/* Interface for threads. - - A module that plans to do a blocking system call (or something else - that lasts a long time and doesn't touch Python data) can allow other - threads to run as follows: - - ...preparations here... - Py_BEGIN_ALLOW_THREADS - ...blocking system call here... - Py_END_ALLOW_THREADS - ...interpret result here... - - The Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS pair expands to a - {}-surrounded block. - To leave the block in the middle (e.g., with return), you must insert - a line containing Py_BLOCK_THREADS before the return, e.g. - - if (...premature_exit...) { - Py_BLOCK_THREADS - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - An alternative is: - - Py_BLOCK_THREADS - if (...premature_exit...) { - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - Py_UNBLOCK_THREADS - - For convenience, that the value of 'errno' is restored across - Py_END_ALLOW_THREADS and Py_BLOCK_THREADS. - - WARNING: NEVER NEST CALLS TO Py_BEGIN_ALLOW_THREADS AND - Py_END_ALLOW_THREADS!!! - - The function PyEval_InitThreads() should be called only from - init_thread() in "_threadmodule.c". - - Note that not yet all candidates have been converted to use this - mechanism! -*/ - -PyAPI_FUNC(PyThreadState *) PyEval_SaveThread(void); -PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); - -#ifdef WITH_THREAD - -PyAPI_FUNC(int) PyEval_ThreadsInitialized(void); -PyAPI_FUNC(void) PyEval_InitThreads(void); -PyAPI_FUNC(void) _PyEval_FiniThreads(void); -PyAPI_FUNC(void) PyEval_AcquireLock(void); -PyAPI_FUNC(void) PyEval_ReleaseLock(void); -PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); -PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); -PyAPI_FUNC(void) PyEval_ReInitThreads(void); - -PyAPI_FUNC(void) _PyEval_SetSwitchInterval(unsigned long microseconds); -PyAPI_FUNC(unsigned long) _PyEval_GetSwitchInterval(void); - -#define Py_BEGIN_ALLOW_THREADS { \ - PyThreadState *_save; \ - _save = PyEval_SaveThread(); -#define Py_BLOCK_THREADS PyEval_RestoreThread(_save); -#define Py_UNBLOCK_THREADS _save = PyEval_SaveThread(); -#define Py_END_ALLOW_THREADS PyEval_RestoreThread(_save); \ - } - -#else /* !WITH_THREAD */ - -#define Py_BEGIN_ALLOW_THREADS { -#define Py_BLOCK_THREADS -#define Py_UNBLOCK_THREADS -#define Py_END_ALLOW_THREADS } - -#endif /* !WITH_THREAD */ - -PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); -PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void); - - -#endif /* !Py_CEVAL_H */
--- a/cos/python/Include/longobject.h Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,166 +0,0 @@ -#ifndef Py_LONGOBJECT_H -#define Py_LONGOBJECT_H - -/* Long (arbitrary precision) integer object interface */ - -typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */ - -PyAPI_DATA(PyTypeObject) PyLong_Type; - -#define PyLong_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS) -#define PyLong_CheckExact(op) (Py_TYPE(op) == &PyLong_Type) - -PyAPI_FUNC(PyObject *) PyLong_FromLong(long); -PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLong(unsigned long); -PyAPI_FUNC(PyObject *) PyLong_FromSize_t(size_t); -PyAPI_FUNC(PyObject *) PyLong_FromSsize_t(Py_ssize_t); -PyAPI_FUNC(PyObject *) PyLong_FromDouble(double); -PyAPI_FUNC(long) PyLong_AsLong(PyObject *); -PyAPI_FUNC(long) PyLong_AsLongAndOverflow(PyObject *, int *); -PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); -PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *); -PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); -PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); -PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); - -/* It may be useful in the future. I've added it in the PyInt -> PyLong - cleanup to keep the extra information. [CH] */ -#define PyLong_AS_LONG(op) PyLong_AsLong(op) - -/* Issue #1983: pid_t can be longer than a C long on some systems */ -#if !defined(SIZEOF_PID_T) || SIZEOF_PID_T == SIZEOF_INT -#define _Py_PARSE_PID "i" -#define PyLong_FromPid PyLong_FromLong -#define PyLong_AsPid PyLong_AsLong -#elif SIZEOF_PID_T == SIZEOF_LONG -#define _Py_PARSE_PID "l" -#define PyLong_FromPid PyLong_FromLong -#define PyLong_AsPid PyLong_AsLong -#elif defined(SIZEOF_LONG_LONG) && SIZEOF_PID_T == SIZEOF_LONG_LONG -#define _Py_PARSE_PID "L" -#define PyLong_FromPid PyLong_FromLongLong -#define PyLong_AsPid PyLong_AsLongLong -#else -#error "sizeof(pid_t) is neither sizeof(int), sizeof(long) or sizeof(long long)" -#endif /* SIZEOF_PID_T */ - -/* Used by Python/mystrtoul.c. */ -#ifndef Py_LIMITED_API -PyAPI_DATA(unsigned char) _PyLong_DigitValue[256]; -#endif - -/* _PyLong_Frexp returns a double x and an exponent e such that the - true value is approximately equal to x * 2**e. e is >= 0. x is - 0.0 if and only if the input is 0 (in which case, e and x are both - zeroes); otherwise, 0.5 <= abs(x) < 1.0. On overflow, which is - possible if the number of bits doesn't fit into a Py_ssize_t, sets - OverflowError and returns -1.0 for x, 0 for e. */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(double) _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e); -#endif - -PyAPI_FUNC(double) PyLong_AsDouble(PyObject *); -PyAPI_FUNC(PyObject *) PyLong_FromVoidPtr(void *); -PyAPI_FUNC(void *) PyLong_AsVoidPtr(PyObject *); - -#ifdef HAVE_LONG_LONG -PyAPI_FUNC(PyObject *) PyLong_FromLongLong(PY_LONG_LONG); -PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG); -PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLong(PyObject *); -PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *); -PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLongMask(PyObject *); -PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *); -#endif /* HAVE_LONG_LONG */ - -PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); -PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base); -#endif - -#ifndef Py_LIMITED_API -/* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. - v must not be NULL, and must be a normalized long. - There are no error cases. -*/ -PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); - - -/* _PyLong_NumBits. Return the number of bits needed to represent the - absolute value of a long. For example, this returns 1 for 1 and -1, 2 - for 2 and -2, and 2 for 3 and -3. It returns 0 for 0. - v must not be NULL, and must be a normalized long. - (size_t)-1 is returned and OverflowError set if the true result doesn't - fit in a size_t. -*/ -PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v); - -/* _PyLong_DivmodNear. Given integers a and b, compute the nearest - integer q to the exact quotient a / b, rounding to the nearest even integer - in the case of a tie. Return (q, r), where r = a - q*b. The remainder r - will satisfy abs(r) <= abs(b)/2, with equality possible only if q is - even. -*/ -PyAPI_FUNC(PyObject *) _PyLong_DivmodNear(PyObject *, PyObject *); - -/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in - base 256, and return a Python long with the same numeric value. - If n is 0, the integer is 0. Else: - If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB; - else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the - LSB. - If is_signed is 0/false, view the bytes as a non-negative integer. - If is_signed is 1/true, view the bytes as a 2's-complement integer, - non-negative if bit 0x80 of the MSB is clear, negative if set. - Error returns: - + Return NULL with the appropriate exception set if there's not - enough memory to create the Python long. -*/ -PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( - const unsigned char* bytes, size_t n, - int little_endian, int is_signed); - -/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long - v to a base-256 integer, stored in array bytes. Normally return 0, - return -1 on error. - If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at - bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and - the LSB at bytes[n-1]. - If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes - are filled and there's nothing special about bit 0x80 of the MSB. - If is_signed is 1/true, bytes is filled with the 2's-complement - representation of v's value. Bit 0x80 of the MSB is the sign bit. - Error returns (-1): - + is_signed is 0 and v < 0. TypeError is set in this case, and bytes - isn't altered. - + n isn't big enough to hold the full mathematical value of v. For - example, if is_signed is 0 and there are more digits in the v than - fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of - being large enough to hold a sign bit. OverflowError is set in this - case, but bytes holds the least-signficant n bytes of the true value. -*/ -PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, - unsigned char* bytes, size_t n, - int little_endian, int is_signed); - - -/* _PyLong_Format: Convert the long to a string object with given base, - appending a base prefix of 0[box] if base is 2, 8 or 16. */ -PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); -#endif /* Py_LIMITED_API */ - -/* These aren't really part of the long object, but they're handy. The - functions are in Python/mystrtoul.c. - */ -PyAPI_FUNC(unsigned long) PyOS_strtoul(char *, char **, int); -PyAPI_FUNC(long) PyOS_strtol(char *, char **, int); - -#endif /* !Py_LONGOBJECT_H */
--- a/cos/python/Include/object.h Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,838 +0,0 @@ -#ifndef Py_OBJECT_H -#define Py_OBJECT_H - -/* Object and type object interface */ - -/* -Objects are structures allocated on the heap. Special rules apply to -the use of objects to ensure they are properly garbage-collected. -Objects are never allocated statically or on the stack; they must be -accessed through special macros and functions only. (Type objects are -exceptions to the first rule; the standard types are represented by -statically initialized type objects, although work on type/class unification -for Python 2.2 made it possible to have heap-allocated type objects too). - -An object has a 'reference count' that is increased or decreased when a -pointer to the object is copied or deleted; when the reference count -reaches zero there are no references to the object left and it can be -removed from the heap. - -An object has a 'type' that determines what it represents and what kind -of data it contains. An object's type is fixed when it is created. -Types themselves are represented as objects; an object contains a -pointer to the corresponding type object. The type itself has a type -pointer pointing to the object representing the type 'type', which -contains a pointer to itself!). - -Objects do not float around in memory; once allocated an object keeps -the same size and address. Objects that must hold variable-size data -can contain pointers to variable-size parts of the object. Not all -objects of the same type have the same size; but the size cannot change -after allocation. (These restrictions are made so a reference to an -object can be simply a pointer -- moving an object would require -updating all the pointers, and changing an object's size would require -moving it if there was another object right next to it.) - -Objects are always accessed through pointers of the type 'PyObject *'. -The type 'PyObject' is a structure that only contains the reference count -and the type pointer. The actual memory allocated for an object -contains other data that can only be accessed after casting the pointer -to a pointer to a longer structure type. This longer type must start -with the reference count and type fields; the macro PyObject_HEAD should be -used for this (to accommodate for future changes). The implementation -of a particular object type can cast the object pointer to the proper -type and back. - -A standard interface exists for objects that contain an array of items -whose size is determined when the object is allocated. -*/ - -/* PyObject_HEAD defines the initial segment of every PyObject. */ -#define PyObject_HEAD PyObject ob_base; - -#define PyObject_HEAD_INIT(type) { 1, type }, - -#define PyVarObject_HEAD_INIT(type, size) \ - { PyObject_HEAD_INIT(type) size }, - -/* PyObject_VAR_HEAD defines the initial segment of all variable-size - * container objects. These end with a declaration of an array with 1 - * element, but enough space is malloc'ed so that the array actually - * has room for ob_size elements. Note that ob_size is an element count, - * not necessarily a byte count. - */ -#define PyObject_VAR_HEAD PyVarObject ob_base; -#define Py_INVALID_SIZE (Py_ssize_t)-1 - -/* Nothing is actually declared to be a PyObject, but every pointer to - * a Python object can be cast to a PyObject*. This is inheritance built - * by hand. Similarly every pointer to a variable-size Python object can, - * in addition, be cast to PyVarObject*. - */ -typedef struct _object { - int ob_refcnt; - struct _typeobject *ob_type; -} PyObject; - -typedef struct { - PyObject ob_base; - int ob_size; /* Number of items in variable part */ -} PyVarObject; - -#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) - -/* -Type objects contain a string containing the type name (to help somewhat -in debugging), the allocation parameters (see PyObject_New() and -PyObject_NewVar()), -and methods for accessing objects of the type. Methods are optional, a -nil pointer meaning that particular kind of access is not available for -this type. The Py_DECREF() macro uses the tp_dealloc method without -checking for a nil pointer; it should always be implemented except if -the implementation can guarantee that the reference count will never -reach zero (e.g., for statically allocated type objects). - -NB: the methods for certain type groups are now contained in separate -method blocks. -*/ - -typedef PyObject * (*unaryfunc)(PyObject *); -typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); -typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); -typedef int (*inquiry)(PyObject *); -typedef int (*lenfunc)(PyObject *); -typedef PyObject *(*ssizeargfunc)(PyObject *, int); -typedef PyObject *(*ssizessizeargfunc)(PyObject *, int, int); -typedef int(*ssizeobjargproc)(PyObject *, int, PyObject *); -typedef int(*ssizessizeobjargproc)(PyObject *, int, int, PyObject *); -typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); - -/* buffer interface */ -typedef struct bufferinfo { - void *buf; - PyObject *obj; /* owned reference */ - Py_ssize_t len; - Py_ssize_t itemsize; /* This is Py_ssize_t so it can be - pointed to by strides in simple case.*/ - int readonly; - int ndim; - char *format; - Py_ssize_t *shape; - Py_ssize_t *strides; - Py_ssize_t *suboffsets; - Py_ssize_t smalltable[2]; /* static store for shape and strides of - mono-dimensional buffers. */ - void *internal; -} Py_buffer; - -typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); -typedef void (*releasebufferproc)(PyObject *, Py_buffer *); - - /* Flags for getting buffers */ -#define PyBUF_SIMPLE 0 -#define PyBUF_WRITABLE 0x0001 -/* we used to include an E, backwards compatible alias */ -#define PyBUF_WRITEABLE PyBUF_WRITABLE -#define PyBUF_FORMAT 0x0004 -#define PyBUF_ND 0x0008 -#define PyBUF_STRIDES (0x0010 | PyBUF_ND) -#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) -#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) -#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) -#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) - -#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) -#define PyBUF_CONTIG_RO (PyBUF_ND) - -#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) -#define PyBUF_STRIDED_RO (PyBUF_STRIDES) - -#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) - -#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) - - -#define PyBUF_READ 0x100 -#define PyBUF_WRITE 0x200 - -/* End buffer interface */ - -typedef int (*objobjproc)(PyObject *, PyObject *); -typedef int (*visitproc)(PyObject *, void *); -typedef int (*traverseproc)(PyObject *, visitproc, void *); - -typedef struct { - /* Number implementations must check *both* - arguments for proper type and implement the necessary conversions - in the slot functions themselves. */ - - binaryfunc nb_add; - binaryfunc nb_subtract; - binaryfunc nb_multiply; - binaryfunc nb_remainder; - binaryfunc nb_divmod; - ternaryfunc nb_power; - unaryfunc nb_negative; - unaryfunc nb_positive; - unaryfunc nb_absolute; - inquiry nb_bool; - unaryfunc nb_invert; - binaryfunc nb_lshift; - binaryfunc nb_rshift; - binaryfunc nb_and; - binaryfunc nb_xor; - binaryfunc nb_or; - unaryfunc nb_int; - void *nb_reserved; /* the slot formerly known as nb_long */ - unaryfunc nb_float; - - binaryfunc nb_inplace_add; - binaryfunc nb_inplace_subtract; - binaryfunc nb_inplace_multiply; - binaryfunc nb_inplace_remainder; - ternaryfunc nb_inplace_power; - binaryfunc nb_inplace_lshift; - binaryfunc nb_inplace_rshift; - binaryfunc nb_inplace_and; - binaryfunc nb_inplace_xor; - binaryfunc nb_inplace_or; - - binaryfunc nb_floor_divide; - binaryfunc nb_true_divide; - binaryfunc nb_inplace_floor_divide; - binaryfunc nb_inplace_true_divide; - - unaryfunc nb_index; -} PyNumberMethods; - -typedef struct { - lenfunc sq_length; - binaryfunc sq_concat; - ssizeargfunc sq_repeat; - ssizeargfunc sq_item; - void *was_sq_slice; - ssizeobjargproc sq_ass_item; - void *was_sq_ass_slice; - objobjproc sq_contains; - - binaryfunc sq_inplace_concat; - ssizeargfunc sq_inplace_repeat; -} PySequenceMethods; - -typedef struct { - lenfunc mp_length; - binaryfunc mp_subscript; - objobjargproc mp_ass_subscript; -} PyMappingMethods; - - -typedef struct { - getbufferproc bf_getbuffer; - releasebufferproc bf_releasebuffer; -} PyBufferProcs; - -typedef void (*freefunc)(void *); -typedef void (*destructor)(PyObject *); -/* We can't provide a full compile-time check that limited-API - users won't implement tp_print. However, not defining printfunc - and making tp_print of a different function pointer type - should at least cause a warning in most cases. */ -typedef int (*printfunc)(PyObject *, FILE *, int); -typedef PyObject *(*getattrfunc)(PyObject *, char *); -typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); -typedef int (*setattrfunc)(PyObject *, char *, PyObject *); -typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); -typedef PyObject *(*reprfunc)(PyObject *); -typedef Py_hash_t (*hashfunc)(PyObject *); -typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); -typedef PyObject *(*getiterfunc) (PyObject *); -typedef PyObject *(*iternextfunc) (PyObject *); -typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); -typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); -typedef int (*initproc)(PyObject *, PyObject *, PyObject *); -typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); -typedef PyObject *(*allocfunc)(struct _typeobject *, int); - -typedef struct _typeobject PyTypeObject; /* opaque */ -typedef struct _typeobject { - PyObject_VAR_HEAD - const char *tp_name; /* For printing, in format "<module>.<name>" */ - int tp_basicsize, tp_itemsize; /* For allocation */ - - /* Methods to implement standard operations */ - - destructor tp_dealloc; - printfunc tp_print; - getattrfunc tp_getattr; - setattrfunc tp_setattr; - void *tp_reserved; /* formerly known as tp_compare */ - reprfunc tp_repr; - - /* Method suites for standard classes */ - - PyNumberMethods *tp_as_number; - PySequenceMethods *tp_as_sequence; - PyMappingMethods *tp_as_mapping; - - /* More standard operations (here for binary compatibility) */ - - hashfunc tp_hash; - ternaryfunc tp_call; - reprfunc tp_str; - getattrofunc tp_getattro; - setattrofunc tp_setattro; - - /* Functions to access object as input/output buffer */ - PyBufferProcs *tp_as_buffer; - - /* Flags to define presence of optional/expanded features */ - long tp_flags; - - const char *tp_doc; /* Documentation string */ - - /* Assigned meaning in release 2.0 */ - /* call function for all accessible objects */ - traverseproc tp_traverse; - - /* delete references to contained objects */ - inquiry tp_clear; - - /* Assigned meaning in release 2.1 */ - /* rich comparisons */ - richcmpfunc tp_richcompare; - - /* weak reference enabler */ - Py_ssize_t tp_weaklistoffset; - - /* Iterators */ - getiterfunc tp_iter; - iternextfunc tp_iternext; - - /* Attribute descriptor and subclassing stuff */ - struct PyMethodDef *tp_methods; - struct PyMemberDef *tp_members; - struct PyGetSetDef *tp_getset; - struct _typeobject *tp_base; - PyObject *tp_dict; - descrgetfunc tp_descr_get; - descrsetfunc tp_descr_set; - Py_ssize_t tp_dictoffset; - initproc tp_init; - allocfunc tp_alloc; - newfunc tp_new; - freefunc tp_free; /* Low-level free-memory routine */ - inquiry tp_is_gc; /* For PyObject_IS_GC */ - PyObject *tp_bases; - PyObject *tp_mro; /* method resolution order */ - PyObject *tp_cache; - PyObject *tp_subclasses; - PyObject *tp_weaklist; - destructor tp_del; - - /* Type attribute cache version tag. Added in version 2.6 */ - unsigned int tp_version_tag; - -} PyTypeObject; - -typedef struct{ - int slot; /* slot id, see below */ - void *pfunc; /* function pointer */ -} PyType_Slot; - -typedef struct{ - const char* name; - int basicsize; - int itemsize; - int flags; - PyType_Slot *slots; /* terminated by slot==0. */ -} PyType_Spec; - -PyAPI_FUNC(PyObject*) PyType_FromSpec(PyType_Spec*); - -#ifndef Py_LIMITED_API -/* The *real* layout of a type object when allocated on the heap */ -typedef struct _heaptypeobject { - /* Note: there's a dependency on the order of these members - in slotptr() in typeobject.c . */ - PyTypeObject ht_type; - PyNumberMethods as_number; - PyMappingMethods as_mapping; - PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, - so that the mapping wins when both - the mapping and the sequence define - a given operator (e.g. __getitem__). - see add_operators() in typeobject.c . */ - PyBufferProcs as_buffer; - PyObject *ht_name, *ht_slots, *ht_qualname; - /* here are optional user slots, followed by the members. */ -} PyHeapTypeObject; - -/* access macro to the members which are floating "behind" the object */ -#define PyHeapType_GET_MEMBERS(etype) \ - ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) -#endif - -/* Generic type check */ -PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); -#define PyObject_TypeCheck(ob, tp) \ - (Py_TYPE(ob) == (tp) || PyType_IsSubtype(Py_TYPE(ob), (tp))) - -PyAPI_DATA(PyTypeObject) PyType_Type; /* built-in 'type' */ -PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */ -PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */ - -PyAPI_FUNC(long) PyType_GetFlags(PyTypeObject*); - -#define PyType_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TYPE_SUBCLASS) -#define PyType_CheckExact(op) (Py_TYPE(op) == &PyType_Type) - -PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); -PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); -PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, - PyObject *, PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **); -PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *); -#endif -PyAPI_FUNC(unsigned int) PyType_ClearCache(void); -PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); - -/* Generic operations on objects */ -struct _Py_Identifier; -#ifndef Py_LIMITED_API -PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); -PyAPI_FUNC(void) _Py_BreakPoint(void); -PyAPI_FUNC(void) _PyObject_Dump(PyObject *); -#endif -PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_ASCII(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_Bytes(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); -PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); -PyAPI_FUNC(PyObject *) PyObject_GetAttrString(PyObject *, const char *); -PyAPI_FUNC(int) PyObject_SetAttrString(PyObject *, const char *, PyObject *); -PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); -PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *); -PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *); -PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); -#endif -PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); -#endif -PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, - PyObject *, PyObject *); -PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *); -PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *); -PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); -PyAPI_FUNC(int) PyObject_Not(PyObject *); -PyAPI_FUNC(int) PyCallable_Check(PyObject *); - -PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); - -/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes - dict as the last parameter. */ -PyAPI_FUNC(PyObject *) -_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *); -PyAPI_FUNC(int) -_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, - PyObject *, PyObject *); - - -/* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a - list of strings. PyObject_Dir(NULL) is like builtins.dir(), - returning the names of the current locals. In this case, if there are - no current locals, NULL is returned, and PyErr_Occurred() is false. -*/ -PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); - - -/* Helpers for printing recursive container types */ -PyAPI_FUNC(int) Py_ReprEnter(PyObject *); -PyAPI_FUNC(void) Py_ReprLeave(PyObject *); - -/* Helpers for hash functions */ -PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); -PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); -PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t); - -/* Helper for passing objects to printf and the like */ -#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj)) - -/* Flag bits for printing: */ -#define Py_PRINT_RAW 1 /* No string quotes etc. */ - -/* -`Type flags (tp_flags) - -These flags are used to extend the type structure in a backwards-compatible -fashion. Extensions can use the flags to indicate (and test) when a given -type structure contains a new feature. The Python core will use these when -introducing new functionality between major revisions (to avoid mid-version -changes in the PYTHON_API_VERSION). - -Arbitration of the flag bit positions will need to be coordinated among -all extension writers who publically release their extensions (this will -be fewer than you might expect!).. - -Most flags were removed as of Python 3.0 to make room for new flags. (Some -flags are not for backwards compatibility but to indicate the presence of an -optional feature; these flags remain of course.) - -Type definitions should use Py_TPFLAGS_DEFAULT for their tp_flags value. - -Code can use PyType_HasFeature(type_ob, flag_value) to test whether the -given type object has a specified feature. -*/ - -/* Set if the type object is dynamically allocated */ -#define Py_TPFLAGS_HEAPTYPE (1L<<9) - -/* Set if the type allows subclassing */ -#define Py_TPFLAGS_BASETYPE (1L<<10) - -/* Set if the type is 'ready' -- fully initialized */ -#define Py_TPFLAGS_READY (1L<<12) - -/* Set while the type is being 'readied', to prevent recursive ready calls */ -#define Py_TPFLAGS_READYING (1L<<13) - -/* Objects support garbage collection (see objimp.h) */ -#define Py_TPFLAGS_HAVE_GC (1L<<14) - -/* These two bits are preserved for Stackless Python, next after this is 17 */ -#ifdef STACKLESS -#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION (3L<<15) -#else -#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION 0 -#endif - -/* Objects support type attribute cache */ -#define Py_TPFLAGS_HAVE_VERSION_TAG (1L<<18) -#define Py_TPFLAGS_VALID_VERSION_TAG (1L<<19) - -/* Type is abstract and cannot be instantiated */ -#define Py_TPFLAGS_IS_ABSTRACT (1L<<20) - -/* These flags are used to determine if a type is a subclass. */ -#define Py_TPFLAGS_INT_SUBCLASS (1L<<23) -#define Py_TPFLAGS_LONG_SUBCLASS (1L<<24) -#define Py_TPFLAGS_LIST_SUBCLASS (1L<<25) -#define Py_TPFLAGS_TUPLE_SUBCLASS (1L<<26) -#define Py_TPFLAGS_BYTES_SUBCLASS (1L<<27) -#define Py_TPFLAGS_UNICODE_SUBCLASS (1L<<28) -#define Py_TPFLAGS_DICT_SUBCLASS (1L<<29) -#define Py_TPFLAGS_BASE_EXC_SUBCLASS (1L<<30) -#define Py_TPFLAGS_TYPE_SUBCLASS (1L<<31) - -#define Py_TPFLAGS_DEFAULT ( \ - Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ - Py_TPFLAGS_HAVE_VERSION_TAG | \ - 0) - -#ifdef Py_LIMITED_API -#define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0) -#else -#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) -#endif -#define PyType_FastSubclass(t,f) PyType_HasFeature(t,f) - - -/* -The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement -reference counts. Py_DECREF calls the object's deallocator function when -the refcount falls to 0; for -objects that don't contain references to other objects or heap memory -this can be the standard function free(). Both macros can be used -wherever a void expression is allowed. The argument must not be a -NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. -The macro _Py_NewReference(op) initialize reference counts to 1, and -in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional -bookkeeping appropriate to the special build. - -We assume that the reference count field can never overflow; this can -be proven when the size of the field is the same as the pointer size, so -we ignore the possibility. Provided a C int is at least 32 bits (which -is implicitly assumed in many parts of this code), that's enough for -about 2**31 references to an object. - -XXX The following became out of date in Python 2.2, but I'm not sure -XXX what the full truth is now. Certainly, heap-allocated type objects -XXX can and should be deallocated. -Type objects should never be deallocated; the type pointer in an object -is not considered to be a reference to the type object, to save -complications in the deallocation function. (This is actually a -decision that's up to the implementer of each new type so if you want, -you can count such references to the type object.) - -*** WARNING*** The Py_DECREF macro must have a side-effect-free argument -since it may evaluate its argument multiple times. (The alternative -would be to mace it a proper function or assign it to a global temporary -variable first, both of which are slower; and in a multi-threaded -environment the global variable trick is not safe.) -*/ - -/* First define a pile of simple helper macros, one set per special - * build symbol. These either expand to the obvious things, or to - * nothing at all when the special mode isn't in effect. The main - * macros can later be defined just once then, yet expand to different - * things depending on which special build options are and aren't in effect. - * Trust me <wink>: while painful, this is 20x easier to understand than, - * e.g, defining _Py_NewReference five different times in a maze of nested - * #ifdefs (we used to do that -- it was impenetrable). - */ -#define _Py_INC_REFTOTAL -#define _Py_DEC_REFTOTAL -#define _Py_REF_DEBUG_COMMA -#define _Py_CHECK_REFCNT(OP) /* a semicolon */; - -#define _Py_INC_TPALLOCS(OP) -#define _Py_INC_TPFREES(OP) -#define _Py_DEC_TPFREES(OP) -#define _Py_COUNT_ALLOCS_COMMA - -/* Without Py_TRACE_REFS, there's little enough to do that we expand code - * inline. - */ -#define _Py_NewReference(op) ( \ - _Py_INC_TPALLOCS(op) \ - _Py_INC_REFTOTAL \ - Py_REFCNT(op) = 1) - -#define _Py_ForgetReference(op) _Py_INC_TPFREES(op) - -#define _Py_Dealloc(op) ( \ - _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \ - (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op))) - -#define Py_INCREF(op) ( \ - _Py_INC_REFTOTAL \ - ((PyObject*)(op))->ob_refcnt++) - -#define Py_DECREF(op) \ - do { \ - if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \ - --((PyObject*)(op))->ob_refcnt != 0) \ - _Py_CHECK_REFCNT(op) \ - else \ - _Py_Dealloc((PyObject *)(op)); \ - } while (0) - -/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear - * and tp_dealloc implementatons. - * - * Note that "the obvious" code can be deadly: - * - * Py_XDECREF(op); - * op = NULL; - * - * Typically, `op` is something like self->containee, and `self` is done - * using its `containee` member. In the code sequence above, suppose - * `containee` is non-NULL with a refcount of 1. Its refcount falls to - * 0 on the first line, which can trigger an arbitrary amount of code, - * possibly including finalizers (like __del__ methods or weakref callbacks) - * coded in Python, which in turn can release the GIL and allow other threads - * to run, etc. Such code may even invoke methods of `self` again, or cause - * cyclic gc to trigger, but-- oops! --self->containee still points to the - * object being torn down, and it may be in an insane state while being torn - * down. This has in fact been a rich historic source of miserable (rare & - * hard-to-diagnose) segfaulting (and other) bugs. - * - * The safe way is: - * - * Py_CLEAR(op); - * - * That arranges to set `op` to NULL _before_ decref'ing, so that any code - * triggered as a side-effect of `op` getting torn down no longer believes - * `op` points to a valid object. - * - * There are cases where it's safe to use the naive code, but they're brittle. - * For example, if `op` points to a Python integer, you know that destroying - * one of those can't cause problems -- but in part that relies on that - * Python integers aren't currently weakly referencable. Best practice is - * to use Py_CLEAR() even if you can't think of a reason for why you need to. - */ -#define Py_CLEAR(op) \ - do { \ - if (op) { \ - PyObject *_py_tmp = (PyObject *)(op); \ - (op) = NULL; \ - Py_DECREF(_py_tmp); \ - } \ - } while (0) - -/* Macros to use in case the object pointer may be NULL: */ -#define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) -#define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) - -/* -These are provided as conveniences to Python runtime embedders, so that -they can have object code that is not dependent on Python compilation flags. -*/ -PyAPI_FUNC(void) Py_IncRef(PyObject *); -PyAPI_FUNC(void) Py_DecRef(PyObject *); - -/* -_Py_NoneStruct is an object of undefined type which can be used in contexts -where NULL (nil) is not suitable (since NULL often means 'error'). - -Don't forget to apply Py_INCREF() when returning this value!!! -*/ -PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ -#define Py_None (&_Py_NoneStruct) - -/* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None - -/* -Py_NotImplemented is a singleton used to signal that an operation is -not implemented for a given type combination. -*/ -PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ -#define Py_NotImplemented (&_Py_NotImplementedStruct) - -/* Macro for returning Py_NotImplemented from a function */ -#define Py_RETURN_NOTIMPLEMENTED \ - return Py_INCREF(Py_NotImplemented), Py_NotImplemented - -/* Rich comparison opcodes */ -#define Py_LT 0 -#define Py_LE 1 -#define Py_EQ 2 -#define Py_NE 3 -#define Py_GT 4 -#define Py_GE 5 - -/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. - * Defined in object.c. - */ -PyAPI_DATA(int) _Py_SwappedOp[]; - - -/* -More conventions -================ - -Argument Checking ------------------ - -Functions that take objects as arguments normally don't check for nil -arguments, but they do check the type of the argument, and return an -error if the function doesn't apply to the type. - -Failure Modes -------------- - -Functions may fail for a variety of reasons, including running out of -memory. This is communicated to the caller in two ways: an error string -is set (see errors.h), and the function result differs: functions that -normally return a pointer return NULL for failure, functions returning -an integer return -1 (which could be a legal return value too!), and -other functions return 0 for success and -1 for failure. -Callers should always check for errors before using the result. If -an error was set, the caller must either explicitly clear it, or pass -the error on to its caller. - -Reference Counts ----------------- - -It takes a while to get used to the proper usage of reference counts. - -Functions that create an object set the reference count to 1; such new -objects must be stored somewhere or destroyed again with Py_DECREF(). -Some functions that 'store' objects, such as PyTuple_SetItem() and -PyList_SetItem(), -don't increment the reference count of the object, since the most -frequent use is to store a fresh object. Functions that 'retrieve' -objects, such as PyTuple_GetItem() and PyDict_GetItemString(), also -don't increment -the reference count, since most frequently the object is only looked at -quickly. Thus, to retrieve an object and store it again, the caller -must call Py_INCREF() explicitly. - -NOTE: functions that 'consume' a reference count, like -PyList_SetItem(), consume the reference even if the object wasn't -successfully stored, to simplify error handling. - -It seems attractive to make other functions that take an object as -argument consume a reference count; however, this may quickly get -confusing (even the current practice is already confusing). Consider -it carefully, it may save lots of calls to Py_INCREF() and Py_DECREF() at -times. -*/ - - -/* Trashcan mechanism, thanks to Christian Tismer. - -When deallocating a container object, it's possible to trigger an unbounded -chain of deallocations, as each Py_DECREF in turn drops the refcount on "the -next" object in the chain to 0. This can easily lead to stack faults, and -especially in threads (which typically have less stack space to work with). - -A container object that participates in cyclic gc can avoid this by -bracketing the body of its tp_dealloc function with a pair of macros: - -static void -mytype_dealloc(mytype *p) -{ - ... declarations go here ... - - PyObject_GC_UnTrack(p); // must untrack first - Py_TRASHCAN_SAFE_BEGIN(p) - ... The body of the deallocator goes here, including all calls ... - ... to Py_DECREF on contained objects. ... - Py_TRASHCAN_SAFE_END(p) -} - -CAUTION: Never return from the middle of the body! If the body needs to -"get out early", put a label immediately before the Py_TRASHCAN_SAFE_END -call, and goto it. Else the call-depth counter (see below) will stay -above 0 forever, and the trashcan will never get emptied. - -How it works: The BEGIN macro increments a call-depth counter. So long -as this counter is small, the body of the deallocator is run directly without -further ado. But if the counter gets large, it instead adds p to a list of -objects to be deallocated later, skips the body of the deallocator, and -resumes execution after the END macro. The tp_dealloc routine then returns -without deallocating anything (and so unbounded call-stack depth is avoided). - -When the call stack finishes unwinding again, code generated by the END macro -notices this, and calls another routine to deallocate all the objects that -may have been added to the list of deferred deallocations. In effect, a -chain of N deallocations is broken into N / PyTrash_UNWIND_LEVEL pieces, -with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. -*/ - -PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); -PyAPI_FUNC(void) _PyTrash_destroy_chain(void); -PyAPI_DATA(int) _PyTrash_delete_nesting; -PyAPI_DATA(PyObject *) _PyTrash_delete_later; - -#define PyTrash_UNWIND_LEVEL 50 - -#define Py_TRASHCAN_SAFE_BEGIN(op) \ - if (_PyTrash_delete_nesting < PyTrash_UNWIND_LEVEL) { \ - ++_PyTrash_delete_nesting; - /* The body of the deallocator is here. */ -#define Py_TRASHCAN_SAFE_END(op) \ - --_PyTrash_delete_nesting; \ - if (_PyTrash_delete_later && _PyTrash_delete_nesting <= 0) \ - _PyTrash_destroy_chain(); \ - } \ - else \ - _PyTrash_deposit_object((PyObject*)op); - -#endif /* !Py_OBJECT_H */
--- a/cos/python/Modules/gcmodule.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1513 +0,0 @@ -/* - - Reference Cycle Garbage Collection - ================================== - - Neil Schemenauer <nas@arctrix.com> - - Based on a post on the python-dev list. Ideas from Guido van Rossum, - Eric Tiedemann, and various others. - - http://www.arctrix.com/nas/python/gc/ - - The following mailing list threads provide a historical perspective on - the design of this module. Note that a fair amount of refinement has - occurred since those discussions. - - http://mail.python.org/pipermail/python-dev/2000-March/002385.html - http://mail.python.org/pipermail/python-dev/2000-March/002434.html - http://mail.python.org/pipermail/python-dev/2000-March/002497.html - - For a highlevel view of the collection process, read the collect - function. - -*/ - -#include "Python.h" -#include "frameobject.h" /* for PyFrame_ClearFreeList */ - -/* Get an object's GC head */ -#define AS_GC(o) ((PyGC_Head *)(o)-1) - -/* Get the object given the GC head */ -#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1)) - -/*** Global GC state ***/ - -struct gc_generation { - PyGC_Head head; - int threshold; /* collection threshold */ - int count; /* count of allocations or collections of younger - generations */ -}; - -#define NUM_GENERATIONS 3 -#define GEN_HEAD(n) (&generations[n].head) - -/* linked lists of container objects */ -static struct gc_generation generations[NUM_GENERATIONS] = { - /* PyGC_Head, threshold, count */ - {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, - {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, - {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, -}; - -PyGC_Head *_PyGC_generation0 = GEN_HEAD(0); - -static int enabled = 1; /* automatic collection enabled? */ - -/* true if we are currently running the collector */ -static int collecting = 0; - -/* list of uncollectable objects */ -static PyObject *garbage = NULL; - -/* Python string to use if unhandled exception occurs */ -static PyObject *gc_str = NULL; - -/* Python string used to look for __del__ attribute. */ -static PyObject *delstr = NULL; - -/* This is the number of objects who survived the last full collection. It - approximates the number of long lived objects tracked by the GC. - - (by "full collection", we mean a collection of the oldest generation). -*/ -static Py_ssize_t long_lived_total = 0; - -/* This is the number of objects who survived all "non-full" collections, - and are awaiting to undergo a full collection for the first time. - -*/ -static Py_ssize_t long_lived_pending = 0; - -/* - NOTE: about the counting of long-lived objects. - - To limit the cost of garbage collection, there are two strategies; - - make each collection faster, e.g. by scanning fewer objects - - do less collections - This heuristic is about the latter strategy. - - In addition to the various configurable thresholds, we only trigger a - full collection if the ratio - long_lived_pending / long_lived_total - is above a given value (hardwired to 25%). - - The reason is that, while "non-full" collections (i.e., collections of - the young and middle generations) will always examine roughly the same - number of objects -- determined by the aforementioned thresholds --, - the cost of a full collection is proportional to the total number of - long-lived objects, which is virtually unbounded. - - Indeed, it has been remarked that doing a full collection every - <constant number> of object creations entails a dramatic performance - degradation in workloads which consist in creating and storing lots of - long-lived objects (e.g. building a large list of GC-tracked objects would - show quadratic performance, instead of linear as expected: see issue #4074). - - Using the above ratio, instead, yields amortized linear performance in - the total number of objects (the effect of which can be summarized - thusly: "each full garbage collection is more and more costly as the - number of objects grows, but we do fewer and fewer of them"). - - This heuristic was suggested by Martin von Löwis on python-dev in - June 2008. His original analysis and proposal can be found at: - http://mail.python.org/pipermail/python-dev/2008-June/080579.html -*/ - - -/* set for debugging information */ -#define DEBUG_STATS (1<<0) /* print collection statistics */ -#define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ -#define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */ -#define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */ -#define DEBUG_LEAK DEBUG_COLLECTABLE | \ - DEBUG_UNCOLLECTABLE | \ - DEBUG_SAVEALL -static int debug; -static PyObject *tmod = NULL; - -/*-------------------------------------------------------------------------- -gc_refs values. - -Between collections, every gc'ed object has one of two gc_refs values: - -GC_UNTRACKED - The initial state; objects returned by PyObject_GC_Malloc are in this - state. The object doesn't live in any generation list, and its - tp_traverse slot must not be called. - -GC_REACHABLE - The object lives in some generation list, and its tp_traverse is safe to - call. An object transitions to GC_REACHABLE when PyObject_GC_Track - is called. - -During a collection, gc_refs can temporarily take on other states: - ->= 0 - At the start of a collection, update_refs() copies the true refcount - to gc_refs, for each object in the generation being collected. - subtract_refs() then adjusts gc_refs so that it equals the number of - times an object is referenced directly from outside the generation - being collected. - gc_refs remains >= 0 throughout these steps. - -GC_TENTATIVELY_UNREACHABLE - move_unreachable() then moves objects not reachable (whether directly or - indirectly) from outside the generation into an "unreachable" set. - Objects that are found to be reachable have gc_refs set to GC_REACHABLE - again. Objects that are found to be unreachable have gc_refs set to - GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing - this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may - transition back to GC_REACHABLE. - - Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates - for collection. If it's decided not to collect such an object (e.g., - it has a __del__ method), its gc_refs is restored to GC_REACHABLE again. ----------------------------------------------------------------------------- -*/ -#define GC_UNTRACKED _PyGC_REFS_UNTRACKED -#define GC_REACHABLE _PyGC_REFS_REACHABLE -#define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE - -#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED) -#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE) -#define IS_TENTATIVELY_UNREACHABLE(o) ( \ - (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE) - -/*** list functions ***/ - -static void -gc_list_init(PyGC_Head *list) -{ - list->gc.gc_prev = list; - list->gc.gc_next = list; -} - -static int -gc_list_is_empty(PyGC_Head *list) -{ - return (list->gc.gc_next == list); -} - -#if 0 -/* This became unused after gc_list_move() was introduced. */ -/* Append `node` to `list`. */ -static void -gc_list_append(PyGC_Head *node, PyGC_Head *list) -{ - node->gc.gc_next = list; - node->gc.gc_prev = list->gc.gc_prev; - node->gc.gc_prev->gc.gc_next = node; - list->gc.gc_prev = node; -} -#endif - -/* Remove `node` from the gc list it's currently in. */ -static void -gc_list_remove(PyGC_Head *node) -{ - node->gc.gc_prev->gc.gc_next = node->gc.gc_next; - node->gc.gc_next->gc.gc_prev = node->gc.gc_prev; - node->gc.gc_next = NULL; /* object is not currently tracked */ -} - -/* Move `node` from the gc list it's currently in (which is not explicitly - * named here) to the end of `list`. This is semantically the same as - * gc_list_remove(node) followed by gc_list_append(node, list). - */ -static void -gc_list_move(PyGC_Head *node, PyGC_Head *list) -{ - PyGC_Head *new_prev; - PyGC_Head *current_prev = node->gc.gc_prev; - PyGC_Head *current_next = node->gc.gc_next; - /* Unlink from current list. */ - current_prev->gc.gc_next = current_next; - current_next->gc.gc_prev = current_prev; - /* Relink at end of new list. */ - new_prev = node->gc.gc_prev = list->gc.gc_prev; - new_prev->gc.gc_next = list->gc.gc_prev = node; - node->gc.gc_next = list; -} - -/* append list `from` onto list `to`; `from` becomes an empty list */ -static void -gc_list_merge(PyGC_Head *from, PyGC_Head *to) -{ - PyGC_Head *tail; - assert(from != to); - if (!gc_list_is_empty(from)) { - tail = to->gc.gc_prev; - tail->gc.gc_next = from->gc.gc_next; - tail->gc.gc_next->gc.gc_prev = tail; - to->gc.gc_prev = from->gc.gc_prev; - to->gc.gc_prev->gc.gc_next = to; - } - gc_list_init(from); -} - -static Py_ssize_t -gc_list_size(PyGC_Head *list) -{ - PyGC_Head *gc; - Py_ssize_t n = 0; - for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) { - n++; - } - return n; -} - -/* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list). - */ -static int -append_objects(PyObject *py_list, PyGC_Head *gc_list) -{ - PyGC_Head *gc; - for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) { - PyObject *op = FROM_GC(gc); - if (op != py_list) { - if (PyList_Append(py_list, op)) { - return -1; /* exception */ - } - } - } - return 0; -} - -/*** end of list stuff ***/ - - -/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects - * in containers, and is GC_REACHABLE for all tracked gc objects not in - * containers. - */ -static void -update_refs(PyGC_Head *containers) -{ - PyGC_Head *gc = containers->gc.gc_next; - for (; gc != containers; gc = gc->gc.gc_next) { - assert(gc->gc.gc_refs == GC_REACHABLE); - gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc)); - /* Python's cyclic gc should never see an incoming refcount - * of 0: if something decref'ed to 0, it should have been - * deallocated immediately at that time. - * Possible cause (if the assert triggers): a tp_dealloc - * routine left a gc-aware object tracked during its teardown - * phase, and did something-- or allowed something to happen -- - * that called back into Python. gc can trigger then, and may - * see the still-tracked dying object. Before this assert - * was added, such mistakes went on to allow gc to try to - * delete the object again. In a debug build, that caused - * a mysterious segfault, when _Py_ForgetReference tried - * to remove the object from the doubly-linked list of all - * objects a second time. In a release build, an actual - * double deallocation occurred, which leads to corruption - * of the allocator's internal bookkeeping pointers. That's - * so serious that maybe this should be a release-build - * check instead of an assert? - */ - assert(gc->gc.gc_refs != 0); - } -} - -/* A traversal callback for subtract_refs. */ -static int -visit_decref(PyObject *op, void *data) -{ - assert(op != NULL); - if (PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - /* We're only interested in gc_refs for objects in the - * generation being collected, which can be recognized - * because only they have positive gc_refs. - */ - assert(gc->gc.gc_refs != 0); /* else refcount was too small */ - if (gc->gc.gc_refs > 0) - gc->gc.gc_refs--; - } - return 0; -} - -/* Subtract internal references from gc_refs. After this, gc_refs is >= 0 - * for all objects in containers, and is GC_REACHABLE for all tracked gc - * objects not in containers. The ones with gc_refs > 0 are directly - * reachable from outside containers, and so can't be collected. - */ -static void -subtract_refs(PyGC_Head *containers) -{ - traverseproc traverse; - PyGC_Head *gc = containers->gc.gc_next; - for (; gc != containers; gc=gc->gc.gc_next) { - traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; - (void) traverse(FROM_GC(gc), - (visitproc)visit_decref, - NULL); - } -} - -/* A traversal callback for move_unreachable. */ -static int -visit_reachable(PyObject *op, PyGC_Head *reachable) -{ - if (PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - const Py_ssize_t gc_refs = gc->gc.gc_refs; - - if (gc_refs == 0) { - /* This is in move_unreachable's 'young' list, but - * the traversal hasn't yet gotten to it. All - * we need to do is tell move_unreachable that it's - * reachable. - */ - gc->gc.gc_refs = 1; - } - else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) { - /* This had gc_refs = 0 when move_unreachable got - * to it, but turns out it's reachable after all. - * Move it back to move_unreachable's 'young' list, - * and move_unreachable will eventually get to it - * again. - */ - gc_list_move(gc, reachable); - gc->gc.gc_refs = 1; - } - /* Else there's nothing to do. - * If gc_refs > 0, it must be in move_unreachable's 'young' - * list, and move_unreachable will eventually get to it. - * If gc_refs == GC_REACHABLE, it's either in some other - * generation so we don't care about it, or move_unreachable - * already dealt with it. - * If gc_refs == GC_UNTRACKED, it must be ignored. - */ - else { - assert(gc_refs > 0 - || gc_refs == GC_REACHABLE - || gc_refs == GC_UNTRACKED); - } - } - return 0; -} - -/* Move the unreachable objects from young to unreachable. After this, - * all objects in young have gc_refs = GC_REACHABLE, and all objects in - * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked - * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE. - * All objects in young after this are directly or indirectly reachable - * from outside the original young; and all objects in unreachable are - * not. - */ -static void -move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) -{ - PyGC_Head *gc = young->gc.gc_next; - - /* Invariants: all objects "to the left" of us in young have gc_refs - * = GC_REACHABLE, and are indeed reachable (directly or indirectly) - * from outside the young list as it was at entry. All other objects - * from the original young "to the left" of us are in unreachable now, - * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the - * left of us in 'young' now have been scanned, and no objects here - * or to the right have been scanned yet. - */ - - while (gc != young) { - PyGC_Head *next; - - if (gc->gc.gc_refs) { - /* gc is definitely reachable from outside the - * original 'young'. Mark it as such, and traverse - * its pointers to find any other objects that may - * be directly reachable from it. Note that the - * call to tp_traverse may append objects to young, - * so we have to wait until it returns to determine - * the next object to visit. - */ - PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - assert(gc->gc.gc_refs > 0); - gc->gc.gc_refs = GC_REACHABLE; - (void) traverse(op, - (visitproc)visit_reachable, - (void *)young); - next = gc->gc.gc_next; - if (PyTuple_CheckExact(op)) { - _PyTuple_MaybeUntrack(op); - } - else if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - } - } - else { - /* This *may* be unreachable. To make progress, - * assume it is. gc isn't directly reachable from - * any object we've already traversed, but may be - * reachable from an object we haven't gotten to yet. - * visit_reachable will eventually move gc back into - * young if that's so, and we'll see it again. - */ - next = gc->gc.gc_next; - gc_list_move(gc, unreachable); - gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE; - } - gc = next; - } -} - -/* Return true if object has a finalization method. */ -static int -has_finalizer(PyObject *op) -{ - if (PyGen_CheckExact(op)) - return PyGen_NeedsFinalizing((PyGenObject *)op); - else - return op->ob_type->tp_del != NULL; -} - -/* Move the objects in unreachable with __del__ methods into `finalizers`. - * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the - * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE. - */ -static void -move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) -{ - PyGC_Head *gc; - PyGC_Head *next; - - /* March over unreachable. Move objects with finalizers into - * `finalizers`. - */ - for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { - PyObject *op = FROM_GC(gc); - - assert(IS_TENTATIVELY_UNREACHABLE(op)); - next = gc->gc.gc_next; - - if (has_finalizer(op)) { - gc_list_move(gc, finalizers); - gc->gc.gc_refs = GC_REACHABLE; - } - } -} - -/* A traversal callback for move_finalizer_reachable. */ -static int -visit_move(PyObject *op, PyGC_Head *tolist) -{ - if (PyObject_IS_GC(op)) { - if (IS_TENTATIVELY_UNREACHABLE(op)) { - PyGC_Head *gc = AS_GC(op); - gc_list_move(gc, tolist); - gc->gc.gc_refs = GC_REACHABLE; - } - } - return 0; -} - -/* Move objects that are reachable from finalizers, from the unreachable set - * into finalizers set. - */ -static void -move_finalizer_reachable(PyGC_Head *finalizers) -{ - traverseproc traverse; - PyGC_Head *gc = finalizers->gc.gc_next; - for (; gc != finalizers; gc = gc->gc.gc_next) { - /* Note that the finalizers list may grow during this. */ - traverse = Py_TYPE(FROM_GC(gc))->tp_traverse; - (void) traverse(FROM_GC(gc), - (visitproc)visit_move, - (void *)finalizers); - } -} - -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. - */ -static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) -{ - PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ - PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ - PyGC_Head *next; - int num_freed = 0; - - gc_list_init(&wrcb_to_call); - - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. - */ - for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { - PyWeakReference **wrlist; - - op = FROM_GC(gc); - assert(IS_TENTATIVELY_UNREACHABLE(op)); - next = gc->gc.gc_next; - - if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) - continue; - - /* It supports weakrefs. Does it have any? */ - wrlist = (PyWeakReference **) - PyObject_GET_WEAKREFS_LISTPTR(op); - - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. - */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ - - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - assert(wr->wr_object == op); - _PyWeakref_ClearRef(wr); - assert(wr->wr_object == Py_None); - if (wr->wr_callback == NULL) - continue; /* no callback */ - - /* Headache time. `op` is going away, and is weakly referenced by - * `wr`, which has a callback. Should the callback be invoked? If wr - * is also trash, no: - * - * 1. There's no need to call it. The object and the weakref are - * both going away, so it's legitimate to pretend the weakref is - * going away first. The user has to ensure a weakref outlives its - * referent if they want a guarantee that the wr callback will get - * invoked. - * - * 2. It may be catastrophic to call it. If the callback is also in - * cyclic trash (CT), then although the CT is unreachable from - * outside the current generation, CT may be reachable from the - * callback. Then the callback could resurrect insane objects. - * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. - * - * OTOH, if wr isn't part of CT, we should invoke the callback: the - * weakref outlived the trash. Note that since wr isn't CT in this - * case, its callback can't be CT either -- wr acted as an external - * root to this generation, and therefore its callback did too. So - * nothing in CT is reachable from the callback either, so it's hard - * to imagine how calling it later could create a problem for us. wr - * is moved to wrcb_to_call in this case. - */ - if (IS_TENTATIVELY_UNREACHABLE(wr)) - continue; - assert(IS_REACHABLE(wr)); - - /* Create a new reference so that wr can't go away - * before we can process it again. - */ - Py_INCREF(wr); - - /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC(wr); - assert(wrasgc != next); /* wrasgc is reachable, but - next isn't, so they can't - be the same */ - gc_list_move(wrasgc, &wrcb_to_call); - } - } - - /* Invoke the callbacks we decided to honor. It's safe to invoke them - * because they can't reference unreachable objects. - */ - while (! gc_list_is_empty(&wrcb_to_call)) { - PyObject *temp; - PyObject *callback; - - gc = wrcb_to_call.gc.gc_next; - op = FROM_GC(gc); - assert(IS_REACHABLE(op)); - assert(PyWeakref_Check(op)); - wr = (PyWeakReference *)op; - callback = wr->wr_callback; - assert(callback != NULL); - - /* copy-paste of weakrefobject.c's handle_callback() */ - temp = PyObject_CallFunctionObjArgs(callback, wr, NULL); - if (temp == NULL) - PyErr_WriteUnraisable(callback); - else - Py_DECREF(temp); - - /* Give up the reference we created in the first pass. When - * op's refcount hits 0 (which it may or may not do right now), - * op's tp_dealloc will decref op->wr_callback too. Note - * that the refcount probably will hit 0 now, and because this - * weakref was reachable to begin with, gc didn't already - * add it to its count of freed objects. Example: a reachable - * weak value dict maps some key to this reachable weakref. - * The callback removes this key->weakref mapping from the - * dict, leaving no other references to the weakref (excepting - * ours). - */ - Py_DECREF(op); - if (wrcb_to_call.gc.gc_next == gc) { - /* object is still alive -- move it */ - gc_list_move(gc, old); - } - else - ++num_freed; - } - - return num_freed; -} - -static void -debug_cycle(char *msg, PyObject *op) -{ - PySys_WriteStderr("gc: %.100s <%.100s %p>\n", - msg, Py_TYPE(op)->tp_name, op); -} - -/* Handle uncollectable garbage (cycles with finalizers, and stuff reachable - * only from such cycles). - * If DEBUG_SAVEALL, all objects in finalizers are appended to the module - * garbage list (a Python list), else only the objects in finalizers with - * __del__ methods are appended to garbage. All objects in finalizers are - * merged into the old list regardless. - * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list). - * The finalizers list is made empty on a successful return. - */ -static int -handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old) -{ - PyGC_Head *gc = finalizers->gc.gc_next; - - if (garbage == NULL) { - garbage = PyList_New(0); - if (garbage == NULL) - Py_FatalError("gc couldn't create gc.garbage list"); - } - for (; gc != finalizers; gc = gc->gc.gc_next) { - PyObject *op = FROM_GC(gc); - - if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) { - if (PyList_Append(garbage, op) < 0) - return -1; - } - } - - gc_list_merge(finalizers, old); - return 0; -} - -/* Break reference cycles by clearing the containers involved. This is - * tricky business as the lists can be changing and we don't know which - * objects may be freed. It is possible I screwed something up here. - */ -static void -delete_garbage(PyGC_Head *collectable, PyGC_Head *old) -{ - inquiry clear; - - while (!gc_list_is_empty(collectable)) { - PyGC_Head *gc = collectable->gc.gc_next; - PyObject *op = FROM_GC(gc); - - assert(IS_TENTATIVELY_UNREACHABLE(op)); - if (debug & DEBUG_SAVEALL) { - PyList_Append(garbage, op); - } - else { - if ((clear = Py_TYPE(op)->tp_clear) != NULL) { - Py_INCREF(op); - clear(op); - Py_DECREF(op); - } - } - if (collectable->gc.gc_next == gc) { - /* object is still alive, move it, it may die later */ - gc_list_move(gc, old); - gc->gc.gc_refs = GC_REACHABLE; - } - } -} - -/* Clear all free lists - * All free lists are cleared during the collection of the highest generation. - * Allocated items in the free list may keep a pymalloc arena occupied. - * Clearing the free lists may give back memory to the OS earlier. - */ -static void -clear_freelists(void) -{ - (void)PyMethod_ClearFreeList(); - (void)PyFrame_ClearFreeList(); - (void)PyCFunction_ClearFreeList(); - (void)PyTuple_ClearFreeList(); - (void)PyUnicode_ClearFreeList(); - (void)PyFloat_ClearFreeList(); -} - -static double -get_time(void) -{ - double result = 0; - if (tmod != NULL) { - PyObject *f = PyObject_CallMethod(tmod, "time", NULL); - if (f == NULL) { - PyErr_Clear(); - } - else { - if (PyFloat_Check(f)) - result = PyFloat_AsDouble(f); - Py_DECREF(f); - } - } - return result; -} - -/* This is the main function. Read this to understand how the - * collection process works. */ -static Py_ssize_t -collect(int generation) -{ - int i; - Py_ssize_t m = 0; /* # objects collected */ - Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ - PyGC_Head *young; /* the generation we are examining */ - PyGC_Head *old; /* next older generation */ - PyGC_Head unreachable; /* non-problematic unreachable trash */ - PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head *gc; - double t1 = 0.0; - - if (delstr == NULL) { - delstr = PyUnicode_InternFromString("__del__"); - if (delstr == NULL) - Py_FatalError("gc couldn't allocate \"__del__\""); - } - - if (debug & DEBUG_STATS) { - PySys_WriteStderr("gc: collecting generation %d...\n", - generation); - PySys_WriteStderr("gc: objects in each generation:"); - for (i = 0; i < NUM_GENERATIONS; i++) - PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d", - gc_list_size(GEN_HEAD(i))); - t1 = get_time(); - PySys_WriteStderr("\n"); - } - - /* update collection and allocation counters */ - if (generation+1 < NUM_GENERATIONS) - generations[generation+1].count += 1; - for (i = 0; i <= generation; i++) - generations[i].count = 0; - - /* merge younger generations with one we are currently collecting */ - for (i = 0; i < generation; i++) { - gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation)); - } - - /* handy references */ - young = GEN_HEAD(generation); - if (generation < NUM_GENERATIONS-1) - old = GEN_HEAD(generation+1); - else - old = young; - - /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (i.e., have a - * refcount greater than 0 when all the references within the - * set are taken into account). - */ - update_refs(young); - subtract_refs(young); - - /* Leave everything reachable from outside young in young, and move - * everything else (in young) to unreachable. - * NOTE: This used to move the reachable objects into a reachable - * set instead. But most things usually turn out to be reachable, - * so it's more efficient to move the unreachable things. - */ - gc_list_init(&unreachable); - move_unreachable(young, &unreachable); - - /* Move reachable objects to next generation. */ - if (young != old) { - if (generation == NUM_GENERATIONS - 2) { - long_lived_pending += gc_list_size(young); - } - gc_list_merge(young, old); - } - else { - long_lived_pending = 0; - long_lived_total = gc_list_size(young); - } - - /* All objects in unreachable are trash, but objects reachable from - * finalizers can't safely be deleted. Python programmers should take - * care not to create such things. For Python, finalizers means - * instance objects with __del__ methods. Weakrefs with callbacks - * can also call arbitrary Python code but they will be dealt with by - * handle_weakrefs(). - */ - gc_list_init(&finalizers); - move_finalizers(&unreachable, &finalizers); - /* finalizers contains the unreachable objects with a finalizer; - * unreachable objects reachable *from* those are also uncollectable, - * and we move those into the finalizers list too. - */ - move_finalizer_reachable(&finalizers); - - /* Collect statistics on collectable objects found and print - * debugging information. - */ - for (gc = unreachable.gc.gc_next; gc != &unreachable; - gc = gc->gc.gc_next) { - m++; - if (debug & DEBUG_COLLECTABLE) { - debug_cycle("collectable", FROM_GC(gc)); - } - } - - /* Clear weakrefs and invoke callbacks as necessary. */ - m += handle_weakrefs(&unreachable, old); - - /* Call tp_clear on objects in the unreachable set. This will cause - * the reference cycles to be broken. It may also cause some objects - * in finalizers to be freed. - */ - delete_garbage(&unreachable, old); - - /* Collect statistics on uncollectable objects found and print - * debugging information. */ - for (gc = finalizers.gc.gc_next; - gc != &finalizers; - gc = gc->gc.gc_next) { - n++; - if (debug & DEBUG_UNCOLLECTABLE) - debug_cycle("uncollectable", FROM_GC(gc)); - } - if (debug & DEBUG_STATS) { - double t2 = get_time(); - if (m == 0 && n == 0) - PySys_WriteStderr("gc: done"); - else - PySys_WriteStderr( - "gc: done, " - "%" PY_FORMAT_SIZE_T "d unreachable, " - "%" PY_FORMAT_SIZE_T "d uncollectable", - n+m, n); - if (t1 && t2) { - PySys_WriteStderr(", %.4fs elapsed", t2-t1); - } - PySys_WriteStderr(".\n"); - } - - /* Append instances in the uncollectable set to a Python - * reachable list of garbage. The programmer has to deal with - * this if they insist on creating this type of structure. - */ - (void)handle_finalizers(&finalizers, old); - - /* Clear free list only during the collection of the highest - * generation */ - if (generation == NUM_GENERATIONS-1) { - clear_freelists(); - } - - if (PyErr_Occurred()) { - if (gc_str == NULL) - gc_str = PyUnicode_FromString("garbage collection"); - PyErr_WriteUnraisable(gc_str); - Py_FatalError("unexpected exception during garbage collection"); - } - return n+m; -} - -static Py_ssize_t -collect_generations(void) -{ - int i; - Py_ssize_t n = 0; - - /* Find the oldest generation (highest numbered) where the count - * exceeds the threshold. Objects in the that generation and - * generations younger than it will be collected. */ - for (i = NUM_GENERATIONS-1; i >= 0; i--) { - if (generations[i].count > generations[i].threshold) { - /* Avoid quadratic performance degradation in number - of tracked objects. See comments at the beginning - of this file, and issue #4074. - */ - if (i == NUM_GENERATIONS - 1 - && long_lived_pending < long_lived_total / 4) - continue; - n = collect(i); - break; - } - } - return n; -} - -PyDoc_STRVAR(gc_enable__doc__, -"enable() -> None\n" -"\n" -"Enable automatic garbage collection.\n"); - -static PyObject * -gc_enable(PyObject *self, PyObject *noargs) -{ - enabled = 1; - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(gc_disable__doc__, -"disable() -> None\n" -"\n" -"Disable automatic garbage collection.\n"); - -static PyObject * -gc_disable(PyObject *self, PyObject *noargs) -{ - enabled = 0; - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(gc_isenabled__doc__, -"isenabled() -> status\n" -"\n" -"Returns true if automatic garbage collection is enabled.\n"); - -static PyObject * -gc_isenabled(PyObject *self, PyObject *noargs) -{ - return PyBool_FromLong((long)enabled); -} - -PyDoc_STRVAR(gc_collect__doc__, -"collect([generation]) -> n\n" -"\n" -"With no arguments, run a full collection. The optional argument\n" -"may be an integer specifying which generation to collect. A ValueError\n" -"is raised if the generation number is invalid.\n\n" -"The number of unreachable objects is returned.\n"); - -static PyObject * -gc_collect(PyObject *self, PyObject *args, PyObject *kws) -{ - static char *keywords[] = {"generation", NULL}; - int genarg = NUM_GENERATIONS - 1; - Py_ssize_t n; - - if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg)) - return NULL; - - else if (genarg < 0 || genarg >= NUM_GENERATIONS) { - PyErr_SetString(PyExc_ValueError, "invalid generation"); - return NULL; - } - - if (collecting) - n = 0; /* already collecting, don't do anything */ - else { - collecting = 1; - n = collect(genarg); - collecting = 0; - } - - return PyLong_FromSsize_t(n); -} - -PyDoc_STRVAR(gc_set_debug__doc__, -"set_debug(flags) -> None\n" -"\n" -"Set the garbage collection debugging flags. Debugging information is\n" -"written to sys.stderr.\n" -"\n" -"flags is an integer and can have the following bits turned on:\n" -"\n" -" DEBUG_STATS - Print statistics during collection.\n" -" DEBUG_COLLECTABLE - Print collectable objects found.\n" -" DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n" -" DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n" -" DEBUG_LEAK - Debug leaking programs (everything but STATS).\n"); - -static PyObject * -gc_set_debug(PyObject *self, PyObject *args) -{ - if (!PyArg_ParseTuple(args, "i:set_debug", &debug)) - return NULL; - - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(gc_get_debug__doc__, -"get_debug() -> flags\n" -"\n" -"Get the garbage collection debugging flags.\n"); - -static PyObject * -gc_get_debug(PyObject *self, PyObject *noargs) -{ - return Py_BuildValue("i", debug); -} - -PyDoc_STRVAR(gc_set_thresh__doc__, -"set_threshold(threshold0, [threshold1, threshold2]) -> None\n" -"\n" -"Sets the collection thresholds. Setting threshold0 to zero disables\n" -"collection.\n"); - -static PyObject * -gc_set_thresh(PyObject *self, PyObject *args) -{ - int i; - if (!PyArg_ParseTuple(args, "i|ii:set_threshold", - &generations[0].threshold, - &generations[1].threshold, - &generations[2].threshold)) - return NULL; - for (i = 2; i < NUM_GENERATIONS; i++) { - /* generations higher than 2 get the same threshold */ - generations[i].threshold = generations[2].threshold; - } - - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(gc_get_thresh__doc__, -"get_threshold() -> (threshold0, threshold1, threshold2)\n" -"\n" -"Return the current collection thresholds\n"); - -static PyObject * -gc_get_thresh(PyObject *self, PyObject *noargs) -{ - return Py_BuildValue("(iii)", - generations[0].threshold, - generations[1].threshold, - generations[2].threshold); -} - -PyDoc_STRVAR(gc_get_count__doc__, -"get_count() -> (count0, count1, count2)\n" -"\n" -"Return the current collection counts\n"); - -static PyObject * -gc_get_count(PyObject *self, PyObject *noargs) -{ - return Py_BuildValue("(iii)", - generations[0].count, - generations[1].count, - generations[2].count); -} - -static int -referrersvisit(PyObject* obj, PyObject *objs) -{ - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(objs); i++) - if (PyTuple_GET_ITEM(objs, i) == obj) - return 1; - return 0; -} - -static int -gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist) -{ - PyGC_Head *gc; - PyObject *obj; - traverseproc traverse; - for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) { - obj = FROM_GC(gc); - traverse = Py_TYPE(obj)->tp_traverse; - if (obj == objs || obj == resultlist) - continue; - if (traverse(obj, (visitproc)referrersvisit, objs)) { - if (PyList_Append(resultlist, obj) < 0) - return 0; /* error */ - } - } - return 1; /* no error */ -} - -PyDoc_STRVAR(gc_get_referrers__doc__, -"get_referrers(*objs) -> list\n\ -Return the list of objects that directly refer to any of objs."); - -static PyObject * -gc_get_referrers(PyObject *self, PyObject *args) -{ - int i; - PyObject *result = PyList_New(0); - if (!result) return NULL; - - for (i = 0; i < NUM_GENERATIONS; i++) { - if (!(gc_referrers_for(args, GEN_HEAD(i), result))) { - Py_DECREF(result); - return NULL; - } - } - return result; -} - -/* Append obj to list; return true if error (out of memory), false if OK. */ -static int -referentsvisit(PyObject *obj, PyObject *list) -{ - return PyList_Append(list, obj) < 0; -} - -PyDoc_STRVAR(gc_get_referents__doc__, -"get_referents(*objs) -> list\n\ -Return the list of objects that are directly referred to by objs."); - -static PyObject * -gc_get_referents(PyObject *self, PyObject *args) -{ - Py_ssize_t i; - PyObject *result = PyList_New(0); - - if (result == NULL) - return NULL; - - for (i = 0; i < PyTuple_GET_SIZE(args); i++) { - traverseproc traverse; - PyObject *obj = PyTuple_GET_ITEM(args, i); - - if (! PyObject_IS_GC(obj)) - continue; - traverse = Py_TYPE(obj)->tp_traverse; - if (! traverse) - continue; - if (traverse(obj, (visitproc)referentsvisit, result)) { - Py_DECREF(result); - return NULL; - } - } - return result; -} - -PyDoc_STRVAR(gc_get_objects__doc__, -"get_objects() -> [...]\n" -"\n" -"Return a list of objects tracked by the collector (excluding the list\n" -"returned).\n"); - -static PyObject * -gc_get_objects(PyObject *self, PyObject *noargs) -{ - int i; - PyObject* result; - - result = PyList_New(0); - if (result == NULL) - return NULL; - for (i = 0; i < NUM_GENERATIONS; i++) { - if (append_objects(result, GEN_HEAD(i))) { - Py_DECREF(result); - return NULL; - } - } - return result; -} - -PyDoc_STRVAR(gc_is_tracked__doc__, -"is_tracked(obj) -> bool\n" -"\n" -"Returns true if the object is tracked by the garbage collector.\n" -"Simple atomic objects will return false.\n" -); - -static PyObject * -gc_is_tracked(PyObject *self, PyObject *obj) -{ - PyObject *result; - - if (PyObject_IS_GC(obj) && IS_TRACKED(obj)) - result = Py_True; - else - result = Py_False; - Py_INCREF(result); - return result; -} - - -PyDoc_STRVAR(gc__doc__, -"This module provides access to the garbage collector for reference cycles.\n" -"\n" -"enable() -- Enable automatic garbage collection.\n" -"disable() -- Disable automatic garbage collection.\n" -"isenabled() -- Returns true if automatic collection is enabled.\n" -"collect() -- Do a full collection right now.\n" -"get_count() -- Return the current collection counts.\n" -"set_debug() -- Set debugging flags.\n" -"get_debug() -- Get debugging flags.\n" -"set_threshold() -- Set the collection thresholds.\n" -"get_threshold() -- Return the current the collection thresholds.\n" -"get_objects() -- Return a list of all objects tracked by the collector.\n" -"is_tracked() -- Returns true if a given object is tracked.\n" -"get_referrers() -- Return the list of objects that refer to an object.\n" -"get_referents() -- Return the list of objects that an object refers to.\n"); - -static PyMethodDef GcMethods[] = { - {"enable", gc_enable, METH_NOARGS, gc_enable__doc__}, - {"disable", gc_disable, METH_NOARGS, gc_disable__doc__}, - {"isenabled", gc_isenabled, METH_NOARGS, gc_isenabled__doc__}, - {"set_debug", gc_set_debug, METH_VARARGS, gc_set_debug__doc__}, - {"get_debug", gc_get_debug, METH_NOARGS, gc_get_debug__doc__}, - {"get_count", gc_get_count, METH_NOARGS, gc_get_count__doc__}, - {"set_threshold", gc_set_thresh, METH_VARARGS, gc_set_thresh__doc__}, - {"get_threshold", gc_get_thresh, METH_NOARGS, gc_get_thresh__doc__}, - {"collect", (PyCFunction)gc_collect, - METH_VARARGS | METH_KEYWORDS, gc_collect__doc__}, - {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__}, - {"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__}, - {"get_referrers", gc_get_referrers, METH_VARARGS, - gc_get_referrers__doc__}, - {"get_referents", gc_get_referents, METH_VARARGS, - gc_get_referents__doc__}, - {NULL, NULL} /* Sentinel */ -}; - -static struct PyModuleDef gcmodule = { - PyModuleDef_HEAD_INIT, - "gc", /* m_name */ - gc__doc__, /* m_doc */ - -1, /* m_size */ - GcMethods, /* m_methods */ - NULL, /* m_reload */ - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ -}; - -PyMODINIT_FUNC -PyInit_gc(void) -{ - PyObject *m; - - m = PyModule_Create(&gcmodule); - - if (m == NULL) - return NULL; - - if (garbage == NULL) { - garbage = PyList_New(0); - if (garbage == NULL) - return NULL; - } - Py_INCREF(garbage); - if (PyModule_AddObject(m, "garbage", garbage) < 0) - return NULL; - - /* Importing can't be done in collect() because collect() - * can be called via PyGC_Collect() in Py_Finalize(). - * This wouldn't be a problem, except that <initialized> is - * reset to 0 before calling collect which trips up - * the import and triggers an assertion. - */ - if (tmod == NULL) { - tmod = PyImport_ImportModuleNoBlock("time"); - if (tmod == NULL) - PyErr_Clear(); - } - -#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL - ADD_INT(DEBUG_STATS); - ADD_INT(DEBUG_COLLECTABLE); - ADD_INT(DEBUG_UNCOLLECTABLE); - ADD_INT(DEBUG_SAVEALL); - ADD_INT(DEBUG_LEAK); -#undef ADD_INT - return m; -} - -/* API to invoke gc.collect() from C */ -Py_ssize_t -PyGC_Collect(void) -{ - Py_ssize_t n; - - if (collecting) - n = 0; /* already collecting, don't do anything */ - else { - collecting = 1; - n = collect(NUM_GENERATIONS - 1); - collecting = 0; - } - - return n; -} - -void -_PyGC_Fini(void) -{ - if (!(debug & DEBUG_SAVEALL) - && garbage != NULL && PyList_GET_SIZE(garbage) > 0) { - char *message; - if (debug & DEBUG_UNCOLLECTABLE) - message = "gc: %zd uncollectable objects at " \ - "shutdown"; - else - message = "gc: %zd uncollectable objects at " \ - "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; - if (PyErr_WarnFormat(PyExc_ResourceWarning, 0, message, - PyList_GET_SIZE(garbage)) < 0) - PyErr_WriteUnraisable(NULL); - if (debug & DEBUG_UNCOLLECTABLE) { - PyObject *repr = NULL, *bytes = NULL; - repr = PyObject_Repr(garbage); - if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) - PyErr_WriteUnraisable(garbage); - else { - PySys_WriteStderr( - " %s\n", - PyBytes_AS_STRING(bytes) - ); - } - Py_XDECREF(repr); - Py_XDECREF(bytes); - } - } -} - -/* for debugging */ -void -_PyGC_Dump(PyGC_Head *g) -{ - _PyObject_Dump(FROM_GC(g)); -} - -/* extension modules might be compiled with GC support so these - functions must always be available */ - -#undef PyObject_GC_Track -#undef PyObject_GC_UnTrack -#undef PyObject_GC_Del -#undef _PyObject_GC_Malloc - -void -PyObject_GC_Track(void *op) -{ - _PyObject_GC_TRACK(op); -} - -/* for binary compatibility with 2.2 */ -void -_PyObject_GC_Track(PyObject *op) -{ - PyObject_GC_Track(op); -} - -void -PyObject_GC_UnTrack(void *op) -{ - /* Obscure: the Py_TRASHCAN mechanism requires that we be able to - * call PyObject_GC_UnTrack twice on an object. - */ - if (IS_TRACKED(op)) - _PyObject_GC_UNTRACK(op); -} - -/* for binary compatibility with 2.2 */ -void -_PyObject_GC_UnTrack(PyObject *op) -{ - PyObject_GC_UnTrack(op); -} - -PyObject * -_PyObject_GC_Malloc(size_t basicsize) -{ - PyObject *op; - PyGC_Head *g; - if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) - return PyErr_NoMemory(); - g = (PyGC_Head *)PyObject_MALLOC( - sizeof(PyGC_Head) + basicsize); - if (g == NULL) - return PyErr_NoMemory(); - g->gc.gc_refs = GC_UNTRACKED; - generations[0].count++; /* number of allocated GC objects */ - if (generations[0].count > generations[0].threshold && - enabled && - generations[0].threshold && - !collecting && - !PyErr_Occurred()) { - collecting = 1; - collect_generations(); - collecting = 0; - } - op = FROM_GC(g); - return op; -} - -PyObject * -_PyObject_GC_New(PyTypeObject *tp) -{ - PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp)); - if (op != NULL) - op = PyObject_INIT(op, tp); - return op; -} - -PyVarObject * -_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) -{ - const size_t size = _PyObject_VAR_SIZE(tp, nitems); - PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size); - if (op != NULL) - op = PyObject_INIT_VAR(op, tp, nitems); - return op; -} - -PyVarObject * -_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) -{ - const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); - PyGC_Head *g = AS_GC(op); - if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) - return (PyVarObject *)PyErr_NoMemory(); - g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize); - if (g == NULL) - return (PyVarObject *)PyErr_NoMemory(); - op = (PyVarObject *) FROM_GC(g); - Py_SIZE(op) = nitems; - return op; -} - -void -PyObject_GC_Del(void *op) -{ - PyGC_Head *g = AS_GC(op); - if (IS_TRACKED(op)) - gc_list_remove(g); - if (generations[0].count > 0) { - generations[0].count--; - } - PyObject_FREE(g); -}
--- a/cos/python/Modules/main.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ - -
--- a/cos/python/Objects/boolobject.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,184 +0,0 @@ -/* Boolean type, a subtype of int */ - -#include "Python.h" -#include "longintrepr.h" - -/* We define bool_repr to return "False" or "True" */ - -static PyObject *false_str = NULL; -static PyObject *true_str = NULL; - -static PyObject * -bool_repr(PyObject *self) -{ - PyObject *s; - - if (self == Py_True) - s = true_str ? true_str : - (true_str = PyUnicode_InternFromString("True")); - else - s = false_str ? false_str : - (false_str = PyUnicode_InternFromString("False")); - Py_XINCREF(s); - return s; -} - -/* Function to return a bool from a C long */ - -PyObject *PyBool_FromLong(long ok) -{ - PyObject *result; - - if (ok) - result = Py_True; - else - result = Py_False; - Py_INCREF(result); - return result; -} - -/* We define bool_new to always return either Py_True or Py_False */ - -static PyObject * -bool_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"x", 0}; - PyObject *x = Py_False; - long ok; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:bool", kwlist, &x)) - return NULL; - ok = PyObject_IsTrue(x); - if (ok < 0) - return NULL; - return PyBool_FromLong(ok); -} - -/* Arithmetic operations redefined to return bool if both args are bool. */ - -static PyObject * -bool_and(PyObject *a, PyObject *b) -{ - if (!PyBool_Check(a) || !PyBool_Check(b)) - return PyLong_Type.tp_as_number->nb_and(a, b); - return PyBool_FromLong((a == Py_True) & (b == Py_True)); -} - -static PyObject * -bool_or(PyObject *a, PyObject *b) -{ - if (!PyBool_Check(a) || !PyBool_Check(b)) - return PyLong_Type.tp_as_number->nb_or(a, b); - return PyBool_FromLong((a == Py_True) | (b == Py_True)); -} - -static PyObject * -bool_xor(PyObject *a, PyObject *b) -{ - if (!PyBool_Check(a) || !PyBool_Check(b)) - return PyLong_Type.tp_as_number->nb_xor(a, b); - return PyBool_FromLong((a == Py_True) ^ (b == Py_True)); -} - -/* Doc string */ - -PyDoc_STRVAR(bool_doc, -"bool(x) -> bool\n\ -\n\ -Returns True when the argument x is true, False otherwise.\n\ -The builtins True and False are the only two instances of the class bool.\n\ -The class bool is a subclass of the class int, and cannot be subclassed."); - -/* Arithmetic methods -- only so we can override &, |, ^. */ - -static PyNumberMethods bool_as_number = { - 0, /* nb_add */ - 0, /* nb_subtract */ - 0, /* nb_multiply */ - 0, /* nb_remainder */ - 0, /* nb_divmod */ - 0, /* nb_power */ - 0, /* nb_negative */ - 0, /* nb_positive */ - 0, /* nb_absolute */ - 0, /* nb_bool */ - 0, /* nb_invert */ - 0, /* nb_lshift */ - 0, /* nb_rshift */ - bool_and, /* nb_and */ - bool_xor, /* nb_xor */ - bool_or, /* nb_or */ - 0, /* nb_int */ - 0, /* nb_reserved */ - 0, /* nb_float */ - 0, /* nb_inplace_add */ - 0, /* nb_inplace_subtract */ - 0, /* nb_inplace_multiply */ - 0, /* nb_inplace_remainder */ - 0, /* nb_inplace_power */ - 0, /* nb_inplace_lshift */ - 0, /* nb_inplace_rshift */ - 0, /* nb_inplace_and */ - 0, /* nb_inplace_xor */ - 0, /* nb_inplace_or */ - 0, /* nb_floor_divide */ - 0, /* nb_true_divide */ - 0, /* nb_inplace_floor_divide */ - 0, /* nb_inplace_true_divide */ - 0, /* nb_index */ -}; - -/* The type object for bool. Note that this cannot be subclassed! */ - -PyTypeObject PyBool_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "bool", - sizeof(struct _longobject), - 0, - 0, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - bool_repr, /* tp_repr */ - &bool_as_number, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - bool_repr, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - bool_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - &PyLong_Type, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - bool_new, /* tp_new */ -}; - -/* The objects representing bool values False and True */ - -struct _longobject _Py_FalseStruct = { - PyVarObject_HEAD_INIT(&PyBool_Type, 0) - { 0 } -}; - -struct _longobject _Py_TrueStruct = { - PyVarObject_HEAD_INIT(&PyBool_Type, 1) - { 1 } -};
--- a/cos/python/Objects/listobject.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2849 +0,0 @@ -/* List object implementation */ - -#include "Python.h" - -/* Ensure ob_item has room for at least newsize elements, and set - * ob_size to newsize. If newsize > ob_size on entry, the content - * of the new slots at exit is undefined heap trash; it's the caller's - * responsibility to overwrite them with sane values. - * The number of allocated elements may grow, shrink, or stay the same. - * Failure is impossible if newsize <= self.allocated on entry, although - * that partly relies on an assumption that the system realloc() never - * fails when passed a number of bytes <= the number of bytes last - * allocated (the C standard doesn't guarantee this, but it's hard to - * imagine a realloc implementation where it wouldn't be true). - * Note that self->ob_item may change, and even if newsize is less - * than ob_size on entry. - */ -static int -list_resize(PyListObject *self, Py_ssize_t newsize) -{ - PyObject **items; - size_t new_allocated; - Py_ssize_t allocated = self->allocated; - - /* Bypass realloc() when a previous overallocation is large enough - to accommodate the newsize. If the newsize falls lower than half - the allocated size, then proceed with the realloc() to shrink the list. - */ - if (allocated >= newsize && newsize >= (allocated >> 1)) { - assert(self->ob_item != NULL || newsize == 0); - Py_SIZE(self) = newsize; - return 0; - } - - /* This over-allocates proportional to the list size, making room - * for additional growth. The over-allocation is mild, but is - * enough to give linear-time amortized behavior over a long - * sequence of appends() in the presence of a poorly-performing - * system realloc(). - * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... - */ - new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); - - /* check for integer overflow */ - if (new_allocated > PY_SIZE_MAX - newsize) { - PyErr_NoMemory(); - return -1; - } else { - new_allocated += newsize; - } - - if (newsize == 0) - new_allocated = 0; - items = self->ob_item; - if (new_allocated <= (PY_SIZE_MAX / sizeof(PyObject *))) - PyMem_RESIZE(items, PyObject *, new_allocated); - else - items = NULL; - if (items == NULL) { - PyErr_NoMemory(); - return -1; - } - self->ob_item = items; - Py_SIZE(self) = newsize; - self->allocated = new_allocated; - return 0; -} - - -/* Empty list reuse scheme to save calls to malloc and free */ -#ifndef PyList_MAXFREELIST -#define PyList_MAXFREELIST 80 -#endif -static PyListObject *free_list[PyList_MAXFREELIST]; -static int numfree = 0; - -int -PyList_ClearFreeList(void) -{ - PyListObject *op; - int ret = numfree; - while (numfree) { - op = free_list[--numfree]; - assert(PyList_CheckExact(op)); - PyObject_GC_Del(op); - } - return ret; -} - -void -PyList_Fini(void) -{ - PyList_ClearFreeList(); -} - -PyObject * -PyList_New(Py_ssize_t size) -{ - PyListObject *op; - size_t nbytes; - - if (size < 0) { - PyErr_BadInternalCall(); - return NULL; - } - /* Check for overflow without an actual overflow, - * which can cause compiler to optimise out */ - if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *)) - return PyErr_NoMemory(); - nbytes = size * sizeof(PyObject *); - if (numfree) { - numfree--; - op = free_list[numfree]; - _Py_NewReference((PyObject *)op); - } else { - op = PyObject_GC_New(PyListObject, &PyList_Type); - if (op == NULL) - return NULL; - } - if (size <= 0) - op->ob_item = NULL; - else { - op->ob_item = (PyObject **) PyMem_MALLOC(nbytes); - if (op->ob_item == NULL) { - Py_DECREF(op); - return PyErr_NoMemory(); - } - memset(op->ob_item, 0, nbytes); - } - Py_SIZE(op) = size; - op->allocated = size; - _PyObject_GC_TRACK(op); - return (PyObject *) op; -} - -Py_ssize_t -PyList_Size(PyObject *op) -{ - if (!PyList_Check(op)) { - PyErr_BadInternalCall(); - return -1; - } - else - return Py_SIZE(op); -} - -static PyObject *indexerr = NULL; - -PyObject * -PyList_GetItem(PyObject *op, Py_ssize_t i) -{ - if (!PyList_Check(op)) { - PyErr_BadInternalCall(); - return NULL; - } - if (i < 0 || i >= Py_SIZE(op)) { - if (indexerr == NULL) { - indexerr = PyUnicode_FromString( - "list index out of range"); - if (indexerr == NULL) - return NULL; - } - PyErr_SetObject(PyExc_IndexError, indexerr); - return NULL; - } - return ((PyListObject *)op) -> ob_item[i]; -} - -int -PyList_SetItem(register PyObject *op, register int i, - register PyObject *newitem) -{ - register PyObject *olditem; - register PyObject **p; - if (!PyList_Check(op)) - { - Py_XDECREF(newitem); - PyErr_BadInternalCall(); - return -1; - } - if (i < 0 || i >= Py_SIZE(op)) - { - Py_XDECREF(newitem); - PyErr_SetString(PyExc_IndexError, - "list assignment index out of range"); - return -1; - } - p = ((PyListObject *)op) -> ob_item + i; - olditem = *p; - *p = newitem; - Py_XDECREF(olditem); - return 0; -} - -static int -ins1(PyListObject *self, int where, PyObject *v) -{ - int i, n = Py_SIZE(self); - PyObject **items; - if (v == NULL) { - PyErr_BadInternalCall(); - return -1; - } - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to list"); - return -1; - } - - if (list_resize(self, n+1) == -1) - return -1; - - if (where < 0) { - where += n; - if (where < 0) - where = 0; - } - if (where > n) - where = n; - items = self->ob_item; - for (i = n; --i >= where; ) - items[i+1] = items[i]; - Py_INCREF(v); - items[where] = v; - return 0; -} - -int -PyList_Insert(PyObject *op, int where, PyObject *newitem) -{ - if (!PyList_Check(op)) { - PyErr_BadInternalCall(); - return -1; - } - return ins1((PyListObject *)op, where, newitem); -} - -static int -app1(PyListObject *self, PyObject *v) -{ - Py_ssize_t n = PyList_GET_SIZE(self); - - assert (v != NULL); - if (n == PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "cannot add more objects to list"); - return -1; - } - - if (list_resize(self, n+1) == -1) - return -1; - - Py_INCREF(v); - PyList_SET_ITEM(self, n, v); - return 0; -} - -int -PyList_Append(PyObject *op, PyObject *newitem) -{ - if (PyList_Check(op) && (newitem != NULL)) - return app1((PyListObject *)op, newitem); - PyErr_BadInternalCall(); - return -1; -} - -/* Methods */ - -static void -list_dealloc(PyListObject *op) -{ - Py_ssize_t i; - PyObject_GC_UnTrack(op); - Py_TRASHCAN_SAFE_BEGIN(op) - if (op->ob_item != NULL) { - /* Do it backwards, for Christian Tismer. - There's a simple test case where somehow this reduces - thrashing when a *very* large list is created and - immediately deleted. */ - i = Py_SIZE(op); - while (--i >= 0) { - Py_XDECREF(op->ob_item[i]); - } - PyMem_FREE(op->ob_item); - } - if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op)) - free_list[numfree++] = op; - else - Py_TYPE(op)->tp_free((PyObject *)op); - Py_TRASHCAN_SAFE_END(op) -} - -static PyObject * -list_repr(PyListObject *v) -{ - Py_ssize_t i; - PyObject *s = NULL; - _PyAccu acc; - static PyObject *sep = NULL; - - if (Py_SIZE(v) == 0) { - return PyUnicode_FromString("[]"); - } - - if (sep == NULL) { - sep = PyUnicode_FromString(", "); - if (sep == NULL) - return NULL; - } - - i = Py_ReprEnter((PyObject*)v); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("[...]") : NULL; - } - - if (_PyAccu_Init(&acc)) - goto error; - - s = PyUnicode_FromString("["); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - - /* Do repr() on each element. Note that this may mutate the list, - so must refetch the list size on each iteration. */ - for (i = 0; i < Py_SIZE(v); ++i) { - if (Py_EnterRecursiveCall(" while getting the repr of a list")) - goto error; - s = PyObject_Repr(v->ob_item[i]); - Py_LeaveRecursiveCall(); - if (i > 0 && _PyAccu_Accumulate(&acc, sep)) - goto error; - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - } - s = PyUnicode_FromString("]"); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - - Py_ReprLeave((PyObject *)v); - return _PyAccu_Finish(&acc); - -error: - _PyAccu_Destroy(&acc); - Py_XDECREF(s); - Py_ReprLeave((PyObject *)v); - return NULL; -} - -static Py_ssize_t -list_length(PyListObject *a) -{ - return Py_SIZE(a); -} - -static int -list_contains(PyListObject *a, PyObject *el) -{ - Py_ssize_t i; - int cmp; - - for (i = 0, cmp = 0 ; cmp == 0 && i < Py_SIZE(a); ++i) - cmp = PyObject_RichCompareBool(el, PyList_GET_ITEM(a, i), - Py_EQ); - return cmp; -} - -static PyObject * -list_item(PyListObject *a, int i) -{ - if (i < 0 || i >= Py_SIZE(a)) { - if (indexerr == NULL) { - indexerr = PyUnicode_FromString( - "list index out of range"); - if (indexerr == NULL) - return NULL; - } - PyErr_SetObject(PyExc_IndexError, indexerr); - return NULL; - } - Py_INCREF(a->ob_item[i]); - return a->ob_item[i]; -} - -static PyObject * -list_slice(PyListObject *a, int ilow, int ihigh) -{ - PyListObject *np; - PyObject **src, **dest; - int i, len; - if (ilow < 0) - ilow = 0; - else if (ilow > Py_SIZE(a)) - ilow = Py_SIZE(a); - if (ihigh < ilow) - ihigh = ilow; - else if (ihigh > Py_SIZE(a)) - ihigh = Py_SIZE(a); - len = ihigh - ilow; - np = (PyListObject *) PyList_New(len); - if (np == NULL) - return NULL; - - src = a->ob_item + ilow; - dest = np->ob_item; - for (i = 0; i < len; i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - return (PyObject *)np; -} - -PyObject * -PyList_GetSlice(PyObject *a, int ilow, int ihigh) -{ - if (!PyList_Check(a)) { - PyErr_BadInternalCall(); - return NULL; - } - return list_slice((PyListObject *)a, ilow, ihigh); -} - -static PyObject * -list_concat(PyListObject *a, PyObject *bb) -{ - Py_ssize_t size; - Py_ssize_t i; - PyObject **src, **dest; - PyListObject *np; - if (!PyList_Check(bb)) { - PyErr_Format(PyExc_TypeError, - "can only concatenate list (not \"%.200s\") to list", - bb->ob_type->tp_name); - return NULL; - } -#define b ((PyListObject *)bb) - size = Py_SIZE(a) + Py_SIZE(b); - if (size < 0) - return PyErr_NoMemory(); - np = (PyListObject *) PyList_New(size); - if (np == NULL) { - return NULL; - } - src = a->ob_item; - dest = np->ob_item; - for (i = 0; i < Py_SIZE(a); i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - src = b->ob_item; - dest = np->ob_item + Py_SIZE(a); - for (i = 0; i < Py_SIZE(b); i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - return (PyObject *)np; -#undef b -} - -static PyObject * -list_repeat(PyListObject *a, int n) -{ - Py_ssize_t i, j; - Py_ssize_t size; - PyListObject *np; - PyObject **p, **items; - PyObject *elem; - if (n < 0) - n = 0; - if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) - return PyErr_NoMemory(); - size = Py_SIZE(a) * n; - if (size == 0) - return PyList_New(0); - np = (PyListObject *) PyList_New(size); - if (np == NULL) - return NULL; - - items = np->ob_item; - if (Py_SIZE(a) == 1) { - elem = a->ob_item[0]; - for (i = 0; i < n; i++) { - items[i] = elem; - Py_INCREF(elem); - } - return (PyObject *) np; - } - p = np->ob_item; - items = a->ob_item; - for (i = 0; i < n; i++) { - for (j = 0; j < Py_SIZE(a); j++) { - *p = items[j]; - Py_INCREF(*p); - p++; - } - } - return (PyObject *) np; -} - -static int -list_clear(PyListObject *a) -{ - Py_ssize_t i; - PyObject **item = a->ob_item; - if (item != NULL) { - /* Because XDECREF can recursively invoke operations on - this list, we make it empty first. */ - i = Py_SIZE(a); - Py_SIZE(a) = 0; - a->ob_item = NULL; - a->allocated = 0; - while (--i >= 0) { - Py_XDECREF(item[i]); - } - PyMem_FREE(item); - } - /* Never fails; the return value can be ignored. - Note that there is no guarantee that the list is actually empty - at this point, because XDECREF may have populated it again! */ - return 0; -} - -/* a[ilow:ihigh] = v if v != NULL. - * del a[ilow:ihigh] if v == NULL. - * - * Special speed gimmick: when v is NULL and ihigh - ilow <= 8, it's - * guaranteed the call cannot fail. - */ -static int -list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) -{ - /* Because [X]DECREF can recursively invoke list operations on - this list, we must postpone all [X]DECREF activity until - after the list is back in its canonical shape. Therefore - we must allocate an additional array, 'recycle', into which - we temporarily copy the items that are deleted from the - list. :-( */ - PyObject *recycle_on_stack[8]; - PyObject **recycle = recycle_on_stack; /* will allocate more if needed */ - PyObject **item; - PyObject **vitem = NULL; - PyObject *v_as_SF = NULL; /* PySequence_Fast(v) */ - Py_ssize_t n; /* # of elements in replacement list */ - Py_ssize_t norig; /* # of elements in list getting replaced */ - Py_ssize_t d; /* Change in size */ - Py_ssize_t k; - size_t s; - int result = -1; /* guilty until proved innocent */ -#define b ((PyListObject *)v) - if (v == NULL) - n = 0; - else { - if (a == b) { - /* Special case "a[i:j] = a" -- copy b first */ - v = list_slice(b, 0, Py_SIZE(b)); - if (v == NULL) - return result; - result = list_ass_slice(a, ilow, ihigh, v); - Py_DECREF(v); - return result; - } - v_as_SF = PySequence_Fast(v, "can only assign an iterable"); - if(v_as_SF == NULL) - goto Error; - n = PySequence_Fast_GET_SIZE(v_as_SF); - vitem = PySequence_Fast_ITEMS(v_as_SF); - } - if (ilow < 0) - ilow = 0; - else if (ilow > Py_SIZE(a)) - ilow = Py_SIZE(a); - - if (ihigh < ilow) - ihigh = ilow; - else if (ihigh > Py_SIZE(a)) - ihigh = Py_SIZE(a); - - norig = ihigh - ilow; - assert(norig >= 0); - d = n - norig; - if (Py_SIZE(a) + d == 0) { - Py_XDECREF(v_as_SF); - return list_clear(a); - } - item = a->ob_item; - /* recycle the items that we are about to remove */ - s = norig * sizeof(PyObject *); - if (s > sizeof(recycle_on_stack)) { - recycle = (PyObject **)PyMem_MALLOC(s); - if (recycle == NULL) { - PyErr_NoMemory(); - goto Error; - } - } - memcpy(recycle, &item[ilow], s); - - if (d < 0) { /* Delete -d items */ - memmove(&item[ihigh+d], &item[ihigh], - (Py_SIZE(a) - ihigh)*sizeof(PyObject *)); - list_resize(a, Py_SIZE(a) + d); - item = a->ob_item; - } - else if (d > 0) { /* Insert d items */ - k = Py_SIZE(a); - if (list_resize(a, k+d) < 0) - goto Error; - item = a->ob_item; - memmove(&item[ihigh+d], &item[ihigh], - (k - ihigh)*sizeof(PyObject *)); - } - for (k = 0; k < n; k++, ilow++) { - PyObject *w = vitem[k]; - Py_XINCREF(w); - item[ilow] = w; - } - for (k = norig - 1; k >= 0; --k) - Py_XDECREF(recycle[k]); - result = 0; - Error: - if (recycle != recycle_on_stack) - PyMem_FREE(recycle); - Py_XDECREF(v_as_SF); - return result; -#undef b -} - -int -PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) -{ - if (!PyList_Check(a)) { - PyErr_BadInternalCall(); - return -1; - } - return list_ass_slice((PyListObject *)a, ilow, ihigh, v); -} - -static PyObject * -list_inplace_repeat(PyListObject *self, Py_ssize_t n) -{ - PyObject **items; - Py_ssize_t size, i, j, p; - - - size = PyList_GET_SIZE(self); - if (size == 0 || n == 1) { - Py_INCREF(self); - return (PyObject *)self; - } - - if (n < 1) { - (void)list_clear(self); - Py_INCREF(self); - return (PyObject *)self; - } - - if (size > PY_SSIZE_T_MAX / n) { - return PyErr_NoMemory(); - } - - if (list_resize(self, size*n) == -1) - return NULL; - - p = size; - items = self->ob_item; - for (i = 1; i < n; i++) { /* Start counting at 1, not 0 */ - for (j = 0; j < size; j++) { - PyObject *o = items[j]; - Py_INCREF(o); - items[p++] = o; - } - } - Py_INCREF(self); - return (PyObject *)self; -} - -static int -list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v) -{ - PyObject *old_value; - if (i < 0 || i >= Py_SIZE(a)) { - PyErr_SetString(PyExc_IndexError, - "list assignment index out of range"); - return -1; - } - if (v == NULL) - return list_ass_slice(a, i, i+1, v); - Py_INCREF(v); - old_value = a->ob_item[i]; - a->ob_item[i] = v; - Py_DECREF(old_value); - return 0; -} - -static PyObject * -listinsert(PyListObject *self, PyObject *args) -{ - Py_ssize_t i; - PyObject *v; - if (!PyArg_ParseTuple(args, "nO:insert", &i, &v)) - return NULL; - if (ins1(self, i, v) == 0) - Py_RETURN_NONE; - return NULL; -} - -static PyObject * -listclear(PyListObject *self) -{ - list_clear(self); - Py_RETURN_NONE; -} - -static PyObject * -listcopy(PyListObject *self) -{ - return list_slice(self, 0, Py_SIZE(self)); -} - -static PyObject * -listappend(PyListObject *self, PyObject *v) -{ - if (app1(self, v) == 0) - Py_RETURN_NONE; - return NULL; -} - -static PyObject * -listextend(PyListObject *self, PyObject *b) -{ - PyObject *it; /* iter(v) */ - Py_ssize_t m; /* size of self */ - Py_ssize_t n; /* guess for size of b */ - Py_ssize_t mn; /* m + n */ - Py_ssize_t i; - PyObject *(*iternext)(PyObject *); - - /* Special cases: - 1) lists and tuples which can use PySequence_Fast ops - 2) extending self to self requires making a copy first - */ - if (PyList_CheckExact(b) || PyTuple_CheckExact(b) || (PyObject *)self == b) { - PyObject **src, **dest; - b = PySequence_Fast(b, "argument must be iterable"); - if (!b) - return NULL; - n = PySequence_Fast_GET_SIZE(b); - if (n == 0) { - /* short circuit when b is empty */ - Py_DECREF(b); - Py_RETURN_NONE; - } - m = Py_SIZE(self); - if (list_resize(self, m + n) == -1) { - Py_DECREF(b); - return NULL; - } - /* note that we may still have self == b here for the - * situation a.extend(a), but the following code works - * in that case too. Just make sure to resize self - * before calling PySequence_Fast_ITEMS. - */ - /* populate the end of self with b's items */ - src = PySequence_Fast_ITEMS(b); - dest = self->ob_item + m; - for (i = 0; i < n; i++) { - PyObject *o = src[i]; - Py_INCREF(o); - dest[i] = o; - } - Py_DECREF(b); - Py_RETURN_NONE; - } - - it = PyObject_GetIter(b); - if (it == NULL) - return NULL; - iternext = *it->ob_type->tp_iternext; - - /* Guess a result list size. */ - n = _PyObject_LengthHint(b, 8); - if (n == -1) { - Py_DECREF(it); - return NULL; - } - m = Py_SIZE(self); - mn = m + n; - if (mn >= m) { - /* Make room. */ - if (list_resize(self, mn) == -1) - goto error; - /* Make the list sane again. */ - Py_SIZE(self) = m; - } - /* Else m + n overflowed; on the chance that n lied, and there really - * is enough room, ignore it. If n was telling the truth, we'll - * eventually run out of memory during the loop. - */ - - /* Run iterator to exhaustion. */ - for (;;) { - PyObject *item = iternext(it); - if (item == NULL) { - if (PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) - PyErr_Clear(); - else - goto error; - } - break; - } - if (Py_SIZE(self) < self->allocated) { - /* steals ref */ - PyList_SET_ITEM(self, Py_SIZE(self), item); - ++Py_SIZE(self); - } - else { - int status = app1(self, item); - Py_DECREF(item); /* append creates a new ref */ - if (status < 0) - goto error; - } - } - - /* Cut back result list if initial guess was too large. */ - if (Py_SIZE(self) < self->allocated) - list_resize(self, Py_SIZE(self)); /* shrinking can't fail */ - - Py_DECREF(it); - Py_RETURN_NONE; - - error: - Py_DECREF(it); - return NULL; -} - -PyObject * -_PyList_Extend(PyListObject *self, PyObject *b) -{ - return listextend(self, b); -} - -static PyObject * -list_inplace_concat(PyListObject *self, PyObject *other) -{ - PyObject *result; - - result = listextend(self, other); - if (result == NULL) - return result; - Py_DECREF(result); - Py_INCREF(self); - return (PyObject *)self; -} - -static PyObject * -listpop(PyListObject *self, PyObject *args) -{ - Py_ssize_t i = -1; - PyObject *v; - int status; - - if (!PyArg_ParseTuple(args, "|n:pop", &i)) - return NULL; - - if (Py_SIZE(self) == 0) { - /* Special-case most common failure cause */ - PyErr_SetString(PyExc_IndexError, "pop from empty list"); - return NULL; - } - if (i < 0) - i += Py_SIZE(self); - if (i < 0 || i >= Py_SIZE(self)) { - PyErr_SetString(PyExc_IndexError, "pop index out of range"); - return NULL; - } - v = self->ob_item[i]; - if (i == Py_SIZE(self) - 1) { - status = list_resize(self, Py_SIZE(self) - 1); - assert(status >= 0); - return v; /* and v now owns the reference the list had */ - } - Py_INCREF(v); - status = list_ass_slice(self, i, i+1, (PyObject *)NULL); - assert(status >= 0); - /* Use status, so that in a release build compilers don't - * complain about the unused name. - */ - (void) status; - - return v; -} - -/* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ -static void -reverse_slice(PyObject **lo, PyObject **hi) -{ - assert(lo && hi); - - --hi; - while (lo < hi) { - PyObject *t = *lo; - *lo = *hi; - *hi = t; - ++lo; - --hi; - } -} - -/* Lots of code for an adaptive, stable, natural mergesort. There are many - * pieces to this algorithm; read listsort.txt for overviews and details. - */ - -/* A sortslice contains a pointer to an array of keys and a pointer to - * an array of corresponding values. In other words, keys[i] - * corresponds with values[i]. If values == NULL, then the keys are - * also the values. - * - * Several convenience routines are provided here, so that keys and - * values are always moved in sync. - */ - -typedef struct { - PyObject **keys; - PyObject **values; -} sortslice; - -Py_LOCAL_INLINE(void) -sortslice_copy(sortslice *s1, int i, sortslice *s2, int j) -{ - s1->keys[i] = s2->keys[j]; - if (s1->values != NULL) - s1->values[i] = s2->values[j]; -} - -Py_LOCAL_INLINE(void) -sortslice_copy_incr(sortslice *dst, sortslice *src) -{ - *dst->keys++ = *src->keys++; - if (dst->values != NULL) - *dst->values++ = *src->values++; -} - -Py_LOCAL_INLINE(void) -sortslice_copy_decr(sortslice *dst, sortslice *src) -{ - *dst->keys-- = *src->keys--; - if (dst->values != NULL) - *dst->values-- = *src->values--; -} - - -Py_LOCAL_INLINE(void) -sortslice_memcpy(sortslice *s1, int i, sortslice *s2, int j, int n) -{ - memcpy(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n); - if (s1->values != NULL) - memcpy(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n); -} - -Py_LOCAL_INLINE(void) -sortslice_memmove(sortslice *s1, Py_ssize_t i, sortslice *s2, Py_ssize_t j, - Py_ssize_t n) -{ - memmove(&s1->keys[i], &s2->keys[j], sizeof(PyObject *) * n); - if (s1->values != NULL) - memmove(&s1->values[i], &s2->values[j], sizeof(PyObject *) * n); -} - -Py_LOCAL_INLINE(void) -sortslice_advance(sortslice *slice, Py_ssize_t n) -{ - slice->keys += n; - if (slice->values != NULL) - slice->values += n; -} - -/* Comparison function: PyObject_RichCompareBool with Py_LT. - * Returns -1 on error, 1 if x < y, 0 if x >= y. - */ - -#define ISLT(X, Y) (PyObject_RichCompareBool(X, Y, Py_LT)) - -/* Compare X to Y via "<". Goto "fail" if the comparison raises an - error. Else "k" is set to true iff X<Y, and an "if (k)" block is - started. It makes more sense in context <wink>. X and Y are PyObject*s. -*/ -#define IFLT(X, Y) if ((k = ISLT(X, Y)) < 0) goto fail; \ - if (k) - -/* binarysort is the best method for sorting small arrays: it does - few compares, but can do data movement quadratic in the number of - elements. - [lo, hi) is a contiguous slice of a list, and is sorted via - binary insertion. This sort is stable. - On entry, must have lo <= start <= hi, and that [lo, start) is already - sorted (pass start == lo if you don't know!). - If islt() complains return -1, else 0. - Even in case of error, the output slice will be some permutation of - the input (nothing is lost or duplicated). -*/ -static int -binarysort(sortslice lo, PyObject **hi, PyObject **start) -{ - register Py_ssize_t k; - register PyObject **l, **p, **r; - register PyObject *pivot; - - assert(lo.keys <= start && start <= hi); - /* assert [lo, start) is sorted */ - if (lo.keys == start) - ++start; - for (; start < hi; ++start) { - /* set l to where *start belongs */ - l = lo.keys; - r = start; - pivot = *r; - /* Invariants: - * pivot >= all in [lo, l). - * pivot < all in [r, start). - * The second is vacuously true at the start. - */ - assert(l < r); - do { - p = l + ((r - l) >> 1); - IFLT(pivot, *p) - r = p; - else - l = p+1; - } while (l < r); - assert(l == r); - /* The invariants still hold, so pivot >= all in [lo, l) and - pivot < all in [l, start), so pivot belongs at l. Note - that if there are elements equal to pivot, l points to the - first slot after them -- that's why this sort is stable. - Slide over to make room. - Caution: using memmove is much slower under MSVC 5; - we're not usually moving many slots. */ - for (p = start; p > l; --p) - *p = *(p-1); - *l = pivot; - if (lo.values != NULL) { - Py_ssize_t offset = lo.values - lo.keys; - p = start + offset; - pivot = *p; - l += offset; - for (p = start + offset; p > l; --p) - *p = *(p-1); - *l = pivot; - } - } - return 0; - - fail: - return -1; -} - -/* -Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi -is required on entry. "A run" is the longest ascending sequence, with - - lo[0] <= lo[1] <= lo[2] <= ... - -or the longest descending sequence, with - - lo[0] > lo[1] > lo[2] > ... - -Boolean *descending is set to 0 in the former case, or to 1 in the latter. -For its intended use in a stable mergesort, the strictness of the defn of -"descending" is needed so that the caller can safely reverse a descending -sequence without violating stability (strict > ensures there are no equal -elements to get out of order). - -Returns -1 in case of error. -*/ -static Py_ssize_t -count_run(PyObject **lo, PyObject **hi, int *descending) -{ - Py_ssize_t k; - Py_ssize_t n; - - assert(lo < hi); - *descending = 0; - ++lo; - if (lo == hi) - return 1; - - n = 2; - IFLT(*lo, *(lo-1)) { - *descending = 1; - for (lo = lo+1; lo < hi; ++lo, ++n) { - IFLT(*lo, *(lo-1)) - ; - else - break; - } - } - else { - for (lo = lo+1; lo < hi; ++lo, ++n) { - IFLT(*lo, *(lo-1)) - break; - } - } - - return n; -fail: - return -1; -} - -/* -Locate the proper position of key in a sorted vector; if the vector contains -an element equal to key, return the position immediately to the left of -the leftmost equal element. [gallop_right() does the same except returns -the position to the right of the rightmost equal element (if any).] - -"a" is a sorted vector with n elements, starting at a[0]. n must be > 0. - -"hint" is an index at which to begin the search, 0 <= hint < n. The closer -hint is to the final result, the faster this runs. - -The return value is the int k in 0..n such that - - a[k-1] < key <= a[k] - -pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, -key belongs at index k; or, IOW, the first k elements of a should precede -key, and the last n-k should follow key. - -Returns -1 on error. See listsort.txt for info on the method. -*/ -static Py_ssize_t -gallop_left(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) -{ - Py_ssize_t ofs; - Py_ssize_t lastofs; - Py_ssize_t k; - - assert(key && a && n > 0 && hint >= 0 && hint < n); - - a += hint; - lastofs = 0; - ofs = 1; - IFLT(*a, key) { - /* a[hint] < key -- gallop right, until - * a[hint + lastofs] < key <= a[hint + ofs] - */ - const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */ - while (ofs < maxofs) { - IFLT(a[ofs], key) { - lastofs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) /* int overflow */ - ofs = maxofs; - } - else /* key <= a[hint + ofs] */ - break; - } - if (ofs > maxofs) - ofs = maxofs; - /* Translate back to offsets relative to &a[0]. */ - lastofs += hint; - ofs += hint; - } - else { - /* key <= a[hint] -- gallop left, until - * a[hint - ofs] < key <= a[hint - lastofs] - */ - const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */ - while (ofs < maxofs) { - IFLT(*(a-ofs), key) - break; - /* key <= a[hint - ofs] */ - lastofs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) /* int overflow */ - ofs = maxofs; - } - if (ofs > maxofs) - ofs = maxofs; - /* Translate back to positive offsets relative to &a[0]. */ - k = lastofs; - lastofs = hint - ofs; - ofs = hint - k; - } - a -= hint; - - assert(-1 <= lastofs && lastofs < ofs && ofs <= n); - /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the - * right of lastofs but no farther right than ofs. Do a binary - * search, with invariant a[lastofs-1] < key <= a[ofs]. - */ - ++lastofs; - while (lastofs < ofs) { - Py_ssize_t m = lastofs + ((ofs - lastofs) >> 1); - - IFLT(a[m], key) - lastofs = m+1; /* a[m] < key */ - else - ofs = m; /* key <= a[m] */ - } - assert(lastofs == ofs); /* so a[ofs-1] < key <= a[ofs] */ - return ofs; - -fail: - return -1; -} - -/* -Exactly like gallop_left(), except that if key already exists in a[0:n], -finds the position immediately to the right of the rightmost equal value. - -The return value is the int k in 0..n such that - - a[k-1] <= key < a[k] - -or -1 if error. - -The code duplication is massive, but this is enough different given that -we're sticking to "<" comparisons that it's much harder to follow if -written as one routine with yet another "left or right?" flag. -*/ -static Py_ssize_t -gallop_right(PyObject *key, PyObject **a, Py_ssize_t n, Py_ssize_t hint) -{ - Py_ssize_t ofs; - Py_ssize_t lastofs; - Py_ssize_t k; - - assert(key && a && n > 0 && hint >= 0 && hint < n); - - a += hint; - lastofs = 0; - ofs = 1; - IFLT(key, *a) { - /* key < a[hint] -- gallop left, until - * a[hint - ofs] <= key < a[hint - lastofs] - */ - const Py_ssize_t maxofs = hint + 1; /* &a[0] is lowest */ - while (ofs < maxofs) { - IFLT(key, *(a-ofs)) { - lastofs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) /* int overflow */ - ofs = maxofs; - } - else /* a[hint - ofs] <= key */ - break; - } - if (ofs > maxofs) - ofs = maxofs; - /* Translate back to positive offsets relative to &a[0]. */ - k = lastofs; - lastofs = hint - ofs; - ofs = hint - k; - } - else { - /* a[hint] <= key -- gallop right, until - * a[hint + lastofs] <= key < a[hint + ofs] - */ - const Py_ssize_t maxofs = n - hint; /* &a[n-1] is highest */ - while (ofs < maxofs) { - IFLT(key, a[ofs]) - break; - /* a[hint + ofs] <= key */ - lastofs = ofs; - ofs = (ofs << 1) + 1; - if (ofs <= 0) /* int overflow */ - ofs = maxofs; - } - if (ofs > maxofs) - ofs = maxofs; - /* Translate back to offsets relative to &a[0]. */ - lastofs += hint; - ofs += hint; - } - a -= hint; - - assert(-1 <= lastofs && lastofs < ofs && ofs <= n); - /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the - * right of lastofs but no farther right than ofs. Do a binary - * search, with invariant a[lastofs-1] <= key < a[ofs]. - */ - ++lastofs; - while (lastofs < ofs) { - Py_ssize_t m = lastofs + ((ofs - lastofs) >> 1); - - IFLT(key, a[m]) - ofs = m; /* key < a[m] */ - else - lastofs = m+1; /* a[m] <= key */ - } - assert(lastofs == ofs); /* so a[ofs-1] <= key < a[ofs] */ - return ofs; - -fail: - return -1; -} - -/* The maximum number of entries in a MergeState's pending-runs stack. - * This is enough to sort arrays of size up to about - * 32 * phi ** MAX_MERGE_PENDING - * where phi ~= 1.618. 85 is ridiculouslylarge enough, good for an array - * with 2**64 elements. - */ -#define MAX_MERGE_PENDING 85 - -/* When we get into galloping mode, we stay there until both runs win less - * often than MIN_GALLOP consecutive times. See listsort.txt for more info. - */ -#define MIN_GALLOP 7 - -/* Avoid malloc for small temp arrays. */ -#define MERGESTATE_TEMP_SIZE 256 - -/* One MergeState exists on the stack per invocation of mergesort. It's just - * a convenient way to pass state around among the helper functions. - */ -struct s_slice { - sortslice base; - Py_ssize_t len; -}; - -typedef struct s_MergeState { - /* This controls when we get *into* galloping mode. It's initialized - * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for - * random data, and lower for highly structured data. - */ - Py_ssize_t min_gallop; - - /* 'a' is temp storage to help with merges. It contains room for - * alloced entries. - */ - sortslice a; /* may point to temparray below */ - Py_ssize_t alloced; - - /* A stack of n pending runs yet to be merged. Run #i starts at - * address base[i] and extends for len[i] elements. It's always - * true (so long as the indices are in bounds) that - * - * pending[i].base + pending[i].len == pending[i+1].base - * - * so we could cut the storage for this, but it's a minor amount, - * and keeping all the info explicit simplifies the code. - */ - int n; - struct s_slice pending[MAX_MERGE_PENDING]; - - /* 'a' points to this when possible, rather than muck with malloc. */ - PyObject *temparray[MERGESTATE_TEMP_SIZE]; -} MergeState; - -/* Conceptually a MergeState's constructor. */ -static void -merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc) -{ - assert(ms != NULL); - if (has_keyfunc) { - /* The temporary space for merging will need at most half the list - * size rounded up. Use the minimum possible space so we can use the - * rest of temparray for other things. In particular, if there is - * enough extra space, listsort() will use it to store the keys. - */ - ms->alloced = (list_size + 1) / 2; - - /* ms->alloced describes how many keys will be stored at - ms->temparray, but we also need to store the values. Hence, - ms->alloced is capped at half of MERGESTATE_TEMP_SIZE. */ - if (MERGESTATE_TEMP_SIZE / 2 < ms->alloced) - ms->alloced = MERGESTATE_TEMP_SIZE / 2; - ms->a.values = &ms->temparray[ms->alloced]; - } - else { - ms->alloced = MERGESTATE_TEMP_SIZE; - ms->a.values = NULL; - } - ms->a.keys = ms->temparray; - ms->n = 0; - ms->min_gallop = MIN_GALLOP; -} - -/* Free all the temp memory owned by the MergeState. This must be called - * when you're done with a MergeState, and may be called before then if - * you want to free the temp memory early. - */ -static void -merge_freemem(MergeState *ms) -{ - assert(ms != NULL); - if (ms->a.keys != ms->temparray) - PyMem_Free(ms->a.keys); -} - -/* Ensure enough temp memory for 'need' array slots is available. - * Returns 0 on success and -1 if the memory can't be gotten. - */ -static int -merge_getmem(MergeState *ms, Py_ssize_t need) -{ - int multiplier; - - assert(ms != NULL); - if (need <= ms->alloced) - return 0; - - multiplier = ms->a.values != NULL ? 2 : 1; - - /* Don't realloc! That can cost cycles to copy the old data, but - * we don't care what's in the block. - */ - merge_freemem(ms); - if ((size_t)need > PY_SSIZE_T_MAX / sizeof(PyObject*) / multiplier) { - PyErr_NoMemory(); - return -1; - } - ms->a.keys = (PyObject**)PyMem_Malloc(multiplier * need - * sizeof(PyObject *)); - if (ms->a.keys != NULL) { - ms->alloced = need; - if (ms->a.values != NULL) - ms->a.values = &ms->a.keys[need]; - return 0; - } - PyErr_NoMemory(); - return -1; -} -#define MERGE_GETMEM(MS, NEED) ((NEED) <= (MS)->alloced ? 0 : \ - merge_getmem(MS, NEED)) - -/* Merge the na elements starting at ssa with the nb elements starting at - * ssb.keys = ssa.keys + na in a stable way, in-place. na and nb must be > 0. - * Must also have that ssa.keys[na-1] belongs at the end of the merge, and - * should have na <= nb. See listsort.txt for more info. Return 0 if - * successful, -1 if error. - */ -static Py_ssize_t -merge_lo(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) -{ - Py_ssize_t k; - sortslice dest; - int result = -1; /* guilty until proved innocent */ - Py_ssize_t min_gallop; - - assert(ms && ssa.keys && ssb.keys && na > 0 && nb > 0); - assert(ssa.keys + na == ssb.keys); - if (MERGE_GETMEM(ms, na) < 0) - return -1; - sortslice_memcpy(&ms->a, 0, &ssa, 0, na); - dest = ssa; - ssa = ms->a; - - sortslice_copy_incr(&dest, &ssb); - --nb; - if (nb == 0) - goto Succeed; - if (na == 1) - goto CopyB; - - min_gallop = ms->min_gallop; - for (;;) { - Py_ssize_t acount = 0; /* # of times A won in a row */ - Py_ssize_t bcount = 0; /* # of times B won in a row */ - - /* Do the straightforward thing until (if ever) one run - * appears to win consistently. - */ - for (;;) { - assert(na > 1 && nb > 0); - k = ISLT(ssb.keys[0], ssa.keys[0]); - if (k) { - if (k < 0) - goto Fail; - sortslice_copy_incr(&dest, &ssb); - ++bcount; - acount = 0; - --nb; - if (nb == 0) - goto Succeed; - if (bcount >= min_gallop) - break; - } - else { - sortslice_copy_incr(&dest, &ssa); - ++acount; - bcount = 0; - --na; - if (na == 1) - goto CopyB; - if (acount >= min_gallop) - break; - } - } - - /* One run is winning so consistently that galloping may - * be a huge win. So try that, and continue galloping until - * (if ever) neither run appears to be winning consistently - * anymore. - */ - ++min_gallop; - do { - assert(na > 1 && nb > 0); - min_gallop -= min_gallop > 1; - ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], ssa.keys, na, 0); - acount = k; - if (k) { - if (k < 0) - goto Fail; - sortslice_memcpy(&dest, 0, &ssa, 0, k); - sortslice_advance(&dest, k); - sortslice_advance(&ssa, k); - na -= k; - if (na == 1) - goto CopyB; - /* na==0 is impossible now if the comparison - * function is consistent, but we can't assume - * that it is. - */ - if (na == 0) - goto Succeed; - } - sortslice_copy_incr(&dest, &ssb); - --nb; - if (nb == 0) - goto Succeed; - - k = gallop_left(ssa.keys[0], ssb.keys, nb, 0); - bcount = k; - if (k) { - if (k < 0) - goto Fail; - sortslice_memmove(&dest, 0, &ssb, 0, k); - sortslice_advance(&dest, k); - sortslice_advance(&ssb, k); - nb -= k; - if (nb == 0) - goto Succeed; - } - sortslice_copy_incr(&dest, &ssa); - --na; - if (na == 1) - goto CopyB; - } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); - ++min_gallop; /* penalize it for leaving galloping mode */ - ms->min_gallop = min_gallop; - } -Succeed: - result = 0; -Fail: - if (na) - sortslice_memcpy(&dest, 0, &ssa, 0, na); - return result; -CopyB: - assert(na == 1 && nb > 0); - /* The last element of ssa belongs at the end of the merge. */ - sortslice_memmove(&dest, 0, &ssb, 0, nb); - sortslice_copy(&dest, nb, &ssa, 0); - return 0; -} - -/* Merge the na elements starting at pa with the nb elements starting at - * ssb.keys = ssa.keys + na in a stable way, in-place. na and nb must be > 0. - * Must also have that ssa.keys[na-1] belongs at the end of the merge, and - * should have na >= nb. See listsort.txt for more info. Return 0 if - * successful, -1 if error. - */ -static Py_ssize_t -merge_hi(MergeState *ms, sortslice ssa, Py_ssize_t na, - sortslice ssb, Py_ssize_t nb) -{ - Py_ssize_t k; - sortslice dest, basea, baseb; - int result = -1; /* guilty until proved innocent */ - Py_ssize_t min_gallop; - - assert(ms && ssa.keys && ssb.keys && na > 0 && nb > 0); - assert(ssa.keys + na == ssb.keys); - if (MERGE_GETMEM(ms, nb) < 0) - return -1; - dest = ssb; - sortslice_advance(&dest, nb-1); - sortslice_memcpy(&ms->a, 0, &ssb, 0, nb); - basea = ssa; - baseb = ms->a; - ssb.keys = ms->a.keys + nb - 1; - if (ssb.values != NULL) - ssb.values = ms->a.values + nb - 1; - sortslice_advance(&ssa, na - 1); - - sortslice_copy_decr(&dest, &ssa); - --na; - if (na == 0) - goto Succeed; - if (nb == 1) - goto CopyA; - - min_gallop = ms->min_gallop; - for (;;) { - Py_ssize_t acount = 0; /* # of times A won in a row */ - Py_ssize_t bcount = 0; /* # of times B won in a row */ - - /* Do the straightforward thing until (if ever) one run - * appears to win consistently. - */ - for (;;) { - assert(na > 0 && nb > 1); - k = ISLT(ssb.keys[0], ssa.keys[0]); - if (k) { - if (k < 0) - goto Fail; - sortslice_copy_decr(&dest, &ssa); - ++acount; - bcount = 0; - --na; - if (na == 0) - goto Succeed; - if (acount >= min_gallop) - break; - } - else { - sortslice_copy_decr(&dest, &ssb); - ++bcount; - acount = 0; - --nb; - if (nb == 1) - goto CopyA; - if (bcount >= min_gallop) - break; - } - } - - /* One run is winning so consistently that galloping may - * be a huge win. So try that, and continue galloping until - * (if ever) neither run appears to be winning consistently - * anymore. - */ - ++min_gallop; - do { - assert(na > 0 && nb > 1); - min_gallop -= min_gallop > 1; - ms->min_gallop = min_gallop; - k = gallop_right(ssb.keys[0], basea.keys, na, na-1); - if (k < 0) - goto Fail; - k = na - k; - acount = k; - if (k) { - sortslice_advance(&dest, -k); - sortslice_advance(&ssa, -k); - sortslice_memmove(&dest, 1, &ssa, 1, k); - na -= k; - if (na == 0) - goto Succeed; - } - sortslice_copy_decr(&dest, &ssb); - --nb; - if (nb == 1) - goto CopyA; - - k = gallop_left(ssa.keys[0], baseb.keys, nb, nb-1); - if (k < 0) - goto Fail; - k = nb - k; - bcount = k; - if (k) { - sortslice_advance(&dest, -k); - sortslice_advance(&ssb, -k); - sortslice_memcpy(&dest, 1, &ssb, 1, k); - nb -= k; - if (nb == 1) - goto CopyA; - /* nb==0 is impossible now if the comparison - * function is consistent, but we can't assume - * that it is. - */ - if (nb == 0) - goto Succeed; - } - sortslice_copy_decr(&dest, &ssa); - --na; - if (na == 0) - goto Succeed; - } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); - ++min_gallop; /* penalize it for leaving galloping mode */ - ms->min_gallop = min_gallop; - } -Succeed: - result = 0; -Fail: - if (nb) - sortslice_memcpy(&dest, -(nb-1), &baseb, 0, nb); - return result; -CopyA: - assert(nb == 1 && na > 0); - /* The first element of ssb belongs at the front of the merge. */ - sortslice_memmove(&dest, 1-na, &ssa, 1-na, na); - sortslice_advance(&dest, -na); - sortslice_advance(&ssa, -na); - sortslice_copy(&dest, 0, &ssb, 0); - return 0; -} - -/* Merge the two runs at stack indices i and i+1. - * Returns 0 on success, -1 on error. - */ -static Py_ssize_t -merge_at(MergeState *ms, Py_ssize_t i) -{ - sortslice ssa, ssb; - Py_ssize_t na, nb; - Py_ssize_t k; - - assert(ms != NULL); - assert(ms->n >= 2); - assert(i >= 0); - assert(i == ms->n - 2 || i == ms->n - 3); - - ssa = ms->pending[i].base; - na = ms->pending[i].len; - ssb = ms->pending[i+1].base; - nb = ms->pending[i+1].len; - assert(na > 0 && nb > 0); - assert(ssa.keys + na == ssb.keys); - - /* Record the length of the combined runs; if i is the 3rd-last - * run now, also slide over the last run (which isn't involved - * in this merge). The current run i+1 goes away in any case. - */ - ms->pending[i].len = na + nb; - if (i == ms->n - 3) - ms->pending[i+1] = ms->pending[i+2]; - --ms->n; - - /* Where does b start in a? Elements in a before that can be - * ignored (already in place). - */ - k = gallop_right(*ssb.keys, ssa.keys, na, 0); - if (k < 0) - return -1; - sortslice_advance(&ssa, k); - na -= k; - if (na == 0) - return 0; - - /* Where does a end in b? Elements in b after that can be - * ignored (already in place). - */ - nb = gallop_left(ssa.keys[na-1], ssb.keys, nb, nb-1); - if (nb <= 0) - return nb; - - /* Merge what remains of the runs, using a temp array with - * min(na, nb) elements. - */ - if (na <= nb) - return merge_lo(ms, ssa, na, ssb, nb); - else - return merge_hi(ms, ssa, na, ssb, nb); -} - -/* Examine the stack of runs waiting to be merged, merging adjacent runs - * until the stack invariants are re-established: - * - * 1. len[-3] > len[-2] + len[-1] - * 2. len[-2] > len[-1] - * - * See listsort.txt for more info. - * - * Returns 0 on success, -1 on error. - */ -static int -merge_collapse(MergeState *ms) -{ - struct s_slice *p = ms->pending; - - assert(ms); - while (ms->n > 1) { - Py_ssize_t n = ms->n - 2; - if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) { - if (p[n-1].len < p[n+1].len) - --n; - if (merge_at(ms, n) < 0) - return -1; - } - else if (p[n].len <= p[n+1].len) { - if (merge_at(ms, n) < 0) - return -1; - } - else - break; - } - return 0; -} - -/* Regardless of invariants, merge all runs on the stack until only one - * remains. This is used at the end of the mergesort. - * - * Returns 0 on success, -1 on error. - */ -static int -merge_force_collapse(MergeState *ms) -{ - struct s_slice *p = ms->pending; - - assert(ms); - while (ms->n > 1) { - Py_ssize_t n = ms->n - 2; - if (n > 0 && p[n-1].len < p[n+1].len) - --n; - if (merge_at(ms, n) < 0) - return -1; - } - return 0; -} - -/* Compute a good value for the minimum run length; natural runs shorter - * than this are boosted artificially via binary insertion. - * - * If n < 64, return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return 32. - * Else return an int k, 32 <= k <= 64, such that n/k is close to, but - * strictly less than, an exact power of 2. - * - * See listsort.txt for more info. - */ -static Py_ssize_t -merge_compute_minrun(Py_ssize_t n) -{ - Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ - - assert(n >= 0); - while (n >= 64) { - r |= n & 1; - n >>= 1; - } - return n + r; -} - -static void -reverse_sortslice(sortslice *s, Py_ssize_t n) -{ - reverse_slice(s->keys, &s->keys[n]); - if (s->values != NULL) - reverse_slice(s->values, &s->values[n]); -} - -/* An adaptive, stable, natural mergesort. See listsort.txt. - * Returns Py_None on success, NULL on error. Even in case of error, the - * list will be some permutation of its input state (nothing is lost or - * duplicated). - */ -static PyObject * -listsort(PyListObject *self, PyObject *args, PyObject *kwds) -{ - MergeState ms; - Py_ssize_t nremaining; - Py_ssize_t minrun; - sortslice lo; - Py_ssize_t saved_ob_size, saved_allocated; - PyObject **saved_ob_item; - PyObject **final_ob_item; - PyObject *result = NULL; /* guilty until proved innocent */ - int reverse = 0; - PyObject *keyfunc = NULL; - Py_ssize_t i; - static char *kwlist[] = {"key", "reverse", 0}; - PyObject **keys; - - assert(self != NULL); - assert (PyList_Check(self)); - if (args != NULL) { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oi:sort", - kwlist, &keyfunc, &reverse)) - return NULL; - if (Py_SIZE(args) > 0) { - PyErr_SetString(PyExc_TypeError, - "must use keyword argument for key function"); - return NULL; - } - } - if (keyfunc == Py_None) - keyfunc = NULL; - - /* The list is temporarily made empty, so that mutations performed - * by comparison functions can't affect the slice of memory we're - * sorting (allowing mutations during sorting is a core-dump - * factory, since ob_item may change). - */ - saved_ob_size = Py_SIZE(self); - saved_ob_item = self->ob_item; - saved_allocated = self->allocated; - Py_SIZE(self) = 0; - self->ob_item = NULL; - self->allocated = -1; /* any operation will reset it to >= 0 */ - - if (keyfunc == NULL) { - keys = NULL; - lo.keys = saved_ob_item; - lo.values = NULL; - } - else { - if (saved_ob_size < MERGESTATE_TEMP_SIZE/2) - /* Leverage stack space we allocated but won't otherwise use */ - keys = &ms.temparray[saved_ob_size+1]; - else { - keys = PyMem_MALLOC(sizeof(PyObject *) * saved_ob_size); - if (keys == NULL) - return NULL; - } - - for (i = 0; i < saved_ob_size ; i++) { - keys[i] = PyObject_CallFunctionObjArgs(keyfunc, saved_ob_item[i], - NULL); - if (keys[i] == NULL) { - for (i=i-1 ; i>=0 ; i--) - Py_DECREF(keys[i]); - if (keys != &ms.temparray[saved_ob_size+1]) - PyMem_FREE(keys); - goto keyfunc_fail; - } - } - - lo.keys = keys; - lo.values = saved_ob_item; - } - - merge_init(&ms, saved_ob_size, keys != NULL); - - nremaining = saved_ob_size; - if (nremaining < 2) - goto succeed; - - /* Reverse sort stability achieved by initially reversing the list, - applying a stable forward sort, then reversing the final result. */ - if (reverse) { - if (keys != NULL) - reverse_slice(&keys[0], &keys[saved_ob_size]); - reverse_slice(&saved_ob_item[0], &saved_ob_item[saved_ob_size]); - } - - /* March over the array once, left to right, finding natural runs, - * and extending short natural runs to minrun elements. - */ - minrun = merge_compute_minrun(nremaining); - do { - int descending; - Py_ssize_t n; - - /* Identify next run. */ - n = count_run(lo.keys, lo.keys + nremaining, &descending); - if (n < 0) - goto fail; - if (descending) - reverse_sortslice(&lo, n); - /* If short, extend to min(minrun, nremaining). */ - if (n < minrun) { - const Py_ssize_t force = nremaining <= minrun ? - nremaining : minrun; - if (binarysort(lo, lo.keys + force, lo.keys + n) < 0) - goto fail; - n = force; - } - /* Push run onto pending-runs stack, and maybe merge. */ - assert(ms.n < MAX_MERGE_PENDING); - ms.pending[ms.n].base = lo; - ms.pending[ms.n].len = n; - ++ms.n; - if (merge_collapse(&ms) < 0) - goto fail; - /* Advance to find next run. */ - sortslice_advance(&lo, n); - nremaining -= n; - } while (nremaining); - - if (merge_force_collapse(&ms) < 0) - goto fail; - assert(ms.n == 1); - assert(keys == NULL - ? ms.pending[0].base.keys == saved_ob_item - : ms.pending[0].base.keys == &keys[0]); - assert(ms.pending[0].len == saved_ob_size); - lo = ms.pending[0].base; - -succeed: - result = Py_None; -fail: - if (keys != NULL) { - for (i = 0; i < saved_ob_size; i++) - Py_DECREF(keys[i]); - if (keys != &ms.temparray[saved_ob_size+1]) - PyMem_FREE(keys); - } - - if (self->allocated != -1 && result != NULL) { - /* The user mucked with the list during the sort, - * and we don't already have another error to report. - */ - PyErr_SetString(PyExc_ValueError, "list modified during sort"); - result = NULL; - } - - if (reverse && saved_ob_size > 1) - reverse_slice(saved_ob_item, saved_ob_item + saved_ob_size); - - merge_freemem(&ms); - -keyfunc_fail: - final_ob_item = self->ob_item; - i = Py_SIZE(self); - Py_SIZE(self) = saved_ob_size; - self->ob_item = saved_ob_item; - self->allocated = saved_allocated; - if (final_ob_item != NULL) { - /* we cannot use list_clear() for this because it does not - guarantee that the list is really empty when it returns */ - while (--i >= 0) { - Py_XDECREF(final_ob_item[i]); - } - PyMem_FREE(final_ob_item); - } - Py_XINCREF(result); - return result; -} -#undef IFLT -#undef ISLT - -int -PyList_Sort(PyObject *v) -{ - if (v == NULL || !PyList_Check(v)) { - PyErr_BadInternalCall(); - return -1; - } - v = listsort((PyListObject *)v, (PyObject *)NULL, (PyObject *)NULL); - if (v == NULL) - return -1; - Py_DECREF(v); - return 0; -} - -static PyObject * -listreverse(PyListObject *self) -{ - if (Py_SIZE(self) > 1) - reverse_slice(self->ob_item, self->ob_item + Py_SIZE(self)); - Py_RETURN_NONE; -} - -int -PyList_Reverse(PyObject *v) -{ - PyListObject *self = (PyListObject *)v; - - if (v == NULL || !PyList_Check(v)) { - PyErr_BadInternalCall(); - return -1; - } - if (Py_SIZE(self) > 1) - reverse_slice(self->ob_item, self->ob_item + Py_SIZE(self)); - return 0; -} - -PyObject * -PyList_AsTuple(PyObject *v) -{ - PyObject *w; - PyObject **p, **q; - Py_ssize_t n; - if (v == NULL || !PyList_Check(v)) { - PyErr_BadInternalCall(); - return NULL; - } - n = Py_SIZE(v); - w = PyTuple_New(n); - if (w == NULL) - return NULL; - p = ((PyTupleObject *)w)->ob_item; - q = ((PyListObject *)v)->ob_item; - while (--n >= 0) { - Py_INCREF(*q); - *p = *q; - p++; - q++; - } - return w; -} - -static PyObject * -listindex(PyListObject *self, PyObject *args) -{ - Py_ssize_t i, start=0, stop=Py_SIZE(self); - PyObject *v; - - if (!PyArg_ParseTuple(args, "O|O&O&:index", &v, - _PyEval_SliceIndex, &start, - _PyEval_SliceIndex, &stop)) - return NULL; - if (start < 0) { - start += Py_SIZE(self); - if (start < 0) - start = 0; - } - if (stop < 0) { - stop += Py_SIZE(self); - if (stop < 0) - stop = 0; - } - for (i = start; i < stop && i < Py_SIZE(self); i++) { - int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); - if (cmp > 0) - return PyLong_FromSsize_t(i); - else if (cmp < 0) - return NULL; - } - PyErr_Format(PyExc_ValueError, "%R is not in list", v); - return NULL; -} - -static PyObject * -listcount(PyListObject *self, PyObject *v) -{ - Py_ssize_t count = 0; - Py_ssize_t i; - - for (i = 0; i < Py_SIZE(self); i++) { - int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); - if (cmp > 0) - count++; - else if (cmp < 0) - return NULL; - } - return PyLong_FromSsize_t(count); -} - -static PyObject * -listremove(PyListObject *self, PyObject *v) -{ - Py_ssize_t i; - - for (i = 0; i < Py_SIZE(self); i++) { - int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); - if (cmp > 0) { - if (list_ass_slice(self, i, i+1, - (PyObject *)NULL) == 0) - Py_RETURN_NONE; - return NULL; - } - else if (cmp < 0) - return NULL; - } - PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); - return NULL; -} - -static int -list_traverse(PyListObject *o, visitproc visit, void *arg) -{ - Py_ssize_t i; - - for (i = Py_SIZE(o); --i >= 0; ) - Py_VISIT(o->ob_item[i]); - return 0; -} - -static PyObject * -list_richcompare(PyObject *v, PyObject *w, int op) -{ - PyListObject *vl, *wl; - Py_ssize_t i; - - if (!PyList_Check(v) || !PyList_Check(w)) - Py_RETURN_NOTIMPLEMENTED; - - vl = (PyListObject *)v; - wl = (PyListObject *)w; - - if (Py_SIZE(vl) != Py_SIZE(wl) && (op == Py_EQ || op == Py_NE)) { - /* Shortcut: if the lengths differ, the lists differ */ - PyObject *res; - if (op == Py_EQ) - res = Py_False; - else - res = Py_True; - Py_INCREF(res); - return res; - } - - /* Search for the first index where items are different */ - for (i = 0; i < Py_SIZE(vl) && i < Py_SIZE(wl); i++) { - int k = PyObject_RichCompareBool(vl->ob_item[i], - wl->ob_item[i], Py_EQ); - if (k < 0) - return NULL; - if (!k) - break; - } - - if (i >= Py_SIZE(vl) || i >= Py_SIZE(wl)) { - /* No more items to compare -- compare sizes */ - Py_ssize_t vs = Py_SIZE(vl); - Py_ssize_t ws = Py_SIZE(wl); - int cmp; - PyObject *res; - switch (op) { - case Py_LT: cmp = vs < ws; break; - case Py_LE: cmp = vs <= ws; break; - case Py_EQ: cmp = vs == ws; break; - case Py_NE: cmp = vs != ws; break; - case Py_GT: cmp = vs > ws; break; - case Py_GE: cmp = vs >= ws; break; - default: return NULL; /* cannot happen */ - } - if (cmp) - res = Py_True; - else - res = Py_False; - Py_INCREF(res); - return res; - } - - /* We have an item that differs -- shortcuts for EQ/NE */ - if (op == Py_EQ) { - Py_INCREF(Py_False); - return Py_False; - } - if (op == Py_NE) { - Py_INCREF(Py_True); - return Py_True; - } - - /* Compare the final item again using the proper operator */ - return PyObject_RichCompare(vl->ob_item[i], wl->ob_item[i], op); -} - -static int -list_init(PyListObject *self, PyObject *args, PyObject *kw) -{ - PyObject *arg = NULL; - static char *kwlist[] = {"sequence", 0}; - - if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:list", kwlist, &arg)) - return -1; - - /* Verify list invariants established by PyType_GenericAlloc() */ - assert(0 <= Py_SIZE(self)); - assert(Py_SIZE(self) <= self->allocated || self->allocated == -1); - assert(self->ob_item != NULL || - self->allocated == 0 || self->allocated == -1); - - /* Empty previous contents */ - if (self->ob_item != NULL) { - (void)list_clear(self); - } - if (arg != NULL) { - PyObject *rv = listextend(self, arg); - if (rv == NULL) - return -1; - Py_DECREF(rv); - } - return 0; -} - -static PyObject * -list_sizeof(PyListObject *self) -{ - Py_ssize_t res; - - res = sizeof(PyListObject) + self->allocated * sizeof(void*); - return PyLong_FromSsize_t(res); -} - -static PyObject *list_iter(PyObject *seq); -static PyObject *list_reversed(PyListObject* seq, PyObject* unused); - -PyDoc_STRVAR(getitem_doc, -"x.__getitem__(y) <==> x[y]"); -PyDoc_STRVAR(reversed_doc, -"L.__reversed__() -- return a reverse iterator over the list"); -PyDoc_STRVAR(sizeof_doc, -"L.__sizeof__() -- size of L in memory, in bytes"); -PyDoc_STRVAR(clear_doc, -"L.clear() -> None -- remove all items from L"); -PyDoc_STRVAR(copy_doc, -"L.copy() -> list -- a shallow copy of L"); -PyDoc_STRVAR(append_doc, -"L.append(object) -> None -- append object to end"); -PyDoc_STRVAR(extend_doc, -"L.extend(iterable) -> None -- extend list by appending elements from the iterable"); -PyDoc_STRVAR(insert_doc, -"L.insert(index, object) -- insert object before index"); -PyDoc_STRVAR(pop_doc, -"L.pop([index]) -> item -- remove and return item at index (default last).\n" -"Raises IndexError if list is empty or index is out of range."); -PyDoc_STRVAR(remove_doc, -"L.remove(value) -> None -- remove first occurrence of value.\n" -"Raises ValueError if the value is not present."); -PyDoc_STRVAR(index_doc, -"L.index(value, [start, [stop]]) -> integer -- return first index of value.\n" -"Raises ValueError if the value is not present."); -PyDoc_STRVAR(count_doc, -"L.count(value) -> integer -- return number of occurrences of value"); -PyDoc_STRVAR(reverse_doc, -"L.reverse() -- reverse *IN PLACE*"); -PyDoc_STRVAR(sort_doc, -"L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*"); - -static PyObject *list_subscript(PyListObject*, PyObject*); - -static PyMethodDef list_methods[] = { - {"__getitem__", (PyCFunction)list_subscript, METH_O|METH_COEXIST, getitem_doc}, - {"__reversed__",(PyCFunction)list_reversed, METH_NOARGS, reversed_doc}, - {"__sizeof__", (PyCFunction)list_sizeof, METH_NOARGS, sizeof_doc}, - {"clear", (PyCFunction)listclear, METH_NOARGS, clear_doc}, - {"copy", (PyCFunction)listcopy, METH_NOARGS, copy_doc}, - {"append", (PyCFunction)listappend, METH_O, append_doc}, - {"insert", (PyCFunction)listinsert, METH_VARARGS, insert_doc}, - {"extend", (PyCFunction)listextend, METH_O, extend_doc}, - {"pop", (PyCFunction)listpop, METH_VARARGS, pop_doc}, - {"remove", (PyCFunction)listremove, METH_O, remove_doc}, - {"index", (PyCFunction)listindex, METH_VARARGS, index_doc}, - {"count", (PyCFunction)listcount, METH_O, count_doc}, - {"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc}, - {"sort", (PyCFunction)listsort, METH_VARARGS | METH_KEYWORDS, sort_doc}, - {NULL, NULL} /* sentinel */ -}; - -static PySequenceMethods list_as_sequence = { - (lenfunc)list_length, /* sq_length */ - (binaryfunc)list_concat, /* sq_concat */ - (ssizeargfunc)list_repeat, /* sq_repeat */ - (ssizeargfunc)list_item, /* sq_item */ - 0, /* sq_slice */ - (ssizeobjargproc)list_ass_item, /* sq_ass_item */ - 0, /* sq_ass_slice */ - (objobjproc)list_contains, /* sq_contains */ - (binaryfunc)list_inplace_concat, /* sq_inplace_concat */ - (ssizeargfunc)list_inplace_repeat, /* sq_inplace_repeat */ -}; - -PyDoc_STRVAR(list_doc, -"list() -> new empty list\n" -"list(iterable) -> new list initialized from iterable's items"); - -static PyObject * -list_subscript(PyListObject* self, PyObject* item) -{ - if (PyIndex_Check(item)) { - Py_ssize_t i; - i = PyNumber_AsSsize_t(item, PyExc_IndexError); - if (i == -1 && PyErr_Occurred()) - return NULL; - if (i < 0) - i += PyList_GET_SIZE(self); - return list_item(self, i); - } - else if (PySlice_Check(item)) { - Py_ssize_t start, stop, step, slicelength, cur, i; - PyObject* result; - PyObject* it; - PyObject **src, **dest; - - if (PySlice_GetIndicesEx(item, Py_SIZE(self), - &start, &stop, &step, &slicelength) < 0) { - return NULL; - } - - if (slicelength <= 0) { - return PyList_New(0); - } - else if (step == 1) { - return list_slice(self, start, stop); - } - else { - result = PyList_New(slicelength); - if (!result) return NULL; - - src = self->ob_item; - dest = ((PyListObject *)result)->ob_item; - for (cur = start, i = 0; i < slicelength; - cur += (size_t)step, i++) { - it = src[cur]; - Py_INCREF(it); - dest[i] = it; - } - - return result; - } - } - else { - PyErr_Format(PyExc_TypeError, - "list indices must be integers, not %.200s", - item->ob_type->tp_name); - return NULL; - } -} - -static int -list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value) -{ - if (PyIndex_Check(item)) { - Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); - if (i == -1 && PyErr_Occurred()) - return -1; - if (i < 0) - i += PyList_GET_SIZE(self); - return list_ass_item(self, i, value); - } - else if (PySlice_Check(item)) { - Py_ssize_t start, stop, step, slicelength; - - if (PySlice_GetIndicesEx(item, Py_SIZE(self), - &start, &stop, &step, &slicelength) < 0) { - return -1; - } - - if (step == 1) - return list_ass_slice(self, start, stop, value); - - /* Make sure s[5:2] = [..] inserts at the right place: - before 5, not before 2. */ - if ((step < 0 && start < stop) || - (step > 0 && start > stop)) - stop = start; - - if (value == NULL) { - /* delete slice */ - PyObject **garbage; - size_t cur; - Py_ssize_t i; - - if (slicelength <= 0) - return 0; - - if (step < 0) { - stop = start + 1; - start = stop + step*(slicelength - 1) - 1; - step = -step; - } - - assert((size_t)slicelength <= - PY_SIZE_MAX / sizeof(PyObject*)); - - garbage = (PyObject**) - PyMem_MALLOC(slicelength*sizeof(PyObject*)); - if (!garbage) { - PyErr_NoMemory(); - return -1; - } - - /* drawing pictures might help understand these for - loops. Basically, we memmove the parts of the - list that are *not* part of the slice: step-1 - items for each item that is part of the slice, - and then tail end of the list that was not - covered by the slice */ - for (cur = start, i = 0; - cur < (size_t)stop; - cur += step, i++) { - Py_ssize_t lim = step - 1; - - garbage[i] = PyList_GET_ITEM(self, cur); - - if (cur + step >= (size_t)Py_SIZE(self)) { - lim = Py_SIZE(self) - cur - 1; - } - - memmove(self->ob_item + cur - i, - self->ob_item + cur + 1, - lim * sizeof(PyObject *)); - } - cur = start + (size_t)slicelength * step; - if (cur < (size_t)Py_SIZE(self)) { - memmove(self->ob_item + cur - slicelength, - self->ob_item + cur, - (Py_SIZE(self) - cur) * - sizeof(PyObject *)); - } - - Py_SIZE(self) -= slicelength; - list_resize(self, Py_SIZE(self)); - - for (i = 0; i < slicelength; i++) { - Py_DECREF(garbage[i]); - } - PyMem_FREE(garbage); - - return 0; - } - else { - /* assign slice */ - PyObject *ins, *seq; - PyObject **garbage, **seqitems, **selfitems; - Py_ssize_t cur, i; - - /* protect against a[::-1] = a */ - if (self == (PyListObject*)value) { - seq = list_slice((PyListObject*)value, 0, - PyList_GET_SIZE(value)); - } - else { - seq = PySequence_Fast(value, - "must assign iterable " - "to extended slice"); - } - if (!seq) - return -1; - - if (PySequence_Fast_GET_SIZE(seq) != slicelength) { - PyErr_Format(PyExc_ValueError, - "attempt to assign sequence of " - "size %zd to extended slice of " - "size %zd", - PySequence_Fast_GET_SIZE(seq), - slicelength); - Py_DECREF(seq); - return -1; - } - - if (!slicelength) { - Py_DECREF(seq); - return 0; - } - - garbage = (PyObject**) - PyMem_MALLOC(slicelength*sizeof(PyObject*)); - if (!garbage) { - Py_DECREF(seq); - PyErr_NoMemory(); - return -1; - } - - selfitems = self->ob_item; - seqitems = PySequence_Fast_ITEMS(seq); - for (cur = start, i = 0; i < slicelength; - cur += (size_t)step, i++) { - garbage[i] = selfitems[cur]; - ins = seqitems[i]; - Py_INCREF(ins); - selfitems[cur] = ins; - } - - for (i = 0; i < slicelength; i++) { - Py_DECREF(garbage[i]); - } - - PyMem_FREE(garbage); - Py_DECREF(seq); - - return 0; - } - } - else { - PyErr_Format(PyExc_TypeError, - "list indices must be integers, not %.200s", - item->ob_type->tp_name); - return -1; - } -} - -static PyMappingMethods list_as_mapping = { - (lenfunc)list_length, - (binaryfunc)list_subscript, - (objobjargproc)list_ass_subscript -}; - -PyTypeObject PyList_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "list", - sizeof(PyListObject), - 0, - (destructor)list_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - (reprfunc)list_repr, /* tp_repr */ - 0, /* tp_as_number */ - &list_as_sequence, /* tp_as_sequence */ - &list_as_mapping, /* tp_as_mapping */ - PyObject_HashNotImplemented, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_LIST_SUBCLASS, /* tp_flags */ - list_doc, /* tp_doc */ - (traverseproc)list_traverse, /* tp_traverse */ - (inquiry)list_clear, /* tp_clear */ - list_richcompare, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - list_iter, /* tp_iter */ - 0, /* tp_iternext */ - list_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)list_init, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - PyType_GenericNew, /* tp_new */ - PyObject_GC_Del, /* tp_free */ -}; - - -/*********************** List Iterator **************************/ - -typedef struct { - PyObject_HEAD - long it_index; - PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ -} listiterobject; - -static PyObject *list_iter(PyObject *); -static void listiter_dealloc(listiterobject *); -static int listiter_traverse(listiterobject *, visitproc, void *); -static PyObject *listiter_next(listiterobject *); -static PyObject *listiter_len(listiterobject *); - -PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); - -static PyMethodDef listiter_methods[] = { - {"__length_hint__", (PyCFunction)listiter_len, METH_NOARGS, length_hint_doc}, - {NULL, NULL} /* sentinel */ -}; - -PyTypeObject PyListIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "list_iterator", /* tp_name */ - sizeof(listiterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)listiter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - 0, /* tp_doc */ - (traverseproc)listiter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)listiter_next, /* tp_iternext */ - listiter_methods, /* tp_methods */ - 0, /* tp_members */ -}; - - -static PyObject * -list_iter(PyObject *seq) -{ - listiterobject *it; - - if (!PyList_Check(seq)) { - PyErr_BadInternalCall(); - return NULL; - } - it = PyObject_GC_New(listiterobject, &PyListIter_Type); - if (it == NULL) - return NULL; - it->it_index = 0; - Py_INCREF(seq); - it->it_seq = (PyListObject *)seq; - _PyObject_GC_TRACK(it); - return (PyObject *)it; -} - -static void -listiter_dealloc(listiterobject *it) -{ - _PyObject_GC_UNTRACK(it); - Py_XDECREF(it->it_seq); - PyObject_GC_Del(it); -} - -static int -listiter_traverse(listiterobject *it, visitproc visit, void *arg) -{ - Py_VISIT(it->it_seq); - return 0; -} - -static PyObject * -listiter_next(listiterobject *it) -{ - PyListObject *seq; - PyObject *item; - - assert(it != NULL); - seq = it->it_seq; - if (seq == NULL) - return NULL; - assert(PyList_Check(seq)); - - if (it->it_index < PyList_GET_SIZE(seq)) { - item = PyList_GET_ITEM(seq, it->it_index); - ++it->it_index; - Py_INCREF(item); - return item; - } - - Py_DECREF(seq); - it->it_seq = NULL; - return NULL; -} - -static PyObject * -listiter_len(listiterobject *it) -{ - Py_ssize_t len; - if (it->it_seq) { - len = PyList_GET_SIZE(it->it_seq) - it->it_index; - if (len >= 0) - return PyLong_FromSsize_t(len); - } - return PyLong_FromLong(0); -} -/*********************** List Reverse Iterator **************************/ - -typedef struct { - PyObject_HEAD - Py_ssize_t it_index; - PyListObject *it_seq; /* Set to NULL when iterator is exhausted */ -} listreviterobject; - -static PyObject *list_reversed(PyListObject *, PyObject *); -static void listreviter_dealloc(listreviterobject *); -static int listreviter_traverse(listreviterobject *, visitproc, void *); -static PyObject *listreviter_next(listreviterobject *); -static PyObject *listreviter_len(listreviterobject *); - -static PyMethodDef listreviter_methods[] = { - {"__length_hint__", (PyCFunction)listreviter_len, METH_NOARGS, length_hint_doc}, - {NULL, NULL} /* sentinel */ -}; - -PyTypeObject PyListRevIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "list_reverseiterator", /* tp_name */ - sizeof(listreviterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)listreviter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - 0, /* tp_doc */ - (traverseproc)listreviter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)listreviter_next, /* tp_iternext */ - listreviter_methods, /* tp_methods */ - 0, -}; - -static PyObject * -list_reversed(PyListObject *seq, PyObject *unused) -{ - listreviterobject *it; - - it = PyObject_GC_New(listreviterobject, &PyListRevIter_Type); - if (it == NULL) - return NULL; - assert(PyList_Check(seq)); - it->it_index = PyList_GET_SIZE(seq) - 1; - Py_INCREF(seq); - it->it_seq = seq; - PyObject_GC_Track(it); - return (PyObject *)it; -} - -static void -listreviter_dealloc(listreviterobject *it) -{ - PyObject_GC_UnTrack(it); - Py_XDECREF(it->it_seq); - PyObject_GC_Del(it); -} - -static int -listreviter_traverse(listreviterobject *it, visitproc visit, void *arg) -{ - Py_VISIT(it->it_seq); - return 0; -} - -static PyObject * -listreviter_next(listreviterobject *it) -{ - PyObject *item; - Py_ssize_t index = it->it_index; - PyListObject *seq = it->it_seq; - - if (index>=0 && index < PyList_GET_SIZE(seq)) { - item = PyList_GET_ITEM(seq, index); - it->it_index--; - Py_INCREF(item); - return item; - } - it->it_index = -1; - if (seq != NULL) { - it->it_seq = NULL; - Py_DECREF(seq); - } - return NULL; -} - -static PyObject * -listreviter_len(listreviterobject *it) -{ - Py_ssize_t len = it->it_index + 1; - if (it->it_seq == NULL || PyList_GET_SIZE(it->it_seq) < len) - len = 0; - return PyLong_FromSsize_t(len); -}
--- a/cos/python/Objects/longobject.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4836 +0,0 @@ -/* Long (arbitrary precision) integer object implementation */ - -/* XXX The functional organization of this file is terrible */ - -#include "Python.h" -#include "longintrepr.h" - - -/* convert a PyLong of size 1, 0 or -1 to an sdigit */ -#define MEDIUM_VALUE(x) (Py_SIZE(x) < 0 ? -(sdigit)(x)->ob_digit[0] : \ - (Py_SIZE(x) == 0 ? (sdigit)0 : \ - (sdigit)(x)->ob_digit[0])) -#define ABS(x) ((x) < 0 ? -(x) : (x)) - -/* If a freshly-allocated long is already shared, it must - be a small integer, so negating it must go to PyLong_FromLong */ -#define NEGATE(x) \ - do if (Py_REFCNT(x) == 1) Py_SIZE(x) = -Py_SIZE(x); \ - else { PyObject* tmp=PyLong_FromLong(-MEDIUM_VALUE(x)); \ - Py_DECREF(x); (x) = (PyLongObject*)tmp; } \ - while(0) -/* For long multiplication, use the O(N**2) school algorithm unless - * both operands contain more than KARATSUBA_CUTOFF digits (this - * being an internal Python long digit, in base BASE). - */ -#define KARATSUBA_CUTOFF 70 -#define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF) - -/* For exponentiation, use the binary left-to-right algorithm - * unless the exponent contains more than FIVEARY_CUTOFF digits. - * In that case, do 5 bits at a time. The potential drawback is that - * a table of 2**5 intermediate results is computed. - */ -#define FIVEARY_CUTOFF 8 - -#undef MIN -#undef MAX -#define MAX(x, y) ((x) < (y) ? (y) : (x)) -#define MIN(x, y) ((x) > (y) ? (y) : (x)) - -#define SIGCHECK(PyTryBlock) \ - do { \ - if (PyErr_CheckSignals()) PyTryBlock \ - } while(0) - -/* Normalize (remove leading zeros from) a long int object. - Doesn't attempt to free the storage--in most cases, due to the nature - of the algorithms used, this could save at most be one word anyway. */ - -static PyLongObject * -long_normalize(register PyLongObject *v) -{ - Py_ssize_t j = ABS(Py_SIZE(v)); - Py_ssize_t i = j; - - while (i > 0 && v->ob_digit[i-1] == 0) - --i; - if (i != j) - Py_SIZE(v) = (Py_SIZE(v) < 0) ? -(i) : i; - return v; -} - -/* Allocate a new long int object with size digits. - Return NULL and set exception if we run out of memory. */ - -#define MAX_LONG_DIGITS \ - ((PY_SSIZE_T_MAX - offsetof(PyLongObject, ob_digit))/sizeof(digit)) - -PyLongObject * -_PyLong_New(Py_ssize_t size) -{ - PyLongObject *result; - /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + - sizeof(digit)*size. Previous incarnations of this code used - sizeof(PyVarObject) instead of the offsetof, but this risks being - incorrect in the presence of padding between the PyVarObject header - and the digits. */ - if (size > (Py_ssize_t)MAX_LONG_DIGITS) { - PyErr_SetString(PyExc_OverflowError, - "too many digits in integer"); - return NULL; - } - result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) + - size*sizeof(digit)); - if (!result) { - PyErr_NoMemory(); - return NULL; - } - return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size); -} - -PyObject * -_PyLong_Copy(PyLongObject *src) -{ - PyLongObject *result; - Py_ssize_t i; - - assert(src != NULL); - i = Py_SIZE(src); - if (i < 0) - i = -(i); - if (i < 2) { - sdigit ival = src->ob_digit[0]; - if (Py_SIZE(src) < 0) - ival = -ival; - CHECK_SMALL_INT(ival); - } - result = _PyLong_New(i); - if (result != NULL) { - Py_SIZE(result) = Py_SIZE(src); - while (--i >= 0) - result->ob_digit[i] = src->ob_digit[i]; - } - return (PyObject *)result; -} - -/* Create a new long int object from a C long int */ - -PyObject * -PyLong_FromLong(long ival) -{ - PyLongObject *v; - unsigned long abs_ival; - unsigned long t; /* unsigned so >> doesn't propagate sign bit */ - int ndigits = 0; - int sign = 1; - - if (ival < 0) { - /* negate: can't write this as abs_ival = -ival since that - invokes undefined behaviour when ival is LONG_MIN */ - abs_ival = 0U-(unsigned long)ival; - sign = -1; - } - else { - abs_ival = (unsigned long)ival; - } - - /* Fast path for single-digit ints */ - if (!(abs_ival >> PyLong_SHIFT)) { - v = _PyLong_New(1); - if (v) { - Py_SIZE(v) = sign; - v->ob_digit[0] = Py_SAFE_DOWNCAST( - abs_ival, unsigned long, digit); - } - return (PyObject*)v; - } - -#if PyLong_SHIFT==15 - /* 2 digits */ - if (!(abs_ival >> 2*PyLong_SHIFT)) { - v = _PyLong_New(2); - if (v) { - Py_SIZE(v) = 2*sign; - v->ob_digit[0] = Py_SAFE_DOWNCAST( - abs_ival & PyLong_MASK, unsigned long, digit); - v->ob_digit[1] = Py_SAFE_DOWNCAST( - abs_ival >> PyLong_SHIFT, unsigned long, digit); - } - return (PyObject*)v; - } -#endif - - /* Larger numbers: loop to determine number of digits */ - t = abs_ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = ndigits*sign; - t = abs_ival; - while (t) { - *p++ = Py_SAFE_DOWNCAST( - t & PyLong_MASK, unsigned long, digit); - t >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Create a new long int object from a C unsigned long int */ - -PyObject * -PyLong_FromUnsignedLong(unsigned long ival) -{ - PyLongObject *v; - unsigned long t; - int ndigits = 0; - - if (ival < PyLong_BASE) - return PyLong_FromLong(ival); - /* Count the number of Python digits. */ - t = (unsigned long)ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = ndigits; - while (ival) { - *p++ = (digit)(ival & PyLong_MASK); - ival >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Create a new long int object from a C double */ - -PyObject * -PyLong_FromDouble(double dval) -{ - PyLongObject *v; - double frac; - int i, ndig, expo, neg; - neg = 0; - if (Py_IS_INFINITY(dval)) { - PyErr_SetString(PyExc_OverflowError, - "cannot convert float infinity to integer"); - return NULL; - } - if (Py_IS_NAN(dval)) { - PyErr_SetString(PyExc_ValueError, - "cannot convert float NaN to integer"); - return NULL; - } - if (dval < 0.0) { - neg = 1; - dval = -dval; - } - frac = frexp(dval, &expo); /* dval = frac*2**expo; 0.0 <= frac < 1.0 */ - if (expo <= 0) - return PyLong_FromLong(0L); - ndig = (expo-1) / PyLong_SHIFT + 1; /* Number of 'digits' in result */ - v = _PyLong_New(ndig); - if (v == NULL) - return NULL; - frac = ldexp(frac, (expo-1) % PyLong_SHIFT + 1); - for (i = ndig; --i >= 0; ) { - digit bits = (digit)frac; - v->ob_digit[i] = bits; - frac = frac - (double)bits; - frac = ldexp(frac, PyLong_SHIFT); - } - if (neg) - Py_SIZE(v) = -(Py_SIZE(v)); - return (PyObject *)v; -} - -/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define - * anything about what happens when a signed integer operation overflows, - * and some compilers think they're doing you a favor by being "clever" - * then. The bit pattern for the largest postive signed long is - * (unsigned long)LONG_MAX, and for the smallest negative signed long - * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN. - * However, some other compilers warn about applying unary minus to an - * unsigned operand. Hence the weird "0-". - */ -#define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) -#define PY_ABS_SSIZE_T_MIN (0-(size_t)PY_SSIZE_T_MIN) - -/* Get a C long int from a long int object or any object that has an __int__ - method. - - On overflow, return -1 and set *overflow to 1 or -1 depending on the sign of - the result. Otherwise *overflow is 0. - - For other errors (e.g., TypeError), return -1 and set an error condition. - In this case *overflow will be 0. -*/ - -long -PyLong_AsLongAndOverflow(PyObject *vv, int *overflow) -{ - /* This version by Tim Peters */ - register PyLongObject *v; - unsigned long x, prev; - long res; - Py_ssize_t i; - int sign; - int do_decref = 0; /* if nb_int was called */ - - *overflow = 0; - if (vv == NULL) { - PyErr_BadInternalCall(); - return -1; - } - - if (!PyLong_Check(vv)) { - PyNumberMethods *nb; - nb = vv->ob_type->tp_as_number; - if (nb == NULL || nb->nb_int == NULL) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - return -1; - } - vv = (*nb->nb_int) (vv); - if (vv == NULL) - return -1; - do_decref = 1; - if (!PyLong_Check(vv)) { - Py_DECREF(vv); - PyErr_SetString(PyExc_TypeError, - "nb_int should return int object"); - return -1; - } - } - - res = -1; - v = (PyLongObject *)vv; - i = Py_SIZE(v); - - switch (i) { - case -1: - res = -(sdigit)v->ob_digit[0]; - break; - case 0: - res = 0; - break; - case 1: - res = v->ob_digit[0]; - break; - default: - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -(i); - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { - *overflow = sign; - goto exit; - } - } - /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). - */ - if (x <= (unsigned long)LONG_MAX) { - res = (long)x * sign; - } - else if (sign < 0 && x == PY_ABS_LONG_MIN) { - res = LONG_MIN; - } - else { - *overflow = sign; - /* res is already set to -1 */ - } - } - exit: - if (do_decref) { - Py_DECREF(vv); - } - return res; -} - -/* Get a C long int from a long int object or any object that has an __int__ - method. Return -1 and set an error if overflow occurs. */ - -long -PyLong_AsLong(PyObject *obj) -{ - int overflow; - long result = PyLong_AsLongAndOverflow(obj, &overflow); - if (overflow) { - /* XXX: could be cute and give a different - message for overflow == -1 */ - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C long"); - } - return result; -} - -/* Get a Py_ssize_t from a long int object. - Returns -1 and sets an error condition if overflow occurs. */ - -Py_ssize_t -PyLong_AsSsize_t(PyObject *vv) { - register PyLongObject *v; - size_t x, prev; - Py_ssize_t i; - int sign; - - if (vv == NULL) { - PyErr_BadInternalCall(); - return -1; - } - if (!PyLong_Check(vv)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return -1; - } - - v = (PyLongObject *)vv; - i = Py_SIZE(v); - switch (i) { - case -1: return -(sdigit)v->ob_digit[0]; - case 0: return 0; - case 1: return v->ob_digit[0]; - } - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -(i); - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) - goto overflow; - } - /* Haven't lost any bits, but casting to a signed type requires - * extra care (see comment above). - */ - if (x <= (size_t)PY_SSIZE_T_MAX) { - return (Py_ssize_t)x * sign; - } - else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) { - return PY_SSIZE_T_MIN; - } - /* else overflow */ - - overflow: - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C ssize_t"); - return -1; -} - -/* Get a C unsigned long int from a long int object. - Returns -1 and sets an error condition if overflow occurs. */ - -unsigned long -PyLong_AsUnsignedLong(PyObject *vv) -{ - register PyLongObject *v; - unsigned long x, prev; - Py_ssize_t i; - - if (vv == NULL) { - PyErr_BadInternalCall(); - return (unsigned long)-1; - } - if (!PyLong_Check(vv)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return (unsigned long)-1; - } - - v = (PyLongObject *)vv; - i = Py_SIZE(v); - x = 0; - if (i < 0) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to unsigned int"); - return (unsigned long) -1; - } - switch (i) { - case 0: return 0; - case 1: return v->ob_digit[0]; - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { - PyErr_SetString(PyExc_OverflowError, - "python int too large to convert " - "to C unsigned long"); - return (unsigned long) -1; - } - } - return x; -} - -/* Get a C size_t from a long int object. Returns (size_t)-1 and sets - an error condition if overflow occurs. */ - -size_t -PyLong_AsSize_t(PyObject *vv) -{ - register PyLongObject *v; - size_t x, prev; - Py_ssize_t i; - - if (vv == NULL) { - PyErr_BadInternalCall(); - return (size_t) -1; - } - if (!PyLong_Check(vv)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return (size_t)-1; - } - - v = (PyLongObject *)vv; - i = Py_SIZE(v); - x = 0; - if (i < 0) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; - } - switch (i) { - case 0: return 0; - case 1: return v->ob_digit[0]; - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { - PyErr_SetString(PyExc_OverflowError, - "Python int too large to convert to C size_t"); - return (size_t) -1; - } - } - return x; -} - -/* Get a C unsigned long int from a long int object, ignoring the high bits. - Returns -1 and sets an error condition if an error occurs. */ - -static unsigned long -_PyLong_AsUnsignedLongMask(PyObject *vv) -{ - register PyLongObject *v; - unsigned long x; - Py_ssize_t i; - int sign; - - if (vv == NULL || !PyLong_Check(vv)) { - PyErr_BadInternalCall(); - return (unsigned long) -1; - } - v = (PyLongObject *)vv; - i = Py_SIZE(v); - switch (i) { - case 0: return 0; - case 1: return v->ob_digit[0]; - } - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -i; - } - while (--i >= 0) { - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - } - return x * sign; -} - -unsigned long -PyLong_AsUnsignedLongMask(register PyObject *op) -{ - PyNumberMethods *nb; - PyLongObject *lo; - unsigned long val; - - if (op && PyLong_Check(op)) - return _PyLong_AsUnsignedLongMask(op); - - if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL || - nb->nb_int == NULL) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return (unsigned long)-1; - } - - lo = (PyLongObject*) (*nb->nb_int) (op); - if (lo == NULL) - return (unsigned long)-1; - if (PyLong_Check(lo)) { - val = _PyLong_AsUnsignedLongMask((PyObject *)lo); - Py_DECREF(lo); - if (PyErr_Occurred()) - return (unsigned long)-1; - return val; - } - else - { - Py_DECREF(lo); - PyErr_SetString(PyExc_TypeError, - "nb_int should return int object"); - return (unsigned long)-1; - } -} - -int -_PyLong_Sign(PyObject *vv) -{ - PyLongObject *v = (PyLongObject *)vv; - - assert(v != NULL); - assert(PyLong_Check(v)); - - return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); -} - -size_t -_PyLong_NumBits(PyObject *vv) -{ - PyLongObject *v = (PyLongObject *)vv; - size_t result = 0; - Py_ssize_t ndigits; - - assert(v != NULL); - assert(PyLong_Check(v)); - ndigits = ABS(Py_SIZE(v)); - assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); - if (ndigits > 0) { - digit msd = v->ob_digit[ndigits - 1]; - - result = (ndigits - 1) * PyLong_SHIFT; - if (result / PyLong_SHIFT != (size_t)(ndigits - 1)) - goto Overflow; - do { - ++result; - if (result == 0) - goto Overflow; - msd >>= 1; - } while (msd); - } - return result; - - Overflow: - PyErr_SetString(PyExc_OverflowError, "int has too many bits " - "to express in a platform size_t"); - return (size_t)-1; -} - -PyObject * -_PyLong_FromByteArray(const unsigned char* bytes, size_t n, - int little_endian, int is_signed) -{ - const unsigned char* pstartbyte; /* LSB of bytes */ - int incr; /* direction to move pstartbyte */ - const unsigned char* pendbyte; /* MSB of bytes */ - size_t numsignificantbytes; /* number of bytes that matter */ - Py_ssize_t ndigits; /* number of Python long digits */ - PyLongObject* v; /* result */ - Py_ssize_t idigit = 0; /* next free index in v->ob_digit */ - - if (n == 0) - return PyLong_FromLong(0L); - - if (little_endian) { - pstartbyte = bytes; - pendbyte = bytes + n - 1; - incr = 1; - } - else { - pstartbyte = bytes + n - 1; - pendbyte = bytes; - incr = -1; - } - - if (is_signed) - is_signed = *pendbyte >= 0x80; - - /* Compute numsignificantbytes. This consists of finding the most - significant byte. Leading 0 bytes are insignificant if the number - is positive, and leading 0xff bytes if negative. */ - { - size_t i; - const unsigned char* p = pendbyte; - const int pincr = -incr; /* search MSB to LSB */ - const unsigned char insignficant = is_signed ? 0xff : 0x00; - - for (i = 0; i < n; ++i, p += pincr) { - if (*p != insignficant) - break; - } - numsignificantbytes = n - i; - /* 2's-comp is a bit tricky here, e.g. 0xff00 == -0x0100, so - actually has 2 significant bytes. OTOH, 0xff0001 == - -0x00ffff, so we wouldn't *need* to bump it there; but we - do for 0xffff = -0x0001. To be safe without bothering to - check every case, bump it regardless. */ - if (is_signed && numsignificantbytes < n) - ++numsignificantbytes; - } - - /* How many Python long digits do we need? We have - 8*numsignificantbytes bits, and each Python long digit has - PyLong_SHIFT bits, so it's the ceiling of the quotient. */ - /* catch overflow before it happens */ - if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { - PyErr_SetString(PyExc_OverflowError, - "byte array too long to convert to int"); - return NULL; - } - ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; - v = _PyLong_New(ndigits); - if (v == NULL) - return NULL; - - /* Copy the bits over. The tricky parts are computing 2's-comp on - the fly for signed numbers, and dealing with the mismatch between - 8-bit bytes and (probably) 15-bit Python digits.*/ - { - size_t i; - twodigits carry = 1; /* for 2's-comp calculation */ - twodigits accum = 0; /* sliding register */ - unsigned int accumbits = 0; /* number of bits in accum */ - const unsigned char* p = pstartbyte; - - for (i = 0; i < numsignificantbytes; ++i, p += incr) { - twodigits thisbyte = *p; - /* Compute correction for 2's comp, if needed. */ - if (is_signed) { - thisbyte = (0xff ^ thisbyte) + carry; - carry = thisbyte >> 8; - thisbyte &= 0xff; - } - /* Because we're going LSB to MSB, thisbyte is - more significant than what's already in accum, - so needs to be prepended to accum. */ - accum |= (twodigits)thisbyte << accumbits; - accumbits += 8; - if (accumbits >= PyLong_SHIFT) { - /* There's enough to fill a Python digit. */ - assert(idigit < ndigits); - v->ob_digit[idigit] = (digit)(accum & PyLong_MASK); - ++idigit; - accum >>= PyLong_SHIFT; - accumbits -= PyLong_SHIFT; - assert(accumbits < PyLong_SHIFT); - } - } - assert(accumbits < PyLong_SHIFT); - if (accumbits) { - assert(idigit < ndigits); - v->ob_digit[idigit] = (digit)accum; - ++idigit; - } - } - - Py_SIZE(v) = is_signed ? -idigit : idigit; - return (PyObject *)long_normalize(v); -} - -int -_PyLong_AsByteArray(PyLongObject* v, - unsigned char* bytes, size_t n, - int little_endian, int is_signed) -{ - Py_ssize_t i; /* index into v->ob_digit */ - Py_ssize_t ndigits; /* |v->ob_size| */ - twodigits accum; /* sliding register */ - unsigned int accumbits; /* # bits in accum */ - int do_twos_comp; /* store 2's-comp? is_signed and v < 0 */ - digit carry; /* for computing 2's-comp */ - size_t j; /* # bytes filled */ - unsigned char* p; /* pointer to next byte in bytes */ - int pincr; /* direction to move p */ - - assert(v != NULL && PyLong_Check(v)); - - if (Py_SIZE(v) < 0) { - ndigits = -(Py_SIZE(v)); - if (!is_signed) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative int to unsigned"); - return -1; - } - do_twos_comp = 1; - } - else { - ndigits = Py_SIZE(v); - do_twos_comp = 0; - } - - if (little_endian) { - p = bytes; - pincr = 1; - } - else { - p = bytes + n - 1; - pincr = -1; - } - - /* Copy over all the Python digits. - It's crucial that every Python digit except for the MSD contribute - exactly PyLong_SHIFT bits to the total, so first assert that the long is - normalized. */ - assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0); - j = 0; - accum = 0; - accumbits = 0; - carry = do_twos_comp ? 1 : 0; - for (i = 0; i < ndigits; ++i) { - digit thisdigit = v->ob_digit[i]; - if (do_twos_comp) { - thisdigit = (thisdigit ^ PyLong_MASK) + carry; - carry = thisdigit >> PyLong_SHIFT; - thisdigit &= PyLong_MASK; - } - /* Because we're going LSB to MSB, thisdigit is more - significant than what's already in accum, so needs to be - prepended to accum. */ - accum |= (twodigits)thisdigit << accumbits; - - /* The most-significant digit may be (probably is) at least - partly empty. */ - if (i == ndigits - 1) { - /* Count # of sign bits -- they needn't be stored, - * although for signed conversion we need later to - * make sure at least one sign bit gets stored. */ - digit s = do_twos_comp ? thisdigit ^ PyLong_MASK : thisdigit; - while (s != 0) { - s >>= 1; - accumbits++; - } - } - else - accumbits += PyLong_SHIFT; - - /* Store as many bytes as possible. */ - while (accumbits >= 8) { - if (j >= n) - goto Overflow; - ++j; - *p = (unsigned char)(accum & 0xff); - p += pincr; - accumbits -= 8; - accum >>= 8; - } - } - - /* Store the straggler (if any). */ - assert(accumbits < 8); - assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */ - if (accumbits > 0) { - if (j >= n) - goto Overflow; - ++j; - if (do_twos_comp) { - /* Fill leading bits of the byte with sign bits - (appropriately pretending that the long had an - infinite supply of sign bits). */ - accum |= (~(twodigits)0) << accumbits; - } - *p = (unsigned char)(accum & 0xff); - p += pincr; - } - else if (j == n && n > 0 && is_signed) { - /* The main loop filled the byte array exactly, so the code - just above didn't get to ensure there's a sign bit, and the - loop below wouldn't add one either. Make sure a sign bit - exists. */ - unsigned char msb = *(p - pincr); - int sign_bit_set = msb >= 0x80; - assert(accumbits == 0); - if (sign_bit_set == do_twos_comp) - return 0; - else - goto Overflow; - } - - /* Fill remaining bytes with copies of the sign bit. */ - { - unsigned char signbyte = do_twos_comp ? 0xffU : 0U; - for ( ; j < n; ++j, p += pincr) - *p = signbyte; - } - - return 0; - - Overflow: - PyErr_SetString(PyExc_OverflowError, "int too big to convert"); - return -1; - -} - -/* Create a new long int object from a C pointer */ - -PyObject * -PyLong_FromVoidPtr(void *p) -{ -#ifndef HAVE_LONG_LONG -# error "PyLong_FromVoidPtr: sizeof(void*) > sizeof(long), but no long long" -#endif -#if SIZEOF_LONG_LONG < SIZEOF_VOID_P -# error "PyLong_FromVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" -#endif - /* special-case null pointer */ - if (!p) - return PyLong_FromLong(0); - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)(Py_uintptr_t)p); - -} - -/* Get a C pointer from a long int object. */ - -void * -PyLong_AsVoidPtr(PyObject *vv) -{ -#if SIZEOF_VOID_P <= SIZEOF_LONG - long x; - - if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) - x = PyLong_AsLong(vv); - else - x = PyLong_AsUnsignedLong(vv); -#else - -#ifndef HAVE_LONG_LONG -# error "PyLong_AsVoidPtr: sizeof(void*) > sizeof(long), but no long long" -#endif -#if SIZEOF_LONG_LONG < SIZEOF_VOID_P -# error "PyLong_AsVoidPtr: sizeof(PY_LONG_LONG) < sizeof(void*)" -#endif - PY_LONG_LONG x; - - if (PyLong_Check(vv) && _PyLong_Sign(vv) < 0) - x = PyLong_AsLongLong(vv); - else - x = PyLong_AsUnsignedLongLong(vv); - -#endif /* SIZEOF_VOID_P <= SIZEOF_LONG */ - - if (x == -1 && PyErr_Occurred()) - return NULL; - return (void *)x; -} - -#ifdef HAVE_LONG_LONG - -/* Initial PY_LONG_LONG support by Chris Herborth (chrish@qnx.com), later - * rewritten to use the newer PyLong_{As,From}ByteArray API. - */ - -#define IS_LITTLE_ENDIAN (int)*(unsigned char*)&one -#define PY_ABS_LLONG_MIN (0-(unsigned PY_LONG_LONG)PY_LLONG_MIN) - -/* Create a new long int object from a C PY_LONG_LONG int. */ - -PyObject * -PyLong_FromLongLong(PY_LONG_LONG ival) -{ - PyLongObject *v; - unsigned PY_LONG_LONG abs_ival; - unsigned PY_LONG_LONG t; /* unsigned so >> doesn't propagate sign bit */ - int ndigits = 0; - int negative = 0; - - if (ival < 0) { - /* avoid signed overflow on negation; see comments - in PyLong_FromLong above. */ - abs_ival = (unsigned PY_LONG_LONG)(-1-ival) + 1; - negative = 1; - } - else { - abs_ival = (unsigned PY_LONG_LONG)ival; - } - - /* Count the number of Python digits. - We used to pick 5 ("big enough for anything"), but that's a - waste of time and space given that 5*15 = 75 bits are rarely - needed. */ - t = abs_ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = negative ? -ndigits : ndigits; - t = abs_ival; - while (t) { - *p++ = (digit)(t & PyLong_MASK); - t >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Create a new long int object from a C unsigned PY_LONG_LONG int. */ - -PyObject * -PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG ival) -{ - PyLongObject *v; - unsigned PY_LONG_LONG t; - int ndigits = 0; - - if (ival < PyLong_BASE) - return PyLong_FromLong((long)ival); - /* Count the number of Python digits. */ - t = (unsigned PY_LONG_LONG)ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = ndigits; - while (ival) { - *p++ = (digit)(ival & PyLong_MASK); - ival >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Create a new long int object from a C Py_ssize_t. */ - -PyObject * -PyLong_FromSsize_t(Py_ssize_t ival) -{ - PyLongObject *v; - size_t abs_ival; - size_t t; /* unsigned so >> doesn't propagate sign bit */ - int ndigits = 0; - int negative = 0; - - CHECK_SMALL_INT(ival); - if (ival < 0) { - /* avoid signed overflow when ival = SIZE_T_MIN */ - abs_ival = (size_t)(-1-ival)+1; - negative = 1; - } - else { - abs_ival = (size_t)ival; - } - - /* Count the number of Python digits. */ - t = abs_ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = negative ? -ndigits : ndigits; - t = abs_ival; - while (t) { - *p++ = (digit)(t & PyLong_MASK); - t >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Create a new long int object from a C size_t. */ - -PyObject * -PyLong_FromSize_t(size_t ival) -{ - PyLongObject *v; - size_t t; - int ndigits = 0; - - if (ival < PyLong_BASE) - return PyLong_FromLong((long)ival); - /* Count the number of Python digits. */ - t = ival; - while (t) { - ++ndigits; - t >>= PyLong_SHIFT; - } - v = _PyLong_New(ndigits); - if (v != NULL) { - digit *p = v->ob_digit; - Py_SIZE(v) = ndigits; - while (ival) { - *p++ = (digit)(ival & PyLong_MASK); - ival >>= PyLong_SHIFT; - } - } - return (PyObject *)v; -} - -/* Get a C long long int from a long int object or any object that has an - __int__ method. Return -1 and set an error if overflow occurs. */ - -PY_LONG_LONG -PyLong_AsLongLong(PyObject *vv) -{ - PyLongObject *v; - PY_LONG_LONG bytes; - int one = 1; - int res; - - if (vv == NULL) { - PyErr_BadInternalCall(); - return -1; - } - if (!PyLong_Check(vv)) { - PyNumberMethods *nb; - PyObject *io; - if ((nb = vv->ob_type->tp_as_number) == NULL || - nb->nb_int == NULL) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return -1; - } - io = (*nb->nb_int) (vv); - if (io == NULL) - return -1; - if (PyLong_Check(io)) { - bytes = PyLong_AsLongLong(io); - Py_DECREF(io); - return bytes; - } - Py_DECREF(io); - PyErr_SetString(PyExc_TypeError, "integer conversion failed"); - return -1; - } - - v = (PyLongObject*)vv; - switch(Py_SIZE(v)) { - case -1: return -(sdigit)v->ob_digit[0]; - case 0: return 0; - case 1: return v->ob_digit[0]; - } - res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 1); - - /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ - if (res < 0) - return (PY_LONG_LONG)-1; - else - return bytes; -} - -/* Get a C unsigned PY_LONG_LONG int from a long int object. - Return -1 and set an error if overflow occurs. */ - -unsigned PY_LONG_LONG -PyLong_AsUnsignedLongLong(PyObject *vv) -{ - PyLongObject *v; - unsigned PY_LONG_LONG bytes; - int one = 1; - int res; - - if (vv == NULL) { - PyErr_BadInternalCall(); - return (unsigned PY_LONG_LONG)-1; - } - if (!PyLong_Check(vv)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return (unsigned PY_LONG_LONG)-1; - } - - v = (PyLongObject*)vv; - switch(Py_SIZE(v)) { - case 0: return 0; - case 1: return v->ob_digit[0]; - } - - res = _PyLong_AsByteArray((PyLongObject *)vv, (unsigned char *)&bytes, - SIZEOF_LONG_LONG, IS_LITTLE_ENDIAN, 0); - - /* Plan 9 can't handle PY_LONG_LONG in ? : expressions */ - if (res < 0) - return (unsigned PY_LONG_LONG)res; - else - return bytes; -} - -/* Get a C unsigned long int from a long int object, ignoring the high bits. - Returns -1 and sets an error condition if an error occurs. */ - -static unsigned PY_LONG_LONG -_PyLong_AsUnsignedLongLongMask(PyObject *vv) -{ - register PyLongObject *v; - unsigned PY_LONG_LONG x; - Py_ssize_t i; - int sign; - - if (vv == NULL || !PyLong_Check(vv)) { - PyErr_BadInternalCall(); - return (unsigned long) -1; - } - v = (PyLongObject *)vv; - switch(Py_SIZE(v)) { - case 0: return 0; - case 1: return v->ob_digit[0]; - } - i = Py_SIZE(v); - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -i; - } - while (--i >= 0) { - x = (x << PyLong_SHIFT) | v->ob_digit[i]; - } - return x * sign; -} - -unsigned PY_LONG_LONG -PyLong_AsUnsignedLongLongMask(register PyObject *op) -{ - PyNumberMethods *nb; - PyLongObject *lo; - unsigned PY_LONG_LONG val; - - if (op && PyLong_Check(op)) - return _PyLong_AsUnsignedLongLongMask(op); - - if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL || - nb->nb_int == NULL) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return (unsigned PY_LONG_LONG)-1; - } - - lo = (PyLongObject*) (*nb->nb_int) (op); - if (lo == NULL) - return (unsigned PY_LONG_LONG)-1; - if (PyLong_Check(lo)) { - val = _PyLong_AsUnsignedLongLongMask((PyObject *)lo); - Py_DECREF(lo); - if (PyErr_Occurred()) - return (unsigned PY_LONG_LONG)-1; - return val; - } - else - { - Py_DECREF(lo); - PyErr_SetString(PyExc_TypeError, - "nb_int should return int object"); - return (unsigned PY_LONG_LONG)-1; - } -} -#undef IS_LITTLE_ENDIAN - -/* Get a C long long int from a long int object or any object that has an - __int__ method. - - On overflow, return -1 and set *overflow to 1 or -1 depending on the sign of - the result. Otherwise *overflow is 0. - - For other errors (e.g., TypeError), return -1 and set an error condition. - In this case *overflow will be 0. -*/ - -PY_LONG_LONG -PyLong_AsLongLongAndOverflow(PyObject *vv, int *overflow) -{ - /* This version by Tim Peters */ - register PyLongObject *v; - unsigned PY_LONG_LONG x, prev; - PY_LONG_LONG res; - Py_ssize_t i; - int sign; - int do_decref = 0; /* if nb_int was called */ - - *overflow = 0; - if (vv == NULL) { - PyErr_BadInternalCall(); - return -1; - } - - if (!PyLong_Check(vv)) { - PyNumberMethods *nb; - nb = vv->ob_type->tp_as_number; - if (nb == NULL || nb->nb_int == NULL) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - return -1; - } - vv = (*nb->nb_int) (vv); - if (vv == NULL) - return -1; - do_decref = 1; - if (!PyLong_Check(vv)) { - Py_DECREF(vv); - PyErr_SetString(PyExc_TypeError, - "nb_int should return int object"); - return -1; - } - } - - res = -1; - v = (PyLongObject *)vv; - i = Py_SIZE(v); - - switch (i) { - case -1: - res = -(sdigit)v->ob_digit[0]; - break; - case 0: - res = 0; - break; - case 1: - res = v->ob_digit[0]; - break; - default: - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -(i); - } - while (--i >= 0) { - prev = x; - x = (x << PyLong_SHIFT) + v->ob_digit[i]; - if ((x >> PyLong_SHIFT) != prev) { - *overflow = sign; - goto exit; - } - } - /* Haven't lost any bits, but casting to long requires extra - * care (see comment above). - */ - if (x <= (unsigned PY_LONG_LONG)PY_LLONG_MAX) { - res = (PY_LONG_LONG)x * sign; - } - else if (sign < 0 && x == PY_ABS_LLONG_MIN) { - res = PY_LLONG_MIN; - } - else { - *overflow = sign; - /* res is already set to -1 */ - } - } - exit: - if (do_decref) { - Py_DECREF(vv); - } - return res; -} - -#endif /* HAVE_LONG_LONG */ - -#define CHECK_BINOP(v,w) \ - do { \ - if (!PyLong_Check(v) || !PyLong_Check(w)) \ - Py_RETURN_NOTIMPLEMENTED; \ - } while(0) - -/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d < - 2**k if d is nonzero, else 0. */ - -static const unsigned char BitLengthTable[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 -}; - -static int -bits_in_digit(digit d) -{ - int d_bits = 0; - while (d >= 32) { - d_bits += 6; - d >>= 6; - } - d_bits += (int)BitLengthTable[d]; - return d_bits; -} - -/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] - * is modified in place, by adding y to it. Carries are propagated as far as - * x[m-1], and the remaining carry (0 or 1) is returned. - */ -static digit -v_iadd(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) -{ - Py_ssize_t i; - digit carry = 0; - - assert(m >= n); - for (i = 0; i < n; ++i) { - carry += x[i] + y[i]; - x[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - assert((carry & 1) == carry); - } - for (; carry && i < m; ++i) { - carry += x[i]; - x[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - assert((carry & 1) == carry); - } - return carry; -} - -/* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required. x[0:n] - * is modified in place, by subtracting y from it. Borrows are propagated as - * far as x[m-1], and the remaining borrow (0 or 1) is returned. - */ -static digit -v_isub(digit *x, Py_ssize_t m, digit *y, Py_ssize_t n) -{ - Py_ssize_t i; - digit borrow = 0; - - assert(m >= n); - for (i = 0; i < n; ++i) { - borrow = x[i] - y[i] - borrow; - x[i] = borrow & PyLong_MASK; - borrow >>= PyLong_SHIFT; - borrow &= 1; /* keep only 1 sign bit */ - } - for (; borrow && i < m; ++i) { - borrow = x[i] - borrow; - x[i] = borrow & PyLong_MASK; - borrow >>= PyLong_SHIFT; - borrow &= 1; - } - return borrow; -} - -/* Shift digit vector a[0:m] d bits left, with 0 <= d < PyLong_SHIFT. Put - * result in z[0:m], and return the d bits shifted out of the top. - */ -static digit -v_lshift(digit *z, digit *a, Py_ssize_t m, int d) -{ - Py_ssize_t i; - digit carry = 0; - - assert(0 <= d && d < PyLong_SHIFT); - for (i=0; i < m; i++) { - twodigits acc = (twodigits)a[i] << d | carry; - z[i] = (digit)acc & PyLong_MASK; - carry = (digit)(acc >> PyLong_SHIFT); - } - return carry; -} - -/* Shift digit vector a[0:m] d bits right, with 0 <= d < PyLong_SHIFT. Put - * result in z[0:m], and return the d bits shifted out of the bottom. - */ -static digit -v_rshift(digit *z, digit *a, Py_ssize_t m, int d) -{ - Py_ssize_t i; - digit carry = 0; - digit mask = ((digit)1 << d) - 1U; - - assert(0 <= d && d < PyLong_SHIFT); - for (i=m; i-- > 0;) { - twodigits acc = (twodigits)carry << PyLong_SHIFT | a[i]; - carry = (digit)acc & mask; - z[i] = (digit)(acc >> d); - } - return carry; -} - -/* Divide long pin, w/ size digits, by non-zero digit n, storing quotient - in pout, and returning the remainder. pin and pout point at the LSD. - It's OK for pin == pout on entry, which saves oodles of mallocs/frees in - _PyLong_Format, but that should be done with great care since longs are - immutable. */ - -static digit -inplace_divrem1(digit *pout, digit *pin, Py_ssize_t size, digit n) -{ - twodigits rem = 0; - - assert(n > 0 && n <= PyLong_MASK); - pin += size; - pout += size; - while (--size >= 0) { - digit hi; - rem = (rem << PyLong_SHIFT) | *--pin; - *--pout = hi = (digit)(rem / n); - rem -= (twodigits)hi * n; - } - return (digit)rem; -} - -/* Divide a long integer by a digit, returning both the quotient - (as function result) and the remainder (through *prem). - The sign of a is ignored; n should not be zero. */ - -static PyLongObject * -divrem1(PyLongObject *a, digit n, digit *prem) -{ - const Py_ssize_t size = ABS(Py_SIZE(a)); - PyLongObject *z; - - assert(n > 0 && n <= PyLong_MASK); - z = _PyLong_New(size); - if (z == NULL) - return NULL; - *prem = inplace_divrem1(z->ob_digit, a->ob_digit, size, n); - return long_normalize(z); -} - -/* Convert a long integer to a base 10 string. Returns a new non-shared - string. (Return value is non-shared so that callers can modify the - returned value if necessary.) */ - -static PyObject * -long_to_decimal_string(PyObject *aa) -{ - PyLongObject *scratch, *a; - PyObject *str; - Py_ssize_t size, strlen, size_a, i, j; - digit *pout, *pin, rem, tenpow; - unsigned char *p; - int negative; - - a = (PyLongObject *)aa; - if (a == NULL || !PyLong_Check(a)) { - PyErr_BadInternalCall(); - return NULL; - } - size_a = ABS(Py_SIZE(a)); - negative = Py_SIZE(a) < 0; - - /* quick and dirty upper bound for the number of digits - required to express a in base _PyLong_DECIMAL_BASE: - - #digits = 1 + floor(log2(a) / log2(_PyLong_DECIMAL_BASE)) - - But log2(a) < size_a * PyLong_SHIFT, and - log2(_PyLong_DECIMAL_BASE) = log2(10) * _PyLong_DECIMAL_SHIFT - > 3 * _PyLong_DECIMAL_SHIFT - */ - if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { - PyErr_SetString(PyExc_OverflowError, - "long is too large to format"); - return NULL; - } - /* the expression size_a * PyLong_SHIFT is now safe from overflow */ - size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); - scratch = _PyLong_New(size); - if (scratch == NULL) - return NULL; - - /* convert array of base _PyLong_BASE digits in pin to an array of - base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, - Volume 2 (3rd edn), section 4.4, Method 1b). */ - pin = a->ob_digit; - pout = scratch->ob_digit; - size = 0; - for (i = size_a; --i >= 0; ) { - digit hi = pin[i]; - for (j = 0; j < size; j++) { - twodigits z = (twodigits)pout[j] << PyLong_SHIFT | hi; - hi = (digit)(z / _PyLong_DECIMAL_BASE); - pout[j] = (digit)(z - (twodigits)hi * - _PyLong_DECIMAL_BASE); - } - while (hi) { - pout[size++] = hi % _PyLong_DECIMAL_BASE; - hi /= _PyLong_DECIMAL_BASE; - } - /* check for keyboard interrupt */ - SIGCHECK({ - Py_DECREF(scratch); - return NULL; - }); - } - /* pout should have at least one digit, so that the case when a = 0 - works correctly */ - if (size == 0) - pout[size++] = 0; - - /* calculate exact length of output string, and allocate */ - strlen = negative + 1 + (size - 1) * _PyLong_DECIMAL_SHIFT; - tenpow = 10; - rem = pout[size-1]; - while (rem >= tenpow) { - tenpow *= 10; - strlen++; - } - str = PyUnicode_New(strlen, '9'); - if (str == NULL) { - Py_DECREF(scratch); - return NULL; - } - - /* fill the string right-to-left */ - assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); - p = PyUnicode_1BYTE_DATA(str) + strlen; - *p = '\0'; - /* pout[0] through pout[size-2] contribute exactly - _PyLong_DECIMAL_SHIFT digits each */ - for (i=0; i < size - 1; i++) { - rem = pout[i]; - for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { - *--p = '0' + rem % 10; - rem /= 10; - } - } - /* pout[size-1]: always produce at least one decimal digit */ - rem = pout[i]; - do { - *--p = '0' + rem % 10; - rem /= 10; - } while (rem != 0); - - /* and sign */ - if (negative) - *--p = '-'; - - /* check we've counted correctly */ - assert(p == PyUnicode_1BYTE_DATA(str)); - Py_DECREF(scratch); - return (PyObject *)str; -} - -/* Convert a long int object to a string, using a given conversion base, - which should be one of 2, 8, 10 or 16. Return a string object. - If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ - -PyObject * -_PyLong_Format(PyObject *aa, int base) -{ - register PyLongObject *a = (PyLongObject *)aa; - PyObject *v; - Py_ssize_t i, sz; - Py_ssize_t size_a; - char *p; - char sign = '\0'; - char *buffer; - int bits; - - assert(base == 2 || base == 8 || base == 10 || base == 16); - if (base == 10) - return long_to_decimal_string((PyObject *)a); - - if (a == NULL || !PyLong_Check(a)) { - PyErr_BadInternalCall(); - return NULL; - } - size_a = ABS(Py_SIZE(a)); - - /* Compute a rough upper bound for the length of the string */ - switch (base) { - case 16: - bits = 4; - break; - case 8: - bits = 3; - break; - case 2: - bits = 1; - break; - default: - assert(0); /* shouldn't ever get here */ - bits = 0; /* to silence gcc warning */ - } - /* compute length of output string: allow 2 characters for prefix and - 1 for possible '-' sign. */ - if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT / sizeof(Py_UCS4)) { - PyErr_SetString(PyExc_OverflowError, - "int is too large to format"); - return NULL; - } - /* now size_a * PyLong_SHIFT + 3 <= PY_SSIZE_T_MAX, so the RHS below - is safe from overflow */ - sz = 3 + (size_a * PyLong_SHIFT + (bits - 1)) / bits; - assert(sz >= 0); - buffer = PyMem_Malloc(sz); - if (buffer == NULL) { - PyErr_NoMemory(); - return NULL; - } - p = &buffer[sz]; - if (Py_SIZE(a) < 0) - sign = '-'; - - if (Py_SIZE(a) == 0) { - *--p = '0'; - } - else { - /* JRH: special case for power-of-2 bases */ - twodigits accum = 0; - int accumbits = 0; /* # of bits in accum */ - for (i = 0; i < size_a; ++i) { - accum |= (twodigits)a->ob_digit[i] << accumbits; - accumbits += PyLong_SHIFT; - assert(accumbits >= bits); - do { - char cdigit; - cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'a'-10; - assert(p > buffer); - *--p = cdigit; - accumbits -= bits; - accum >>= bits; - } while (i < size_a-1 ? accumbits >= bits : accum > 0); - } - } - - if (base == 16) - *--p = 'x'; - else if (base == 8) - *--p = 'o'; - else /* (base == 2) */ - *--p = 'b'; - *--p = '0'; - if (sign) - *--p = sign; - v = PyUnicode_DecodeASCII(p, &buffer[sz] - p, NULL); - PyMem_Free(buffer); - return v; -} - -/* Table of digit values for 8-bit string -> integer conversion. - * '0' maps to 0, ..., '9' maps to 9. - * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35. - * All other indices map to 37. - * Note that when converting a base B string, a char c is a legitimate - * base B digit iff _PyLong_DigitValue[Py_CHARPyLong_MASK(c)] < B. - */ -unsigned char _PyLong_DigitValue[256] = { - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 37, 37, 37, 37, 37, 37, - 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, - 37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, - 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, -}; - -/* *str points to the first digit in a string of base `base` digits. base - * is a power of 2 (2, 4, 8, 16, or 32). *str is set to point to the first - * non-digit (which may be *str!). A normalized long is returned. - * The point to this routine is that it takes time linear in the number of - * string characters. - */ -static PyLongObject * -long_from_binary_base(char **str, int base) -{ - char *p = *str; - char *start = p; - int bits_per_char; - Py_ssize_t n; - PyLongObject *z; - twodigits accum; - int bits_in_accum; - digit *pdigit; - - assert(base >= 2 && base <= 32 && (base & (base - 1)) == 0); - n = base; - for (bits_per_char = -1; n; ++bits_per_char) - n >>= 1; - /* n <- total # of bits needed, while setting p to end-of-string */ - while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base) - ++p; - *str = p; - /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */ - n = (p - start) * bits_per_char + PyLong_SHIFT - 1; - if (n / bits_per_char < p - start) { - PyErr_SetString(PyExc_ValueError, - "int string too large to convert"); - return NULL; - } - n = n / PyLong_SHIFT; - z = _PyLong_New(n); - if (z == NULL) - return NULL; - /* Read string from right, and fill in long from left; i.e., - * from least to most significant in both. - */ - accum = 0; - bits_in_accum = 0; - pdigit = z->ob_digit; - while (--p >= start) { - int k = (int)_PyLong_DigitValue[Py_CHARMASK(*p)]; - assert(k >= 0 && k < base); - accum |= (twodigits)k << bits_in_accum; - bits_in_accum += bits_per_char; - if (bits_in_accum >= PyLong_SHIFT) { - *pdigit++ = (digit)(accum & PyLong_MASK); - assert(pdigit - z->ob_digit <= n); - accum >>= PyLong_SHIFT; - bits_in_accum -= PyLong_SHIFT; - assert(bits_in_accum < PyLong_SHIFT); - } - } - if (bits_in_accum) { - assert(bits_in_accum <= PyLong_SHIFT); - *pdigit++ = (digit)accum; - assert(pdigit - z->ob_digit <= n); - } - while (pdigit - z->ob_digit < n) - *pdigit++ = 0; - return long_normalize(z); -} - -PyObject * -PyLong_FromString(char *str, char **pend, int base) -{ - int sign = 1, error_if_nonzero = 0; - char *start, *orig_str = str; - PyLongObject *z = NULL; - PyObject *strobj; - Py_ssize_t slen; - - if ((base != 0 && base < 2) || base > 36) { - PyErr_SetString(PyExc_ValueError, - "int() arg 2 must be >= 2 and <= 36"); - return NULL; - } - while (*str != '\0' && isspace(Py_CHARMASK(*str))) - str++; - if (*str == '+') - ++str; - else if (*str == '-') { - ++str; - sign = -1; - } - if (base == 0) { - if (str[0] != '0') - base = 10; - else if (str[1] == 'x' || str[1] == 'X') - base = 16; - else if (str[1] == 'o' || str[1] == 'O') - base = 8; - else if (str[1] == 'b' || str[1] == 'B') - base = 2; - else { - /* "old" (C-style) octal literal, now invalid. - it might still be zero though */ - error_if_nonzero = 1; - base = 10; - } - } - if (str[0] == '0' && - ((base == 16 && (str[1] == 'x' || str[1] == 'X')) || - (base == 8 && (str[1] == 'o' || str[1] == 'O')) || - (base == 2 && (str[1] == 'b' || str[1] == 'B')))) - str += 2; - - start = str; - if ((base & (base - 1)) == 0) - z = long_from_binary_base(&str, base); - else { -/*** -Binary bases can be converted in time linear in the number of digits, because -Python's representation base is binary. Other bases (including decimal!) use -the simple quadratic-time algorithm below, complicated by some speed tricks. - -First some math: the largest integer that can be expressed in N base-B digits -is B**N-1. Consequently, if we have an N-digit input in base B, the worst- -case number of Python digits needed to hold it is the smallest integer n s.t. - - BASE**n-1 >= B**N-1 [or, adding 1 to both sides] - BASE**n >= B**N [taking logs to base BASE] - n >= log(B**N)/log(BASE) = N * log(B)/log(BASE) - -The static array log_base_BASE[base] == log(base)/log(BASE) so we can compute -this quickly. A Python long with that much space is reserved near the start, -and the result is computed into it. - -The input string is actually treated as being in base base**i (i.e., i digits -are processed at a time), where two more static arrays hold: - - convwidth_base[base] = the largest integer i such that base**i <= BASE - convmultmax_base[base] = base ** convwidth_base[base] - -The first of these is the largest i such that i consecutive input digits -must fit in a single Python digit. The second is effectively the input -base we're really using. - -Viewing the input as a sequence <c0, c1, ..., c_n-1> of digits in base -convmultmax_base[base], the result is "simply" - - (((c0*B + c1)*B + c2)*B + c3)*B + ... ))) + c_n-1 - -where B = convmultmax_base[base]. - -Error analysis: as above, the number of Python digits `n` needed is worst- -case - - n >= N * log(B)/log(BASE) - -where `N` is the number of input digits in base `B`. This is computed via - - size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; - -below. Two numeric concerns are how much space this can waste, and whether -the computed result can be too small. To be concrete, assume BASE = 2**15, -which is the default (and it's unlikely anyone changes that). - -Waste isn't a problem: provided the first input digit isn't 0, the difference -between the worst-case input with N digits and the smallest input with N -digits is about a factor of B, but B is small compared to BASE so at most -one allocated Python digit can remain unused on that count. If -N*log(B)/log(BASE) is mathematically an exact integer, then truncating that -and adding 1 returns a result 1 larger than necessary. However, that can't -happen: whenever B is a power of 2, long_from_binary_base() is called -instead, and it's impossible for B**i to be an integer power of 2**15 when -B is not a power of 2 (i.e., it's impossible for N*log(B)/log(BASE) to be -an exact integer when B is not a power of 2, since B**i has a prime factor -other than 2 in that case, but (2**15)**j's only prime factor is 2). - -The computed result can be too small if the true value of N*log(B)/log(BASE) -is a little bit larger than an exact integer, but due to roundoff errors (in -computing log(B), log(BASE), their quotient, and/or multiplying that by N) -yields a numeric result a little less than that integer. Unfortunately, "how -close can a transcendental function get to an integer over some range?" -questions are generally theoretically intractable. Computer analysis via -continued fractions is practical: expand log(B)/log(BASE) via continued -fractions, giving a sequence i/j of "the best" rational approximations. Then -j*log(B)/log(BASE) is approximately equal to (the integer) i. This shows that -we can get very close to being in trouble, but very rarely. For example, -76573 is a denominator in one of the continued-fraction approximations to -log(10)/log(2**15), and indeed: - - >>> log(10)/log(2**15)*76573 - 16958.000000654003 - -is very close to an integer. If we were working with IEEE single-precision, -rounding errors could kill us. Finding worst cases in IEEE double-precision -requires better-than-double-precision log() functions, and Tim didn't bother. -Instead the code checks to see whether the allocated space is enough as each -new Python digit is added, and copies the whole thing to a larger long if not. -This should happen extremely rarely, and in fact I don't have a test case -that triggers it(!). Instead the code was tested by artificially allocating -just 1 digit at the start, so that the copying code was exercised for every -digit beyond the first. -***/ - register twodigits c; /* current input character */ - Py_ssize_t size_z; - int i; - int convwidth; - twodigits convmultmax, convmult; - digit *pz, *pzstop; - char* scan; - - static double log_base_BASE[37] = {0.0e0,}; - static int convwidth_base[37] = {0,}; - static twodigits convmultmax_base[37] = {0,}; - - if (log_base_BASE[base] == 0.0) { - twodigits convmax = base; - int i = 1; - - log_base_BASE[base] = (log((double)base) / - log((double)PyLong_BASE)); - for (;;) { - twodigits next = convmax * base; - if (next > PyLong_BASE) - break; - convmax = next; - ++i; - } - convmultmax_base[base] = convmax; - assert(i > 0); - convwidth_base[base] = i; - } - - /* Find length of the string of numeric characters. */ - scan = str; - while (_PyLong_DigitValue[Py_CHARMASK(*scan)] < base) - ++scan; - - /* Create a long object that can contain the largest possible - * integer with this base and length. Note that there's no - * need to initialize z->ob_digit -- no slot is read up before - * being stored into. - */ - size_z = (Py_ssize_t)((scan - str) * log_base_BASE[base]) + 1; - /* Uncomment next line to test exceedingly rare copy code */ - /* size_z = 1; */ - assert(size_z > 0); - z = _PyLong_New(size_z); - if (z == NULL) - return NULL; - Py_SIZE(z) = 0; - - /* `convwidth` consecutive input digits are treated as a single - * digit in base `convmultmax`. - */ - convwidth = convwidth_base[base]; - convmultmax = convmultmax_base[base]; - - /* Work ;-) */ - while (str < scan) { - /* grab up to convwidth digits from the input string */ - c = (digit)_PyLong_DigitValue[Py_CHARMASK(*str++)]; - for (i = 1; i < convwidth && str != scan; ++i, ++str) { - c = (twodigits)(c * base + - (int)_PyLong_DigitValue[Py_CHARMASK(*str)]); - assert(c < PyLong_BASE); - } - - convmult = convmultmax; - /* Calculate the shift only if we couldn't get - * convwidth digits. - */ - if (i != convwidth) { - convmult = base; - for ( ; i > 1; --i) - convmult *= base; - } - - /* Multiply z by convmult, and add c. */ - pz = z->ob_digit; - pzstop = pz + Py_SIZE(z); - for (; pz < pzstop; ++pz) { - c += (twodigits)*pz * convmult; - *pz = (digit)(c & PyLong_MASK); - c >>= PyLong_SHIFT; - } - /* carry off the current end? */ - if (c) { - assert(c < PyLong_BASE); - if (Py_SIZE(z) < size_z) { - *pz = (digit)c; - ++Py_SIZE(z); - } - else { - PyLongObject *tmp; - /* Extremely rare. Get more space. */ - assert(Py_SIZE(z) == size_z); - tmp = _PyLong_New(size_z + 1); - if (tmp == NULL) { - Py_DECREF(z); - return NULL; - } - memcpy(tmp->ob_digit, - z->ob_digit, - sizeof(digit) * size_z); - Py_DECREF(z); - z = tmp; - z->ob_digit[size_z] = (digit)c; - ++size_z; - } - } - } - } - if (z == NULL) - return NULL; - if (error_if_nonzero) { - /* reset the base to 0, else the exception message - doesn't make too much sense */ - base = 0; - if (Py_SIZE(z) != 0) - goto onError; - /* there might still be other problems, therefore base - remains zero here for the same reason */ - } - if (str == start) - goto onError; - if (sign < 0) - Py_SIZE(z) = -(Py_SIZE(z)); - while (*str && isspace(Py_CHARMASK(*str))) - str++; - if (*str != '\0') - goto onError; - if (pend) - *pend = str; - long_normalize(z); - return (PyObject *) maybe_small_long(z); - - onError: - Py_XDECREF(z); - slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200; - strobj = PyUnicode_FromStringAndSize(orig_str, slen); - if (strobj == NULL) - return NULL; - PyErr_Format(PyExc_ValueError, - "invalid literal for int() with base %d: %R", - base, strobj); - Py_DECREF(strobj); - return NULL; -} - -PyObject * -PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) -{ - PyObject *v, *unicode = PyUnicode_FromUnicode(u, length); - if (unicode == NULL) - return NULL; - v = PyLong_FromUnicodeObject(unicode, base); - Py_DECREF(unicode); - return v; -} - -PyObject * -PyLong_FromUnicodeObject(PyObject *u, int base) -{ - PyObject *result; - PyObject *asciidig; - char *buffer, *end; - Py_ssize_t buflen; - - asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u); - if (asciidig == NULL) - return NULL; - buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen); - if (buffer == NULL) { - Py_DECREF(asciidig); - return NULL; - } - result = PyLong_FromString(buffer, &end, base); - if (result != NULL && end != buffer + buflen) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for int()"); - Py_DECREF(result); - result = NULL; - } - Py_DECREF(asciidig); - return result; -} - -/* forward */ -static PyLongObject *x_divrem - (PyLongObject *, PyLongObject *, PyLongObject **); -static PyObject *long_long(PyObject *v); - -/* Long division with remainder, top-level routine */ - -static int -long_divrem(PyLongObject *a, PyLongObject *b, - PyLongObject **pdiv, PyLongObject **prem) -{ - Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); - PyLongObject *z; - - if (size_b == 0) { - PyErr_SetString(PyExc_ZeroDivisionError, - "integer division or modulo by zero"); - return -1; - } - if (size_a < size_b || - (size_a == size_b && - a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) { - /* |a| < |b|. */ - *pdiv = (PyLongObject*)PyLong_FromLong(0); - if (*pdiv == NULL) - return -1; - Py_INCREF(a); - *prem = (PyLongObject *) a; - return 0; - } - if (size_b == 1) { - digit rem = 0; - z = divrem1(a, b->ob_digit[0], &rem); - if (z == NULL) - return -1; - *prem = (PyLongObject *) PyLong_FromLong((long)rem); - if (*prem == NULL) { - Py_DECREF(z); - return -1; - } - } - else { - z = x_divrem(a, b, prem); - if (z == NULL) - return -1; - } - /* Set the signs. - The quotient z has the sign of a*b; - the remainder r has the sign of a, - so a = b*z + r. */ - if ((Py_SIZE(a) < 0) != (Py_SIZE(b) < 0)) - NEGATE(z); - if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) - NEGATE(*prem); - *pdiv = maybe_small_long(z); - return 0; -} - -/* Unsigned long division with remainder -- the algorithm. The arguments v1 - and w1 should satisfy 2 <= ABS(Py_SIZE(w1)) <= ABS(Py_SIZE(v1)). */ - -static PyLongObject * -x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) -{ - PyLongObject *v, *w, *a; - Py_ssize_t i, k, size_v, size_w; - int d; - digit wm1, wm2, carry, q, r, vtop, *v0, *vk, *w0, *ak; - twodigits vv; - sdigit zhi; - stwodigits z; - - /* We follow Knuth [The Art of Computer Programming, Vol. 2 (3rd - edn.), section 4.3.1, Algorithm D], except that we don't explicitly - handle the special case when the initial estimate q for a quotient - digit is >= PyLong_BASE: the max value for q is PyLong_BASE+1, and - that won't overflow a digit. */ - - /* allocate space; w will also be used to hold the final remainder */ - size_v = ABS(Py_SIZE(v1)); - size_w = ABS(Py_SIZE(w1)); - assert(size_v >= size_w && size_w >= 2); /* Assert checks by div() */ - v = _PyLong_New(size_v+1); - if (v == NULL) { - *prem = NULL; - return NULL; - } - w = _PyLong_New(size_w); - if (w == NULL) { - Py_DECREF(v); - *prem = NULL; - return NULL; - } - - /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. - shift v1 left by the same amount. Results go into w and v. */ - d = PyLong_SHIFT - bits_in_digit(w1->ob_digit[size_w-1]); - carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); - assert(carry == 0); - carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); - if (carry != 0 || v->ob_digit[size_v-1] >= w->ob_digit[size_w-1]) { - v->ob_digit[size_v] = carry; - size_v++; - } - - /* Now v->ob_digit[size_v-1] < w->ob_digit[size_w-1], so quotient has - at most (and usually exactly) k = size_v - size_w digits. */ - k = size_v - size_w; - assert(k >= 0); - a = _PyLong_New(k); - if (a == NULL) { - Py_DECREF(w); - Py_DECREF(v); - *prem = NULL; - return NULL; - } - v0 = v->ob_digit; - w0 = w->ob_digit; - wm1 = w0[size_w-1]; - wm2 = w0[size_w-2]; - for (vk = v0+k, ak = a->ob_digit + k; vk-- > v0;) { - /* inner loop: divide vk[0:size_w+1] by w0[0:size_w], giving - single-digit quotient q, remainder in vk[0:size_w]. */ - - SIGCHECK({ - Py_DECREF(a); - Py_DECREF(w); - Py_DECREF(v); - *prem = NULL; - return NULL; - }); - - /* estimate quotient digit q; may overestimate by 1 (rare) */ - vtop = vk[size_w]; - assert(vtop <= wm1); - vv = ((twodigits)vtop << PyLong_SHIFT) | vk[size_w-1]; - q = (digit)(vv / wm1); - r = (digit)(vv - (twodigits)wm1 * q); /* r = vv % wm1 */ - while ((twodigits)wm2 * q > (((twodigits)r << PyLong_SHIFT) - | vk[size_w-2])) { - --q; - r += wm1; - if (r >= PyLong_BASE) - break; - } - assert(q <= PyLong_BASE); - - /* subtract q*w0[0:size_w] from vk[0:size_w+1] */ - zhi = 0; - for (i = 0; i < size_w; ++i) { - /* invariants: -PyLong_BASE <= -q <= zhi <= 0; - -PyLong_BASE * q <= z < PyLong_BASE */ - z = (sdigit)vk[i] + zhi - - (stwodigits)q * (stwodigits)w0[i]; - vk[i] = (digit)z & PyLong_MASK; - zhi = (sdigit)Py_ARITHMETIC_RIGHT_SHIFT(stwodigits, - z, PyLong_SHIFT); - } - - /* add w back if q was too large (this branch taken rarely) */ - assert((sdigit)vtop + zhi == -1 || (sdigit)vtop + zhi == 0); - if ((sdigit)vtop + zhi < 0) { - carry = 0; - for (i = 0; i < size_w; ++i) { - carry += vk[i] + w0[i]; - vk[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - } - --q; - } - - /* store quotient digit */ - assert(q < PyLong_BASE); - *--ak = q; - } - - /* unshift remainder; we reuse w to store the result */ - carry = v_rshift(w0, v0, size_w, d); - assert(carry==0); - Py_DECREF(v); - - *prem = long_normalize(w); - return long_normalize(a); -} - -/* For a nonzero PyLong a, express a in the form x * 2**e, with 0.5 <= - abs(x) < 1.0 and e >= 0; return x and put e in *e. Here x is - rounded to DBL_MANT_DIG significant bits using round-half-to-even. - If a == 0, return 0.0 and set *e = 0. If the resulting exponent - e is larger than PY_SSIZE_T_MAX, raise OverflowError and return - -1.0. */ - -/* attempt to define 2.0**DBL_MANT_DIG as a compile-time constant */ -#if DBL_MANT_DIG == 53 -#define EXP2_DBL_MANT_DIG 9007199254740992.0 -#else -#define EXP2_DBL_MANT_DIG (ldexp(1.0, DBL_MANT_DIG)) -#endif - -double -_PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) -{ - Py_ssize_t a_size, a_bits, shift_digits, shift_bits, x_size; - /* See below for why x_digits is always large enough. */ - digit rem, x_digits[2 + (DBL_MANT_DIG + 1) / PyLong_SHIFT]; - double dx; - /* Correction term for round-half-to-even rounding. For a digit x, - "x + half_even_correction[x & 7]" gives x rounded to the nearest - multiple of 4, rounding ties to a multiple of 8. */ - static const int half_even_correction[8] = {0, -1, -2, 1, 0, -1, 2, 1}; - - a_size = ABS(Py_SIZE(a)); - if (a_size == 0) { - /* Special case for 0: significand 0.0, exponent 0. */ - *e = 0; - return 0.0; - } - a_bits = bits_in_digit(a->ob_digit[a_size-1]); - /* The following is an overflow-free version of the check - "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ - if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && - (a_size > (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 || - a_bits > (PY_SSIZE_T_MAX - 1) % PyLong_SHIFT + 1)) - goto overflow; - a_bits = (a_size - 1) * PyLong_SHIFT + a_bits; - - /* Shift the first DBL_MANT_DIG + 2 bits of a into x_digits[0:x_size] - (shifting left if a_bits <= DBL_MANT_DIG + 2). - - Number of digits needed for result: write // for floor division. - Then if shifting left, we end up using - - 1 + a_size + (DBL_MANT_DIG + 2 - a_bits) // PyLong_SHIFT - - digits. If shifting right, we use - - a_size - (a_bits - DBL_MANT_DIG - 2) // PyLong_SHIFT - - digits. Using a_size = 1 + (a_bits - 1) // PyLong_SHIFT along with - the inequalities - - m // PyLong_SHIFT + n // PyLong_SHIFT <= (m + n) // PyLong_SHIFT - m // PyLong_SHIFT - n // PyLong_SHIFT <= - 1 + (m - n - 1) // PyLong_SHIFT, - - valid for any integers m and n, we find that x_size satisfies - - x_size <= 2 + (DBL_MANT_DIG + 1) // PyLong_SHIFT - - in both cases. - */ - if (a_bits <= DBL_MANT_DIG + 2) { - shift_digits = (DBL_MANT_DIG + 2 - a_bits) / PyLong_SHIFT; - shift_bits = (DBL_MANT_DIG + 2 - a_bits) % PyLong_SHIFT; - x_size = 0; - while (x_size < shift_digits) - x_digits[x_size++] = 0; - rem = v_lshift(x_digits + x_size, a->ob_digit, a_size, - (int)shift_bits); - x_size += a_size; - x_digits[x_size++] = rem; - } - else { - shift_digits = (a_bits - DBL_MANT_DIG - 2) / PyLong_SHIFT; - shift_bits = (a_bits - DBL_MANT_DIG - 2) % PyLong_SHIFT; - rem = v_rshift(x_digits, a->ob_digit + shift_digits, - a_size - shift_digits, (int)shift_bits); - x_size = a_size - shift_digits; - /* For correct rounding below, we need the least significant - bit of x to be 'sticky' for this shift: if any of the bits - shifted out was nonzero, we set the least significant bit - of x. */ - if (rem) - x_digits[0] |= 1; - else - while (shift_digits > 0) - if (a->ob_digit[--shift_digits]) { - x_digits[0] |= 1; - break; - } - } - assert(1 <= x_size && x_size <= (Py_ssize_t)Py_ARRAY_LENGTH(x_digits)); - - /* Round, and convert to double. */ - x_digits[0] += half_even_correction[x_digits[0] & 7]; - dx = x_digits[--x_size]; - while (x_size > 0) - dx = dx * PyLong_BASE + x_digits[--x_size]; - - /* Rescale; make correction if result is 1.0. */ - dx /= 4.0 * EXP2_DBL_MANT_DIG; - if (dx == 1.0) { - if (a_bits == PY_SSIZE_T_MAX) - goto overflow; - dx = 0.5; - a_bits += 1; - } - - *e = a_bits; - return Py_SIZE(a) < 0 ? -dx : dx; - - overflow: - /* exponent > PY_SSIZE_T_MAX */ - PyErr_SetString(PyExc_OverflowError, - "huge integer: number of bits overflows a Py_ssize_t"); - *e = 0; - return -1.0; -} - -/* Get a C double from a long int object. Rounds to the nearest double, - using the round-half-to-even rule in the case of a tie. */ - -double -PyLong_AsDouble(PyObject *v) -{ - Py_ssize_t exponent; - double x; - - if (v == NULL) { - PyErr_BadInternalCall(); - return -1.0; - } - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, "an integer is required"); - return -1.0; - } - x = _PyLong_Frexp((PyLongObject *)v, &exponent); - if ((x == -1.0 && PyErr_Occurred()) || exponent > DBL_MAX_EXP) { - PyErr_SetString(PyExc_OverflowError, - "long int too large to convert to float"); - return -1.0; - } - return ldexp(x, (int)exponent); -} - -/* Methods */ - -static void -long_dealloc(PyObject *v) -{ - Py_TYPE(v)->tp_free(v); -} - -static int -long_compare(PyLongObject *a, PyLongObject *b) -{ - Py_ssize_t sign; - - if (Py_SIZE(a) != Py_SIZE(b)) { - sign = Py_SIZE(a) - Py_SIZE(b); - } - else { - Py_ssize_t i = ABS(Py_SIZE(a)); - while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) - ; - if (i < 0) - sign = 0; - else { - sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i]; - if (Py_SIZE(a) < 0) - sign = -sign; - } - } - return sign < 0 ? -1 : sign > 0 ? 1 : 0; -} - -#define TEST_COND(cond) \ - ((cond) ? Py_True : Py_False) - -static PyObject * -long_richcompare(PyObject *self, PyObject *other, int op) -{ - int result; - PyObject *v; - CHECK_BINOP(self, other); - if (self == other) - result = 0; - else - result = long_compare((PyLongObject*)self, (PyLongObject*)other); - /* Convert the return value to a Boolean */ - switch (op) { - case Py_EQ: - v = TEST_COND(result == 0); - break; - case Py_NE: - v = TEST_COND(result != 0); - break; - case Py_LE: - v = TEST_COND(result <= 0); - break; - case Py_GE: - v = TEST_COND(result >= 0); - break; - case Py_LT: - v = TEST_COND(result == -1); - break; - case Py_GT: - v = TEST_COND(result == 1); - break; - default: - PyErr_BadArgument(); - return NULL; - } - Py_INCREF(v); - return v; -} - -static Py_hash_t -long_hash(PyLongObject *v) -{ - Py_uhash_t x; - Py_ssize_t i; - int sign; - - i = Py_SIZE(v); - switch(i) { - case -1: return v->ob_digit[0]==1 ? -2 : -(sdigit)v->ob_digit[0]; - case 0: return 0; - case 1: return v->ob_digit[0]; - } - sign = 1; - x = 0; - if (i < 0) { - sign = -1; - i = -(i); - } - while (--i >= 0) { - /* Here x is a quantity in the range [0, _PyHASH_MODULUS); we - want to compute x * 2**PyLong_SHIFT + v->ob_digit[i] modulo - _PyHASH_MODULUS. - - The computation of x * 2**PyLong_SHIFT % _PyHASH_MODULUS - amounts to a rotation of the bits of x. To see this, write - - x * 2**PyLong_SHIFT = y * 2**_PyHASH_BITS + z - - where y = x >> (_PyHASH_BITS - PyLong_SHIFT) gives the top - PyLong_SHIFT bits of x (those that are shifted out of the - original _PyHASH_BITS bits, and z = (x << PyLong_SHIFT) & - _PyHASH_MODULUS gives the bottom _PyHASH_BITS - PyLong_SHIFT - bits of x, shifted up. Then since 2**_PyHASH_BITS is - congruent to 1 modulo _PyHASH_MODULUS, y*2**_PyHASH_BITS is - congruent to y modulo _PyHASH_MODULUS. So - - x * 2**PyLong_SHIFT = y + z (mod _PyHASH_MODULUS). - - The right-hand side is just the result of rotating the - _PyHASH_BITS bits of x left by PyLong_SHIFT places; since - not all _PyHASH_BITS bits of x are 1s, the same is true - after rotation, so 0 <= y+z < _PyHASH_MODULUS and y + z is - the reduction of x*2**PyLong_SHIFT modulo - _PyHASH_MODULUS. */ - x = ((x << PyLong_SHIFT) & _PyHASH_MODULUS) | - (x >> (_PyHASH_BITS - PyLong_SHIFT)); - x += v->ob_digit[i]; - if (x >= _PyHASH_MODULUS) - x -= _PyHASH_MODULUS; - } - x = x * sign; - if (x == (Py_uhash_t)-1) - x = (Py_uhash_t)-2; - return (Py_hash_t)x; -} - - -/* Add the absolute values of two long integers. */ - -static PyLongObject * -x_add(PyLongObject *a, PyLongObject *b) -{ - Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); - PyLongObject *z; - Py_ssize_t i; - digit carry = 0; - - /* Ensure a is the larger of the two: */ - if (size_a < size_b) { - { PyLongObject *temp = a; a = b; b = temp; } - { Py_ssize_t size_temp = size_a; - size_a = size_b; - size_b = size_temp; } - } - z = _PyLong_New(size_a+1); - if (z == NULL) - return NULL; - for (i = 0; i < size_b; ++i) { - carry += a->ob_digit[i] + b->ob_digit[i]; - z->ob_digit[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - } - for (; i < size_a; ++i) { - carry += a->ob_digit[i]; - z->ob_digit[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - } - z->ob_digit[i] = carry; - return long_normalize(z); -} - -/* Subtract the absolute values of two integers. */ - -static PyLongObject * -x_sub(PyLongObject *a, PyLongObject *b) -{ - Py_ssize_t size_a = ABS(Py_SIZE(a)), size_b = ABS(Py_SIZE(b)); - PyLongObject *z; - Py_ssize_t i; - int sign = 1; - digit borrow = 0; - - /* Ensure a is the larger of the two: */ - if (size_a < size_b) { - sign = -1; - { PyLongObject *temp = a; a = b; b = temp; } - { Py_ssize_t size_temp = size_a; - size_a = size_b; - size_b = size_temp; } - } - else if (size_a == size_b) { - /* Find highest digit where a and b differ: */ - i = size_a; - while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i]) - ; - if (i < 0) - return (PyLongObject *)PyLong_FromLong(0); - if (a->ob_digit[i] < b->ob_digit[i]) { - sign = -1; - { PyLongObject *temp = a; a = b; b = temp; } - } - size_a = size_b = i+1; - } - z = _PyLong_New(size_a); - if (z == NULL) - return NULL; - for (i = 0; i < size_b; ++i) { - /* The following assumes unsigned arithmetic - works module 2**N for some N>PyLong_SHIFT. */ - borrow = a->ob_digit[i] - b->ob_digit[i] - borrow; - z->ob_digit[i] = borrow & PyLong_MASK; - borrow >>= PyLong_SHIFT; - borrow &= 1; /* Keep only one sign bit */ - } - for (; i < size_a; ++i) { - borrow = a->ob_digit[i] - borrow; - z->ob_digit[i] = borrow & PyLong_MASK; - borrow >>= PyLong_SHIFT; - borrow &= 1; /* Keep only one sign bit */ - } - assert(borrow == 0); - if (sign < 0) - NEGATE(z); - return long_normalize(z); -} - -static PyObject * -long_add(PyLongObject *a, PyLongObject *b) -{ - PyLongObject *z; - - CHECK_BINOP(a, b); - - if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { - PyObject *result = PyLong_FromLong(MEDIUM_VALUE(a) + - MEDIUM_VALUE(b)); - return result; - } - if (Py_SIZE(a) < 0) { - if (Py_SIZE(b) < 0) { - z = x_add(a, b); - if (z != NULL && Py_SIZE(z) != 0) - Py_SIZE(z) = -(Py_SIZE(z)); - } - else - z = x_sub(b, a); - } - else { - if (Py_SIZE(b) < 0) - z = x_sub(a, b); - else - z = x_add(a, b); - } - return (PyObject *)z; -} - -static PyObject * -long_sub(PyLongObject *a, PyLongObject *b) -{ - PyLongObject *z; - - CHECK_BINOP(a, b); - - if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { - PyObject* r; - r = PyLong_FromLong(MEDIUM_VALUE(a)-MEDIUM_VALUE(b)); - return r; - } - if (Py_SIZE(a) < 0) { - if (Py_SIZE(b) < 0) - z = x_sub(a, b); - else - z = x_add(a, b); - if (z != NULL && Py_SIZE(z) != 0) - Py_SIZE(z) = -(Py_SIZE(z)); - } - else { - if (Py_SIZE(b) < 0) - z = x_add(a, b); - else - z = x_sub(a, b); - } - return (PyObject *)z; -} - -/* Grade school multiplication, ignoring the signs. - * Returns the absolute value of the product, or NULL if error. - */ -static PyLongObject * -x_mul(PyLongObject *a, PyLongObject *b) -{ - PyLongObject *z; - Py_ssize_t size_a = ABS(Py_SIZE(a)); - Py_ssize_t size_b = ABS(Py_SIZE(b)); - Py_ssize_t i; - - z = _PyLong_New(size_a + size_b); - if (z == NULL) - return NULL; - - memset(z->ob_digit, 0, Py_SIZE(z) * sizeof(digit)); - if (a == b) { - /* Efficient squaring per HAC, Algorithm 14.16: - * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf - * Gives slightly less than a 2x speedup when a == b, - * via exploiting that each entry in the multiplication - * pyramid appears twice (except for the size_a squares). - */ - for (i = 0; i < size_a; ++i) { - twodigits carry; - twodigits f = a->ob_digit[i]; - digit *pz = z->ob_digit + (i << 1); - digit *pa = a->ob_digit + i + 1; - digit *paend = a->ob_digit + size_a; - - SIGCHECK({ - Py_DECREF(z); - return NULL; - }); - - carry = *pz + f * f; - *pz++ = (digit)(carry & PyLong_MASK); - carry >>= PyLong_SHIFT; - assert(carry <= PyLong_MASK); - - /* Now f is added in twice in each column of the - * pyramid it appears. Same as adding f<<1 once. - */ - f <<= 1; - while (pa < paend) { - carry += *pz + *pa++ * f; - *pz++ = (digit)(carry & PyLong_MASK); - carry >>= PyLong_SHIFT; - assert(carry <= (PyLong_MASK << 1)); - } - if (carry) { - carry += *pz; - *pz++ = (digit)(carry & PyLong_MASK); - carry >>= PyLong_SHIFT; - } - if (carry) - *pz += (digit)(carry & PyLong_MASK); - assert((carry >> PyLong_SHIFT) == 0); - } - } - else { /* a is not the same as b -- gradeschool long mult */ - for (i = 0; i < size_a; ++i) { - twodigits carry = 0; - twodigits f = a->ob_digit[i]; - digit *pz = z->ob_digit + i; - digit *pb = b->ob_digit; - digit *pbend = b->ob_digit + size_b; - - SIGCHECK({ - Py_DECREF(z); - return NULL; - }); - - while (pb < pbend) { - carry += *pz + *pb++ * f; - *pz++ = (digit)(carry & PyLong_MASK); - carry >>= PyLong_SHIFT; - assert(carry <= PyLong_MASK); - } - if (carry) - *pz += (digit)(carry & PyLong_MASK); - assert((carry >> PyLong_SHIFT) == 0); - } - } - return long_normalize(z); -} - -/* A helper for Karatsuba multiplication (k_mul). - Takes a long "n" and an integer "size" representing the place to - split, and sets low and high such that abs(n) == (high << size) + low, - viewing the shift as being by digits. The sign bit is ignored, and - the return values are >= 0. - Returns 0 on success, -1 on failure. -*/ -static int -kmul_split(PyLongObject *n, - Py_ssize_t size, - PyLongObject **high, - PyLongObject **low) -{ - PyLongObject *hi, *lo; - Py_ssize_t size_lo, size_hi; - const Py_ssize_t size_n = ABS(Py_SIZE(n)); - - size_lo = MIN(size_n, size); - size_hi = size_n - size_lo; - - if ((hi = _PyLong_New(size_hi)) == NULL) - return -1; - if ((lo = _PyLong_New(size_lo)) == NULL) { - Py_DECREF(hi); - return -1; - } - - memcpy(lo->ob_digit, n->ob_digit, size_lo * sizeof(digit)); - memcpy(hi->ob_digit, n->ob_digit + size_lo, size_hi * sizeof(digit)); - - *high = long_normalize(hi); - *low = long_normalize(lo); - return 0; -} - -static PyLongObject *k_lopsided_mul(PyLongObject *a, PyLongObject *b); - -/* Karatsuba multiplication. Ignores the input signs, and returns the - * absolute value of the product (or NULL if error). - * See Knuth Vol. 2 Chapter 4.3.3 (Pp. 294-295). - */ -static PyLongObject * -k_mul(PyLongObject *a, PyLongObject *b) -{ - Py_ssize_t asize = ABS(Py_SIZE(a)); - Py_ssize_t bsize = ABS(Py_SIZE(b)); - PyLongObject *ah = NULL; - PyLongObject *al = NULL; - PyLongObject *bh = NULL; - PyLongObject *bl = NULL; - PyLongObject *ret = NULL; - PyLongObject *t1, *t2, *t3; - Py_ssize_t shift; /* the number of digits we split off */ - Py_ssize_t i; - - /* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl - * Let k = (ah+al)*(bh+bl) = ah*bl + al*bh + ah*bh + al*bl - * Then the original product is - * ah*bh*X*X + (k - ah*bh - al*bl)*X + al*bl - * By picking X to be a power of 2, "*X" is just shifting, and it's - * been reduced to 3 multiplies on numbers half the size. - */ - - /* We want to split based on the larger number; fiddle so that b - * is largest. - */ - if (asize > bsize) { - t1 = a; - a = b; - b = t1; - - i = asize; - asize = bsize; - bsize = i; - } - - /* Use gradeschool math when either number is too small. */ - i = a == b ? KARATSUBA_SQUARE_CUTOFF : KARATSUBA_CUTOFF; - if (asize <= i) { - if (asize == 0) - return (PyLongObject *)PyLong_FromLong(0); - else - return x_mul(a, b); - } - - /* If a is small compared to b, splitting on b gives a degenerate - * case with ah==0, and Karatsuba may be (even much) less efficient - * than "grade school" then. However, we can still win, by viewing - * b as a string of "big digits", each of width a->ob_size. That - * leads to a sequence of balanced calls to k_mul. - */ - if (2 * asize <= bsize) - return k_lopsided_mul(a, b); - - /* Split a & b into hi & lo pieces. */ - shift = bsize >> 1; - if (kmul_split(a, shift, &ah, &al) < 0) goto fail; - assert(Py_SIZE(ah) > 0); /* the split isn't degenerate */ - - if (a == b) { - bh = ah; - bl = al; - Py_INCREF(bh); - Py_INCREF(bl); - } - else if (kmul_split(b, shift, &bh, &bl) < 0) goto fail; - - /* The plan: - * 1. Allocate result space (asize + bsize digits: that's always - * enough). - * 2. Compute ah*bh, and copy into result at 2*shift. - * 3. Compute al*bl, and copy into result at 0. Note that this - * can't overlap with #2. - * 4. Subtract al*bl from the result, starting at shift. This may - * underflow (borrow out of the high digit), but we don't care: - * we're effectively doing unsigned arithmetic mod - * BASE**(sizea + sizeb), and so long as the *final* result fits, - * borrows and carries out of the high digit can be ignored. - * 5. Subtract ah*bh from the result, starting at shift. - * 6. Compute (ah+al)*(bh+bl), and add it into the result starting - * at shift. - */ - - /* 1. Allocate result space. */ - ret = _PyLong_New(asize + bsize); - if (ret == NULL) goto fail; -#ifdef Py_DEBUG - /* Fill with trash, to catch reference to uninitialized digits. */ - memset(ret->ob_digit, 0xDF, Py_SIZE(ret) * sizeof(digit)); -#endif - - /* 2. t1 <- ah*bh, and copy into high digits of result. */ - if ((t1 = k_mul(ah, bh)) == NULL) goto fail; - assert(Py_SIZE(t1) >= 0); - assert(2*shift + Py_SIZE(t1) <= Py_SIZE(ret)); - memcpy(ret->ob_digit + 2*shift, t1->ob_digit, - Py_SIZE(t1) * sizeof(digit)); - - /* Zero-out the digits higher than the ah*bh copy. */ - i = Py_SIZE(ret) - 2*shift - Py_SIZE(t1); - if (i) - memset(ret->ob_digit + 2*shift + Py_SIZE(t1), 0, - i * sizeof(digit)); - - /* 3. t2 <- al*bl, and copy into the low digits. */ - if ((t2 = k_mul(al, bl)) == NULL) { - Py_DECREF(t1); - goto fail; - } - assert(Py_SIZE(t2) >= 0); - assert(Py_SIZE(t2) <= 2*shift); /* no overlap with high digits */ - memcpy(ret->ob_digit, t2->ob_digit, Py_SIZE(t2) * sizeof(digit)); - - /* Zero out remaining digits. */ - i = 2*shift - Py_SIZE(t2); /* number of uninitialized digits */ - if (i) - memset(ret->ob_digit + Py_SIZE(t2), 0, i * sizeof(digit)); - - /* 4 & 5. Subtract ah*bh (t1) and al*bl (t2). We do al*bl first - * because it's fresher in cache. - */ - i = Py_SIZE(ret) - shift; /* # digits after shift */ - (void)v_isub(ret->ob_digit + shift, i, t2->ob_digit, Py_SIZE(t2)); - Py_DECREF(t2); - - (void)v_isub(ret->ob_digit + shift, i, t1->ob_digit, Py_SIZE(t1)); - Py_DECREF(t1); - - /* 6. t3 <- (ah+al)(bh+bl), and add into result. */ - if ((t1 = x_add(ah, al)) == NULL) goto fail; - Py_DECREF(ah); - Py_DECREF(al); - ah = al = NULL; - - if (a == b) { - t2 = t1; - Py_INCREF(t2); - } - else if ((t2 = x_add(bh, bl)) == NULL) { - Py_DECREF(t1); - goto fail; - } - Py_DECREF(bh); - Py_DECREF(bl); - bh = bl = NULL; - - t3 = k_mul(t1, t2); - Py_DECREF(t1); - Py_DECREF(t2); - if (t3 == NULL) goto fail; - assert(Py_SIZE(t3) >= 0); - - /* Add t3. It's not obvious why we can't run out of room here. - * See the (*) comment after this function. - */ - (void)v_iadd(ret->ob_digit + shift, i, t3->ob_digit, Py_SIZE(t3)); - Py_DECREF(t3); - - return long_normalize(ret); - - fail: - Py_XDECREF(ret); - Py_XDECREF(ah); - Py_XDECREF(al); - Py_XDECREF(bh); - Py_XDECREF(bl); - return NULL; -} - -/* (*) Why adding t3 can't "run out of room" above. - -Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts -to start with: - -1. For any integer i, i = c(i/2) + f(i/2). In particular, - bsize = c(bsize/2) + f(bsize/2). -2. shift = f(bsize/2) -3. asize <= bsize -4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this - routine, so asize > bsize/2 >= f(bsize/2) in this routine. - -We allocated asize + bsize result digits, and add t3 into them at an offset -of shift. This leaves asize+bsize-shift allocated digit positions for t3 -to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = -asize + c(bsize/2) available digit positions. - -bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has -at most c(bsize/2) digits + 1 bit. - -If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) -digits, and al has at most f(bsize/2) digits in any case. So ah+al has at -most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. - -The product (ah+al)*(bh+bl) therefore has at most - - c(bsize/2) + (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits - -and we have asize + c(bsize/2) available digit positions. We need to show -this is always enough. An instance of c(bsize/2) cancels out in both, so -the question reduces to whether asize digits is enough to hold -(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, -then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, -asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 -digit is enough to hold 2 bits. This is so since PyLong_SHIFT=15 >= 2. If -asize == bsize, then we're asking whether bsize digits is enough to hold -c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits -is enough to hold 2 bits. This is so if bsize >= 2, which holds because -bsize >= KARATSUBA_CUTOFF >= 2. - -Note that since there's always enough room for (ah+al)*(bh+bl), and that's -clearly >= each of ah*bh and al*bl, there's always enough room to subtract -ah*bh and al*bl too. -*/ - -/* b has at least twice the digits of a, and a is big enough that Karatsuba - * would pay off *if* the inputs had balanced sizes. View b as a sequence - * of slices, each with a->ob_size digits, and multiply the slices by a, - * one at a time. This gives k_mul balanced inputs to work with, and is - * also cache-friendly (we compute one double-width slice of the result - * at a time, then move on, never backtracking except for the helpful - * single-width slice overlap between successive partial sums). - */ -static PyLongObject * -k_lopsided_mul(PyLongObject *a, PyLongObject *b) -{ - const Py_ssize_t asize = ABS(Py_SIZE(a)); - Py_ssize_t bsize = ABS(Py_SIZE(b)); - Py_ssize_t nbdone; /* # of b digits already multiplied */ - PyLongObject *ret; - PyLongObject *bslice = NULL; - - assert(asize > KARATSUBA_CUTOFF); - assert(2 * asize <= bsize); - - /* Allocate result space, and zero it out. */ - ret = _PyLong_New(asize + bsize); - if (ret == NULL) - return NULL; - memset(ret->ob_digit, 0, Py_SIZE(ret) * sizeof(digit)); - - /* Successive slices of b are copied into bslice. */ - bslice = _PyLong_New(asize); - if (bslice == NULL) - goto fail; - - nbdone = 0; - while (bsize > 0) { - PyLongObject *product; - const Py_ssize_t nbtouse = MIN(bsize, asize); - - /* Multiply the next slice of b by a. */ - memcpy(bslice->ob_digit, b->ob_digit + nbdone, - nbtouse * sizeof(digit)); - Py_SIZE(bslice) = nbtouse; - product = k_mul(a, bslice); - if (product == NULL) - goto fail; - - /* Add into result. */ - (void)v_iadd(ret->ob_digit + nbdone, Py_SIZE(ret) - nbdone, - product->ob_digit, Py_SIZE(product)); - Py_DECREF(product); - - bsize -= nbtouse; - nbdone += nbtouse; - } - - Py_DECREF(bslice); - return long_normalize(ret); - - fail: - Py_DECREF(ret); - Py_XDECREF(bslice); - return NULL; -} - -static PyObject * -long_mul(PyLongObject *a, PyLongObject *b) -{ - PyLongObject *z; - - CHECK_BINOP(a, b); - - /* fast path for single-digit multiplication */ - if (ABS(Py_SIZE(a)) <= 1 && ABS(Py_SIZE(b)) <= 1) { - stwodigits v = (stwodigits)(MEDIUM_VALUE(a)) * MEDIUM_VALUE(b); -#ifdef HAVE_LONG_LONG - return PyLong_FromLongLong((PY_LONG_LONG)v); -#else - /* if we don't have long long then we're almost certainly - using 15-bit digits, so v will fit in a long. In the - unlikely event that we're using 30-bit digits on a platform - without long long, a large v will just cause us to fall - through to the general multiplication code below. */ - if (v >= LONG_MIN && v <= LONG_MAX) - return PyLong_FromLong((long)v); -#endif - } - - z = k_mul(a, b); - /* Negate if exactly one of the inputs is negative. */ - if (((Py_SIZE(a) ^ Py_SIZE(b)) < 0) && z) - NEGATE(z); - return (PyObject *)z; -} - -/* The / and % operators are now defined in terms of divmod(). - The expression a mod b has the value a - b*floor(a/b). - The long_divrem function gives the remainder after division of - |a| by |b|, with the sign of a. This is also expressed - as a - b*trunc(a/b), if trunc truncates towards zero. - Some examples: - a b a rem b a mod b - 13 10 3 3 - -13 10 -3 7 - 13 -10 3 -7 - -13 -10 -3 -3 - So, to get from rem to mod, we have to add b if a and b - have different signs. We then subtract one from the 'div' - part of the outcome to keep the invariant intact. */ - -/* Compute - * *pdiv, *pmod = divmod(v, w) - * NULL can be passed for pdiv or pmod, in which case that part of - * the result is simply thrown away. The caller owns a reference to - * each of these it requests (does not pass NULL for). - */ -static int -l_divmod(PyLongObject *v, PyLongObject *w, - PyLongObject **pdiv, PyLongObject **pmod) -{ - PyLongObject *div, *mod; - - if (long_divrem(v, w, &div, &mod) < 0) - return -1; - if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) || - (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { - PyLongObject *temp; - PyLongObject *one; - temp = (PyLongObject *) long_add(mod, w); - Py_DECREF(mod); - mod = temp; - if (mod == NULL) { - Py_DECREF(div); - return -1; - } - one = (PyLongObject *) PyLong_FromLong(1L); - if (one == NULL || - (temp = (PyLongObject *) long_sub(div, one)) == NULL) { - Py_DECREF(mod); - Py_DECREF(div); - Py_XDECREF(one); - return -1; - } - Py_DECREF(one); - Py_DECREF(div); - div = temp; - } - if (pdiv != NULL) - *pdiv = div; - else - Py_DECREF(div); - - if (pmod != NULL) - *pmod = mod; - else - Py_DECREF(mod); - - return 0; -} - -static PyObject * -long_div(PyObject *a, PyObject *b) -{ - PyLongObject *div; - - CHECK_BINOP(a, b); - if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0) - div = NULL; - return (PyObject *)div; -} - -/* PyLong/PyLong -> float, with correctly rounded result. */ - -#define MANT_DIG_DIGITS (DBL_MANT_DIG / PyLong_SHIFT) -#define MANT_DIG_BITS (DBL_MANT_DIG % PyLong_SHIFT) - -static PyObject * -long_true_divide(PyObject *v, PyObject *w) -{ - PyLongObject *a, *b, *x; - Py_ssize_t a_size, b_size, shift, extra_bits, diff, x_size, x_bits; - digit mask, low; - int inexact, negate, a_is_small, b_is_small; - double dx, result; - - CHECK_BINOP(v, w); - a = (PyLongObject *)v; - b = (PyLongObject *)w; - - /* - Method in a nutshell: - - 0. reduce to case a, b > 0; filter out obvious underflow/overflow - 1. choose a suitable integer 'shift' - 2. use integer arithmetic to compute x = floor(2**-shift*a/b) - 3. adjust x for correct rounding - 4. convert x to a double dx with the same value - 5. return ldexp(dx, shift). - - In more detail: - - 0. For any a, a/0 raises ZeroDivisionError; for nonzero b, 0/b - returns either 0.0 or -0.0, depending on the sign of b. For a and - b both nonzero, ignore signs of a and b, and add the sign back in - at the end. Now write a_bits and b_bits for the bit lengths of a - and b respectively (that is, a_bits = 1 + floor(log_2(a)); likewise - for b). Then - - 2**(a_bits - b_bits - 1) < a/b < 2**(a_bits - b_bits + 1). - - So if a_bits - b_bits > DBL_MAX_EXP then a/b > 2**DBL_MAX_EXP and - so overflows. Similarly, if a_bits - b_bits < DBL_MIN_EXP - - DBL_MANT_DIG - 1 then a/b underflows to 0. With these cases out of - the way, we can assume that - - DBL_MIN_EXP - DBL_MANT_DIG - 1 <= a_bits - b_bits <= DBL_MAX_EXP. - - 1. The integer 'shift' is chosen so that x has the right number of - bits for a double, plus two or three extra bits that will be used - in the rounding decisions. Writing a_bits and b_bits for the - number of significant bits in a and b respectively, a - straightforward formula for shift is: - - shift = a_bits - b_bits - DBL_MANT_DIG - 2 - - This is fine in the usual case, but if a/b is smaller than the - smallest normal float then it can lead to double rounding on an - IEEE 754 platform, giving incorrectly rounded results. So we - adjust the formula slightly. The actual formula used is: - - shift = MAX(a_bits - b_bits, DBL_MIN_EXP) - DBL_MANT_DIG - 2 - - 2. The quantity x is computed by first shifting a (left -shift bits - if shift <= 0, right shift bits if shift > 0) and then dividing by - b. For both the shift and the division, we keep track of whether - the result is inexact, in a flag 'inexact'; this information is - needed at the rounding stage. - - With the choice of shift above, together with our assumption that - a_bits - b_bits >= DBL_MIN_EXP - DBL_MANT_DIG - 1, it follows - that x >= 1. - - 3. Now x * 2**shift <= a/b < (x+1) * 2**shift. We want to replace - this with an exactly representable float of the form - - round(x/2**extra_bits) * 2**(extra_bits+shift). - - For float representability, we need x/2**extra_bits < - 2**DBL_MANT_DIG and extra_bits + shift >= DBL_MIN_EXP - - DBL_MANT_DIG. This translates to the condition: - - extra_bits >= MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG - - To round, we just modify the bottom digit of x in-place; this can - end up giving a digit with value > PyLONG_MASK, but that's not a - problem since digits can hold values up to 2*PyLONG_MASK+1. - - With the original choices for shift above, extra_bits will always - be 2 or 3. Then rounding under the round-half-to-even rule, we - round up iff the most significant of the extra bits is 1, and - either: (a) the computation of x in step 2 had an inexact result, - or (b) at least one other of the extra bits is 1, or (c) the least - significant bit of x (above those to be rounded) is 1. - - 4. Conversion to a double is straightforward; all floating-point - operations involved in the conversion are exact, so there's no - danger of rounding errors. - - 5. Use ldexp(x, shift) to compute x*2**shift, the final result. - The result will always be exactly representable as a double, except - in the case that it overflows. To avoid dependence on the exact - behaviour of ldexp on overflow, we check for overflow before - applying ldexp. The result of ldexp is adjusted for sign before - returning. - */ - - /* Reduce to case where a and b are both positive. */ - a_size = ABS(Py_SIZE(a)); - b_size = ABS(Py_SIZE(b)); - negate = (Py_SIZE(a) < 0) ^ (Py_SIZE(b) < 0); - if (b_size == 0) { - PyErr_SetString(PyExc_ZeroDivisionError, - "division by zero"); - goto error; - } - if (a_size == 0) - goto underflow_or_zero; - - /* Fast path for a and b small (exactly representable in a double). - Relies on floating-point division being correctly rounded; results - may be subject to double rounding on x86 machines that operate with - the x87 FPU set to 64-bit precision. */ - a_is_small = a_size <= MANT_DIG_DIGITS || - (a_size == MANT_DIG_DIGITS+1 && - a->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); - b_is_small = b_size <= MANT_DIG_DIGITS || - (b_size == MANT_DIG_DIGITS+1 && - b->ob_digit[MANT_DIG_DIGITS] >> MANT_DIG_BITS == 0); - if (a_is_small && b_is_small) { - double da, db; - da = a->ob_digit[--a_size]; - while (a_size > 0) - da = da * PyLong_BASE + a->ob_digit[--a_size]; - db = b->ob_digit[--b_size]; - while (b_size > 0) - db = db * PyLong_BASE + b->ob_digit[--b_size]; - result = da / db; - goto success; - } - - /* Catch obvious cases of underflow and overflow */ - diff = a_size - b_size; - if (diff > PY_SSIZE_T_MAX/PyLong_SHIFT - 1) - /* Extreme overflow */ - goto overflow; - else if (diff < 1 - PY_SSIZE_T_MAX/PyLong_SHIFT) - /* Extreme underflow */ - goto underflow_or_zero; - /* Next line is now safe from overflowing a Py_ssize_t */ - diff = diff * PyLong_SHIFT + bits_in_digit(a->ob_digit[a_size - 1]) - - bits_in_digit(b->ob_digit[b_size - 1]); - /* Now diff = a_bits - b_bits. */ - if (diff > DBL_MAX_EXP) - goto overflow; - else if (diff < DBL_MIN_EXP - DBL_MANT_DIG - 1) - goto underflow_or_zero; - - /* Choose value for shift; see comments for step 1 above. */ - shift = MAX(diff, DBL_MIN_EXP) - DBL_MANT_DIG - 2; - - inexact = 0; - - /* x = abs(a * 2**-shift) */ - if (shift <= 0) { - Py_ssize_t i, shift_digits = -shift / PyLong_SHIFT; - digit rem; - /* x = a << -shift */ - if (a_size >= PY_SSIZE_T_MAX - 1 - shift_digits) { - /* In practice, it's probably impossible to end up - here. Both a and b would have to be enormous, - using close to SIZE_T_MAX bytes of memory each. */ - PyErr_SetString(PyExc_OverflowError, - "intermediate overflow during division"); - goto error; - } - x = _PyLong_New(a_size + shift_digits + 1); - if (x == NULL) - goto error; - for (i = 0; i < shift_digits; i++) - x->ob_digit[i] = 0; - rem = v_lshift(x->ob_digit + shift_digits, a->ob_digit, - a_size, -shift % PyLong_SHIFT); - x->ob_digit[a_size + shift_digits] = rem; - } - else { - Py_ssize_t shift_digits = shift / PyLong_SHIFT; - digit rem; - /* x = a >> shift */ - assert(a_size >= shift_digits); - x = _PyLong_New(a_size - shift_digits); - if (x == NULL) - goto error; - rem = v_rshift(x->ob_digit, a->ob_digit + shift_digits, - a_size - shift_digits, shift % PyLong_SHIFT); - /* set inexact if any of the bits shifted out is nonzero */ - if (rem) - inexact = 1; - while (!inexact && shift_digits > 0) - if (a->ob_digit[--shift_digits]) - inexact = 1; - } - long_normalize(x); - x_size = Py_SIZE(x); - - /* x //= b. If the remainder is nonzero, set inexact. We own the only - reference to x, so it's safe to modify it in-place. */ - if (b_size == 1) { - digit rem = inplace_divrem1(x->ob_digit, x->ob_digit, x_size, - b->ob_digit[0]); - long_normalize(x); - if (rem) - inexact = 1; - } - else { - PyLongObject *div, *rem; - div = x_divrem(x, b, &rem); - Py_DECREF(x); - x = div; - if (x == NULL) - goto error; - if (Py_SIZE(rem)) - inexact = 1; - Py_DECREF(rem); - } - x_size = ABS(Py_SIZE(x)); - assert(x_size > 0); /* result of division is never zero */ - x_bits = (x_size-1)*PyLong_SHIFT+bits_in_digit(x->ob_digit[x_size-1]); - - /* The number of extra bits that have to be rounded away. */ - extra_bits = MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; - assert(extra_bits == 2 || extra_bits == 3); - - /* Round by directly modifying the low digit of x. */ - mask = (digit)1 << (extra_bits - 1); - low = x->ob_digit[0] | inexact; - if (low & mask && low & (3*mask-1)) - low += mask; - x->ob_digit[0] = low & ~(mask-1U); - - /* Convert x to a double dx; the conversion is exact. */ - dx = x->ob_digit[--x_size]; - while (x_size > 0) - dx = dx * PyLong_BASE + x->ob_digit[--x_size]; - Py_DECREF(x); - - /* Check whether ldexp result will overflow a double. */ - if (shift + x_bits >= DBL_MAX_EXP && - (shift + x_bits > DBL_MAX_EXP || dx == ldexp(1.0, (int)x_bits))) - goto overflow; - result = ldexp(dx, (int)shift); - - success: - return PyFloat_FromDouble(negate ? -result : result); - - underflow_or_zero: - return PyFloat_FromDouble(negate ? -0.0 : 0.0); - - overflow: - PyErr_SetString(PyExc_OverflowError, - "integer division result too large for a float"); - error: - return NULL; -} - -static PyObject * -long_mod(PyObject *a, PyObject *b) -{ - PyLongObject *mod; - - CHECK_BINOP(a, b); - - if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0) - mod = NULL; - return (PyObject *)mod; -} - -static PyObject * -long_divmod(PyObject *a, PyObject *b) -{ - PyLongObject *div, *mod; - PyObject *z; - - CHECK_BINOP(a, b); - - if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, &mod) < 0) { - return NULL; - } - z = PyTuple_New(2); - if (z != NULL) { - PyTuple_SetItem(z, 0, (PyObject *) div); - PyTuple_SetItem(z, 1, (PyObject *) mod); - } - else { - Py_DECREF(div); - Py_DECREF(mod); - } - return z; -} - -/* pow(v, w, x) */ -static PyObject * -long_pow(PyObject *v, PyObject *w, PyObject *x) -{ - PyLongObject *a, *b, *c; /* a,b,c = v,w,x */ - int negativeOutput = 0; /* if x<0 return negative output */ - - PyLongObject *z = NULL; /* accumulated result */ - Py_ssize_t i, j, k; /* counters */ - PyLongObject *temp = NULL; - - /* 5-ary values. If the exponent is large enough, table is - * precomputed so that table[i] == a**i % c for i in range(32). - */ - PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - - /* a, b, c = v, w, x */ - CHECK_BINOP(v, w); - a = (PyLongObject*)v; Py_INCREF(a); - b = (PyLongObject*)w; Py_INCREF(b); - if (PyLong_Check(x)) { - c = (PyLongObject *)x; - Py_INCREF(x); - } - else if (x == Py_None) - c = NULL; - else { - Py_DECREF(a); - Py_DECREF(b); - Py_RETURN_NOTIMPLEMENTED; - } - - if (Py_SIZE(b) < 0) { /* if exponent is negative */ - if (c) { - PyErr_SetString(PyExc_TypeError, "pow() 2nd argument " - "cannot be negative when 3rd argument specified"); - goto Error; - } - else { - /* else return a float. This works because we know - that this calls float_pow() which converts its - arguments to double. */ - Py_DECREF(a); - Py_DECREF(b); - return PyFloat_Type.tp_as_number->nb_power(v, w, x); - } - } - - if (c) { - /* if modulus == 0: - raise ValueError() */ - if (Py_SIZE(c) == 0) { - PyErr_SetString(PyExc_ValueError, - "pow() 3rd argument cannot be 0"); - goto Error; - } - - /* if modulus < 0: - negativeOutput = True - modulus = -modulus */ - if (Py_SIZE(c) < 0) { - negativeOutput = 1; - temp = (PyLongObject *)_PyLong_Copy(c); - if (temp == NULL) - goto Error; - Py_DECREF(c); - c = temp; - temp = NULL; - NEGATE(c); - } - - /* if modulus == 1: - return 0 */ - if ((Py_SIZE(c) == 1) && (c->ob_digit[0] == 1)) { - z = (PyLongObject *)PyLong_FromLong(0L); - goto Done; - } - - /* if base < 0: - base = base % modulus - Having the base positive just makes things easier. */ - if (Py_SIZE(a) < 0) { - if (l_divmod(a, c, NULL, &temp) < 0) - goto Error; - Py_DECREF(a); - a = temp; - temp = NULL; - } - } - - /* At this point a, b, and c are guaranteed non-negative UNLESS - c is NULL, in which case a may be negative. */ - - z = (PyLongObject *)PyLong_FromLong(1L); - if (z == NULL) - goto Error; - - /* Perform a modular reduction, X = X % c, but leave X alone if c - * is NULL. - */ -#define REDUCE(X) \ - do { \ - if (c != NULL) { \ - if (l_divmod(X, c, NULL, &temp) < 0) \ - goto Error; \ - Py_XDECREF(X); \ - X = temp; \ - temp = NULL; \ - } \ - } while(0) - - /* Multiply two values, then reduce the result: - result = X*Y % c. If c is NULL, skip the mod. */ -#define MULT(X, Y, result) \ - do { \ - temp = (PyLongObject *)long_mul(X, Y); \ - if (temp == NULL) \ - goto Error; \ - Py_XDECREF(result); \ - result = temp; \ - temp = NULL; \ - REDUCE(result); \ - } while(0) - - if (Py_SIZE(b) <= FIVEARY_CUTOFF) { - /* Left-to-right binary exponentiation (HAC Algorithm 14.79) */ - /* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf */ - for (i = Py_SIZE(b) - 1; i >= 0; --i) { - digit bi = b->ob_digit[i]; - - for (j = (digit)1 << (PyLong_SHIFT-1); j != 0; j >>= 1) { - MULT(z, z, z); - if (bi & j) - MULT(z, a, z); - } - } - } - else { - /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ - Py_INCREF(z); /* still holds 1L */ - table[0] = z; - for (i = 1; i < 32; ++i) - MULT(table[i-1], a, table[i]); - - for (i = Py_SIZE(b) - 1; i >= 0; --i) { - const digit bi = b->ob_digit[i]; - - for (j = PyLong_SHIFT - 5; j >= 0; j -= 5) { - const int index = (bi >> j) & 0x1f; - for (k = 0; k < 5; ++k) - MULT(z, z, z); - if (index) - MULT(z, table[index], z); - } - } - } - - if (negativeOutput && (Py_SIZE(z) != 0)) { - temp = (PyLongObject *)long_sub(z, c); - if (temp == NULL) - goto Error; - Py_DECREF(z); - z = temp; - temp = NULL; - } - goto Done; - - Error: - if (z != NULL) { - Py_DECREF(z); - z = NULL; - } - /* fall through */ - Done: - if (Py_SIZE(b) > FIVEARY_CUTOFF) { - for (i = 0; i < 32; ++i) - Py_XDECREF(table[i]); - } - Py_DECREF(a); - Py_DECREF(b); - Py_XDECREF(c); - Py_XDECREF(temp); - return (PyObject *)z; -} - -static PyObject * -long_invert(PyLongObject *v) -{ - /* Implement ~x as -(x+1) */ - PyLongObject *x; - PyLongObject *w; - if (ABS(Py_SIZE(v)) <=1) - return PyLong_FromLong(-(MEDIUM_VALUE(v)+1)); - w = (PyLongObject *)PyLong_FromLong(1L); - if (w == NULL) - return NULL; - x = (PyLongObject *) long_add(v, w); - Py_DECREF(w); - if (x == NULL) - return NULL; - Py_SIZE(x) = -(Py_SIZE(x)); - return (PyObject *)maybe_small_long(x); -} - -static PyObject * -long_neg(PyLongObject *v) -{ - PyLongObject *z; - if (ABS(Py_SIZE(v)) <= 1) - return PyLong_FromLong(-MEDIUM_VALUE(v)); - z = (PyLongObject *)_PyLong_Copy(v); - if (z != NULL) - Py_SIZE(z) = -(Py_SIZE(v)); - return (PyObject *)z; -} - -static PyObject * -long_abs(PyLongObject *v) -{ - if (Py_SIZE(v) < 0) - return long_neg(v); - else - return long_long((PyObject *)v); -} - -static int -long_bool(PyLongObject *v) -{ - return Py_SIZE(v) != 0; -} - -static PyObject * -long_rshift(PyLongObject *a, PyLongObject *b) -{ - PyLongObject *z = NULL; - Py_ssize_t shiftby, newsize, wordshift, loshift, hishift, i, j; - digit lomask, himask; - - CHECK_BINOP(a, b); - - if (Py_SIZE(a) < 0) { - /* Right shifting negative numbers is harder */ - PyLongObject *a1, *a2; - a1 = (PyLongObject *) long_invert(a); - if (a1 == NULL) - goto rshift_error; - a2 = (PyLongObject *) long_rshift(a1, b); - Py_DECREF(a1); - if (a2 == NULL) - goto rshift_error; - z = (PyLongObject *) long_invert(a2); - Py_DECREF(a2); - } - else { - shiftby = PyLong_AsSsize_t((PyObject *)b); - if (shiftby == -1L && PyErr_Occurred()) - goto rshift_error; - if (shiftby < 0) { - PyErr_SetString(PyExc_ValueError, - "negative shift count"); - goto rshift_error; - } - wordshift = shiftby / PyLong_SHIFT; - newsize = ABS(Py_SIZE(a)) - wordshift; - if (newsize <= 0) - return PyLong_FromLong(0); - loshift = shiftby % PyLong_SHIFT; - hishift = PyLong_SHIFT - loshift; - lomask = ((digit)1 << hishift) - 1; - himask = PyLong_MASK ^ lomask; - z = _PyLong_New(newsize); - if (z == NULL) - goto rshift_error; - if (Py_SIZE(a) < 0) - Py_SIZE(z) = -(Py_SIZE(z)); - for (i = 0, j = wordshift; i < newsize; i++, j++) { - z->ob_digit[i] = (a->ob_digit[j] >> loshift) & lomask; - if (i+1 < newsize) - z->ob_digit[i] |= (a->ob_digit[j+1] << hishift) & himask; - } - z = long_normalize(z); - } - rshift_error: - return (PyObject *) maybe_small_long(z); - -} - -static PyObject * -long_lshift(PyObject *v, PyObject *w) -{ - /* This version due to Tim Peters */ - PyLongObject *a = (PyLongObject*)v; - PyLongObject *b = (PyLongObject*)w; - PyLongObject *z = NULL; - Py_ssize_t shiftby, oldsize, newsize, wordshift, remshift, i, j; - twodigits accum; - - CHECK_BINOP(a, b); - - shiftby = PyLong_AsSsize_t((PyObject *)b); - if (shiftby == -1L && PyErr_Occurred()) - goto lshift_error; - if (shiftby < 0) { - PyErr_SetString(PyExc_ValueError, "negative shift count"); - goto lshift_error; - } - /* wordshift, remshift = divmod(shiftby, PyLong_SHIFT) */ - wordshift = shiftby / PyLong_SHIFT; - remshift = shiftby - wordshift * PyLong_SHIFT; - - oldsize = ABS(Py_SIZE(a)); - newsize = oldsize + wordshift; - if (remshift) - ++newsize; - z = _PyLong_New(newsize); - if (z == NULL) - goto lshift_error; - if (Py_SIZE(a) < 0) - NEGATE(z); - for (i = 0; i < wordshift; i++) - z->ob_digit[i] = 0; - accum = 0; - for (i = wordshift, j = 0; j < oldsize; i++, j++) { - accum |= (twodigits)a->ob_digit[j] << remshift; - z->ob_digit[i] = (digit)(accum & PyLong_MASK); - accum >>= PyLong_SHIFT; - } - if (remshift) - z->ob_digit[newsize-1] = (digit)accum; - else - assert(!accum); - z = long_normalize(z); - lshift_error: - return (PyObject *) maybe_small_long(z); -} - -/* Compute two's complement of digit vector a[0:m], writing result to - z[0:m]. The digit vector a need not be normalized, but should not - be entirely zero. a and z may point to the same digit vector. */ - -static void -v_complement(digit *z, digit *a, Py_ssize_t m) -{ - Py_ssize_t i; - digit carry = 1; - for (i = 0; i < m; ++i) { - carry += a[i] ^ PyLong_MASK; - z[i] = carry & PyLong_MASK; - carry >>= PyLong_SHIFT; - } - assert(carry == 0); -} - -/* Bitwise and/xor/or operations */ - -static PyObject * -long_bitwise(PyLongObject *a, - int op, /* '&', '|', '^' */ - PyLongObject *b) -{ - int nega, negb, negz; - Py_ssize_t size_a, size_b, size_z, i; - PyLongObject *z; - - /* Bitwise operations for negative numbers operate as though - on a two's complement representation. So convert arguments - from sign-magnitude to two's complement, and convert the - result back to sign-magnitude at the end. */ - - /* If a is negative, replace it by its two's complement. */ - size_a = ABS(Py_SIZE(a)); - nega = Py_SIZE(a) < 0; - if (nega) { - z = _PyLong_New(size_a); - if (z == NULL) - return NULL; - v_complement(z->ob_digit, a->ob_digit, size_a); - a = z; - } - else - /* Keep reference count consistent. */ - Py_INCREF(a); - - /* Same for b. */ - size_b = ABS(Py_SIZE(b)); - negb = Py_SIZE(b) < 0; - if (negb) { - z = _PyLong_New(size_b); - if (z == NULL) { - Py_DECREF(a); - return NULL; - } - v_complement(z->ob_digit, b->ob_digit, size_b); - b = z; - } - else - Py_INCREF(b); - - /* Swap a and b if necessary to ensure size_a >= size_b. */ - if (size_a < size_b) { - z = a; a = b; b = z; - size_z = size_a; size_a = size_b; size_b = size_z; - negz = nega; nega = negb; negb = negz; - } - - /* JRH: The original logic here was to allocate the result value (z) - as the longer of the two operands. However, there are some cases - where the result is guaranteed to be shorter than that: AND of two - positives, OR of two negatives: use the shorter number. AND with - mixed signs: use the positive number. OR with mixed signs: use the - negative number. - */ - switch (op) { - case '^': - negz = nega ^ negb; - size_z = size_a; - break; - case '&': - negz = nega & negb; - size_z = negb ? size_a : size_b; - break; - case '|': - negz = nega | negb; - size_z = negb ? size_b : size_a; - break; - default: - PyErr_BadArgument(); - return NULL; - } - - /* We allow an extra digit if z is negative, to make sure that - the final two's complement of z doesn't overflow. */ - z = _PyLong_New(size_z + negz); - if (z == NULL) { - Py_DECREF(a); - Py_DECREF(b); - return NULL; - } - - /* Compute digits for overlap of a and b. */ - switch(op) { - case '&': - for (i = 0; i < size_b; ++i) - z->ob_digit[i] = a->ob_digit[i] & b->ob_digit[i]; - break; - case '|': - for (i = 0; i < size_b; ++i) - z->ob_digit[i] = a->ob_digit[i] | b->ob_digit[i]; - break; - case '^': - for (i = 0; i < size_b; ++i) - z->ob_digit[i] = a->ob_digit[i] ^ b->ob_digit[i]; - break; - default: - PyErr_BadArgument(); - return NULL; - } - - /* Copy any remaining digits of a, inverting if necessary. */ - if (op == '^' && negb) - for (; i < size_z; ++i) - z->ob_digit[i] = a->ob_digit[i] ^ PyLong_MASK; - else if (i < size_z) - memcpy(&z->ob_digit[i], &a->ob_digit[i], - (size_z-i)*sizeof(digit)); - - /* Complement result if negative. */ - if (negz) { - Py_SIZE(z) = -(Py_SIZE(z)); - z->ob_digit[size_z] = PyLong_MASK; - v_complement(z->ob_digit, z->ob_digit, size_z+1); - } - - Py_DECREF(a); - Py_DECREF(b); - return (PyObject *)maybe_small_long(long_normalize(z)); -} - -static PyObject * -long_and(PyObject *a, PyObject *b) -{ - PyObject *c; - CHECK_BINOP(a, b); - c = long_bitwise((PyLongObject*)a, '&', (PyLongObject*)b); - return c; -} - -static PyObject * -long_xor(PyObject *a, PyObject *b) -{ - PyObject *c; - CHECK_BINOP(a, b); - c = long_bitwise((PyLongObject*)a, '^', (PyLongObject*)b); - return c; -} - -static PyObject * -long_or(PyObject *a, PyObject *b) -{ - PyObject *c; - CHECK_BINOP(a, b); - c = long_bitwise((PyLongObject*)a, '|', (PyLongObject*)b); - return c; -} - -static PyObject * -long_long(PyObject *v) -{ - if (PyLong_CheckExact(v)) - Py_INCREF(v); - else - v = _PyLong_Copy((PyLongObject *)v); - return v; -} - -static PyObject * -long_float(PyObject *v) -{ - double result; - result = PyLong_AsDouble(v); - if (result == -1.0 && PyErr_Occurred()) - return NULL; - return PyFloat_FromDouble(result); -} - -static PyObject * -long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); - -static PyObject * -long_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *obase = NULL, *x = NULL; - long base; - int overflow; - static char *kwlist[] = {"x", "base", 0}; - - if (type != &PyLong_Type) - return long_subtype_new(type, args, kwds); /* Wimp out */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OO:int", kwlist, - &x, &obase)) - return NULL; - if (x == NULL) - return PyLong_FromLong(0L); - if (obase == NULL) - return PyNumber_Long(x); - - base = PyLong_AsLongAndOverflow(obase, &overflow); - if (base == -1 && PyErr_Occurred()) - return NULL; - if (overflow || (base != 0 && base < 2) || base > 36) { - PyErr_SetString(PyExc_ValueError, - "int() arg 2 must be >= 2 and <= 36"); - return NULL; - } - - if (PyUnicode_Check(x)) - return PyLong_FromUnicodeObject(x, (int)base); - else if (PyByteArray_Check(x) || PyBytes_Check(x)) { - /* Since PyLong_FromString doesn't have a length parameter, - * check here for possible NULs in the string. */ - char *string; - Py_ssize_t size = Py_SIZE(x); - if (PyByteArray_Check(x)) - string = PyByteArray_AS_STRING(x); - else - string = PyBytes_AS_STRING(x); - if (strlen(string) != (size_t)size) { - /* We only see this if there's a null byte in x, - x is a bytes or buffer, *and* a base is given. */ - PyErr_Format(PyExc_ValueError, - "invalid literal for int() with base %d: %R", - (int)base, x); - return NULL; - } - return PyLong_FromString(string, NULL, (int)base); - } - else { - PyErr_SetString(PyExc_TypeError, - "int() can't convert non-string with explicit base"); - return NULL; - } -} - -/* Wimpy, slow approach to tp_new calls for subtypes of long: - first create a regular long from whatever arguments we got, - then allocate a subtype instance and initialize it from - the regular long. The regular long is then thrown away. -*/ -static PyObject * -long_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyLongObject *tmp, *newobj; - Py_ssize_t i, n; - - assert(PyType_IsSubtype(type, &PyLong_Type)); - tmp = (PyLongObject *)long_new(&PyLong_Type, args, kwds); - if (tmp == NULL) - return NULL; - assert(PyLong_CheckExact(tmp)); - n = Py_SIZE(tmp); - if (n < 0) - n = -n; - newobj = (PyLongObject *)type->tp_alloc(type, n); - if (newobj == NULL) { - Py_DECREF(tmp); - return NULL; - } - assert(PyLong_Check(newobj)); - Py_SIZE(newobj) = Py_SIZE(tmp); - for (i = 0; i < n; i++) - newobj->ob_digit[i] = tmp->ob_digit[i]; - Py_DECREF(tmp); - return (PyObject *)newobj; -} - -static PyObject * -long_getnewargs(PyLongObject *v) -{ - return Py_BuildValue("(N)", _PyLong_Copy(v)); -} - -static PyObject * -long_get0(PyLongObject *v, void *context) { - return PyLong_FromLong(0L); -} - -static PyObject * -long_get1(PyLongObject *v, void *context) { - return PyLong_FromLong(1L); -} - -static PyObject * -long__format__(PyObject *self, PyObject *args) -{ - PyObject *format_spec; - - if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); -} - -/* Return a pair (q, r) such that a = b * q + r, and - abs(r) <= abs(b)/2, with equality possible only if q is even. - In other words, q == a / b, rounded to the nearest integer using - round-half-to-even. */ - -PyObject * -_PyLong_DivmodNear(PyObject *a, PyObject *b) -{ - PyLongObject *quo = NULL, *rem = NULL; - PyObject *one = NULL, *twice_rem, *result, *temp; - int cmp, quo_is_odd, quo_is_neg; - - /* Equivalent Python code: - - def divmod_near(a, b): - q, r = divmod(a, b) - # round up if either r / b > 0.5, or r / b == 0.5 and q is odd. - # The expression r / b > 0.5 is equivalent to 2 * r > b if b is - # positive, 2 * r < b if b negative. - greater_than_half = 2*r > b if b > 0 else 2*r < b - exactly_half = 2*r == b - if greater_than_half or exactly_half and q % 2 == 1: - q += 1 - r -= b - return q, r - - */ - if (!PyLong_Check(a) || !PyLong_Check(b)) { - PyErr_SetString(PyExc_TypeError, - "non-integer arguments in division"); - return NULL; - } - - /* Do a and b have different signs? If so, quotient is negative. */ - quo_is_neg = (Py_SIZE(a) < 0) != (Py_SIZE(b) < 0); - - one = PyLong_FromLong(1L); - if (one == NULL) - return NULL; - - if (long_divrem((PyLongObject*)a, (PyLongObject*)b, &quo, &rem) < 0) - goto error; - - /* compare twice the remainder with the divisor, to see - if we need to adjust the quotient and remainder */ - twice_rem = long_lshift((PyObject *)rem, one); - if (twice_rem == NULL) - goto error; - if (quo_is_neg) { - temp = long_neg((PyLongObject*)twice_rem); - Py_DECREF(twice_rem); - twice_rem = temp; - if (twice_rem == NULL) - goto error; - } - cmp = long_compare((PyLongObject *)twice_rem, (PyLongObject *)b); - Py_DECREF(twice_rem); - - quo_is_odd = Py_SIZE(quo) != 0 && ((quo->ob_digit[0] & 1) != 0); - if ((Py_SIZE(b) < 0 ? cmp < 0 : cmp > 0) || (cmp == 0 && quo_is_odd)) { - /* fix up quotient */ - if (quo_is_neg) - temp = long_sub(quo, (PyLongObject *)one); - else - temp = long_add(quo, (PyLongObject *)one); - Py_DECREF(quo); - quo = (PyLongObject *)temp; - if (quo == NULL) - goto error; - /* and remainder */ - if (quo_is_neg) - temp = long_add(rem, (PyLongObject *)b); - else - temp = long_sub(rem, (PyLongObject *)b); - Py_DECREF(rem); - rem = (PyLongObject *)temp; - if (rem == NULL) - goto error; - } - - result = PyTuple_New(2); - if (result == NULL) - goto error; - - /* PyTuple_SET_ITEM steals references */ - PyTuple_SET_ITEM(result, 0, (PyObject *)quo); - PyTuple_SET_ITEM(result, 1, (PyObject *)rem); - Py_DECREF(one); - return result; - - error: - Py_XDECREF(quo); - Py_XDECREF(rem); - Py_XDECREF(one); - return NULL; -} - -static PyObject * -long_round(PyObject *self, PyObject *args) -{ - PyObject *o_ndigits=NULL, *temp, *result, *ndigits; - - /* To round an integer m to the nearest 10**n (n positive), we make use of - * the divmod_near operation, defined by: - * - * divmod_near(a, b) = (q, r) - * - * where q is the nearest integer to the quotient a / b (the - * nearest even integer in the case of a tie) and r == a - q * b. - * Hence q * b = a - r is the nearest multiple of b to a, - * preferring even multiples in the case of a tie. - * - * So the nearest multiple of 10**n to m is: - * - * m - divmod_near(m, 10**n)[1]. - */ - if (!PyArg_ParseTuple(args, "|O", &o_ndigits)) - return NULL; - if (o_ndigits == NULL) - return long_long(self); - - ndigits = PyNumber_Index(o_ndigits); - if (ndigits == NULL) - return NULL; - - /* if ndigits >= 0 then no rounding is necessary; return self unchanged */ - if (Py_SIZE(ndigits) >= 0) { - Py_DECREF(ndigits); - return long_long(self); - } - - /* result = self - divmod_near(self, 10 ** -ndigits)[1] */ - temp = long_neg((PyLongObject*)ndigits); - Py_DECREF(ndigits); - ndigits = temp; - if (ndigits == NULL) - return NULL; - - result = PyLong_FromLong(10L); - if (result == NULL) { - Py_DECREF(ndigits); - return NULL; - } - - temp = long_pow(result, ndigits, Py_None); - Py_DECREF(ndigits); - Py_DECREF(result); - result = temp; - if (result == NULL) - return NULL; - - temp = _PyLong_DivmodNear(self, result); - Py_DECREF(result); - result = temp; - if (result == NULL) - return NULL; - - temp = long_sub((PyLongObject *)self, - (PyLongObject *)PyTuple_GET_ITEM(result, 1)); - Py_DECREF(result); - result = temp; - - return result; -} - -static PyObject * -long_sizeof(PyLongObject *v) -{ - Py_ssize_t res; - - res = offsetof(PyLongObject, ob_digit) + ABS(Py_SIZE(v))*sizeof(digit); - return PyLong_FromSsize_t(res); -} - -static PyObject * -long_bit_length(PyLongObject *v) -{ - PyLongObject *result, *x, *y; - Py_ssize_t ndigits, msd_bits = 0; - digit msd; - - assert(v != NULL); - assert(PyLong_Check(v)); - - ndigits = ABS(Py_SIZE(v)); - if (ndigits == 0) - return PyLong_FromLong(0); - - msd = v->ob_digit[ndigits-1]; - while (msd >= 32) { - msd_bits += 6; - msd >>= 6; - } - msd_bits += (long)(BitLengthTable[msd]); - - if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) - return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); - - /* expression above may overflow; use Python integers instead */ - result = (PyLongObject *)PyLong_FromSsize_t(ndigits - 1); - if (result == NULL) - return NULL; - x = (PyLongObject *)PyLong_FromLong(PyLong_SHIFT); - if (x == NULL) - goto error; - y = (PyLongObject *)long_mul(result, x); - Py_DECREF(x); - if (y == NULL) - goto error; - Py_DECREF(result); - result = y; - - x = (PyLongObject *)PyLong_FromLong((long)msd_bits); - if (x == NULL) - goto error; - y = (PyLongObject *)long_add(result, x); - Py_DECREF(x); - if (y == NULL) - goto error; - Py_DECREF(result); - result = y; - - return (PyObject *)result; - - error: - Py_DECREF(result); - return NULL; -} - -PyDoc_STRVAR(long_bit_length_doc, -"int.bit_length() -> int\n\ -\n\ -Number of bits necessary to represent self in binary.\n\ ->>> bin(37)\n\ -'0b100101'\n\ ->>> (37).bit_length()\n\ -6"); - -#if 0 -static PyObject * -long_is_finite(PyObject *v) -{ - Py_RETURN_TRUE; -} -#endif - - -static PyObject * -long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds) -{ - PyObject *byteorder_str; - PyObject *is_signed_obj = NULL; - Py_ssize_t length; - int little_endian; - int is_signed; - PyObject *bytes; - static char *kwlist[] = {"length", "byteorder", "signed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "nU|O:to_bytes", kwlist, - &length, &byteorder_str, - &is_signed_obj)) - return NULL; - - if (args != NULL && Py_SIZE(args) > 2) { - PyErr_SetString(PyExc_TypeError, - "'signed' is a keyword-only argument"); - return NULL; - } - - if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little")) - little_endian = 1; - else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big")) - little_endian = 0; - else { - PyErr_SetString(PyExc_ValueError, - "byteorder must be either 'little' or 'big'"); - return NULL; - } - - if (is_signed_obj != NULL) { - int cmp = PyObject_IsTrue(is_signed_obj); - if (cmp < 0) - return NULL; - is_signed = cmp ? 1 : 0; - } - else { - /* If the signed argument was omitted, use False as the - default. */ - is_signed = 0; - } - - if (length < 0) { - PyErr_SetString(PyExc_ValueError, - "length argument must be non-negative"); - return NULL; - } - - bytes = PyBytes_FromStringAndSize(NULL, length); - if (bytes == NULL) - return NULL; - - if (_PyLong_AsByteArray(v, (unsigned char *)PyBytes_AS_STRING(bytes), - length, little_endian, is_signed) < 0) { - Py_DECREF(bytes); - return NULL; - } - - return bytes; -} - -PyDoc_STRVAR(long_to_bytes_doc, -"int.to_bytes(length, byteorder, *, signed=False) -> bytes\n\ -\n\ -Return an array of bytes representing an integer.\n\ -\n\ -The integer is represented using length bytes. An OverflowError is\n\ -raised if the integer is not representable with the given number of\n\ -bytes.\n\ -\n\ -The byteorder argument determines the byte order used to represent the\n\ -integer. If byteorder is 'big', the most significant byte is at the\n\ -beginning of the byte array. If byteorder is 'little', the most\n\ -significant byte is at the end of the byte array. To request the native\n\ -byte order of the host system, use `sys.byteorder' as the byte order value.\n\ -\n\ -The signed keyword-only argument determines whether two's complement is\n\ -used to represent the integer. If signed is False and a negative integer\n\ -is given, an OverflowError is raised."); - -static PyObject * -long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *byteorder_str; - PyObject *is_signed_obj = NULL; - int little_endian; - int is_signed; - PyObject *obj; - PyObject *bytes; - PyObject *long_obj; - static char *kwlist[] = {"bytes", "byteorder", "signed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OU|O:from_bytes", kwlist, - &obj, &byteorder_str, - &is_signed_obj)) - return NULL; - - if (args != NULL && Py_SIZE(args) > 2) { - PyErr_SetString(PyExc_TypeError, - "'signed' is a keyword-only argument"); - return NULL; - } - - if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little")) - little_endian = 1; - else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big")) - little_endian = 0; - else { - PyErr_SetString(PyExc_ValueError, - "byteorder must be either 'little' or 'big'"); - return NULL; - } - - if (is_signed_obj != NULL) { - int cmp = PyObject_IsTrue(is_signed_obj); - if (cmp < 0) - return NULL; - is_signed = cmp ? 1 : 0; - } - else { - /* If the signed argument was omitted, use False as the - default. */ - is_signed = 0; - } - - bytes = PyObject_Bytes(obj); - if (bytes == NULL) - return NULL; - - long_obj = _PyLong_FromByteArray( - (unsigned char *)PyBytes_AS_STRING(bytes), Py_SIZE(bytes), - little_endian, is_signed); - Py_DECREF(bytes); - - /* If from_bytes() was used on subclass, allocate new subclass - * instance, initialize it with decoded long value and return it. - */ - if (type != &PyLong_Type && PyType_IsSubtype(type, &PyLong_Type)) { - PyLongObject *newobj; - int i; - Py_ssize_t n = ABS(Py_SIZE(long_obj)); - - newobj = (PyLongObject *)type->tp_alloc(type, n); - if (newobj == NULL) { - Py_DECREF(long_obj); - return NULL; - } - assert(PyLong_Check(newobj)); - Py_SIZE(newobj) = Py_SIZE(long_obj); - for (i = 0; i < n; i++) { - newobj->ob_digit[i] = - ((PyLongObject *)long_obj)->ob_digit[i]; - } - Py_DECREF(long_obj); - return (PyObject *)newobj; - } - - return long_obj; -} - -PyDoc_STRVAR(long_from_bytes_doc, -"int.from_bytes(bytes, byteorder, *, signed=False) -> int\n\ -\n\ -Return the integer represented by the given array of bytes.\n\ -\n\ -The bytes argument must either support the buffer protocol or be an\n\ -iterable object producing bytes. Bytes and bytearray are examples of\n\ -built-in objects that support the buffer protocol.\n\ -\n\ -The byteorder argument determines the byte order used to represent the\n\ -integer. If byteorder is 'big', the most significant byte is at the\n\ -beginning of the byte array. If byteorder is 'little', the most\n\ -significant byte is at the end of the byte array. To request the native\n\ -byte order of the host system, use `sys.byteorder' as the byte order value.\n\ -\n\ -The signed keyword-only argument indicates whether two's complement is\n\ -used to represent the integer."); - -static PyMethodDef long_methods[] = { - {"conjugate", (PyCFunction)long_long, METH_NOARGS, - "Returns self, the complex conjugate of any int."}, - {"bit_length", (PyCFunction)long_bit_length, METH_NOARGS, - long_bit_length_doc}, -#if 0 - {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS, - "Returns always True."}, -#endif - {"to_bytes", (PyCFunction)long_to_bytes, - METH_VARARGS|METH_KEYWORDS, long_to_bytes_doc}, - {"from_bytes", (PyCFunction)long_from_bytes, - METH_VARARGS|METH_KEYWORDS|METH_CLASS, long_from_bytes_doc}, - {"__trunc__", (PyCFunction)long_long, METH_NOARGS, - "Truncating an Integral returns itself."}, - {"__floor__", (PyCFunction)long_long, METH_NOARGS, - "Flooring an Integral returns itself."}, - {"__ceil__", (PyCFunction)long_long, METH_NOARGS, - "Ceiling of an Integral returns itself."}, - {"__round__", (PyCFunction)long_round, METH_VARARGS, - "Rounding an Integral returns itself.\n" - "Rounding with an ndigits argument also returns an integer."}, - {"__getnewargs__", (PyCFunction)long_getnewargs, METH_NOARGS}, - {"__format__", (PyCFunction)long__format__, METH_VARARGS}, - {"__sizeof__", (PyCFunction)long_sizeof, METH_NOARGS, - "Returns size in memory, in bytes"}, - {NULL, NULL} /* sentinel */ -}; - -static PyGetSetDef long_getset[] = { - {"real", - (getter)long_long, (setter)NULL, - "the real part of a complex number", - NULL}, - {"imag", - (getter)long_get0, (setter)NULL, - "the imaginary part of a complex number", - NULL}, - {"numerator", - (getter)long_long, (setter)NULL, - "the numerator of a rational number in lowest terms", - NULL}, - {"denominator", - (getter)long_get1, (setter)NULL, - "the denominator of a rational number in lowest terms", - NULL}, - {NULL} /* Sentinel */ -}; - -PyDoc_STRVAR(long_doc, -"int(x[, base]) -> integer\n\ -\n\ -Convert a string or number to an integer, if possible. A floating\n\ -point argument will be truncated towards zero (this does not include a\n\ -string representation of a floating point number!) When converting a\n\ -string, use the optional base. It is an error to supply a base when\n\ -converting a non-string."); - -static PyNumberMethods long_as_number = { - (binaryfunc)long_add, /*nb_add*/ - (binaryfunc)long_sub, /*nb_subtract*/ - (binaryfunc)long_mul, /*nb_multiply*/ - long_mod, /*nb_remainder*/ - long_divmod, /*nb_divmod*/ - long_pow, /*nb_power*/ - (unaryfunc)long_neg, /*nb_negative*/ - (unaryfunc)long_long, /*tp_positive*/ - (unaryfunc)long_abs, /*tp_absolute*/ - (inquiry)long_bool, /*tp_bool*/ - (unaryfunc)long_invert, /*nb_invert*/ - long_lshift, /*nb_lshift*/ - (binaryfunc)long_rshift, /*nb_rshift*/ - long_and, /*nb_and*/ - long_xor, /*nb_xor*/ - long_or, /*nb_or*/ - long_long, /*nb_int*/ - 0, /*nb_reserved*/ - long_float, /*nb_float*/ - 0, /* nb_inplace_add */ - 0, /* nb_inplace_subtract */ - 0, /* nb_inplace_multiply */ - 0, /* nb_inplace_remainder */ - 0, /* nb_inplace_power */ - 0, /* nb_inplace_lshift */ - 0, /* nb_inplace_rshift */ - 0, /* nb_inplace_and */ - 0, /* nb_inplace_xor */ - 0, /* nb_inplace_or */ - long_div, /* nb_floor_divide */ - long_true_divide, /* nb_true_divide */ - 0, /* nb_inplace_floor_divide */ - 0, /* nb_inplace_true_divide */ - long_long, /* nb_index */ -}; - -PyTypeObject PyLong_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "int", /* tp_name */ - offsetof(PyLongObject, ob_digit), /* tp_basicsize */ - sizeof(digit), /* tp_itemsize */ - long_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - long_to_decimal_string, /* tp_repr */ - &long_as_number, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)long_hash, /* tp_hash */ - 0, /* tp_call */ - long_to_decimal_string, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_LONG_SUBCLASS, /* tp_flags */ - long_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - long_richcompare, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - long_methods, /* tp_methods */ - 0, /* tp_members */ - long_getset, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - long_new, /* tp_new */ - PyObject_Del, /* tp_free */ -}; - -static PyTypeObject Int_InfoType; - -PyDoc_STRVAR(int_info__doc__, -"sys.int_info\n\ -\n\ -A struct sequence that holds information about Python's\n\ -internal representation of integers. The attributes are read only."); - -static PyStructSequence_Field int_info_fields[] = { - {"bits_per_digit", "size of a digit in bits"}, - {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, - {NULL, NULL} -}; - -static PyStructSequence_Desc int_info_desc = { - "sys.int_info", /* name */ - int_info__doc__, /* doc */ - int_info_fields, /* fields */ - 2 /* number of fields */ -}; - -PyObject * -PyLong_GetInfo(void) -{ - PyObject* int_info; - int field = 0; - int_info = PyStructSequence_New(&Int_InfoType); - if (int_info == NULL) - return NULL; - PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(PyLong_SHIFT)); - PyStructSequence_SET_ITEM(int_info, field++, - PyLong_FromLong(sizeof(digit))); - if (PyErr_Occurred()) { - Py_CLEAR(int_info); - return NULL; - } - return int_info; -} - -int -_PyLong_Init(void) -{ -#if NSMALLNEGINTS + NSMALLPOSINTS > 0 - int ival, size; - PyLongObject *v = small_ints; - - for (ival = -NSMALLNEGINTS; ival < NSMALLPOSINTS; ival++, v++) { - size = (ival < 0) ? -1 : ((ival == 0) ? 0 : 1); - if (Py_TYPE(v) == &PyLong_Type) { - /* The element is already initialized, most likely - * the Python interpreter was initialized before. - */ - Py_ssize_t refcnt; - PyObject* op = (PyObject*)v; - - refcnt = Py_REFCNT(op) < 0 ? 0 : Py_REFCNT(op); - _Py_NewReference(op); - /* _Py_NewReference sets the ref count to 1 but - * the ref count might be larger. Set the refcnt - * to the original refcnt + 1 */ - Py_REFCNT(op) = refcnt + 1; - assert(Py_SIZE(op) == size); - assert(v->ob_digit[0] == abs(ival)); - } - else { - PyObject_INIT(v, &PyLong_Type); - } - Py_SIZE(v) = size; - v->ob_digit[0] = abs(ival); - } -#endif - /* initialize int_info */ - if (Int_InfoType.tp_name == 0) - PyStructSequence_InitType(&Int_InfoType, &int_info_desc); - - return 1; -} - -void -PyLong_Fini(void) -{ - /* Integers are currently statically allocated. Py_DECREF is not - needed, but Python must forget about the reference or multiple - reinitializations will fail. */ -#if NSMALLNEGINTS + NSMALLPOSINTS > 0 - int i; - PyLongObject *v = small_ints; - for (i = 0; i < NSMALLNEGINTS + NSMALLPOSINTS; i++, v++) { - _Py_DEC_REFTOTAL; - _Py_ForgetReference((PyObject*)v); - } -#endif -}
--- a/cos/python/Objects/object.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1737 +0,0 @@ - -/* Generic object operations; and implementation of None */ - -#include "Python.h" -#include "frameobject.h" - -/* Object allocation routines used by NEWOBJ and NEWVAROBJ macros. - These are used by the individual routines for object creation. - Do not call them otherwise, they do not initialize the object! */ - -void -Py_IncRef(PyObject *o) -{ - Py_XINCREF(o); -} - -void -Py_DecRef(PyObject *o) -{ - Py_XDECREF(o); -} - -PyObject * -PyObject_Init(PyObject *op, PyTypeObject *tp) -{ - if (op == NULL) - return PyErr_NoMemory(); - /* Any changes should be reflected in PyObject_INIT (objimpl.h) */ - Py_TYPE(op) = tp; - _Py_NewReference(op); - return op; -} - -PyVarObject * -PyObject_InitVar(PyVarObject *op, PyTypeObject *tp, Py_ssize_t size) -{ - if (op == NULL) - return (PyVarObject *) PyErr_NoMemory(); - /* Any changes should be reflected in PyObject_INIT_VAR */ - op->ob_size = size; - Py_TYPE(op) = tp; - _Py_NewReference((PyObject *)op); - return op; -} - -PyObject * -_PyObject_New(PyTypeObject *tp) -{ - PyObject *op; - op = (PyObject *) PyObject_MALLOC(_PyObject_SIZE(tp)); - if (op == NULL) - return PyErr_NoMemory(); - return PyObject_INIT(op, tp); -} - -PyVarObject * -_PyObject_NewVar(PyTypeObject *tp, Py_ssize_t nitems) -{ - PyVarObject *op; - const size_t size = _PyObject_VAR_SIZE(tp, nitems); - op = (PyVarObject *) PyObject_MALLOC(size); - if (op == NULL) - return (PyVarObject *)PyErr_NoMemory(); - return PyObject_INIT_VAR(op, tp, nitems); -} - -int -PyObject_Print(PyObject *op, FILE *fp, int flags) -{ - int ret = 0; - if (PyErr_CheckSignals()) - return -1; -#ifdef USE_STACKCHECK - if (PyOS_CheckStack()) { - PyErr_SetString(PyExc_MemoryError, "stack overflow"); - return -1; - } -#endif - clearerr(fp); /* Clear any previous error condition */ - if (op == NULL) { - Py_BEGIN_ALLOW_THREADS - fprintf(fp, "<nil>"); - Py_END_ALLOW_THREADS - } - else { - if (op->ob_refcnt <= 0) - /* XXX(twouters) cast refcount to long until %zd is - universally available */ - Py_BEGIN_ALLOW_THREADS - fprintf(fp, "<refcnt %ld at %p>", - (long)op->ob_refcnt, op); - Py_END_ALLOW_THREADS - else { - PyObject *s; - if (flags & Py_PRINT_RAW) - s = PyObject_Str(op); - else - s = PyObject_Repr(op); - if (s == NULL) - ret = -1; - else if (PyBytes_Check(s)) { - fwrite(PyBytes_AS_STRING(s), 1, - PyBytes_GET_SIZE(s), fp); - } - else if (PyUnicode_Check(s)) { - PyObject *t; - t = PyUnicode_AsEncodedString(s, "utf-8", "backslashreplace"); - if (t == NULL) - ret = 0; - else { - fwrite(PyBytes_AS_STRING(t), 1, - PyBytes_GET_SIZE(t), fp); - Py_DECREF(t); - } - } - else { - PyErr_Format(PyExc_TypeError, - "str() or repr() returned '%.100s'", - s->ob_type->tp_name); - ret = -1; - } - Py_XDECREF(s); - } - } - if (ret == 0) { - if (ferror(fp)) { - PyErr_SetFromErrno(PyExc_IOError); - clearerr(fp); - ret = -1; - } - } - return ret; -} - -/* For debugging convenience. Set a breakpoint here and call it from your DLL */ -void -_Py_BreakPoint(void) -{ -} - - -/* For debugging convenience. See Misc/gdbinit for some useful gdb hooks */ -void -_PyObject_Dump(PyObject* op) -{ - if (op == NULL) - fprintf(stderr, "NULL\n"); - else { -#ifdef WITH_THREAD - PyGILState_STATE gil; -#endif - fprintf(stderr, "object : "); -#ifdef WITH_THREAD - gil = PyGILState_Ensure(); -#endif - (void)PyObject_Print(op, stderr, 0); -#ifdef WITH_THREAD - PyGILState_Release(gil); -#endif - /* XXX(twouters) cast refcount to long until %zd is - universally available */ - fprintf(stderr, "\n" - "type : %s\n" - "refcount: %ld\n" - "address : %p\n", - Py_TYPE(op)==NULL ? "NULL" : Py_TYPE(op)->tp_name, - (long)op->ob_refcnt, - op); - } -} - -PyObject * -PyObject_Repr(PyObject *v) -{ - PyObject *res; - if (PyErr_CheckSignals()) - return NULL; -#ifdef USE_STACKCHECK - if (PyOS_CheckStack()) { - PyErr_SetString(PyExc_MemoryError, "stack overflow"); - return NULL; - } -#endif - if (v == NULL) - return PyUnicode_FromString("<NULL>"); - if (Py_TYPE(v)->tp_repr == NULL) - return PyUnicode_FromFormat("<%s object at %p>", - v->ob_type->tp_name, v); - res = (*v->ob_type->tp_repr)(v); - if (res == NULL) - return NULL; - if (!PyUnicode_Check(res)) { - PyErr_Format(PyExc_TypeError, - "__repr__ returned non-string (type %.200s)", - res->ob_type->tp_name); - Py_DECREF(res); - return NULL; - } -#ifndef Py_DEBUG - if (PyUnicode_READY(res) < 0) - return NULL; -#endif - return res; -} - -PyObject * -PyObject_Str(PyObject *v) -{ - PyObject *res; - if (PyErr_CheckSignals()) - return NULL; -#ifdef USE_STACKCHECK - if (PyOS_CheckStack()) { - PyErr_SetString(PyExc_MemoryError, "stack overflow"); - return NULL; - } -#endif - if (v == NULL) - return PyUnicode_FromString("<NULL>"); - if (PyUnicode_CheckExact(v)) { -#ifndef Py_DEBUG - if (PyUnicode_READY(v) < 0) - return NULL; -#endif - Py_INCREF(v); - return v; - } - if (Py_TYPE(v)->tp_str == NULL) - return PyObject_Repr(v); - - /* It is possible for a type to have a tp_str representation that loops - infinitely. */ - if (Py_EnterRecursiveCall(" while getting the str of an object")) - return NULL; - res = (*Py_TYPE(v)->tp_str)(v); - Py_LeaveRecursiveCall(); - if (res == NULL) - return NULL; - if (!PyUnicode_Check(res)) { - PyErr_Format(PyExc_TypeError, - "__str__ returned non-string (type %.200s)", - Py_TYPE(res)->tp_name); - Py_DECREF(res); - return NULL; - } -#ifndef Py_DEBUG - if (PyUnicode_READY(res) < 0) - return NULL; -#endif - assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} - -PyObject * -PyObject_ASCII(PyObject *v) -{ - PyObject *repr, *ascii, *res; - - repr = PyObject_Repr(v); - if (repr == NULL) - return NULL; - - /* repr is guaranteed to be a PyUnicode object by PyObject_Repr */ - ascii = _PyUnicode_AsASCIIString(repr, "backslashreplace"); - Py_DECREF(repr); - if (ascii == NULL) - return NULL; - - res = PyUnicode_DecodeASCII( - PyBytes_AS_STRING(ascii), - PyBytes_GET_SIZE(ascii), - NULL); - - Py_DECREF(ascii); - return res; -} - -PyObject * -PyObject_Bytes(PyObject *v) -{ - PyObject *result, *func; - static PyObject *bytesstring = NULL; - - if (v == NULL) - return PyBytes_FromString("<NULL>"); - - if (PyBytes_CheckExact(v)) { - Py_INCREF(v); - return v; - } - - func = _PyObject_LookupSpecial(v, "__bytes__", &bytesstring); - if (func != NULL) { - result = PyObject_CallFunctionObjArgs(func, NULL); - Py_DECREF(func); - if (result == NULL) - return NULL; - if (!PyBytes_Check(result)) { - PyErr_Format(PyExc_TypeError, - "__bytes__ returned non-bytes (type %.200s)", - Py_TYPE(result)->tp_name); - Py_DECREF(result); - return NULL; - } - return result; - } - else if (PyErr_Occurred()) - return NULL; - return PyBytes_FromObject(v); -} - -/* For Python 3.0.1 and later, the old three-way comparison has been - completely removed in favour of rich comparisons. PyObject_Compare() and - PyObject_Cmp() are gone, and the builtin cmp function no longer exists. - The old tp_compare slot has been renamed to tp_reserved, and should no - longer be used. Use tp_richcompare instead. - - See (*) below for practical amendments. - - tp_richcompare gets called with a first argument of the appropriate type - and a second object of an arbitrary type. We never do any kind of - coercion. - - The tp_richcompare slot should return an object, as follows: - - NULL if an exception occurred - NotImplemented if the requested comparison is not implemented - any other false value if the requested comparison is false - any other true value if the requested comparison is true - - The PyObject_RichCompare[Bool]() wrappers raise TypeError when they get - NotImplemented. - - (*) Practical amendments: - - - If rich comparison returns NotImplemented, == and != are decided by - comparing the object pointer (i.e. falling back to the base object - implementation). - -*/ - -/* Map rich comparison operators to their swapped version, e.g. LT <--> GT */ -int _Py_SwappedOp[] = {Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE}; - -static char *opstrings[] = {"<", "<=", "==", "!=", ">", ">="}; - -/* Perform a rich comparison, raising TypeError when the requested comparison - operator is not supported. */ -static PyObject * -do_richcompare(PyObject *v, PyObject *w, int op) -{ - richcmpfunc f; - PyObject *res; - int checked_reverse_op = 0; - - if (v->ob_type != w->ob_type && - PyType_IsSubtype(w->ob_type, v->ob_type) && - (f = w->ob_type->tp_richcompare) != NULL) { - checked_reverse_op = 1; - res = (*f)(w, v, _Py_SwappedOp[op]); - if (res != Py_NotImplemented) - return res; - Py_DECREF(res); - } - if ((f = v->ob_type->tp_richcompare) != NULL) { - res = (*f)(v, w, op); - if (res != Py_NotImplemented) - return res; - Py_DECREF(res); - } - if (!checked_reverse_op && (f = w->ob_type->tp_richcompare) != NULL) { - res = (*f)(w, v, _Py_SwappedOp[op]); - if (res != Py_NotImplemented) - return res; - Py_DECREF(res); - } - /* If neither object implements it, provide a sensible default - for == and !=, but raise an exception for ordering. */ - switch (op) { - case Py_EQ: - res = (v == w) ? Py_True : Py_False; - break; - case Py_NE: - res = (v != w) ? Py_True : Py_False; - break; - default: - /* XXX Special-case None so it doesn't show as NoneType() */ - PyErr_Format(PyExc_TypeError, - "unorderable types: %.100s() %s %.100s()", - v->ob_type->tp_name, - opstrings[op], - w->ob_type->tp_name); - return NULL; - } - Py_INCREF(res); - return res; -} - -/* Perform a rich comparison with object result. This wraps do_richcompare() - with a check for NULL arguments and a recursion check. */ - -PyObject * -PyObject_RichCompare(PyObject *v, PyObject *w, int op) -{ - PyObject *res; - - assert(Py_LT <= op && op <= Py_GE); - if (v == NULL || w == NULL) { - if (!PyErr_Occurred()) - PyErr_BadInternalCall(); - return NULL; - } - if (Py_EnterRecursiveCall(" in comparison")) - return NULL; - res = do_richcompare(v, w, op); - Py_LeaveRecursiveCall(); - return res; -} - -/* Perform a rich comparison with integer result. This wraps - PyObject_RichCompare(), returning -1 for error, 0 for false, 1 for true. */ -int -PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) -{ - PyObject *res; - int ok; - - /* Quick result when objects are the same. - Guarantees that identity implies equality. */ - if (v == w) { - if (op == Py_EQ) - return 1; - else if (op == Py_NE) - return 0; - } - - res = PyObject_RichCompare(v, w, op); - if (res == NULL) - return -1; - if (PyBool_Check(res)) - ok = (res == Py_True); - else - ok = PyObject_IsTrue(res); - Py_DECREF(res); - return ok; -} - -/* Set of hash utility functions to help maintaining the invariant that - if a==b then hash(a)==hash(b) - - All the utility functions (_Py_Hash*()) return "-1" to signify an error. -*/ - -/* For numeric types, the hash of a number x is based on the reduction - of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that - hash(x) == hash(y) whenever x and y are numerically equal, even if - x and y have different types. - - A quick summary of the hashing strategy: - - (1) First define the 'reduction of x modulo P' for any rational - number x; this is a standard extension of the usual notion of - reduction modulo P for integers. If x == p/q (written in lowest - terms), the reduction is interpreted as the reduction of p times - the inverse of the reduction of q, all modulo P; if q is exactly - divisible by P then define the reduction to be infinity. So we've - got a well-defined map - - reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }. - - (2) Now for a rational number x, define hash(x) by: - - reduce(x) if x >= 0 - -reduce(-x) if x < 0 - - If the result of the reduction is infinity (this is impossible for - integers, floats and Decimals) then use the predefined hash value - _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. - _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the - hashes of float and Decimal infinities and nans. - - A selling point for the above strategy is that it makes it possible - to compute hashes of decimal and binary floating-point numbers - efficiently, even if the exponent of the binary or decimal number - is large. The key point is that - - reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS) - - provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a - binary or decimal float is never infinity, since the denominator is a power - of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have, - for nonnegative x, - - reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS - - reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS - - and reduce(10**e) can be computed efficiently by the usual modular - exponentiation algorithm. For reduce(2**e) it's even better: since - P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication - by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits. - - */ - -Py_hash_t -_Py_HashDouble(double v) -{ - int e, sign; - double m; - Py_uhash_t x, y; - - if (!Py_IS_FINITE(v)) { - if (Py_IS_INFINITY(v)) - return v > 0 ? _PyHASH_INF : -_PyHASH_INF; - else - return _PyHASH_NAN; - } - - m = frexp(v, &e); - - sign = 1; - if (m < 0) { - sign = -1; - m = -m; - } - - /* process 28 bits at a time; this should work well both for binary - and hexadecimal floating point. */ - x = 0; - while (m) { - x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28); - m *= 268435456.0; /* 2**28 */ - e -= 28; - y = (Py_uhash_t)m; /* pull out integer part */ - m -= y; - x += y; - if (x >= _PyHASH_MODULUS) - x -= _PyHASH_MODULUS; - } - - /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */ - e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS); - x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e); - - x = x * sign; - if (x == (Py_uhash_t)-1) - x = (Py_uhash_t)-2; - return (Py_hash_t)x; -} - -Py_hash_t -_Py_HashPointer(void *p) -{ - Py_hash_t x; - size_t y = (size_t)p; - /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid - excessive hash collisions for dicts and sets */ - y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4)); - x = (Py_hash_t)y; - if (x == -1) - x = -2; - return x; -} - -Py_hash_t -_Py_HashBytes(unsigned char *p, Py_ssize_t len) -{ - Py_uhash_t x; - Py_ssize_t i; - - x = (Py_uhash_t) *p << 7; - for (i = 0; i < len; i++) - x = (1000003U * x) ^ (Py_uhash_t) *p++; - x ^= (Py_uhash_t) len; - if (x == -1) - x = -2; - return x; -} - -Py_hash_t -PyObject_HashNotImplemented(PyObject *v) -{ - PyErr_Format(PyExc_TypeError, "unhashable type: '%.200s'", - Py_TYPE(v)->tp_name); - return -1; -} - -Py_hash_t -PyObject_Hash(PyObject *v) -{ - PyTypeObject *tp = Py_TYPE(v); - if (tp->tp_hash != NULL) - return (*tp->tp_hash)(v); - /* To keep to the general practice that inheriting - * solely from object in C code should work without - * an explicit call to PyType_Ready, we implicitly call - * PyType_Ready here and then check the tp_hash slot again - */ - if (tp->tp_dict == NULL) { - if (PyType_Ready(tp) < 0) - return -1; - if (tp->tp_hash != NULL) - return (*tp->tp_hash)(v); - } - /* Otherwise, the object can't be hashed */ - return PyObject_HashNotImplemented(v); -} - -PyObject * -PyObject_GetAttrString(PyObject *v, const char *name) -{ - PyObject *w, *res; - - if (Py_TYPE(v)->tp_getattr != NULL) - return (*Py_TYPE(v)->tp_getattr)(v, (char*)name); - w = PyUnicode_InternFromString(name); - if (w == NULL) - return NULL; - res = PyObject_GetAttr(v, w); - Py_XDECREF(w); - return res; -} - -int -PyObject_HasAttrString(PyObject *v, const char *name) -{ - PyObject *res = PyObject_GetAttrString(v, name); - if (res != NULL) { - Py_DECREF(res); - return 1; - } - PyErr_Clear(); - return 0; -} - -int -PyObject_SetAttrString(PyObject *v, const char *name, PyObject *w) -{ - PyObject *s; - int res; - - if (Py_TYPE(v)->tp_setattr != NULL) - return (*Py_TYPE(v)->tp_setattr)(v, (char*)name, w); - s = PyUnicode_InternFromString(name); - if (s == NULL) - return -1; - res = PyObject_SetAttr(v, s, w); - Py_XDECREF(s); - return res; -} - -int -_PyObject_IsAbstract(PyObject *obj) -{ - int res; - PyObject* isabstract; - _Py_IDENTIFIER(__isabstractmethod__); - - if (obj == NULL) - return 0; - - isabstract = _PyObject_GetAttrId(obj, &PyId___isabstractmethod__); - if (isabstract == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - return 0; - } - return -1; - } - res = PyObject_IsTrue(isabstract); - Py_DECREF(isabstract); - return res; -} - -PyObject * -_PyObject_GetAttrId(PyObject *v, _Py_Identifier *name) -{ - PyObject *result; - PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ - if (!oname) - return NULL; - result = PyObject_GetAttr(v, oname); - return result; -} - -int -_PyObject_HasAttrId(PyObject *v, _Py_Identifier *name) -{ - int result; - PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ - if (!oname) - return -1; - result = PyObject_HasAttr(v, oname); - return result; -} - -int -_PyObject_SetAttrId(PyObject *v, _Py_Identifier *name, PyObject *w) -{ - int result; - PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ - if (!oname) - return -1; - result = PyObject_SetAttr(v, oname, w); - return result; -} - -PyObject * -PyObject_GetAttr(PyObject *v, PyObject *name) -{ - PyTypeObject *tp = Py_TYPE(v); - - if (!PyUnicode_Check(name)) { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } - if (tp->tp_getattro != NULL) - return (*tp->tp_getattro)(v, name); - if (tp->tp_getattr != NULL) { - char *name_str = _PyUnicode_AsString(name); - if (name_str == NULL) - return NULL; - return (*tp->tp_getattr)(v, name_str); - } - PyErr_Format(PyExc_AttributeError, - "'%.50s' object has no attribute '%U'", - tp->tp_name, name); - return NULL; -} - -int -PyObject_HasAttr(PyObject *v, PyObject *name) -{ - PyObject *res = PyObject_GetAttr(v, name); - if (res != NULL) { - Py_DECREF(res); - return 1; - } - PyErr_Clear(); - return 0; -} - -int -PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) -{ - PyTypeObject *tp = Py_TYPE(v); - int err; - - if (!PyUnicode_Check(name)) { - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } - Py_INCREF(name); - - PyUnicode_InternInPlace(&name); - if (tp->tp_setattro != NULL) { - err = (*tp->tp_setattro)(v, name, value); - Py_DECREF(name); - return err; - } - if (tp->tp_setattr != NULL) { - char *name_str = _PyUnicode_AsString(name); - if (name_str == NULL) - return -1; - err = (*tp->tp_setattr)(v, name_str, value); - Py_DECREF(name); - return err; - } - Py_DECREF(name); - assert(name->ob_refcnt >= 1); - if (tp->tp_getattr == NULL && tp->tp_getattro == NULL) - PyErr_Format(PyExc_TypeError, - "'%.100s' object has no attributes " - "(%s .%U)", - tp->tp_name, - value==NULL ? "del" : "assign to", - name); - else - PyErr_Format(PyExc_TypeError, - "'%.100s' object has only read-only attributes " - "(%s .%U)", - tp->tp_name, - value==NULL ? "del" : "assign to", - name); - return -1; -} - -/* Helper to get a pointer to an object's __dict__ slot, if any */ - -PyObject ** -_PyObject_GetDictPtr(PyObject *obj) -{ - Py_ssize_t dictoffset; - PyTypeObject *tp = Py_TYPE(obj); - - dictoffset = tp->tp_dictoffset; - if (dictoffset == 0) - return NULL; - if (dictoffset < 0) { - Py_ssize_t tsize; - size_t size; - - tsize = ((PyVarObject *)obj)->ob_size; - if (tsize < 0) - tsize = -tsize; - size = _PyObject_VAR_SIZE(tp, tsize); - - dictoffset += (long)size; - assert(dictoffset > 0); - assert(dictoffset % SIZEOF_VOID_P == 0); - } - return (PyObject **) ((char *)obj + dictoffset); -} - -PyObject * -PyObject_SelfIter(PyObject *obj) -{ - Py_INCREF(obj); - return obj; -} - -/* Helper used when the __next__ method is removed from a type: - tp_iternext is never NULL and can be safely called without checking - on every iteration. - */ - -PyObject * -_PyObject_NextNotImplemented(PyObject *self) -{ - PyErr_Format(PyExc_TypeError, - "'%.200s' object is not iterable", - Py_TYPE(self)->tp_name); - return NULL; -} - -/* Generic GetAttr functions - put these in your tp_[gs]etattro slot */ - -PyObject * -_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict) -{ - PyTypeObject *tp = Py_TYPE(obj); - PyObject *descr = NULL; - PyObject *res = NULL; - descrgetfunc f; - Py_ssize_t dictoffset; - PyObject **dictptr; - - if (!PyUnicode_Check(name)){ - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return NULL; - } - else - Py_INCREF(name); - - if (tp->tp_dict == NULL) { - if (PyType_Ready(tp) < 0) - goto done; - } - - descr = _PyType_Lookup(tp, name); - Py_XINCREF(descr); - - f = NULL; - if (descr != NULL) { - f = descr->ob_type->tp_descr_get; - if (f != NULL && PyDescr_IsData(descr)) { - res = f(descr, obj, (PyObject *)obj->ob_type); - Py_DECREF(descr); - goto done; - } - } - - if (dict == NULL) { - /* Inline _PyObject_GetDictPtr */ - dictoffset = tp->tp_dictoffset; - if (dictoffset != 0) { - if (dictoffset < 0) { - Py_ssize_t tsize; - size_t size; - - tsize = ((PyVarObject *)obj)->ob_size; - if (tsize < 0) - tsize = -tsize; - size = _PyObject_VAR_SIZE(tp, tsize); - - dictoffset += (long)size; - assert(dictoffset > 0); - assert(dictoffset % SIZEOF_VOID_P == 0); - } - dictptr = (PyObject **) ((char *)obj + dictoffset); - dict = *dictptr; - } - } - if (dict != NULL) { - Py_INCREF(dict); - res = PyDict_GetItem(dict, name); - if (res != NULL) { - Py_INCREF(res); - Py_XDECREF(descr); - Py_DECREF(dict); - goto done; - } - Py_DECREF(dict); - } - - if (f != NULL) { - res = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto done; - } - - if (descr != NULL) { - res = descr; - /* descr was already increfed above */ - goto done; - } - - PyErr_Format(PyExc_AttributeError, - "'%.50s' object has no attribute '%U'", - tp->tp_name, name); - done: - Py_DECREF(name); - return res; -} - -PyObject * -PyObject_GenericGetAttr(PyObject *obj, PyObject *name) -{ - return _PyObject_GenericGetAttrWithDict(obj, name, NULL); -} - -int -_PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, - PyObject *value, PyObject *dict) -{ - PyTypeObject *tp = Py_TYPE(obj); - PyObject *descr; - descrsetfunc f; - PyObject **dictptr; - int res = -1; - - if (!PyUnicode_Check(name)){ - PyErr_Format(PyExc_TypeError, - "attribute name must be string, not '%.200s'", - name->ob_type->tp_name); - return -1; - } - else - Py_INCREF(name); - - if (tp->tp_dict == NULL) { - if (PyType_Ready(tp) < 0) - goto done; - } - - descr = _PyType_Lookup(tp, name); - f = NULL; - if (descr != NULL) { - f = descr->ob_type->tp_descr_set; - if (f != NULL && PyDescr_IsData(descr)) { - res = f(descr, obj, value); - goto done; - } - } - - if (dict == NULL) { - dictptr = _PyObject_GetDictPtr(obj); - if (dictptr != NULL) { - dict = *dictptr; - if (dict == NULL && value != NULL) { - dict = PyDict_New(); - if (dict == NULL) - goto done; - *dictptr = dict; - } - } - } - if (dict != NULL) { - Py_INCREF(dict); - if (value == NULL) - res = PyDict_DelItem(dict, name); - else - res = PyDict_SetItem(dict, name, value); - if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError)) - PyErr_SetObject(PyExc_AttributeError, name); - Py_DECREF(dict); - goto done; - } - - if (f != NULL) { - res = f(descr, obj, value); - goto done; - } - - if (descr == NULL) { - PyErr_Format(PyExc_AttributeError, - "'%.100s' object has no attribute '%U'", - tp->tp_name, name); - goto done; - } - - PyErr_Format(PyExc_AttributeError, - "'%.50s' object attribute '%U' is read-only", - tp->tp_name, name); - done: - Py_DECREF(name); - return res; -} - -int -PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) -{ - return _PyObject_GenericSetAttrWithDict(obj, name, value, NULL); -} - - -/* Test a value used as condition, e.g., in a for or if statement. - Return -1 if an error occurred */ - -int -PyObject_IsTrue(PyObject *v) -{ - Py_ssize_t res; - if (v == Py_True) - return 1; - if (v == Py_False) - return 0; - if (v == Py_None) - return 0; - else if (v->ob_type->tp_as_number != NULL && - v->ob_type->tp_as_number->nb_bool != NULL) - res = (*v->ob_type->tp_as_number->nb_bool)(v); - else if (v->ob_type->tp_as_mapping != NULL && - v->ob_type->tp_as_mapping->mp_length != NULL) - res = (*v->ob_type->tp_as_mapping->mp_length)(v); - else if (v->ob_type->tp_as_sequence != NULL && - v->ob_type->tp_as_sequence->sq_length != NULL) - res = (*v->ob_type->tp_as_sequence->sq_length)(v); - else - return 1; - /* if it is negative, it should be either -1 or -2 */ - return (res > 0) ? 1 : Py_SAFE_DOWNCAST(res, Py_ssize_t, int); -} - -/* equivalent of 'not v' - Return -1 if an error occurred */ - -int -PyObject_Not(PyObject *v) -{ - int res; - res = PyObject_IsTrue(v); - if (res < 0) - return res; - return res == 0; -} - -/* Test whether an object can be called */ - -int -PyCallable_Check(PyObject *x) -{ - if (x == NULL) - return 0; - return x->ob_type->tp_call != NULL; -} - - -/* Helper for PyObject_Dir without arguments: returns the local scope. */ -static PyObject * -_dir_locals(void) -{ - PyObject *names; - PyObject *locals = PyEval_GetLocals(); - - if (locals == NULL) { - PyErr_SetString(PyExc_SystemError, "frame does not exist"); - return NULL; - } - - names = PyMapping_Keys(locals); - if (!names) - return NULL; - if (!PyList_Check(names)) { - PyErr_Format(PyExc_TypeError, - "dir(): expected keys() of locals to be a list, " - "not '%.200s'", Py_TYPE(names)->tp_name); - Py_DECREF(names); - return NULL; - } - if (PyList_Sort(names)) { - Py_DECREF(names); - return NULL; - } - /* the locals don't need to be DECREF'd */ - return names; -} - -/* Helper for PyObject_Dir: object introspection. */ -static PyObject * -_dir_object(PyObject *obj) -{ - PyObject *result, *sorted; - static PyObject *dir_str = NULL; - PyObject *dirfunc = _PyObject_LookupSpecial(obj, "__dir__", &dir_str); - - assert(obj); - if (dirfunc == NULL) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_TypeError, "object does not provide __dir__"); - return NULL; - } - /* use __dir__ */ - result = PyObject_CallFunctionObjArgs(dirfunc, NULL); - Py_DECREF(dirfunc); - if (result == NULL) - return NULL; - /* return sorted(result) */ - sorted = PySequence_List(result); - Py_DECREF(result); - if (sorted == NULL) - return NULL; - if (PyList_Sort(sorted)) { - Py_DECREF(sorted); - return NULL; - } - return sorted; -} - -/* Implementation of dir() -- if obj is NULL, returns the names in the current - (local) scope. Otherwise, performs introspection of the object: returns a - sorted list of attribute names (supposedly) accessible from the object -*/ -PyObject * -PyObject_Dir(PyObject *obj) -{ - return (obj == NULL) ? _dir_locals() : _dir_object(obj); -} - -/* -None is a non-NULL undefined value. -There is (and should be!) no way to create other objects of this type, -so there is exactly one (which is indestructible, by the way). -*/ - -/* ARGSUSED */ -static PyObject * -none_repr(PyObject *op) -{ - return PyUnicode_FromString("None"); -} - -/* ARGUSED */ -static void -none_dealloc(PyObject* ignore) -{ - /* This should never get called, but we also don't want to SEGV if - * we accidentally decref None out of existence. - */ - Py_FatalError("deallocating None"); -} - -static PyObject * -none_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - if (PyTuple_GET_SIZE(args) || (kwargs && PyDict_Size(kwargs))) { - PyErr_SetString(PyExc_TypeError, "NoneType takes no arguments"); - return NULL; - } - Py_RETURN_NONE; -} - -static int -none_bool(PyObject *v) -{ - return 0; -} - -static PyNumberMethods none_as_number = { - 0, /* nb_add */ - 0, /* nb_subtract */ - 0, /* nb_multiply */ - 0, /* nb_remainder */ - 0, /* nb_divmod */ - 0, /* nb_power */ - 0, /* nb_negative */ - 0, /* nb_positive */ - 0, /* nb_absolute */ - (inquiry)none_bool, /* nb_bool */ - 0, /* nb_invert */ - 0, /* nb_lshift */ - 0, /* nb_rshift */ - 0, /* nb_and */ - 0, /* nb_xor */ - 0, /* nb_or */ - 0, /* nb_int */ - 0, /* nb_reserved */ - 0, /* nb_float */ - 0, /* nb_inplace_add */ - 0, /* nb_inplace_subtract */ - 0, /* nb_inplace_multiply */ - 0, /* nb_inplace_remainder */ - 0, /* nb_inplace_power */ - 0, /* nb_inplace_lshift */ - 0, /* nb_inplace_rshift */ - 0, /* nb_inplace_and */ - 0, /* nb_inplace_xor */ - 0, /* nb_inplace_or */ - 0, /* nb_floor_divide */ - 0, /* nb_true_divide */ - 0, /* nb_inplace_floor_divide */ - 0, /* nb_inplace_true_divide */ - 0, /* nb_index */ -}; - -static PyTypeObject PyNone_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "NoneType", - 0, - 0, - none_dealloc, /*tp_dealloc*/ /*never called*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - none_repr, /*tp_repr*/ - &none_as_number, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call */ - 0, /*tp_str */ - 0, /*tp_getattro */ - 0, /*tp_setattro */ - 0, /*tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /*tp_flags */ - 0, /*tp_doc */ - 0, /*tp_traverse */ - 0, /*tp_clear */ - 0, /*tp_richcompare */ - 0, /*tp_weaklistoffset */ - 0, /*tp_iter */ - 0, /*tp_iternext */ - 0, /*tp_methods */ - 0, /*tp_members */ - 0, /*tp_getset */ - 0, /*tp_base */ - 0, /*tp_dict */ - 0, /*tp_descr_get */ - 0, /*tp_descr_set */ - 0, /*tp_dictoffset */ - 0, /*tp_init */ - 0, /*tp_alloc */ - none_new, /*tp_new */ -}; - -PyObject _Py_NoneStruct = { - _PyObject_EXTRA_INIT - 1, &PyNone_Type -}; - -/* NotImplemented is an object that can be used to signal that an - operation is not implemented for the given type combination. */ - -static PyObject * -NotImplemented_repr(PyObject *op) -{ - return PyUnicode_FromString("NotImplemented"); -} - -static PyObject * -notimplemented_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -{ - if (PyTuple_GET_SIZE(args) || (kwargs && PyDict_Size(kwargs))) { - PyErr_SetString(PyExc_TypeError, "NotImplementedType takes no arguments"); - return NULL; - } - Py_RETURN_NOTIMPLEMENTED; -} - -static PyTypeObject PyNotImplemented_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "NotImplementedType", - 0, - 0, - none_dealloc, /*tp_dealloc*/ /*never called*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - NotImplemented_repr, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call */ - 0, /*tp_str */ - 0, /*tp_getattro */ - 0, /*tp_setattro */ - 0, /*tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /*tp_flags */ - 0, /*tp_doc */ - 0, /*tp_traverse */ - 0, /*tp_clear */ - 0, /*tp_richcompare */ - 0, /*tp_weaklistoffset */ - 0, /*tp_iter */ - 0, /*tp_iternext */ - 0, /*tp_methods */ - 0, /*tp_members */ - 0, /*tp_getset */ - 0, /*tp_base */ - 0, /*tp_dict */ - 0, /*tp_descr_get */ - 0, /*tp_descr_set */ - 0, /*tp_dictoffset */ - 0, /*tp_init */ - 0, /*tp_alloc */ - notimplemented_new, /*tp_new */ -}; - -PyObject _Py_NotImplementedStruct = { - _PyObject_EXTRA_INIT - 1, &PyNotImplemented_Type -}; - -void -_Py_ReadyTypes(void) -{ - if (PyType_Ready(&PyType_Type) < 0) - Py_FatalError("Can't initialize type type"); - - if (PyType_Ready(&_PyWeakref_RefType) < 0) - Py_FatalError("Can't initialize weakref type"); - - if (PyType_Ready(&_PyWeakref_CallableProxyType) < 0) - Py_FatalError("Can't initialize callable weakref proxy type"); - - if (PyType_Ready(&_PyWeakref_ProxyType) < 0) - Py_FatalError("Can't initialize weakref proxy type"); - - if (PyType_Ready(&PyBool_Type) < 0) - Py_FatalError("Can't initialize bool type"); - - if (PyType_Ready(&PyByteArray_Type) < 0) - Py_FatalError("Can't initialize bytearray type"); - - if (PyType_Ready(&PyBytes_Type) < 0) - Py_FatalError("Can't initialize 'str'"); - - if (PyType_Ready(&PyList_Type) < 0) - Py_FatalError("Can't initialize list type"); - - if (PyType_Ready(&PyNone_Type) < 0) - Py_FatalError("Can't initialize None type"); - - if (PyType_Ready(&PyNotImplemented_Type) < 0) - Py_FatalError("Can't initialize NotImplemented type"); - - if (PyType_Ready(&PyTraceBack_Type) < 0) - Py_FatalError("Can't initialize traceback type"); - - if (PyType_Ready(&PySuper_Type) < 0) - Py_FatalError("Can't initialize super type"); - - if (PyType_Ready(&PyBaseObject_Type) < 0) - Py_FatalError("Can't initialize object type"); - - if (PyType_Ready(&PyRange_Type) < 0) - Py_FatalError("Can't initialize range type"); - - if (PyType_Ready(&PyDict_Type) < 0) - Py_FatalError("Can't initialize dict type"); - - if (PyType_Ready(&PySet_Type) < 0) - Py_FatalError("Can't initialize set type"); - - if (PyType_Ready(&PyUnicode_Type) < 0) - Py_FatalError("Can't initialize str type"); - - if (PyType_Ready(&PySlice_Type) < 0) - Py_FatalError("Can't initialize slice type"); - - if (PyType_Ready(&PyStaticMethod_Type) < 0) - Py_FatalError("Can't initialize static method type"); - - if (PyType_Ready(&PyComplex_Type) < 0) - Py_FatalError("Can't initialize complex type"); - - if (PyType_Ready(&PyFloat_Type) < 0) - Py_FatalError("Can't initialize float type"); - - if (PyType_Ready(&PyLong_Type) < 0) - Py_FatalError("Can't initialize int type"); - - if (PyType_Ready(&PyFrozenSet_Type) < 0) - Py_FatalError("Can't initialize frozenset type"); - - if (PyType_Ready(&PyProperty_Type) < 0) - Py_FatalError("Can't initialize property type"); - - if (PyType_Ready(&PyMemoryView_Type) < 0) - Py_FatalError("Can't initialize memoryview type"); - - if (PyType_Ready(&PyTuple_Type) < 0) - Py_FatalError("Can't initialize tuple type"); - - if (PyType_Ready(&PyEnum_Type) < 0) - Py_FatalError("Can't initialize enumerate type"); - - if (PyType_Ready(&PyReversed_Type) < 0) - Py_FatalError("Can't initialize reversed type"); - - if (PyType_Ready(&PyStdPrinter_Type) < 0) - Py_FatalError("Can't initialize StdPrinter"); - - if (PyType_Ready(&PyCode_Type) < 0) - Py_FatalError("Can't initialize code type"); - - if (PyType_Ready(&PyFrame_Type) < 0) - Py_FatalError("Can't initialize frame type"); - - if (PyType_Ready(&PyCFunction_Type) < 0) - Py_FatalError("Can't initialize builtin function type"); - - if (PyType_Ready(&PyMethod_Type) < 0) - Py_FatalError("Can't initialize method type"); - - if (PyType_Ready(&PyFunction_Type) < 0) - Py_FatalError("Can't initialize function type"); - - if (PyType_Ready(&PyDictProxy_Type) < 0) - Py_FatalError("Can't initialize dict proxy type"); - - if (PyType_Ready(&PyGen_Type) < 0) - Py_FatalError("Can't initialize generator type"); - - if (PyType_Ready(&PyGetSetDescr_Type) < 0) - Py_FatalError("Can't initialize get-set descriptor type"); - - if (PyType_Ready(&PyWrapperDescr_Type) < 0) - Py_FatalError("Can't initialize wrapper type"); - - if (PyType_Ready(&_PyMethodWrapper_Type) < 0) - Py_FatalError("Can't initialize method wrapper type"); - - if (PyType_Ready(&PyEllipsis_Type) < 0) - Py_FatalError("Can't initialize ellipsis type"); - - if (PyType_Ready(&PyMemberDescr_Type) < 0) - Py_FatalError("Can't initialize member descriptor type"); - - if (PyType_Ready(&PyFilter_Type) < 0) - Py_FatalError("Can't initialize filter type"); - - if (PyType_Ready(&PyMap_Type) < 0) - Py_FatalError("Can't initialize map type"); - - if (PyType_Ready(&PyZip_Type) < 0) - Py_FatalError("Can't initialize zip type"); -} - - -#ifdef Py_TRACE_REFS - -void -_Py_NewReference(PyObject *op) -{ - _Py_INC_REFTOTAL; - op->ob_refcnt = 1; - _Py_AddToAllObjects(op, 1); - _Py_INC_TPALLOCS(op); -} - -void -_Py_ForgetReference(register PyObject *op) -{ -#ifdef SLOW_UNREF_CHECK - register PyObject *p; -#endif - if (op->ob_refcnt < 0) - Py_FatalError("UNREF negative refcnt"); - if (op == &refchain || - op->_ob_prev->_ob_next != op || op->_ob_next->_ob_prev != op) { - fprintf(stderr, "* ob\n"); - _PyObject_Dump(op); - fprintf(stderr, "* op->_ob_prev->_ob_next\n"); - _PyObject_Dump(op->_ob_prev->_ob_next); - fprintf(stderr, "* op->_ob_next->_ob_prev\n"); - _PyObject_Dump(op->_ob_next->_ob_prev); - Py_FatalError("UNREF invalid object"); - } -#ifdef SLOW_UNREF_CHECK - for (p = refchain._ob_next; p != &refchain; p = p->_ob_next) { - if (p == op) - break; - } - if (p == &refchain) /* Not found */ - Py_FatalError("UNREF unknown object"); -#endif - op->_ob_next->_ob_prev = op->_ob_prev; - op->_ob_prev->_ob_next = op->_ob_next; - op->_ob_next = op->_ob_prev = NULL; - _Py_INC_TPFREES(op); -} - -void -_Py_Dealloc(PyObject *op) -{ - destructor dealloc = Py_TYPE(op)->tp_dealloc; - _Py_ForgetReference(op); - (*dealloc)(op); -} - -/* Print all live objects. Because PyObject_Print is called, the - * interpreter must be in a healthy state. - */ -void -_Py_PrintReferences(FILE *fp) -{ - PyObject *op; - fprintf(fp, "Remaining objects:\n"); - for (op = refchain._ob_next; op != &refchain; op = op->_ob_next) { - fprintf(fp, "%p [%" PY_FORMAT_SIZE_T "d] ", op, op->ob_refcnt); - if (PyObject_Print(op, fp, 0) != 0) - PyErr_Clear(); - putc('\n', fp); - } -} - -/* Print the addresses of all live objects. Unlike _Py_PrintReferences, this - * doesn't make any calls to the Python C API, so is always safe to call. - */ -void -_Py_PrintReferenceAddresses(FILE *fp) -{ - PyObject *op; - fprintf(fp, "Remaining object addresses:\n"); - for (op = refchain._ob_next; op != &refchain; op = op->_ob_next) - fprintf(fp, "%p [%" PY_FORMAT_SIZE_T "d] %s\n", op, - op->ob_refcnt, Py_TYPE(op)->tp_name); -} - -PyObject * -_Py_GetObjects(PyObject *self, PyObject *args) -{ - int i, n; - PyObject *t = NULL; - PyObject *res, *op; - - if (!PyArg_ParseTuple(args, "i|O", &n, &t)) - return NULL; - op = refchain._ob_next; - res = PyList_New(0); - if (res == NULL) - return NULL; - for (i = 0; (n == 0 || i < n) && op != &refchain; i++) { - while (op == self || op == args || op == res || op == t || - (t != NULL && Py_TYPE(op) != (PyTypeObject *) t)) { - op = op->_ob_next; - if (op == &refchain) - return res; - } - if (PyList_Append(res, op) < 0) { - Py_DECREF(res); - return NULL; - } - op = op->_ob_next; - } - return res; -} - -#endif - -/* Hack to force loading of pycapsule.o */ -PyTypeObject *_PyCapsule_hack = &PyCapsule_Type; - - -/* Hack to force loading of abstract.o */ -Py_ssize_t (*_Py_abstract_hack)(PyObject *) = PyObject_Size; - - -/* Python's malloc wrappers (see pymem.h) */ - -void * -PyMem_Malloc(size_t nbytes) -{ - return PyMem_MALLOC(nbytes); -} - -void * -PyMem_Realloc(void *p, size_t nbytes) -{ - return PyMem_REALLOC(p, nbytes); -} - -void -PyMem_Free(void *p) -{ - PyMem_FREE(p); -} - - -/* These methods are used to control infinite recursion in repr, str, print, - etc. Container objects that may recursively contain themselves, - e.g. builtin dictionaries and lists, should used Py_ReprEnter() and - Py_ReprLeave() to avoid infinite recursion. - - Py_ReprEnter() returns 0 the first time it is called for a particular - object and 1 every time thereafter. It returns -1 if an exception - occurred. Py_ReprLeave() has no return value. - - See dictobject.c and listobject.c for examples of use. -*/ - -#define KEY "Py_Repr" - -int -Py_ReprEnter(PyObject *obj) -{ - PyObject *dict; - PyObject *list; - Py_ssize_t i; - - dict = PyThreadState_GetDict(); - if (dict == NULL) - return 0; - list = PyDict_GetItemString(dict, KEY); - if (list == NULL) { - list = PyList_New(0); - if (list == NULL) - return -1; - if (PyDict_SetItemString(dict, KEY, list) < 0) - return -1; - Py_DECREF(list); - } - i = PyList_GET_SIZE(list); - while (--i >= 0) { - if (PyList_GET_ITEM(list, i) == obj) - return 1; - } - PyList_Append(list, obj); - return 0; -} - -void -Py_ReprLeave(PyObject *obj) -{ - PyObject *dict; - PyObject *list; - Py_ssize_t i; - - dict = PyThreadState_GetDict(); - if (dict == NULL) - return; - list = PyDict_GetItemString(dict, KEY); - if (list == NULL || !PyList_Check(list)) - return; - i = PyList_GET_SIZE(list); - /* Count backwards because we always expect obj to be list[-1] */ - while (--i >= 0) { - if (PyList_GET_ITEM(list, i) == obj) { - PyList_SetSlice(list, i, i + 1, NULL); - break; - } - } -} - -/* Trashcan support. */ - -/* Current call-stack depth of tp_dealloc calls. */ -int _PyTrash_delete_nesting = 0; - -/* List of objects that still need to be cleaned up, singly linked via their - * gc headers' gc_prev pointers. - */ -PyObject *_PyTrash_delete_later = NULL; - -/* Add op to the _PyTrash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. - */ -void -_PyTrash_deposit_object(PyObject *op) -{ - assert(PyObject_IS_GC(op)); - assert(_Py_AS_GC(op)->gc.gc_refs == _PyGC_REFS_UNTRACKED); - assert(op->ob_refcnt == 0); - _Py_AS_GC(op)->gc.gc_prev = (PyGC_Head *)_PyTrash_delete_later; - _PyTrash_delete_later = op; -} - -/* Dealloccate all the objects in the _PyTrash_delete_later list. Called when - * the call-stack unwinds again. - */ -void -_PyTrash_destroy_chain(void) -{ - while (_PyTrash_delete_later) { - PyObject *op = _PyTrash_delete_later; - destructor dealloc = Py_TYPE(op)->tp_dealloc; - - _PyTrash_delete_later = - (PyObject*) _Py_AS_GC(op)->gc.gc_prev; - - /* Call the deallocator directly. This used to try to - * fool Py_DECREF into calling it indirectly, but - * Py_DECREF was already called on this object, and in - * assorted non-release builds calling Py_DECREF again ends - * up distorting allocation statistics. - */ - assert(op->ob_refcnt == 0); - ++_PyTrash_delete_nesting; - (*dealloc)(op); - --_PyTrash_delete_nesting; - } -} - -#ifndef Py_TRACE_REFS -/* For Py_LIMITED_API, we need an out-of-line version of _Py_Dealloc. - Define this here, so we can undefine the macro. */ -#undef _Py_Dealloc -PyAPI_FUNC(void) _Py_Dealloc(PyObject *); -void -_Py_Dealloc(PyObject *op) -{ - _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA - (*Py_TYPE(op)->tp_dealloc)(op); -} -#endif -
--- a/cos/python/Objects/tupleobject.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,907 +0,0 @@ - -/* Tuple object implementation */ - -#include "Python.h" - -PyObject * -PyTuple_New(register Py_ssize_t size) -{ - register PyTupleObject *op; - Py_ssize_t i; - if (size < 0) { - PyErr_BadInternalCall(); - return NULL; - } - { - Py_ssize_t nbytes = size * sizeof(PyObject *); - /* Check for overflow */ - if (nbytes / sizeof(PyObject *) != (size_t)size || - (nbytes > PY_SSIZE_T_MAX - sizeof(PyTupleObject) - sizeof(PyObject *))) - { - return PyErr_NoMemory(); - } - /* nbytes += sizeof(PyTupleObject) - sizeof(PyObject *); */ - - op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); - if (op == NULL) - return NULL; - } - for (i=0; i < size; i++) - op->ob_item[i] = NULL; - _PyObject_GC_TRACK(op); - return (PyObject *) op; -} - -Py_ssize_t -PyTuple_Size(register PyObject *op) -{ - if (!PyTuple_Check(op)) { - PyErr_BadInternalCall(); - return -1; - } - else - return Py_SIZE(op); -} - -PyObject * -PyTuple_GetItem(register PyObject *op, register Py_ssize_t i) -{ - if (!PyTuple_Check(op)) { - PyErr_BadInternalCall(); - return NULL; - } - if (i < 0 || i >= Py_SIZE(op)) { - PyErr_SetString(PyExc_IndexError, "tuple index out of range"); - return NULL; - } - return ((PyTupleObject *)op) -> ob_item[i]; -} - -int -PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem) -{ - register PyObject *olditem; - register PyObject **p; - if (!PyTuple_Check(op) || op->ob_refcnt != 1) { - Py_XDECREF(newitem); - PyErr_BadInternalCall(); - return -1; - } - if (i < 0 || i >= Py_SIZE(op)) { - Py_XDECREF(newitem); - PyErr_SetString(PyExc_IndexError, - "tuple assignment index out of range"); - return -1; - } - p = ((PyTupleObject *)op) -> ob_item + i; - olditem = *p; - *p = newitem; - Py_XDECREF(olditem); - return 0; -} - -void -_PyTuple_MaybeUntrack(PyObject *op) -{ - PyTupleObject *t; - Py_ssize_t i, n; - - if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) - return; - t = (PyTupleObject *) op; - n = Py_SIZE(t); - for (i = 0; i < n; i++) { - PyObject *elt = PyTuple_GET_ITEM(t, i); - /* Tuple with NULL elements aren't - fully constructed, don't untrack - them yet. */ - if (!elt || - _PyObject_GC_MAY_BE_TRACKED(elt)) - return; - } - _PyObject_GC_UNTRACK(op); -} - -PyObject * -PyTuple_Pack(Py_ssize_t n, ...) -{ - Py_ssize_t i; - PyObject *o; - PyObject *result; - PyObject **items; - va_list vargs; - - va_start(vargs, n); - result = PyTuple_New(n); - if (result == NULL) - return NULL; - items = ((PyTupleObject *)result)->ob_item; - for (i = 0; i < n; i++) { - o = va_arg(vargs, PyObject *); - Py_INCREF(o); - items[i] = o; - } - va_end(vargs); - return result; -} - - -/* Methods */ - -static void -tupledealloc(register PyTupleObject *op) -{ - register Py_ssize_t i; - register Py_ssize_t len = Py_SIZE(op); - PyObject_GC_UnTrack(op); - Py_TRASHCAN_SAFE_BEGIN(op) - if (len > 0) { - i = len; - while (--i >= 0) - Py_XDECREF(op->ob_item[i]); - } - Py_TYPE(op)->tp_free((PyObject *)op); -done: - Py_TRASHCAN_SAFE_END(op) -} - -static PyObject * -tuplerepr(PyTupleObject *v) -{ - Py_ssize_t i, n; - PyObject *s = NULL; - _PyAccu acc; - static PyObject *sep = NULL; - - n = Py_SIZE(v); - if (n == 0) - return PyUnicode_FromString("()"); - - if (sep == NULL) { - sep = PyUnicode_FromString(", "); - if (sep == NULL) - return NULL; - } - - /* While not mutable, it is still possible to end up with a cycle in a - tuple through an object that stores itself within a tuple (and thus - infinitely asks for the repr of itself). This should only be - possible within a type. */ - i = Py_ReprEnter((PyObject *)v); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("(...)") : NULL; - } - - if (_PyAccu_Init(&acc)) - goto error; - - s = PyUnicode_FromString("("); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - - /* Do repr() on each element. */ - for (i = 0; i < n; ++i) { - if (Py_EnterRecursiveCall(" while getting the repr of a tuple")) - goto error; - s = PyObject_Repr(v->ob_item[i]); - Py_LeaveRecursiveCall(); - if (i > 0 && _PyAccu_Accumulate(&acc, sep)) - goto error; - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - } - if (n > 1) - s = PyUnicode_FromString(")"); - else - s = PyUnicode_FromString(",)"); - if (s == NULL || _PyAccu_Accumulate(&acc, s)) - goto error; - Py_CLEAR(s); - - Py_ReprLeave((PyObject *)v); - return _PyAccu_Finish(&acc); - -error: - _PyAccu_Destroy(&acc); - Py_XDECREF(s); - Py_ReprLeave((PyObject *)v); - return NULL; -} - -/* The addend 82520, was selected from the range(0, 1000000) for - generating the greatest number of prime multipliers for tuples - upto length eight: - - 1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533, - 1330111, 1412633, 1165069, 1247599, 1495177, 1577699 -*/ - -static Py_hash_t -tuplehash(PyTupleObject *v) -{ - register Py_uhash_t x; - register Py_hash_t y; - register Py_ssize_t len = Py_SIZE(v); - register PyObject **p; - Py_uhash_t mult = 1000003; - x = 0x345678; - p = v->ob_item; - while (--len >= 0) { - y = PyObject_Hash(*p++); - if (y == -1) - return -1; - x = (x ^ y) * mult; - /* the cast might truncate len; that doesn't change hash stability */ - mult += (Py_hash_t)(82520L + len + len); - } - x += 97531L; - if (x == (Py_uhash_t)-1) - x = -2; - return x; -} - -static Py_ssize_t -tuplelength(PyTupleObject *a) -{ - return Py_SIZE(a); -} - -static int -tuplecontains(PyTupleObject *a, PyObject *el) -{ - Py_ssize_t i; - int cmp; - - for (i = 0, cmp = 0 ; cmp == 0 && i < Py_SIZE(a); ++i) - cmp = PyObject_RichCompareBool(el, PyTuple_GET_ITEM(a, i), - Py_EQ); - return cmp; -} - -static PyObject * -tupleitem(register PyTupleObject *a, register Py_ssize_t i) -{ - if (i < 0 || i >= Py_SIZE(a)) { - PyErr_SetString(PyExc_IndexError, "tuple index out of range"); - return NULL; - } - Py_INCREF(a->ob_item[i]); - return a->ob_item[i]; -} - -static PyObject * -tupleslice(register PyTupleObject *a, register Py_ssize_t ilow, - register Py_ssize_t ihigh) -{ - register PyTupleObject *np; - PyObject **src, **dest; - register Py_ssize_t i; - Py_ssize_t len; - if (ilow < 0) - ilow = 0; - if (ihigh > Py_SIZE(a)) - ihigh = Py_SIZE(a); - if (ihigh < ilow) - ihigh = ilow; - if (ilow == 0 && ihigh == Py_SIZE(a) && PyTuple_CheckExact(a)) { - Py_INCREF(a); - return (PyObject *)a; - } - len = ihigh - ilow; - np = (PyTupleObject *)PyTuple_New(len); - if (np == NULL) - return NULL; - src = a->ob_item + ilow; - dest = np->ob_item; - for (i = 0; i < len; i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - return (PyObject *)np; -} - -PyObject * -PyTuple_GetSlice(PyObject *op, Py_ssize_t i, Py_ssize_t j) -{ - if (op == NULL || !PyTuple_Check(op)) { - PyErr_BadInternalCall(); - return NULL; - } - return tupleslice((PyTupleObject *)op, i, j); -} - -static PyObject * -tupleconcat(register PyTupleObject *a, register PyObject *bb) -{ - register Py_ssize_t size; - register Py_ssize_t i; - PyObject **src, **dest; - PyTupleObject *np; - if (!PyTuple_Check(bb)) { - PyErr_Format(PyExc_TypeError, - "can only concatenate tuple (not \"%.200s\") to tuple", - Py_TYPE(bb)->tp_name); - return NULL; - } -#define b ((PyTupleObject *)bb) - size = Py_SIZE(a) + Py_SIZE(b); - if (size < 0) - return PyErr_NoMemory(); - np = (PyTupleObject *) PyTuple_New(size); - if (np == NULL) { - return NULL; - } - src = a->ob_item; - dest = np->ob_item; - for (i = 0; i < Py_SIZE(a); i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - src = b->ob_item; - dest = np->ob_item + Py_SIZE(a); - for (i = 0; i < Py_SIZE(b); i++) { - PyObject *v = src[i]; - Py_INCREF(v); - dest[i] = v; - } - return (PyObject *)np; -#undef b -} - -static PyObject * -tuplerepeat(PyTupleObject *a, Py_ssize_t n) -{ - Py_ssize_t i, j; - Py_ssize_t size; - PyTupleObject *np; - PyObject **p, **items; - if (n < 0) - n = 0; - if (Py_SIZE(a) == 0 || n == 1) { - if (PyTuple_CheckExact(a)) { - /* Since tuples are immutable, we can return a shared - copy in this case */ - Py_INCREF(a); - return (PyObject *)a; - } - if (Py_SIZE(a) == 0) - return PyTuple_New(0); - } - size = Py_SIZE(a) * n; - if (size/Py_SIZE(a) != n) - return PyErr_NoMemory(); - np = (PyTupleObject *) PyTuple_New(size); - if (np == NULL) - return NULL; - p = np->ob_item; - items = a->ob_item; - for (i = 0; i < n; i++) { - for (j = 0; j < Py_SIZE(a); j++) { - *p = items[j]; - Py_INCREF(*p); - p++; - } - } - return (PyObject *) np; -} - -static PyObject * -tupleindex(PyTupleObject *self, PyObject *args) -{ - Py_ssize_t i, start=0, stop=Py_SIZE(self); - PyObject *v; - - if (!PyArg_ParseTuple(args, "O|O&O&:index", &v, - _PyEval_SliceIndex, &start, - _PyEval_SliceIndex, &stop)) - return NULL; - if (start < 0) { - start += Py_SIZE(self); - if (start < 0) - start = 0; - } - if (stop < 0) { - stop += Py_SIZE(self); - if (stop < 0) - stop = 0; - } - for (i = start; i < stop && i < Py_SIZE(self); i++) { - int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); - if (cmp > 0) - return PyLong_FromSsize_t(i); - else if (cmp < 0) - return NULL; - } - PyErr_SetString(PyExc_ValueError, "tuple.index(x): x not in tuple"); - return NULL; -} - -static PyObject * -tuplecount(PyTupleObject *self, PyObject *v) -{ - Py_ssize_t count = 0; - Py_ssize_t i; - - for (i = 0; i < Py_SIZE(self); i++) { - int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ); - if (cmp > 0) - count++; - else if (cmp < 0) - return NULL; - } - return PyLong_FromSsize_t(count); -} - -static int -tupletraverse(PyTupleObject *o, visitproc visit, void *arg) -{ - Py_ssize_t i; - - for (i = Py_SIZE(o); --i >= 0; ) - Py_VISIT(o->ob_item[i]); - return 0; -} - -static PyObject * -tuplerichcompare(PyObject *v, PyObject *w, int op) -{ - PyTupleObject *vt, *wt; - Py_ssize_t i; - Py_ssize_t vlen, wlen; - - if (!PyTuple_Check(v) || !PyTuple_Check(w)) - Py_RETURN_NOTIMPLEMENTED; - - vt = (PyTupleObject *)v; - wt = (PyTupleObject *)w; - - vlen = Py_SIZE(vt); - wlen = Py_SIZE(wt); - - /* Note: the corresponding code for lists has an "early out" test - * here when op is EQ or NE and the lengths differ. That pays there, - * but Tim was unable to find any real code where EQ/NE tuple - * compares don't have the same length, so testing for it here would - * have cost without benefit. - */ - - /* Search for the first index where items are different. - * Note that because tuples are immutable, it's safe to reuse - * vlen and wlen across the comparison calls. - */ - for (i = 0; i < vlen && i < wlen; i++) { - int k = PyObject_RichCompareBool(vt->ob_item[i], - wt->ob_item[i], Py_EQ); - if (k < 0) - return NULL; - if (!k) - break; - } - - if (i >= vlen || i >= wlen) { - /* No more items to compare -- compare sizes */ - int cmp; - PyObject *res; - switch (op) { - case Py_LT: cmp = vlen < wlen; break; - case Py_LE: cmp = vlen <= wlen; break; - case Py_EQ: cmp = vlen == wlen; break; - case Py_NE: cmp = vlen != wlen; break; - case Py_GT: cmp = vlen > wlen; break; - case Py_GE: cmp = vlen >= wlen; break; - default: return NULL; /* cannot happen */ - } - if (cmp) - res = Py_True; - else - res = Py_False; - Py_INCREF(res); - return res; - } - - /* We have an item that differs -- shortcuts for EQ/NE */ - if (op == Py_EQ) { - Py_INCREF(Py_False); - return Py_False; - } - if (op == Py_NE) { - Py_INCREF(Py_True); - return Py_True; - } - - /* Compare the final item again using the proper operator */ - return PyObject_RichCompare(vt->ob_item[i], wt->ob_item[i], op); -} - -static PyObject * -tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); - -static PyObject * -tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *arg = NULL; - static char *kwlist[] = {"sequence", 0}; - - if (type != &PyTuple_Type) - return tuple_subtype_new(type, args, kwds); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:tuple", kwlist, &arg)) - return NULL; - - if (arg == NULL) - return PyTuple_New(0); - else - return PySequence_Tuple(arg); -} - -static PyObject * -tuple_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *tmp, *newobj, *item; - Py_ssize_t i, n; - - assert(PyType_IsSubtype(type, &PyTuple_Type)); - tmp = tuple_new(&PyTuple_Type, args, kwds); - if (tmp == NULL) - return NULL; - assert(PyTuple_Check(tmp)); - newobj = type->tp_alloc(type, n = PyTuple_GET_SIZE(tmp)); - if (newobj == NULL) - return NULL; - for (i = 0; i < n; i++) { - item = PyTuple_GET_ITEM(tmp, i); - Py_INCREF(item); - PyTuple_SET_ITEM(newobj, i, item); - } - Py_DECREF(tmp); - return newobj; -} - -PyDoc_STRVAR(tuple_doc, -"tuple() -> empty tuple\n\ -tuple(iterable) -> tuple initialized from iterable's items\n\ -\n\ -If the argument is a tuple, the return value is the same object."); - -static PySequenceMethods tuple_as_sequence = { - (lenfunc)tuplelength, /* sq_length */ - (binaryfunc)tupleconcat, /* sq_concat */ - (ssizeargfunc)tuplerepeat, /* sq_repeat */ - (ssizeargfunc)tupleitem, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ - (objobjproc)tuplecontains, /* sq_contains */ -}; - -static PyObject* -tuplesubscript(PyTupleObject* self, PyObject* item) -{ - if (PyIndex_Check(item)) { - Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); - if (i == -1 && PyErr_Occurred()) - return NULL; - if (i < 0) - i += PyTuple_GET_SIZE(self); - return tupleitem(self, i); - } - else if (PySlice_Check(item)) { - Py_ssize_t start, stop, step, slicelength, cur, i; - PyObject* result; - PyObject* it; - PyObject **src, **dest; - - if (PySlice_GetIndicesEx(item, - PyTuple_GET_SIZE(self), - &start, &stop, &step, &slicelength) < 0) { - return NULL; - } - - if (slicelength <= 0) { - return PyTuple_New(0); - } - else if (start == 0 && step == 1 && - slicelength == PyTuple_GET_SIZE(self) && - PyTuple_CheckExact(self)) { - Py_INCREF(self); - return (PyObject *)self; - } - else { - result = PyTuple_New(slicelength); - if (!result) return NULL; - - src = self->ob_item; - dest = ((PyTupleObject *)result)->ob_item; - for (cur = start, i = 0; i < slicelength; - cur += step, i++) { - it = src[cur]; - Py_INCREF(it); - dest[i] = it; - } - - return result; - } - } - else { - PyErr_Format(PyExc_TypeError, - "tuple indices must be integers, not %.200s", - Py_TYPE(item)->tp_name); - return NULL; - } -} - -static PyObject * -tuple_getnewargs(PyTupleObject *v) -{ - return Py_BuildValue("(N)", tupleslice(v, 0, Py_SIZE(v))); - -} - -static PyObject * -tuple_sizeof(PyTupleObject *self) -{ - Py_ssize_t res; - - res = PyTuple_Type.tp_basicsize + Py_SIZE(self) * sizeof(PyObject *); - return PyLong_FromSsize_t(res); -} - -PyDoc_STRVAR(index_doc, -"T.index(value, [start, [stop]]) -> integer -- return first index of value.\n" -"Raises ValueError if the value is not present." -); -PyDoc_STRVAR(count_doc, -"T.count(value) -> integer -- return number of occurrences of value"); -PyDoc_STRVAR(sizeof_doc, -"T.__sizeof__() -- size of T in memory, in bytes"); - -static PyMethodDef tuple_methods[] = { - {"__getnewargs__", (PyCFunction)tuple_getnewargs, METH_NOARGS}, - {"__sizeof__", (PyCFunction)tuple_sizeof, METH_NOARGS, sizeof_doc}, - {"index", (PyCFunction)tupleindex, METH_VARARGS, index_doc}, - {"count", (PyCFunction)tuplecount, METH_O, count_doc}, - {NULL, NULL} /* sentinel */ -}; - -static PyMappingMethods tuple_as_mapping = { - (lenfunc)tuplelength, - (binaryfunc)tuplesubscript, - 0 -}; - -static PyObject *tuple_iter(PyObject *seq); - -PyTypeObject PyTuple_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "tuple", - sizeof(PyTupleObject) - sizeof(PyObject *), - sizeof(PyObject *), - (destructor)tupledealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - (reprfunc)tuplerepr, /* tp_repr */ - 0, /* tp_as_number */ - &tuple_as_sequence, /* tp_as_sequence */ - &tuple_as_mapping, /* tp_as_mapping */ - (hashfunc)tuplehash, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TUPLE_SUBCLASS, /* tp_flags */ - tuple_doc, /* tp_doc */ - (traverseproc)tupletraverse, /* tp_traverse */ - 0, /* tp_clear */ - tuplerichcompare, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - tuple_iter, /* tp_iter */ - 0, /* tp_iternext */ - tuple_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - tuple_new, /* tp_new */ - PyObject_GC_Del, /* tp_free */ -}; - -/* The following function breaks the notion that tuples are immutable: - it changes the size of a tuple. We get away with this only if there - is only one module referencing the object. You can also think of it - as creating a new tuple object and destroying the old one, only more - efficiently. In any case, don't use this if the tuple may already be - known to some other part of the code. */ - -int -_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) -{ - register PyTupleObject *v; - register PyTupleObject *sv; - Py_ssize_t i; - Py_ssize_t oldsize; - - v = (PyTupleObject *) *pv; - if (v == NULL || Py_TYPE(v) != &PyTuple_Type || - (Py_SIZE(v) != 0 && Py_REFCNT(v) != 1)) { - *pv = 0; - Py_XDECREF(v); - PyErr_BadInternalCall(); - return -1; - } - oldsize = Py_SIZE(v); - if (oldsize == newsize) - return 0; - - if (oldsize == 0) { - /* Empty tuples are often shared, so we should never - resize them in-place even if we do own the only - (current) reference */ - Py_DECREF(v); - *pv = PyTuple_New(newsize); - return *pv == NULL ? -1 : 0; - } - - /* XXX UNREF/NEWREF interface should be more symmetrical */ - _Py_DEC_REFTOTAL; - if (_PyObject_GC_IS_TRACKED(v)) - _PyObject_GC_UNTRACK(v); - _Py_ForgetReference((PyObject *) v); - /* DECREF items deleted by shrinkage */ - for (i = newsize; i < oldsize; i++) { - Py_XDECREF(v->ob_item[i]); - v->ob_item[i] = NULL; - } - sv = PyObject_GC_Resize(PyTupleObject, v, newsize); - if (sv == NULL) { - *pv = NULL; - PyObject_GC_Del(v); - return -1; - } - _Py_NewReference((PyObject *) sv); - /* Zero out items added by growing */ - if (newsize > oldsize) - memset(&sv->ob_item[oldsize], 0, - sizeof(*sv->ob_item) * (newsize - oldsize)); - *pv = (PyObject *) sv; - _PyObject_GC_TRACK(sv); - return 0; -} - -int -PyTuple_ClearFreeList(void) -{ - int freelist_size = 0; - return freelist_size; -} - -void -PyTuple_Fini(void) -{ -} - -/*********************** Tuple Iterator **************************/ - -typedef struct { - PyObject_HEAD - long it_index; - PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */ -} tupleiterobject; - -static void -tupleiter_dealloc(tupleiterobject *it) -{ - _PyObject_GC_UNTRACK(it); - Py_XDECREF(it->it_seq); - PyObject_GC_Del(it); -} - -static int -tupleiter_traverse(tupleiterobject *it, visitproc visit, void *arg) -{ - Py_VISIT(it->it_seq); - return 0; -} - -static PyObject * -tupleiter_next(tupleiterobject *it) -{ - PyTupleObject *seq; - PyObject *item; - - assert(it != NULL); - seq = it->it_seq; - if (seq == NULL) - return NULL; - assert(PyTuple_Check(seq)); - - if (it->it_index < PyTuple_GET_SIZE(seq)) { - item = PyTuple_GET_ITEM(seq, it->it_index); - ++it->it_index; - Py_INCREF(item); - return item; - } - - Py_DECREF(seq); - it->it_seq = NULL; - return NULL; -} - -static PyObject * -tupleiter_len(tupleiterobject *it) -{ - Py_ssize_t len = 0; - if (it->it_seq) - len = PyTuple_GET_SIZE(it->it_seq) - it->it_index; - return PyLong_FromSsize_t(len); -} - -PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); - -static PyMethodDef tupleiter_methods[] = { - {"__length_hint__", (PyCFunction)tupleiter_len, METH_NOARGS, length_hint_doc}, - {NULL, NULL} /* sentinel */ -}; - -PyTypeObject PyTupleIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "tuple_iterator", /* tp_name */ - sizeof(tupleiterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)tupleiter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - 0, /* tp_doc */ - (traverseproc)tupleiter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)tupleiter_next, /* tp_iternext */ - tupleiter_methods, /* tp_methods */ - 0, -}; - -static PyObject * -tuple_iter(PyObject *seq) -{ - tupleiterobject *it; - - if (!PyTuple_Check(seq)) { - PyErr_BadInternalCall(); - return NULL; - } - it = PyObject_GC_New(tupleiterobject, &PyTupleIter_Type); - if (it == NULL) - return NULL; - it->it_index = 0; - Py_INCREF(seq); - it->it_seq = (PyTupleObject *)seq; - _PyObject_GC_TRACK(it); - return (PyObject *)it; -}
--- a/cos/python/Objects/unicodeobject.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14135 +0,0 @@ -/* - -Unicode implementation based on original code by Fredrik Lundh, -modified by Marc-Andre Lemburg <mal@lemburg.com>. - -Major speed upgrades to the method implementations at the Reykjavik -NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. - -Copyright (c) Corporation for National Research Initiatives. - --------------------------------------------------------------------- -The original string type implementation is: - - Copyright (c) 1999 by Secret Labs AB - Copyright (c) 1999 by Fredrik Lundh - -By obtaining, using, and/or copying this software and/or its -associated documentation, you agree that you have read, understood, -and will comply with the following terms and conditions: - -Permission to use, copy, modify, and distribute this software and its -associated documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appears in all -copies, and that both that copyright notice and this permission notice -appear in supporting documentation, and that the name of Secret Labs -AB or the author not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO -THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR -ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT -OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. --------------------------------------------------------------------- - -*/ - -#define PY_SSIZE_T_CLEAN -#include "Python.h" -#include "ucnhash.h" - -/* Endianness switches; defaults to little endian */ - -#ifdef WORDS_BIGENDIAN -# define BYTEORDER_IS_BIG_ENDIAN -#else -# define BYTEORDER_IS_LITTLE_ENDIAN -#endif - -/* --- Globals ------------------------------------------------------------ - - The globals are initialized by the _PyUnicode_Init() API and should - not be used before calling that API. - -*/ - - -/* Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) */ -#define MAX_UNICODE 0x10ffff - -#ifdef Py_DEBUG -# define _PyUnicode_CHECK(op) _PyUnicode_CheckConsistency(op, 0) -#else -# define _PyUnicode_CHECK(op) PyUnicode_Check(op) -#endif - -#define _PyUnicode_UTF8(op) \ - (((PyCompactUnicodeObject*)(op))->utf8) -#define PyUnicode_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - assert(PyUnicode_IS_READY(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((char*)((PyASCIIObject*)(op) + 1)) : \ - _PyUnicode_UTF8(op)) -#define _PyUnicode_UTF8_LENGTH(op) \ - (((PyCompactUnicodeObject*)(op))->utf8_length) -#define PyUnicode_UTF8_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - assert(PyUnicode_IS_READY(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((PyASCIIObject*)(op))->length : \ - _PyUnicode_UTF8_LENGTH(op)) -#define _PyUnicode_WSTR(op) \ - (((PyASCIIObject*)(op))->wstr) -#define _PyUnicode_WSTR_LENGTH(op) \ - (((PyCompactUnicodeObject*)(op))->wstr_length) -#define _PyUnicode_LENGTH(op) \ - (((PyASCIIObject *)(op))->length) -#define _PyUnicode_STATE(op) \ - (((PyASCIIObject *)(op))->state) -#define _PyUnicode_HASH(op) \ - (((PyASCIIObject *)(op))->hash) -#define _PyUnicode_KIND(op) \ - (assert(_PyUnicode_CHECK(op)), \ - ((PyASCIIObject *)(op))->state.kind) -#define _PyUnicode_GET_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - ((PyASCIIObject *)(op))->length) -#define _PyUnicode_DATA_ANY(op) \ - (((PyUnicodeObject*)(op))->data.any) - -#undef PyUnicode_READY -#define PyUnicode_READY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (PyUnicode_IS_READY(op) ? \ - 0 : \ - _PyUnicode_Ready(op))) - -#define _PyUnicode_SHARE_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ - (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) -#define _PyUnicode_SHARE_WSTR(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) - -/* true if the Unicode object has an allocated UTF-8 memory block - (not shared with other data) */ -#define _PyUnicode_HAS_UTF8_MEMORY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (!PyUnicode_IS_COMPACT_ASCII(op) \ - && _PyUnicode_UTF8(op) \ - && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) - -/* true if the Unicode object has an allocated wstr memory block - (not shared with other data) */ -#define _PyUnicode_HAS_WSTR_MEMORY(op) \ - (assert(_PyUnicode_CHECK(op)), \ - (_PyUnicode_WSTR(op) && \ - (!PyUnicode_IS_READY(op) || \ - _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) - -/* Generic helper macro to convert characters of different types. - from_type and to_type have to be valid type names, begin and end - are pointers to the source characters which should be of type - "from_type *". to is a pointer of type "to_type *" and points to the - buffer where the result characters are written to. */ -#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \ - do { \ - to_type *_to = (to_type *) to; \ - const from_type *_iter = (begin); \ - const from_type *_end = (end); \ - Py_ssize_t n = (_end) - (_iter); \ - const from_type *_unrolled_end = \ - _iter + (n & ~ (Py_ssize_t) 3); \ - while (_iter < (_unrolled_end)) { \ - _to[0] = (to_type) _iter[0]; \ - _to[1] = (to_type) _iter[1]; \ - _to[2] = (to_type) _iter[2]; \ - _to[3] = (to_type) _iter[3]; \ - _iter += 4; _to += 4; \ - } \ - while (_iter < (_end)) \ - *_to++ = (to_type) *_iter++; \ - } while (0) - -/* The Unicode string has been modified: reset the hash */ -#define _PyUnicode_DIRTY(op) do { _PyUnicode_HASH(op) = -1; } while (0) - -/* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) -*/ -static PyObject *interned; - -/* The empty Unicode object is shared to improve performance. */ -static PyObject *unicode_empty; - -/* List of static strings. */ -static _Py_Identifier *static_strings; - -/* Single character Unicode strings in the Latin-1 range are being - shared as well. */ -static PyObject *unicode_latin1[256]; - -/* Fast detection of the most frequent whitespace characters */ -const unsigned char _Py_ascii_whitespace[] = { - 0, 0, 0, 0, 0, 0, 0, 0, -/* case 0x0009: * CHARACTER TABULATION */ -/* case 0x000A: * LINE FEED */ -/* case 0x000B: * LINE TABULATION */ -/* case 0x000C: * FORM FEED */ -/* case 0x000D: * CARRIAGE RETURN */ - 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -/* case 0x001C: * FILE SEPARATOR */ -/* case 0x001D: * GROUP SEPARATOR */ -/* case 0x001E: * RECORD SEPARATOR */ -/* case 0x001F: * UNIT SEPARATOR */ - 0, 0, 0, 0, 1, 1, 1, 1, -/* case 0x0020: * SPACE */ - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* forward */ -static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); -static PyObject* get_latin1_char(unsigned char ch); -static void copy_characters( - PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many); - -static PyObject * -unicode_fromascii(const unsigned char *s, Py_ssize_t size); -static PyObject * -_PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size); -static PyObject * -_PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size); -static PyObject * -_PyUnicode_FromUCS4(const Py_UCS4 *s, Py_ssize_t size); - -static PyObject * -unicode_encode_call_errorhandler(const char *errors, - PyObject **errorHandler,const char *encoding, const char *reason, - PyObject *unicode, PyObject **exceptionObject, - Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); - -static void -raise_encode_exception(PyObject **exceptionObject, - const char *encoding, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason); - -/* Same for linebreaks */ -static unsigned char ascii_linebreak[] = { - 0, 0, 0, 0, 0, 0, 0, 0, -/* 0x000A, * LINE FEED */ -/* 0x000B, * LINE TABULATION */ -/* 0x000C, * FORM FEED */ -/* 0x000D, * CARRIAGE RETURN */ - 0, 0, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -/* 0x001C, * FILE SEPARATOR */ -/* 0x001D, * GROUP SEPARATOR */ -/* 0x001E, * RECORD SEPARATOR */ - 0, 0, 0, 0, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* The max unicode value is always 0x10FFFF while using the PEP-393 API. - This function is kept for backward compatibility with the old API. */ -Py_UNICODE -PyUnicode_GetMax(void) -{ -#ifdef Py_UNICODE_WIDE - return 0x10FFFF; -#else - /* This is actually an illegal character, so it should - not be passed to unichr. */ - return 0xFFFF; -#endif -} - -#ifdef Py_DEBUG -int -_PyUnicode_CheckConsistency(PyObject *op, int check_content) -{ - PyASCIIObject *ascii; - unsigned int kind; - - assert(PyUnicode_Check(op)); - - ascii = (PyASCIIObject *)op; - kind = ascii->state.kind; - - if (ascii->state.ascii == 1 && ascii->state.compact == 1) { - assert(kind == PyUnicode_1BYTE_KIND); - assert(ascii->state.ready == 1); - } - else { - PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; - void *data; - - if (ascii->state.compact == 1) { - data = compact + 1; - assert(kind == PyUnicode_1BYTE_KIND - || kind == PyUnicode_2BYTE_KIND - || kind == PyUnicode_4BYTE_KIND); - assert(ascii->state.ascii == 0); - assert(ascii->state.ready == 1); - assert (compact->utf8 != data); - } - else { - PyUnicodeObject *unicode = (PyUnicodeObject *)op; - - data = unicode->data.any; - if (kind == PyUnicode_WCHAR_KIND) { - assert(ascii->length == 0); - assert(ascii->hash == -1); - assert(ascii->state.compact == 0); - assert(ascii->state.ascii == 0); - assert(ascii->state.ready == 0); - assert(ascii->state.interned == SSTATE_NOT_INTERNED); - assert(ascii->wstr != NULL); - assert(data == NULL); - assert(compact->utf8 == NULL); - } - else { - assert(kind == PyUnicode_1BYTE_KIND - || kind == PyUnicode_2BYTE_KIND - || kind == PyUnicode_4BYTE_KIND); - assert(ascii->state.compact == 0); - assert(ascii->state.ready == 1); - assert(data != NULL); - if (ascii->state.ascii) { - assert (compact->utf8 == data); - assert (compact->utf8_length == ascii->length); - } - else - assert (compact->utf8 != data); - } - } - if (kind != PyUnicode_WCHAR_KIND) { - if ( - kind == PyUnicode_4BYTE_KIND - ) - { - assert(ascii->wstr == data); - assert(compact->wstr_length == ascii->length); - } else - assert(ascii->wstr != data); - } - - if (compact->utf8 == NULL) - assert(compact->utf8_length == 0); - if (ascii->wstr == NULL) - assert(compact->wstr_length == 0); - } - /* check that the best kind is used */ - if (check_content && kind != PyUnicode_WCHAR_KIND) - { - Py_ssize_t i; - Py_UCS4 maxchar = 0; - void *data = PyUnicode_DATA(ascii); - for (i=0; i < ascii->length; i++) - { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (ch > maxchar) - maxchar = ch; - } - if (kind == PyUnicode_1BYTE_KIND) { - if (ascii->state.ascii == 0) { - assert(maxchar >= 128); - assert(maxchar <= 255); - } - else - assert(maxchar < 128); - } - else if (kind == PyUnicode_2BYTE_KIND) { - assert(maxchar >= 0x100); - assert(maxchar <= 0xFFFF); - } - else { - assert(maxchar >= 0x10000); - assert(maxchar <= MAX_UNICODE); - } - } - return 1; -} -#endif - -static PyObject* -unicode_result_wchar(PyObject *unicode) -{ -#ifndef Py_DEBUG - Py_ssize_t len; - - assert(Py_REFCNT(unicode) == 1); - - len = _PyUnicode_WSTR_LENGTH(unicode); - if (len == 0) { - Py_INCREF(unicode_empty); - Py_DECREF(unicode); - return unicode_empty; - } - - if (len == 1) { - wchar_t ch = _PyUnicode_WSTR(unicode)[0]; - if (ch < 256) { - PyObject *latin1_char = get_latin1_char((unsigned char)ch); - Py_DECREF(unicode); - return latin1_char; - } - } - - if (_PyUnicode_Ready(unicode) < 0) { - Py_XDECREF(unicode); - return NULL; - } -#else - /* don't make the result ready in debug mode to ensure that the caller - makes the string ready before using it */ - assert(_PyUnicode_CheckConsistency(unicode, 1)); -#endif - return unicode; -} - -static PyObject* -unicode_result_ready(PyObject *unicode) -{ - Py_ssize_t length; - - length = PyUnicode_GET_LENGTH(unicode); - if (length == 0) { - if (unicode != unicode_empty) { - Py_INCREF(unicode_empty); - Py_DECREF(unicode); - } - return unicode_empty; - } - - if (length == 1) { - Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); - if (ch < 256) { - PyObject *latin1_char = unicode_latin1[ch]; - if (latin1_char != NULL) { - if (unicode != latin1_char) { - Py_INCREF(latin1_char); - Py_DECREF(unicode); - } - return latin1_char; - } - else { - assert(_PyUnicode_CheckConsistency(unicode, 1)); - Py_INCREF(unicode); - unicode_latin1[ch] = unicode; - return unicode; - } - } - } - - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return unicode; -} - -static PyObject* -unicode_result(PyObject *unicode) -{ - assert(_PyUnicode_CHECK(unicode)); - if (PyUnicode_IS_READY(unicode)) - return unicode_result_ready(unicode); - else - return unicode_result_wchar(unicode); -} - -#ifdef HAVE_MBCS -static OSVERSIONINFOEX winver; -#endif - -/* --- Bloom Filters ----------------------------------------------------- */ - -/* stuff to implement simple "bloom filters" for Unicode characters. - to keep things simple, we use a single bitmask, using the least 5 - bits from each unicode characters as the bit index. */ - -/* the linebreak mask is set up by Unicode_Init below */ - -#if LONG_BIT >= 128 -#define BLOOM_WIDTH 128 -#elif LONG_BIT >= 64 -#define BLOOM_WIDTH 64 -#elif LONG_BIT >= 32 -#define BLOOM_WIDTH 32 -#else -#error "LONG_BIT is smaller than 32" -#endif - -#define BLOOM_MASK unsigned long - -static BLOOM_MASK bloom_linebreak; - -#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) -#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) - -#define BLOOM_LINEBREAK(ch) \ - ((ch) < 128U ? ascii_linebreak[(ch)] : \ - (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) - -Py_LOCAL_INLINE(BLOOM_MASK) -make_bloom_mask(int kind, void* ptr, Py_ssize_t len) -{ - /* calculate simple bloom-style bitmask for a given unicode string */ - - BLOOM_MASK mask; - Py_ssize_t i; - - mask = 0; - for (i = 0; i < len; i++) - BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i)); - - return mask; -} - -#define BLOOM_MEMBER(mask, chr, str) \ - (BLOOM(mask, chr) \ - && (PyUnicode_FindChar(str, chr, 0, PyUnicode_GET_LENGTH(str), 1) >= 0)) - -/* Compilation of templated routines */ - -#include "stringlib/asciilib.h" -#include "stringlib/fastsearch.h" -#include "stringlib/partition.h" -#include "stringlib/split.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" -#include "stringlib/undef.h" - -#include "stringlib/ucs1lib.h" -#include "stringlib/fastsearch.h" -#include "stringlib/partition.h" -#include "stringlib/split.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" -#include "stringlib/undef.h" - -#include "stringlib/ucs2lib.h" -#include "stringlib/fastsearch.h" -#include "stringlib/partition.h" -#include "stringlib/split.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" -#include "stringlib/undef.h" - -#include "stringlib/ucs4lib.h" -#include "stringlib/fastsearch.h" -#include "stringlib/partition.h" -#include "stringlib/split.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" -#include "stringlib/undef.h" - -#include "stringlib/unicodedefs.h" -#include "stringlib/fastsearch.h" -#include "stringlib/count.h" -#include "stringlib/find.h" -#include "stringlib/undef.h" - -/* --- Unicode Object ----------------------------------------------------- */ - -static PyObject * -fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s)); - -Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind, - Py_ssize_t size, Py_UCS4 ch, - int direction) -{ - int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH; - - switch (kind) { - case PyUnicode_1BYTE_KIND: - { - Py_UCS1 ch1 = (Py_UCS1) ch; - if (ch1 == ch) - return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode); - else - return -1; - } - case PyUnicode_2BYTE_KIND: - { - Py_UCS2 ch2 = (Py_UCS2) ch; - if (ch2 == ch) - return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode); - else - return -1; - } - case PyUnicode_4BYTE_KIND: - return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode); - default: - assert(0); - return -1; - } -} - -static PyObject* -resize_compact(PyObject *unicode, Py_ssize_t length) -{ - Py_ssize_t char_size; - Py_ssize_t struct_size; - Py_ssize_t new_size; - int share_wstr; - - assert(PyUnicode_IS_READY(unicode)); - char_size = PyUnicode_KIND(unicode); - if (PyUnicode_IS_COMPACT_ASCII(unicode)) - struct_size = sizeof(PyASCIIObject); - else - struct_size = sizeof(PyCompactUnicodeObject); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); - - _Py_DEC_REFTOTAL; - _Py_ForgetReference(unicode); - - if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { - PyErr_NoMemory(); - return NULL; - } - new_size = (struct_size + (length + 1) * char_size); - - unicode = (PyObject *)PyObject_REALLOC((char *)unicode, new_size); - if (unicode == NULL) { - PyObject_Del(unicode); - PyErr_NoMemory(); - return NULL; - } - _Py_NewReference(unicode); - _PyUnicode_LENGTH(unicode) = length; - if (share_wstr) { - _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); - if (!PyUnicode_IS_COMPACT_ASCII(unicode)) - _PyUnicode_WSTR_LENGTH(unicode) = length; - } - PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), - length, 0); - return unicode; -} - -static int -resize_inplace(PyObject *unicode, Py_ssize_t length) -{ - wchar_t *wstr; - assert(!PyUnicode_IS_COMPACT(unicode)); - assert(Py_REFCNT(unicode) == 1); - - _PyUnicode_DIRTY(unicode); - - if (PyUnicode_IS_READY(unicode)) { - Py_ssize_t char_size; - Py_ssize_t new_size; - int share_wstr, share_utf8; - void *data; - - data = _PyUnicode_DATA_ANY(unicode); - assert(data != NULL); - char_size = PyUnicode_KIND(unicode); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); - share_utf8 = _PyUnicode_SHARE_UTF8(unicode); - if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) - { - PyObject_DEL(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - } - - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { - PyErr_NoMemory(); - return -1; - } - new_size = (length + 1) * char_size; - - data = (PyObject *)PyObject_REALLOC(data, new_size); - if (data == NULL) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_DATA_ANY(unicode) = data; - if (share_wstr) { - _PyUnicode_WSTR(unicode) = data; - _PyUnicode_WSTR_LENGTH(unicode) = length; - } - if (share_utf8) { - _PyUnicode_UTF8(unicode) = data; - _PyUnicode_UTF8_LENGTH(unicode) = length; - } - _PyUnicode_LENGTH(unicode) = length; - PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); - if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { - assert(_PyUnicode_CheckConsistency(unicode, 0)); - return 0; - } - } - assert(_PyUnicode_WSTR(unicode) != NULL); - - /* check for integer overflow */ - if (length > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { - PyErr_NoMemory(); - return -1; - } - wstr = _PyUnicode_WSTR(unicode); - wstr = PyObject_REALLOC(wstr, sizeof(wchar_t) * (length + 1)); - if (!wstr) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_WSTR(unicode) = wstr; - _PyUnicode_WSTR(unicode)[length] = 0; - _PyUnicode_WSTR_LENGTH(unicode) = length; - assert(_PyUnicode_CheckConsistency(unicode, 0)); - return 0; -} - -static PyObject* -resize_copy(PyObject *unicode, Py_ssize_t length) -{ - Py_ssize_t copy_length; - if (PyUnicode_IS_COMPACT(unicode)) { - PyObject *copy; - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - copy_characters(copy, 0, unicode, 0, copy_length); - return copy; - } - else { - PyObject *w; - assert(_PyUnicode_WSTR(unicode) != NULL); - assert(_PyUnicode_DATA_ANY(unicode) == NULL); - w = (PyObject*)_PyUnicode_New(length); - if (w == NULL) - return NULL; - copy_length = _PyUnicode_WSTR_LENGTH(unicode); - copy_length = Py_MIN(copy_length, length); - Py_UNICODE_COPY(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode), - copy_length); - return w; - } -} - -/* We allocate one more byte to make sure the string is - Ux0000 terminated; some code (e.g. new_identifier) - relies on that. - - XXX This allocator could further be enhanced by assuring that the - free list never reduces its size below 1. - -*/ - -#ifdef Py_DEBUG -static int unicode_old_new_calls = 0; -#endif - -static PyUnicodeObject * -_PyUnicode_New(Py_ssize_t length) -{ - register PyUnicodeObject *unicode; - size_t new_size; - - /* Optimization for empty strings */ - if (length == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return (PyUnicodeObject*)unicode_empty; - } - - /* Ensure we won't overflow the size. */ - if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { - return (PyUnicodeObject *)PyErr_NoMemory(); - } - if (length < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to _PyUnicode_New"); - return NULL; - } - -#ifdef Py_DEBUG - ++unicode_old_new_calls; -#endif - - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); - if (unicode == NULL) - return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); - _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size); - if (!_PyUnicode_WSTR(unicode)) { - PyErr_NoMemory(); - goto onError; - } - - /* Initialize the first element to guard against cases where - * the caller fails before initializing str -- unicode_resize() - * reads str[0], and the Keep-Alive optimization can keep memory - * allocated for str alive across a call to unicode_dealloc(unicode). - * We don't want unicode_resize to read uninitialized memory in - * that case. - */ - _PyUnicode_WSTR(unicode)[0] = 0; - _PyUnicode_WSTR(unicode)[length] = 0; - _PyUnicode_WSTR_LENGTH(unicode) = length; - _PyUnicode_HASH(unicode) = -1; - _PyUnicode_STATE(unicode).interned = 0; - _PyUnicode_STATE(unicode).kind = 0; - _PyUnicode_STATE(unicode).compact = 0; - _PyUnicode_STATE(unicode).ready = 0; - _PyUnicode_STATE(unicode).ascii = 0; - _PyUnicode_DATA_ANY(unicode) = NULL; - _PyUnicode_LENGTH(unicode) = 0; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); - return unicode; - - onError: - /* XXX UNREF/NEWREF interface should be more symmetrical */ - _Py_DEC_REFTOTAL; - _Py_ForgetReference((PyObject *)unicode); - PyObject_Del(unicode); - return NULL; -} - -static const char* -unicode_kind_name(PyObject *unicode) -{ - /* don't check consistency: unicode_kind_name() is called from - _PyUnicode_Dump() */ - if (!PyUnicode_IS_COMPACT(unicode)) - { - if (!PyUnicode_IS_READY(unicode)) - return "wstr"; - switch(PyUnicode_KIND(unicode)) - { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(unicode)) - return "legacy ascii"; - else - return "legacy latin1"; - case PyUnicode_2BYTE_KIND: - return "legacy UCS2"; - case PyUnicode_4BYTE_KIND: - return "legacy UCS4"; - default: - return "<legacy invalid kind>"; - } - } - assert(PyUnicode_IS_READY(unicode)); - switch(PyUnicode_KIND(unicode)) - { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(unicode)) - return "ascii"; - else - return "latin1"; - case PyUnicode_2BYTE_KIND: - return "UCS2"; - case PyUnicode_4BYTE_KIND: - return "UCS4"; - default: - return "<invalid compact kind>"; - } -} - -#ifdef Py_DEBUG -static int unicode_new_new_calls = 0; - -/* Functions wrapping macros for use in debugger */ -char *_PyUnicode_utf8(void *unicode){ - return PyUnicode_UTF8(unicode); -} - -void *_PyUnicode_compact_data(void *unicode) { - return _PyUnicode_COMPACT_DATA(unicode); -} -void *_PyUnicode_data(void *unicode){ - printf("obj %p\n", unicode); - printf("compact %d\n", PyUnicode_IS_COMPACT(unicode)); - printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode)); - printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1))); - printf("compact op %p\n", ((void*)((PyCompactUnicodeObject*)(unicode) + 1))); - printf("compact data %p\n", _PyUnicode_COMPACT_DATA(unicode)); - return PyUnicode_DATA(unicode); -} - -void -_PyUnicode_Dump(PyObject *op) -{ - PyASCIIObject *ascii = (PyASCIIObject *)op; - PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op; - PyUnicodeObject *unicode = (PyUnicodeObject *)op; - void *data; - - if (ascii->state.compact) - { - if (ascii->state.ascii) - data = (ascii + 1); - else - data = (compact + 1); - } - else - data = unicode->data.any; - printf("%s: len=%zu, ",unicode_kind_name(op), ascii->length); - - if (ascii->wstr == data) - printf("shared "); - printf("wstr=%p", ascii->wstr); - - if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { - printf(" (%zu), ", compact->wstr_length); - if (!ascii->state.compact && compact->utf8 == unicode->data.any) - printf("shared "); - printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length); - } - printf(", data=%p\n", data); -} -#endif - -PyObject * -PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) -{ - PyObject *obj; - PyCompactUnicodeObject *unicode; - void *data; - int kind_state; - int is_sharing, is_ascii; - Py_ssize_t char_size; - Py_ssize_t struct_size; - - /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - -#ifdef Py_DEBUG - ++unicode_new_new_calls; -#endif - - is_ascii = 0; - is_sharing = 0; - struct_size = sizeof(PyCompactUnicodeObject); - if (maxchar < 128) { - kind_state = PyUnicode_1BYTE_KIND; - char_size = 1; - is_ascii = 1; - struct_size = sizeof(PyASCIIObject); - } - else if (maxchar < 256) { - kind_state = PyUnicode_1BYTE_KIND; - char_size = 1; - } - else if (maxchar < 65536) { - kind_state = PyUnicode_2BYTE_KIND; - char_size = 2; - if (sizeof(wchar_t) == 2) - is_sharing = 1; - } - else { - kind_state = PyUnicode_4BYTE_KIND; - char_size = 4; - if (sizeof(wchar_t) == 4) - is_sharing = 1; - } - - /* Ensure we won't overflow the size. */ - if (size < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to PyUnicode_New"); - return NULL; - } - if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) - return PyErr_NoMemory(); - - /* Duplicated allocation code from _PyObject_New() instead of a call to - * PyObject_New() so we are able to allocate space for the object and - * it's data buffer. - */ - obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size); - if (obj == NULL) - return PyErr_NoMemory(); - obj = PyObject_INIT(obj, &PyUnicode_Type); - if (obj == NULL) - return NULL; - - unicode = (PyCompactUnicodeObject *)obj; - if (is_ascii) - data = ((PyASCIIObject*)obj) + 1; - else - data = unicode + 1; - _PyUnicode_LENGTH(unicode) = size; - _PyUnicode_HASH(unicode) = -1; - _PyUnicode_STATE(unicode).interned = 0; - _PyUnicode_STATE(unicode).kind = kind_state; - _PyUnicode_STATE(unicode).compact = 1; - _PyUnicode_STATE(unicode).ready = 1; - _PyUnicode_STATE(unicode).ascii = is_ascii; - if (is_ascii) { - ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; - } - else if (kind_state == PyUnicode_1BYTE_KIND) { - ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - unicode->utf8 = NULL; - unicode->utf8_length = 0; - } - else { - unicode->utf8 = NULL; - unicode->utf8_length = 0; - if (kind_state == PyUnicode_2BYTE_KIND) - ((Py_UCS2*)data)[size] = 0; - else /* kind_state == PyUnicode_4BYTE_KIND */ - ((Py_UCS4*)data)[size] = 0; - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; - } - else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; - } - } - assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0)); - return obj; -} - -static int -_PyUnicode_Dirty(PyObject *unicode) -{ - assert(_PyUnicode_CHECK(unicode)); - if (Py_REFCNT(unicode) != 1) { - PyErr_SetString(PyExc_SystemError, - "Cannot modify a string having more than 1 reference"); - return -1; - } - _PyUnicode_DIRTY(unicode); - return 0; -} - -static int -_copy_characters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many, int check_maxchar) -{ - unsigned int from_kind, to_kind; - void *from_data, *to_data; - int fast; - - assert(PyUnicode_Check(from)); - assert(PyUnicode_Check(to)); - assert(PyUnicode_IS_READY(from)); - assert(PyUnicode_IS_READY(to)); - - assert(PyUnicode_GET_LENGTH(from) >= how_many); - assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); - assert(0 <= how_many); - - if (how_many == 0) - return 0; - - from_kind = PyUnicode_KIND(from); - from_data = PyUnicode_DATA(from); - to_kind = PyUnicode_KIND(to); - to_data = PyUnicode_DATA(to); - -#ifdef Py_DEBUG - if (!check_maxchar - && (from_kind > to_kind - || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))) - { - const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); - Py_UCS4 ch; - Py_ssize_t i; - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - assert(ch <= to_maxchar); - } - } -#endif - fast = (from_kind == to_kind); - if (check_maxchar - && (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) - { - /* deny latin1 => ascii */ - fast = 0; - } - - if (fast) { - Py_MEMCPY((char*)to_data + to_kind * to_start, - (char*)from_data + from_kind * from_start, - to_kind * how_many); - } - else if (from_kind == PyUnicode_1BYTE_KIND - && to_kind == PyUnicode_2BYTE_KIND) - { - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS2, - PyUnicode_1BYTE_DATA(from) + from_start, - PyUnicode_1BYTE_DATA(from) + from_start + how_many, - PyUnicode_2BYTE_DATA(to) + to_start - ); - } - else if (from_kind == PyUnicode_1BYTE_KIND - && to_kind == PyUnicode_4BYTE_KIND) - { - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS4, - PyUnicode_1BYTE_DATA(from) + from_start, - PyUnicode_1BYTE_DATA(from) + from_start + how_many, - PyUnicode_4BYTE_DATA(to) + to_start - ); - } - else if (from_kind == PyUnicode_2BYTE_KIND - && to_kind == PyUnicode_4BYTE_KIND) - { - _PyUnicode_CONVERT_BYTES( - Py_UCS2, Py_UCS4, - PyUnicode_2BYTE_DATA(from) + from_start, - PyUnicode_2BYTE_DATA(from) + from_start + how_many, - PyUnicode_4BYTE_DATA(to) + to_start - ); - } - else { - /* check if max_char(from substring) <= max_char(to) */ - if (from_kind > to_kind - /* latin1 => ascii */ - || (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) - { - /* slow path to check for character overflow */ - const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); - Py_UCS4 ch; - Py_ssize_t i; - -#ifdef Py_DEBUG - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - assert(ch <= to_maxchar); - PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); - } -#else - if (!check_maxchar) { - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); - } - } - else { - for (i=0; i < how_many; i++) { - ch = PyUnicode_READ(from_kind, from_data, from_start + i); - if (ch > to_maxchar) - return 1; - PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); - } - } -#endif - } - else { - assert(0 && "inconsistent state"); - return 1; - } - } - return 0; -} - -static void -copy_characters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many) -{ - (void)_copy_characters(to, to_start, from, from_start, how_many, 0); -} - -Py_ssize_t -PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many) -{ - int err; - - if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) { - PyErr_BadInternalCall(); - return -1; - } - - if (PyUnicode_READY(from)) - return -1; - if (PyUnicode_READY(to)) - return -1; - - how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); - if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { - PyErr_Format(PyExc_SystemError, - "Cannot write %zi characters at %zi " - "in a string of %zi characters", - how_many, to_start, PyUnicode_GET_LENGTH(to)); - return -1; - } - - if (how_many == 0) - return 0; - - if (_PyUnicode_Dirty(to)) - return -1; - - err = _copy_characters(to, to_start, from, from_start, how_many, 1); - if (err) { - PyErr_Format(PyExc_SystemError, - "Cannot copy %s characters " - "into a string of %s characters", - unicode_kind_name(from), - unicode_kind_name(to)); - return -1; - } - return how_many; -} - -/* Find the maximum code point and count the number of surrogate pairs so a - correct string length can be computed before converting a string to UCS4. - This function counts single surrogates as a character and not as a pair. - - Return 0 on success, or -1 on error. */ -static int -find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, - Py_UCS4 *maxchar, Py_ssize_t *num_surrogates) -{ - const wchar_t *iter; - Py_UCS4 ch; - - assert(num_surrogates != NULL && maxchar != NULL); - *num_surrogates = 0; - *maxchar = 0; - - for (iter = begin; iter < end; ) { - { - ch = *iter; - iter++; - } - if (ch > *maxchar) { - *maxchar = ch; - if (*maxchar > MAX_UNICODE) { - PyErr_Format(PyExc_ValueError, - "character U+%x is not in range [U+0000; U+10ffff]", - ch); - return -1; - } - } - } - return 0; -} - -int -_PyUnicode_Ready(PyObject *unicode) -{ - wchar_t *end; - Py_UCS4 maxchar = 0; - Py_ssize_t num_surrogates; - - /* _PyUnicode_Ready() is only intended for old-style API usage where - strings were created using _PyObject_New() and where no canonical - representation (the str field) has been set yet aka strings - which are not yet ready. */ - assert(_PyUnicode_CHECK(unicode)); - assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); - assert(_PyUnicode_WSTR(unicode) != NULL); - assert(_PyUnicode_DATA_ANY(unicode) == NULL); - assert(_PyUnicode_UTF8(unicode) == NULL); - /* Actually, it should neither be interned nor be anything else: */ - assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); - - end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); - if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, - &maxchar, &num_surrogates) == -1) - return -1; - - if (maxchar < 256) { - _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC(_PyUnicode_WSTR_LENGTH(unicode) + 1); - if (!_PyUnicode_DATA_ANY(unicode)) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_CONVERT_BYTES(wchar_t, unsigned char, - _PyUnicode_WSTR(unicode), end, - PyUnicode_1BYTE_DATA(unicode)); - PyUnicode_1BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_STATE(unicode).kind = PyUnicode_1BYTE_KIND; - if (maxchar < 128) { - _PyUnicode_STATE(unicode).ascii = 1; - _PyUnicode_UTF8(unicode) = _PyUnicode_DATA_ANY(unicode); - _PyUnicode_UTF8_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - } - else { - _PyUnicode_STATE(unicode).ascii = 0; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - } - PyObject_FREE(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - } - /* In this case we might have to convert down from 4-byte native - wchar_t to 2-byte unicode. */ - else if (maxchar < 65536) { - assert(num_surrogates == 0 && - "FindMaxCharAndNumSurrogatePairs() messed up"); - - /* sizeof(wchar_t) == 4 */ - _PyUnicode_DATA_ANY(unicode) = PyObject_MALLOC( - 2 * (_PyUnicode_WSTR_LENGTH(unicode) + 1)); - if (!_PyUnicode_DATA_ANY(unicode)) { - PyErr_NoMemory(); - return -1; - } - _PyUnicode_CONVERT_BYTES(wchar_t, Py_UCS2, - _PyUnicode_WSTR(unicode), end, - PyUnicode_2BYTE_DATA(unicode)); - PyUnicode_2BYTE_DATA(unicode)[_PyUnicode_WSTR_LENGTH(unicode)] = '\0'; - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_STATE(unicode).kind = PyUnicode_2BYTE_KIND; - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - PyObject_FREE(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - } - /* maxchar exeeds 16 bit, wee need 4 bytes for unicode characters */ - else { - assert(num_surrogates == 0); - - _PyUnicode_DATA_ANY(unicode) = _PyUnicode_WSTR(unicode); - _PyUnicode_LENGTH(unicode) = _PyUnicode_WSTR_LENGTH(unicode); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; - _PyUnicode_STATE(unicode).kind = PyUnicode_4BYTE_KIND; - PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; - } - _PyUnicode_STATE(unicode).ready = 1; - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return 0; -} - -static void -unicode_dealloc(register PyObject *unicode) -{ - switch (PyUnicode_CHECK_INTERNED(unicode)) { - case SSTATE_NOT_INTERNED: - break; - - case SSTATE_INTERNED_MORTAL: - /* revive dead object temporarily for DelItem */ - Py_REFCNT(unicode) = 3; - if (PyDict_DelItem(interned, unicode) != 0) - Py_FatalError( - "deletion of interned string failed"); - break; - - case SSTATE_INTERNED_IMMORTAL: - Py_FatalError("Immortal interned string died."); - - default: - Py_FatalError("Inconsistent interned string state."); - } - - if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) - PyObject_DEL(_PyUnicode_WSTR(unicode)); - if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) - PyObject_DEL(_PyUnicode_UTF8(unicode)); - - if (PyUnicode_IS_COMPACT(unicode)) { - Py_TYPE(unicode)->tp_free(unicode); - } - else { - if (_PyUnicode_DATA_ANY(unicode)) - PyObject_DEL(_PyUnicode_DATA_ANY(unicode)); - Py_TYPE(unicode)->tp_free(unicode); - } -} - -#ifdef Py_DEBUG -static int -unicode_is_singleton(PyObject *unicode) -{ - PyASCIIObject *ascii = (PyASCIIObject *)unicode; - if (unicode == unicode_empty) - return 1; - if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) - { - Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0); - if (ch < 256 && unicode_latin1[ch] == unicode) - return 1; - } - return 0; -} -#endif - -static int -unicode_resizable(PyObject *unicode) -{ - if (Py_REFCNT(unicode) != 1) - return 0; - if (PyUnicode_CHECK_INTERNED(unicode)) - return 0; -#ifdef Py_DEBUG - /* singleton refcount is greater than 1 */ - assert(!unicode_is_singleton(unicode)); -#endif - return 1; -} - -static int -unicode_resize(PyObject **p_unicode, Py_ssize_t length) -{ - PyObject *unicode; - Py_ssize_t old_length; - - assert(p_unicode != NULL); - unicode = *p_unicode; - - assert(unicode != NULL); - assert(PyUnicode_Check(unicode)); - assert(0 <= length); - - if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) - old_length = PyUnicode_WSTR_LENGTH(unicode); - else - old_length = PyUnicode_GET_LENGTH(unicode); - if (old_length == length) - return 0; - - if (length == 0) { - Py_DECREF(*p_unicode); - *p_unicode = unicode_empty; - Py_INCREF(*p_unicode); - return 0; - } - - if (!unicode_resizable(unicode)) { - PyObject *copy = resize_copy(unicode, length); - if (copy == NULL) - return -1; - Py_DECREF(*p_unicode); - *p_unicode = copy; - return 0; - } - - if (PyUnicode_IS_COMPACT(unicode)) { - *p_unicode = resize_compact(unicode, length); - if (*p_unicode == NULL) - return -1; - assert(_PyUnicode_CheckConsistency(*p_unicode, 0)); - return 0; - } - return resize_inplace(unicode, length); -} - -int -PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length) -{ - PyObject *unicode; - if (p_unicode == NULL) { - PyErr_BadInternalCall(); - return -1; - } - unicode = *p_unicode; - if (unicode == NULL || !PyUnicode_Check(unicode) || length < 0) - { - PyErr_BadInternalCall(); - return -1; - } - return unicode_resize(p_unicode, length); -} - -static int -unicode_widen(PyObject **p_unicode, unsigned int maxchar) -{ - PyObject *result; - assert(PyUnicode_IS_READY(*p_unicode)); - if (maxchar <= PyUnicode_MAX_CHAR_VALUE(*p_unicode)) - return 0; - result = PyUnicode_New(PyUnicode_GET_LENGTH(*p_unicode), - maxchar); - if (result == NULL) - return -1; - PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, - PyUnicode_GET_LENGTH(*p_unicode)); - Py_DECREF(*p_unicode); - *p_unicode = result; - return 0; -} - -static int -unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos, - Py_UCS4 ch) -{ - if (unicode_widen(p_unicode, ch) < 0) - return -1; - PyUnicode_WRITE(PyUnicode_KIND(*p_unicode), - PyUnicode_DATA(*p_unicode), - (*pos)++, ch); - return 0; -} - -static PyObject* -get_latin1_char(unsigned char ch) -{ - PyObject *unicode = unicode_latin1[ch]; - if (!unicode) { - unicode = PyUnicode_New(1, ch); - if (!unicode) - return NULL; - PyUnicode_1BYTE_DATA(unicode)[0] = ch; - assert(_PyUnicode_CheckConsistency(unicode, 1)); - unicode_latin1[ch] = unicode; - } - Py_INCREF(unicode); - return unicode; -} - -PyObject * -PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) -{ - PyObject *unicode; - Py_UCS4 maxchar = 0; - Py_ssize_t num_surrogates; - - if (u == NULL) - return (PyObject*)_PyUnicode_New(size); - - /* If the Unicode data is known at construction time, we can apply - some optimizations which share commonly used objects. */ - - /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - - /* Single character Unicode objects in the Latin-1 range are - shared when using this constructor */ - if (size == 1 && *u < 256) - return get_latin1_char((unsigned char)*u); - - /* If not empty and not single character, copy the Unicode data - into the new object */ - if (find_maxchar_surrogates(u, u + size, - &maxchar, &num_surrogates) == -1) - return NULL; - - unicode = PyUnicode_New(size - num_surrogates, maxchar); - if (!unicode) - return NULL; - - switch (PyUnicode_KIND(unicode)) { - case PyUnicode_1BYTE_KIND: - _PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char, - u, u + size, PyUnicode_1BYTE_DATA(unicode)); - break; - case PyUnicode_2BYTE_KIND: -#if Py_UNICODE_SIZE == 2 - Py_MEMCPY(PyUnicode_2BYTE_DATA(unicode), u, size * 2); -#else - _PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2, - u, u + size, PyUnicode_2BYTE_DATA(unicode)); -#endif - break; - case PyUnicode_4BYTE_KIND: -#if SIZEOF_WCHAR_T == 2 - /* This is the only case which has to process surrogates, thus - a simple copy loop is not enough and we need a function. */ - unicode_convert_wchar_to_ucs4(u, u + size, unicode); -#else - assert(num_surrogates == 0); - Py_MEMCPY(PyUnicode_4BYTE_DATA(unicode), u, size * 4); -#endif - break; - default: - assert(0 && "Impossible state"); - } - - return unicode_result(unicode); -} - -PyObject * -PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) -{ - if (size < 0) { - PyErr_SetString(PyExc_SystemError, - "Negative size passed to PyUnicode_FromStringAndSize"); - return NULL; - } - - /* If the Unicode data is known at construction time, we can apply - some optimizations which share commonly used objects. - Also, this means the input must be UTF-8, so fall back to the - UTF-8 decoder at the end. */ - if (u != NULL) { - - /* Optimization for empty strings */ - if (size == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - - /* Single characters are shared when using this constructor. - Restrict to ASCII, since the input must be UTF-8. */ - if (size == 1 && (unsigned char)*u < 128) - return get_latin1_char((unsigned char)*u); - - return PyUnicode_DecodeUTF8(u, size, NULL); - } - - return (PyObject *)_PyUnicode_New(size); -} - -PyObject * -PyUnicode_FromString(const char *u) -{ - size_t size = strlen(u); - if (size > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, "input too long"); - return NULL; - } - - return PyUnicode_FromStringAndSize(u, size); -} - -PyObject * -_PyUnicode_FromId(_Py_Identifier *id) -{ - if (!id->object) { - id->object = PyUnicode_FromString(id->string); - if (!id->object) - return NULL; - PyUnicode_InternInPlace(&id->object); - assert(!id->next); - id->next = static_strings; - static_strings = id; - } - return id->object; -} - -void -_PyUnicode_ClearStaticStrings() -{ - _Py_Identifier *i; - for (i = static_strings; i; i = i->next) { - Py_DECREF(i->object); - i->object = NULL; - i->next = NULL; - } -} - -/* Internal function, don't check maximum character */ - -static PyObject* -unicode_fromascii(const unsigned char* s, Py_ssize_t size) -{ - PyObject *res; -#ifdef Py_DEBUG - const unsigned char *p; - const unsigned char *end = s + size; - for (p=s; p < end; p++) { - assert(*p < 128); - } -#endif - if (size == 1) - return get_latin1_char(s[0]); - res = PyUnicode_New(size, 127); - if (!res) - return NULL; - memcpy(PyUnicode_1BYTE_DATA(res), s, size); - return res; -} - -static Py_UCS4 -kind_maxchar_limit(unsigned int kind) -{ - switch(kind) { - case PyUnicode_1BYTE_KIND: - return 0x80; - case PyUnicode_2BYTE_KIND: - return 0x100; - case PyUnicode_4BYTE_KIND: - return 0x10000; - default: - assert(0 && "invalid kind"); - return MAX_UNICODE; - } -} - -static PyObject* -_PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size) -{ - PyObject *res; - unsigned char max_char; - - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - assert(size > 0); - if (size == 1) - return get_latin1_char(u[0]); - - max_char = ucs1lib_find_max_char(u, u + size); - res = PyUnicode_New(size, max_char); - if (!res) - return NULL; - memcpy(PyUnicode_1BYTE_DATA(res), u, size); - assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} - -static PyObject* -_PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size) -{ - PyObject *res; - Py_UCS2 max_char; - - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - assert(size > 0); - if (size == 1 && u[0] < 256) - return get_latin1_char((unsigned char)u[0]); - - max_char = ucs2lib_find_max_char(u, u + size); - res = PyUnicode_New(size, max_char); - if (!res) - return NULL; - if (max_char >= 256) - memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2)*size); - else { - _PyUnicode_CONVERT_BYTES( - Py_UCS2, Py_UCS1, u, u + size, PyUnicode_1BYTE_DATA(res)); - } - assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} - -static PyObject* -_PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) -{ - PyObject *res; - Py_UCS4 max_char; - - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - assert(size > 0); - if (size == 1 && u[0] < 256) - return get_latin1_char((unsigned char)u[0]); - - max_char = ucs4lib_find_max_char(u, u + size); - res = PyUnicode_New(size, max_char); - if (!res) - return NULL; - if (max_char < 256) - _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1, u, u + size, - PyUnicode_1BYTE_DATA(res)); - else if (max_char < 0x10000) - _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2, u, u + size, - PyUnicode_2BYTE_DATA(res)); - else - memcpy(PyUnicode_4BYTE_DATA(res), u, sizeof(Py_UCS4)*size); - assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} - -PyObject* -PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) -{ - if (size < 0) { - PyErr_SetString(PyExc_ValueError, "size must be positive"); - return NULL; - } - switch(kind) { - case PyUnicode_1BYTE_KIND: - return _PyUnicode_FromUCS1(buffer, size); - case PyUnicode_2BYTE_KIND: - return _PyUnicode_FromUCS2(buffer, size); - case PyUnicode_4BYTE_KIND: - return _PyUnicode_FromUCS4(buffer, size); - default: - PyErr_SetString(PyExc_SystemError, "invalid kind"); - return NULL; - } -} - -/* Ensure that a string uses the most efficient storage, if it is not the - case: create a new string with of the right kind. Write NULL into *p_unicode - on error. */ -static void -unicode_adjust_maxchar(PyObject **p_unicode) -{ - PyObject *unicode, *copy; - Py_UCS4 max_char; - Py_ssize_t len; - unsigned int kind; - - assert(p_unicode != NULL); - unicode = *p_unicode; - assert(PyUnicode_IS_READY(unicode)); - if (PyUnicode_IS_ASCII(unicode)) - return; - - len = PyUnicode_GET_LENGTH(unicode); - kind = PyUnicode_KIND(unicode); - if (kind == PyUnicode_1BYTE_KIND) { - const Py_UCS1 *u = PyUnicode_1BYTE_DATA(unicode); - max_char = ucs1lib_find_max_char(u, u + len); - if (max_char >= 128) - return; - } - else if (kind == PyUnicode_2BYTE_KIND) { - const Py_UCS2 *u = PyUnicode_2BYTE_DATA(unicode); - max_char = ucs2lib_find_max_char(u, u + len); - if (max_char >= 256) - return; - } - else { - const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode); - assert(kind == PyUnicode_4BYTE_KIND); - max_char = ucs4lib_find_max_char(u, u + len); - if (max_char >= 0x10000) - return; - } - copy = PyUnicode_New(len, max_char); - copy_characters(copy, 0, unicode, 0, len); - Py_DECREF(unicode); - *p_unicode = copy; -} - -PyObject* -PyUnicode_Copy(PyObject *unicode) -{ - Py_ssize_t length; - PyObject *copy; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadInternalCall(); - return NULL; - } - if (PyUnicode_READY(unicode)) - return NULL; - - length = PyUnicode_GET_LENGTH(unicode); - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (!copy) - return NULL; - assert(PyUnicode_KIND(copy) == PyUnicode_KIND(unicode)); - - Py_MEMCPY(PyUnicode_DATA(copy), PyUnicode_DATA(unicode), - length * PyUnicode_KIND(unicode)); - assert(_PyUnicode_CheckConsistency(copy, 1)); - return copy; -} - - -/* Widen Unicode objects to larger buffers. Don't write terminating null - character. Return NULL on error. */ - -void* -_PyUnicode_AsKind(PyObject *s, unsigned int kind) -{ - Py_ssize_t len; - void *result; - unsigned int skind; - - if (PyUnicode_READY(s)) - return NULL; - - len = PyUnicode_GET_LENGTH(s); - skind = PyUnicode_KIND(s); - if (skind >= kind) { - PyErr_SetString(PyExc_SystemError, "invalid widening attempt"); - return NULL; - } - switch(kind) { - case PyUnicode_2BYTE_KIND: - result = PyMem_Malloc(len * sizeof(Py_UCS2)); - if (!result) - return PyErr_NoMemory(); - assert(skind == PyUnicode_1BYTE_KIND); - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS2, - PyUnicode_1BYTE_DATA(s), - PyUnicode_1BYTE_DATA(s) + len, - result); - return result; - case PyUnicode_4BYTE_KIND: - result = PyMem_Malloc(len * sizeof(Py_UCS4)); - if (!result) - return PyErr_NoMemory(); - if (skind == PyUnicode_2BYTE_KIND) { - _PyUnicode_CONVERT_BYTES( - Py_UCS2, Py_UCS4, - PyUnicode_2BYTE_DATA(s), - PyUnicode_2BYTE_DATA(s) + len, - result); - } - else { - assert(skind == PyUnicode_1BYTE_KIND); - _PyUnicode_CONVERT_BYTES( - Py_UCS1, Py_UCS4, - PyUnicode_1BYTE_DATA(s), - PyUnicode_1BYTE_DATA(s) + len, - result); - } - return result; - default: - break; - } - PyErr_SetString(PyExc_SystemError, "invalid kind"); - return NULL; -} - -static Py_UCS4* -as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, - int copy_null) -{ - int kind; - void *data; - Py_ssize_t len, targetlen; - if (PyUnicode_READY(string) == -1) - return NULL; - kind = PyUnicode_KIND(string); - data = PyUnicode_DATA(string); - len = PyUnicode_GET_LENGTH(string); - targetlen = len; - if (copy_null) - targetlen++; - if (!target) { - if (PY_SSIZE_T_MAX / sizeof(Py_UCS4) < targetlen) { - PyErr_NoMemory(); - return NULL; - } - target = PyMem_Malloc(targetlen * sizeof(Py_UCS4)); - if (!target) { - PyErr_NoMemory(); - return NULL; - } - } - else { - if (targetsize < targetlen) { - PyErr_Format(PyExc_SystemError, - "string is longer than the buffer"); - if (copy_null && 0 < targetsize) - target[0] = 0; - return NULL; - } - } - if (kind == PyUnicode_1BYTE_KIND) { - Py_UCS1 *start = (Py_UCS1 *) data; - _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target); - } - else if (kind == PyUnicode_2BYTE_KIND) { - Py_UCS2 *start = (Py_UCS2 *) data; - _PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target); - } - else { - assert(kind == PyUnicode_4BYTE_KIND); - Py_MEMCPY(target, data, len * sizeof(Py_UCS4)); - } - if (copy_null) - target[len] = 0; - return target; -} - -Py_UCS4* -PyUnicode_AsUCS4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize, - int copy_null) -{ - if (target == NULL || targetsize < 0) { - PyErr_BadInternalCall(); - return NULL; - } - return as_ucs4(string, target, targetsize, copy_null); -} - -Py_UCS4* -PyUnicode_AsUCS4Copy(PyObject *string) -{ - return as_ucs4(string, NULL, 0, 1); -} - -#ifdef HAVE_WCHAR_H - -PyObject * -PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) -{ - if (w == NULL) { - if (size == 0) - return PyUnicode_New(0, 0); - PyErr_BadInternalCall(); - return NULL; - } - - if (size == -1) { - size = wcslen(w); - } - - return PyUnicode_FromUnicode(w, size); -} - -#endif /* HAVE_WCHAR_H */ - -static void -makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, - int zeropad, int width, int precision, char c) -{ - *fmt++ = '%'; - if (width) { - if (zeropad) - *fmt++ = '0'; - fmt += sprintf(fmt, "%d", width); - } - if (precision) - fmt += sprintf(fmt, ".%d", precision); - if (longflag) - *fmt++ = 'l'; - else if (longlongflag) { - /* longlongflag should only ever be nonzero on machines with - HAVE_LONG_LONG defined */ -#ifdef HAVE_LONG_LONG - char *f = PY_FORMAT_LONG_LONG; - while (*f) - *fmt++ = *f++; -#else - /* we shouldn't ever get here */ - assert(0); - *fmt++ = 'l'; -#endif - } - else if (size_tflag) { - char *f = PY_FORMAT_SIZE_T; - while (*f) - *fmt++ = *f++; - } - *fmt++ = c; - *fmt = '\0'; -} - -/* helper for PyUnicode_FromFormatV() */ - -static const char* -parse_format_flags(const char *f, - int *p_width, int *p_precision, - int *p_longflag, int *p_longlongflag, int *p_size_tflag) -{ - int width, precision, longflag, longlongflag, size_tflag; - - /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ - f++; - width = 0; - while (Py_ISDIGIT((unsigned)*f)) - width = (width*10) + *f++ - '0'; - precision = 0; - if (*f == '.') { - f++; - while (Py_ISDIGIT((unsigned)*f)) - precision = (precision*10) + *f++ - '0'; - if (*f == '%') { - /* "%.3%s" => f points to "3" */ - f--; - } - } - if (*f == '\0') { - /* bogus format "%.1" => go backward, f points to "1" */ - f--; - } - if (p_width != NULL) - *p_width = width; - if (p_precision != NULL) - *p_precision = precision; - - /* Handle %ld, %lu, %lld and %llu. */ - longflag = 0; - longlongflag = 0; - size_tflag = 0; - - if (*f == 'l') { - if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') { - longflag = 1; - ++f; - } -#ifdef HAVE_LONG_LONG - else if (f[1] == 'l' && - (f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) { - longlongflag = 1; - f += 2; - } -#endif - } - /* handle the size_t flag. */ - else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) { - size_tflag = 1; - ++f; - } - if (p_longflag != NULL) - *p_longflag = longflag; - if (p_longlongflag != NULL) - *p_longlongflag = longlongflag; - if (p_size_tflag != NULL) - *p_size_tflag = size_tflag; - return f; -} - -/* maximum number of characters required for output of %ld. 21 characters - allows for 64-bit integers (in decimal) and an optional sign. */ -#define MAX_LONG_CHARS 21 -/* maximum number of characters required for output of %lld. - We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, - plus 1 for the sign. 53/22 is an upper bound for log10(256). */ -#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) - -PyObject * -PyUnicode_FromFormatV(const char *format, va_list vargs) -{ - va_list count; - Py_ssize_t callcount = 0; - PyObject **callresults = NULL; - PyObject **callresult = NULL; - Py_ssize_t n = 0; - int width = 0; - int precision = 0; - int zeropad; - const char* f; - PyObject *string; - /* used by sprintf */ - char fmt[61]; /* should be enough for %0width.precisionlld */ - Py_UCS4 maxchar = 127; /* result is ASCII by default */ - Py_UCS4 argmaxchar; - Py_ssize_t numbersize = 0; - char *numberresults = NULL; - char *numberresult = NULL; - Py_ssize_t i; - int kind; - void *data; - - Py_VA_COPY(count, vargs); - /* step 1: count the number of %S/%R/%A/%s format specifications - * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ - * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the - * result in an array) - * also estimate a upper bound for all the number formats in the string, - * numbers will be formatted in step 3 and be kept in a '\0'-separated - * buffer before putting everything together. */ - for (f = format; *f; f++) { - if (*f == '%') { - int longlongflag; - /* skip width or width.precision (eg. "1.2" of "%1.2f") */ - f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL); - if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') - ++callcount; - - else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') { -#ifdef HAVE_LONG_LONG - if (longlongflag) { - if (width < MAX_LONG_LONG_CHARS) - width = MAX_LONG_LONG_CHARS; - } - else -#endif - /* MAX_LONG_CHARS is enough to hold a 64-bit integer, - including sign. Decimal takes the most space. This - isn't enough for octal. If a width is specified we - need more (which we allocate later). */ - if (width < MAX_LONG_CHARS) - width = MAX_LONG_CHARS; - - /* account for the size + '\0' to separate numbers - inside of the numberresults buffer */ - numbersize += (width + 1); - } - } - else if ((unsigned char)*f > 127) { - PyErr_Format(PyExc_ValueError, - "PyUnicode_FromFormatV() expects an ASCII-encoded format " - "string, got a non-ASCII byte: 0x%02x", - (unsigned char)*f); - return NULL; - } - } - /* step 2: allocate memory for the results of - * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ - if (callcount) { - callresults = PyObject_Malloc(sizeof(PyObject *) * callcount); - if (!callresults) { - PyErr_NoMemory(); - return NULL; - } - callresult = callresults; - } - /* step 2.5: allocate memory for the results of formating numbers */ - if (numbersize) { - numberresults = PyObject_Malloc(numbersize); - if (!numberresults) { - PyErr_NoMemory(); - goto fail; - } - numberresult = numberresults; - } - - /* step 3: format numbers and figure out how large a buffer we need */ - for (f = format; *f; f++) { - if (*f == '%') { - const char* p; - int longflag; - int longlongflag; - int size_tflag; - int numprinted; - - p = f; - zeropad = (f[1] == '0'); - f = parse_format_flags(f, &width, &precision, - &longflag, &longlongflag, &size_tflag); - switch (*f) { - case 'c': - { - Py_UCS4 ordinal = va_arg(count, int); - maxchar = Py_MAX(maxchar, ordinal); - n++; - break; - } - case '%': - n++; - break; - case 'i': - case 'd': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, *f); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, Py_ssize_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, int)); - n += numprinted; - /* advance by +1 to skip over the '\0' */ - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'u': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, 'u'); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, size_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'x': - makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); - numprinted = sprintf(numberresult, fmt, va_arg(count, int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'p': - numprinted = sprintf(numberresult, "%p", va_arg(count, void*)); - /* %p is ill-defined: ensure leading 0x. */ - if (numberresult[1] == 'X') - numberresult[1] = 'x'; - else if (numberresult[1] != 'x') { - memmove(numberresult + 2, numberresult, - strlen(numberresult) + 1); - numberresult[0] = '0'; - numberresult[1] = 'x'; - numprinted += 2; - } - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 's': - { - /* UTF-8 */ - const char *s = va_arg(count, const char*); - PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); - if (!str) - goto fail; - /* since PyUnicode_DecodeUTF8 returns already flexible - unicode objects, there is no need to call ready on them */ - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'U': - { - PyObject *obj = va_arg(count, PyObject *); - assert(obj && _PyUnicode_CHECK(obj)); - if (PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - break; - } - case 'V': - { - PyObject *obj = va_arg(count, PyObject *); - const char *str = va_arg(count, const char *); - PyObject *str_obj; - assert(obj || str); - assert(!obj || _PyUnicode_CHECK(obj)); - if (obj) { - if (PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - *callresult++ = NULL; - } - else { - str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace"); - if (!str_obj) - goto fail; - if (PyUnicode_READY(str_obj)) { - Py_DECREF(str_obj); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str_obj); - *callresult++ = str_obj; - } - break; - } - case 'S': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *str; - assert(obj); - str = PyObject_Str(obj); - if (!str || PyUnicode_READY(str) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'R': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *repr; - assert(obj); - repr = PyObject_Repr(obj); - if (!repr || PyUnicode_READY(repr) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(repr); - /* Remember the repr and switch to the next slot */ - *callresult++ = repr; - break; - } - case 'A': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *ascii; - assert(obj); - ascii = PyObject_ASCII(obj); - if (!ascii || PyUnicode_READY(ascii) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(ascii); - /* Remember the repr and switch to the next slot */ - *callresult++ = ascii; - break; - } - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 4: fill the buffer */ - /* Since we've analyzed how much space we need, - we don't have to resize the string. - There can be no errors beyond this point. */ - string = PyUnicode_New(n, maxchar); - if (!string) - goto fail; - kind = PyUnicode_KIND(string); - data = PyUnicode_DATA(string); - callresult = callresults; - numberresult = numberresults; - - for (i = 0, f = format; *f; f++) { - if (*f == '%') { - const char* p; - - p = f; - f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); - /* checking for == because the last argument could be a empty - string, which causes i to point to end, the assert at the end of - the loop */ - assert(i <= PyUnicode_GET_LENGTH(string)); - - switch (*f) { - case 'c': - { - const int ordinal = va_arg(vargs, int); - PyUnicode_WRITE(kind, data, i++, ordinal); - break; - } - case 'i': - case 'd': - case 'u': - case 'x': - case 'p': - /* unused, since we already have the result */ - if (*f == 'p') - (void) va_arg(vargs, void *); - else - (void) va_arg(vargs, int); - /* extract the result from numberresults and append. */ - for (; *numberresult; ++i, ++numberresult) - PyUnicode_WRITE(kind, data, i, *numberresult); - /* skip over the separating '\0' */ - assert(*numberresult == '\0'); - numberresult++; - assert(numberresult <= numberresults + numbersize); - break; - case 's': - { - /* unused, since we already have the result */ - Py_ssize_t size; - (void) va_arg(vargs, char *); - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; - } - case 'U': - { - PyObject *obj = va_arg(vargs, PyObject *); - Py_ssize_t size; - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - size = PyUnicode_GET_LENGTH(obj); - copy_characters(string, i, obj, 0, size); - i += size; - break; - } - case 'V': - { - Py_ssize_t size; - PyObject *obj = va_arg(vargs, PyObject *); - va_arg(vargs, const char *); - if (obj) { - size = PyUnicode_GET_LENGTH(obj); - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - copy_characters(string, i, obj, 0, size); - i += size; - } else { - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= - PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); - i += size; - Py_DECREF(*callresult); - } - ++callresult; - break; - } - case 'S': - case 'R': - case 'A': - { - Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult); - /* unused, since we already have the result */ - (void) va_arg(vargs, PyObject *); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; - } - case '%': - PyUnicode_WRITE(kind, data, i++, '%'); - break; - default: - for (; *p; ++p, ++i) - PyUnicode_WRITE(kind, data, i, *p); - assert(i == PyUnicode_GET_LENGTH(string)); - goto end; - } - } - else { - assert(i < PyUnicode_GET_LENGTH(string)); - PyUnicode_WRITE(kind, data, i++, *f); - } - } - assert(i == PyUnicode_GET_LENGTH(string)); - - end: - if (callresults) - PyObject_Free(callresults); - if (numberresults) - PyObject_Free(numberresults); - return unicode_result(string); - fail: - if (callresults) { - PyObject **callresult2 = callresults; - while (callresult2 < callresult) { - Py_XDECREF(*callresult2); - ++callresult2; - } - PyObject_Free(callresults); - } - if (numberresults) - PyObject_Free(numberresults); - return NULL; -} - -PyObject * -PyUnicode_FromFormat(const char *format, ...) -{ - PyObject* ret; - va_list vargs; - -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - ret = PyUnicode_FromFormatV(format, vargs); - va_end(vargs); - return ret; -} - -#ifdef HAVE_WCHAR_H - -/* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(): - convert a Unicode object to a wide character string. - - - If w is NULL: return the number of wide characters (including the null - character) required to convert the unicode object. Ignore size argument. - - - Otherwise: return the number of wide characters (excluding the null - character) written into w. Write at most size wide characters (including - the null character). */ -static Py_ssize_t -unicode_aswidechar(PyObject *unicode, - wchar_t *w, - Py_ssize_t size) -{ - Py_ssize_t res; - const wchar_t *wstr; - - wstr = PyUnicode_AsUnicodeAndSize(unicode, &res); - if (wstr == NULL) - return -1; - - if (w != NULL) { - if (size > res) - size = res + 1; - else - res = size; - Py_MEMCPY(w, wstr, size * sizeof(wchar_t)); - return res; - } - else - return res + 1; -} - -Py_ssize_t -PyUnicode_AsWideChar(PyObject *unicode, - wchar_t *w, - Py_ssize_t size) -{ - if (unicode == NULL) { - PyErr_BadInternalCall(); - return -1; - } - return unicode_aswidechar(unicode, w, size); -} - -wchar_t* -PyUnicode_AsWideCharString(PyObject *unicode, - Py_ssize_t *size) -{ - wchar_t* buffer; - Py_ssize_t buflen; - - if (unicode == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - - buflen = unicode_aswidechar(unicode, NULL, 0); - if (buflen == -1) - return NULL; - if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) { - PyErr_NoMemory(); - return NULL; - } - - buffer = PyMem_MALLOC(buflen * sizeof(wchar_t)); - if (buffer == NULL) { - PyErr_NoMemory(); - return NULL; - } - buflen = unicode_aswidechar(unicode, buffer, buflen); - if (buflen == -1) - return NULL; - if (size != NULL) - *size = buflen; - return buffer; -} - -#endif /* HAVE_WCHAR_H */ - -PyObject * -PyUnicode_FromOrdinal(int ordinal) -{ - PyObject *v; - if (ordinal < 0 || ordinal > MAX_UNICODE) { - PyErr_SetString(PyExc_ValueError, - "chr() arg not in range(0x110000)"); - return NULL; - } - - if (ordinal < 256) - return get_latin1_char(ordinal); - - v = PyUnicode_New(1, ordinal); - if (v == NULL) - return NULL; - PyUnicode_WRITE(PyUnicode_KIND(v), PyUnicode_DATA(v), 0, ordinal); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; -} - -PyObject * -PyUnicode_FromObject(register PyObject *obj) -{ - /* XXX Perhaps we should make this API an alias of - PyObject_Str() instead ?! */ - if (PyUnicode_CheckExact(obj)) { - if (PyUnicode_READY(obj)) - return NULL; - Py_INCREF(obj); - return obj; - } - if (PyUnicode_Check(obj)) { - /* For a Unicode subtype that's not a Unicode object, - return a true Unicode object with the same data. */ - return PyUnicode_Copy(obj); - } - PyErr_Format(PyExc_TypeError, - "Can't convert '%.100s' object to str implicitly", - Py_TYPE(obj)->tp_name); - return NULL; -} - -PyObject * -PyUnicode_FromEncodedObject(register PyObject *obj, - const char *encoding, - const char *errors) -{ - Py_buffer buffer; - PyObject *v; - - if (obj == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - - /* Decoding bytes objects is the most common case and should be fast */ - if (PyBytes_Check(obj)) { - if (PyBytes_GET_SIZE(obj) == 0) { - Py_INCREF(unicode_empty); - v = unicode_empty; - } - else { - v = PyUnicode_Decode( - PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), - encoding, errors); - } - return v; - } - - if (PyUnicode_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "decoding str is not supported"); - return NULL; - } - - /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */ - if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) { - PyErr_Format(PyExc_TypeError, - "coercing to str: need bytes, bytearray " - "or buffer-like object, %.80s found", - Py_TYPE(obj)->tp_name); - return NULL; - } - - if (buffer.len == 0) { - Py_INCREF(unicode_empty); - v = unicode_empty; - } - else - v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); - - PyBuffer_Release(&buffer); - return v; -} - -/* Convert encoding to lower case and replace '_' with '-' in order to - catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1), - 1 on success. */ -static int -normalize_encoding(const char *encoding, - char *lower, - size_t lower_len) -{ - const char *e; - char *l; - char *l_end; - - if (encoding == NULL) { - strcpy(lower, "utf-8"); - return 1; - } - e = encoding; - l = lower; - l_end = &lower[lower_len - 1]; - while (*e) { - if (l == l_end) - return 0; - if (Py_ISUPPER(*e)) { - *l++ = Py_TOLOWER(*e++); - } - else if (*e == '_') { - *l++ = '-'; - e++; - } - else { - *l++ = *e++; - } - } - *l = '\0'; - return 1; -} - -PyObject * -PyUnicode_Decode(const char *s, - Py_ssize_t size, - const char *encoding, - const char *errors) -{ - PyObject *buffer = NULL, *unicode; - Py_buffer info; - char lower[11]; /* Enough for any encoding shortcut */ - - /* Shortcuts for common default encodings */ - if (normalize_encoding(encoding, lower, sizeof(lower))) { - if ((strcmp(lower, "utf-8") == 0) || - (strcmp(lower, "utf8") == 0)) - return PyUnicode_DecodeUTF8(s, size, errors); - else if ((strcmp(lower, "latin-1") == 0) || - (strcmp(lower, "latin1") == 0) || - (strcmp(lower, "iso-8859-1") == 0)) - return PyUnicode_DecodeLatin1(s, size, errors); -#ifdef HAVE_MBCS - else if (strcmp(lower, "mbcs") == 0) - return PyUnicode_DecodeMBCS(s, size, errors); -#endif - else if (strcmp(lower, "ascii") == 0) - return PyUnicode_DecodeASCII(s, size, errors); - else if (strcmp(lower, "utf-16") == 0) - return PyUnicode_DecodeUTF16(s, size, errors, 0); - else if (strcmp(lower, "utf-32") == 0) - return PyUnicode_DecodeUTF32(s, size, errors, 0); - } - - /* Decode via the codec registry */ - buffer = NULL; - if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0) - goto onError; - buffer = PyMemoryView_FromBuffer(&info); - if (buffer == NULL) - goto onError; - unicode = PyCodec_Decode(buffer, encoding, errors); - if (unicode == NULL) - goto onError; - if (!PyUnicode_Check(unicode)) { - PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(unicode)->tp_name); - Py_DECREF(unicode); - goto onError; - } - Py_DECREF(buffer); - return unicode_result(unicode); - - onError: - Py_XDECREF(buffer); - return NULL; -} - -PyObject * -PyUnicode_AsDecodedObject(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Decode via the codec registry */ - v = PyCodec_Decode(unicode, encoding, errors); - if (v == NULL) - goto onError; - return unicode_result(v); - - onError: - return NULL; -} - -PyObject * -PyUnicode_AsDecodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Decode via the codec registry */ - v = PyCodec_Decode(unicode, encoding, errors); - if (v == NULL) - goto onError; - if (!PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "decoder did not return a str object (type=%.400s)", - Py_TYPE(v)->tp_name); - Py_DECREF(v); - goto onError; - } - return unicode_result(v); - - onError: - return NULL; -} - -PyObject * -PyUnicode_Encode(const Py_UNICODE *s, - Py_ssize_t size, - const char *encoding, - const char *errors) -{ - PyObject *v, *unicode; - - unicode = PyUnicode_FromUnicode(s, size); - if (unicode == NULL) - return NULL; - v = PyUnicode_AsEncodedString(unicode, encoding, errors); - Py_DECREF(unicode); - return v; -} - -PyObject * -PyUnicode_AsEncodedObject(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); - if (v == NULL) - goto onError; - return v; - - onError: - return NULL; -} - -PyObject * -PyUnicode_EncodeFSDefault(PyObject *unicode) -{ -#ifdef HAVE_MBCS - return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); -#elif defined(__APPLE__) - return _PyUnicode_AsUTF8String(unicode, "surrogateescape"); -#else - PyInterpreterState *interp = PyThreadState_GET()->interp; - /* Bootstrap check: if the filesystem codec is implemented in Python, we - cannot use it to encode and decode filenames before it is loaded. Load - the Python codec requires to encode at least its own filename. Use the C - version of the locale codec until the codec registry is initialized and - the Python codec is loaded. - - Py_FileSystemDefaultEncoding is shared between all interpreters, we - cannot only rely on it: check also interp->fscodec_initialized for - subinterpreters. */ - if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { - return PyUnicode_AsEncodedString(unicode, - Py_FileSystemDefaultEncoding, - "surrogateescape"); - } - else { - /* locale encoding with surrogateescape */ - wchar_t *wchar; - char *bytes; - PyObject *bytes_obj; - size_t error_pos; - - wchar = PyUnicode_AsWideCharString(unicode, NULL); - if (wchar == NULL) - return NULL; - bytes = _Py_wchar2char(wchar, &error_pos); - if (bytes == NULL) { - if (error_pos != (size_t)-1) { - char *errmsg = strerror(errno); - PyObject *exc = NULL; - if (errmsg == NULL) - errmsg = "Py_wchar2char() failed"; - raise_encode_exception(&exc, - "filesystemencoding", unicode, - error_pos, error_pos+1, - errmsg); - Py_XDECREF(exc); - } - else - PyErr_NoMemory(); - PyMem_Free(wchar); - return NULL; - } - PyMem_Free(wchar); - - bytes_obj = PyBytes_FromString(bytes); - PyMem_Free(bytes); - return bytes_obj; - } -#endif -} - -PyObject * -PyUnicode_AsEncodedString(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - char lower[11]; /* Enough for any encoding shortcut */ - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - - /* Shortcuts for common default encodings */ - if (normalize_encoding(encoding, lower, sizeof(lower))) { - if ((strcmp(lower, "utf-8") == 0) || - (strcmp(lower, "utf8") == 0)) - { - if (errors == NULL || strcmp(errors, "strict") == 0) - return _PyUnicode_AsUTF8String(unicode, NULL); - else - return _PyUnicode_AsUTF8String(unicode, errors); - } - else if ((strcmp(lower, "latin-1") == 0) || - (strcmp(lower, "latin1") == 0) || - (strcmp(lower, "iso-8859-1") == 0)) - return _PyUnicode_AsLatin1String(unicode, errors); -#ifdef HAVE_MBCS - else if (strcmp(lower, "mbcs") == 0) - return PyUnicode_EncodeCodePage(CP_ACP, unicode, errors); -#endif - else if (strcmp(lower, "ascii") == 0) - return _PyUnicode_AsASCIIString(unicode, errors); - } - - /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); - if (v == NULL) - return NULL; - - /* The normal path */ - if (PyBytes_Check(v)) - return v; - - /* If the codec returns a buffer, raise a warning and convert to bytes */ - if (PyByteArray_Check(v)) { - int error; - PyObject *b; - - error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1, - "encoder %s returned bytearray instead of bytes", - encoding); - if (error) { - Py_DECREF(v); - return NULL; - } - - b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); - Py_DECREF(v); - return b; - } - - PyErr_Format(PyExc_TypeError, - "encoder did not return a bytes object (type=%.400s)", - Py_TYPE(v)->tp_name); - Py_DECREF(v); - return NULL; -} - -PyObject * -PyUnicode_AsEncodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); - if (v == NULL) - goto onError; - if (!PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "encoder did not return an str object (type=%.400s)", - Py_TYPE(v)->tp_name); - Py_DECREF(v); - goto onError; - } - return v; - - onError: - return NULL; -} - -PyObject* -PyUnicode_DecodeFSDefault(const char *s) { - Py_ssize_t size = (Py_ssize_t)strlen(s); - return PyUnicode_DecodeFSDefaultAndSize(s, size); -} - -PyObject* -PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) -{ -#ifdef HAVE_MBCS - return PyUnicode_DecodeMBCS(s, size, NULL); -#elif defined(__APPLE__) - return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); -#else - PyInterpreterState *interp = PyThreadState_GET()->interp; - /* Bootstrap check: if the filesystem codec is implemented in Python, we - cannot use it to encode and decode filenames before it is loaded. Load - the Python codec requires to encode at least its own filename. Use the C - version of the locale codec until the codec registry is initialized and - the Python codec is loaded. - - Py_FileSystemDefaultEncoding is shared between all interpreters, we - cannot only rely on it: check also interp->fscodec_initialized for - subinterpreters. */ - if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { - return PyUnicode_Decode(s, size, - Py_FileSystemDefaultEncoding, - "surrogateescape"); - } - else { - /* locale encoding with surrogateescape */ - wchar_t *wchar; - PyObject *unicode; - size_t len; - - if (s[size] != '\0' || size != strlen(s)) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); - return NULL; - } - - wchar = _Py_char2wchar(s, &len); - if (wchar == NULL) - return PyErr_NoMemory(); - - unicode = PyUnicode_FromWideChar(wchar, len); - PyMem_Free(wchar); - return unicode; - } -#endif -} - - -int -PyUnicode_FSConverter(PyObject* arg, void* addr) -{ - PyObject *output = NULL; - Py_ssize_t size; - void *data; - if (arg == NULL) { - Py_DECREF(*(PyObject**)addr); - return 1; - } - if (PyBytes_Check(arg)) { - output = arg; - Py_INCREF(output); - } - else { - arg = PyUnicode_FromObject(arg); - if (!arg) - return 0; - output = PyUnicode_EncodeFSDefault(arg); - Py_DECREF(arg); - if (!output) - return 0; - if (!PyBytes_Check(output)) { - Py_DECREF(output); - PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes"); - return 0; - } - } - size = PyBytes_GET_SIZE(output); - data = PyBytes_AS_STRING(output); - if (size != strlen(data)) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); - Py_DECREF(output); - return 0; - } - *(PyObject**)addr = output; - return Py_CLEANUP_SUPPORTED; -} - - -int -PyUnicode_FSDecoder(PyObject* arg, void* addr) -{ - PyObject *output = NULL; - if (arg == NULL) { - Py_DECREF(*(PyObject**)addr); - return 1; - } - if (PyUnicode_Check(arg)) { - if (PyUnicode_READY(arg)) - return 0; - output = arg; - Py_INCREF(output); - } - else { - arg = PyBytes_FromObject(arg); - if (!arg) - return 0; - output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg), - PyBytes_GET_SIZE(arg)); - Py_DECREF(arg); - if (!output) - return 0; - if (!PyUnicode_Check(output)) { - Py_DECREF(output); - PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode"); - return 0; - } - } - if (PyUnicode_READY(output) < 0) { - Py_DECREF(output); - return 0; - } - if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output), - PyUnicode_GET_LENGTH(output), 0, 1) >= 0) { - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); - Py_DECREF(output); - return 0; - } - *(PyObject**)addr = output; - return Py_CLEANUP_SUPPORTED; -} - - -char* -PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) -{ - PyObject *bytes; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(unicode) == -1) - return NULL; - - if (PyUnicode_UTF8(unicode) == NULL) { - assert(!PyUnicode_IS_COMPACT_ASCII(unicode)); - bytes = _PyUnicode_AsUTF8String(unicode, "strict"); - if (bytes == NULL) - return NULL; - _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1); - if (_PyUnicode_UTF8(unicode) == NULL) { - Py_DECREF(bytes); - return NULL; - } - _PyUnicode_UTF8_LENGTH(unicode) = PyBytes_GET_SIZE(bytes); - Py_MEMCPY(_PyUnicode_UTF8(unicode), - PyBytes_AS_STRING(bytes), - _PyUnicode_UTF8_LENGTH(unicode) + 1); - Py_DECREF(bytes); - } - - if (psize) - *psize = PyUnicode_UTF8_LENGTH(unicode); - return PyUnicode_UTF8(unicode); -} - -char* -PyUnicode_AsUTF8(PyObject *unicode) -{ - return PyUnicode_AsUTF8AndSize(unicode, NULL); -} - -#ifdef Py_DEBUG -static int unicode_as_unicode_calls = 0; -#endif - - -Py_UNICODE * -PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) -{ - const unsigned char *one_byte; -#if SIZEOF_WCHAR_T == 4 - const Py_UCS2 *two_bytes; -#else - const Py_UCS4 *four_bytes; - const Py_UCS4 *ucs4_end; - Py_ssize_t num_surrogates; -#endif - wchar_t *w; - wchar_t *wchar_end; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (_PyUnicode_WSTR(unicode) == NULL) { - /* Non-ASCII compact unicode object */ - assert(_PyUnicode_KIND(unicode) != 0); - assert(PyUnicode_IS_READY(unicode)); - -#ifdef Py_DEBUG - ++unicode_as_unicode_calls; -#endif - - if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { -#if SIZEOF_WCHAR_T == 2 - four_bytes = PyUnicode_4BYTE_DATA(unicode); - ucs4_end = four_bytes + _PyUnicode_LENGTH(unicode); - num_surrogates = 0; - - for (; four_bytes < ucs4_end; ++four_bytes) { - if (*four_bytes > 0xFFFF) - ++num_surrogates; - } - - _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC( - sizeof(wchar_t) * (_PyUnicode_LENGTH(unicode) + 1 + num_surrogates)); - if (!_PyUnicode_WSTR(unicode)) { - PyErr_NoMemory(); - return NULL; - } - _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode) + num_surrogates; - - w = _PyUnicode_WSTR(unicode); - wchar_end = w + _PyUnicode_WSTR_LENGTH(unicode); - four_bytes = PyUnicode_4BYTE_DATA(unicode); - for (; four_bytes < ucs4_end; ++four_bytes, ++w) { - if (*four_bytes > 0xFFFF) { - assert(*four_bytes <= MAX_UNICODE); - /* encode surrogate pair in this case */ - *w++ = Py_UNICODE_HIGH_SURROGATE(*four_bytes); - *w = Py_UNICODE_LOW_SURROGATE(*four_bytes); - } - else - *w = *four_bytes; - - if (w > wchar_end) { - assert(0 && "Miscalculated string end"); - } - } - *w = 0; -#else - /* sizeof(wchar_t) == 4 */ - Py_FatalError("Impossible unicode object state, wstr and str " - "should share memory already."); - return NULL; -#endif - } - else { - _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * - (_PyUnicode_LENGTH(unicode) + 1)); - if (!_PyUnicode_WSTR(unicode)) { - PyErr_NoMemory(); - return NULL; - } - if (!PyUnicode_IS_COMPACT_ASCII(unicode)) - _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode); - w = _PyUnicode_WSTR(unicode); - wchar_end = w + _PyUnicode_LENGTH(unicode); - - if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { - one_byte = PyUnicode_1BYTE_DATA(unicode); - for (; w < wchar_end; ++one_byte, ++w) - *w = *one_byte; - /* null-terminate the wstr */ - *w = 0; - } - else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) { -#if SIZEOF_WCHAR_T == 4 - two_bytes = PyUnicode_2BYTE_DATA(unicode); - for (; w < wchar_end; ++two_bytes, ++w) - *w = *two_bytes; - /* null-terminate the wstr */ - *w = 0; -#else - /* sizeof(wchar_t) == 2 */ - PyObject_FREE(_PyUnicode_WSTR(unicode)); - _PyUnicode_WSTR(unicode) = NULL; - Py_FatalError("Impossible unicode object state, wstr " - "and str should share memory already."); - return NULL; -#endif - } - else { - assert(0 && "This should never happen."); - } - } - } - if (size != NULL) - *size = PyUnicode_WSTR_LENGTH(unicode); - return _PyUnicode_WSTR(unicode); -} - -Py_UNICODE * -PyUnicode_AsUnicode(PyObject *unicode) -{ - return PyUnicode_AsUnicodeAndSize(unicode, NULL); -} - - -Py_ssize_t -PyUnicode_GetSize(PyObject *unicode) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - return PyUnicode_GET_SIZE(unicode); - - onError: - return -1; -} - -Py_ssize_t -PyUnicode_GetLength(PyObject *unicode) -{ - if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) { - PyErr_BadArgument(); - return -1; - } - - return PyUnicode_GET_LENGTH(unicode); -} - -Py_UCS4 -PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) -{ - if (!PyUnicode_Check(unicode) || PyUnicode_READY(unicode) == -1) { - PyErr_BadArgument(); - return (Py_UCS4)-1; - } - if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return (Py_UCS4)-1; - } - return PyUnicode_READ_CHAR(unicode, index); -} - -int -PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch) -{ - if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) { - PyErr_BadArgument(); - return -1; - } - if (index < 0 || index >= _PyUnicode_LENGTH(unicode)) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return -1; - } - if (_PyUnicode_Dirty(unicode)) - return -1; - PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), - index, ch); - return 0; -} - -const char * -PyUnicode_GetDefaultEncoding(void) -{ - return "utf-8"; -} - -/* create or adjust a UnicodeDecodeError */ -static void -make_decode_exception(PyObject **exceptionObject, - const char *encoding, - const char *input, Py_ssize_t length, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - if (*exceptionObject == NULL) { - *exceptionObject = PyUnicodeDecodeError_Create( - encoding, input, length, startpos, endpos, reason); - } - else { - if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos)) - goto onError; - if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos)) - goto onError; - if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) - goto onError; - } - return; - -onError: - Py_DECREF(*exceptionObject); - *exceptionObject = NULL; -} - -/* error handling callback helper: - build arguments, call the callback and check the arguments, - if no exception occurred, copy the replacement to the output - and adjust various state variables. - return 0 on success, -1 on error -*/ - -static int -unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, - const char *encoding, const char *reason, - const char **input, const char **inend, Py_ssize_t *startinpos, - Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr, - PyObject **output, Py_ssize_t *outpos) -{ - static char *argparse = "O!n;decoding error handler must return (str, int) tuple"; - - PyObject *restuple = NULL; - PyObject *repunicode = NULL; - Py_ssize_t outsize; - Py_ssize_t insize; - Py_ssize_t requiredsize; - Py_ssize_t newpos; - PyObject *inputobj = NULL; - int res = -1; - - if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) - outsize = PyUnicode_GET_LENGTH(*output); - else - outsize = _PyUnicode_WSTR_LENGTH(*output); - - if (*errorHandler == NULL) { - *errorHandler = PyCodec_LookupError(errors); - if (*errorHandler == NULL) - goto onError; - } - - make_decode_exception(exceptionObject, - encoding, - *input, *inend - *input, - *startinpos, *endinpos, - reason); - if (*exceptionObject == NULL) - goto onError; - - restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL); - if (restuple == NULL) - goto onError; - if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[4]); - goto onError; - } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos)) - goto onError; - if (PyUnicode_READY(repunicode) < 0) - goto onError; - - /* Copy back the bytes variables, which might have been modified by the - callback */ - inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); - if (!inputobj) - goto onError; - if (!PyBytes_Check(inputobj)) { - PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes"); - } - *input = PyBytes_AS_STRING(inputobj); - insize = PyBytes_GET_SIZE(inputobj); - *inend = *input + insize; - /* we can DECREF safely, as the exception has another reference, - so the object won't go away. */ - Py_DECREF(inputobj); - - if (newpos<0) - newpos = insize+newpos; - if (newpos<0 || newpos>insize) { - PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos); - goto onError; - } - - if (_PyUnicode_KIND(*output) != PyUnicode_WCHAR_KIND) { - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; - if (requiredsize > outsize) { - if (requiredsize<2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - if (unicode_widen(output, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) - goto onError; - copy_characters(*output, *outpos, repunicode, 0, replen); - *outpos += replen; - } - else { - wchar_t *repwstr; - Py_ssize_t repwlen; - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) - goto onError; - /* need more space? (at least enough for what we - have+the replacement+the rest of the string (starting - at the new input position), so we won't have to check space - when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; - if (requiredsize > outsize) { - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (unicode_resize(output, requiredsize) < 0) - goto onError; - } - wcsncpy(_PyUnicode_WSTR(*output) + *outpos, repwstr, repwlen); - *outpos += repwlen; - } - *endinpos = newpos; - *inptr = *input + newpos; - - /* we made it! */ - res = 0; - - onError: - Py_XDECREF(restuple); - return res; -} - -/* --- UTF-7 Codec -------------------------------------------------------- */ - -/* See RFC2152 for details. We encode conservatively and decode liberally. */ - -/* Three simple macros defining base-64. */ - -/* Is c a base-64 character? */ - -#define IS_BASE64(c) \ - (((c) >= 'A' && (c) <= 'Z') || \ - ((c) >= 'a' && (c) <= 'z') || \ - ((c) >= '0' && (c) <= '9') || \ - (c) == '+' || (c) == '/') - -/* given that c is a base-64 character, what is its base-64 value? */ - -#define FROM_BASE64(c) \ - (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' : \ - ((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 26 : \ - ((c) >= '0' && (c) <= '9') ? (c) - '0' + 52 : \ - (c) == '+' ? 62 : 63) - -/* What is the base-64 character of the bottom 6 bits of n? */ - -#define TO_BASE64(n) \ - ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]) - -/* DECODE_DIRECT: this byte encountered in a UTF-7 string should be - * decoded as itself. We are permissive on decoding; the only ASCII - * byte not decoding to itself is the + which begins a base64 - * string. */ - -#define DECODE_DIRECT(c) \ - ((c) <= 127 && (c) != '+') - -/* The UTF-7 encoder treats ASCII characters differently according to - * whether they are Set D, Set O, Whitespace, or special (i.e. none of - * the above). See RFC2152. This array identifies these different - * sets: - * 0 : "Set D" - * alphanumeric and '(),-./:? - * 1 : "Set O" - * !"#$%&*;<=>@[]^_`{|} - * 2 : "whitespace" - * ht nl cr sp - * 3 : special (must be base64 encoded) - * everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127) - */ - -static -char utf7_category[128] = { -/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, -/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -/* sp ! " # $ % & ' ( ) * + , - . / */ - 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, -/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, -/* @ A B C D E F G H I J K L M N O */ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* P Q R S T U V W X Y Z [ \ ] ^ _ */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, -/* ` a b c d e f g h i j k l m n o */ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* p q r s t u v w x y z { | } ~ del */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, -}; - -/* ENCODE_DIRECT: this character should be encoded as itself. The - * answer depends on whether we are encoding set O as itself, and also - * on whether we are encoding whitespace as itself. RFC2152 makes it - * clear that the answers to these questions vary between - * applications, so this code needs to be flexible. */ - -#define ENCODE_DIRECT(c, directO, directWS) \ - ((c) < 128 && (c) > 0 && \ - ((utf7_category[(c)] == 0) || \ - (directWS && (utf7_category[(c)] == 2)) || \ - (directO && (utf7_category[(c)] == 1)))) - -PyObject * -PyUnicode_DecodeUTF7(const char *s, - Py_ssize_t size, - const char *errors) -{ - return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); -} - -/* The decoder. The only state we preserve is our read position, - * i.e. how many characters we have consumed. So if we end in the - * middle of a shift sequence we have to back off the read position - * and the output to the beginning of the sequence, otherwise we lose - * all the shift state (seen bits, number of bits seen, high - * surrogate). */ - -PyObject * -PyUnicode_DecodeUTF7Stateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - const char *e; - PyObject *unicode; - const char *errmsg = ""; - int inShift = 0; - Py_ssize_t shiftOutStart; - unsigned int base64bits = 0; - unsigned long base64buffer = 0; - Py_UCS4 surrogate = 0; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - /* Start off assuming it's all ASCII. Widen later as necessary. */ - unicode = PyUnicode_New(size, 127); - if (!unicode) - return NULL; - if (size == 0) { - if (consumed) - *consumed = 0; - return unicode; - } - - shiftOutStart = outpos = 0; - e = s + size; - - while (s < e) { - Py_UCS4 ch; - restart: - ch = (unsigned char) *s; - - if (inShift) { /* in a base-64 section */ - if (IS_BASE64(ch)) { /* consume a base-64 character */ - base64buffer = (base64buffer << 6) | FROM_BASE64(ch); - base64bits += 6; - s++; - if (base64bits >= 16) { - /* we have enough bits for a UTF-16 value */ - Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16)); - base64bits -= 16; - base64buffer &= (1 << base64bits) - 1; /* clear high bits */ - if (surrogate) { - /* expecting a second surrogate */ - if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { - Py_UCS4 ch2 = Py_UNICODE_JOIN_SURROGATES(surrogate, outCh); - if (unicode_putchar(&unicode, &outpos, ch2) < 0) - goto onError; - surrogate = 0; - continue; - } - else { - if (unicode_putchar(&unicode, &outpos, surrogate) < 0) - goto onError; - surrogate = 0; - } - } - if (Py_UNICODE_IS_HIGH_SURROGATE(outCh)) { - /* first surrogate */ - surrogate = outCh; - } - else { - if (unicode_putchar(&unicode, &outpos, outCh) < 0) - goto onError; - } - } - } - else { /* now leaving a base-64 section */ - inShift = 0; - s++; - if (surrogate) { - if (unicode_putchar(&unicode, &outpos, surrogate) < 0) - goto onError; - surrogate = 0; - } - if (base64bits > 0) { /* left-over bits */ - if (base64bits >= 6) { - /* We've seen at least one base-64 character */ - errmsg = "partial character in shift sequence"; - goto utf7Error; - } - else { - /* Some bits remain; they should be zero */ - if (base64buffer != 0) { - errmsg = "non-zero padding bits in shift sequence"; - goto utf7Error; - } - } - } - if (ch != '-') { - /* '-' is absorbed; other terminating - characters are preserved */ - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; - } - } - } - else if ( ch == '+' ) { - startinpos = s-starts; - s++; /* consume '+' */ - if (s < e && *s == '-') { /* '+-' encodes '+' */ - s++; - if (unicode_putchar(&unicode, &outpos, '+') < 0) - goto onError; - } - else { /* begin base64-encoded section */ - inShift = 1; - shiftOutStart = outpos; - base64bits = 0; - } - } - else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; - s++; - } - else { - startinpos = s-starts; - s++; - errmsg = "unexpected special character"; - goto utf7Error; - } - continue; -utf7Error: - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "utf7", errmsg, - &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) - goto onError; - } - - /* end of string */ - - if (inShift && !consumed) { /* in shift sequence, no more to follow */ - /* if we're in an inconsistent state, that's an error */ - if (surrogate || - (base64bits >= 6) || - (base64bits > 0 && base64buffer != 0)) { - endinpos = size; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "utf7", "unterminated shift sequence", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &outpos)) - goto onError; - if (s < e) - goto restart; - } - } - - /* return state */ - if (consumed) { - if (inShift) { - outpos = shiftOutStart; /* back off output */ - *consumed = startinpos; - } - else { - *consumed = s-starts; - } - } - - if (unicode_resize(&unicode, outpos) < 0) - goto onError; - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(unicode); - - onError: - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - Py_DECREF(unicode); - return NULL; -} - - -PyObject * -_PyUnicode_EncodeUTF7(PyObject *str, - int base64SetO, - int base64WhiteSpace, - const char *errors) -{ - int kind; - void *data; - Py_ssize_t len; - PyObject *v; - Py_ssize_t allocated; - int inShift = 0; - Py_ssize_t i; - unsigned int base64bits = 0; - unsigned long base64buffer = 0; - char * out; - char * start; - - if (PyUnicode_READY(str) < 0) - return NULL; - kind = PyUnicode_KIND(str); - data = PyUnicode_DATA(str); - len = PyUnicode_GET_LENGTH(str); - - if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); - - /* It might be possible to tighten this worst case */ - allocated = 8 * len; - if (allocated / 8 != len) - return PyErr_NoMemory(); - - v = PyBytes_FromStringAndSize(NULL, allocated); - if (v == NULL) - return NULL; - - start = out = PyBytes_AS_STRING(v); - for (i = 0; i < len; ++i) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - - if (inShift) { - if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { - /* shifting out */ - if (base64bits) { /* output remaining bits */ - *out++ = TO_BASE64(base64buffer << (6-base64bits)); - base64buffer = 0; - base64bits = 0; - } - inShift = 0; - /* Characters not in the BASE64 set implicitly unshift the sequence - so no '-' is required, except if the character is itself a '-' */ - if (IS_BASE64(ch) || ch == '-') { - *out++ = '-'; - } - *out++ = (char) ch; - } - else { - goto encode_char; - } - } - else { /* not in a shift sequence */ - if (ch == '+') { - *out++ = '+'; - *out++ = '-'; - } - else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { - *out++ = (char) ch; - } - else { - *out++ = '+'; - inShift = 1; - goto encode_char; - } - } - continue; -encode_char: - if (ch >= 0x10000) { - assert(ch <= MAX_UNICODE); - - /* code first surrogate */ - base64bits += 16; - base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10); - while (base64bits >= 6) { - *out++ = TO_BASE64(base64buffer >> (base64bits-6)); - base64bits -= 6; - } - /* prepare second surrogate */ - ch = Py_UNICODE_LOW_SURROGATE(ch); - } - base64bits += 16; - base64buffer = (base64buffer << 16) | ch; - while (base64bits >= 6) { - *out++ = TO_BASE64(base64buffer >> (base64bits-6)); - base64bits -= 6; - } - } - if (base64bits) - *out++= TO_BASE64(base64buffer << (6-base64bits) ); - if (inShift) - *out++ = '-'; - if (_PyBytes_Resize(&v, out - start) < 0) - return NULL; - return v; -} -PyObject * -PyUnicode_EncodeUTF7(const Py_UNICODE *s, - Py_ssize_t size, - int base64SetO, - int base64WhiteSpace, - const char *errors) -{ - PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); - if (tmp == NULL) - return NULL; - result = _PyUnicode_EncodeUTF7(tmp, base64SetO, - base64WhiteSpace, errors); - Py_DECREF(tmp); - return result; -} - -#undef IS_BASE64 -#undef FROM_BASE64 -#undef TO_BASE64 -#undef DECODE_DIRECT -#undef ENCODE_DIRECT - -/* --- UTF-8 Codec -------------------------------------------------------- */ - -static -char utf8_code_length[256] = { - /* Map UTF-8 encoded prefix byte to sequence length. Zero means - illegal prefix. See RFC 3629 for details */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ - 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */ - 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-F4 + F5-FF */ -}; - -PyObject * -PyUnicode_DecodeUTF8(const char *s, - Py_ssize_t size, - const char *errors) -{ - return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); -} - -#include "stringlib/ucs1lib.h" -#include "stringlib/codecs.h" -#include "stringlib/undef.h" - -#include "stringlib/ucs2lib.h" -#include "stringlib/codecs.h" -#include "stringlib/undef.h" - -#include "stringlib/ucs4lib.h" -#include "stringlib/codecs.h" -#include "stringlib/undef.h" - -/* Mask to check or force alignment of a pointer to C 'long' boundaries */ -#define LONG_PTR_MASK (size_t) (SIZEOF_LONG - 1) - -/* Mask to quickly check whether a C 'long' contains a - non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define ASCII_CHAR_MASK 0x8080808080808080L -#elif (SIZEOF_LONG == 4) -# define ASCII_CHAR_MASK 0x80808080L -#else -# error C 'long' size should be either 4 or 8! -#endif - -/* Scans a UTF-8 string and returns the maximum character to be expected - and the size of the decoded unicode string. - - This function doesn't check for errors, these checks are performed in - PyUnicode_DecodeUTF8Stateful. - */ -static Py_UCS4 -utf8_max_char_size_and_char_count(const char *s, Py_ssize_t string_size, - Py_ssize_t *unicode_size) -{ - Py_ssize_t char_count = 0; - const unsigned char *p = (const unsigned char *)s; - const unsigned char *end = p + string_size; - const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); - - assert(unicode_size != NULL); - - /* By having a cascade of independent loops which fallback onto each - other, we minimize the amount of work done in the average loop - iteration, and we also maximize the CPU's ability to predict - branches correctly (because a given condition will have always the - same boolean outcome except perhaps in the last iteration of the - corresponding loop). - In the general case this brings us rather close to decoding - performance pre-PEP 393, despite the two-pass decoding. - - Note that the pure ASCII loop is not duplicated once a non-ASCII - character has been encountered. It is actually a pessimization (by - a significant factor) to use this loop on text with many non-ASCII - characters, and it is important to avoid bad performance on valid - utf-8 data (invalid utf-8 being a different can of worms). - */ - - /* ASCII */ - for (; p < end; ++p) { - /* Only check value if it's not a ASCII char... */ - if (*p < 0x80) { - /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for - an explanation. */ - if (!((size_t) p & LONG_PTR_MASK)) { - /* Help register allocation */ - register const unsigned char *_p = p; - while (_p < aligned_end) { - unsigned long value = *(unsigned long *) _p; - if (value & ASCII_CHAR_MASK) - break; - _p += SIZEOF_LONG; - char_count += SIZEOF_LONG; - } - p = _p; - if (p == end) - break; - } - } - if (*p < 0x80) - ++char_count; - else - goto _ucs1loop; - } - *unicode_size = char_count; - return 127; - -_ucs1loop: - for (; p < end; ++p) { - if (*p < 0xc4) - char_count += ((*p & 0xc0) != 0x80); - else - goto _ucs2loop; - } - *unicode_size = char_count; - return 255; - -_ucs2loop: - for (; p < end; ++p) { - if (*p < 0xf0) - char_count += ((*p & 0xc0) != 0x80); - else - goto _ucs4loop; - } - *unicode_size = char_count; - return 65535; - -_ucs4loop: - for (; p < end; ++p) { - char_count += ((*p & 0xc0) != 0x80); - } - *unicode_size = char_count; - return 65537; -} - -/* Called when we encountered some error that wasn't detected in the original - scan, e.g. an encoded surrogate character. The original maxchar computation - may have been incorrect, so redo it. */ -static int -refit_partial_string(PyObject **unicode, int kind, void *data, Py_ssize_t n) -{ - PyObject *tmp; - Py_ssize_t k; - Py_UCS4 maxchar; - for (k = 0, maxchar = 0; k < n; k++) - maxchar = Py_MAX(maxchar, PyUnicode_READ(kind, data, k)); - tmp = PyUnicode_New(PyUnicode_GET_LENGTH(*unicode), maxchar); - if (tmp == NULL) - return -1; - PyUnicode_CopyCharacters(tmp, 0, *unicode, 0, n); - Py_DECREF(*unicode); - *unicode = tmp; - return 0; -} - -/* Similar to PyUnicode_WRITE but may attempt to widen and resize the string - in case of errors. Implicit parameters: unicode, kind, data, has_errors, - onError. Potential resizing overallocates, so the result needs to shrink - at the end. -*/ -#define WRITE_MAYBE_FAIL(index, value) \ - do { \ - if (has_errors) { \ - Py_ssize_t pos = index; \ - if (pos > PyUnicode_GET_LENGTH(unicode) && \ - unicode_resize(&unicode, pos + pos/8) < 0) \ - goto onError; \ - if (unicode_putchar(&unicode, &pos, value) < 0) \ - goto onError; \ - } \ - else \ - PyUnicode_WRITE(kind, data, index, value); \ - } while (0) - -PyObject * -PyUnicode_DecodeUTF8Stateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) -{ - const char *starts = s; - int n; - int k; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - const char *e, *aligned_end; - PyObject *unicode; - const char *errmsg = ""; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - Py_UCS4 maxchar = 0; - Py_ssize_t unicode_size; - Py_ssize_t i; - int kind; - void *data; - int has_errors = 0; - - if (size == 0) { - if (consumed) - *consumed = 0; - return (PyObject *)PyUnicode_New(0, 0); - } - maxchar = utf8_max_char_size_and_char_count(s, size, &unicode_size); - /* When the string is ASCII only, just use memcpy and return. - unicode_size may be != size if there is an incomplete UTF-8 - sequence at the end of the ASCII block. */ - if (maxchar < 128 && size == unicode_size) { - if (consumed) - *consumed = size; - - if (size == 1) - return get_latin1_char((unsigned char)s[0]); - - unicode = PyUnicode_New(unicode_size, maxchar); - if (!unicode) - return NULL; - Py_MEMCPY(PyUnicode_1BYTE_DATA(unicode), s, unicode_size); - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return unicode; - } - - /* In case of errors, maxchar and size computation might be incorrect; - code below refits and resizes as necessary. */ - unicode = PyUnicode_New(unicode_size, maxchar); - if (!unicode) - return NULL; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - - /* Unpack UTF-8 encoded data */ - i = 0; - e = s + size; - switch (kind) { - case PyUnicode_1BYTE_KIND: - has_errors = ucs1lib_utf8_try_decode(s, e, (Py_UCS1 *) data, &s, &i); - break; - case PyUnicode_2BYTE_KIND: - has_errors = ucs2lib_utf8_try_decode(s, e, (Py_UCS2 *) data, &s, &i); - break; - case PyUnicode_4BYTE_KIND: - has_errors = ucs4lib_utf8_try_decode(s, e, (Py_UCS4 *) data, &s, &i); - break; - } - if (!has_errors) { - /* Ensure the unicode size calculation was correct */ - assert(i == unicode_size); - assert(s == e); - if (consumed) - *consumed = s-starts; - return unicode; - } - /* Fall through to the generic decoding loop for the rest of - the string */ - if (refit_partial_string(&unicode, kind, data, i) < 0) - goto onError; - - aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK); - - while (s < e) { - Py_UCS4 ch = (unsigned char)*s; - - if (ch < 0x80) { - /* Fast path for runs of ASCII characters. Given that common UTF-8 - input will consist of an overwhelming majority of ASCII - characters, we try to optimize for this case by checking - as many characters as a C 'long' can contain. - First, check if we can do an aligned read, as most CPUs have - a penalty for unaligned reads. - */ - if (!((size_t) s & LONG_PTR_MASK)) { - /* Help register allocation */ - register const char *_s = s; - register Py_ssize_t _i = i; - while (_s < aligned_end) { - /* Read a whole long at a time (either 4 or 8 bytes), - and do a fast unrolled copy if it only contains ASCII - characters. */ - unsigned long value = *(unsigned long *) _s; - if (value & ASCII_CHAR_MASK) - break; - WRITE_MAYBE_FAIL(_i+0, _s[0]); - WRITE_MAYBE_FAIL(_i+1, _s[1]); - WRITE_MAYBE_FAIL(_i+2, _s[2]); - WRITE_MAYBE_FAIL(_i+3, _s[3]); -#if (SIZEOF_LONG == 8) - WRITE_MAYBE_FAIL(_i+4, _s[4]); - WRITE_MAYBE_FAIL(_i+5, _s[5]); - WRITE_MAYBE_FAIL(_i+6, _s[6]); - WRITE_MAYBE_FAIL(_i+7, _s[7]); -#endif - _s += SIZEOF_LONG; - _i += SIZEOF_LONG; - } - s = _s; - i = _i; - if (s == e) - break; - ch = (unsigned char)*s; - } - } - - if (ch < 0x80) { - WRITE_MAYBE_FAIL(i++, ch); - s++; - continue; - } - - n = utf8_code_length[ch]; - - if (s + n > e) { - if (consumed) - break; - else { - errmsg = "unexpected end of data"; - startinpos = s-starts; - endinpos = startinpos+1; - for (k=1; (k < size-startinpos) && ((s[k]&0xC0) == 0x80); k++) - endinpos++; - goto utf8Error; - } - } - - switch (n) { - - case 0: - errmsg = "invalid start byte"; - startinpos = s-starts; - endinpos = startinpos+1; - goto utf8Error; - - case 1: - errmsg = "internal error"; - startinpos = s-starts; - endinpos = startinpos+1; - goto utf8Error; - - case 2: - if ((s[1] & 0xc0) != 0x80) { - errmsg = "invalid continuation byte"; - startinpos = s-starts; - endinpos = startinpos + 1; - goto utf8Error; - } - ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); - assert ((ch > 0x007F) && (ch <= 0x07FF)); - WRITE_MAYBE_FAIL(i++, ch); - break; - - case 3: - /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf - will result in surrogates in range d800-dfff. Surrogates are - not valid UTF-8 so they are rejected. - See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf - (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ - if ((s[1] & 0xc0) != 0x80 || - (s[2] & 0xc0) != 0x80 || - ((unsigned char)s[0] == 0xE0 && - (unsigned char)s[1] < 0xA0) || - ((unsigned char)s[0] == 0xED && - (unsigned char)s[1] > 0x9F)) { - errmsg = "invalid continuation byte"; - startinpos = s-starts; - endinpos = startinpos + 1; - - /* if s[1] first two bits are 1 and 0, then the invalid - continuation byte is s[2], so increment endinpos by 1, - if not, s[1] is invalid and endinpos doesn't need to - be incremented. */ - if ((s[1] & 0xC0) == 0x80) - endinpos++; - goto utf8Error; - } - ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); - assert ((ch > 0x07FF) && (ch <= 0xFFFF)); - WRITE_MAYBE_FAIL(i++, ch); - break; - - case 4: - if ((s[1] & 0xc0) != 0x80 || - (s[2] & 0xc0) != 0x80 || - (s[3] & 0xc0) != 0x80 || - ((unsigned char)s[0] == 0xF0 && - (unsigned char)s[1] < 0x90) || - ((unsigned char)s[0] == 0xF4 && - (unsigned char)s[1] > 0x8F)) { - errmsg = "invalid continuation byte"; - startinpos = s-starts; - endinpos = startinpos + 1; - if ((s[1] & 0xC0) == 0x80) { - endinpos++; - if ((s[2] & 0xC0) == 0x80) - endinpos++; - } - goto utf8Error; - } - ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) + - ((s[2] & 0x3f) << 6) + (s[3] & 0x3f); - assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE)); - - WRITE_MAYBE_FAIL(i++, ch); - break; - } - s += n; - continue; - - utf8Error: - if (!has_errors) { - if (refit_partial_string(&unicode, kind, data, i) < 0) - goto onError; - has_errors = 1; - } - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "utf8", errmsg, - &starts, &e, &startinpos, &endinpos, &exc, &s, - &unicode, &i)) - goto onError; - /* Update data because unicode_decode_call_errorhandler might have - re-created or resized the unicode object. */ - data = PyUnicode_DATA(unicode); - kind = PyUnicode_KIND(unicode); - aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK); - } - /* Ensure the unicode_size calculation above was correct: */ - assert(has_errors || i == unicode_size); - - if (consumed) - *consumed = s-starts; - - /* Adjust length and ready string when it contained errors and - is of the old resizable kind. */ - if (has_errors) { - if (PyUnicode_Resize(&unicode, i) < 0) - goto onError; - } - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - assert(_PyUnicode_CheckConsistency(unicode, 1)); - return unicode; - - onError: - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - Py_DECREF(unicode); - return NULL; -} - -#undef WRITE_MAYBE_FAIL - -#ifdef __APPLE__ - -/* Simplified UTF-8 decoder using surrogateescape error handler, - used to decode the command line arguments on Mac OS X. */ - -wchar_t* -_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) -{ - int n; - const char *e; - wchar_t *unicode, *p; - - /* Note: size will always be longer than the resulting Unicode - character count */ - if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) { - PyErr_NoMemory(); - return NULL; - } - unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t)); - if (!unicode) - return NULL; - - /* Unpack UTF-8 encoded data */ - p = unicode; - e = s + size; - while (s < e) { - Py_UCS4 ch = (unsigned char)*s; - - if (ch < 0x80) { - *p++ = (wchar_t)ch; - s++; - continue; - } - - n = utf8_code_length[ch]; - if (s + n > e) { - goto surrogateescape; - } - - switch (n) { - case 0: - case 1: - goto surrogateescape; - - case 2: - if ((s[1] & 0xc0) != 0x80) - goto surrogateescape; - ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); - assert ((ch > 0x007F) && (ch <= 0x07FF)); - *p++ = (wchar_t)ch; - break; - - case 3: - /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf - will result in surrogates in range d800-dfff. Surrogates are - not valid UTF-8 so they are rejected. - See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf - (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ - if ((s[1] & 0xc0) != 0x80 || - (s[2] & 0xc0) != 0x80 || - ((unsigned char)s[0] == 0xE0 && - (unsigned char)s[1] < 0xA0) || - ((unsigned char)s[0] == 0xED && - (unsigned char)s[1] > 0x9F)) { - - goto surrogateescape; - } - ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); - assert ((ch > 0x07FF) && (ch <= 0xFFFF)); - *p++ = (wchar_t)ch; - break; - - case 4: - if ((s[1] & 0xc0) != 0x80 || - (s[2] & 0xc0) != 0x80 || - (s[3] & 0xc0) != 0x80 || - ((unsigned char)s[0] == 0xF0 && - (unsigned char)s[1] < 0x90) || - ((unsigned char)s[0] == 0xF4 && - (unsigned char)s[1] > 0x8F)) { - goto surrogateescape; - } - ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) + - ((s[2] & 0x3f) << 6) + (s[3] & 0x3f); - assert ((ch > 0xFFFF) && (ch <= MAX_UNICODE)); - -#if SIZEOF_WCHAR_T == 4 - *p++ = (wchar_t)ch; -#else - /* compute and append the two surrogates: */ - *p++ = (wchar_t)Py_UNICODE_HIGH_SURROGATE(ch); - *p++ = (wchar_t)Py_UNICODE_LOW_SURROGATE(ch); -#endif - break; - } - s += n; - continue; - - surrogateescape: - *p++ = 0xDC00 + ch; - s++; - } - *p = L'\0'; - return unicode; -} - -#endif /* __APPLE__ */ - -/* Primary internal function which creates utf8 encoded bytes objects. - - Allocation strategy: if the string is short, convert into a stack buffer - and allocate exactly as much space needed at the end. Else allocate the - maximum possible needed (4 result bytes per Unicode character), and return - the excess memory at the end. -*/ -PyObject * -_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors) -{ -#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ - - Py_ssize_t i; /* index into s of next input byte */ - PyObject *result; /* result string object */ - char *p; /* next free byte in output buffer */ - Py_ssize_t nallocated; /* number of result bytes allocated */ - Py_ssize_t nneeded; /* number of result bytes needed */ - char stackbuf[MAX_SHORT_UNICHARS * 4]; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - int kind; - void *data; - Py_ssize_t size; - PyObject *rep = NULL; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - - if (PyUnicode_READY(unicode) == -1) - return NULL; - - if (PyUnicode_UTF8(unicode)) - return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode), - PyUnicode_UTF8_LENGTH(unicode)); - - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - size = PyUnicode_GET_LENGTH(unicode); - - assert(size >= 0); - - if (size <= MAX_SHORT_UNICHARS) { - /* Write into the stack buffer; nallocated can't overflow. - * At the end, we'll allocate exactly as much heap space as it - * turns out we need. - */ - nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int); - result = NULL; /* will allocate after we're done */ - p = stackbuf; - } - else { - /* Overallocate on the heap, and give the excess back at the end. */ - nallocated = size * 4; - if (nallocated / 4 != size) /* overflow! */ - return PyErr_NoMemory(); - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - return NULL; - p = PyBytes_AS_STRING(result); - } - - for (i = 0; i < size;) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i++); - - if (ch < 0x80) - /* Encode ASCII */ - *p++ = (char) ch; - - else if (ch < 0x0800) { - /* Encode Latin-1 */ - *p++ = (char)(0xc0 | (ch >> 6)); - *p++ = (char)(0x80 | (ch & 0x3f)); - } else if (Py_UNICODE_IS_SURROGATE(ch)) { - Py_ssize_t newpos; - Py_ssize_t repsize, k, startpos; - startpos = i-1; - rep = unicode_encode_call_errorhandler( - errors, &errorHandler, "utf-8", "surrogates not allowed", - unicode, &exc, startpos, startpos+1, &newpos); - if (!rep) - goto error; - - if (PyBytes_Check(rep)) - repsize = PyBytes_GET_SIZE(rep); - else - repsize = PyUnicode_GET_LENGTH(rep); - - if (repsize > 4) { - Py_ssize_t offset; - - if (result == NULL) - offset = p - stackbuf; - else - offset = p - PyBytes_AS_STRING(result); - - if (nallocated > PY_SSIZE_T_MAX - repsize + 4) { - /* integer overflow */ - PyErr_NoMemory(); - goto error; - } - nallocated += repsize - 4; - if (result != NULL) { - if (_PyBytes_Resize(&result, nallocated) < 0) - goto error; - } else { - result = PyBytes_FromStringAndSize(NULL, nallocated); - if (result == NULL) - goto error; - Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset); - } - p = PyBytes_AS_STRING(result) + offset; - } - - if (PyBytes_Check(rep)) { - char *prep = PyBytes_AS_STRING(rep); - for(k = repsize; k > 0; k--) - *p++ = *prep++; - } else /* rep is unicode */ { - enum PyUnicode_Kind repkind; - void *repdata; - - if (PyUnicode_READY(rep) < 0) - goto error; - repkind = PyUnicode_KIND(rep); - repdata = PyUnicode_DATA(rep); - - for(k=0; k<repsize; k++) { - Py_UCS4 c = PyUnicode_READ(repkind, repdata, k); - if (0x80 <= c) { - raise_encode_exception(&exc, "utf-8", - unicode, - i-1, i, - "surrogates not allowed"); - goto error; - } - *p++ = (char)c; - } - } - Py_CLEAR(rep); - } else if (ch < 0x10000) { - *p++ = (char)(0xe0 | (ch >> 12)); - *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); - *p++ = (char)(0x80 | (ch & 0x3f)); - } else /* ch >= 0x10000 */ { - assert(ch <= MAX_UNICODE); - /* Encode UCS4 Unicode ordinals */ - *p++ = (char)(0xf0 | (ch >> 18)); - *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); - *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); - *p++ = (char)(0x80 | (ch & 0x3f)); - } - } - - if (result == NULL) { - /* This was stack allocated. */ - nneeded = p - stackbuf; - assert(nneeded <= nallocated); - result = PyBytes_FromStringAndSize(stackbuf, nneeded); - } - else { - /* Cut back to size actually needed. */ - nneeded = p - PyBytes_AS_STRING(result); - assert(nneeded <= nallocated); - _PyBytes_Resize(&result, nneeded); - } - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return result; - error: - Py_XDECREF(rep); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - Py_XDECREF(result); - return NULL; - -#undef MAX_SHORT_UNICHARS -} - -PyObject * -PyUnicode_EncodeUTF8(const Py_UNICODE *s, - Py_ssize_t size, - const char *errors) -{ - PyObject *v, *unicode; - - unicode = PyUnicode_FromUnicode(s, size); - if (unicode == NULL) - return NULL; - v = _PyUnicode_AsUTF8String(unicode, errors); - Py_DECREF(unicode); - return v; -} - -PyObject * -PyUnicode_AsUTF8String(PyObject *unicode) -{ - return _PyUnicode_AsUTF8String(unicode, NULL); -} - -/* --- UTF-32 Codec ------------------------------------------------------- */ - -PyObject * -PyUnicode_DecodeUTF32(const char *s, - Py_ssize_t size, - const char *errors, - int *byteorder) -{ - return PyUnicode_DecodeUTF32Stateful(s, size, errors, byteorder, NULL); -} - -PyObject * -PyUnicode_DecodeUTF32Stateful(const char *s, - Py_ssize_t size, - const char *errors, - int *byteorder, - Py_ssize_t *consumed) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *unicode; - const unsigned char *q, *e; - int bo = 0; /* assume native ordering by default */ - const char *errmsg = ""; - /* Offsets from q for retrieving bytes in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - int iorder[] = {0, 1, 2, 3}; -#else - int iorder[] = {3, 2, 1, 0}; -#endif - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - q = (unsigned char *)s; - e = q + size; - - if (byteorder) - bo = *byteorder; - - /* Check for BOM marks (U+FEFF) in the input and adjust current - byte order setting accordingly. In native mode, the leading BOM - mark is skipped, in all other modes, it is copied to the output - stream as-is (giving a ZWNBSP character). */ - if (bo == 0) { - if (size >= 4) { - const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | - (q[iorder[1]] << 8) | q[iorder[0]]; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - if (bom == 0x0000FEFF) { - q += 4; - bo = -1; - } - else if (bom == 0xFFFE0000) { - q += 4; - bo = 1; - } -#else - if (bom == 0x0000FEFF) { - q += 4; - bo = 1; - } - else if (bom == 0xFFFE0000) { - q += 4; - bo = -1; - } -#endif - } - } - - if (bo == -1) { - /* force LE */ - iorder[0] = 0; - iorder[1] = 1; - iorder[2] = 2; - iorder[3] = 3; - } - else if (bo == 1) { - /* force BE */ - iorder[0] = 3; - iorder[1] = 2; - iorder[2] = 1; - iorder[3] = 0; - } - - /* This might be one to much, because of a BOM */ - unicode = PyUnicode_New((size+3)/4, 127); - if (!unicode) - return NULL; - if (size == 0) - return unicode; - outpos = 0; - - while (q < e) { - Py_UCS4 ch; - /* remaining bytes at the end? (size should be divisible by 4) */ - if (e-q<4) { - if (consumed) - break; - errmsg = "truncated data"; - startinpos = ((const char *)q)-starts; - endinpos = ((const char *)e)-starts; - goto utf32Error; - /* The remaining input chars are ignored if the callback - chooses to skip the input */ - } - ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) | - (q[iorder[1]] << 8) | q[iorder[0]]; - - if (ch >= 0x110000) - { - errmsg = "codepoint not in range(0x110000)"; - startinpos = ((const char *)q)-starts; - endinpos = startinpos+4; - goto utf32Error; - } - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; - q += 4; - continue; - utf32Error: - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "utf32", errmsg, - &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q, - &unicode, &outpos)) - goto onError; - } - - if (byteorder) - *byteorder = bo; - - if (consumed) - *consumed = (const char *)q-starts; - - /* Adjust length */ - if (PyUnicode_Resize(&unicode, outpos) < 0) - goto onError; - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(unicode); - - onError: - Py_DECREF(unicode); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -PyObject * -_PyUnicode_EncodeUTF32(PyObject *str, - const char *errors, - int byteorder) -{ - int kind; - void *data; - Py_ssize_t len; - PyObject *v; - unsigned char *p; - Py_ssize_t nsize, bytesize, i; - /* Offsets from p for storing byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - int iorder[] = {0, 1, 2, 3}; -#else - int iorder[] = {3, 2, 1, 0}; -#endif - -#define STORECHAR(CH) \ - do { \ - p[iorder[3]] = ((CH) >> 24) & 0xff; \ - p[iorder[2]] = ((CH) >> 16) & 0xff; \ - p[iorder[1]] = ((CH) >> 8) & 0xff; \ - p[iorder[0]] = (CH) & 0xff; \ - p += 4; \ - } while(0) - - if (!PyUnicode_Check(str)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(str) < 0) - return NULL; - kind = PyUnicode_KIND(str); - data = PyUnicode_DATA(str); - len = PyUnicode_GET_LENGTH(str); - - nsize = len + (byteorder == 0); - bytesize = nsize * 4; - if (bytesize / 4 != nsize) - return PyErr_NoMemory(); - v = PyBytes_FromStringAndSize(NULL, bytesize); - if (v == NULL) - return NULL; - - p = (unsigned char *)PyBytes_AS_STRING(v); - if (byteorder == 0) - STORECHAR(0xFEFF); - if (len == 0) - goto done; - - if (byteorder == -1) { - /* force LE */ - iorder[0] = 0; - iorder[1] = 1; - iorder[2] = 2; - iorder[3] = 3; - } - else if (byteorder == 1) { - /* force BE */ - iorder[0] = 3; - iorder[1] = 2; - iorder[2] = 1; - iorder[3] = 0; - } - - for (i = 0; i < len; i++) - STORECHAR(PyUnicode_READ(kind, data, i)); - - done: - return v; -#undef STORECHAR -} - -PyObject * -PyUnicode_EncodeUTF32(const Py_UNICODE *s, - Py_ssize_t size, - const char *errors, - int byteorder) -{ - PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); - if (tmp == NULL) - return NULL; - result = _PyUnicode_EncodeUTF32(tmp, errors, byteorder); - Py_DECREF(tmp); - return result; -} - -PyObject * -PyUnicode_AsUTF32String(PyObject *unicode) -{ - return _PyUnicode_EncodeUTF32(unicode, NULL, 0); -} - -/* --- UTF-16 Codec ------------------------------------------------------- */ - -PyObject * -PyUnicode_DecodeUTF16(const char *s, - Py_ssize_t size, - const char *errors, - int *byteorder) -{ - return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); -} - -/* Two masks for fast checking of whether a C 'long' may contain - UTF16-encoded surrogate characters. This is an efficient heuristic, - assuming that non-surrogate characters with a code point >= 0x8000 are - rare in most input. - FAST_CHAR_MASK is used when the input is in native byte ordering, - SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering. -*/ -#if (SIZEOF_LONG == 8) -# define FAST_CHAR_MASK 0x8000800080008000L -# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L -#elif (SIZEOF_LONG == 4) -# define FAST_CHAR_MASK 0x80008000L -# define SWAPPED_FAST_CHAR_MASK 0x00800080L -#else -# error C 'long' size should be either 4 or 8! -#endif - -PyObject * -PyUnicode_DecodeUTF16Stateful(const char *s, - Py_ssize_t size, - const char *errors, - int *byteorder, - Py_ssize_t *consumed) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *unicode; - const unsigned char *q, *e, *aligned_end; - int bo = 0; /* assume native ordering by default */ - int native_ordering = 0; - const char *errmsg = ""; - /* Offsets from q for retrieving byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - int ihi = 1, ilo = 0; -#else - int ihi = 0, ilo = 1; -#endif - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - /* Note: size will always be longer than the resulting Unicode - character count */ - unicode = PyUnicode_New(size, 127); - if (!unicode) - return NULL; - if (size == 0) - return unicode; - outpos = 0; - - q = (unsigned char *)s; - e = q + size - 1; - - if (byteorder) - bo = *byteorder; - - /* Check for BOM marks (U+FEFF) in the input and adjust current - byte order setting accordingly. In native mode, the leading BOM - mark is skipped, in all other modes, it is copied to the output - stream as-is (giving a ZWNBSP character). */ - if (bo == 0) { - if (size >= 2) { - const Py_UCS4 bom = (q[ihi] << 8) | q[ilo]; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - if (bom == 0xFEFF) { - q += 2; - bo = -1; - } - else if (bom == 0xFFFE) { - q += 2; - bo = 1; - } -#else - if (bom == 0xFEFF) { - q += 2; - bo = 1; - } - else if (bom == 0xFFFE) { - q += 2; - bo = -1; - } -#endif - } - } - - if (bo == -1) { - /* force LE */ - ihi = 1; - ilo = 0; - } - else if (bo == 1) { - /* force BE */ - ihi = 0; - ilo = 1; - } -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - native_ordering = ilo < ihi; -#else - native_ordering = ilo > ihi; -#endif - - aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); - while (q < e) { - Py_UCS4 ch; - /* First check for possible aligned read of a C 'long'. Unaligned - reads are more expensive, better to defer to another iteration. */ - if (!((size_t) q & LONG_PTR_MASK)) { - /* Fast path for runs of non-surrogate chars. */ - register const unsigned char *_q = q; - int kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); - while (_q < aligned_end) { - unsigned long block = * (unsigned long *) _q; - unsigned short *pblock = (unsigned short*)█ - Py_UCS4 maxch; - if (native_ordering) { - /* Can use buffer directly */ - if (block & FAST_CHAR_MASK) - break; - } - else { - /* Need to byte-swap */ - unsigned char *_p = (unsigned char*)pblock; - if (block & SWAPPED_FAST_CHAR_MASK) - break; - _p[0] = _q[1]; - _p[1] = _q[0]; - _p[2] = _q[3]; - _p[3] = _q[2]; -#if (SIZEOF_LONG == 8) - _p[4] = _q[5]; - _p[5] = _q[4]; - _p[6] = _q[7]; - _p[7] = _q[6]; -#endif - } - maxch = Py_MAX(pblock[0], pblock[1]); -#if SIZEOF_LONG == 8 - maxch = Py_MAX(maxch, Py_MAX(pblock[2], pblock[3])); -#endif - if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) { - if (unicode_widen(&unicode, maxch) < 0) - goto onError; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - } - PyUnicode_WRITE(kind, data, outpos++, pblock[0]); - PyUnicode_WRITE(kind, data, outpos++, pblock[1]); -#if SIZEOF_LONG == 8 - PyUnicode_WRITE(kind, data, outpos++, pblock[2]); - PyUnicode_WRITE(kind, data, outpos++, pblock[3]); -#endif - _q += SIZEOF_LONG; - } - q = _q; - if (q >= e) - break; - } - ch = (q[ihi] << 8) | q[ilo]; - - q += 2; - - if (!Py_UNICODE_IS_SURROGATE(ch)) { - if (unicode_putchar(&unicode, &outpos, ch) < 0) - goto onError; - continue; - } - - /* UTF-16 code pair: */ - if (q > e) { - errmsg = "unexpected end of data"; - startinpos = (((const char *)q) - 2) - starts; - endinpos = ((const char *)e) + 1 - starts; - goto utf16Error; - } - if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) { - Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo]; - q += 2; - if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) { - if (unicode_putchar(&unicode, &outpos, - Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0) - goto onError; - continue; - } - else { - errmsg = "illegal UTF-16 surrogate"; - startinpos = (((const char *)q)-4)-starts; - endinpos = startinpos+2; - goto utf16Error; - } - - } - errmsg = "illegal encoding"; - startinpos = (((const char *)q)-2)-starts; - endinpos = startinpos+2; - /* Fall through to report the error */ - - utf16Error: - if (unicode_decode_call_errorhandler( - errors, - &errorHandler, - "utf16", errmsg, - &starts, - (const char **)&e, - &startinpos, - &endinpos, - &exc, - (const char **)&q, - &unicode, - &outpos)) - goto onError; - } - /* remaining byte at the end? (size should be even) */ - if (e == q) { - if (!consumed) { - errmsg = "truncated data"; - startinpos = ((const char *)q) - starts; - endinpos = ((const char *)e) + 1 - starts; - if (unicode_decode_call_errorhandler( - errors, - &errorHandler, - "utf16", errmsg, - &starts, - (const char **)&e, - &startinpos, - &endinpos, - &exc, - (const char **)&q, - &unicode, - &outpos)) - goto onError; - /* The remaining input chars are ignored if the callback - chooses to skip the input */ - } - } - - if (byteorder) - *byteorder = bo; - - if (consumed) - *consumed = (const char *)q-starts; - - /* Adjust length */ - if (PyUnicode_Resize(&unicode, outpos) < 0) - goto onError; - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(unicode); - - onError: - Py_DECREF(unicode); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -#undef FAST_CHAR_MASK -#undef SWAPPED_FAST_CHAR_MASK - -PyObject * -_PyUnicode_EncodeUTF16(PyObject *str, - const char *errors, - int byteorder) -{ - int kind; - void *data; - Py_ssize_t len; - PyObject *v; - unsigned char *p; - Py_ssize_t nsize, bytesize; - Py_ssize_t i, pairs; - /* Offsets from p for storing byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - int ihi = 1, ilo = 0; -#else - int ihi = 0, ilo = 1; -#endif - -#define STORECHAR(CH) \ - do { \ - p[ihi] = ((CH) >> 8) & 0xff; \ - p[ilo] = (CH) & 0xff; \ - p += 2; \ - } while(0) - - if (!PyUnicode_Check(str)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(str) < 0) - return NULL; - kind = PyUnicode_KIND(str); - data = PyUnicode_DATA(str); - len = PyUnicode_GET_LENGTH(str); - - pairs = 0; - if (kind == PyUnicode_4BYTE_KIND) - for (i = 0; i < len; i++) - if (PyUnicode_READ(kind, data, i) >= 0x10000) - pairs++; - /* 2 * (len + pairs + (byteorder == 0)) */ - if (len > PY_SSIZE_T_MAX - pairs - (byteorder == 0)) - return PyErr_NoMemory(); - nsize = len + pairs + (byteorder == 0); - bytesize = nsize * 2; - if (bytesize / 2 != nsize) - return PyErr_NoMemory(); - v = PyBytes_FromStringAndSize(NULL, bytesize); - if (v == NULL) - return NULL; - - p = (unsigned char *)PyBytes_AS_STRING(v); - if (byteorder == 0) - STORECHAR(0xFEFF); - if (len == 0) - goto done; - - if (byteorder == -1) { - /* force LE */ - ihi = 1; - ilo = 0; - } - else if (byteorder == 1) { - /* force BE */ - ihi = 0; - ilo = 1; - } - - for (i = 0; i < len; i++) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - Py_UCS4 ch2 = 0; - if (ch >= 0x10000) { - ch2 = Py_UNICODE_LOW_SURROGATE(ch); - ch = Py_UNICODE_HIGH_SURROGATE(ch); - } - STORECHAR(ch); - if (ch2) - STORECHAR(ch2); - } - - done: - return v; -#undef STORECHAR -} - -PyObject * -PyUnicode_EncodeUTF16(const Py_UNICODE *s, - Py_ssize_t size, - const char *errors, - int byteorder) -{ - PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); - if (tmp == NULL) - return NULL; - result = _PyUnicode_EncodeUTF16(tmp, errors, byteorder); - Py_DECREF(tmp); - return result; -} - -PyObject * -PyUnicode_AsUTF16String(PyObject *unicode) -{ - return _PyUnicode_EncodeUTF16(unicode, NULL, 0); -} - -/* --- Unicode Escape Codec ----------------------------------------------- */ - -/* Helper function for PyUnicode_DecodeUnicodeEscape, determines - if all the escapes in the string make it still a valid ASCII string. - Returns -1 if any escapes were found which cause the string to - pop out of ASCII range. Otherwise returns the length of the - required buffer to hold the string. - */ -static Py_ssize_t -length_of_escaped_ascii_string(const char *s, Py_ssize_t size) -{ - const unsigned char *p = (const unsigned char *)s; - const unsigned char *end = p + size; - Py_ssize_t length = 0; - - if (size < 0) - return -1; - - for (; p < end; ++p) { - if (*p > 127) { - /* Non-ASCII */ - return -1; - } - else if (*p != '\\') { - /* Normal character */ - ++length; - } - else { - /* Backslash-escape, check next char */ - ++p; - /* Escape sequence reaches till end of string or - non-ASCII follow-up. */ - if (p >= end || *p > 127) - return -1; - switch (*p) { - case '\n': - /* backslash + \n result in zero characters */ - break; - case '\\': case '\'': case '\"': - case 'b': case 'f': case 't': - case 'n': case 'r': case 'v': case 'a': - ++length; - break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - case 'x': case 'u': case 'U': case 'N': - /* these do not guarantee ASCII characters */ - return -1; - default: - /* count the backslash + the other character */ - length += 2; - } - } - } - return length; -} - -static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; - -PyObject * -PyUnicode_DecodeUnicodeEscape(const char *s, - Py_ssize_t size, - const char *errors) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - int j; - PyObject *v; - const char *end; - char* message; - Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */ - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - Py_ssize_t len; - Py_ssize_t i; - - len = length_of_escaped_ascii_string(s, size); - - /* After length_of_escaped_ascii_string() there are two alternatives, - either the string is pure ASCII with named escapes like \n, etc. - and we determined it's exact size (common case) - or it contains \x, \u, ... escape sequences. then we create a - legacy wchar string and resize it at the end of this function. */ - if (len >= 0) { - v = PyUnicode_New(len, 127); - if (!v) - goto onError; - assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - } - else { - /* Escaped strings will always be longer than the resulting - Unicode string, so we start with size here and then reduce the - length after conversion to the true value. - (but if the error callback returns a long replacement string - we'll have to allocate more space) */ - v = PyUnicode_New(size, 127); - if (!v) - goto onError; - len = size; - } - - if (size == 0) - return v; - i = 0; - end = s + size; - - while (s < end) { - unsigned char c; - Py_UCS4 x; - int digits; - - /* The only case in which i == ascii_length is a backslash - followed by a newline. */ - assert(i <= len); - - /* Non-escape characters are interpreted as Unicode ordinals */ - if (*s != '\\') { - if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0) - goto onError; - continue; - } - - startinpos = s-starts; - /* \ - Escapes */ - s++; - c = *s++; - if (s > end) - c = '\0'; /* Invalid after \ */ - - /* The only case in which i == ascii_length is a backslash - followed by a newline. */ - assert(i < len || (i == len && c == '\n')); - - switch (c) { - - /* \x escapes */ -#define WRITECHAR(ch) \ - do { \ - if (unicode_putchar(&v, &i, ch) < 0) \ - goto onError; \ - }while(0) - - case '\n': break; - case '\\': WRITECHAR('\\'); break; - case '\'': WRITECHAR('\''); break; - case '\"': WRITECHAR('\"'); break; - case 'b': WRITECHAR('\b'); break; - /* FF */ - case 'f': WRITECHAR('\014'); break; - case 't': WRITECHAR('\t'); break; - case 'n': WRITECHAR('\n'); break; - case 'r': WRITECHAR('\r'); break; - /* VT */ - case 'v': WRITECHAR('\013'); break; - /* BEL, not classic C */ - case 'a': WRITECHAR('\007'); break; - - /* \OOO (octal) escapes */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - x = s[-1] - '0'; - if (s < end && '0' <= *s && *s <= '7') { - x = (x<<3) + *s++ - '0'; - if (s < end && '0' <= *s && *s <= '7') - x = (x<<3) + *s++ - '0'; - } - WRITECHAR(x); - break; - - /* hex escapes */ - /* \xXX */ - case 'x': - digits = 2; - message = "truncated \\xXX escape"; - goto hexescape; - - /* \uXXXX */ - case 'u': - digits = 4; - message = "truncated \\uXXXX escape"; - goto hexescape; - - /* \UXXXXXXXX */ - case 'U': - digits = 8; - message = "truncated \\UXXXXXXXX escape"; - hexescape: - chr = 0; - if (s+digits>end) { - endinpos = size; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "end of string in escape sequence", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - goto nextByte; - } - for (j = 0; j < digits; ++j) { - c = (unsigned char) s[j]; - if (!Py_ISXDIGIT(c)) { - endinpos = (s+j+1)-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - len = PyUnicode_GET_LENGTH(v); - goto nextByte; - } - chr = (chr<<4) & ~0xF; - if (c >= '0' && c <= '9') - chr += c - '0'; - else if (c >= 'a' && c <= 'f') - chr += 10 + c - 'a'; - else - chr += 10 + c - 'A'; - } - s += j; - if (chr == 0xffffffff && PyErr_Occurred()) - /* _decoding_error will have already written into the - target buffer. */ - break; - store: - /* when we get here, chr is a 32-bit unicode character */ - if (chr <= MAX_UNICODE) { - WRITECHAR(chr); - } else { - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", "illegal Unicode character", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - } - break; - - /* \N{name} */ - case 'N': - message = "malformed \\N character escape"; - if (ucnhash_CAPI == NULL) { - /* load the unicode data module */ - ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (ucnhash_CAPI == NULL) - goto ucnhashError; - } - if (*s == '{') { - const char *start = s+1; - /* look for the closing brace */ - while (*s != '}' && s < end) - s++; - if (s > start && s < end && *s == '}') { - /* found a name. look it up in the unicode database */ - message = "unknown Unicode character name"; - s++; - if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), - &chr, 0)) - goto store; - } - } - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - break; - - default: - if (s > end) { - message = "\\ at end of string"; - s--; - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicodeescape", message, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &i)) - goto onError; - } - else { - WRITECHAR('\\'); - WRITECHAR(s[-1]); - } - break; - } - nextByte: - ; - } -#undef WRITECHAR - - if (PyUnicode_Resize(&v, i) < 0) - goto onError; - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(v); - - ucnhashError: - PyErr_SetString( - PyExc_UnicodeError, - "\\N escapes not supported (can't load unicodedata module)" - ); - Py_XDECREF(v); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; - - onError: - Py_XDECREF(v); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -/* Return a Unicode-Escape string version of the Unicode object. - - If quotes is true, the string is enclosed in u"" or u'' quotes as - appropriate. - -*/ - -PyObject * -PyUnicode_AsUnicodeEscapeString(PyObject *unicode) -{ - Py_ssize_t i, len; - PyObject *repr; - char *p; - int kind; - void *data; - Py_ssize_t expandsize = 0; - - /* Initial allocation is based on the longest-possible unichr - escape. - - In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source - unichr, so in this case it's the longest unichr escape. In - narrow (UTF-16) builds this is five chars per source unichr - since there are two unichrs in the surrogate pair, so in narrow - (UTF-16) builds it's not the longest unichr escape. - - In wide or narrow builds '\uxxxx' is 6 chars per source unichr, - so in the narrow (UTF-16) build case it's the longest unichr - escape. - */ - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(unicode) < 0) - return NULL; - len = PyUnicode_GET_LENGTH(unicode); - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - switch(kind) { - case PyUnicode_1BYTE_KIND: expandsize = 4; break; - case PyUnicode_2BYTE_KIND: expandsize = 6; break; - case PyUnicode_4BYTE_KIND: expandsize = 10; break; - } - - if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); - - if (len > (PY_SSIZE_T_MAX - 2 - 1) / expandsize) - return PyErr_NoMemory(); - - repr = PyBytes_FromStringAndSize(NULL, - 2 - + expandsize*len - + 1); - if (repr == NULL) - return NULL; - - p = PyBytes_AS_STRING(repr); - - for (i = 0; i < len; i++) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - - /* Escape backslashes */ - if (ch == '\\') { - *p++ = '\\'; - *p++ = (char) ch; - continue; - } - - /* Map 21-bit characters to '\U00xxxxxx' */ - else if (ch >= 0x10000) { - assert(ch <= MAX_UNICODE); - *p++ = '\\'; - *p++ = 'U'; - *p++ = Py_hexdigits[(ch >> 28) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 24) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 20) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 16) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 12) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 8) & 0x0000000F]; - *p++ = Py_hexdigits[(ch >> 4) & 0x0000000F]; - *p++ = Py_hexdigits[ch & 0x0000000F]; - continue; - } - - /* Map 16-bit characters to '\uxxxx' */ - if (ch >= 256) { - *p++ = '\\'; - *p++ = 'u'; - *p++ = Py_hexdigits[(ch >> 12) & 0x000F]; - *p++ = Py_hexdigits[(ch >> 8) & 0x000F]; - *p++ = Py_hexdigits[(ch >> 4) & 0x000F]; - *p++ = Py_hexdigits[ch & 0x000F]; - } - - /* Map special whitespace to '\t', \n', '\r' */ - else if (ch == '\t') { - *p++ = '\\'; - *p++ = 't'; - } - else if (ch == '\n') { - *p++ = '\\'; - *p++ = 'n'; - } - else if (ch == '\r') { - *p++ = '\\'; - *p++ = 'r'; - } - - /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch >= 0x7F) { - *p++ = '\\'; - *p++ = 'x'; - *p++ = Py_hexdigits[(ch >> 4) & 0x000F]; - *p++ = Py_hexdigits[ch & 0x000F]; - } - - /* Copy everything else as-is */ - else - *p++ = (char) ch; - } - - assert(p - PyBytes_AS_STRING(repr) > 0); - if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0) - return NULL; - return repr; -} - -PyObject * -PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, - Py_ssize_t size) -{ - PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); - if (tmp == NULL) - return NULL; - result = PyUnicode_AsUnicodeEscapeString(tmp); - Py_DECREF(tmp); - return result; -} - -/* --- Raw Unicode Escape Codec ------------------------------------------- */ - -PyObject * -PyUnicode_DecodeRawUnicodeEscape(const char *s, - Py_ssize_t size, - const char *errors) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *v; - const char *end; - const char *bs; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - /* Escaped strings will always be longer than the resulting - Unicode string, so we start with size here and then reduce the - length after conversion to the true value. (But decoding error - handler might have to resize the string) */ - v = PyUnicode_New(size, 127); - if (v == NULL) - goto onError; - if (size == 0) - return v; - outpos = 0; - end = s + size; - while (s < end) { - unsigned char c; - Py_UCS4 x; - int i; - int count; - - /* Non-escape characters are interpreted as Unicode ordinals */ - if (*s != '\\') { - if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) - goto onError; - continue; - } - startinpos = s-starts; - - /* \u-escapes are only interpreted iff the number of leading - backslashes if odd */ - bs = s; - for (;s < end;) { - if (*s != '\\') - break; - if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0) - goto onError; - } - if (((s - bs) & 1) == 0 || - s >= end || - (*s != 'u' && *s != 'U')) { - continue; - } - outpos--; - count = *s=='u' ? 4 : 8; - s++; - - /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ - for (x = 0, i = 0; i < count; ++i, ++s) { - c = (unsigned char)*s; - if (!Py_ISXDIGIT(c)) { - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "rawunicodeescape", "truncated \\uXXXX", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) - goto onError; - goto nextByte; - } - x = (x<<4) & ~0xF; - if (c >= '0' && c <= '9') - x += c - '0'; - else if (c >= 'a' && c <= 'f') - x += 10 + c - 'a'; - else - x += 10 + c - 'A'; - } - if (x <= MAX_UNICODE) { - if (unicode_putchar(&v, &outpos, x) < 0) - goto onError; - } else { - endinpos = s-starts; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "rawunicodeescape", "\\Uxxxxxxxx out of range", - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) - goto onError; - } - nextByte: - ; - } - if (PyUnicode_Resize(&v, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(v); - - onError: - Py_XDECREF(v); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - - -PyObject * -PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) -{ - PyObject *repr; - char *p; - char *q; - Py_ssize_t expandsize, pos; - int kind; - void *data; - Py_ssize_t len; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(unicode) < 0) - return NULL; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - len = PyUnicode_GET_LENGTH(unicode); - /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6 - bytes, and 1 byte characters 4. */ - expandsize = kind * 2 + 2; - - if (len > PY_SSIZE_T_MAX / expandsize) - return PyErr_NoMemory(); - - repr = PyBytes_FromStringAndSize(NULL, expandsize * len); - if (repr == NULL) - return NULL; - if (len == 0) - return repr; - - p = q = PyBytes_AS_STRING(repr); - for (pos = 0; pos < len; pos++) { - Py_UCS4 ch = PyUnicode_READ(kind, data, pos); - /* Map 32-bit characters to '\Uxxxxxxxx' */ - if (ch >= 0x10000) { - assert(ch <= MAX_UNICODE); - *p++ = '\\'; - *p++ = 'U'; - *p++ = Py_hexdigits[(ch >> 28) & 0xf]; - *p++ = Py_hexdigits[(ch >> 24) & 0xf]; - *p++ = Py_hexdigits[(ch >> 20) & 0xf]; - *p++ = Py_hexdigits[(ch >> 16) & 0xf]; - *p++ = Py_hexdigits[(ch >> 12) & 0xf]; - *p++ = Py_hexdigits[(ch >> 8) & 0xf]; - *p++ = Py_hexdigits[(ch >> 4) & 0xf]; - *p++ = Py_hexdigits[ch & 15]; - } - /* Map 16-bit characters to '\uxxxx' */ - else if (ch >= 256) { - *p++ = '\\'; - *p++ = 'u'; - *p++ = Py_hexdigits[(ch >> 12) & 0xf]; - *p++ = Py_hexdigits[(ch >> 8) & 0xf]; - *p++ = Py_hexdigits[(ch >> 4) & 0xf]; - *p++ = Py_hexdigits[ch & 15]; - } - /* Copy everything else as-is */ - else - *p++ = (char) ch; - } - - assert(p > q); - if (_PyBytes_Resize(&repr, p - q) < 0) - return NULL; - return repr; -} - -PyObject * -PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, - Py_ssize_t size) -{ - PyObject *result; - PyObject *tmp = PyUnicode_FromUnicode(s, size); - if (tmp == NULL) - return NULL; - result = PyUnicode_AsRawUnicodeEscapeString(tmp); - Py_DECREF(tmp); - return result; -} - -/* --- Unicode Internal Codec ------------------------------------------- */ - -PyObject * -_PyUnicode_DecodeUnicodeInternal(const char *s, - Py_ssize_t size, - const char *errors) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - PyObject *v; - const char *end; - const char *reason; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "unicode_internal codec has been deprecated", - 1)) - return NULL; - - /* XXX overflow detection missing */ - v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127); - if (v == NULL) - goto onError; - if (PyUnicode_GET_LENGTH(v) == 0) - return v; - outpos = 0; - end = s + size; - - while (s < end) { - Py_UNICODE uch; - Py_UCS4 ch; - /* We copy the raw representation one byte at a time because the - pointer may be unaligned (see test_codeccallbacks). */ - ((char *) &uch)[0] = s[0]; - ((char *) &uch)[1] = s[1]; -#ifdef Py_UNICODE_WIDE - ((char *) &uch)[2] = s[2]; - ((char *) &uch)[3] = s[3]; -#endif - ch = uch; - - /* We have to sanity check the raw data, otherwise doom looms for - some malformed UCS-4 data. */ - if ( -#ifdef Py_UNICODE_WIDE - ch > 0x10ffff || -#endif - end-s < Py_UNICODE_SIZE - ) - { - startinpos = s - starts; - if (end-s < Py_UNICODE_SIZE) { - endinpos = end-starts; - reason = "truncated input"; - } - else { - endinpos = s - starts + Py_UNICODE_SIZE; - reason = "illegal code point (> 0x10FFFF)"; - } - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicode_internal", reason, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) - goto onError; - continue; - } - - s += Py_UNICODE_SIZE; -#ifndef Py_UNICODE_WIDE - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end) - { - Py_UNICODE uch2; - ((char *) &uch2)[0] = s[0]; - ((char *) &uch2)[1] = s[1]; - if (Py_UNICODE_IS_LOW_SURROGATE(uch2)) - { - ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2); - s += Py_UNICODE_SIZE; - } - } -#endif - - if (unicode_putchar(&v, &outpos, ch) < 0) - goto onError; - } - - if (PyUnicode_Resize(&v, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(v); - - onError: - Py_XDECREF(v); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -/* --- Latin-1 Codec ------------------------------------------------------ */ - -PyObject * -PyUnicode_DecodeLatin1(const char *s, - Py_ssize_t size, - const char *errors) -{ - /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ - return _PyUnicode_FromUCS1((unsigned char*)s, size); -} - -/* create or adjust a UnicodeEncodeError */ -static void -make_encode_exception(PyObject **exceptionObject, - const char *encoding, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - if (*exceptionObject == NULL) { - *exceptionObject = PyObject_CallFunction( - PyExc_UnicodeEncodeError, "sOnns", - encoding, unicode, startpos, endpos, reason); - } - else { - if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos)) - goto onError; - if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos)) - goto onError; - if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason)) - goto onError; - return; - onError: - Py_DECREF(*exceptionObject); - *exceptionObject = NULL; - } -} - -/* raises a UnicodeEncodeError */ -static void -raise_encode_exception(PyObject **exceptionObject, - const char *encoding, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - make_encode_exception(exceptionObject, - encoding, unicode, startpos, endpos, reason); - if (*exceptionObject != NULL) - PyCodec_StrictErrors(*exceptionObject); -} - -/* error handling callback helper: - build arguments, call the callback and check the arguments, - put the result into newpos and return the replacement string, which - has to be freed by the caller */ -static PyObject * -unicode_encode_call_errorhandler(const char *errors, - PyObject **errorHandler, - const char *encoding, const char *reason, - PyObject *unicode, PyObject **exceptionObject, - Py_ssize_t startpos, Py_ssize_t endpos, - Py_ssize_t *newpos) -{ - static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple"; - Py_ssize_t len; - PyObject *restuple; - PyObject *resunicode; - - if (*errorHandler == NULL) { - *errorHandler = PyCodec_LookupError(errors); - if (*errorHandler == NULL) - return NULL; - } - - if (PyUnicode_READY(unicode) < 0) - return NULL; - len = PyUnicode_GET_LENGTH(unicode); - - make_encode_exception(exceptionObject, - encoding, unicode, startpos, endpos, reason); - if (*exceptionObject == NULL) - return NULL; - - restuple = PyObject_CallFunctionObjArgs( - *errorHandler, *exceptionObject, NULL); - if (restuple == NULL) - return NULL; - if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[3]); - Py_DECREF(restuple); - return NULL; - } - if (!PyArg_ParseTuple(restuple, argparse, - &resunicode, newpos)) { - Py_DECREF(restuple); - return NULL; - } - if (!PyUnicode_Check(resunicode) && !PyBytes_Check(resunicode)) { - PyErr_SetString(PyExc_TypeError, &argparse[3]); - Py_DECREF(restuple); - return NULL; - } - if (*newpos<0) - *newpos = len + *newpos; - if (*newpos<0 || *newpos>len) { - PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos); - Py_DECREF(restuple); - return NULL; - } - Py_INCREF(resunicode); - Py_DECREF(restuple); - return resunicode; -} - -static PyObject * -unicode_encode_ucs1(PyObject *unicode, - const char *errors, - unsigned int limit) -{ - /* input state */ - Py_ssize_t pos=0, size; - int kind; - void *data; - /* output object */ - PyObject *res; - /* pointer into the output */ - char *str; - /* current output position */ - Py_ssize_t ressize; - const char *encoding = (limit == 256) ? "latin-1" : "ascii"; - const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)"; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; - - if (PyUnicode_READY(unicode) < 0) - return NULL; - size = PyUnicode_GET_LENGTH(unicode); - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - /* allocate enough for a simple encoding without - replacements, if we need more, we'll resize */ - if (size == 0) - return PyBytes_FromStringAndSize(NULL, 0); - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) - return NULL; - str = PyBytes_AS_STRING(res); - ressize = size; - - while (pos < size) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos); - - /* can we encode this? */ - if (c<limit) { - /* no overflow check, because we know that the space is enough */ - *str++ = (char)c; - ++pos; - } - else { - Py_ssize_t requiredsize; - PyObject *repunicode; - Py_ssize_t repsize, newpos, respos, i; - /* startpos for collecting unencodable chars */ - Py_ssize_t collstart = pos; - Py_ssize_t collend = pos; - /* find all unecodable characters */ - while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) - ++collend; - /* cache callback name lookup (if not done yet, i.e. it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); - goto onError; - case 2: /* replace */ - while (collstart++<collend) - *str++ = '?'; /* fall through */ - case 3: /* ignore */ - pos = collend; - break; - case 4: /* xmlcharrefreplace */ - respos = str - PyBytes_AS_STRING(res); - /* determine replacement size */ - for (i = collstart, repsize = 0; i < collend; ++i) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (ch < 10) - repsize += 2+1+1; - else if (ch < 100) - repsize += 2+2+1; - else if (ch < 1000) - repsize += 2+3+1; - else if (ch < 10000) - repsize += 2+4+1; - else if (ch < 100000) - repsize += 2+5+1; - else if (ch < 1000000) - repsize += 2+6+1; - else { - assert(ch <= MAX_UNICODE); - repsize += 2+7+1; - } - } - requiredsize = respos+repsize+(size-collend); - if (requiredsize > ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) - goto onError; - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } - /* generate replacement */ - for (i = collstart; i < collend; ++i) { - str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); - } - pos = collend; - break; - default: - repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, - encoding, reason, unicode, &exc, - collstart, collend, &newpos); - if (repunicode == NULL || (PyUnicode_Check(repunicode) && - PyUnicode_READY(repunicode) < 0)) - goto onError; - if (PyBytes_Check(repunicode)) { - /* Directly copy bytes result to output. */ - repsize = PyBytes_Size(repunicode); - if (repsize > 1) { - /* Make room for all additional bytes. */ - respos = str - PyBytes_AS_STRING(res); - if (_PyBytes_Resize(&res, ressize+repsize-1)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize += repsize-1; - } - memcpy(str, PyBytes_AsString(repunicode), repsize); - str += repsize; - pos = newpos; - Py_DECREF(repunicode); - break; - } - /* need more space? (at least enough for what we - have+the replacement+the rest of the string, so - we won't have to check space for encodable characters) */ - respos = str - PyBytes_AS_STRING(res); - repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); - if (requiredsize > ressize) { - if (requiredsize<2*ressize) - requiredsize = 2*ressize; - if (_PyBytes_Resize(&res, requiredsize)) { - Py_DECREF(repunicode); - goto onError; - } - str = PyBytes_AS_STRING(res) + respos; - ressize = requiredsize; - } - /* check if there is anything unencodable in the replacement - and copy it to the output */ - for (i = 0; repsize-->0; ++i, ++str) { - c = PyUnicode_READ_CHAR(repunicode, i); - if (c >= limit) { - raise_encode_exception(&exc, encoding, unicode, - pos, pos+1, reason); - Py_DECREF(repunicode); - goto onError; - } - *str = (char)c; - } - pos = newpos; - Py_DECREF(repunicode); - } - } - } - /* Resize if we allocated to much */ - size = str - PyBytes_AS_STRING(res); - if (size < ressize) { /* If this falls res will be NULL */ - assert(size >= 0); - if (_PyBytes_Resize(&res, size) < 0) - goto onError; - } - - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return res; - - onError: - Py_XDECREF(res); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -/* Deprecated */ -PyObject * -PyUnicode_EncodeLatin1(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = unicode_encode_ucs1(unicode, errors, 256); - Py_DECREF(unicode); - return result; -} - -PyObject * -_PyUnicode_AsLatin1String(PyObject *unicode, const char *errors) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(unicode) == -1) - return NULL; - /* Fast path: if it is a one-byte string, construct - bytes object directly. */ - if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) - return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), - PyUnicode_GET_LENGTH(unicode)); - /* Non-Latin-1 characters present. Defer to above function to - raise the exception. */ - return unicode_encode_ucs1(unicode, errors, 256); -} - -PyObject* -PyUnicode_AsLatin1String(PyObject *unicode) -{ - return _PyUnicode_AsLatin1String(unicode, NULL); -} - -/* --- 7-bit ASCII Codec -------------------------------------------------- */ - -PyObject * -PyUnicode_DecodeASCII(const char *s, - Py_ssize_t size, - const char *errors) -{ - const char *starts = s; - PyObject *v; - int kind; - void *data; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - const char *e; - int has_error; - const unsigned char *p = (const unsigned char *)s; - const unsigned char *end = p + size; - const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK); - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - if (size == 0) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - - /* ASCII is equivalent to the first 128 ordinals in Unicode. */ - if (size == 1 && (unsigned char)s[0] < 128) - return get_latin1_char((unsigned char)s[0]); - - has_error = 0; - while (p < end && !has_error) { - /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for - an explanation. */ - if (!((size_t) p & LONG_PTR_MASK)) { - /* Help register allocation */ - register const unsigned char *_p = p; - while (_p < aligned_end) { - unsigned long value = *(unsigned long *) _p; - if (value & ASCII_CHAR_MASK) { - has_error = 1; - break; - } - _p += SIZEOF_LONG; - } - if (_p == end) - break; - if (has_error) - break; - p = _p; - } - if (*p & 0x80) { - has_error = 1; - break; - } - else { - ++p; - } - } - if (!has_error) - return unicode_fromascii((const unsigned char *)s, size); - - v = PyUnicode_New(size, 127); - if (v == NULL) - goto onError; - if (size == 0) - return v; - kind = PyUnicode_KIND(v); - data = PyUnicode_DATA(v); - outpos = 0; - e = s + size; - while (s < e) { - register unsigned char c = (unsigned char)*s; - if (c < 128) { - PyUnicode_WRITE(kind, data, outpos++, c); - ++s; - } - else { - startinpos = s-starts; - endinpos = startinpos + 1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "ascii", "ordinal not in range(128)", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) - goto onError; - kind = PyUnicode_KIND(v); - data = PyUnicode_DATA(v); - } - } - if (PyUnicode_Resize(&v, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; - - onError: - Py_XDECREF(v); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return NULL; -} - -/* Deprecated */ -PyObject * -PyUnicode_EncodeASCII(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = unicode_encode_ucs1(unicode, errors, 128); - Py_DECREF(unicode); - return result; -} - -PyObject * -_PyUnicode_AsASCIIString(PyObject *unicode, const char *errors) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - if (PyUnicode_READY(unicode) == -1) - return NULL; - /* Fast path: if it is an ASCII-only string, construct bytes object - directly. Else defer to above function to raise the exception. */ - if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) - return PyBytes_FromStringAndSize(PyUnicode_DATA(unicode), - PyUnicode_GET_LENGTH(unicode)); - return unicode_encode_ucs1(unicode, errors, 128); -} - -PyObject * -PyUnicode_AsASCIIString(PyObject *unicode) -{ - return _PyUnicode_AsASCIIString(unicode, NULL); -} - -#ifdef HAVE_MBCS - -/* --- MBCS codecs for Windows -------------------------------------------- */ - -#if SIZEOF_INT < SIZEOF_SIZE_T -#define NEED_RETRY -#endif - -#ifndef WC_ERR_INVALID_CHARS -# define WC_ERR_INVALID_CHARS 0x0080 -#endif - -static char* -code_page_name(UINT code_page, PyObject **obj) -{ - *obj = NULL; - if (code_page == CP_ACP) - return "mbcs"; - if (code_page == CP_UTF7) - return "CP_UTF7"; - if (code_page == CP_UTF8) - return "CP_UTF8"; - - *obj = PyBytes_FromFormat("cp%u", code_page); - if (*obj == NULL) - return NULL; - return PyBytes_AS_STRING(*obj); -} - -static int -is_dbcs_lead_byte(UINT code_page, const char *s, int offset) -{ - const char *curr = s + offset; - const char *prev; - - if (!IsDBCSLeadByteEx(code_page, *curr)) - return 0; - - prev = CharPrevExA(code_page, s, curr, 0); - if (prev == curr) - return 1; - /* FIXME: This code is limited to "true" double-byte encodings, - as it assumes an incomplete character consists of a single - byte. */ - if (curr - prev == 2) - return 1; - if (!IsDBCSLeadByteEx(code_page, *prev)) - return 1; - return 0; -} - -static DWORD -decode_code_page_flags(UINT code_page) -{ - if (code_page == CP_UTF7) { - /* The CP_UTF7 decoder only supports flags=0 */ - return 0; - } - else - return MB_ERR_INVALID_CHARS; -} - -/* - * Decode a byte string from a Windows code page into unicode object in strict - * mode. - * - * Returns consumed size if succeed, returns -2 on decode error, or raise a - * WindowsError and returns -1 on other error. - */ -static int -decode_code_page_strict(UINT code_page, - PyObject **v, - const char *in, - int insize) -{ - const DWORD flags = decode_code_page_flags(code_page); - wchar_t *out; - DWORD outsize; - - /* First get the size of the result */ - assert(insize > 0); - outsize = MultiByteToWideChar(code_page, flags, in, insize, NULL, 0); - if (outsize <= 0) - goto error; - - if (*v == NULL) { - /* Create unicode object */ - *v = (PyObject*)_PyUnicode_New(outsize); - if (*v == NULL) - return -1; - out = PyUnicode_AS_UNICODE(*v); - } - else { - /* Extend unicode object */ - Py_ssize_t n = PyUnicode_GET_SIZE(*v); - if (PyUnicode_Resize(v, n + outsize) < 0) - return -1; - out = PyUnicode_AS_UNICODE(*v) + n; - } - - /* Do the conversion */ - outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize); - if (outsize <= 0) - goto error; - return insize; - -error: - if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) - return -2; - PyErr_SetFromWindowsErr(0); - return -1; -} - -/* - * Decode a byte string from a code page into unicode object with an error - * handler. - * - * Returns consumed size if succeed, or raise a WindowsError or - * UnicodeDecodeError exception and returns -1 on error. - */ -static int -decode_code_page_errors(UINT code_page, - PyObject **v, - const char *in, const int size, - const char *errors) -{ - const char *startin = in; - const char *endin = in + size; - const DWORD flags = decode_code_page_flags(code_page); - /* Ideally, we should get reason from FormatMessage. This is the Windows - 2000 English version of the message. */ - const char *reason = "No mapping for the Unicode character exists " - "in the target code page."; - /* each step cannot decode more than 1 character, but a character can be - represented as a surrogate pair */ - wchar_t buffer[2], *startout, *out; - int insize, outsize; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - PyObject *encoding_obj = NULL; - char *encoding; - DWORD err; - int ret = -1; - - assert(size > 0); - - encoding = code_page_name(code_page, &encoding_obj); - if (encoding == NULL) - return -1; - - if (errors == NULL || strcmp(errors, "strict") == 0) { - /* The last error was ERROR_NO_UNICODE_TRANSLATION, then we raise a - UnicodeDecodeError. */ - make_decode_exception(&exc, encoding, in, size, 0, 0, reason); - if (exc != NULL) { - PyCodec_StrictErrors(exc); - Py_CLEAR(exc); - } - goto error; - } - - if (*v == NULL) { - /* Create unicode object */ - if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { - PyErr_NoMemory(); - goto error; - } - *v = (PyObject*)_PyUnicode_New(size * Py_ARRAY_LENGTH(buffer)); - if (*v == NULL) - goto error; - startout = PyUnicode_AS_UNICODE(*v); - } - else { - /* Extend unicode object */ - Py_ssize_t n = PyUnicode_GET_SIZE(*v); - if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { - PyErr_NoMemory(); - goto error; - } - if (PyUnicode_Resize(v, n + size * Py_ARRAY_LENGTH(buffer)) < 0) - goto error; - startout = PyUnicode_AS_UNICODE(*v) + n; - } - - /* Decode the byte string character per character */ - out = startout; - while (in < endin) - { - /* Decode a character */ - insize = 1; - do - { - outsize = MultiByteToWideChar(code_page, flags, - in, insize, - buffer, Py_ARRAY_LENGTH(buffer)); - if (outsize > 0) - break; - err = GetLastError(); - if (err != ERROR_NO_UNICODE_TRANSLATION - && err != ERROR_INSUFFICIENT_BUFFER) - { - PyErr_SetFromWindowsErr(0); - goto error; - } - insize++; - } - /* 4=maximum length of a UTF-8 sequence */ - while (insize <= 4 && (in + insize) <= endin); - - if (outsize <= 0) { - Py_ssize_t startinpos, endinpos, outpos; - - startinpos = in - startin; - endinpos = startinpos + 1; - outpos = out - PyUnicode_AS_UNICODE(*v); - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - encoding, reason, - &startin, &endin, &startinpos, &endinpos, &exc, &in, - v, &outpos)) - { - goto error; - } - out = PyUnicode_AS_UNICODE(*v) + outpos; - } - else { - in += insize; - memcpy(out, buffer, outsize * sizeof(wchar_t)); - out += outsize; - } - } - - /* write a NUL character at the end */ - *out = 0; - - /* Extend unicode object */ - outsize = out - startout; - assert(outsize <= PyUnicode_WSTR_LENGTH(*v)); - if (PyUnicode_Resize(v, outsize) < 0) - goto error; - ret = size; - -error: - Py_XDECREF(encoding_obj); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return ret; -} - -static PyObject * -decode_code_page_stateful(int code_page, - const char *s, Py_ssize_t size, - const char *errors, Py_ssize_t *consumed) -{ - PyObject *v = NULL; - int chunk_size, final, converted, done; - - if (code_page < 0) { - PyErr_SetString(PyExc_ValueError, "invalid code page number"); - return NULL; - } - - if (consumed) - *consumed = 0; - - do - { -#ifdef NEED_RETRY - if (size > INT_MAX) { - chunk_size = INT_MAX; - final = 0; - done = 0; - } - else -#endif - { - chunk_size = (int)size; - final = (consumed == NULL); - done = 1; - } - - /* Skip trailing lead-byte unless 'final' is set */ - if (!final && is_dbcs_lead_byte(code_page, s, chunk_size - 1)) - --chunk_size; - - if (chunk_size == 0 && done) { - if (v != NULL) - break; - Py_INCREF(unicode_empty); - return unicode_empty; - } - - - converted = decode_code_page_strict(code_page, &v, - s, chunk_size); - if (converted == -2) - converted = decode_code_page_errors(code_page, &v, - s, chunk_size, - errors); - assert(converted != 0); - - if (converted < 0) { - Py_XDECREF(v); - return NULL; - } - - if (consumed) - *consumed += converted; - - s += converted; - size -= converted; - } while (!done); - - return unicode_result(v); -} - -PyObject * -PyUnicode_DecodeCodePageStateful(int code_page, - const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) -{ - return decode_code_page_stateful(code_page, s, size, errors, consumed); -} - -PyObject * -PyUnicode_DecodeMBCSStateful(const char *s, - Py_ssize_t size, - const char *errors, - Py_ssize_t *consumed) -{ - return decode_code_page_stateful(CP_ACP, s, size, errors, consumed); -} - -PyObject * -PyUnicode_DecodeMBCS(const char *s, - Py_ssize_t size, - const char *errors) -{ - return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); -} - -static DWORD -encode_code_page_flags(UINT code_page, const char *errors) -{ - if (code_page == CP_UTF8) { - if (winver.dwMajorVersion >= 6) - /* CP_UTF8 supports WC_ERR_INVALID_CHARS on Windows Vista - and later */ - return WC_ERR_INVALID_CHARS; - else - /* CP_UTF8 only supports flags=0 on Windows older than Vista */ - return 0; - } - else if (code_page == CP_UTF7) { - /* CP_UTF7 only supports flags=0 */ - return 0; - } - else { - if (errors != NULL && strcmp(errors, "replace") == 0) - return 0; - else - return WC_NO_BEST_FIT_CHARS; - } -} - -/* - * Encode a Unicode string to a Windows code page into a byte string in strict - * mode. - * - * Returns consumed characters if succeed, returns -2 on encode error, or raise - * a WindowsError and returns -1 on other error. - */ -static int -encode_code_page_strict(UINT code_page, PyObject **outbytes, - PyObject *unicode, Py_ssize_t offset, int len, - const char* errors) -{ - BOOL usedDefaultChar = FALSE; - BOOL *pusedDefaultChar = &usedDefaultChar; - int outsize; - PyObject *exc = NULL; - wchar_t *p; - Py_ssize_t size; - const DWORD flags = encode_code_page_flags(code_page, NULL); - char *out; - /* Create a substring so that we can get the UTF-16 representation - of just the slice under consideration. */ - PyObject *substring; - - assert(len > 0); - - if (code_page != CP_UTF8 && code_page != CP_UTF7) - pusedDefaultChar = &usedDefaultChar; - else - pusedDefaultChar = NULL; - - substring = PyUnicode_Substring(unicode, offset, offset+len); - if (substring == NULL) - return -1; - p = PyUnicode_AsUnicodeAndSize(substring, &size); - if (p == NULL) { - Py_DECREF(substring); - return -1; - } - - /* First get the size of the result */ - outsize = WideCharToMultiByte(code_page, flags, - p, size, - NULL, 0, - NULL, pusedDefaultChar); - if (outsize <= 0) - goto error; - /* If we used a default char, then we failed! */ - if (pusedDefaultChar && *pusedDefaultChar) { - Py_DECREF(substring); - return -2; - } - - if (*outbytes == NULL) { - /* Create string object */ - *outbytes = PyBytes_FromStringAndSize(NULL, outsize); - if (*outbytes == NULL) { - Py_DECREF(substring); - return -1; - } - out = PyBytes_AS_STRING(*outbytes); - } - else { - /* Extend string object */ - const Py_ssize_t n = PyBytes_Size(*outbytes); - if (outsize > PY_SSIZE_T_MAX - n) { - PyErr_NoMemory(); - Py_DECREF(substring); - return -1; - } - if (_PyBytes_Resize(outbytes, n + outsize) < 0) { - Py_DECREF(substring); - return -1; - } - out = PyBytes_AS_STRING(*outbytes) + n; - } - - /* Do the conversion */ - outsize = WideCharToMultiByte(code_page, flags, - p, size, - out, outsize, - NULL, pusedDefaultChar); - Py_CLEAR(substring); - if (outsize <= 0) - goto error; - if (pusedDefaultChar && *pusedDefaultChar) - return -2; - return 0; - -error: - Py_XDECREF(substring); - if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) - return -2; - PyErr_SetFromWindowsErr(0); - return -1; -} - -/* - * Encode a Unicode string to a Windows code page into a byte string using a - * error handler. - * - * Returns consumed characters if succeed, or raise a WindowsError and returns - * -1 on other error. - */ -static int -encode_code_page_errors(UINT code_page, PyObject **outbytes, - PyObject *unicode, Py_ssize_t unicode_offset, - Py_ssize_t insize, const char* errors) -{ - const DWORD flags = encode_code_page_flags(code_page, errors); - Py_ssize_t pos = unicode_offset; - Py_ssize_t endin = unicode_offset + insize; - /* Ideally, we should get reason from FormatMessage. This is the Windows - 2000 English version of the message. */ - const char *reason = "invalid character"; - /* 4=maximum length of a UTF-8 sequence */ - char buffer[4]; - BOOL usedDefaultChar = FALSE, *pusedDefaultChar; - Py_ssize_t outsize; - char *out; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - PyObject *encoding_obj = NULL; - char *encoding; - Py_ssize_t newpos, newoutsize; - PyObject *rep; - int ret = -1; - - assert(insize > 0); - - encoding = code_page_name(code_page, &encoding_obj); - if (encoding == NULL) - return -1; - - if (errors == NULL || strcmp(errors, "strict") == 0) { - /* The last error was ERROR_NO_UNICODE_TRANSLATION, - then we raise a UnicodeEncodeError. */ - make_encode_exception(&exc, encoding, unicode, 0, 0, reason); - if (exc != NULL) { - PyCodec_StrictErrors(exc); - Py_DECREF(exc); - } - Py_XDECREF(encoding_obj); - return -1; - } - - if (code_page != CP_UTF8 && code_page != CP_UTF7) - pusedDefaultChar = &usedDefaultChar; - else - pusedDefaultChar = NULL; - - if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) { - PyErr_NoMemory(); - goto error; - } - outsize = insize * Py_ARRAY_LENGTH(buffer); - - if (*outbytes == NULL) { - /* Create string object */ - *outbytes = PyBytes_FromStringAndSize(NULL, outsize); - if (*outbytes == NULL) - goto error; - out = PyBytes_AS_STRING(*outbytes); - } - else { - /* Extend string object */ - Py_ssize_t n = PyBytes_Size(*outbytes); - if (n > PY_SSIZE_T_MAX - outsize) { - PyErr_NoMemory(); - goto error; - } - if (_PyBytes_Resize(outbytes, n + outsize) < 0) - goto error; - out = PyBytes_AS_STRING(*outbytes) + n; - } - - /* Encode the string character per character */ - while (pos < endin) - { - Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, pos); - wchar_t chars[2]; - int charsize; - if (ch < 0x10000) { - chars[0] = (wchar_t)ch; - charsize = 1; - } - else { - ch -= 0x10000; - chars[0] = 0xd800 + (ch >> 10); - chars[1] = 0xdc00 + (ch & 0x3ff); - charsize = 2; - } - - outsize = WideCharToMultiByte(code_page, flags, - chars, charsize, - buffer, Py_ARRAY_LENGTH(buffer), - NULL, pusedDefaultChar); - if (outsize > 0) { - if (pusedDefaultChar == NULL || !(*pusedDefaultChar)) - { - pos++; - memcpy(out, buffer, outsize); - out += outsize; - continue; - } - } - else if (GetLastError() != ERROR_NO_UNICODE_TRANSLATION) { - PyErr_SetFromWindowsErr(0); - goto error; - } - - rep = unicode_encode_call_errorhandler( - errors, &errorHandler, encoding, reason, - unicode, &exc, - pos, pos + 1, &newpos); - if (rep == NULL) - goto error; - pos = newpos; - - if (PyBytes_Check(rep)) { - outsize = PyBytes_GET_SIZE(rep); - if (outsize != 1) { - Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); - newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); - if (_PyBytes_Resize(outbytes, newoutsize) < 0) { - Py_DECREF(rep); - goto error; - } - out = PyBytes_AS_STRING(*outbytes) + offset; - } - memcpy(out, PyBytes_AS_STRING(rep), outsize); - out += outsize; - } - else { - Py_ssize_t i; - enum PyUnicode_Kind kind; - void *data; - - if (PyUnicode_READY(rep) < 0) { - Py_DECREF(rep); - goto error; - } - - outsize = PyUnicode_GET_LENGTH(rep); - if (outsize != 1) { - Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); - newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); - if (_PyBytes_Resize(outbytes, newoutsize) < 0) { - Py_DECREF(rep); - goto error; - } - out = PyBytes_AS_STRING(*outbytes) + offset; - } - kind = PyUnicode_KIND(rep); - data = PyUnicode_DATA(rep); - for (i=0; i < outsize; i++) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (ch > 127) { - raise_encode_exception(&exc, - encoding, unicode, - pos, pos + 1, - "unable to encode error handler result to ASCII"); - Py_DECREF(rep); - goto error; - } - *out = (unsigned char)ch; - out++; - } - } - Py_DECREF(rep); - } - /* write a NUL byte */ - *out = 0; - outsize = out - PyBytes_AS_STRING(*outbytes); - assert(outsize <= PyBytes_GET_SIZE(*outbytes)); - if (_PyBytes_Resize(outbytes, outsize) < 0) - goto error; - ret = 0; - -error: - Py_XDECREF(encoding_obj); - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return ret; -} - -static PyObject * -encode_code_page(int code_page, - PyObject *unicode, - const char *errors) -{ - Py_ssize_t len; - PyObject *outbytes = NULL; - Py_ssize_t offset; - int chunk_len, ret, done; - - if (PyUnicode_READY(unicode) < 0) - return NULL; - len = PyUnicode_GET_LENGTH(unicode); - - if (code_page < 0) { - PyErr_SetString(PyExc_ValueError, "invalid code page number"); - return NULL; - } - - if (len == 0) - return PyBytes_FromStringAndSize(NULL, 0); - - offset = 0; - do - { -#ifdef NEED_RETRY - /* UTF-16 encoding may double the size, so use only INT_MAX/2 - chunks. */ - if (len > INT_MAX/2) { - chunk_len = INT_MAX/2; - done = 0; - } - else -#endif - { - chunk_len = (int)len; - done = 1; - } - - ret = encode_code_page_strict(code_page, &outbytes, - unicode, offset, chunk_len, - errors); - if (ret == -2) - ret = encode_code_page_errors(code_page, &outbytes, - unicode, offset, - chunk_len, errors); - if (ret < 0) { - Py_XDECREF(outbytes); - return NULL; - } - - offset += chunk_len; - len -= chunk_len; - } while (!done); - - return outbytes; -} - -PyObject * -PyUnicode_EncodeMBCS(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) -{ - PyObject *unicode, *res; - unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - res = encode_code_page(CP_ACP, unicode, errors); - Py_DECREF(unicode); - return res; -} - -PyObject * -PyUnicode_EncodeCodePage(int code_page, - PyObject *unicode, - const char *errors) -{ - return encode_code_page(code_page, unicode, errors); -} - -PyObject * -PyUnicode_AsMBCSString(PyObject *unicode) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - return PyUnicode_EncodeCodePage(CP_ACP, unicode, NULL); -} - -#undef NEED_RETRY - -#endif /* HAVE_MBCS */ - -/* --- Character Mapping Codec -------------------------------------------- */ - -PyObject * -PyUnicode_DecodeCharmap(const char *s, - Py_ssize_t size, - PyObject *mapping, - const char *errors) -{ - const char *starts = s; - Py_ssize_t startinpos; - Py_ssize_t endinpos; - Py_ssize_t outpos; - const char *e; - PyObject *v; - Py_ssize_t extrachars = 0; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - - /* Default to Latin-1 */ - if (mapping == NULL) - return PyUnicode_DecodeLatin1(s, size, errors); - - v = PyUnicode_New(size, 127); - if (v == NULL) - goto onError; - if (size == 0) - return v; - outpos = 0; - e = s + size; - if (PyUnicode_CheckExact(mapping)) { - Py_ssize_t maplen; - enum PyUnicode_Kind kind; - void *data; - Py_UCS4 x; - - if (PyUnicode_READY(mapping) < 0) - return NULL; - - maplen = PyUnicode_GET_LENGTH(mapping); - data = PyUnicode_DATA(mapping); - kind = PyUnicode_KIND(mapping); - while (s < e) { - unsigned char ch = *s; - - if (ch < maplen) - x = PyUnicode_READ(kind, data, ch); - else - x = 0xfffe; /* invalid value */ - - if (x == 0xfffe) - { - /* undefined mapping */ - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { - goto onError; - } - continue; - } - - if (unicode_putchar(&v, &outpos, x) < 0) - goto onError; - ++s; - } - } - else { - while (s < e) { - unsigned char ch = *s; - PyObject *w, *x; - - /* Get mapping (char ordinal -> integer, Unicode char or None) */ - w = PyLong_FromLong((long)ch); - if (w == NULL) - goto onError; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: mapping is undefined. */ - PyErr_Clear(); - x = Py_None; - Py_INCREF(x); - } else - goto onError; - } - - /* Apply mapping */ - if (PyLong_Check(x)) { - long value = PyLong_AS_LONG(x); - if (value < 0 || value > 65535) { - PyErr_SetString(PyExc_TypeError, - "character mapping must be in range(65536)"); - Py_DECREF(x); - goto onError; - } - if (unicode_putchar(&v, &outpos, value) < 0) - goto onError; - } - else if (x == Py_None) { - /* undefined mapping */ - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to <undefined>", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - continue; - } - else if (PyUnicode_Check(x)) { - Py_ssize_t targetsize; - - if (PyUnicode_READY(x) < 0) - goto onError; - targetsize = PyUnicode_GET_LENGTH(x); - - if (targetsize == 1) { - /* 1-1 mapping */ - if (unicode_putchar(&v, &outpos, - PyUnicode_READ_CHAR(x, 0)) < 0) - goto onError; - } - else if (targetsize > 1) { - /* 1-n mapping */ - if (targetsize > extrachars) { - /* resize first */ - Py_ssize_t needed = (targetsize - extrachars) + \ - (targetsize << 2); - extrachars += needed; - /* XXX overflow detection missing */ - if (PyUnicode_Resize(&v, - PyUnicode_GET_LENGTH(v) + needed) < 0) { - Py_DECREF(x); - goto onError; - } - } - if (unicode_widen(&v, PyUnicode_MAX_CHAR_VALUE(x)) < 0) - goto onError; - PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize); - outpos += targetsize; - extrachars -= targetsize; - } - /* 1-0 mapping: skip the character */ - } - else { - /* wrong return value */ - PyErr_SetString(PyExc_TypeError, - "character mapping must return integer, None or str"); - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - ++s; - } - } - if (PyUnicode_Resize(&v, outpos) < 0) - goto onError; - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - return unicode_result(v); - - onError: - Py_XDECREF(errorHandler); - Py_XDECREF(exc); - Py_XDECREF(v); - return NULL; -} - -/* Charmap encoding: the lookup table */ - -struct encoding_map { - PyObject_HEAD - unsigned char level1[32]; - int count2, count3; - unsigned char level23[1]; -}; - -static PyObject* -encoding_map_size(PyObject *obj, PyObject* args) -{ - struct encoding_map *map = (struct encoding_map*)obj; - return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 + - 128*map->count3); -} - -static PyMethodDef encoding_map_methods[] = { - {"size", encoding_map_size, METH_NOARGS, - PyDoc_STR("Return the size (in bytes) of this object") }, - { 0 } -}; - -static void -encoding_map_dealloc(PyObject* o) -{ - PyObject_FREE(o); -} - -static PyTypeObject EncodingMapType = { - PyVarObject_HEAD_INIT(NULL, 0) - "EncodingMap", /*tp_name*/ - sizeof(struct encoding_map), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - encoding_map_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - encoding_map_methods, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - 0, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ -}; - -PyObject* -PyUnicode_BuildEncodingMap(PyObject* string) -{ - PyObject *result; - struct encoding_map *mresult; - int i; - int need_dict = 0; - unsigned char level1[32]; - unsigned char level2[512]; - unsigned char *mlevel1, *mlevel2, *mlevel3; - int count2 = 0, count3 = 0; - int kind; - void *data; - Py_UCS4 ch; - - if (!PyUnicode_Check(string) || PyUnicode_GET_LENGTH(string) != 256) { - PyErr_BadArgument(); - return NULL; - } - kind = PyUnicode_KIND(string); - data = PyUnicode_DATA(string); - memset(level1, 0xFF, sizeof level1); - memset(level2, 0xFF, sizeof level2); - - /* If there isn't a one-to-one mapping of NULL to \0, - or if there are non-BMP characters, we need to use - a mapping dictionary. */ - if (PyUnicode_READ(kind, data, 0) != 0) - need_dict = 1; - for (i = 1; i < 256; i++) { - int l1, l2; - ch = PyUnicode_READ(kind, data, i); - if (ch == 0 || ch > 0xFFFF) { - need_dict = 1; - break; - } - if (ch == 0xFFFE) - /* unmapped character */ - continue; - l1 = ch >> 11; - l2 = ch >> 7; - if (level1[l1] == 0xFF) - level1[l1] = count2++; - if (level2[l2] == 0xFF) - level2[l2] = count3++; - } - - if (count2 >= 0xFF || count3 >= 0xFF) - need_dict = 1; - - if (need_dict) { - PyObject *result = PyDict_New(); - PyObject *key, *value; - if (!result) - return NULL; - for (i = 0; i < 256; i++) { - key = PyLong_FromLong(PyUnicode_READ(kind, data, i)); - value = PyLong_FromLong(i); - if (!key || !value) - goto failed1; - if (PyDict_SetItem(result, key, value) == -1) - goto failed1; - Py_DECREF(key); - Py_DECREF(value); - } - return result; - failed1: - Py_XDECREF(key); - Py_XDECREF(value); - Py_DECREF(result); - return NULL; - } - - /* Create a three-level trie */ - result = PyObject_MALLOC(sizeof(struct encoding_map) + - 16*count2 + 128*count3 - 1); - if (!result) - return PyErr_NoMemory(); - PyObject_Init(result, &EncodingMapType); - mresult = (struct encoding_map*)result; - mresult->count2 = count2; - mresult->count3 = count3; - mlevel1 = mresult->level1; - mlevel2 = mresult->level23; - mlevel3 = mresult->level23 + 16*count2; - memcpy(mlevel1, level1, 32); - memset(mlevel2, 0xFF, 16*count2); - memset(mlevel3, 0, 128*count3); - count3 = 0; - for (i = 1; i < 256; i++) { - int o1, o2, o3, i2, i3; - if (PyUnicode_READ(kind, data, i) == 0xFFFE) - /* unmapped character */ - continue; - o1 = PyUnicode_READ(kind, data, i)>>11; - o2 = (PyUnicode_READ(kind, data, i)>>7) & 0xF; - i2 = 16*mlevel1[o1] + o2; - if (mlevel2[i2] == 0xFF) - mlevel2[i2] = count3++; - o3 = PyUnicode_READ(kind, data, i) & 0x7F; - i3 = 128*mlevel2[i2] + o3; - mlevel3[i3] = i; - } - return result; -} - -static int -encoding_map_lookup(Py_UCS4 c, PyObject *mapping) -{ - struct encoding_map *map = (struct encoding_map*)mapping; - int l1 = c>>11; - int l2 = (c>>7) & 0xF; - int l3 = c & 0x7F; - int i; - - if (c > 0xFFFF) - return -1; - if (c == 0) - return 0; - /* level 1*/ - i = map->level1[l1]; - if (i == 0xFF) { - return -1; - } - /* level 2*/ - i = map->level23[16*i+l2]; - if (i == 0xFF) { - return -1; - } - /* level 3 */ - i = map->level23[16*map->count2 + 128*i + l3]; - if (i == 0) { - return -1; - } - return i; -} - -/* Lookup the character ch in the mapping. If the character - can't be found, Py_None is returned (or NULL, if another - error occurred). */ -static PyObject * -charmapencode_lookup(Py_UCS4 c, PyObject *mapping) -{ - PyObject *w = PyLong_FromLong((long)c); - PyObject *x; - - if (w == NULL) - return NULL; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: mapping is undefined. */ - PyErr_Clear(); - x = Py_None; - Py_INCREF(x); - return x; - } else - return NULL; - } - else if (x == Py_None) - return x; - else if (PyLong_Check(x)) { - long value = PyLong_AS_LONG(x); - if (value < 0 || value > 255) { - PyErr_SetString(PyExc_TypeError, - "character mapping must be in range(256)"); - Py_DECREF(x); - return NULL; - } - return x; - } - else if (PyBytes_Check(x)) - return x; - else { - /* wrong return value */ - PyErr_Format(PyExc_TypeError, - "character mapping must return integer, bytes or None, not %.400s", - x->ob_type->tp_name); - Py_DECREF(x); - return NULL; - } -} - -static int -charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize) -{ - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2*outsize) - requiredsize = 2*outsize; - if (_PyBytes_Resize(outobj, requiredsize)) - return -1; - return 0; -} - -typedef enum charmapencode_result { - enc_SUCCESS, enc_FAILED, enc_EXCEPTION -} charmapencode_result; -/* lookup the character, put the result in the output string and adjust - various state variables. Resize the output bytes object if not enough - space is available. Return a new reference to the object that - was put in the output buffer, or Py_None, if the mapping was undefined - (in which case no character was written) or NULL, if a - reallocation error occurred. The caller must decref the result */ -static charmapencode_result -charmapencode_output(Py_UCS4 c, PyObject *mapping, - PyObject **outobj, Py_ssize_t *outpos) -{ - PyObject *rep; - char *outstart; - Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); - - if (Py_TYPE(mapping) == &EncodingMapType) { - int res = encoding_map_lookup(c, mapping); - Py_ssize_t requiredsize = *outpos+1; - if (res == -1) - return enc_FAILED; - if (outsize<requiredsize) - if (charmapencode_resize(outobj, outpos, requiredsize)) - return enc_EXCEPTION; - outstart = PyBytes_AS_STRING(*outobj); - outstart[(*outpos)++] = (char)res; - return enc_SUCCESS; - } - - rep = charmapencode_lookup(c, mapping); - if (rep==NULL) - return enc_EXCEPTION; - else if (rep==Py_None) { - Py_DECREF(rep); - return enc_FAILED; - } else { - if (PyLong_Check(rep)) { - Py_ssize_t requiredsize = *outpos+1; - if (outsize<requiredsize) - if (charmapencode_resize(outobj, outpos, requiredsize)) { - Py_DECREF(rep); - return enc_EXCEPTION; - } - outstart = PyBytes_AS_STRING(*outobj); - outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep); - } - else { - const char *repchars = PyBytes_AS_STRING(rep); - Py_ssize_t repsize = PyBytes_GET_SIZE(rep); - Py_ssize_t requiredsize = *outpos+repsize; - if (outsize<requiredsize) - if (charmapencode_resize(outobj, outpos, requiredsize)) { - Py_DECREF(rep); - return enc_EXCEPTION; - } - outstart = PyBytes_AS_STRING(*outobj); - memcpy(outstart + *outpos, repchars, repsize); - *outpos += repsize; - } - } - Py_DECREF(rep); - return enc_SUCCESS; -} - -/* handle an error in PyUnicode_EncodeCharmap - Return 0 on success, -1 on error */ -static int -charmap_encoding_error( - PyObject *unicode, Py_ssize_t *inpos, PyObject *mapping, - PyObject **exceptionObject, - int *known_errorHandler, PyObject **errorHandler, const char *errors, - PyObject **res, Py_ssize_t *respos) -{ - PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ - Py_ssize_t size, repsize; - Py_ssize_t newpos; - enum PyUnicode_Kind kind; - void *data; - Py_ssize_t index; - /* startpos for collecting unencodable chars */ - Py_ssize_t collstartpos = *inpos; - Py_ssize_t collendpos = *inpos+1; - Py_ssize_t collpos; - char *encoding = "charmap"; - char *reason = "character maps to <undefined>"; - charmapencode_result x; - Py_UCS4 ch; - int val; - - if (PyUnicode_READY(unicode) < 0) - return -1; - size = PyUnicode_GET_LENGTH(unicode); - /* find all unencodable characters */ - while (collendpos < size) { - PyObject *rep; - if (Py_TYPE(mapping) == &EncodingMapType) { - ch = PyUnicode_READ_CHAR(unicode, collendpos); - val = encoding_map_lookup(ch, mapping); - if (val != -1) - break; - ++collendpos; - continue; - } - - ch = PyUnicode_READ_CHAR(unicode, collendpos); - rep = charmapencode_lookup(ch, mapping); - if (rep==NULL) - return -1; - else if (rep!=Py_None) { - Py_DECREF(rep); - break; - } - Py_DECREF(rep); - ++collendpos; - } - /* cache callback name lookup - * (if not done yet, i.e. it's the first error) */ - if (*known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - *known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - *known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - *known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - *known_errorHandler = 4; - else - *known_errorHandler = 0; - } - switch (*known_errorHandler) { - case 1: /* strict */ - raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); - return -1; - case 2: /* replace */ - for (collpos = collstartpos; collpos<collendpos; ++collpos) { - x = charmapencode_output('?', mapping, res, respos); - if (x==enc_EXCEPTION) { - return -1; - } - else if (x==enc_FAILED) { - raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); - return -1; - } - } - /* fall through */ - case 3: /* ignore */ - *inpos = collendpos; - break; - case 4: /* xmlcharrefreplace */ - /* generate replacement (temporarily (mis)uses p) */ - for (collpos = collstartpos; collpos < collendpos; ++collpos) { - char buffer[2+29+1+1]; - char *cp; - sprintf(buffer, "&#%d;", (int)PyUnicode_READ_CHAR(unicode, collpos)); - for (cp = buffer; *cp; ++cp) { - x = charmapencode_output(*cp, mapping, res, respos); - if (x==enc_EXCEPTION) - return -1; - else if (x==enc_FAILED) { - raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); - return -1; - } - } - } - *inpos = collendpos; - break; - default: - repunicode = unicode_encode_call_errorhandler(errors, errorHandler, - encoding, reason, unicode, exceptionObject, - collstartpos, collendpos, &newpos); - if (repunicode == NULL) - return -1; - if (PyBytes_Check(repunicode)) { - /* Directly copy bytes result to output. */ - Py_ssize_t outsize = PyBytes_Size(*res); - Py_ssize_t requiredsize; - repsize = PyBytes_Size(repunicode); - requiredsize = *respos + repsize; - if (requiredsize > outsize) - /* Make room for all additional bytes. */ - if (charmapencode_resize(res, respos, requiredsize)) { - Py_DECREF(repunicode); - return -1; - } - memcpy(PyBytes_AsString(*res) + *respos, - PyBytes_AsString(repunicode), repsize); - *respos += repsize; - *inpos = newpos; - Py_DECREF(repunicode); - break; - } - /* generate replacement */ - if (PyUnicode_READY(repunicode) < 0) { - Py_DECREF(repunicode); - return -1; - } - repsize = PyUnicode_GET_LENGTH(repunicode); - data = PyUnicode_DATA(repunicode); - kind = PyUnicode_KIND(repunicode); - for (index = 0; index < repsize; index++) { - Py_UCS4 repch = PyUnicode_READ(kind, data, index); - x = charmapencode_output(repch, mapping, res, respos); - if (x==enc_EXCEPTION) { - Py_DECREF(repunicode); - return -1; - } - else if (x==enc_FAILED) { - Py_DECREF(repunicode); - raise_encode_exception(exceptionObject, encoding, unicode, collstartpos, collendpos, reason); - return -1; - } - } - *inpos = newpos; - Py_DECREF(repunicode); - } - return 0; -} - -PyObject * -_PyUnicode_EncodeCharmap(PyObject *unicode, - PyObject *mapping, - const char *errors) -{ - /* output object */ - PyObject *res = NULL; - /* current input position */ - Py_ssize_t inpos = 0; - Py_ssize_t size; - /* current output position */ - Py_ssize_t respos = 0; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, - * 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; - - if (PyUnicode_READY(unicode) < 0) - return NULL; - size = PyUnicode_GET_LENGTH(unicode); - - /* Default to Latin-1 */ - if (mapping == NULL) - return unicode_encode_ucs1(unicode, errors, 256); - - /* allocate enough for a simple encoding without - replacements, if we need more, we'll resize */ - res = PyBytes_FromStringAndSize(NULL, size); - if (res == NULL) - goto onError; - if (size == 0) - return res; - - while (inpos<size) { - Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, inpos); - /* try to encode it */ - charmapencode_result x = charmapencode_output(ch, mapping, &res, &respos); - if (x==enc_EXCEPTION) /* error */ - goto onError; - if (x==enc_FAILED) { /* unencodable character */ - if (charmap_encoding_error(unicode, &inpos, mapping, - &exc, - &known_errorHandler, &errorHandler, errors, - &res, &respos)) { - goto onError; - } - } - else - /* done with this character => adjust input position */ - ++inpos; - } - - /* Resize if we allocated to much */ - if (respos<PyBytes_GET_SIZE(res)) - if (_PyBytes_Resize(&res, respos) < 0) - goto onError; - - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - return res; - - onError: - Py_XDECREF(res); - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - return NULL; -} - -/* Deprecated */ -PyObject * -PyUnicode_EncodeCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = _PyUnicode_EncodeCharmap(unicode, mapping, errors); - Py_DECREF(unicode); - return result; -} - -PyObject * -PyUnicode_AsCharmapString(PyObject *unicode, - PyObject *mapping) -{ - if (!PyUnicode_Check(unicode) || mapping == NULL) { - PyErr_BadArgument(); - return NULL; - } - return _PyUnicode_EncodeCharmap(unicode, mapping, NULL); -} - -/* create or adjust a UnicodeTranslateError */ -static void -make_translate_exception(PyObject **exceptionObject, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - if (*exceptionObject == NULL) { - *exceptionObject = _PyUnicodeTranslateError_Create( - unicode, startpos, endpos, reason); - } - else { - if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos)) - goto onError; - if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos)) - goto onError; - if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason)) - goto onError; - return; - onError: - Py_DECREF(*exceptionObject); - *exceptionObject = NULL; - } -} - -/* raises a UnicodeTranslateError */ -static void -raise_translate_exception(PyObject **exceptionObject, - PyObject *unicode, - Py_ssize_t startpos, Py_ssize_t endpos, - const char *reason) -{ - make_translate_exception(exceptionObject, - unicode, startpos, endpos, reason); - if (*exceptionObject != NULL) - PyCodec_StrictErrors(*exceptionObject); -} - -/* error handling callback helper: - build arguments, call the callback and check the arguments, - put the result into newpos and return the replacement string, which - has to be freed by the caller */ -static PyObject * -unicode_translate_call_errorhandler(const char *errors, - PyObject **errorHandler, - const char *reason, - PyObject *unicode, PyObject **exceptionObject, - Py_ssize_t startpos, Py_ssize_t endpos, - Py_ssize_t *newpos) -{ - static char *argparse = "O!n;translating error handler must return (str, int) tuple"; - - Py_ssize_t i_newpos; - PyObject *restuple; - PyObject *resunicode; - - if (*errorHandler == NULL) { - *errorHandler = PyCodec_LookupError(errors); - if (*errorHandler == NULL) - return NULL; - } - - make_translate_exception(exceptionObject, - unicode, startpos, endpos, reason); - if (*exceptionObject == NULL) - return NULL; - - restuple = PyObject_CallFunctionObjArgs( - *errorHandler, *exceptionObject, NULL); - if (restuple == NULL) - return NULL; - if (!PyTuple_Check(restuple)) { - PyErr_SetString(PyExc_TypeError, &argparse[4]); - Py_DECREF(restuple); - return NULL; - } - if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, - &resunicode, &i_newpos)) { - Py_DECREF(restuple); - return NULL; - } - if (i_newpos<0) - *newpos = PyUnicode_GET_LENGTH(unicode)+i_newpos; - else - *newpos = i_newpos; - if (*newpos<0 || *newpos>PyUnicode_GET_LENGTH(unicode)) { - PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos); - Py_DECREF(restuple); - return NULL; - } - Py_INCREF(resunicode); - Py_DECREF(restuple); - return resunicode; -} - -/* Lookup the character ch in the mapping and put the result in result, - which must be decrefed by the caller. - Return 0 on success, -1 on error */ -static int -charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result) -{ - PyObject *w = PyLong_FromLong((long)c); - PyObject *x; - - if (w == NULL) - return -1; - x = PyObject_GetItem(mapping, w); - Py_DECREF(w); - if (x == NULL) { - if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found means: use 1:1 mapping. */ - PyErr_Clear(); - *result = NULL; - return 0; - } else - return -1; - } - else if (x == Py_None) { - *result = x; - return 0; - } - else if (PyLong_Check(x)) { - long value = PyLong_AS_LONG(x); - long max = PyUnicode_GetMax(); - if (value < 0 || value > max) { - PyErr_Format(PyExc_TypeError, - "character mapping must be in range(0x%x)", max+1); - Py_DECREF(x); - return -1; - } - *result = x; - return 0; - } - else if (PyUnicode_Check(x)) { - *result = x; - return 0; - } - else { - /* wrong return value */ - PyErr_SetString(PyExc_TypeError, - "character mapping must return integer, None or str"); - Py_DECREF(x); - return -1; - } -} -/* ensure that *outobj is at least requiredsize characters long, - if not reallocate and adjust various state variables. - Return 0 on success, -1 on error */ -static int -charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize, - Py_ssize_t requiredsize) -{ - Py_ssize_t oldsize = *psize; - if (requiredsize > oldsize) { - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2 * oldsize) - requiredsize = 2 * oldsize; - *outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4)); - if (*outobj == 0) - return -1; - *psize = requiredsize; - } - return 0; -} -/* lookup the character, put the result in the output string and adjust - various state variables. Return a new reference to the object that - was put in the output buffer in *result, or Py_None, if the mapping was - undefined (in which case no character was written). - The called must decref result. - Return 0 on success, -1 on error. */ -static int -charmaptranslate_output(PyObject *input, Py_ssize_t ipos, - PyObject *mapping, Py_UCS4 **output, - Py_ssize_t *osize, Py_ssize_t *opos, - PyObject **res) -{ - Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos); - if (charmaptranslate_lookup(curinp, mapping, res)) - return -1; - if (*res==NULL) { - /* not found => default to 1:1 mapping */ - (*output)[(*opos)++] = curinp; - } - else if (*res==Py_None) - ; - else if (PyLong_Check(*res)) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res); - } - else if (PyUnicode_Check(*res)) { - Py_ssize_t repsize; - if (PyUnicode_READY(*res) == -1) - return -1; - repsize = PyUnicode_GET_LENGTH(*res); - if (repsize==1) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0); - } - else if (repsize!=0) { - /* more than one character */ - Py_ssize_t requiredsize = *opos + - (PyUnicode_GET_LENGTH(input) - ipos) + - repsize - 1; - Py_ssize_t i; - if (charmaptranslate_makespace(output, osize, requiredsize)) - return -1; - for(i = 0; i < repsize; i++) - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i); - } - } - else - return -1; - return 0; -} - -PyObject * -_PyUnicode_TranslateCharmap(PyObject *input, - PyObject *mapping, - const char *errors) -{ - /* input object */ - char *idata; - Py_ssize_t size, i; - int kind; - /* output buffer */ - Py_UCS4 *output = NULL; - Py_ssize_t osize; - PyObject *res; - /* current output position */ - Py_ssize_t opos; - char *reason = "character maps to <undefined>"; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, - * 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; - - if (mapping == NULL) { - PyErr_BadArgument(); - return NULL; - } - - if (PyUnicode_READY(input) == -1) - return NULL; - idata = (char*)PyUnicode_DATA(input); - kind = PyUnicode_KIND(input); - size = PyUnicode_GET_LENGTH(input); - i = 0; - - if (size == 0) { - Py_INCREF(input); - return input; - } - - /* allocate enough for a simple 1:1 translation without - replacements, if we need more, we'll resize */ - osize = size; - output = PyMem_Malloc(osize * sizeof(Py_UCS4)); - opos = 0; - if (output == NULL) { - PyErr_NoMemory(); - goto onError; - } - - while (i<size) { - /* try to encode it */ - PyObject *x = NULL; - if (charmaptranslate_output(input, i, mapping, - &output, &osize, &opos, &x)) { - Py_XDECREF(x); - goto onError; - } - Py_XDECREF(x); - if (x!=Py_None) /* it worked => adjust input pointer */ - ++i; - else { /* untranslatable character */ - PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ - Py_ssize_t repsize; - Py_ssize_t newpos; - Py_ssize_t uni2; - /* startpos for collecting untranslatable chars */ - Py_ssize_t collstart = i; - Py_ssize_t collend = i+1; - Py_ssize_t coll; - - /* find all untranslatable characters */ - while (collend < size) { - if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x)) - goto onError; - Py_XDECREF(x); - if (x!=Py_None) - break; - ++collend; - } - /* cache callback name lookup - * (if not done yet, i.e. it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - raise_translate_exception(&exc, input, collstart, - collend, reason); - goto onError; - case 2: /* replace */ - /* No need to check for space, this is a 1:1 replacement */ - for (coll = collstart; coll<collend; coll++) - output[opos++] = '?'; - /* fall through */ - case 3: /* ignore */ - i = collend; - break; - case 4: /* xmlcharrefreplace */ - /* generate replacement (temporarily (mis)uses i) */ - for (i = collstart; i < collend; ++i) { - char buffer[2+29+1+1]; - char *cp; - sprintf(buffer, "&#%d;", PyUnicode_READ(kind, idata, i)); - if (charmaptranslate_makespace(&output, &osize, - opos+strlen(buffer)+(size-collend))) - goto onError; - for (cp = buffer; *cp; ++cp) - output[opos++] = *cp; - } - i = collend; - break; - default: - repunicode = unicode_translate_call_errorhandler(errors, &errorHandler, - reason, input, &exc, - collstart, collend, &newpos); - if (repunicode == NULL) - goto onError; - if (PyUnicode_READY(repunicode) < 0) { - Py_DECREF(repunicode); - goto onError; - } - /* generate replacement */ - repsize = PyUnicode_GET_LENGTH(repunicode); - if (charmaptranslate_makespace(&output, &osize, - opos+repsize+(size-collend))) { - Py_DECREF(repunicode); - goto onError; - } - for (uni2 = 0; repsize-->0; ++uni2) - output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2); - i = newpos; - Py_DECREF(repunicode); - } - } - } - res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos); - if (!res) - goto onError; - PyMem_Free(output); - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - return res; - - onError: - PyMem_Free(output); - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - return NULL; -} - -/* Deprecated. Use PyUnicode_Translate instead. */ -PyObject * -PyUnicode_TranslateCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) -{ - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (!unicode) - return NULL; - return _PyUnicode_TranslateCharmap(unicode, mapping, errors); -} - -PyObject * -PyUnicode_Translate(PyObject *str, - PyObject *mapping, - const char *errors) -{ - PyObject *result; - - str = PyUnicode_FromObject(str); - if (str == NULL) - goto onError; - result = _PyUnicode_TranslateCharmap(str, mapping, errors); - Py_DECREF(str); - return result; - - onError: - Py_XDECREF(str); - return NULL; -} - -static Py_UCS4 -fix_decimal_and_space_to_ascii(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because this function is only - called as a callback from fixup() which does it already. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - Py_UCS4 maxchar = 0, ch, fixed; - Py_ssize_t i; - - for (i = 0; i < len; ++i) { - ch = PyUnicode_READ(kind, data, i); - fixed = 0; - if (ch > 127) { - if (Py_UNICODE_ISSPACE(ch)) - fixed = ' '; - else { - const int decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) - fixed = '0' + decimal; - } - if (fixed != 0) { - if (fixed > maxchar) - maxchar = fixed; - PyUnicode_WRITE(kind, data, i, fixed); - } - else if (ch > maxchar) - maxchar = ch; - } - else if (ch > maxchar) - maxchar = ch; - } - - return maxchar; -} - -PyObject * -_PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadInternalCall(); - return NULL; - } - if (PyUnicode_READY(unicode) == -1) - return NULL; - if (PyUnicode_MAX_CHAR_VALUE(unicode) <= 127) { - /* If the string is already ASCII, just return the same string */ - Py_INCREF(unicode); - return unicode; - } - return fixup(unicode, fix_decimal_and_space_to_ascii); -} - -PyObject * -PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, - Py_ssize_t length) -{ - PyObject *decimal; - Py_ssize_t i; - Py_UCS4 maxchar; - enum PyUnicode_Kind kind; - void *data; - - maxchar = 0; - for (i = 0; i < length; i++) { - Py_UNICODE ch = s[i]; - if (ch > 127) { - int decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) - ch = '0' + decimal; - } - maxchar = Py_MAX(maxchar, ch); - } - - /* Copy to a new string */ - decimal = PyUnicode_New(length, maxchar); - if (decimal == NULL) - return decimal; - kind = PyUnicode_KIND(decimal); - data = PyUnicode_DATA(decimal); - /* Iterate over code points */ - for (i = 0; i < length; i++) { - Py_UNICODE ch = s[i]; - if (ch > 127) { - int decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) - ch = '0' + decimal; - } - PyUnicode_WRITE(kind, data, i, ch); - } - return unicode_result(decimal); -} -/* --- Decimal Encoder ---------------------------------------------------- */ - -int -PyUnicode_EncodeDecimal(Py_UNICODE *s, - Py_ssize_t length, - char *output, - const char *errors) -{ - PyObject *unicode; - Py_ssize_t i; - enum PyUnicode_Kind kind; - void *data; - - if (output == NULL) { - PyErr_BadArgument(); - return -1; - } - - unicode = PyUnicode_FromUnicode(s, length); - if (unicode == NULL) - return -1; - - if (PyUnicode_READY(unicode) < 0) { - Py_DECREF(unicode); - return -1; - } - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - - for (i=0; i < length; ) { - PyObject *exc; - Py_UCS4 ch; - int decimal; - Py_ssize_t startpos; - - ch = PyUnicode_READ(kind, data, i); - - if (Py_UNICODE_ISSPACE(ch)) { - *output++ = ' '; - i++; - continue; - } - decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) { - *output++ = '0' + decimal; - i++; - continue; - } - if (0 < ch && ch < 256) { - *output++ = (char)ch; - i++; - continue; - } - - startpos = i; - exc = NULL; - raise_encode_exception(&exc, "decimal", unicode, - startpos, startpos+1, - "invalid decimal Unicode string"); - Py_XDECREF(exc); - Py_DECREF(unicode); - return -1; - } - /* 0-terminate the output string */ - *output++ = '\0'; - Py_DECREF(unicode); - return 0; -} - -/* --- Helpers ------------------------------------------------------------ */ - -static Py_ssize_t -any_find_slice(int direction, PyObject* s1, PyObject* s2, - Py_ssize_t start, - Py_ssize_t end) -{ - int kind1, kind2, kind; - void *buf1, *buf2; - Py_ssize_t len1, len2, result; - - kind1 = PyUnicode_KIND(s1); - kind2 = PyUnicode_KIND(s2); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(s1); - buf2 = PyUnicode_DATA(s2); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(s1, kind); - if (!buf1) - return -2; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(s2, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return -2; - } - len1 = PyUnicode_GET_LENGTH(s1); - len2 = PyUnicode_GET_LENGTH(s2); - - if (direction > 0) { - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) - result = asciilib_find_slice(buf1, len1, buf2, len2, start, end); - else - result = ucs1lib_find_slice(buf1, len1, buf2, len2, start, end); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_find_slice(buf1, len1, buf2, len2, start, end); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_find_slice(buf1, len1, buf2, len2, start, end); - break; - default: - assert(0); result = -2; - } - } - else { - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) - result = asciilib_rfind_slice(buf1, len1, buf2, len2, start, end); - else - result = ucs1lib_rfind_slice(buf1, len1, buf2, len2, start, end); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_rfind_slice(buf1, len1, buf2, len2, start, end); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_rfind_slice(buf1, len1, buf2, len2, start, end); - break; - default: - assert(0); result = -2; - } - } - - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - return result; -} - -Py_ssize_t -_PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data, - Py_ssize_t n_buffer, - void *digits, Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - const char *thousands_sep) -{ - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (unicode != NULL && PyUnicode_IS_ASCII(unicode)) - return _PyUnicode_ascii_InsertThousandsGrouping( - (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, - min_width, grouping, thousands_sep); - else - return _PyUnicode_ucs1_InsertThousandsGrouping( - (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, - min_width, grouping, thousands_sep); - case PyUnicode_2BYTE_KIND: - return _PyUnicode_ucs2_InsertThousandsGrouping( - (Py_UCS2*)data, n_buffer, (Py_UCS2*)digits, n_digits, - min_width, grouping, thousands_sep); - case PyUnicode_4BYTE_KIND: - return _PyUnicode_ucs4_InsertThousandsGrouping( - (Py_UCS4*)data, n_buffer, (Py_UCS4*)digits, n_digits, - min_width, grouping, thousands_sep); - } - assert(0); - return -1; -} - - -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - -Py_ssize_t -PyUnicode_Count(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) -{ - Py_ssize_t result; - PyObject* str_obj; - PyObject* sub_obj; - int kind1, kind2, kind; - void *buf1 = NULL, *buf2 = NULL; - Py_ssize_t len1, len2; - - str_obj = PyUnicode_FromObject(str); - if (!str_obj || PyUnicode_READY(str_obj) == -1) - return -1; - sub_obj = PyUnicode_FromObject(substr); - if (!sub_obj || PyUnicode_READY(sub_obj) == -1) { - Py_DECREF(str_obj); - return -1; - } - - kind1 = PyUnicode_KIND(str_obj); - kind2 = PyUnicode_KIND(sub_obj); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(str_obj); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str_obj, kind); - if (!buf1) - goto onError; - buf2 = PyUnicode_DATA(sub_obj); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sub_obj, kind); - if (!buf2) - goto onError; - len1 = PyUnicode_GET_LENGTH(str_obj); - len2 = PyUnicode_GET_LENGTH(sub_obj); - - ADJUST_INDICES(start, end, len1); - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sub_obj)) - result = asciilib_count( - ((Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - else - result = ucs1lib_count( - ((Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_count( - ((Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_count( - ((Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - assert(0); result = 0; - } - - Py_DECREF(sub_obj); - Py_DECREF(str_obj); - - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - return result; - onError: - Py_DECREF(sub_obj); - Py_DECREF(str_obj); - if (kind1 != kind && buf1) - PyMem_Free(buf1); - if (kind2 != kind && buf2) - PyMem_Free(buf2); - return -1; -} - -Py_ssize_t -PyUnicode_Find(PyObject *str, - PyObject *sub, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - Py_ssize_t result; - - str = PyUnicode_FromObject(str); - if (!str || PyUnicode_READY(str) == -1) - return -2; - sub = PyUnicode_FromObject(sub); - if (!sub || PyUnicode_READY(sub) == -1) { - Py_DECREF(str); - return -2; - } - - result = any_find_slice(direction, - str, sub, start, end - ); - - Py_DECREF(str); - Py_DECREF(sub); - - return result; -} - -Py_ssize_t -PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, - Py_ssize_t start, Py_ssize_t end, - int direction) -{ - int kind; - Py_ssize_t result; - if (PyUnicode_READY(str) == -1) - return -2; - if (start < 0 || end < 0) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return -2; - } - if (end > PyUnicode_GET_LENGTH(str)) - end = PyUnicode_GET_LENGTH(str); - kind = PyUnicode_KIND(str); - result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start, - kind, end-start, ch, direction); - if (result == -1) - return -1; - else - return start + result; -} - -static int -tailmatch(PyObject *self, - PyObject *substring, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - int kind_self; - int kind_sub; - void *data_self; - void *data_sub; - Py_ssize_t offset; - Py_ssize_t i; - Py_ssize_t end_sub; - - if (PyUnicode_READY(self) == -1 || - PyUnicode_READY(substring) == -1) - return 0; - - if (PyUnicode_GET_LENGTH(substring) == 0) - return 1; - - ADJUST_INDICES(start, end, PyUnicode_GET_LENGTH(self)); - end -= PyUnicode_GET_LENGTH(substring); - if (end < start) - return 0; - - kind_self = PyUnicode_KIND(self); - data_self = PyUnicode_DATA(self); - kind_sub = PyUnicode_KIND(substring); - data_sub = PyUnicode_DATA(substring); - end_sub = PyUnicode_GET_LENGTH(substring) - 1; - - if (direction > 0) - offset = end; - else - offset = start; - - if (PyUnicode_READ(kind_self, data_self, offset) == - PyUnicode_READ(kind_sub, data_sub, 0) && - PyUnicode_READ(kind_self, data_self, offset + end_sub) == - PyUnicode_READ(kind_sub, data_sub, end_sub)) { - /* If both are of the same kind, memcmp is sufficient */ - if (kind_self == kind_sub) { - return ! memcmp((char *)data_self + - (offset * PyUnicode_KIND(substring)), - data_sub, - PyUnicode_GET_LENGTH(substring) * - PyUnicode_KIND(substring)); - } - /* otherwise we have to compare each character by first accesing it */ - else { - /* We do not need to compare 0 and len(substring)-1 because - the if statement above ensured already that they are equal - when we end up here. */ - // TODO: honor direction and do a forward or backwards search - for (i = 1; i < end_sub; ++i) { - if (PyUnicode_READ(kind_self, data_self, offset + i) != - PyUnicode_READ(kind_sub, data_sub, i)) - return 0; - } - return 1; - } - } - - return 0; -} - -Py_ssize_t -PyUnicode_Tailmatch(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end, - int direction) -{ - Py_ssize_t result; - - str = PyUnicode_FromObject(str); - if (str == NULL) - return -1; - substr = PyUnicode_FromObject(substr); - if (substr == NULL) { - Py_DECREF(str); - return -1; - } - - result = tailmatch(str, substr, - start, end, direction); - Py_DECREF(str); - Py_DECREF(substr); - return result; -} - -/* Apply fixfct filter to the Unicode object self and return a - reference to the modified object */ - -static PyObject * -fixup(PyObject *self, - Py_UCS4 (*fixfct)(PyObject *s)) -{ - PyObject *u; - Py_UCS4 maxchar_old, maxchar_new = 0; - - u = PyUnicode_Copy(self); - if (u == NULL) - return NULL; - maxchar_old = PyUnicode_MAX_CHAR_VALUE(u); - - /* fix functions return the new maximum character in a string, - if the kind of the resulting unicode object does not change, - everything is fine. Otherwise we need to change the string kind - and re-run the fix function. */ - maxchar_new = fixfct(u); - if (maxchar_new == 0) - /* do nothing, keep maxchar_new at 0 which means no changes. */; - else if (maxchar_new <= 127) - maxchar_new = 127; - else if (maxchar_new <= 255) - maxchar_new = 255; - else if (maxchar_new <= 65535) - maxchar_new = 65535; - else - maxchar_new = MAX_UNICODE; - - if (!maxchar_new && PyUnicode_CheckExact(self)) { - /* fixfct should return TRUE if it modified the buffer. If - FALSE, return a reference to the original buffer instead - (to save space, not time) */ - Py_INCREF(self); - Py_DECREF(u); - return self; - } - else if (maxchar_new == maxchar_old) { - return u; - } - else { - /* In case the maximum character changed, we need to - convert the string to the new category. */ - PyObject *v = PyUnicode_New(PyUnicode_GET_LENGTH(self), maxchar_new); - if (v == NULL) { - Py_DECREF(u); - return NULL; - } - if (maxchar_new > maxchar_old) { - /* If the maxchar increased so that the kind changed, not all - characters are representable anymore and we need to fix the - string again. This only happens in very few cases. */ - copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); - maxchar_old = fixfct(v); - assert(maxchar_old > 0 && maxchar_old <= maxchar_new); - } - else { - copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); - } - - Py_DECREF(u); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; - } -} - -static Py_UCS4 -fixupper(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because this function is only - called as a callback from fixup() which does it already. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - int touched = 0; - Py_UCS4 maxchar = 0; - Py_ssize_t i; - - for (i = 0; i < len; ++i) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - const Py_UCS4 up = Py_UNICODE_TOUPPER(ch); - if (up != ch) { - if (up > maxchar) - maxchar = up; - PyUnicode_WRITE(kind, data, i, up); - touched = 1; - } - else if (ch > maxchar) - maxchar = ch; - } - - if (touched) - return maxchar; - else - return 0; -} - -static Py_UCS4 -fixlower(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because fixup() which does it. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - int touched = 0; - Py_UCS4 maxchar = 0; - Py_ssize_t i; - - for(i = 0; i < len; ++i) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch); - if (lo != ch) { - if (lo > maxchar) - maxchar = lo; - PyUnicode_WRITE(kind, data, i, lo); - touched = 1; - } - else if (ch > maxchar) - maxchar = ch; - } - - if (touched) - return maxchar; - else - return 0; -} - -static Py_UCS4 -fixswapcase(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because fixup() which does it. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - int touched = 0; - Py_UCS4 maxchar = 0; - Py_ssize_t i; - - for(i = 0; i < len; ++i) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - Py_UCS4 nu = 0; - - if (Py_UNICODE_ISUPPER(ch)) - nu = Py_UNICODE_TOLOWER(ch); - else if (Py_UNICODE_ISLOWER(ch)) - nu = Py_UNICODE_TOUPPER(ch); - - if (nu != 0) { - if (nu > maxchar) - maxchar = nu; - PyUnicode_WRITE(kind, data, i, nu); - touched = 1; - } - else if (ch > maxchar) - maxchar = ch; - } - - if (touched) - return maxchar; - else - return 0; -} - -static Py_UCS4 -fixcapitalize(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because fixup() which does it. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - int touched = 0; - Py_UCS4 maxchar = 0; - Py_ssize_t i = 0; - Py_UCS4 ch; - - if (len == 0) - return 0; - - ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISUPPER(ch)) { - maxchar = Py_UNICODE_TOUPPER(ch); - PyUnicode_WRITE(kind, data, i, maxchar); - touched = 1; - } - ++i; - for(; i < len; ++i) { - ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISLOWER(ch)) { - const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch); - if (lo > maxchar) - maxchar = lo; - PyUnicode_WRITE(kind, data, i, lo); - touched = 1; - } - else if (ch > maxchar) - maxchar = ch; - } - - if (touched) - return maxchar; - else - return 0; -} - -static Py_UCS4 -fixtitle(PyObject *self) -{ - /* No need to call PyUnicode_READY(self) because fixup() which does it. */ - const Py_ssize_t len = PyUnicode_GET_LENGTH(self); - const int kind = PyUnicode_KIND(self); - void *data = PyUnicode_DATA(self); - Py_UCS4 maxchar = 0; - Py_ssize_t i = 0; - int previous_is_cased; - - /* Shortcut for single character strings */ - if (len == 1) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - const Py_UCS4 ti = Py_UNICODE_TOTITLE(ch); - if (ti != ch) { - PyUnicode_WRITE(kind, data, i, ti); - return ti; - } - else - return 0; - } - previous_is_cased = 0; - for(; i < len; ++i) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - Py_UCS4 nu; - - if (previous_is_cased) - nu = Py_UNICODE_TOLOWER(ch); - else - nu = Py_UNICODE_TOTITLE(ch); - - if (nu > maxchar) - maxchar = nu; - PyUnicode_WRITE(kind, data, i, nu); - - if (Py_UNICODE_ISLOWER(ch) || - Py_UNICODE_ISUPPER(ch) || - Py_UNICODE_ISTITLE(ch)) - previous_is_cased = 1; - else - previous_is_cased = 0; - } - return maxchar; -} - -PyObject * -PyUnicode_Join(PyObject *separator, PyObject *seq) -{ - PyObject *sep = NULL; - Py_ssize_t seplen; - PyObject *res = NULL; /* the result */ - PyObject *fseq; /* PySequence_Fast(seq) */ - Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */ - PyObject **items; - PyObject *item; - Py_ssize_t sz, i, res_offset; - Py_UCS4 maxchar; - Py_UCS4 item_maxchar; - int use_memcpy; - unsigned char *res_data = NULL, *sep_data = NULL; - PyObject *last_obj; - unsigned int kind = 0; - - fseq = PySequence_Fast(seq, ""); - if (fseq == NULL) { - return NULL; - } - - /* NOTE: the following code can't call back into Python code, - * so we are sure that fseq won't be mutated. - */ - - seqlen = PySequence_Fast_GET_SIZE(fseq); - /* If empty sequence, return u"". */ - if (seqlen == 0) { - Py_DECREF(fseq); - Py_INCREF(unicode_empty); - res = unicode_empty; - return res; - } - - /* If singleton sequence with an exact Unicode, return that. */ - last_obj = NULL; - items = PySequence_Fast_ITEMS(fseq); - if (seqlen == 1) { - if (PyUnicode_CheckExact(items[0])) { - res = items[0]; - Py_INCREF(res); - Py_DECREF(fseq); - return res; - } - seplen = 0; - maxchar = 0; - } - else { - /* Set up sep and seplen */ - if (separator == NULL) { - /* fall back to a blank space separator */ - sep = PyUnicode_FromOrdinal(' '); - if (!sep) - goto onError; - seplen = 1; - maxchar = 32; - } - else { - if (!PyUnicode_Check(separator)) { - PyErr_Format(PyExc_TypeError, - "separator: expected str instance," - " %.80s found", - Py_TYPE(separator)->tp_name); - goto onError; - } - if (PyUnicode_READY(separator)) - goto onError; - sep = separator; - seplen = PyUnicode_GET_LENGTH(separator); - maxchar = PyUnicode_MAX_CHAR_VALUE(separator); - /* inc refcount to keep this code path symmetric with the - above case of a blank separator */ - Py_INCREF(sep); - } - last_obj = sep; - } - - /* There are at least two things to join, or else we have a subclass - * of str in the sequence. - * Do a pre-pass to figure out the total amount of space we'll - * need (sz), and see whether all argument are strings. - */ - sz = 0; -#ifdef Py_DEBUG - use_memcpy = 0; -#else - use_memcpy = 1; -#endif - for (i = 0; i < seqlen; i++) { - const Py_ssize_t old_sz = sz; - item = items[i]; - if (!PyUnicode_Check(item)) { - PyErr_Format(PyExc_TypeError, - "sequence item %zd: expected str instance," - " %.80s found", - i, Py_TYPE(item)->tp_name); - goto onError; - } - if (PyUnicode_READY(item) == -1) - goto onError; - sz += PyUnicode_GET_LENGTH(item); - item_maxchar = PyUnicode_MAX_CHAR_VALUE(item); - maxchar = Py_MAX(maxchar, item_maxchar); - if (i != 0) - sz += seplen; - if (sz < old_sz || sz > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "join() result is too long for a Python string"); - goto onError; - } - if (use_memcpy && last_obj != NULL) { - if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item)) - use_memcpy = 0; - } - last_obj = item; - } - - res = PyUnicode_New(sz, maxchar); - if (res == NULL) - goto onError; - - /* Catenate everything. */ -#ifdef Py_DEBUG - use_memcpy = 0; -#else - if (use_memcpy) { - res_data = PyUnicode_1BYTE_DATA(res); - kind = PyUnicode_KIND(res); - if (seplen != 0) - sep_data = PyUnicode_1BYTE_DATA(sep); - } -#endif - for (i = 0, res_offset = 0; i < seqlen; ++i) { - Py_ssize_t itemlen; - item = items[i]; - /* Copy item, and maybe the separator. */ - if (i && seplen != 0) { - if (use_memcpy) { - Py_MEMCPY(res_data, - sep_data, - kind * seplen); - res_data += kind * seplen; - } - else { - copy_characters(res, res_offset, sep, 0, seplen); - res_offset += seplen; - } - } - itemlen = PyUnicode_GET_LENGTH(item); - if (itemlen != 0) { - if (use_memcpy) { - Py_MEMCPY(res_data, - PyUnicode_DATA(item), - kind * itemlen); - res_data += kind * itemlen; - } - else { - copy_characters(res, res_offset, item, 0, itemlen); - res_offset += itemlen; - } - } - } - if (use_memcpy) - assert(res_data == PyUnicode_1BYTE_DATA(res) - + kind * PyUnicode_GET_LENGTH(res)); - else - assert(res_offset == PyUnicode_GET_LENGTH(res)); - - Py_DECREF(fseq); - Py_XDECREF(sep); - assert(_PyUnicode_CheckConsistency(res, 1)); - return res; - - onError: - Py_DECREF(fseq); - Py_XDECREF(sep); - Py_XDECREF(res); - return NULL; -} - -#define FILL(kind, data, value, start, length) \ - do { \ - Py_ssize_t i_ = 0; \ - assert(kind != PyUnicode_WCHAR_KIND); \ - switch ((kind)) { \ - case PyUnicode_1BYTE_KIND: { \ - unsigned char * to_ = (unsigned char *)((data)) + (start); \ - memset(to_, (unsigned char)value, length); \ - break; \ - } \ - case PyUnicode_2BYTE_KIND: { \ - Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - default: { \ - Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - } \ - } while (0) - -static PyObject * -pad(PyObject *self, - Py_ssize_t left, - Py_ssize_t right, - Py_UCS4 fill) -{ - PyObject *u; - Py_UCS4 maxchar; - int kind; - void *data; - - if (left < 0) - left = 0; - if (right < 0) - right = 0; - - if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - - if (left > PY_SSIZE_T_MAX - _PyUnicode_LENGTH(self) || - right > PY_SSIZE_T_MAX - (left + _PyUnicode_LENGTH(self))) { - PyErr_SetString(PyExc_OverflowError, "padded string is too long"); - return NULL; - } - maxchar = PyUnicode_MAX_CHAR_VALUE(self); - if (fill > maxchar) - maxchar = fill; - u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar); - if (!u) - return NULL; - - kind = PyUnicode_KIND(u); - data = PyUnicode_DATA(u); - if (left) - FILL(kind, data, fill, 0, left); - if (right) - FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); - copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); - assert(_PyUnicode_CheckConsistency(u, 1)); - return u; -} -#undef FILL - -PyObject * -PyUnicode_Splitlines(PyObject *string, int keepends) -{ - PyObject *list; - - string = PyUnicode_FromObject(string); - if (string == NULL || PyUnicode_READY(string) == -1) - return NULL; - - switch(PyUnicode_KIND(string)) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(string)) - list = asciilib_splitlines( - string, PyUnicode_1BYTE_DATA(string), - PyUnicode_GET_LENGTH(string), keepends); - else - list = ucs1lib_splitlines( - string, PyUnicode_1BYTE_DATA(string), - PyUnicode_GET_LENGTH(string), keepends); - break; - case PyUnicode_2BYTE_KIND: - list = ucs2lib_splitlines( - string, PyUnicode_2BYTE_DATA(string), - PyUnicode_GET_LENGTH(string), keepends); - break; - case PyUnicode_4BYTE_KIND: - list = ucs4lib_splitlines( - string, PyUnicode_4BYTE_DATA(string), - PyUnicode_GET_LENGTH(string), keepends); - break; - default: - assert(0); - list = 0; - } - Py_DECREF(string); - return list; -} - -static PyObject * -split(PyObject *self, - PyObject *substring, - Py_ssize_t maxcount) -{ - int kind1, kind2, kind; - void *buf1, *buf2; - Py_ssize_t len1, len2; - PyObject* out; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (substring == NULL) - switch(PyUnicode_KIND(self)) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self)) - return asciilib_split_whitespace( - self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - else - return ucs1lib_split_whitespace( - self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - case PyUnicode_2BYTE_KIND: - return ucs2lib_split_whitespace( - self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - case PyUnicode_4BYTE_KIND: - return ucs4lib_split_whitespace( - self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - default: - assert(0); - return NULL; - } - - if (PyUnicode_READY(substring) == -1) - return NULL; - - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(self, kind); - if (!buf1) - return NULL; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return NULL; - } - len1 = PyUnicode_GET_LENGTH(self); - len2 = PyUnicode_GET_LENGTH(substring); - - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) - out = asciilib_split( - self, buf1, len1, buf2, len2, maxcount); - else - out = ucs1lib_split( - self, buf1, len1, buf2, len2, maxcount); - break; - case PyUnicode_2BYTE_KIND: - out = ucs2lib_split( - self, buf1, len1, buf2, len2, maxcount); - break; - case PyUnicode_4BYTE_KIND: - out = ucs4lib_split( - self, buf1, len1, buf2, len2, maxcount); - break; - default: - out = NULL; - } - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - return out; -} - -static PyObject * -rsplit(PyObject *self, - PyObject *substring, - Py_ssize_t maxcount) -{ - int kind1, kind2, kind; - void *buf1, *buf2; - Py_ssize_t len1, len2; - PyObject* out; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (substring == NULL) - switch(PyUnicode_KIND(self)) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self)) - return asciilib_rsplit_whitespace( - self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - else - return ucs1lib_rsplit_whitespace( - self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - case PyUnicode_2BYTE_KIND: - return ucs2lib_rsplit_whitespace( - self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - case PyUnicode_4BYTE_KIND: - return ucs4lib_rsplit_whitespace( - self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount - ); - default: - assert(0); - return NULL; - } - - if (PyUnicode_READY(substring) == -1) - return NULL; - - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(self, kind); - if (!buf1) - return NULL; - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - if (kind1 != kind) PyMem_Free(buf1); - return NULL; - } - len1 = PyUnicode_GET_LENGTH(self); - len2 = PyUnicode_GET_LENGTH(substring); - - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) - out = asciilib_rsplit( - self, buf1, len1, buf2, len2, maxcount); - else - out = ucs1lib_rsplit( - self, buf1, len1, buf2, len2, maxcount); - break; - case PyUnicode_2BYTE_KIND: - out = ucs2lib_rsplit( - self, buf1, len1, buf2, len2, maxcount); - break; - case PyUnicode_4BYTE_KIND: - out = ucs4lib_rsplit( - self, buf1, len1, buf2, len2, maxcount); - break; - default: - out = NULL; - } - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - return out; -} - -static Py_ssize_t -anylib_find(int kind, PyObject *str1, void *buf1, Py_ssize_t len1, - PyObject *str2, void *buf2, Py_ssize_t len2, Py_ssize_t offset) -{ - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str1) && PyUnicode_IS_ASCII(str2)) - return asciilib_find(buf1, len1, buf2, len2, offset); - else - return ucs1lib_find(buf1, len1, buf2, len2, offset); - case PyUnicode_2BYTE_KIND: - return ucs2lib_find(buf1, len1, buf2, len2, offset); - case PyUnicode_4BYTE_KIND: - return ucs4lib_find(buf1, len1, buf2, len2, offset); - } - assert(0); - return -1; -} - -static Py_ssize_t -anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen, - PyObject *str1, void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) -{ - switch(kind) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(sstr) && PyUnicode_IS_ASCII(str1)) - return asciilib_count(sbuf, slen, buf1, len1, maxcount); - else - return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_2BYTE_KIND: - return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_4BYTE_KIND: - return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); - } - assert(0); - return 0; -} - -static PyObject * -replace(PyObject *self, PyObject *str1, - PyObject *str2, Py_ssize_t maxcount) -{ - PyObject *u; - char *sbuf = PyUnicode_DATA(self); - char *buf1 = PyUnicode_DATA(str1); - char *buf2 = PyUnicode_DATA(str2); - int srelease = 0, release1 = 0, release2 = 0; - int skind = PyUnicode_KIND(self); - int kind1 = PyUnicode_KIND(str1); - int kind2 = PyUnicode_KIND(str2); - Py_ssize_t slen = PyUnicode_GET_LENGTH(self); - Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); - Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); - int mayshrink; - Py_UCS4 maxchar, maxchar_str2; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; - else if (maxcount == 0 || slen == 0) - goto nothing; - - if (str1 == str2) - goto nothing; - if (skind < kind1) - /* substring too wide to be present */ - goto nothing; - - maxchar = PyUnicode_MAX_CHAR_VALUE(self); - maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); - /* Replacing str1 with str2 may cause a maxchar reduction in the - result string. */ - mayshrink = (maxchar_str2 < maxchar); - maxchar = Py_MAX(maxchar, maxchar_str2); - - if (len1 == len2) { - Py_ssize_t i; - /* same length */ - if (len1 == 0) - goto nothing; - if (len1 == 1) { - /* replace characters */ - Py_UCS4 u1, u2; - int rkind; - u1 = PyUnicode_READ_CHAR(str1, 0); - if (findchar(sbuf, PyUnicode_KIND(self), - slen, u1, 1) < 0) - goto nothing; - u2 = PyUnicode_READ_CHAR(str2, 0); - u = PyUnicode_New(slen, maxchar); - if (!u) - goto error; - copy_characters(u, 0, self, 0, slen); - rkind = PyUnicode_KIND(u); - for (i = 0; i < PyUnicode_GET_LENGTH(u); i++) - if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) { - if (--maxcount < 0) - break; - PyUnicode_WRITE(rkind, PyUnicode_DATA(u), i, u2); - } - } - else { - int rkind = skind; - char *res; - - if (kind1 < rkind) { - /* widen substring */ - buf1 = _PyUnicode_AsKind(str1, rkind); - if (!buf1) goto error; - release1 = 1; - } - i = anylib_find(rkind, self, sbuf, slen, str1, buf1, len1, 0); - if (i < 0) - goto nothing; - if (rkind > kind2) { - /* widen replacement */ - buf2 = _PyUnicode_AsKind(str2, rkind); - if (!buf2) goto error; - release2 = 1; - } - else if (rkind < kind2) { - /* widen self and buf1 */ - rkind = kind2; - if (release1) PyMem_Free(buf1); - sbuf = _PyUnicode_AsKind(self, rkind); - if (!sbuf) goto error; - srelease = 1; - buf1 = _PyUnicode_AsKind(str1, rkind); - if (!buf1) goto error; - release1 = 1; - } - u = PyUnicode_New(slen, maxchar); - if (!u) - goto error; - assert(PyUnicode_KIND(u) == rkind); - res = PyUnicode_DATA(u); - - memcpy(res, sbuf, rkind * slen); - /* change everything in-place, starting with this one */ - memcpy(res + rkind * i, - buf2, - rkind * len2); - i += len1; - - while ( --maxcount > 0) { - i = anylib_find(rkind, self, - sbuf+rkind*i, slen-i, - str1, buf1, len1, i); - if (i == -1) - break; - memcpy(res + rkind * i, - buf2, - rkind * len2); - i += len1; - } - } - } - else { - Py_ssize_t n, i, j, ires; - Py_ssize_t product, new_size; - int rkind = skind; - char *res; - - if (kind1 < rkind) { - /* widen substring */ - buf1 = _PyUnicode_AsKind(str1, rkind); - if (!buf1) goto error; - release1 = 1; - } - n = anylib_count(rkind, self, sbuf, slen, str1, buf1, len1, maxcount); - if (n == 0) - goto nothing; - if (kind2 < rkind) { - /* widen replacement */ - buf2 = _PyUnicode_AsKind(str2, rkind); - if (!buf2) goto error; - release2 = 1; - } - else if (kind2 > rkind) { - /* widen self and buf1 */ - rkind = kind2; - sbuf = _PyUnicode_AsKind(self, rkind); - if (!sbuf) goto error; - srelease = 1; - if (release1) PyMem_Free(buf1); - buf1 = _PyUnicode_AsKind(str1, rkind); - if (!buf1) goto error; - release1 = 1; - } - /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) - - PyUnicode_GET_LENGTH(str1))); */ - product = n * (len2-len1); - if ((product / (len2-len1)) != n) { - PyErr_SetString(PyExc_OverflowError, - "replace string is too long"); - goto error; - } - new_size = slen + product; - if (new_size == 0) { - Py_INCREF(unicode_empty); - u = unicode_empty; - goto done; - } - if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) { - PyErr_SetString(PyExc_OverflowError, - "replace string is too long"); - goto error; - } - u = PyUnicode_New(new_size, maxchar); - if (!u) - goto error; - assert(PyUnicode_KIND(u) == rkind); - res = PyUnicode_DATA(u); - ires = i = 0; - if (len1 > 0) { - while (n-- > 0) { - /* look for next match */ - j = anylib_find(rkind, self, - sbuf + rkind * i, slen-i, - str1, buf1, len1, i); - if (j == -1) - break; - else if (j > i) { - /* copy unchanged part [i:j] */ - memcpy(res + rkind * ires, - sbuf + rkind * i, - rkind * (j-i)); - ires += j - i; - } - /* copy substitution string */ - if (len2 > 0) { - memcpy(res + rkind * ires, - buf2, - rkind * len2); - ires += len2; - } - i = j + len1; - } - if (i < slen) - /* copy tail [i:] */ - memcpy(res + rkind * ires, - sbuf + rkind * i, - rkind * (slen-i)); - } - else { - /* interleave */ - while (n > 0) { - memcpy(res + rkind * ires, - buf2, - rkind * len2); - ires += len2; - if (--n <= 0) - break; - memcpy(res + rkind * ires, - sbuf + rkind * i, - rkind); - ires++; - i++; - } - memcpy(res + rkind * ires, - sbuf + rkind * i, - rkind * (slen-i)); - } - } - - if (mayshrink) { - unicode_adjust_maxchar(&u); - if (u == NULL) - goto error; - } - - done: - if (srelease) - PyMem_FREE(sbuf); - if (release1) - PyMem_FREE(buf1); - if (release2) - PyMem_FREE(buf2); - assert(_PyUnicode_CheckConsistency(u, 1)); - return u; - - nothing: - /* nothing to replace; return original string (when possible) */ - if (srelease) - PyMem_FREE(sbuf); - if (release1) - PyMem_FREE(buf1); - if (release2) - PyMem_FREE(buf2); - if (PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - return PyUnicode_Copy(self); - error: - if (srelease && sbuf) - PyMem_FREE(sbuf); - if (release1 && buf1) - PyMem_FREE(buf1); - if (release2 && buf2) - PyMem_FREE(buf2); - return NULL; -} - -/* --- Unicode Object Methods --------------------------------------------- */ - -PyDoc_STRVAR(title__doc__, - "S.title() -> str\n\ -\n\ -Return a titlecased version of S, i.e. words start with title case\n\ -characters, all remaining cased characters have lower case."); - -static PyObject* -unicode_title(PyObject *self) -{ - return fixup(self, fixtitle); -} - -PyDoc_STRVAR(capitalize__doc__, - "S.capitalize() -> str\n\ -\n\ -Return a capitalized version of S, i.e. make the first character\n\ -have upper case and the rest lower case."); - -static PyObject* -unicode_capitalize(PyObject *self) -{ - return fixup(self, fixcapitalize); -} - -#if 0 -PyDoc_STRVAR(capwords__doc__, - "S.capwords() -> str\n\ -\n\ -Apply .capitalize() to all words in S and return the result with\n\ -normalized whitespace (all whitespace strings are replaced by ' ')."); - -static PyObject* -unicode_capwords(PyObject *self) -{ - PyObject *list; - PyObject *item; - Py_ssize_t i; - - /* Split into words */ - list = split(self, NULL, -1); - if (!list) - return NULL; - - /* Capitalize each word */ - for (i = 0; i < PyList_GET_SIZE(list); i++) { - item = fixup(PyList_GET_ITEM(list, i), - fixcapitalize); - if (item == NULL) - goto onError; - Py_DECREF(PyList_GET_ITEM(list, i)); - PyList_SET_ITEM(list, i, item); - } - - /* Join the words to form a new string */ - item = PyUnicode_Join(NULL, list); - - onError: - Py_DECREF(list); - return item; -} -#endif - -/* Argument converter. Coerces to a single unicode character */ - -static int -convert_uc(PyObject *obj, void *addr) -{ - Py_UCS4 *fillcharloc = (Py_UCS4 *)addr; - PyObject *uniobj; - - uniobj = PyUnicode_FromObject(obj); - if (uniobj == NULL) { - PyErr_SetString(PyExc_TypeError, - "The fill character cannot be converted to Unicode"); - return 0; - } - if (PyUnicode_GET_LENGTH(uniobj) != 1) { - PyErr_SetString(PyExc_TypeError, - "The fill character must be exactly one character long"); - Py_DECREF(uniobj); - return 0; - } - *fillcharloc = PyUnicode_READ_CHAR(uniobj, 0); - Py_DECREF(uniobj); - return 1; -} - -PyDoc_STRVAR(center__doc__, - "S.center(width[, fillchar]) -> str\n\ -\n\ -Return S centered in a string of length width. Padding is\n\ -done using the specified fill character (default is a space)"); - -static PyObject * -unicode_center(PyObject *self, PyObject *args) -{ - Py_ssize_t marg, left; - Py_ssize_t width; - Py_UCS4 fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - - marg = width - _PyUnicode_LENGTH(self); - left = marg / 2 + (marg & width & 1); - - return pad(self, left, marg - left, fillchar); -} - -/* This function assumes that str1 and str2 are readied by the caller. */ - -static int -unicode_compare(PyObject *str1, PyObject *str2) -{ - int kind1, kind2; - void *data1, *data2; - Py_ssize_t len1, len2, i; - - kind1 = PyUnicode_KIND(str1); - kind2 = PyUnicode_KIND(str2); - data1 = PyUnicode_DATA(str1); - data2 = PyUnicode_DATA(str2); - len1 = PyUnicode_GET_LENGTH(str1); - len2 = PyUnicode_GET_LENGTH(str2); - - for (i = 0; i < len1 && i < len2; ++i) { - Py_UCS4 c1, c2; - c1 = PyUnicode_READ(kind1, data1, i); - c2 = PyUnicode_READ(kind2, data2, i); - - if (c1 != c2) - return (c1 < c2) ? -1 : 1; - } - - return (len1 < len2) ? -1 : (len1 != len2); -} - -int -PyUnicode_Compare(PyObject *left, PyObject *right) -{ - if (PyUnicode_Check(left) && PyUnicode_Check(right)) { - if (PyUnicode_READY(left) == -1 || - PyUnicode_READY(right) == -1) - return -1; - return unicode_compare(left, right); - } - PyErr_Format(PyExc_TypeError, - "Can't compare %.100s and %.100s", - left->ob_type->tp_name, - right->ob_type->tp_name); - return -1; -} - -int -PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) -{ - Py_ssize_t i; - int kind; - void *data; - Py_UCS4 chr; - - assert(_PyUnicode_CHECK(uni)); - if (PyUnicode_READY(uni) == -1) - return -1; - kind = PyUnicode_KIND(uni); - data = PyUnicode_DATA(uni); - /* Compare Unicode string and source character set string */ - for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++) - if (chr != str[i]) - return (chr < (unsigned char)(str[i])) ? -1 : 1; - /* This check keeps Python strings that end in '\0' from comparing equal - to C strings identical up to that point. */ - if (PyUnicode_GET_LENGTH(uni) != i || chr) - return 1; /* uni is longer */ - if (str[i]) - return -1; /* str is longer */ - return 0; -} - - -#define TEST_COND(cond) \ - ((cond) ? Py_True : Py_False) - -PyObject * -PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) -{ - int result; - - if (PyUnicode_Check(left) && PyUnicode_Check(right)) { - PyObject *v; - if (PyUnicode_READY(left) == -1 || - PyUnicode_READY(right) == -1) - return NULL; - if (PyUnicode_GET_LENGTH(left) != PyUnicode_GET_LENGTH(right) || - PyUnicode_KIND(left) != PyUnicode_KIND(right)) { - if (op == Py_EQ) { - Py_INCREF(Py_False); - return Py_False; - } - if (op == Py_NE) { - Py_INCREF(Py_True); - return Py_True; - } - } - if (left == right) - result = 0; - else - result = unicode_compare(left, right); - - /* Convert the return value to a Boolean */ - switch (op) { - case Py_EQ: - v = TEST_COND(result == 0); - break; - case Py_NE: - v = TEST_COND(result != 0); - break; - case Py_LE: - v = TEST_COND(result <= 0); - break; - case Py_GE: - v = TEST_COND(result >= 0); - break; - case Py_LT: - v = TEST_COND(result == -1); - break; - case Py_GT: - v = TEST_COND(result == 1); - break; - default: - PyErr_BadArgument(); - return NULL; - } - Py_INCREF(v); - return v; - } - - Py_RETURN_NOTIMPLEMENTED; -} - -int -PyUnicode_Contains(PyObject *container, PyObject *element) -{ - PyObject *str, *sub; - int kind1, kind2, kind; - void *buf1, *buf2; - Py_ssize_t len1, len2; - int result; - - /* Coerce the two arguments */ - sub = PyUnicode_FromObject(element); - if (!sub) { - PyErr_Format(PyExc_TypeError, - "'in <string>' requires string as left operand, not %s", - element->ob_type->tp_name); - return -1; - } - if (PyUnicode_READY(sub) == -1) - return -1; - - str = PyUnicode_FromObject(container); - if (!str || PyUnicode_READY(str) == -1) { - Py_DECREF(sub); - return -1; - } - - kind1 = PyUnicode_KIND(str); - kind2 = PyUnicode_KIND(sub); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(str); - buf2 = PyUnicode_DATA(sub); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str, kind); - if (!buf1) { - Py_DECREF(sub); - return -1; - } - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sub, kind); - if (!buf2) { - Py_DECREF(sub); - if (kind1 != kind) PyMem_Free(buf1); - return -1; - } - len1 = PyUnicode_GET_LENGTH(str); - len2 = PyUnicode_GET_LENGTH(sub); - - switch(kind) { - case PyUnicode_1BYTE_KIND: - result = ucs1lib_find(buf1, len1, buf2, len2, 0) != -1; - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_find(buf1, len1, buf2, len2, 0) != -1; - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_find(buf1, len1, buf2, len2, 0) != -1; - break; - default: - result = -1; - assert(0); - } - - Py_DECREF(str); - Py_DECREF(sub); - - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - return result; -} - -/* Concat to string or Unicode object giving a new Unicode object. */ - -PyObject * -PyUnicode_Concat(PyObject *left, PyObject *right) -{ - PyObject *u = NULL, *v = NULL, *w; - Py_UCS4 maxchar, maxchar2; - - /* Coerce the two arguments */ - u = PyUnicode_FromObject(left); - if (u == NULL) - goto onError; - v = PyUnicode_FromObject(right); - if (v == NULL) - goto onError; - - /* Shortcuts */ - if (v == unicode_empty) { - Py_DECREF(v); - return u; - } - if (u == unicode_empty) { - Py_DECREF(u); - return v; - } - - maxchar = PyUnicode_MAX_CHAR_VALUE(u); - maxchar2 = PyUnicode_MAX_CHAR_VALUE(v); - maxchar = Py_MAX(maxchar, maxchar2); - - /* Concat the two Unicode strings */ - w = PyUnicode_New( - PyUnicode_GET_LENGTH(u) + PyUnicode_GET_LENGTH(v), - maxchar); - if (w == NULL) - goto onError; - copy_characters(w, 0, u, 0, PyUnicode_GET_LENGTH(u)); - copy_characters(w, PyUnicode_GET_LENGTH(u), v, 0, PyUnicode_GET_LENGTH(v)); - Py_DECREF(u); - Py_DECREF(v); - assert(_PyUnicode_CheckConsistency(w, 1)); - return w; - - onError: - Py_XDECREF(u); - Py_XDECREF(v); - return NULL; -} - -static void -unicode_append_inplace(PyObject **p_left, PyObject *right) -{ - Py_ssize_t left_len, right_len, new_len; - - assert(PyUnicode_IS_READY(*p_left)); - assert(PyUnicode_IS_READY(right)); - - left_len = PyUnicode_GET_LENGTH(*p_left); - right_len = PyUnicode_GET_LENGTH(right); - if (left_len > PY_SSIZE_T_MAX - right_len) { - PyErr_SetString(PyExc_OverflowError, - "strings are too large to concat"); - goto error; - } - new_len = left_len + right_len; - - /* Now we own the last reference to 'left', so we can resize it - * in-place. - */ - if (unicode_resize(p_left, new_len) != 0) { - /* XXX if _PyUnicode_Resize() fails, 'left' has been - * deallocated so it cannot be put back into - * 'variable'. The MemoryError is raised when there - * is no value in 'variable', which might (very - * remotely) be a cause of incompatibilities. - */ - goto error; - } - /* copy 'right' into the newly allocated area of 'left' */ - copy_characters(*p_left, left_len, right, 0, right_len); - _PyUnicode_DIRTY(*p_left); - return; - -error: - Py_DECREF(*p_left); - *p_left = NULL; -} - -void -PyUnicode_Append(PyObject **p_left, PyObject *right) -{ - PyObject *left, *res; - - if (p_left == NULL) { - if (!PyErr_Occurred()) - PyErr_BadInternalCall(); - return; - } - left = *p_left; - if (right == NULL || !PyUnicode_Check(left)) { - if (!PyErr_Occurred()) - PyErr_BadInternalCall(); - goto error; - } - - if (PyUnicode_READY(left)) - goto error; - if (PyUnicode_READY(right)) - goto error; - - if (PyUnicode_CheckExact(left) && left != unicode_empty - && PyUnicode_CheckExact(right) && right != unicode_empty - && unicode_resizable(left) - && (_PyUnicode_KIND(right) <= _PyUnicode_KIND(left) - || _PyUnicode_WSTR(left) != NULL)) - { - /* Don't resize for ascii += latin1. Convert ascii to latin1 requires - to change the structure size, but characters are stored just after - the structure, and so it requires to move all characters which is - not so different than duplicating the string. */ - if (!(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right))) - { - unicode_append_inplace(p_left, right); - assert(p_left == NULL || _PyUnicode_CheckConsistency(*p_left, 1)); - return; - } - } - - res = PyUnicode_Concat(left, right); - if (res == NULL) - goto error; - Py_DECREF(left); - *p_left = res; - return; - -error: - Py_DECREF(*p_left); - *p_left = NULL; -} - -void -PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) -{ - PyUnicode_Append(pleft, right); - Py_XDECREF(right); -} - -PyDoc_STRVAR(count__doc__, - "S.count(sub[, start[, end]]) -> int\n\ -\n\ -Return the number of non-overlapping occurrences of substring sub in\n\ -string S[start:end]. Optional arguments start and end are\n\ -interpreted as in slice notation."); - -static PyObject * -unicode_count(PyObject *self, PyObject *args) -{ - PyObject *substring; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; - int kind1, kind2, kind; - void *buf1, *buf2; - Py_ssize_t len1, len2, iresult; - - if (!stringlib_parse_args_finds_unicode("count", args, &substring, - &start, &end)) - return NULL; - - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); - kind = kind1 > kind2 ? kind1 : kind2; - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(self, kind); - if (!buf1) { - Py_DECREF(substring); - return NULL; - } - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(substring, kind); - if (!buf2) { - Py_DECREF(substring); - if (kind1 != kind) PyMem_Free(buf1); - return NULL; - } - len1 = PyUnicode_GET_LENGTH(self); - len2 = PyUnicode_GET_LENGTH(substring); - - ADJUST_INDICES(start, end, len1); - switch(kind) { - case PyUnicode_1BYTE_KIND: - iresult = ucs1lib_count( - ((Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - iresult = ucs2lib_count( - ((Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - iresult = ucs4lib_count( - ((Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - assert(0); iresult = 0; - } - - result = PyLong_FromSsize_t(iresult); - - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - Py_DECREF(substring); - - return result; -} - -PyDoc_STRVAR(encode__doc__, - "S.encode(encoding='utf-8', errors='strict') -> bytes\n\ -\n\ -Encode S using the codec registered for encoding. Default encoding\n\ -is 'utf-8'. errors may be given to set a different error\n\ -handling scheme. Default is 'strict' meaning that encoding errors raise\n\ -a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ -'xmlcharrefreplace' as well as any other name registered with\n\ -codecs.register_error that can handle UnicodeEncodeErrors."); - -static PyObject * -unicode_encode(PyObject *self, PyObject *args, PyObject *kwargs) -{ - static char *kwlist[] = {"encoding", "errors", 0}; - char *encoding = NULL; - char *errors = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", - kwlist, &encoding, &errors)) - return NULL; - return PyUnicode_AsEncodedString(self, encoding, errors); -} - -PyDoc_STRVAR(expandtabs__doc__, - "S.expandtabs([tabsize]) -> str\n\ -\n\ -Return a copy of S where all tab characters are expanded using spaces.\n\ -If tabsize is not given, a tab size of 8 characters is assumed."); - -static PyObject* -unicode_expandtabs(PyObject *self, PyObject *args) -{ - Py_ssize_t i, j, line_pos, src_len, incr; - Py_UCS4 ch; - PyObject *u; - void *src_data, *dest_data; - int tabsize = 8; - int kind; - int found; - - if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - - /* First pass: determine size of output string */ - src_len = PyUnicode_GET_LENGTH(self); - i = j = line_pos = 0; - kind = PyUnicode_KIND(self); - src_data = PyUnicode_DATA(self); - found = 0; - for (; i < src_len; i++) { - ch = PyUnicode_READ(kind, src_data, i); - if (ch == '\t') { - found = 1; - if (tabsize > 0) { - incr = tabsize - (line_pos % tabsize); /* cannot overflow */ - if (j > PY_SSIZE_T_MAX - incr) - goto overflow; - line_pos += incr; - j += incr; - } - } - else { - if (j > PY_SSIZE_T_MAX - 1) - goto overflow; - line_pos++; - j++; - if (ch == '\n' || ch == '\r') - line_pos = 0; - } - } - if (!found && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - - /* Second pass: create output string and fill it */ - u = PyUnicode_New(j, PyUnicode_MAX_CHAR_VALUE(self)); - if (!u) - return NULL; - dest_data = PyUnicode_DATA(u); - - i = j = line_pos = 0; - - for (; i < src_len; i++) { - ch = PyUnicode_READ(kind, src_data, i); - if (ch == '\t') { - if (tabsize > 0) { - incr = tabsize - (line_pos % tabsize); - line_pos += incr; - while (incr--) { - PyUnicode_WRITE(kind, dest_data, j, ' '); - j++; - } - } - } - else { - line_pos++; - PyUnicode_WRITE(kind, dest_data, j, ch); - j++; - if (ch == '\n' || ch == '\r') - line_pos = 0; - } - } - assert (j == PyUnicode_GET_LENGTH(u)); - return unicode_result(u); - - overflow: - PyErr_SetString(PyExc_OverflowError, "new string is too long"); - return NULL; -} - -PyDoc_STRVAR(find__doc__, - "S.find(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_find(PyObject *self, PyObject *args) -{ - PyObject *substring; - Py_ssize_t start; - Py_ssize_t end; - Py_ssize_t result; - - if (!stringlib_parse_args_finds_unicode("find", args, &substring, - &start, &end)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_READY(substring) == -1) - return NULL; - - result = any_find_slice(1, self, substring, start, end); - - Py_DECREF(substring); - - if (result == -2) - return NULL; - - return PyLong_FromSsize_t(result); -} - -static PyObject * -unicode_getitem(PyObject *self, Py_ssize_t index) -{ - Py_UCS4 ch = PyUnicode_ReadChar(self, index); - if (ch == (Py_UCS4)-1) - return NULL; - return PyUnicode_FromOrdinal(ch); -} - -/* Believe it or not, this produces the same value for ASCII strings - as bytes_hash(). */ -static Py_hash_t -unicode_hash(PyObject *self) -{ - Py_ssize_t len; - Py_uhash_t x; - - if (_PyUnicode_HASH(self) != -1) - return _PyUnicode_HASH(self); - if (PyUnicode_READY(self) == -1) - return -1; - len = PyUnicode_GET_LENGTH(self); - - /* The hash function as a macro, gets expanded three times below. */ -#define HASH(P) \ - x = (Py_uhash_t)*P << 7; \ - while (--len >= 0) \ - x = (1000003*x) ^ (Py_uhash_t)*P++; - - switch (PyUnicode_KIND(self)) { - case PyUnicode_1BYTE_KIND: { - const unsigned char *c = PyUnicode_1BYTE_DATA(self); - HASH(c); - break; - } - case PyUnicode_2BYTE_KIND: { - const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); - HASH(s); - break; - } - default: { - Py_UCS4 *l; - assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && - "Impossible switch case in unicode_hash"); - l = PyUnicode_4BYTE_DATA(self); - HASH(l); - break; - } - } - x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); - - if (x == -1) - x = -2; - _PyUnicode_HASH(self) = x; - return x; -} -#undef HASH - -PyDoc_STRVAR(index__doc__, - "S.index(sub[, start[, end]]) -> int\n\ -\n\ -Like S.find() but raise ValueError when the substring is not found."); - -static PyObject * -unicode_index(PyObject *self, PyObject *args) -{ - Py_ssize_t result; - PyObject *substring; - Py_ssize_t start; - Py_ssize_t end; - - if (!stringlib_parse_args_finds_unicode("index", args, &substring, - &start, &end)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_READY(substring) == -1) - return NULL; - - result = any_find_slice(1, self, substring, start, end); - - Py_DECREF(substring); - - if (result == -2) - return NULL; - - if (result < 0) { - PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; - } - - return PyLong_FromSsize_t(result); -} - -PyDoc_STRVAR(islower__doc__, - "S.islower() -> bool\n\ -\n\ -Return True if all cased characters in S are lowercase and there is\n\ -at least one cased character in S, False otherwise."); - -static PyObject* -unicode_islower(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - int cased; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISLOWER(PyUnicode_READ(kind, data, 0))); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - cased = 0; - for (i = 0; i < length; i++) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - - if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) - return PyBool_FromLong(0); - else if (!cased && Py_UNICODE_ISLOWER(ch)) - cased = 1; - } - return PyBool_FromLong(cased); -} - -PyDoc_STRVAR(isupper__doc__, - "S.isupper() -> bool\n\ -\n\ -Return True if all cased characters in S are uppercase and there is\n\ -at least one cased character in S, False otherwise."); - -static PyObject* -unicode_isupper(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - int cased; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISUPPER(PyUnicode_READ(kind, data, 0)) != 0); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - cased = 0; - for (i = 0; i < length; i++) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - - if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch)) - return PyBool_FromLong(0); - else if (!cased && Py_UNICODE_ISUPPER(ch)) - cased = 1; - } - return PyBool_FromLong(cased); -} - -PyDoc_STRVAR(istitle__doc__, - "S.istitle() -> bool\n\ -\n\ -Return True if S is a titlecased string and there is at least one\n\ -character in S, i.e. upper- and titlecase characters may only\n\ -follow uncased characters and lowercase characters only cased ones.\n\ -Return False otherwise."); - -static PyObject* -unicode_istitle(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - int cased, previous_is_cased; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) { - Py_UCS4 ch = PyUnicode_READ(kind, data, 0); - return PyBool_FromLong((Py_UNICODE_ISTITLE(ch) != 0) || - (Py_UNICODE_ISUPPER(ch) != 0)); - } - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - cased = 0; - previous_is_cased = 0; - for (i = 0; i < length; i++) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - - if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) { - if (previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else if (Py_UNICODE_ISLOWER(ch)) { - if (!previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else - previous_is_cased = 0; - } - return PyBool_FromLong(cased); -} - -PyDoc_STRVAR(isspace__doc__, - "S.isspace() -> bool\n\ -\n\ -Return True if all characters in S are whitespace\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -unicode_isspace(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, 0))); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - for (i = 0; i < length; i++) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISSPACE(ch)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -PyDoc_STRVAR(isalpha__doc__, - "S.isalpha() -> bool\n\ -\n\ -Return True if all characters in S are alphabetic\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -unicode_isalpha(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, 0))); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - for (i = 0; i < length; i++) { - if (!Py_UNICODE_ISALPHA(PyUnicode_READ(kind, data, i))) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -PyDoc_STRVAR(isalnum__doc__, - "S.isalnum() -> bool\n\ -\n\ -Return True if all characters in S are alphanumeric\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -unicode_isalnum(PyObject *self) -{ - int kind; - void *data; - Py_ssize_t len, i; - - if (PyUnicode_READY(self) == -1) - return NULL; - - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - len = PyUnicode_GET_LENGTH(self); - - /* Shortcut for single character strings */ - if (len == 1) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, 0); - return PyBool_FromLong(Py_UNICODE_ISALNUM(ch)); - } - - /* Special case for empty strings */ - if (len == 0) - return PyBool_FromLong(0); - - for (i = 0; i < len; i++) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISALNUM(ch)) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -PyDoc_STRVAR(isdecimal__doc__, - "S.isdecimal() -> bool\n\ -\n\ -Return True if there are only decimal characters in S,\n\ -False otherwise."); - -static PyObject* -unicode_isdecimal(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, 0))); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - for (i = 0; i < length; i++) { - if (!Py_UNICODE_ISDECIMAL(PyUnicode_READ(kind, data, i))) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -PyDoc_STRVAR(isdigit__doc__, - "S.isdigit() -> bool\n\ -\n\ -Return True if all characters in S are digits\n\ -and there is at least one character in S, False otherwise."); - -static PyObject* -unicode_isdigit(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) { - const Py_UCS4 ch = PyUnicode_READ(kind, data, 0); - return PyBool_FromLong(Py_UNICODE_ISDIGIT(ch)); - } - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - for (i = 0; i < length; i++) { - if (!Py_UNICODE_ISDIGIT(PyUnicode_READ(kind, data, i))) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -PyDoc_STRVAR(isnumeric__doc__, - "S.isnumeric() -> bool\n\ -\n\ -Return True if there are only numeric characters in S,\n\ -False otherwise."); - -static PyObject* -unicode_isnumeric(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, 0))); - - /* Special case for empty strings */ - if (length == 0) - return PyBool_FromLong(0); - - for (i = 0; i < length; i++) { - if (!Py_UNICODE_ISNUMERIC(PyUnicode_READ(kind, data, i))) - return PyBool_FromLong(0); - } - return PyBool_FromLong(1); -} - -int -PyUnicode_IsIdentifier(PyObject *self) -{ - int kind; - void *data; - Py_ssize_t i; - Py_UCS4 first; - - if (PyUnicode_READY(self) == -1) { - Py_FatalError("identifier not ready"); - return 0; - } - - /* Special case for empty strings */ - if (PyUnicode_GET_LENGTH(self) == 0) - return 0; - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* PEP 3131 says that the first character must be in - XID_Start and subsequent characters in XID_Continue, - and for the ASCII range, the 2.x rules apply (i.e - start with letters and underscore, continue with - letters, digits, underscore). However, given the current - definition of XID_Start and XID_Continue, it is sufficient - to check just for these, except that _ must be allowed - as starting an identifier. */ - first = PyUnicode_READ(kind, data, 0); - if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */) - return 0; - - for (i = 1; i < PyUnicode_GET_LENGTH(self); i++) - if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i))) - return 0; - return 1; -} - -PyDoc_STRVAR(isidentifier__doc__, - "S.isidentifier() -> bool\n\ -\n\ -Return True if S is a valid identifier according\n\ -to the language definition."); - -static PyObject* -unicode_isidentifier(PyObject *self) -{ - return PyBool_FromLong(PyUnicode_IsIdentifier(self)); -} - -PyDoc_STRVAR(isprintable__doc__, - "S.isprintable() -> bool\n\ -\n\ -Return True if all characters in S are considered\n\ -printable in repr() or S is empty, False otherwise."); - -static PyObject* -unicode_isprintable(PyObject *self) -{ - Py_ssize_t i, length; - int kind; - void *data; - - if (PyUnicode_READY(self) == -1) - return NULL; - length = PyUnicode_GET_LENGTH(self); - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - - /* Shortcut for single character strings */ - if (length == 1) - return PyBool_FromLong( - Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, 0))); - - for (i = 0; i < length; i++) { - if (!Py_UNICODE_ISPRINTABLE(PyUnicode_READ(kind, data, i))) { - Py_RETURN_FALSE; - } - } - Py_RETURN_TRUE; -} - -PyDoc_STRVAR(join__doc__, - "S.join(iterable) -> str\n\ -\n\ -Return a string which is the concatenation of the strings in the\n\ -iterable. The separator between elements is S."); - -static PyObject* -unicode_join(PyObject *self, PyObject *data) -{ - return PyUnicode_Join(self, data); -} - -static Py_ssize_t -unicode_length(PyObject *self) -{ - if (PyUnicode_READY(self) == -1) - return -1; - return PyUnicode_GET_LENGTH(self); -} - -PyDoc_STRVAR(ljust__doc__, - "S.ljust(width[, fillchar]) -> str\n\ -\n\ -Return S left-justified in a Unicode string of length width. Padding is\n\ -done using the specified fill character (default is a space)."); - -static PyObject * -unicode_ljust(PyObject *self, PyObject *args) -{ - Py_ssize_t width; - Py_UCS4 fillchar = ' '; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) - return NULL; - - if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - - return pad(self, 0, width - _PyUnicode_LENGTH(self), fillchar); -} - -PyDoc_STRVAR(lower__doc__, - "S.lower() -> str\n\ -\n\ -Return a copy of the string S converted to lowercase."); - -static PyObject* -unicode_lower(PyObject *self) -{ - return fixup(self, fixlower); -} - -#define LEFTSTRIP 0 -#define RIGHTSTRIP 1 -#define BOTHSTRIP 2 - -/* Arrays indexed by above */ -static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; - -#define STRIPNAME(i) (stripformat[i]+3) - -/* externally visible for str.strip(unicode) */ -PyObject * -_PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj) -{ - void *data; - int kind; - Py_ssize_t i, j, len; - BLOOM_MASK sepmask; - - if (PyUnicode_READY(self) == -1 || PyUnicode_READY(sepobj) == -1) - return NULL; - - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - len = PyUnicode_GET_LENGTH(self); - sepmask = make_bloom_mask(PyUnicode_KIND(sepobj), - PyUnicode_DATA(sepobj), - PyUnicode_GET_LENGTH(sepobj)); - - i = 0; - if (striptype != RIGHTSTRIP) { - while (i < len && - BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, i), sepobj)) { - i++; - } - } - - j = len; - if (striptype != LEFTSTRIP) { - do { - j--; - } while (j >= i && - BLOOM_MEMBER(sepmask, PyUnicode_READ(kind, data, j), sepobj)); - j++; - } - - return PyUnicode_Substring(self, i, j); -} - -PyObject* -PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) -{ - unsigned char *data; - int kind; - Py_ssize_t length; - - if (PyUnicode_READY(self) == -1) - return NULL; - - end = Py_MIN(end, PyUnicode_GET_LENGTH(self)); - - if (start == 0 && end == PyUnicode_GET_LENGTH(self)) - { - if (PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - else - return PyUnicode_Copy(self); - } - - length = end - start; - if (length == 1) - return unicode_getitem(self, start); - - if (start < 0 || end < 0) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return NULL; - } - - if (PyUnicode_IS_ASCII(self)) { - kind = PyUnicode_KIND(self); - data = PyUnicode_1BYTE_DATA(self); - return unicode_fromascii(data + start, length); - } - else { - kind = PyUnicode_KIND(self); - data = PyUnicode_1BYTE_DATA(self); - return PyUnicode_FromKindAndData(kind, - data + kind * start, - length); - } -} - -static PyObject * -do_strip(PyObject *self, int striptype) -{ - int kind; - void *data; - Py_ssize_t len, i, j; - - if (PyUnicode_READY(self) == -1) - return NULL; - - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); - len = PyUnicode_GET_LENGTH(self); - - i = 0; - if (striptype != RIGHTSTRIP) { - while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) { - i++; - } - } - - j = len; - if (striptype != LEFTSTRIP) { - do { - j--; - } while (j >= i && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))); - j++; - } - - return PyUnicode_Substring(self, i, j); -} - - -static PyObject * -do_argstrip(PyObject *self, int striptype, PyObject *args) -{ - PyObject *sep = NULL; - - if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) - return NULL; - - if (sep != NULL && sep != Py_None) { - if (PyUnicode_Check(sep)) - return _PyUnicode_XStrip(self, striptype, sep); - else { - PyErr_Format(PyExc_TypeError, - "%s arg must be None or str", - STRIPNAME(striptype)); - return NULL; - } - } - - return do_strip(self, striptype); -} - - -PyDoc_STRVAR(strip__doc__, - "S.strip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading and trailing\n\ -whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); - -static PyObject * -unicode_strip(PyObject *self, PyObject *args) -{ - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, BOTHSTRIP); /* Common case */ - else - return do_argstrip(self, BOTHSTRIP, args); -} - - -PyDoc_STRVAR(lstrip__doc__, - "S.lstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with leading whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); - -static PyObject * -unicode_lstrip(PyObject *self, PyObject *args) -{ - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, LEFTSTRIP); /* Common case */ - else - return do_argstrip(self, LEFTSTRIP, args); -} - - -PyDoc_STRVAR(rstrip__doc__, - "S.rstrip([chars]) -> str\n\ -\n\ -Return a copy of the string S with trailing whitespace removed.\n\ -If chars is given and not None, remove characters in chars instead."); - -static PyObject * -unicode_rstrip(PyObject *self, PyObject *args) -{ - if (PyTuple_GET_SIZE(args) == 0) - return do_strip(self, RIGHTSTRIP); /* Common case */ - else - return do_argstrip(self, RIGHTSTRIP, args); -} - - -static PyObject* -unicode_repeat(PyObject *str, Py_ssize_t len) -{ - PyObject *u; - Py_ssize_t nchars, n; - - if (len < 1) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - - if (len == 1 && PyUnicode_CheckExact(str)) { - /* no repeat, return original string */ - Py_INCREF(str); - return str; - } - - if (PyUnicode_READY(str) == -1) - return NULL; - - if (PyUnicode_GET_LENGTH(str) > PY_SSIZE_T_MAX / len) { - PyErr_SetString(PyExc_OverflowError, - "repeated string is too long"); - return NULL; - } - nchars = len * PyUnicode_GET_LENGTH(str); - - u = PyUnicode_New(nchars, PyUnicode_MAX_CHAR_VALUE(str)); - if (!u) - return NULL; - assert(PyUnicode_KIND(u) == PyUnicode_KIND(str)); - - if (PyUnicode_GET_LENGTH(str) == 1) { - const int kind = PyUnicode_KIND(str); - const Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); - void *to = PyUnicode_DATA(u); - if (kind == PyUnicode_1BYTE_KIND) - memset(to, (unsigned char)fill_char, len); - else { - for (n = 0; n < len; ++n) - PyUnicode_WRITE(kind, to, n, fill_char); - } - } - else { - /* number of characters copied this far */ - Py_ssize_t done = PyUnicode_GET_LENGTH(str); - const Py_ssize_t char_size = PyUnicode_KIND(str); - char *to = (char *) PyUnicode_DATA(u); - Py_MEMCPY(to, PyUnicode_DATA(str), - PyUnicode_GET_LENGTH(str) * char_size); - while (done < nchars) { - n = (done <= nchars-done) ? done : nchars-done; - Py_MEMCPY(to + (done * char_size), to, n * char_size); - done += n; - } - } - - assert(_PyUnicode_CheckConsistency(u, 1)); - return u; -} - -PyObject * -PyUnicode_Replace(PyObject *obj, - PyObject *subobj, - PyObject *replobj, - Py_ssize_t maxcount) -{ - PyObject *self; - PyObject *str1; - PyObject *str2; - PyObject *result; - - self = PyUnicode_FromObject(obj); - if (self == NULL || PyUnicode_READY(self) == -1) - return NULL; - str1 = PyUnicode_FromObject(subobj); - if (str1 == NULL || PyUnicode_READY(str1) == -1) { - Py_DECREF(self); - return NULL; - } - str2 = PyUnicode_FromObject(replobj); - if (str2 == NULL || PyUnicode_READY(str2)) { - Py_DECREF(self); - Py_DECREF(str1); - return NULL; - } - result = replace(self, str1, str2, maxcount); - Py_DECREF(self); - Py_DECREF(str1); - Py_DECREF(str2); - return result; -} - -PyDoc_STRVAR(replace__doc__, - "S.replace(old, new[, count]) -> str\n\ -\n\ -Return a copy of S with all occurrences of substring\n\ -old replaced by new. If the optional argument count is\n\ -given, only the first count occurrences are replaced."); - -static PyObject* -unicode_replace(PyObject *self, PyObject *args) -{ - PyObject *str1; - PyObject *str2; - Py_ssize_t maxcount = -1; - PyObject *result; - - if (!PyArg_ParseTuple(args, "OO|n:replace", &str1, &str2, &maxcount)) - return NULL; - if (!PyUnicode_READY(self) == -1) - return NULL; - str1 = PyUnicode_FromObject(str1); - if (str1 == NULL || PyUnicode_READY(str1) == -1) - return NULL; - str2 = PyUnicode_FromObject(str2); - if (str2 == NULL || PyUnicode_READY(str2) == -1) { - Py_DECREF(str1); - return NULL; - } - - result = replace(self, str1, str2, maxcount); - - Py_DECREF(str1); - Py_DECREF(str2); - return result; -} - -static PyObject * -unicode_repr(PyObject *unicode) -{ - PyObject *repr; - Py_ssize_t isize; - Py_ssize_t osize, squote, dquote, i, o; - Py_UCS4 max, quote; - int ikind, okind; - void *idata, *odata; - - if (PyUnicode_READY(unicode) == -1) - return NULL; - - isize = PyUnicode_GET_LENGTH(unicode); - idata = PyUnicode_DATA(unicode); - - /* Compute length of output, quote characters, and - maximum character */ - osize = 2; /* quotes */ - max = 127; - squote = dquote = 0; - ikind = PyUnicode_KIND(unicode); - for (i = 0; i < isize; i++) { - Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - switch (ch) { - case '\'': squote++; osize++; break; - case '"': dquote++; osize++; break; - case '\\': case '\t': case '\r': case '\n': - osize += 2; break; - default: - /* Fast-path ASCII */ - if (ch < ' ' || ch == 0x7f) - osize += 4; /* \xHH */ - else if (ch < 0x7f) - osize++; - else if (Py_UNICODE_ISPRINTABLE(ch)) { - osize++; - max = ch > max ? ch : max; - } - else if (ch < 0x100) - osize += 4; /* \xHH */ - else if (ch < 0x10000) - osize += 6; /* \uHHHH */ - else - osize += 10; /* \uHHHHHHHH */ - } - } - - quote = '\''; - if (squote) { - if (dquote) - /* Both squote and dquote present. Use squote, - and escape them */ - osize += squote; - else - quote = '"'; - } - - repr = PyUnicode_New(osize, max); - if (repr == NULL) - return NULL; - okind = PyUnicode_KIND(repr); - odata = PyUnicode_DATA(repr); - - PyUnicode_WRITE(okind, odata, 0, quote); - PyUnicode_WRITE(okind, odata, osize-1, quote); - - for (i = 0, o = 1; i < isize; i++) { - Py_UCS4 ch = PyUnicode_READ(ikind, idata, i); - - /* Escape quotes and backslashes */ - if ((ch == quote) || (ch == '\\')) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, ch); - continue; - } - - /* Map special whitespace to '\t', \n', '\r' */ - if (ch == '\t') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 't'); - } - else if (ch == '\n') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'n'); - } - else if (ch == '\r') { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'r'); - } - - /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch == 0x7F) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } - - /* Copy ASCII characters as-is */ - else if (ch < 0x7F) { - PyUnicode_WRITE(okind, odata, o++, ch); - } - - /* Non-ASCII characters */ - else { - /* Map Unicode whitespace and control characters - (categories Z* and C* except ASCII space) - */ - if (!Py_UNICODE_ISPRINTABLE(ch)) { - /* Map 8-bit characters to '\xhh' */ - if (ch <= 0xff) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'x'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]); - } - /* Map 21-bit characters to '\U00xxxxxx' */ - else if (ch >= 0x10000) { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'U'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); - } - /* Map 16-bit characters to '\uxxxx' */ - else { - PyUnicode_WRITE(okind, odata, o++, '\\'); - PyUnicode_WRITE(okind, odata, o++, 'u'); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]); - PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]); - } - } - /* Copy characters as-is */ - else { - PyUnicode_WRITE(okind, odata, o++, ch); - } - } - } - /* Closing quote already added at the beginning */ - assert(_PyUnicode_CheckConsistency(repr, 1)); - return repr; -} - -PyDoc_STRVAR(rfind__doc__, - "S.rfind(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in S where substring sub is found,\n\ -such that sub is contained within S[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -unicode_rfind(PyObject *self, PyObject *args) -{ - PyObject *substring; - Py_ssize_t start; - Py_ssize_t end; - Py_ssize_t result; - - if (!stringlib_parse_args_finds_unicode("rfind", args, &substring, - &start, &end)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_READY(substring) == -1) - return NULL; - - result = any_find_slice(-1, self, substring, start, end); - - Py_DECREF(substring); - - if (result == -2) - return NULL; - - return PyLong_FromSsize_t(result); -} - -PyDoc_STRVAR(rindex__doc__, - "S.rindex(sub[, start[, end]]) -> int\n\ -\n\ -Like S.rfind() but raise ValueError when the substring is not found."); - -static PyObject * -unicode_rindex(PyObject *self, PyObject *args) -{ - PyObject *substring; - Py_ssize_t start; - Py_ssize_t end; - Py_ssize_t result; - - if (!stringlib_parse_args_finds_unicode("rindex", args, &substring, - &start, &end)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - if (PyUnicode_READY(substring) == -1) - return NULL; - - result = any_find_slice(-1, self, substring, start, end); - - Py_DECREF(substring); - - if (result == -2) - return NULL; - - if (result < 0) { - PyErr_SetString(PyExc_ValueError, "substring not found"); - return NULL; - } - - return PyLong_FromSsize_t(result); -} - -PyDoc_STRVAR(rjust__doc__, - "S.rjust(width[, fillchar]) -> str\n\ -\n\ -Return S right-justified in a string of length width. Padding is\n\ -done using the specified fill character (default is a space)."); - -static PyObject * -unicode_rjust(PyObject *self, PyObject *args) -{ - Py_ssize_t width; - Py_UCS4 fillchar = ' '; - - if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) - return NULL; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (_PyUnicode_LENGTH(self) >= width && PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - - return pad(self, width - _PyUnicode_LENGTH(self), 0, fillchar); -} - -PyObject * -PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) -{ - PyObject *result; - - s = PyUnicode_FromObject(s); - if (s == NULL) - return NULL; - if (sep != NULL) { - sep = PyUnicode_FromObject(sep); - if (sep == NULL) { - Py_DECREF(s); - return NULL; - } - } - - result = split(s, sep, maxsplit); - - Py_DECREF(s); - Py_XDECREF(sep); - return result; -} - -PyDoc_STRVAR(split__doc__, - "S.split([sep[, maxsplit]]) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string. If maxsplit is given, at most maxsplit\n\ -splits are done. If sep is not specified or is None, any\n\ -whitespace string is a separator and empty strings are\n\ -removed from the result."); - -static PyObject* -unicode_split(PyObject *self, PyObject *args) -{ - PyObject *substring = Py_None; - Py_ssize_t maxcount = -1; - - if (!PyArg_ParseTuple(args, "|On:split", &substring, &maxcount)) - return NULL; - - if (substring == Py_None) - return split(self, NULL, maxcount); - else if (PyUnicode_Check(substring)) - return split(self, substring, maxcount); - else - return PyUnicode_Split(self, substring, maxcount); -} - -PyObject * -PyUnicode_Partition(PyObject *str_in, PyObject *sep_in) -{ - PyObject* str_obj; - PyObject* sep_obj; - PyObject* out; - int kind1, kind2, kind; - void *buf1 = NULL, *buf2 = NULL; - Py_ssize_t len1, len2; - - str_obj = PyUnicode_FromObject(str_in); - if (!str_obj || PyUnicode_READY(str_obj) == -1) - return NULL; - sep_obj = PyUnicode_FromObject(sep_in); - if (!sep_obj || PyUnicode_READY(sep_obj) == -1) { - Py_DECREF(str_obj); - return NULL; - } - - kind1 = PyUnicode_KIND(str_obj); - kind2 = PyUnicode_KIND(sep_obj); - kind = Py_MAX(kind1, kind2); - buf1 = PyUnicode_DATA(str_obj); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str_obj, kind); - if (!buf1) - goto onError; - buf2 = PyUnicode_DATA(sep_obj); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sep_obj, kind); - if (!buf2) - goto onError; - len1 = PyUnicode_GET_LENGTH(str_obj); - len2 = PyUnicode_GET_LENGTH(sep_obj); - - switch(PyUnicode_KIND(str_obj)) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) - out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); - else - out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - case PyUnicode_2BYTE_KIND: - out = ucs2lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - case PyUnicode_4BYTE_KIND: - out = ucs4lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - default: - assert(0); - out = 0; - } - - Py_DECREF(sep_obj); - Py_DECREF(str_obj); - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - return out; - onError: - Py_DECREF(sep_obj); - Py_DECREF(str_obj); - if (kind1 != kind && buf1) - PyMem_Free(buf1); - if (kind2 != kind && buf2) - PyMem_Free(buf2); - return NULL; -} - - -PyObject * -PyUnicode_RPartition(PyObject *str_in, PyObject *sep_in) -{ - PyObject* str_obj; - PyObject* sep_obj; - PyObject* out; - int kind1, kind2, kind; - void *buf1 = NULL, *buf2 = NULL; - Py_ssize_t len1, len2; - - str_obj = PyUnicode_FromObject(str_in); - if (!str_obj) - return NULL; - sep_obj = PyUnicode_FromObject(sep_in); - if (!sep_obj) { - Py_DECREF(str_obj); - return NULL; - } - - kind1 = PyUnicode_KIND(str_in); - kind2 = PyUnicode_KIND(sep_obj); - kind = Py_MAX(kind1, kind2); - buf1 = PyUnicode_DATA(str_in); - if (kind1 != kind) - buf1 = _PyUnicode_AsKind(str_in, kind); - if (!buf1) - goto onError; - buf2 = PyUnicode_DATA(sep_obj); - if (kind2 != kind) - buf2 = _PyUnicode_AsKind(sep_obj, kind); - if (!buf2) - goto onError; - len1 = PyUnicode_GET_LENGTH(str_obj); - len2 = PyUnicode_GET_LENGTH(sep_obj); - - switch(PyUnicode_KIND(str_in)) { - case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) - out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); - else - out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - case PyUnicode_2BYTE_KIND: - out = ucs2lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - case PyUnicode_4BYTE_KIND: - out = ucs4lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); - break; - default: - assert(0); - out = 0; - } - - Py_DECREF(sep_obj); - Py_DECREF(str_obj); - if (kind1 != kind) - PyMem_Free(buf1); - if (kind2 != kind) - PyMem_Free(buf2); - - return out; - onError: - Py_DECREF(sep_obj); - Py_DECREF(str_obj); - if (kind1 != kind && buf1) - PyMem_Free(buf1); - if (kind2 != kind && buf2) - PyMem_Free(buf2); - return NULL; -} - -PyDoc_STRVAR(partition__doc__, - "S.partition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, and return the part before it,\n\ -the separator itself, and the part after it. If the separator is not\n\ -found, return S and two empty strings."); - -static PyObject* -unicode_partition(PyObject *self, PyObject *separator) -{ - return PyUnicode_Partition(self, separator); -} - -PyDoc_STRVAR(rpartition__doc__, - "S.rpartition(sep) -> (head, sep, tail)\n\ -\n\ -Search for the separator sep in S, starting at the end of S, and return\n\ -the part before it, the separator itself, and the part after it. If the\n\ -separator is not found, return two empty strings and S."); - -static PyObject* -unicode_rpartition(PyObject *self, PyObject *separator) -{ - return PyUnicode_RPartition(self, separator); -} - -PyObject * -PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) -{ - PyObject *result; - - s = PyUnicode_FromObject(s); - if (s == NULL) - return NULL; - if (sep != NULL) { - sep = PyUnicode_FromObject(sep); - if (sep == NULL) { - Py_DECREF(s); - return NULL; - } - } - - result = rsplit(s, sep, maxsplit); - - Py_DECREF(s); - Py_XDECREF(sep); - return result; -} - -PyDoc_STRVAR(rsplit__doc__, - "S.rsplit([sep[, maxsplit]]) -> list of strings\n\ -\n\ -Return a list of the words in S, using sep as the\n\ -delimiter string, starting at the end of the string and\n\ -working to the front. If maxsplit is given, at most maxsplit\n\ -splits are done. If sep is not specified, any whitespace string\n\ -is a separator."); - -static PyObject* -unicode_rsplit(PyObject *self, PyObject *args) -{ - PyObject *substring = Py_None; - Py_ssize_t maxcount = -1; - - if (!PyArg_ParseTuple(args, "|On:rsplit", &substring, &maxcount)) - return NULL; - - if (substring == Py_None) - return rsplit(self, NULL, maxcount); - else if (PyUnicode_Check(substring)) - return rsplit(self, substring, maxcount); - else - return PyUnicode_RSplit(self, substring, maxcount); -} - -PyDoc_STRVAR(splitlines__doc__, - "S.splitlines([keepends]) -> list of strings\n\ -\n\ -Return a list of the lines in S, breaking at line boundaries.\n\ -Line breaks are not included in the resulting list unless keepends\n\ -is given and true."); - -static PyObject* -unicode_splitlines(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"keepends", 0}; - int keepends = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines", - kwlist, &keepends)) - return NULL; - - return PyUnicode_Splitlines(self, keepends); -} - -static -PyObject *unicode_str(PyObject *self) -{ - if (PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } else - /* Subtype -- return genuine unicode string with the same value. */ - return PyUnicode_Copy(self); -} - -PyDoc_STRVAR(swapcase__doc__, - "S.swapcase() -> str\n\ -\n\ -Return a copy of S with uppercase characters converted to lowercase\n\ -and vice versa."); - -static PyObject* -unicode_swapcase(PyObject *self) -{ - return fixup(self, fixswapcase); -} - -PyDoc_STRVAR(maketrans__doc__, - "str.maketrans(x[, y[, z]]) -> dict (static method)\n\ -\n\ -Return a translation table usable for str.translate().\n\ -If there is only one argument, it must be a dictionary mapping Unicode\n\ -ordinals (integers) or characters to Unicode ordinals, strings or None.\n\ -Character keys will be then converted to ordinals.\n\ -If there are two arguments, they must be strings of equal length, and\n\ -in the resulting dictionary, each character in x will be mapped to the\n\ -character at the same position in y. If there is a third argument, it\n\ -must be a string, whose characters will be mapped to None in the result."); - -static PyObject* -unicode_maketrans(PyObject *null, PyObject *args) -{ - PyObject *x, *y = NULL, *z = NULL; - PyObject *new = NULL, *key, *value; - Py_ssize_t i = 0; - int res; - - if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z)) - return NULL; - new = PyDict_New(); - if (!new) - return NULL; - if (y != NULL) { - int x_kind, y_kind, z_kind; - void *x_data, *y_data, *z_data; - - /* x must be a string too, of equal length */ - if (!PyUnicode_Check(x)) { - PyErr_SetString(PyExc_TypeError, "first maketrans argument must " - "be a string if there is a second argument"); - goto err; - } - if (PyUnicode_GET_LENGTH(x) != PyUnicode_GET_LENGTH(y)) { - PyErr_SetString(PyExc_ValueError, "the first two maketrans " - "arguments must have equal length"); - goto err; - } - /* create entries for translating chars in x to those in y */ - x_kind = PyUnicode_KIND(x); - y_kind = PyUnicode_KIND(y); - x_data = PyUnicode_DATA(x); - y_data = PyUnicode_DATA(y); - for (i = 0; i < PyUnicode_GET_LENGTH(x); i++) { - key = PyLong_FromLong(PyUnicode_READ(x_kind, x_data, i)); - value = PyLong_FromLong(PyUnicode_READ(y_kind, y_data, i)); - if (!key || !value) - goto err; - res = PyDict_SetItem(new, key, value); - Py_DECREF(key); - Py_DECREF(value); - if (res < 0) - goto err; - } - /* create entries for deleting chars in z */ - if (z != NULL) { - z_kind = PyUnicode_KIND(z); - z_data = PyUnicode_DATA(z); - for (i = 0; i < PyUnicode_GET_LENGTH(z); i++) { - key = PyLong_FromLong(PyUnicode_READ(z_kind, z_data, i)); - if (!key) - goto err; - res = PyDict_SetItem(new, key, Py_None); - Py_DECREF(key); - if (res < 0) - goto err; - } - } - } else { - int kind; - void *data; - - /* x must be a dict */ - if (!PyDict_CheckExact(x)) { - PyErr_SetString(PyExc_TypeError, "if you give only one argument " - "to maketrans it must be a dict"); - goto err; - } - /* copy entries into the new dict, converting string keys to int keys */ - while (PyDict_Next(x, &i, &key, &value)) { - if (PyUnicode_Check(key)) { - /* convert string keys to integer keys */ - PyObject *newkey; - if (PyUnicode_GET_LENGTH(key) != 1) { - PyErr_SetString(PyExc_ValueError, "string keys in translate " - "table must be of length 1"); - goto err; - } - kind = PyUnicode_KIND(key); - data = PyUnicode_DATA(key); - newkey = PyLong_FromLong(PyUnicode_READ(kind, data, 0)); - if (!newkey) - goto err; - res = PyDict_SetItem(new, newkey, value); - Py_DECREF(newkey); - if (res < 0) - goto err; - } else if (PyLong_Check(key)) { - /* just keep integer keys */ - if (PyDict_SetItem(new, key, value) < 0) - goto err; - } else { - PyErr_SetString(PyExc_TypeError, "keys in translate table must " - "be strings or integers"); - goto err; - } - } - } - return new; - err: - Py_DECREF(new); - return NULL; -} - -PyDoc_STRVAR(translate__doc__, - "S.translate(table) -> str\n\ -\n\ -Return a copy of the string S, where all characters have been mapped\n\ -through the given translation table, which must be a mapping of\n\ -Unicode ordinals to Unicode ordinals, strings, or None.\n\ -Unmapped characters are left untouched. Characters mapped to None\n\ -are deleted."); - -static PyObject* -unicode_translate(PyObject *self, PyObject *table) -{ - return _PyUnicode_TranslateCharmap(self, table, "ignore"); -} - -PyDoc_STRVAR(upper__doc__, - "S.upper() -> str\n\ -\n\ -Return a copy of S converted to uppercase."); - -static PyObject* -unicode_upper(PyObject *self) -{ - return fixup(self, fixupper); -} - -PyDoc_STRVAR(zfill__doc__, - "S.zfill(width) -> str\n\ -\n\ -Pad a numeric string S with zeros on the left, to fill a field\n\ -of the specified width. The string S is never truncated."); - -static PyObject * -unicode_zfill(PyObject *self, PyObject *args) -{ - Py_ssize_t fill; - PyObject *u; - Py_ssize_t width; - int kind; - void *data; - Py_UCS4 chr; - - if (PyUnicode_READY(self) == -1) - return NULL; - - if (!PyArg_ParseTuple(args, "n:zfill", &width)) - return NULL; - - if (PyUnicode_GET_LENGTH(self) >= width) { - if (PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } - else - return PyUnicode_Copy(self); - } - - fill = width - _PyUnicode_LENGTH(self); - - u = pad(self, fill, 0, '0'); - - if (u == NULL) - return NULL; - - kind = PyUnicode_KIND(u); - data = PyUnicode_DATA(u); - chr = PyUnicode_READ(kind, data, fill); - - if (chr == '+' || chr == '-') { - /* move sign to beginning of string */ - PyUnicode_WRITE(kind, data, 0, chr); - PyUnicode_WRITE(kind, data, fill, '0'); - } - - assert(_PyUnicode_CheckConsistency(u, 1)); - return u; -} - -#if 0 -static PyObject * -unicode__decimal2ascii(PyObject *self) -{ - return PyUnicode_TransformDecimalAndSpaceToASCII(self); -} -#endif - -PyDoc_STRVAR(startswith__doc__, - "S.startswith(prefix[, start[, end]]) -> bool\n\ -\n\ -Return True if S starts with the specified prefix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ -prefix can also be a tuple of strings to try."); - -static PyObject * -unicode_startswith(PyObject *self, - PyObject *args) -{ - PyObject *subobj; - PyObject *substring; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - int result; - - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - substring = PyUnicode_FromObject(PyTuple_GET_ITEM(subobj, i)); - if (substring == NULL) - return NULL; - result = tailmatch(self, substring, start, end, -1); - Py_DECREF(substring); - if (result) { - Py_RETURN_TRUE; - } - } - /* nothing matched */ - Py_RETURN_FALSE; - } - substring = PyUnicode_FromObject(subobj); - if (substring == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "startswith first arg must be str or " - "a tuple of str, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - result = tailmatch(self, substring, start, end, -1); - Py_DECREF(substring); - return PyBool_FromLong(result); -} - - -PyDoc_STRVAR(endswith__doc__, - "S.endswith(suffix[, start[, end]]) -> bool\n\ -\n\ -Return True if S ends with the specified suffix, False otherwise.\n\ -With optional start, test S beginning at that position.\n\ -With optional end, stop comparing S at that position.\n\ -suffix can also be a tuple of strings to try."); - -static PyObject * -unicode_endswith(PyObject *self, - PyObject *args) -{ - PyObject *subobj; - PyObject *substring; - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - int result; - - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - substring = PyUnicode_FromObject( - PyTuple_GET_ITEM(subobj, i)); - if (substring == NULL) - return NULL; - result = tailmatch(self, substring, start, end, +1); - Py_DECREF(substring); - if (result) { - Py_RETURN_TRUE; - } - } - Py_RETURN_FALSE; - } - substring = PyUnicode_FromObject(subobj); - if (substring == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "endswith first arg must be str or " - "a tuple of str, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - result = tailmatch(self, substring, start, end, +1); - Py_DECREF(substring); - return PyBool_FromLong(result); -} - -#include "stringlib/unicode_format.h" - -PyDoc_STRVAR(format__doc__, - "S.format(*args, **kwargs) -> str\n\ -\n\ -Return a formatted version of S, using substitutions from args and kwargs.\n\ -The substitutions are identified by braces ('{' and '}')."); - -PyDoc_STRVAR(format_map__doc__, - "S.format_map(mapping) -> str\n\ -\n\ -Return a formatted version of S, using substitutions from mapping.\n\ -The substitutions are identified by braces ('{' and '}')."); - -static PyObject * -unicode__format__(PyObject* self, PyObject* args) -{ - PyObject *format_spec, *out; - - if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; -} - -PyDoc_STRVAR(p_format__doc__, - "S.__format__(format_spec) -> str\n\ -\n\ -Return a formatted version of S as described by format_spec."); - -static PyObject * -unicode__sizeof__(PyObject *v) -{ - Py_ssize_t size; - - /* If it's a compact object, account for base structure + - character data. */ - if (PyUnicode_IS_COMPACT_ASCII(v)) - size = sizeof(PyASCIIObject) + PyUnicode_GET_LENGTH(v) + 1; - else if (PyUnicode_IS_COMPACT(v)) - size = sizeof(PyCompactUnicodeObject) + - (PyUnicode_GET_LENGTH(v) + 1) * PyUnicode_KIND(v); - else { - /* If it is a two-block object, account for base object, and - for character block if present. */ - size = sizeof(PyUnicodeObject); - if (_PyUnicode_DATA_ANY(v)) - size += (PyUnicode_GET_LENGTH(v) + 1) * - PyUnicode_KIND(v); - } - /* If the wstr pointer is present, account for it unless it is shared - with the data pointer. Check if the data is not shared. */ - if (_PyUnicode_HAS_WSTR_MEMORY(v)) - size += (PyUnicode_WSTR_LENGTH(v) + 1) * sizeof(wchar_t); - if (_PyUnicode_HAS_UTF8_MEMORY(v)) - size += PyUnicode_UTF8_LENGTH(v) + 1; - - return PyLong_FromSsize_t(size); -} - -PyDoc_STRVAR(sizeof__doc__, - "S.__sizeof__() -> size of S in memory, in bytes"); - -static PyObject * -unicode_getnewargs(PyObject *v) -{ - PyObject *copy = PyUnicode_Copy(v); - if (!copy) - return NULL; - return Py_BuildValue("(N)", copy); -} - -static PyMethodDef unicode_methods[] = { - - /* Order is according to common usage: often used methods should - appear first, since lookup is done sequentially. */ - - {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, - {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, - {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, - {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, - {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, - {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, - {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, - {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, - {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, - {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__}, - {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, - {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, - {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, - {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, - {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, - {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, - {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, - {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__}, - {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__}, - {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__}, - {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__}, - {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS | METH_KEYWORDS, splitlines__doc__}, - {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__}, - {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__}, - {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__}, - {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__}, - {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, - {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, - {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__}, - {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__}, - {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__}, - {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__}, - {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__}, - {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__}, - {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__}, - {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__}, - {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, - {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, - {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__}, - {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, - {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, - {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__}, - {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, - {"maketrans", (PyCFunction) unicode_maketrans, - METH_VARARGS | METH_STATIC, maketrans__doc__}, - {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__}, -#if 0 - {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, -#endif - -#if 0 - /* These methods are just used for debugging the implementation. */ - {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS}, -#endif - - {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS}, - {NULL, NULL} -}; - -static PyObject * -unicode_mod(PyObject *v, PyObject *w) -{ - if (!PyUnicode_Check(v)) - Py_RETURN_NOTIMPLEMENTED; - return PyUnicode_Format(v, w); -} - -static PyNumberMethods unicode_as_number = { - 0, /*nb_add*/ - 0, /*nb_subtract*/ - 0, /*nb_multiply*/ - unicode_mod, /*nb_remainder*/ -}; - -static PySequenceMethods unicode_as_sequence = { - (lenfunc) unicode_length, /* sq_length */ - PyUnicode_Concat, /* sq_concat */ - (ssizeargfunc) unicode_repeat, /* sq_repeat */ - (ssizeargfunc) unicode_getitem, /* sq_item */ - 0, /* sq_slice */ - 0, /* sq_ass_item */ - 0, /* sq_ass_slice */ - PyUnicode_Contains, /* sq_contains */ -}; - -static PyObject* -unicode_subscript(PyObject* self, PyObject* item) -{ - if (PyUnicode_READY(self) == -1) - return NULL; - - if (PyIndex_Check(item)) { - Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); - if (i == -1 && PyErr_Occurred()) - return NULL; - if (i < 0) - i += PyUnicode_GET_LENGTH(self); - return unicode_getitem(self, i); - } else if (PySlice_Check(item)) { - Py_ssize_t start, stop, step, slicelength, cur, i; - PyObject *result; - void *src_data, *dest_data; - int src_kind, dest_kind; - Py_UCS4 ch, max_char, kind_limit; - - if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self), - &start, &stop, &step, &slicelength) < 0) { - return NULL; - } - - if (slicelength <= 0) { - return PyUnicode_New(0, 0); - } else if (start == 0 && step == 1 && - slicelength == PyUnicode_GET_LENGTH(self) && - PyUnicode_CheckExact(self)) { - Py_INCREF(self); - return self; - } else if (step == 1) { - return PyUnicode_Substring(self, - start, start + slicelength); - } - /* General case */ - src_kind = PyUnicode_KIND(self); - src_data = PyUnicode_DATA(self); - if (!PyUnicode_IS_ASCII(self)) { - kind_limit = kind_maxchar_limit(src_kind); - max_char = 0; - for (cur = start, i = 0; i < slicelength; cur += step, i++) { - ch = PyUnicode_READ(src_kind, src_data, cur); - if (ch > max_char) { - max_char = ch; - if (max_char >= kind_limit) - break; - } - } - } - else - max_char = 127; - result = PyUnicode_New(slicelength, max_char); - if (result == NULL) - return NULL; - dest_kind = PyUnicode_KIND(result); - dest_data = PyUnicode_DATA(result); - - for (cur = start, i = 0; i < slicelength; cur += step, i++) { - Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur); - PyUnicode_WRITE(dest_kind, dest_data, i, ch); - } - assert(_PyUnicode_CheckConsistency(result, 1)); - return result; - } else { - PyErr_SetString(PyExc_TypeError, "string indices must be integers"); - return NULL; - } -} - -static PyMappingMethods unicode_as_mapping = { - (lenfunc)unicode_length, /* mp_length */ - (binaryfunc)unicode_subscript, /* mp_subscript */ - (objobjargproc)0, /* mp_ass_subscript */ -}; - - -/* Helpers for PyUnicode_Format() */ - -static PyObject * -getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) -{ - Py_ssize_t argidx = *p_argidx; - if (argidx < arglen) { - (*p_argidx)++; - if (arglen < 0) - return args; - else - return PyTuple_GetItem(args, argidx); - } - PyErr_SetString(PyExc_TypeError, - "not enough arguments for format string"); - return NULL; -} - -/* Returns a new reference to a PyUnicode object, or NULL on failure. */ - -static PyObject * -formatfloat(PyObject *v, int flags, int prec, int type) -{ - char *p; - PyObject *result; - double x; - - x = PyFloat_AsDouble(v); - if (x == -1.0 && PyErr_Occurred()) - return NULL; - - if (prec < 0) - prec = 6; - - p = PyOS_double_to_string(x, type, prec, - (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); - if (p == NULL) - return NULL; - result = PyUnicode_DecodeASCII(p, strlen(p), NULL); - PyMem_Free(p); - return result; -} - -static PyObject* -formatlong(PyObject *val, int flags, int prec, int type) -{ - char *buf; - int len; - PyObject *str; /* temporary string object. */ - PyObject *result; - - str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len); - if (!str) - return NULL; - result = PyUnicode_DecodeASCII(buf, len, NULL); - Py_DECREF(str); - return result; -} - -static Py_UCS4 -formatchar(PyObject *v) -{ - /* presume that the buffer is at least 3 characters long */ - if (PyUnicode_Check(v)) { - if (PyUnicode_GET_LENGTH(v) == 1) { - return PyUnicode_READ_CHAR(v, 0); - } - goto onError; - } - else { - /* Integer input truncated to a character */ - long x; - x = PyLong_AsLong(v); - if (x == -1 && PyErr_Occurred()) - goto onError; - - if (x < 0 || x > MAX_UNICODE) { - PyErr_SetString(PyExc_OverflowError, - "%c arg not in range(0x110000)"); - return (Py_UCS4) -1; - } - - return (Py_UCS4) x; - } - - onError: - PyErr_SetString(PyExc_TypeError, - "%c requires int or char"); - return (Py_UCS4) -1; -} - -static int -repeat_accumulate(_PyAccu *acc, PyObject *obj, Py_ssize_t count) -{ - int r; - assert(count > 0); - assert(PyUnicode_Check(obj)); - if (count > 5) { - PyObject *repeated = unicode_repeat(obj, count); - if (repeated == NULL) - return -1; - r = _PyAccu_Accumulate(acc, repeated); - Py_DECREF(repeated); - return r; - } - else { - do { - if (_PyAccu_Accumulate(acc, obj)) - return -1; - } while (--count); - return 0; - } -} - -PyObject * -PyUnicode_Format(PyObject *format, PyObject *args) -{ - void *fmt; - int fmtkind; - PyObject *result; - int kind; - int r; - Py_ssize_t fmtcnt, fmtpos, arglen, argidx; - int args_owned = 0; - PyObject *dict = NULL; - PyObject *temp = NULL; - PyObject *second = NULL; - PyObject *uformat; - _PyAccu acc; - static PyObject *plus, *minus, *blank, *zero, *percent; - - if (!plus && !(plus = get_latin1_char('+'))) - return NULL; - if (!minus && !(minus = get_latin1_char('-'))) - return NULL; - if (!blank && !(blank = get_latin1_char(' '))) - return NULL; - if (!zero && !(zero = get_latin1_char('0'))) - return NULL; - if (!percent && !(percent = get_latin1_char('%'))) - return NULL; - - if (format == NULL || args == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - uformat = PyUnicode_FromObject(format); - if (uformat == NULL || PyUnicode_READY(uformat) == -1) - return NULL; - if (_PyAccu_Init(&acc)) - goto onError; - fmt = PyUnicode_DATA(uformat); - fmtkind = PyUnicode_KIND(uformat); - fmtcnt = PyUnicode_GET_LENGTH(uformat); - fmtpos = 0; - - if (PyTuple_Check(args)) { - arglen = PyTuple_Size(args); - argidx = 0; - } - else { - arglen = -1; - argidx = -2; - } - if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) && - !PyUnicode_Check(args)) - dict = args; - - while (--fmtcnt >= 0) { - if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - PyObject *nonfmt; - Py_ssize_t nonfmtpos; - nonfmtpos = fmtpos++; - while (fmtcnt >= 0 && - PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - fmtpos++; - fmtcnt--; - } - nonfmt = PyUnicode_Substring(uformat, nonfmtpos, fmtpos); - if (nonfmt == NULL) - goto onError; - r = _PyAccu_Accumulate(&acc, nonfmt); - Py_DECREF(nonfmt); - if (r) - goto onError; - } - else { - /* Got a format specifier */ - int flags = 0; - Py_ssize_t width = -1; - int prec = -1; - Py_UCS4 c = '\0'; - Py_UCS4 fill, sign; - int isnumok; - PyObject *v = NULL; - void *pbuf = NULL; - Py_ssize_t pindex, len; - PyObject *signobj = NULL, *fillobj = NULL; - - fmtpos++; - if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(') { - Py_ssize_t keystart; - Py_ssize_t keylen; - PyObject *key; - int pcount = 1; - - if (dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); - goto onError; - } - ++fmtpos; - --fmtcnt; - keystart = fmtpos; - /* Skip over balanced parentheses */ - while (pcount > 0 && --fmtcnt >= 0) { - if (PyUnicode_READ(fmtkind, fmt, fmtpos) == ')') - --pcount; - else if (PyUnicode_READ(fmtkind, fmt, fmtpos) == '(') - ++pcount; - fmtpos++; - } - keylen = fmtpos - keystart - 1; - if (fmtcnt < 0 || pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); - goto onError; - } - key = PyUnicode_Substring(uformat, - keystart, keystart + keylen); - if (key == NULL) - goto onError; - if (args_owned) { - Py_DECREF(args); - args_owned = 0; - } - args = PyObject_GetItem(dict, key); - Py_DECREF(key); - if (args == NULL) { - goto onError; - } - args_owned = 1; - arglen = -1; - argidx = -2; - } - while (--fmtcnt >= 0) { - switch (c = PyUnicode_READ(fmtkind, fmt, fmtpos++)) { - case '-': flags |= F_LJUST; continue; - case '+': flags |= F_SIGN; continue; - case ' ': flags |= F_BLANK; continue; - case '#': flags |= F_ALT; continue; - case '0': flags |= F_ZERO; continue; - } - break; - } - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - width = PyLong_AsLong(v); - if (width == -1 && PyErr_Occurred()) - goto onError; - if (width < 0) { - flags |= F_LJUST; - width = -width; - } - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - width = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - if ((width*10) / 10 != width) { - PyErr_SetString(PyExc_ValueError, - "width too big"); - goto onError; - } - width = width*10 + (c - '0'); - } - } - if (c == '.') { - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - prec = PyLong_AsLong(v); - if (prec == -1 && PyErr_Occurred()) - goto onError; - if (prec < 0) - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - prec = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - if ((prec*10) / 10 != prec) { - PyErr_SetString(PyExc_ValueError, - "prec too big"); - goto onError; - } - prec = prec*10 + (c - '0'); - } - } - } /* prec */ - if (fmtcnt >= 0) { - if (c == 'h' || c == 'l' || c == 'L') { - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - } - if (fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); - goto onError; - } - if (c != '%') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - } - sign = 0; - fill = ' '; - fillobj = blank; - switch (c) { - - case '%': - _PyAccu_Accumulate(&acc, percent); - continue; - - case 's': - case 'r': - case 'a': - if (PyUnicode_CheckExact(v) && c == 's') { - temp = v; - Py_INCREF(temp); - } - else { - if (c == 's') - temp = PyObject_Str(v); - else if (c == 'r') - temp = PyObject_Repr(v); - else - temp = PyObject_ASCII(v); - if (temp == NULL) - goto onError; - if (PyUnicode_Check(temp)) - /* nothing to do */; - else { - Py_DECREF(temp); - PyErr_SetString(PyExc_TypeError, - "%s argument has non-string str()"); - goto onError; - } - } - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - pbuf = PyUnicode_DATA(temp); - kind = PyUnicode_KIND(temp); - len = PyUnicode_GET_LENGTH(temp); - if (prec >= 0 && len > prec) - len = prec; - break; - - case 'i': - case 'd': - case 'u': - case 'o': - case 'x': - case 'X': - isnumok = 0; - if (PyNumber_Check(v)) { - PyObject *iobj=NULL; - - if (PyLong_Check(v)) { - iobj = v; - Py_INCREF(iobj); - } - else { - iobj = PyNumber_Long(v); - } - if (iobj!=NULL) { - if (PyLong_Check(iobj)) { - isnumok = 1; - temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); - Py_DECREF(iobj); - if (!temp) - goto onError; - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - pbuf = PyUnicode_DATA(temp); - kind = PyUnicode_KIND(temp); - len = PyUnicode_GET_LENGTH(temp); - sign = 1; - } - else { - Py_DECREF(iobj); - } - } - } - if (!isnumok) { - PyErr_Format(PyExc_TypeError, - "%%%c format: a number is required, " - "not %.200s", (char)c, Py_TYPE(v)->tp_name); - goto onError; - } - if (flags & F_ZERO) { - fill = '0'; - fillobj = zero; - } - break; - - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - temp = formatfloat(v, flags, prec, c); - if (!temp) - goto onError; - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - pbuf = PyUnicode_DATA(temp); - kind = PyUnicode_KIND(temp); - len = PyUnicode_GET_LENGTH(temp); - sign = 1; - if (flags & F_ZERO) { - fill = '0'; - fillobj = zero; - } - break; - - case 'c': - { - Py_UCS4 ch = formatchar(v); - if (ch == (Py_UCS4) -1) - goto onError; - temp = _PyUnicode_FromUCS4(&ch, 1); - if (temp == NULL) - goto onError; - pbuf = PyUnicode_DATA(temp); - kind = PyUnicode_KIND(temp); - len = PyUnicode_GET_LENGTH(temp); - break; - } - - default: - PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - (31<=c && c<=126) ? (char)c : '?', - (int)c, - fmtpos - 1); - goto onError; - } - /* pbuf is initialized here. */ - pindex = 0; - if (sign) { - if (PyUnicode_READ(kind, pbuf, pindex) == '-') { - signobj = minus; - len--; - pindex++; - } - else if (PyUnicode_READ(kind, pbuf, pindex) == '+') { - signobj = plus; - len--; - pindex++; - } - else if (flags & F_SIGN) - signobj = plus; - else if (flags & F_BLANK) - signobj = blank; - else - sign = 0; - } - if (width < len) - width = len; - if (sign) { - if (fill != ' ') { - assert(signobj != NULL); - if (_PyAccu_Accumulate(&acc, signobj)) - goto onError; - } - if (width > len) - width--; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); - if (fill != ' ') { - second = get_latin1_char( - PyUnicode_READ(kind, pbuf, pindex + 1)); - pindex += 2; - if (second == NULL || - _PyAccu_Accumulate(&acc, zero) || - _PyAccu_Accumulate(&acc, second)) - goto onError; - Py_CLEAR(second); - } - width -= 2; - if (width < 0) - width = 0; - len -= 2; - } - if (width > len && !(flags & F_LJUST)) { - assert(fillobj != NULL); - if (repeat_accumulate(&acc, fillobj, width - len)) - goto onError; - width = len; - } - if (fill == ' ') { - if (sign) { - assert(signobj != NULL); - if (_PyAccu_Accumulate(&acc, signobj)) - goto onError; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - second = get_latin1_char( - PyUnicode_READ(kind, pbuf, pindex + 1)); - pindex += 2; - if (second == NULL || - _PyAccu_Accumulate(&acc, zero) || - _PyAccu_Accumulate(&acc, second)) - goto onError; - Py_CLEAR(second); - } - } - /* Copy all characters, preserving len */ - if (temp != NULL) { - assert(pbuf == PyUnicode_DATA(temp)); - v = PyUnicode_Substring(temp, pindex, pindex + len); - } - else { - const char *p = (const char *) pbuf; - assert(pbuf != NULL); - p += kind * pindex; - v = PyUnicode_FromKindAndData(kind, p, len); - } - if (v == NULL) - goto onError; - r = _PyAccu_Accumulate(&acc, v); - Py_DECREF(v); - if (r) - goto onError; - if (width > len && repeat_accumulate(&acc, blank, width - len)) - goto onError; - if (dict && (argidx < arglen) && c != '%') { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); - goto onError; - } - Py_CLEAR(temp); - } /* '%' */ - } /* until end */ - if (argidx < arglen && !dict) { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); - goto onError; - } - - result = _PyAccu_Finish(&acc); - if (args_owned) { - Py_DECREF(args); - } - Py_DECREF(uformat); - Py_XDECREF(temp); - Py_XDECREF(second); - return result; - - onError: - Py_DECREF(uformat); - Py_XDECREF(temp); - Py_XDECREF(second); - _PyAccu_Destroy(&acc); - if (args_owned) { - Py_DECREF(args); - } - return NULL; -} - -static PyObject * -unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); - -static PyObject * -unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *x = NULL; - static char *kwlist[] = {"object", "encoding", "errors", 0}; - char *encoding = NULL; - char *errors = NULL; - - if (type != &PyUnicode_Type) - return unicode_subtype_new(type, args, kwds); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", - kwlist, &x, &encoding, &errors)) - return NULL; - if (x == NULL) - return PyUnicode_New(0, 0); - if (encoding == NULL && errors == NULL) - return PyObject_Str(x); - else - return PyUnicode_FromEncodedObject(x, encoding, errors); -} - -static PyObject * -unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *unicode, *self; - Py_ssize_t length, char_size; - int share_wstr, share_utf8; - unsigned int kind; - void *data; - - assert(PyType_IsSubtype(type, &PyUnicode_Type)); - - unicode = unicode_new(&PyUnicode_Type, args, kwds); - if (unicode == NULL) - return NULL; - assert(_PyUnicode_CHECK(unicode)); - if (PyUnicode_READY(unicode)) - return NULL; - - self = type->tp_alloc(type, 0); - if (self == NULL) { - Py_DECREF(unicode); - return NULL; - } - kind = PyUnicode_KIND(unicode); - length = PyUnicode_GET_LENGTH(unicode); - - _PyUnicode_LENGTH(self) = length; -#ifdef Py_DEBUG - _PyUnicode_HASH(self) = -1; -#else - _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); -#endif - _PyUnicode_STATE(self).interned = 0; - _PyUnicode_STATE(self).kind = kind; - _PyUnicode_STATE(self).compact = 0; - _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; - _PyUnicode_STATE(self).ready = 1; - _PyUnicode_WSTR(self) = NULL; - _PyUnicode_UTF8_LENGTH(self) = 0; - _PyUnicode_UTF8(self) = NULL; - _PyUnicode_WSTR_LENGTH(self) = 0; - _PyUnicode_DATA_ANY(self) = NULL; - - share_utf8 = 0; - share_wstr = 0; - if (kind == PyUnicode_1BYTE_KIND) { - char_size = 1; - if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) - share_utf8 = 1; - } - else if (kind == PyUnicode_2BYTE_KIND) { - char_size = 2; - if (sizeof(wchar_t) == 2) - share_wstr = 1; - } - else { - assert(kind == PyUnicode_4BYTE_KIND); - char_size = 4; - if (sizeof(wchar_t) == 4) - share_wstr = 1; - } - - /* Ensure we won't overflow the length. */ - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { - PyErr_NoMemory(); - goto onError; - } - data = PyObject_MALLOC((length + 1) * char_size); - if (data == NULL) { - PyErr_NoMemory(); - goto onError; - } - - _PyUnicode_DATA_ANY(self) = data; - if (share_utf8) { - _PyUnicode_UTF8_LENGTH(self) = length; - _PyUnicode_UTF8(self) = data; - } - if (share_wstr) { - _PyUnicode_WSTR_LENGTH(self) = length; - _PyUnicode_WSTR(self) = (wchar_t *)data; - } - - Py_MEMCPY(data, PyUnicode_DATA(unicode), - kind * (length + 1)); - assert(_PyUnicode_CheckConsistency(self, 1)); -#ifdef Py_DEBUG - _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); -#endif - Py_DECREF(unicode); - return self; - -onError: - Py_DECREF(unicode); - Py_DECREF(self); - return NULL; -} - -PyDoc_STRVAR(unicode_doc, - "str(string[, encoding[, errors]]) -> str\n\ -\n\ -Create a new string object from the given encoded string.\n\ -encoding defaults to the current default string encoding.\n\ -errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."); - -static PyObject *unicode_iter(PyObject *seq); - -PyTypeObject PyUnicode_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "str", /* tp_name */ - sizeof(PyUnicodeObject), /* tp_size */ - 0, /* tp_itemsize */ - /* Slots */ - (destructor)unicode_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - unicode_repr, /* tp_repr */ - &unicode_as_number, /* tp_as_number */ - &unicode_as_sequence, /* tp_as_sequence */ - &unicode_as_mapping, /* tp_as_mapping */ - (hashfunc) unicode_hash, /* tp_hash*/ - 0, /* tp_call*/ - (reprfunc) unicode_str, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */ - unicode_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - PyUnicode_RichCompare, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - unicode_iter, /* tp_iter */ - 0, /* tp_iternext */ - unicode_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - &PyBaseObject_Type, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - unicode_new, /* tp_new */ - PyObject_Del, /* tp_free */ -}; - -/* Initialize the Unicode implementation */ - -int _PyUnicode_Init(void) -{ - int i; - - /* XXX - move this array to unicodectype.c ? */ - Py_UCS2 linebreak[] = { - 0x000A, /* LINE FEED */ - 0x000D, /* CARRIAGE RETURN */ - 0x001C, /* FILE SEPARATOR */ - 0x001D, /* GROUP SEPARATOR */ - 0x001E, /* RECORD SEPARATOR */ - 0x0085, /* NEXT LINE */ - 0x2028, /* LINE SEPARATOR */ - 0x2029, /* PARAGRAPH SEPARATOR */ - }; - - /* Init the implementation */ - unicode_empty = PyUnicode_New(0, 0); - if (!unicode_empty) - Py_FatalError("Can't create empty string"); - assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); - - for (i = 0; i < 256; i++) - unicode_latin1[i] = NULL; - if (PyType_Ready(&PyUnicode_Type) < 0) - Py_FatalError("Can't initialize 'unicode'"); - - /* initialize the linebreak bloom filter */ - bloom_linebreak = make_bloom_mask( - PyUnicode_2BYTE_KIND, linebreak, - Py_ARRAY_LENGTH(linebreak)); - - PyType_Ready(&EncodingMapType); - -#ifdef HAVE_MBCS - winver.dwOSVersionInfoSize = sizeof(winver); - if (!GetVersionEx((OSVERSIONINFO*)&winver)) { - PyErr_SetFromWindowsErr(0); - return -1; - } -#endif - return 0; -} - -/* Finalize the Unicode implementation */ - -int -PyUnicode_ClearFreeList(void) -{ - return 0; -} - -void -_PyUnicode_Fini(void) -{ - int i; - - Py_XDECREF(unicode_empty); - unicode_empty = NULL; - - for (i = 0; i < 256; i++) { - if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] = NULL; - } - } - _PyUnicode_ClearStaticStrings(); - (void)PyUnicode_ClearFreeList(); -} - -void -PyUnicode_InternInPlace(PyObject **p) -{ - register PyObject *s = *p; - PyObject *t; -#ifdef Py_DEBUG - assert(s != NULL); - assert(_PyUnicode_CHECK(s)); -#else - if (s == NULL || !PyUnicode_Check(s)) - return; -#endif - /* If it's a subclass, we don't really know what putting - it in the interned dict might do. */ - if (!PyUnicode_CheckExact(s)) - return; - if (PyUnicode_CHECK_INTERNED(s)) - return; - if (interned == NULL) { - interned = PyDict_New(); - if (interned == NULL) { - PyErr_Clear(); /* Don't leave an exception */ - return; - } - } - /* It might be that the GetItem call fails even - though the key is present in the dictionary, - namely when this happens during a stack overflow. */ - Py_ALLOW_RECURSION - t = PyDict_GetItem(interned, s); - Py_END_ALLOW_RECURSION - - if (t) { - Py_INCREF(t); - Py_DECREF(*p); - *p = t; - return; - } - - PyThreadState_GET()->recursion_critical = 1; - if (PyDict_SetItem(interned, s, s) < 0) { - PyErr_Clear(); - PyThreadState_GET()->recursion_critical = 0; - return; - } - PyThreadState_GET()->recursion_critical = 0; - /* The two references in interned are not counted by refcnt. - The deallocator will take care of this */ - Py_REFCNT(s) -= 2; - _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; -} - -void -PyUnicode_InternImmortal(PyObject **p) -{ - PyUnicode_InternInPlace(p); - if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; - Py_INCREF(*p); - } -} - -PyObject * -PyUnicode_InternFromString(const char *cp) -{ - PyObject *s = PyUnicode_FromString(cp); - if (s == NULL) - return NULL; - PyUnicode_InternInPlace(&s); - return s; -} - -void -_Py_ReleaseInternedUnicodeStrings(void) -{ - PyObject *keys; - PyObject *s; - Py_ssize_t i, n; - Py_ssize_t immortal_size = 0, mortal_size = 0; - - if (interned == NULL || !PyDict_Check(interned)) - return; - keys = PyDict_Keys(interned); - if (keys == NULL || !PyList_Check(keys)) { - PyErr_Clear(); - return; - } - - /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak - detector, interned unicode strings are not forcibly deallocated; - rather, we give them their stolen references back, and then clear - and DECREF the interned dict. */ - - n = PyList_GET_SIZE(keys); - fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", - n); - for (i = 0; i < n; i++) { - s = PyList_GET_ITEM(keys, i); - if (PyUnicode_READY(s) == -1) { - assert(0 && "could not ready string"); - fprintf(stderr, "could not ready string\n"); - } - switch (PyUnicode_CHECK_INTERNED(s)) { - case SSTATE_NOT_INTERNED: - /* XXX Shouldn't happen */ - break; - case SSTATE_INTERNED_IMMORTAL: - Py_REFCNT(s) += 1; - immortal_size += PyUnicode_GET_LENGTH(s); - break; - case SSTATE_INTERNED_MORTAL: - Py_REFCNT(s) += 2; - mortal_size += PyUnicode_GET_LENGTH(s); - break; - default: - Py_FatalError("Inconsistent interned string state."); - } - _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; - } - fprintf(stderr, "total size of all interned strings: " - "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " - "mortal/immortal\n", mortal_size, immortal_size); - Py_DECREF(keys); - PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; -} - - -/********************* Unicode Iterator **************************/ - -typedef struct { - PyObject_HEAD - Py_ssize_t it_index; - PyObject *it_seq; /* Set to NULL when iterator is exhausted */ -} unicodeiterobject; - -static void -unicodeiter_dealloc(unicodeiterobject *it) -{ - _PyObject_GC_UNTRACK(it); - Py_XDECREF(it->it_seq); - PyObject_GC_Del(it); -} - -static int -unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg) -{ - Py_VISIT(it->it_seq); - return 0; -} - -static PyObject * -unicodeiter_next(unicodeiterobject *it) -{ - PyObject *seq, *item; - - assert(it != NULL); - seq = it->it_seq; - if (seq == NULL) - return NULL; - assert(_PyUnicode_CHECK(seq)); - - if (it->it_index < PyUnicode_GET_LENGTH(seq)) { - int kind = PyUnicode_KIND(seq); - void *data = PyUnicode_DATA(seq); - Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index); - item = PyUnicode_FromOrdinal(chr); - if (item != NULL) - ++it->it_index; - return item; - } - - Py_DECREF(seq); - it->it_seq = NULL; - return NULL; -} - -static PyObject * -unicodeiter_len(unicodeiterobject *it) -{ - Py_ssize_t len = 0; - if (it->it_seq) - len = PyUnicode_GET_LENGTH(it->it_seq) - it->it_index; - return PyLong_FromSsize_t(len); -} - -PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); - -static PyMethodDef unicodeiter_methods[] = { - {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS, - length_hint_doc}, - {NULL, NULL} /* sentinel */ -}; - -PyTypeObject PyUnicodeIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "str_iterator", /* tp_name */ - sizeof(unicodeiterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)unicodeiter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - 0, /* tp_doc */ - (traverseproc)unicodeiter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)unicodeiter_next, /* tp_iternext */ - unicodeiter_methods, /* tp_methods */ - 0, -}; - -static PyObject * -unicode_iter(PyObject *seq) -{ - unicodeiterobject *it; - - if (!PyUnicode_Check(seq)) { - PyErr_BadInternalCall(); - return NULL; - } - if (PyUnicode_READY(seq) == -1) - return NULL; - it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type); - if (it == NULL) - return NULL; - it->it_index = 0; - Py_INCREF(seq); - it->it_seq = seq; - _PyObject_GC_TRACK(it); - return (PyObject *)it; -} - - -size_t -Py_UNICODE_strlen(const Py_UNICODE *u) -{ - int res = 0; - while(*u++) - res++; - return res; -} - -Py_UNICODE* -Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) -{ - Py_UNICODE *u = s1; - while ((*u++ = *s2++)); - return s1; -} - -Py_UNICODE* -Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) -{ - Py_UNICODE *u = s1; - while ((*u++ = *s2++)) - if (n-- == 0) - break; - return s1; -} - -Py_UNICODE* -Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2) -{ - Py_UNICODE *u1 = s1; - u1 += Py_UNICODE_strlen(u1); - Py_UNICODE_strcpy(u1, s2); - return s1; -} - -int -Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) -{ - while (*s1 && *s2 && *s1 == *s2) - s1++, s2++; - if (*s1 && *s2) - return (*s1 < *s2) ? -1 : +1; - if (*s1) - return 1; - if (*s2) - return -1; - return 0; -} - -int -Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) -{ - register Py_UNICODE u1, u2; - for (; n != 0; n--) { - u1 = *s1; - u2 = *s2; - if (u1 != u2) - return (u1 < u2) ? -1 : +1; - if (u1 == '\0') - return 0; - s1++; - s2++; - } - return 0; -} - -Py_UNICODE* -Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) -{ - const Py_UNICODE *p; - for (p = s; *p; p++) - if (*p == c) - return (Py_UNICODE*)p; - return NULL; -} - -Py_UNICODE* -Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) -{ - const Py_UNICODE *p; - p = s + Py_UNICODE_strlen(s); - while (p != s) { - p--; - if (*p == c) - return (Py_UNICODE*)p; - } - return NULL; -} - -Py_UNICODE* -PyUnicode_AsUnicodeCopy(PyObject *unicode) -{ - Py_UNICODE *u, *copy; - Py_ssize_t len, size; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - u = PyUnicode_AsUnicodeAndSize(unicode, &len); - if (u == NULL) - return NULL; - /* Ensure we won't overflow the size. */ - if (len > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { - PyErr_NoMemory(); - return NULL; - } - size = len + 1; /* copy the null character */ - size *= sizeof(Py_UNICODE); - copy = PyMem_Malloc(size); - if (copy == NULL) { - PyErr_NoMemory(); - return NULL; - } - memcpy(copy, u, size); - return copy; -} - -/* A _string module, to export formatter_parser and formatter_field_name_split - to the string.Formatter class implemented in Python. */ - -static PyMethodDef _string_methods[] = { - {"formatter_field_name_split", (PyCFunction) formatter_field_name_split, - METH_O, PyDoc_STR("split the argument as a field name")}, - {"formatter_parser", (PyCFunction) formatter_parser, - METH_O, PyDoc_STR("parse the argument as a format string")}, - {NULL, NULL} -}; - -static struct PyModuleDef _string_module = { - PyModuleDef_HEAD_INIT, - "_string", - PyDoc_STR("string helper module"), - 0, - _string_methods, - NULL, - NULL, - NULL, - NULL -}; - -PyMODINIT_FUNC -PyInit__string(void) -{ - return PyModule_Create(&_string_module); -} - -
--- a/cos/python/Python/bltinmodule.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2413 +0,0 @@ -/* Built-in functions */ - -#include "Python.h" -#include "Python-ast.h" - -#include "node.h" -#include "code.h" - -#include "asdl.h" -#include "ast.h" - -#include <ctype.h> - -#ifdef HAVE_LANGINFO_H -#include <langinfo.h> /* CODESET */ -#endif - -/* The default encoding used by the platform file system APIs - Can remain NULL for all platforms that don't have such a concept - - Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the - values for Py_FileSystemDefaultEncoding! -*/ -#ifdef HAVE_MBCS -const char *Py_FileSystemDefaultEncoding = "mbcs"; -int Py_HasFileSystemDefaultEncoding = 1; -#elif defined(__APPLE__) -const char *Py_FileSystemDefaultEncoding = "utf-8"; -int Py_HasFileSystemDefaultEncoding = 1; -#else -const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */ -int Py_HasFileSystemDefaultEncoding = 0; -#endif - -_Py_IDENTIFIER(fileno); -_Py_IDENTIFIER(flush); - -static PyObject * -builtin___build_class__(PyObject *self, PyObject *args, PyObject *kwds) -{ - PyObject *func, *name, *bases, *mkw, *meta, *winner, *prep, *ns, *cell; - PyObject *cls = NULL; - Py_ssize_t nargs; - int isclass; - _Py_IDENTIFIER(__prepare__); - - assert(args != NULL); - if (!PyTuple_Check(args)) { - PyErr_SetString(PyExc_TypeError, - "__build_class__: args is not a tuple"); - return NULL; - } - nargs = PyTuple_GET_SIZE(args); - if (nargs < 2) { - PyErr_SetString(PyExc_TypeError, - "__build_class__: not enough arguments"); - return NULL; - } - func = PyTuple_GET_ITEM(args, 0); /* Better be callable */ - name = PyTuple_GET_ITEM(args, 1); - if (!PyUnicode_Check(name)) { - PyErr_SetString(PyExc_TypeError, - "__build_class__: name is not a string"); - return NULL; - } - bases = PyTuple_GetSlice(args, 2, nargs); - if (bases == NULL) - return NULL; - - if (kwds == NULL) { - meta = NULL; - mkw = NULL; - } - else { - mkw = PyDict_Copy(kwds); /* Don't modify kwds passed in! */ - if (mkw == NULL) { - Py_DECREF(bases); - return NULL; - } - meta = PyDict_GetItemString(mkw, "metaclass"); - if (meta != NULL) { - Py_INCREF(meta); - if (PyDict_DelItemString(mkw, "metaclass") < 0) { - Py_DECREF(meta); - Py_DECREF(mkw); - Py_DECREF(bases); - return NULL; - } - /* metaclass is explicitly given, check if it's indeed a class */ - isclass = PyType_Check(meta); - } - } - if (meta == NULL) { - /* if there are no bases, use type: */ - if (PyTuple_GET_SIZE(bases) == 0) { - meta = (PyObject *) (&PyType_Type); - } - /* else get the type of the first base */ - else { - PyObject *base0 = PyTuple_GET_ITEM(bases, 0); - meta = (PyObject *) (base0->ob_type); - } - Py_INCREF(meta); - isclass = 1; /* meta is really a class */ - } - - if (isclass) { - /* meta is really a class, so check for a more derived - metaclass, or possible metaclass conflicts: */ - winner = (PyObject *)_PyType_CalculateMetaclass((PyTypeObject *)meta, - bases); - if (winner == NULL) { - Py_DECREF(meta); - Py_XDECREF(mkw); - Py_DECREF(bases); - return NULL; - } - if (winner != meta) { - Py_DECREF(meta); - meta = winner; - Py_INCREF(meta); - } - } - /* else: meta is not a class, so we cannot do the metaclass - calculation, so we will use the explicitly given object as it is */ - prep = _PyObject_GetAttrId(meta, &PyId___prepare__); - if (prep == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - ns = PyDict_New(); - } - else { - Py_DECREF(meta); - Py_XDECREF(mkw); - Py_DECREF(bases); - return NULL; - } - } - else { - PyObject *pargs = PyTuple_Pack(2, name, bases); - if (pargs == NULL) { - Py_DECREF(prep); - Py_DECREF(meta); - Py_XDECREF(mkw); - Py_DECREF(bases); - return NULL; - } - ns = PyEval_CallObjectWithKeywords(prep, pargs, mkw); - Py_DECREF(pargs); - Py_DECREF(prep); - } - if (ns == NULL) { - Py_DECREF(meta); - Py_XDECREF(mkw); - Py_DECREF(bases); - return NULL; - } - cell = PyObject_CallFunctionObjArgs(func, ns, NULL); - if (cell != NULL) { - PyObject *margs; - margs = PyTuple_Pack(3, name, bases, ns); - if (margs != NULL) { - cls = PyEval_CallObjectWithKeywords(meta, margs, mkw); - Py_DECREF(margs); - } - if (cls != NULL && PyCell_Check(cell)) { - Py_INCREF(cls); - PyCell_SET(cell, cls); - } - Py_DECREF(cell); - } - Py_DECREF(ns); - Py_DECREF(meta); - Py_XDECREF(mkw); - Py_DECREF(bases); - return cls; -} - -PyDoc_STRVAR(build_class_doc, -"__build_class__(func, name, *bases, metaclass=None, **kwds) -> class\n\ -\n\ -Internal helper function used by the class statement."); - -static PyObject * -builtin___import__(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"name", "globals", "locals", "fromlist", - "level", 0}; - PyObject *name, *globals = NULL, *locals = NULL, *fromlist = NULL; - int level = -1; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "U|OOOi:__import__", - kwlist, &name, &globals, &locals, &fromlist, &level)) - return NULL; - return PyImport_ImportModuleLevelObject(name, globals, locals, - fromlist, level); -} - -PyDoc_STRVAR(import_doc, -"__import__(name, globals={}, locals={}, fromlist=[], level=-1) -> module\n\ -\n\ -Import a module. Because this function is meant for use by the Python\n\ -interpreter and not for general use it is better to use\n\ -importlib.import_module() to programmatically import a module.\n\ -\n\ -The globals argument is only used to determine the context;\n\ -they are not modified. The locals argument is unused. The fromlist\n\ -should be a list of names to emulate ``from name import ...'', or an\n\ -empty list to emulate ``import name''.\n\ -When importing a module from a package, note that __import__('A.B', ...)\n\ -returns package A when fromlist is empty, but its submodule B when\n\ -fromlist is not empty. Level is used to determine whether to perform \n\ -absolute or relative imports. -1 is the original strategy of attempting\n\ -both absolute and relative imports, 0 is absolute, a positive number\n\ -is the number of parent directories to search relative to the current module."); - - -static PyObject * -builtin_abs(PyObject *self, PyObject *v) -{ - return PyNumber_Absolute(v); -} - -PyDoc_STRVAR(abs_doc, -"abs(number) -> number\n\ -\n\ -Return the absolute value of the argument."); - -static PyObject * -builtin_all(PyObject *self, PyObject *v) -{ - PyObject *it, *item; - PyObject *(*iternext)(PyObject *); - int cmp; - - it = PyObject_GetIter(v); - if (it == NULL) - return NULL; - iternext = *Py_TYPE(it)->tp_iternext; - - for (;;) { - item = iternext(it); - if (item == NULL) - break; - cmp = PyObject_IsTrue(item); - Py_DECREF(item); - if (cmp < 0) { - Py_DECREF(it); - return NULL; - } - if (cmp == 0) { - Py_DECREF(it); - Py_RETURN_FALSE; - } - } - Py_DECREF(it); - if (PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) - PyErr_Clear(); - else - return NULL; - } - Py_RETURN_TRUE; -} - -PyDoc_STRVAR(all_doc, -"all(iterable) -> bool\n\ -\n\ -Return True if bool(x) is True for all values x in the iterable."); - -static PyObject * -builtin_any(PyObject *self, PyObject *v) -{ - PyObject *it, *item; - PyObject *(*iternext)(PyObject *); - int cmp; - - it = PyObject_GetIter(v); - if (it == NULL) - return NULL; - iternext = *Py_TYPE(it)->tp_iternext; - - for (;;) { - item = iternext(it); - if (item == NULL) - break; - cmp = PyObject_IsTrue(item); - Py_DECREF(item); - if (cmp < 0) { - Py_DECREF(it); - return NULL; - } - if (cmp == 1) { - Py_DECREF(it); - Py_RETURN_TRUE; - } - } - Py_DECREF(it); - if (PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) - PyErr_Clear(); - else - return NULL; - } - Py_RETURN_FALSE; -} - -PyDoc_STRVAR(any_doc, -"any(iterable) -> bool\n\ -\n\ -Return True if bool(x) is True for any x in the iterable."); - -static PyObject * -builtin_ascii(PyObject *self, PyObject *v) -{ - return PyObject_ASCII(v); -} - -PyDoc_STRVAR(ascii_doc, -"ascii(object) -> string\n\ -\n\ -As repr(), return a string containing a printable representation of an\n\ -object, but escape the non-ASCII characters in the string returned by\n\ -repr() using \\x, \\u or \\U escapes. This generates a string similar\n\ -to that returned by repr() in Python 2."); - - -static PyObject * -builtin_bin(PyObject *self, PyObject *v) -{ - return PyNumber_ToBase(v, 2); -} - -PyDoc_STRVAR(bin_doc, -"bin(number) -> string\n\ -\n\ -Return the binary representation of an integer."); - - -static PyObject * -builtin_callable(PyObject *self, PyObject *v) -{ - return PyBool_FromLong((long)PyCallable_Check(v)); -} - -PyDoc_STRVAR(callable_doc, -"callable(object) -> bool\n\ -\n\ -Return whether the object is callable (i.e., some kind of function).\n\ -Note that classes are callable, as are instances of classes with a\n\ -__call__() method."); - - -typedef struct { - PyObject_HEAD - PyObject *func; - PyObject *it; -} filterobject; - -static PyObject * -filter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *func, *seq; - PyObject *it; - filterobject *lz; - - if (type == &PyFilter_Type && !_PyArg_NoKeywords("filter()", kwds)) - return NULL; - - if (!PyArg_UnpackTuple(args, "filter", 2, 2, &func, &seq)) - return NULL; - - /* Get iterator. */ - it = PyObject_GetIter(seq); - if (it == NULL) - return NULL; - - /* create filterobject structure */ - lz = (filterobject *)type->tp_alloc(type, 0); - if (lz == NULL) { - Py_DECREF(it); - return NULL; - } - Py_INCREF(func); - lz->func = func; - lz->it = it; - - return (PyObject *)lz; -} - -static void -filter_dealloc(filterobject *lz) -{ - PyObject_GC_UnTrack(lz); - Py_XDECREF(lz->func); - Py_XDECREF(lz->it); - Py_TYPE(lz)->tp_free(lz); -} - -static int -filter_traverse(filterobject *lz, visitproc visit, void *arg) -{ - Py_VISIT(lz->it); - Py_VISIT(lz->func); - return 0; -} - -static PyObject * -filter_next(filterobject *lz) -{ - PyObject *item; - PyObject *it = lz->it; - long ok; - PyObject *(*iternext)(PyObject *); - - iternext = *Py_TYPE(it)->tp_iternext; - for (;;) { - item = iternext(it); - if (item == NULL) - return NULL; - - if (lz->func == Py_None || lz->func == (PyObject *)&PyBool_Type) { - ok = PyObject_IsTrue(item); - } else { - PyObject *good; - good = PyObject_CallFunctionObjArgs(lz->func, - item, NULL); - if (good == NULL) { - Py_DECREF(item); - return NULL; - } - ok = PyObject_IsTrue(good); - Py_DECREF(good); - } - if (ok) - return item; - Py_DECREF(item); - } -} - -PyDoc_STRVAR(filter_doc, -"filter(function or None, iterable) --> filter object\n\ -\n\ -Return an iterator yielding those items of iterable for which function(item)\n\ -is true. If function is None, return the items that are true."); - -PyTypeObject PyFilter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "filter", /* tp_name */ - sizeof(filterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)filter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE, /* tp_flags */ - filter_doc, /* tp_doc */ - (traverseproc)filter_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)filter_next, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - filter_new, /* tp_new */ - PyObject_GC_Del, /* tp_free */ -}; - - -static PyObject * -builtin_format(PyObject *self, PyObject *args) -{ - PyObject *value; - PyObject *format_spec = NULL; - - if (!PyArg_ParseTuple(args, "O|U:format", &value, &format_spec)) - return NULL; - - return PyObject_Format(value, format_spec); -} - -PyDoc_STRVAR(format_doc, -"format(value[, format_spec]) -> string\n\ -\n\ -Returns value.__format__(format_spec)\n\ -format_spec defaults to \"\""); - -static PyObject * -builtin_chr(PyObject *self, PyObject *args) -{ - int x; - - if (!PyArg_ParseTuple(args, "i:chr", &x)) - return NULL; - - return PyUnicode_FromOrdinal(x); -} - -PyDoc_STRVAR(chr_doc, -"chr(i) -> Unicode character\n\ -\n\ -Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff."); - - -static char * -source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf) -{ - char *str; - Py_ssize_t size; - - if (PyUnicode_Check(cmd)) { - cf->cf_flags |= PyCF_IGNORE_COOKIE; - str = PyUnicode_AsUTF8AndSize(cmd, &size); - if (str == NULL) - return NULL; - } - else if (!PyObject_CheckReadBuffer(cmd)) { - PyErr_Format(PyExc_TypeError, - "%s() arg 1 must be a %s object", - funcname, what); - return NULL; - } - else if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) { - return NULL; - } - - if (strlen(str) != size) { - PyErr_SetString(PyExc_TypeError, - "source code string cannot contain null bytes"); - return NULL; - } - return str; -} - -static PyObject * -builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) -{ - char *str; - PyObject *filename_obj; - char *filename; - char *startstr; - int mode = -1; - int dont_inherit = 0; - int supplied_flags = 0; - int optimize = -1; - int is_ast; - PyCompilerFlags cf; - PyObject *cmd; - static char *kwlist[] = {"source", "filename", "mode", "flags", - "dont_inherit", "optimize", NULL}; - int start[] = {Py_file_input, Py_eval_input, Py_single_input}; - PyObject *result; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|iii:compile", kwlist, - &cmd, - PyUnicode_FSConverter, &filename_obj, - &startstr, &supplied_flags, - &dont_inherit, &optimize)) - return NULL; - - filename = PyBytes_AS_STRING(filename_obj); - cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; - - if (supplied_flags & - ~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST)) - { - PyErr_SetString(PyExc_ValueError, - "compile(): unrecognised flags"); - goto error; - } - /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ - - if (optimize < -1 || optimize > 2) { - PyErr_SetString(PyExc_ValueError, - "compile(): invalid optimize value"); - goto error; - } - - if (!dont_inherit) { - PyEval_MergeCompilerFlags(&cf); - } - - if (strcmp(startstr, "exec") == 0) - mode = 0; - else if (strcmp(startstr, "eval") == 0) - mode = 1; - else if (strcmp(startstr, "single") == 0) - mode = 2; - else { - PyErr_SetString(PyExc_ValueError, - "compile() arg 3 must be 'exec', 'eval' or 'single'"); - goto error; - } - - is_ast = PyAST_Check(cmd); - if (is_ast == -1) - goto error; - if (is_ast) { - if (supplied_flags & PyCF_ONLY_AST) { - Py_INCREF(cmd); - result = cmd; - } - else { - PyArena *arena; - mod_ty mod; - - arena = PyArena_New(); - mod = PyAST_obj2mod(cmd, arena, mode); - if (mod == NULL) { - PyArena_Free(arena); - goto error; - } - if (!PyAST_Validate(mod)) { - PyArena_Free(arena); - goto error; - } - result = (PyObject*)PyAST_CompileEx(mod, filename, - &cf, optimize, arena); - PyArena_Free(arena); - } - goto finally; - } - - str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf); - if (str == NULL) - goto error; - - result = Py_CompileStringExFlags(str, filename, start[mode], &cf, optimize); - goto finally; - -error: - result = NULL; -finally: - Py_DECREF(filename_obj); - return result; -} - -PyDoc_STRVAR(compile_doc, -"compile(source, filename, mode[, flags[, dont_inherit]]) -> code object\n\ -\n\ -Compile the source string (a Python module, statement or expression)\n\ -into a code object that can be executed by exec() or eval().\n\ -The filename will be used for run-time error messages.\n\ -The mode must be 'exec' to compile a module, 'single' to compile a\n\ -single (interactive) statement, or 'eval' to compile an expression.\n\ -The flags argument, if present, controls which future statements influence\n\ -the compilation of the code.\n\ -The dont_inherit argument, if non-zero, stops the compilation inheriting\n\ -the effects of any future statements in effect in the code calling\n\ -compile; if absent or zero these statements do influence the compilation,\n\ -in addition to any features explicitly specified."); - -static PyObject * -builtin_dir(PyObject *self, PyObject *args) -{ - PyObject *arg = NULL; - - if (!PyArg_UnpackTuple(args, "dir", 0, 1, &arg)) - return NULL; - return PyObject_Dir(arg); -} - -PyDoc_STRVAR(dir_doc, -"dir([object]) -> list of strings\n" -"\n" -"If called without an argument, return the names in the current scope.\n" -"Else, return an alphabetized list of names comprising (some of) the attributes\n" -"of the given object, and of attributes reachable from it.\n" -"If the object supplies a method named __dir__, it will be used; otherwise\n" -"the default dir() logic is used and returns:\n" -" for a module object: the module's attributes.\n" -" for a class object: its attributes, and recursively the attributes\n" -" of its bases.\n" -" for any other object: its attributes, its class's attributes, and\n" -" recursively the attributes of its class's base classes."); - -static PyObject * -builtin_divmod(PyObject *self, PyObject *args) -{ - PyObject *v, *w; - - if (!PyArg_UnpackTuple(args, "divmod", 2, 2, &v, &w)) - return NULL; - return PyNumber_Divmod(v, w); -} - -PyDoc_STRVAR(divmod_doc, -"divmod(x, y) -> (div, mod)\n\ -\n\ -Return the tuple ((x-x%y)/y, x%y). Invariant: div*y + mod == x."); - - -static PyObject * -builtin_eval(PyObject *self, PyObject *args) -{ - PyObject *cmd, *result, *tmp = NULL; - PyObject *globals = Py_None, *locals = Py_None; - char *str; - PyCompilerFlags cf; - - if (!PyArg_UnpackTuple(args, "eval", 1, 3, &cmd, &globals, &locals)) - return NULL; - if (locals != Py_None && !PyMapping_Check(locals)) { - PyErr_SetString(PyExc_TypeError, "locals must be a mapping"); - return NULL; - } - if (globals != Py_None && !PyDict_Check(globals)) { - PyErr_SetString(PyExc_TypeError, PyMapping_Check(globals) ? - "globals must be a real dict; try eval(expr, {}, mapping)" - : "globals must be a dict"); - return NULL; - } - if (globals == Py_None) { - globals = PyEval_GetGlobals(); - if (locals == Py_None) - locals = PyEval_GetLocals(); - } - else if (locals == Py_None) - locals = globals; - - if (globals == NULL || locals == NULL) { - PyErr_SetString(PyExc_TypeError, - "eval must be given globals and locals " - "when called without a frame"); - return NULL; - } - - if (PyDict_GetItemString(globals, "__builtins__") == NULL) { - if (PyDict_SetItemString(globals, "__builtins__", - PyEval_GetBuiltins()) != 0) - return NULL; - } - - if (PyCode_Check(cmd)) { - if (PyCode_GetNumFree((PyCodeObject *)cmd) > 0) { - PyErr_SetString(PyExc_TypeError, - "code object passed to eval() may not contain free variables"); - return NULL; - } - return PyEval_EvalCode(cmd, globals, locals); - } - - cf.cf_flags = PyCF_SOURCE_IS_UTF8; - str = source_as_string(cmd, "eval", "string, bytes or code", &cf); - if (str == NULL) - return NULL; - - while (*str == ' ' || *str == '\t') - str++; - - (void)PyEval_MergeCompilerFlags(&cf); - result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); - Py_XDECREF(tmp); - return result; -} - -PyDoc_STRVAR(eval_doc, -"eval(source[, globals[, locals]]) -> value\n\ -\n\ -Evaluate the source in the context of globals and locals.\n\ -The source may be a string representing a Python expression\n\ -or a code object as returned by compile().\n\ -The globals must be a dictionary and locals can be any mapping,\n\ -defaulting to the current globals and locals.\n\ -If only globals is given, locals defaults to it.\n"); - -static PyObject * -builtin_exec(PyObject *self, PyObject *args) -{ - PyObject *v; - PyObject *prog, *globals = Py_None, *locals = Py_None; - - if (!PyArg_UnpackTuple(args, "exec", 1, 3, &prog, &globals, &locals)) - return NULL; - - if (globals == Py_None) { - globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = PyEval_GetLocals(); - } - if (!globals || !locals) { - PyErr_SetString(PyExc_SystemError, - "globals and locals cannot be NULL"); - return NULL; - } - } - else if (locals == Py_None) - locals = globals; - - if (!PyDict_Check(globals)) { - PyErr_Format(PyExc_TypeError, "exec() arg 2 must be a dict, not %.100s", - globals->ob_type->tp_name); - return NULL; - } - if (!PyMapping_Check(locals)) { - PyErr_Format(PyExc_TypeError, - "arg 3 must be a mapping or None, not %.100s", - locals->ob_type->tp_name); - return NULL; - } - if (PyDict_GetItemString(globals, "__builtins__") == NULL) { - if (PyDict_SetItemString(globals, "__builtins__", - PyEval_GetBuiltins()) != 0) - return NULL; - } - - if (PyCode_Check(prog)) { - if (PyCode_GetNumFree((PyCodeObject *)prog) > 0) { - PyErr_SetString(PyExc_TypeError, - "code object passed to exec() may not " - "contain free variables"); - return NULL; - } - v = PyEval_EvalCode(prog, globals, locals); - } - else { - char *str; - PyCompilerFlags cf; - cf.cf_flags = PyCF_SOURCE_IS_UTF8; - str = source_as_string(prog, "exec", - "string, bytes or code", &cf); - if (str == NULL) - return NULL; - if (PyEval_MergeCompilerFlags(&cf)) - v = PyRun_StringFlags(str, Py_file_input, globals, - locals, &cf); - else - v = PyRun_String(str, Py_file_input, globals, locals); - } - if (v == NULL) - return NULL; - Py_DECREF(v); - Py_RETURN_NONE; -} - -PyDoc_STRVAR(exec_doc, -"exec(object[, globals[, locals]])\n\ -\n\ -Read and execute code from an object, which can be a string or a code\n\ -object.\n\ -The globals and locals are dictionaries, defaulting to the current\n\ -globals and locals. If only globals is given, locals defaults to it."); - - -static PyObject * -builtin_getattr(PyObject *self, PyObject *args) -{ - PyObject *v, *result, *dflt = NULL; - PyObject *name; - - if (!PyArg_UnpackTuple(args, "getattr", 2, 3, &v, &name, &dflt)) - return NULL; - - if (!PyUnicode_Check(name)) { - PyErr_SetString(PyExc_TypeError, - "getattr(): attribute name must be string"); - return NULL; - } - result = PyObject_GetAttr(v, name); - if (result == NULL && dflt != NULL && - PyErr_ExceptionMatches(PyExc_AttributeError)) - { - PyErr_Clear(); - Py_INCREF(dflt); - result = dflt; - } - return result; -} - -PyDoc_STRVAR(getattr_doc, -"getattr(object, name[, default]) -> value\n\ -\n\ -Get a named attribute from an object; getattr(x, 'y') is equivalent to x.y.\n\ -When a default argument is given, it is returned when the attribute doesn't\n\ -exist; without it, an exception is raised in that case."); - - -static PyObject * -builtin_globals(PyObject *self) -{ - PyObject *d; - - d = PyEval_GetGlobals(); - Py_XINCREF(d); - return d; -} - -PyDoc_STRVAR(globals_doc, -"globals() -> dictionary\n\ -\n\ -Return the dictionary containing the current scope's global variables."); - - -static PyObject * -builtin_hasattr(PyObject *self, PyObject *args) -{ - PyObject *v; - PyObject *name; - - if (!PyArg_UnpackTuple(args, "hasattr", 2, 2, &v, &name)) - return NULL; - if (!PyUnicode_Check(name)) { - PyErr_SetString(PyExc_TypeError, - "hasattr(): attribute name must be string"); - return NULL; - } - v = PyObject_GetAttr(v, name); - if (v == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - Py_RETURN_FALSE; - } - return NULL; - } - Py_DECREF(v); - Py_RETURN_TRUE; -} - -PyDoc_STRVAR(hasattr_doc, -"hasattr(object, name) -> bool\n\ -\n\ -Return whether the object has an attribute with the given name.\n\ -(This is done by calling getattr(object, name) and catching AttributeError.)"); - - -static PyObject * -builtin_id(PyObject *self, PyObject *v) -{ - return PyLong_FromVoidPtr(v); -} - -PyDoc_STRVAR(id_doc, -"id(object) -> integer\n\ -\n\ -Return the identity of an object. This is guaranteed to be unique among\n\ -simultaneously existing objects. (Hint: it's the object's memory address.)"); - - -/* map object ************************************************************/ - -typedef struct { - PyObject_HEAD - PyObject *iters; - PyObject *func; -} mapobject; - -static PyObject * -map_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyObject *it, *iters, *func; - mapobject *lz; - Py_ssize_t numargs, i; - - if (type == &PyMap_Type && !_PyArg_NoKeywords("map()", kwds)) - return NULL; - - numargs = PyTuple_Size(args); - if (numargs < 2) { - PyErr_SetString(PyExc_TypeError, - "map() must have at least two arguments."); - return NULL; - } - - iters = PyTuple_New(numargs-1); - if (iters == NULL) - return NULL; - - for (i=1 ; i<numargs ; i++) { - /* Get iterator. */ - it = PyObject_GetIter(PyTuple_GET_ITEM(args, i)); - if (it == NULL) { - Py_DECREF(iters); - return NULL; - } - PyTuple_SET_ITEM(iters, i-1, it); - } - - /* create mapobject structure */ - lz = (mapobject *)type->tp_alloc(type, 0); - if (lz == NULL) { - Py_DECREF(iters); - return NULL; - } - lz->iters = iters; - func = PyTuple_GET_ITEM(args, 0); - Py_INCREF(func); - lz->func = func; - - return (PyObject *)lz; -} - -static void -map_dealloc(mapobject *lz) -{ - PyObject_GC_UnTrack(lz); - Py_XDECREF(lz->iters); - Py_XDECREF(lz->func); - Py_TYPE(lz)->tp_free(lz); -} - -static int -map_traverse(mapobject *lz, visitproc visit, void *arg) -{ - Py_VISIT(lz->iters); - Py_VISIT(lz->func); - return 0; -} - -static PyObject * -map_next(mapobject *lz) -{ - PyObject *val; - PyObject *argtuple; - PyObject *result; - Py_ssize_t numargs, i; - - numargs = PyTuple_Size(lz->iters); - argtuple = PyTuple_New(numargs); - if (argtuple == NULL) - return NULL; - - for (i=0 ; i<numargs ; i++) { - val = PyIter_Next(PyTuple_GET_ITEM(lz->iters, i)); - if (val == NULL) { - Py_DECREF(argtuple); - return NULL; - } - PyTuple_SET_ITEM(argtuple, i, val); - } - result = PyObject_Call(lz->func, argtuple, NULL); - Py_DECREF(argtuple); - return result; -} - -PyDoc_STRVAR(map_doc, -"map(func, *iterables) --> map object\n\ -\n\ -Make an iterator that computes the function using arguments from\n\ -each of the iterables. Stops when the shortest iterable is exhausted."); - -PyTypeObject PyMap_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "map", /* tp_name */ - sizeof(mapobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)map_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE, /* tp_flags */ - map_doc, /* tp_doc */ - (traverseproc)map_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)map_next, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - map_new, /* tp_new */ - PyObject_GC_Del, /* tp_free */ -}; - -static PyObject * -builtin_next(PyObject *self, PyObject *args) -{ - PyObject *it, *res; - PyObject *def = NULL; - - if (!PyArg_UnpackTuple(args, "next", 1, 2, &it, &def)) - return NULL; - if (!PyIter_Check(it)) { - PyErr_Format(PyExc_TypeError, - "'%.200s' object is not an iterator", - it->ob_type->tp_name); - return NULL; - } - - res = (*it->ob_type->tp_iternext)(it); - if (res != NULL) { - return res; - } else if (def != NULL) { - if (PyErr_Occurred()) { - if(!PyErr_ExceptionMatches(PyExc_StopIteration)) - return NULL; - PyErr_Clear(); - } - Py_INCREF(def); - return def; - } else if (PyErr_Occurred()) { - return NULL; - } else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } -} - -PyDoc_STRVAR(next_doc, -"next(iterator[, default])\n\ -\n\ -Return the next item from the iterator. If default is given and the iterator\n\ -is exhausted, it is returned instead of raising StopIteration."); - - -static PyObject * -builtin_setattr(PyObject *self, PyObject *args) -{ - PyObject *v; - PyObject *name; - PyObject *value; - - if (!PyArg_UnpackTuple(args, "setattr", 3, 3, &v, &name, &value)) - return NULL; - if (PyObject_SetAttr(v, name, value) != 0) - return NULL; - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(setattr_doc, -"setattr(object, name, value)\n\ -\n\ -Set a named attribute on an object; setattr(x, 'y', v) is equivalent to\n\ -``x.y = v''."); - - -static PyObject * -builtin_delattr(PyObject *self, PyObject *args) -{ - PyObject *v; - PyObject *name; - - if (!PyArg_UnpackTuple(args, "delattr", 2, 2, &v, &name)) - return NULL; - if (PyObject_SetAttr(v, name, (PyObject *)NULL) != 0) - return NULL; - Py_INCREF(Py_None); - return Py_None; -} - -PyDoc_STRVAR(delattr_doc, -"delattr(object, name)\n\ -\n\ -Delete a named attribute on an object; delattr(x, 'y') is equivalent to\n\ -``del x.y''."); - - -static PyObject * -builtin_hash(PyObject *self, PyObject *v) -{ - Py_hash_t x; - - x = PyObject_Hash(v); - if (x == -1) - return NULL; - return PyLong_FromSsize_t(x); -} - -PyDoc_STRVAR(hash_doc, -"hash(object) -> integer\n\ -\n\ -Return a hash value for the object. Two objects with the same value have\n\ -the same hash value. The reverse is not necessarily true, but likely."); - - -static PyObject * -builtin_hex(PyObject *self, PyObject *v) -{ - return PyNumber_ToBase(v, 16); -} - -PyDoc_STRVAR(hex_doc, -"hex(number) -> string\n\ -\n\ -Return the hexadecimal representation of an integer."); - - -static PyObject * -builtin_iter(PyObject *self, PyObject *args) -{ - PyObject *v, *w = NULL; - - if (!PyArg_UnpackTuple(args, "iter", 1, 2, &v, &w)) - return NULL; - if (w == NULL) - return PyObject_GetIter(v); - if (!PyCallable_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "iter(v, w): v must be callable"); - return NULL; - } - return PyCallIter_New(v, w); -} - -PyDoc_STRVAR(iter_doc, -"iter(iterable) -> iterator\n\ -iter(callable, sentinel) -> iterator\n\ -\n\ -Get an iterator from an object. In the first form, the argument must\n\ -supply its own iterator, or be a sequence.\n\ -In the second form, the callable is called until it returns the sentinel."); - - -static PyObject * -builtin_len(PyObject *self, PyObject *v) -{ - Py_ssize_t res; - - res = PyObject_Size(v); - if (res < 0 && PyErr_Occurred()) - return NULL; - return PyLong_FromSsize_t(res); -} - -PyDoc_STRVAR(len_doc, -"len(object) -> integer\n\ -\n\ -Return the number of items of a sequence or mapping."); - - -static PyObject * -builtin_locals(PyObject *self) -{ - PyObject *d; - - d = PyEval_GetLocals(); - Py_XINCREF(d); - return d; -} - -PyDoc_STRVAR(locals_doc, -"locals() -> dictionary\n\ -\n\ -Update and return a dictionary containing the current scope's local variables."); - - -static PyObject * -min_max(PyObject *args, PyObject *kwds, int op) -{ - PyObject *v, *it, *item, *val, *maxitem, *maxval, *keyfunc=NULL; - const char *name = op == Py_LT ? "min" : "max"; - - if (PyTuple_Size(args) > 1) - v = args; - else if (!PyArg_UnpackTuple(args, (char *)name, 1, 1, &v)) - return NULL; - - if (kwds != NULL && PyDict_Check(kwds) && PyDict_Size(kwds)) { - keyfunc = PyDict_GetItemString(kwds, "key"); - if (PyDict_Size(kwds)!=1 || keyfunc == NULL) { - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument", name); - return NULL; - } - Py_INCREF(keyfunc); - } - - it = PyObject_GetIter(v); - if (it == NULL) { - Py_XDECREF(keyfunc); - return NULL; - } - - maxitem = NULL; /* the result */ - maxval = NULL; /* the value associated with the result */ - while (( item = PyIter_Next(it) )) { - /* get the value from the key function */ - if (keyfunc != NULL) { - val = PyObject_CallFunctionObjArgs(keyfunc, item, NULL); - if (val == NULL) - goto Fail_it_item; - } - /* no key function; the value is the item */ - else { - val = item; - Py_INCREF(val); - } - - /* maximum value and item are unset; set them */ - if (maxval == NULL) { - maxitem = item; - maxval = val; - } - /* maximum value and item are set; update them as necessary */ - else { - int cmp = PyObject_RichCompareBool(val, maxval, op); - if (cmp < 0) - goto Fail_it_item_and_val; - else if (cmp > 0) { - Py_DECREF(maxval); - Py_DECREF(maxitem); - maxval = val; - maxitem = item; - } - else { - Py_DECREF(item); - Py_DECREF(val); - } - } - } - if (PyErr_Occurred()) - goto Fail_it; - if (maxval == NULL) { - PyErr_Format(PyExc_ValueError, - "%s() arg is an empty sequence", name); - assert(maxitem == NULL); - } - else - Py_DECREF(maxval); - Py_DECREF(it); - Py_XDECREF(keyfunc); - return maxitem; - -Fail_it_item_and_val: - Py_DECREF(val); -Fail_it_item: - Py_DECREF(item); -Fail_it: - Py_XDECREF(maxval); - Py_XDECREF(maxitem); - Py_DECREF(it); - Py_XDECREF(keyfunc); - return NULL; -} - -static PyObject * -builtin_min(PyObject *self, PyObject *args, PyObject *kwds) -{ - return min_max(args, kwds, Py_LT); -} - -PyDoc_STRVAR(min_doc, -"min(iterable[, key=func]) -> value\n\ -min(a, b, c, ...[, key=func]) -> value\n\ -\n\ -With a single iterable argument, return its smallest item.\n\ -With two or more arguments, return the smallest argument."); - - -static PyObject * -builtin_max(PyObject *self, PyObject *args, PyObject *kwds) -{ - return min_max(args, kwds, Py_GT); -} - -PyDoc_STRVAR(max_doc, -"max(iterable[, key=func]) -> value\n\ -max(a, b, c, ...[, key=func]) -> value\n\ -\n\ -With a single iterable argument, return its largest item.\n\ -With two or more arguments, return the largest argument."); - - -static PyObject * -builtin_oct(PyObject *self, PyObject *v) -{ - return PyNumber_ToBase(v, 8); -} - -PyDoc_STRVAR(oct_doc, -"oct(number) -> string\n\ -\n\ -Return the octal representation of an integer."); - - -static PyObject * -builtin_ord(PyObject *self, PyObject* obj) -{ - long ord; - Py_ssize_t size; - - if (PyBytes_Check(obj)) { - size = PyBytes_GET_SIZE(obj); - if (size == 1) { - ord = (long)((unsigned char)*PyBytes_AS_STRING(obj)); - return PyLong_FromLong(ord); - } - } - else if (PyUnicode_Check(obj)) { - if (PyUnicode_READY(obj) == -1) - return NULL; - size = PyUnicode_GET_LENGTH(obj); - if (size == 1) { - ord = (long)PyUnicode_READ_CHAR(obj, 0); - return PyLong_FromLong(ord); - } - } - else if (PyByteArray_Check(obj)) { - /* XXX Hopefully this is temporary */ - size = PyByteArray_GET_SIZE(obj); - if (size == 1) { - ord = (long)((unsigned char)*PyByteArray_AS_STRING(obj)); - return PyLong_FromLong(ord); - } - } - else { - PyErr_Format(PyExc_TypeError, - "ord() expected string of length 1, but " \ - "%.200s found", obj->ob_type->tp_name); - return NULL; - } - - PyErr_Format(PyExc_TypeError, - "ord() expected a character, " - "but string of length %zd found", - size); - return NULL; -} - -PyDoc_VAR(ord_doc) = PyDoc_STR( -"ord(c) -> integer\n\ -\n\ -Return the integer ordinal of a one-character string." -) -#ifndef Py_UNICODE_WIDE -PyDoc_STR( -"\nA valid surrogate pair is also accepted." -) -#endif -; - - -static PyObject * -builtin_pow(PyObject *self, PyObject *args) -{ - PyObject *v, *w, *z = Py_None; - - if (!PyArg_UnpackTuple(args, "pow", 2, 3, &v, &w, &z)) - return NULL; - return PyNumber_Power(v, w, z); -} - -PyDoc_STRVAR(pow_doc, -"pow(x, y[, z]) -> number\n\ -\n\ -With two arguments, equivalent to x**y. With three arguments,\n\ -equivalent to (x**y) % z, but may be more efficient (e.g. for longs)."); - - - -static PyObject * -builtin_print(PyObject *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"sep", "end", "file", 0}; - static PyObject *dummy_args; - PyObject *sep = NULL, *end = NULL, *file = NULL; - int i, err; - - if (dummy_args == NULL) { - if (!(dummy_args = PyTuple_New(0))) - return NULL; - } - if (!PyArg_ParseTupleAndKeywords(dummy_args, kwds, "|OOO:print", - kwlist, &sep, &end, &file)) - return NULL; - if (file == NULL || file == Py_None) { - file = PySys_GetObject("stdout"); - /* sys.stdout may be None when FILE* stdout isn't connected */ - if (file == Py_None) - Py_RETURN_NONE; - } - - if (sep == Py_None) { - sep = NULL; - } - else if (sep && !PyUnicode_Check(sep)) { - PyErr_Format(PyExc_TypeError, - "sep must be None or a string, not %.200s", - sep->ob_type->tp_name); - return NULL; - } - if (end == Py_None) { - end = NULL; - } - else if (end && !PyUnicode_Check(end)) { - PyErr_Format(PyExc_TypeError, - "end must be None or a string, not %.200s", - end->ob_type->tp_name); - return NULL; - } - - for (i = 0; i < PyTuple_Size(args); i++) { - if (i > 0) { - if (sep == NULL) - err = PyFile_WriteString(" ", file); - else - err = PyFile_WriteObject(sep, file, - Py_PRINT_RAW); - if (err) - return NULL; - } - err = PyFile_WriteObject(PyTuple_GetItem(args, i), file, - Py_PRINT_RAW); - if (err) - return NULL; - } - - if (end == NULL) - err = PyFile_WriteString("\n", file); - else - err = PyFile_WriteObject(end, file, Py_PRINT_RAW); - if (err) - return NULL; - - Py_RETURN_NONE; -} - -PyDoc_STRVAR(print_doc, -"print(value, ..., sep=' ', end='\\n', file=sys.stdout)\n\ -\n\ -Prints the values to a stream, or to sys.stdout by default.\n\ -Optional keyword arguments:\n\ -file: a file-like object (stream); defaults to the current sys.stdout.\n\ -sep: string inserted between values, default a space.\n\ -end: string appended after the last value, default a newline."); - - -static PyObject * -builtin_input(PyObject *self, PyObject *args) -{ - PyObject *promptarg = NULL; - PyObject *fin = PySys_GetObject("stdin"); - PyObject *fout = PySys_GetObject("stdout"); - PyObject *ferr = PySys_GetObject("stderr"); - PyObject *tmp; - long fd; - int tty; - - /* Parse arguments */ - if (!PyArg_UnpackTuple(args, "input", 0, 1, &promptarg)) - return NULL; - - /* Check that stdin/out/err are intact */ - if (fin == NULL || fin == Py_None) { - PyErr_SetString(PyExc_RuntimeError, - "input(): lost sys.stdin"); - return NULL; - } - if (fout == NULL || fout == Py_None) { - PyErr_SetString(PyExc_RuntimeError, - "input(): lost sys.stdout"); - return NULL; - } - if (ferr == NULL || ferr == Py_None) { - PyErr_SetString(PyExc_RuntimeError, - "input(): lost sys.stderr"); - return NULL; - } - - /* First of all, flush stderr */ - tmp = _PyObject_CallMethodId(ferr, &PyId_flush, ""); - if (tmp == NULL) - PyErr_Clear(); - else - Py_DECREF(tmp); - - /* We should only use (GNU) readline if Python's sys.stdin and - sys.stdout are the same as C's stdin and stdout, because we - need to pass it those. */ - tmp = _PyObject_CallMethodId(fin, &PyId_fileno, ""); - if (tmp == NULL) { - PyErr_Clear(); - tty = 0; - } - else { - fd = PyLong_AsLong(tmp); - Py_DECREF(tmp); - if (fd < 0 && PyErr_Occurred()) - return NULL; - tty = fd == fileno(stdin) && isatty(fd); - } - if (tty) { - tmp = _PyObject_CallMethodId(fout, &PyId_fileno, ""); - if (tmp == NULL) - PyErr_Clear(); - else { - fd = PyLong_AsLong(tmp); - Py_DECREF(tmp); - if (fd < 0 && PyErr_Occurred()) - return NULL; - tty = fd == fileno(stdout) && isatty(fd); - } - } - - /* If we're interactive, use (GNU) readline */ - if (tty) { - PyObject *po = NULL; - char *prompt; - char *s = NULL; - PyObject *stdin_encoding = NULL, *stdin_errors = NULL; - PyObject *stdout_encoding = NULL, *stdout_errors = NULL; - char *stdin_encoding_str, *stdin_errors_str; - PyObject *result; - size_t len; - _Py_IDENTIFIER(encoding); - _Py_IDENTIFIER(errors); - - stdin_encoding = _PyObject_GetAttrId(fin, &PyId_encoding); - stdin_errors = _PyObject_GetAttrId(fin, &PyId_errors); - if (!stdin_encoding || !stdin_errors) - /* stdin is a text stream, so it must have an - encoding. */ - goto _readline_errors; - stdin_encoding_str = _PyUnicode_AsString(stdin_encoding); - stdin_errors_str = _PyUnicode_AsString(stdin_errors); - if (!stdin_encoding_str || !stdin_errors_str) - goto _readline_errors; - tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); - if (tmp == NULL) - PyErr_Clear(); - else - Py_DECREF(tmp); - if (promptarg != NULL) { - /* We have a prompt, encode it as stdout would */ - char *stdout_encoding_str, *stdout_errors_str; - PyObject *stringpo; - stdout_encoding = _PyObject_GetAttrId(fout, &PyId_encoding); - stdout_errors = _PyObject_GetAttrId(fout, &PyId_errors); - if (!stdout_encoding || !stdout_errors) - goto _readline_errors; - stdout_encoding_str = _PyUnicode_AsString(stdout_encoding); - stdout_errors_str = _PyUnicode_AsString(stdout_errors); - if (!stdout_encoding_str || !stdout_errors_str) - goto _readline_errors; - stringpo = PyObject_Str(promptarg); - if (stringpo == NULL) - goto _readline_errors; - po = PyUnicode_AsEncodedString(stringpo, - stdout_encoding_str, stdout_errors_str); - Py_CLEAR(stdout_encoding); - Py_CLEAR(stdout_errors); - Py_CLEAR(stringpo); - if (po == NULL) - goto _readline_errors; - prompt = PyBytes_AsString(po); - if (prompt == NULL) - goto _readline_errors; - } - else { - po = NULL; - prompt = ""; - } - s = PyOS_Readline(stdin, stdout, prompt); - if (s == NULL) { - if (!PyErr_Occurred()) - PyErr_SetNone(PyExc_KeyboardInterrupt); - goto _readline_errors; - } - - len = strlen(s); - if (len == 0) { - PyErr_SetNone(PyExc_EOFError); - result = NULL; - } - else { - if (len > PY_SSIZE_T_MAX) { - PyErr_SetString(PyExc_OverflowError, - "input: input too long"); - result = NULL; - } - else { - len--; /* strip trailing '\n' */ - if (len != 0 && s[len-1] == '\r') - len--; /* strip trailing '\r' */ - result = PyUnicode_Decode(s, len, stdin_encoding_str, - stdin_errors_str); - } - } - Py_DECREF(stdin_encoding); - Py_DECREF(stdin_errors); - Py_XDECREF(po); - PyMem_FREE(s); - return result; - _readline_errors: - Py_XDECREF(stdin_encoding); - Py_XDECREF(stdout_encoding); - Py_XDECREF(stdin_errors); - Py_XDECREF(stdout_errors); - Py_XDECREF(po); - return NULL; - } - - /* Fallback if we're not interactive */ - if (promptarg != NULL) { - if (PyFile_WriteObject(promptarg, fout, Py_PRINT_RAW) != 0) - return NULL; - } - tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); - if (tmp == NULL) - PyErr_Clear(); - else - Py_DECREF(tmp); - return PyFile_GetLine(fin, -1); -} - -PyDoc_STRVAR(input_doc, -"input([prompt]) -> string\n\ -\n\ -Read a string from standard input. The trailing newline is stripped.\n\ -If the user hits EOF (Unix: Ctl-D, Windows: Ctl-Z+Return), raise EOFError.\n\ -On Unix, GNU readline is used if enabled. The prompt string, if given,\n\ -is printed without a trailing newline before reading."); - - -static PyObject * -builtin_repr(PyObject *self, PyObject *v) -{ - return PyObject_Repr(v); -} - -PyDoc_STRVAR(repr_doc, -"repr(object) -> string\n\ -\n\ -Return the canonical string representation of the object.\n\ -For most object types, eval(repr(object)) == object."); - - -static PyObject * -builtin_round(PyObject *self, PyObject *args, PyObject *kwds) -{ - static PyObject *round_str = NULL; - PyObject *ndigits = NULL; - static char *kwlist[] = {"number", "ndigits", 0}; - PyObject *number, *round; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:round", - kwlist, &number, &ndigits)) - return NULL; - - if (Py_TYPE(number)->tp_dict == NULL) { - if (PyType_Ready(Py_TYPE(number)) < 0) - return NULL; - } - - if (round_str == NULL) { - round_str = PyUnicode_InternFromString("__round__"); - if (round_str == NULL) - return NULL; - } - - round = _PyType_Lookup(Py_TYPE(number), round_str); - if (round == NULL) { - PyErr_Format(PyExc_TypeError, - "type %.100s doesn't define __round__ method", - Py_TYPE(number)->tp_name); - return NULL; - } - - if (ndigits == NULL) - return PyObject_CallFunction(round, "O", number); - else - return PyObject_CallFunction(round, "OO", number, ndigits); -} - -PyDoc_STRVAR(round_doc, -"round(number[, ndigits]) -> number\n\ -\n\ -Round a number to a given precision in decimal digits (default 0 digits).\n\ -This returns an int when called with one argument, otherwise the\n\ -same type as the number. ndigits may be negative."); - - -static PyObject * -builtin_sorted(PyObject *self, PyObject *args, PyObject *kwds) -{ - PyObject *newlist, *v, *seq, *keyfunc=NULL, *newargs; - PyObject *callable; - static char *kwlist[] = {"iterable", "key", "reverse", 0}; - int reverse; - _Py_IDENTIFIER(sort); - - /* args 1-3 should match listsort in Objects/listobject.c */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:sorted", - kwlist, &seq, &keyfunc, &reverse)) - return NULL; - - newlist = PySequence_List(seq); - if (newlist == NULL) - return NULL; - - callable = _PyObject_GetAttrId(newlist, &PyId_sort); - if (callable == NULL) { - Py_DECREF(newlist); - return NULL; - } - - newargs = PyTuple_GetSlice(args, 1, 4); - if (newargs == NULL) { - Py_DECREF(newlist); - Py_DECREF(callable); - return NULL; - } - - v = PyObject_Call(callable, newargs, kwds); - Py_DECREF(newargs); - Py_DECREF(callable); - if (v == NULL) { - Py_DECREF(newlist); - return NULL; - } - Py_DECREF(v); - return newlist; -} - -PyDoc_STRVAR(sorted_doc, -"sorted(iterable, key=None, reverse=False) --> new sorted list"); - -static PyObject * -builtin_vars(PyObject *self, PyObject *args) -{ - PyObject *v = NULL; - PyObject *d; - - if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) - return NULL; - if (v == NULL) { - d = PyEval_GetLocals(); - if (d == NULL) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_SystemError, - "vars(): no locals!?"); - } - else - Py_INCREF(d); - } - else { - _Py_IDENTIFIER(__dict__); - d = _PyObject_GetAttrId(v, &PyId___dict__); - if (d == NULL) { - PyErr_SetString(PyExc_TypeError, - "vars() argument must have __dict__ attribute"); - return NULL; - } - } - return d; -} - -PyDoc_STRVAR(vars_doc, -"vars([object]) -> dictionary\n\ -\n\ -Without arguments, equivalent to locals().\n\ -With an argument, equivalent to object.__dict__."); - -static PyObject* -builtin_sum(PyObject *self, PyObject *args) -{ - PyObject *seq; - PyObject *result = NULL; - PyObject *temp, *item, *iter; - - if (!PyArg_UnpackTuple(args, "sum", 1, 2, &seq, &result)) - return NULL; - - iter = PyObject_GetIter(seq); - if (iter == NULL) - return NULL; - - if (result == NULL) { - result = PyLong_FromLong(0); - if (result == NULL) { - Py_DECREF(iter); - return NULL; - } - } else { - /* reject string values for 'start' parameter */ - if (PyUnicode_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "sum() can't sum strings [use ''.join(seq) instead]"); - Py_DECREF(iter); - return NULL; - } - if (PyBytes_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "sum() can't sum bytes [use b''.join(seq) instead]"); - Py_DECREF(iter); - return NULL; - } - if (PyByteArray_Check(result)) { - PyErr_SetString(PyExc_TypeError, - "sum() can't sum bytearray [use b''.join(seq) instead]"); - Py_DECREF(iter); - return NULL; - } - - Py_INCREF(result); - } - -#ifndef SLOW_SUM - /* Fast addition by keeping temporary sums in C instead of new Python objects. - Assumes all inputs are the same type. If the assumption fails, default - to the more general routine. - */ - if (PyLong_CheckExact(result)) { - int overflow; - long i_result = PyLong_AsLongAndOverflow(result, &overflow); - /* If this already overflowed, don't even enter the loop. */ - if (overflow == 0) { - Py_DECREF(result); - result = NULL; - } - while(result == NULL) { - item = PyIter_Next(iter); - if (item == NULL) { - Py_DECREF(iter); - if (PyErr_Occurred()) - return NULL; - return PyLong_FromLong(i_result); - } - if (PyLong_CheckExact(item)) { - long b = PyLong_AsLongAndOverflow(item, &overflow); - long x = i_result + b; - if (overflow == 0 && ((x^i_result) >= 0 || (x^b) >= 0)) { - i_result = x; - Py_DECREF(item); - continue; - } - } - /* Either overflowed or is not an int. Restore real objects and process normally */ - result = PyLong_FromLong(i_result); - temp = PyNumber_Add(result, item); - Py_DECREF(result); - Py_DECREF(item); - result = temp; - if (result == NULL) { - Py_DECREF(iter); - return NULL; - } - } - } - - if (PyFloat_CheckExact(result)) { - double f_result = PyFloat_AS_DOUBLE(result); - Py_DECREF(result); - result = NULL; - while(result == NULL) { - item = PyIter_Next(iter); - if (item == NULL) { - Py_DECREF(iter); - if (PyErr_Occurred()) - return NULL; - return PyFloat_FromDouble(f_result); - } - if (PyFloat_CheckExact(item)) { - PyFPE_START_PROTECT("add", Py_DECREF(item); Py_DECREF(iter); return 0) - f_result += PyFloat_AS_DOUBLE(item); - PyFPE_END_PROTECT(f_result) - Py_DECREF(item); - continue; - } - if (PyLong_CheckExact(item)) { - long value; - int overflow; - value = PyLong_AsLongAndOverflow(item, &overflow); - if (!overflow) { - PyFPE_START_PROTECT("add", Py_DECREF(item); Py_DECREF(iter); return 0) - f_result += (double)value; - PyFPE_END_PROTECT(f_result) - Py_DECREF(item); - continue; - } - } - result = PyFloat_FromDouble(f_result); - temp = PyNumber_Add(result, item); - Py_DECREF(result); - Py_DECREF(item); - result = temp; - if (result == NULL) { - Py_DECREF(iter); - return NULL; - } - } - } -#endif - - for(;;) { - item = PyIter_Next(iter); - if (item == NULL) { - /* error, or end-of-sequence */ - if (PyErr_Occurred()) { - Py_DECREF(result); - result = NULL; - } - break; - } - /* It's tempting to use PyNumber_InPlaceAdd instead of - PyNumber_Add here, to avoid quadratic running time - when doing 'sum(list_of_lists, [])'. However, this - would produce a change in behaviour: a snippet like - - empty = [] - sum([[x] for x in range(10)], empty) - - would change the value of empty. */ - temp = PyNumber_Add(result, item); - Py_DECREF(result); - Py_DECREF(item); - result = temp; - if (result == NULL) - break; - } - Py_DECREF(iter); - return result; -} - -PyDoc_STRVAR(sum_doc, -"sum(iterable[, start]) -> value\n\ -\n\ -Returns the sum of an iterable of numbers (NOT strings) plus the value\n\ -of parameter 'start' (which defaults to 0). When the iterable is\n\ -empty, returns start."); - - -static PyObject * -builtin_isinstance(PyObject *self, PyObject *args) -{ - PyObject *inst; - PyObject *cls; - int retval; - - if (!PyArg_UnpackTuple(args, "isinstance", 2, 2, &inst, &cls)) - return NULL; - - retval = PyObject_IsInstance(inst, cls); - if (retval < 0) - return NULL; - return PyBool_FromLong(retval); -} - -PyDoc_STRVAR(isinstance_doc, -"isinstance(object, class-or-type-or-tuple) -> bool\n\ -\n\ -Return whether an object is an instance of a class or of a subclass thereof.\n\ -With a type as second argument, return whether that is the object's type.\n\ -The form using a tuple, isinstance(x, (A, B, ...)), is a shortcut for\n\ -isinstance(x, A) or isinstance(x, B) or ... (etc.)."); - - -static PyObject * -builtin_issubclass(PyObject *self, PyObject *args) -{ - PyObject *derived; - PyObject *cls; - int retval; - - if (!PyArg_UnpackTuple(args, "issubclass", 2, 2, &derived, &cls)) - return NULL; - - retval = PyObject_IsSubclass(derived, cls); - if (retval < 0) - return NULL; - return PyBool_FromLong(retval); -} - -PyDoc_STRVAR(issubclass_doc, -"issubclass(C, B) -> bool\n\ -\n\ -Return whether class C is a subclass (i.e., a derived class) of class B.\n\ -When using a tuple as the second argument issubclass(X, (A, B, ...)),\n\ -is a shortcut for issubclass(X, A) or issubclass(X, B) or ... (etc.)."); - - -typedef struct { - PyObject_HEAD - Py_ssize_t tuplesize; - PyObject *ittuple; /* tuple of iterators */ - PyObject *result; -} zipobject; - -static PyObject * -zip_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - zipobject *lz; - Py_ssize_t i; - PyObject *ittuple; /* tuple of iterators */ - PyObject *result; - Py_ssize_t tuplesize = PySequence_Length(args); - - if (type == &PyZip_Type && !_PyArg_NoKeywords("zip()", kwds)) - return NULL; - - /* args must be a tuple */ - assert(PyTuple_Check(args)); - - /* obtain iterators */ - ittuple = PyTuple_New(tuplesize); - if (ittuple == NULL) - return NULL; - for (i=0; i < tuplesize; ++i) { - PyObject *item = PyTuple_GET_ITEM(args, i); - PyObject *it = PyObject_GetIter(item); - if (it == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, - "zip argument #%zd must support iteration", - i+1); - Py_DECREF(ittuple); - return NULL; - } - PyTuple_SET_ITEM(ittuple, i, it); - } - - /* create a result holder */ - result = PyTuple_New(tuplesize); - if (result == NULL) { - Py_DECREF(ittuple); - return NULL; - } - for (i=0 ; i < tuplesize ; i++) { - Py_INCREF(Py_None); - PyTuple_SET_ITEM(result, i, Py_None); - } - - /* create zipobject structure */ - lz = (zipobject *)type->tp_alloc(type, 0); - if (lz == NULL) { - Py_DECREF(ittuple); - Py_DECREF(result); - return NULL; - } - lz->ittuple = ittuple; - lz->tuplesize = tuplesize; - lz->result = result; - - return (PyObject *)lz; -} - -static void -zip_dealloc(zipobject *lz) -{ - PyObject_GC_UnTrack(lz); - Py_XDECREF(lz->ittuple); - Py_XDECREF(lz->result); - Py_TYPE(lz)->tp_free(lz); -} - -static int -zip_traverse(zipobject *lz, visitproc visit, void *arg) -{ - Py_VISIT(lz->ittuple); - Py_VISIT(lz->result); - return 0; -} - -static PyObject * -zip_next(zipobject *lz) -{ - Py_ssize_t i; - Py_ssize_t tuplesize = lz->tuplesize; - PyObject *result = lz->result; - PyObject *it; - PyObject *item; - PyObject *olditem; - - if (tuplesize == 0) - return NULL; - if (Py_REFCNT(result) == 1) { - Py_INCREF(result); - for (i=0 ; i < tuplesize ; i++) { - it = PyTuple_GET_ITEM(lz->ittuple, i); - item = (*Py_TYPE(it)->tp_iternext)(it); - if (item == NULL) { - Py_DECREF(result); - return NULL; - } - olditem = PyTuple_GET_ITEM(result, i); - PyTuple_SET_ITEM(result, i, item); - Py_DECREF(olditem); - } - } else { - result = PyTuple_New(tuplesize); - if (result == NULL) - return NULL; - for (i=0 ; i < tuplesize ; i++) { - it = PyTuple_GET_ITEM(lz->ittuple, i); - item = (*Py_TYPE(it)->tp_iternext)(it); - if (item == NULL) { - Py_DECREF(result); - return NULL; - } - PyTuple_SET_ITEM(result, i, item); - } - } - return result; -} - -PyDoc_STRVAR(zip_doc, -"zip(iter1 [,iter2 [...]]) --> zip object\n\ -\n\ -Return a zip object whose .__next__() method returns a tuple where\n\ -the i-th element comes from the i-th iterable argument. The .__next__()\n\ -method continues until the shortest iterable in the argument sequence\n\ -is exhausted and then it raises StopIteration."); - -PyTypeObject PyZip_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "zip", /* tp_name */ - sizeof(zipobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)zip_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE, /* tp_flags */ - zip_doc, /* tp_doc */ - (traverseproc)zip_traverse, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)zip_next, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - zip_new, /* tp_new */ - PyObject_GC_Del, /* tp_free */ -}; - - -static PyMethodDef builtin_methods[] = { - {"__build_class__", (PyCFunction)builtin___build_class__, - METH_VARARGS | METH_KEYWORDS, build_class_doc}, - {"__import__", (PyCFunction)builtin___import__, METH_VARARGS | METH_KEYWORDS, import_doc}, - {"abs", builtin_abs, METH_O, abs_doc}, - {"all", builtin_all, METH_O, all_doc}, - {"any", builtin_any, METH_O, any_doc}, - {"ascii", builtin_ascii, METH_O, ascii_doc}, - {"bin", builtin_bin, METH_O, bin_doc}, - {"callable", builtin_callable, METH_O, callable_doc}, - {"chr", builtin_chr, METH_VARARGS, chr_doc}, - {"compile", (PyCFunction)builtin_compile, METH_VARARGS | METH_KEYWORDS, compile_doc}, - {"delattr", builtin_delattr, METH_VARARGS, delattr_doc}, - {"dir", builtin_dir, METH_VARARGS, dir_doc}, - {"divmod", builtin_divmod, METH_VARARGS, divmod_doc}, - {"eval", builtin_eval, METH_VARARGS, eval_doc}, - {"exec", builtin_exec, METH_VARARGS, exec_doc}, - {"format", builtin_format, METH_VARARGS, format_doc}, - {"getattr", builtin_getattr, METH_VARARGS, getattr_doc}, - {"globals", (PyCFunction)builtin_globals, METH_NOARGS, globals_doc}, - {"hasattr", builtin_hasattr, METH_VARARGS, hasattr_doc}, - {"hash", builtin_hash, METH_O, hash_doc}, - {"hex", builtin_hex, METH_O, hex_doc}, - {"id", builtin_id, METH_O, id_doc}, - {"input", builtin_input, METH_VARARGS, input_doc}, - {"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc}, - {"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc}, - {"iter", builtin_iter, METH_VARARGS, iter_doc}, - {"len", builtin_len, METH_O, len_doc}, - {"locals", (PyCFunction)builtin_locals, METH_NOARGS, locals_doc}, - {"max", (PyCFunction)builtin_max, METH_VARARGS | METH_KEYWORDS, max_doc}, - {"min", (PyCFunction)builtin_min, METH_VARARGS | METH_KEYWORDS, min_doc}, - {"next", (PyCFunction)builtin_next, METH_VARARGS, next_doc}, - {"oct", builtin_oct, METH_O, oct_doc}, - {"ord", builtin_ord, METH_O, ord_doc}, - {"pow", builtin_pow, METH_VARARGS, pow_doc}, - {"print", (PyCFunction)builtin_print, METH_VARARGS | METH_KEYWORDS, print_doc}, - {"repr", builtin_repr, METH_O, repr_doc}, - {"round", (PyCFunction)builtin_round, METH_VARARGS | METH_KEYWORDS, round_doc}, - {"setattr", builtin_setattr, METH_VARARGS, setattr_doc}, - {"sorted", (PyCFunction)builtin_sorted, METH_VARARGS | METH_KEYWORDS, sorted_doc}, - {"sum", builtin_sum, METH_VARARGS, sum_doc}, - {"vars", builtin_vars, METH_VARARGS, vars_doc}, - {NULL, NULL}, -}; - -PyDoc_STRVAR(builtin_doc, -"Built-in functions, exceptions, and other objects.\n\ -\n\ -Noteworthy: None is the `nil' object; Ellipsis represents `...' in slices."); - -static struct PyModuleDef builtinsmodule = { - PyModuleDef_HEAD_INIT, - "builtins", - builtin_doc, - -1, /* multiple "initialization" just copies the module dict. */ - builtin_methods, - NULL, - NULL, - NULL, - NULL -}; - - -PyObject * -_PyBuiltin_Init(void) -{ - PyObject *mod, *dict, *debug; - mod = PyModule_Create(&builtinsmodule); - if (mod == NULL) - return NULL; - dict = PyModule_GetDict(mod); - -#ifdef Py_TRACE_REFS - /* "builtins" exposes a number of statically allocated objects - * that, before this code was added in 2.3, never showed up in - * the list of "all objects" maintained by Py_TRACE_REFS. As a - * result, programs leaking references to None and False (etc) - * couldn't be diagnosed by examining sys.getobjects(0). - */ -#define ADD_TO_ALL(OBJECT) _Py_AddToAllObjects((PyObject *)(OBJECT), 0) -#else -#define ADD_TO_ALL(OBJECT) (void)0 -#endif - -#define SETBUILTIN(NAME, OBJECT) \ - if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0) \ - return NULL; \ - ADD_TO_ALL(OBJECT) - - SETBUILTIN("None", Py_None); - SETBUILTIN("Ellipsis", Py_Ellipsis); - SETBUILTIN("NotImplemented", Py_NotImplemented); - SETBUILTIN("False", Py_False); - SETBUILTIN("True", Py_True); - SETBUILTIN("bool", &PyBool_Type); - SETBUILTIN("memoryview", &PyMemoryView_Type); - SETBUILTIN("bytearray", &PyByteArray_Type); - SETBUILTIN("bytes", &PyBytes_Type); - SETBUILTIN("classmethod", &PyClassMethod_Type); - SETBUILTIN("complex", &PyComplex_Type); - SETBUILTIN("dict", &PyDict_Type); - SETBUILTIN("enumerate", &PyEnum_Type); - SETBUILTIN("filter", &PyFilter_Type); - SETBUILTIN("float", &PyFloat_Type); - SETBUILTIN("frozenset", &PyFrozenSet_Type); - SETBUILTIN("property", &PyProperty_Type); - SETBUILTIN("int", &PyLong_Type); - SETBUILTIN("list", &PyList_Type); - SETBUILTIN("map", &PyMap_Type); - SETBUILTIN("object", &PyBaseObject_Type); - SETBUILTIN("range", &PyRange_Type); - SETBUILTIN("reversed", &PyReversed_Type); - SETBUILTIN("set", &PySet_Type); - SETBUILTIN("slice", &PySlice_Type); - SETBUILTIN("staticmethod", &PyStaticMethod_Type); - SETBUILTIN("str", &PyUnicode_Type); - SETBUILTIN("super", &PySuper_Type); - SETBUILTIN("tuple", &PyTuple_Type); - SETBUILTIN("type", &PyType_Type); - SETBUILTIN("zip", &PyZip_Type); - debug = PyBool_FromLong(Py_OptimizeFlag == 0); - if (PyDict_SetItemString(dict, "__debug__", debug) < 0) { - Py_XDECREF(debug); - return NULL; - } - Py_XDECREF(debug); - - return mod; -#undef ADD_TO_ALL -#undef SETBUILTIN -}
--- a/cos/python/Python/ceval.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4327 +0,0 @@ - -/* Execute compiled code */ - -/* XXX TO DO: - XXX speed up searching for keywords by using a dictionary - XXX document it! - */ - -/* enable more aggressive intra-module optimizations, where available */ -#define PY_LOCAL_AGGRESSIVE - -#include "Python.h" - -#include "code.h" -#include "frameobject.h" -#include "opcode.h" -#include "structmember.h" - -#include <ctype.h> - -typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *); - -/* Forward declarations */ -static PyObject * call_function(PyObject ***, int); -static PyObject * fast_function(PyObject *, PyObject ***, int, int, int); -static PyObject * do_call(PyObject *, PyObject ***, int, int); -static PyObject * ext_do_call(PyObject *, PyObject ***, int, int, int); -static PyObject * update_keyword_args(PyObject *, int, PyObject ***, - PyObject *); -static PyObject * update_star_args(int, int, PyObject *, PyObject ***); -static PyObject * load_args(PyObject ***, int); -#define CALL_FLAG_VAR 1 -#define CALL_FLAG_KW 2 - -static int call_trace(Py_tracefunc, PyObject *, PyFrameObject *, - int, PyObject *); -static int call_trace_protected(Py_tracefunc, PyObject *, - PyFrameObject *, int, PyObject *); -static void call_exc_trace(Py_tracefunc, PyObject *, PyFrameObject *); -static int maybe_call_line_trace(Py_tracefunc, PyObject *, - PyFrameObject *, int *, int *, int *); - -static PyObject * cmp_outcome(int, PyObject *, PyObject *); -static PyObject * import_from(PyObject *, PyObject *); -static int import_all_from(PyObject *, PyObject *); -static void format_exc_check_arg(PyObject *, const char *, PyObject *); -static void format_exc_unbound(PyCodeObject *co, int oparg); -static PyObject * unicode_concatenate(PyObject *, PyObject *, - PyFrameObject *, unsigned char *); -static PyObject * special_lookup(PyObject *, char *, PyObject **); - -#define NAME_ERROR_MSG \ - "name '%.200s' is not defined" -#define GLOBAL_NAME_ERROR_MSG \ - "global name '%.200s' is not defined" -#define UNBOUNDLOCAL_ERROR_MSG \ - "local variable '%.200s' referenced before assignment" -#define UNBOUNDFREE_ERROR_MSG \ - "free variable '%.200s' referenced before assignment" \ - " in enclosing scope" - -#define PCALL(O) - -PyObject * -PyEval_GetCallStats(PyObject *self) -{ - Py_INCREF(Py_None); - return Py_None; -} - - -#ifdef WITH_THREAD -#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request) -#else -#define GIL_REQUEST 0 -#endif - -/* This can set eval_breaker to 0 even though gil_drop_request became - 1. We believe this is all right because the eval loop will release - the GIL eventually anyway. */ -#define COMPUTE_EVAL_BREAKER() \ - _Py_atomic_store_relaxed( \ - &eval_breaker, \ - GIL_REQUEST | \ - _Py_atomic_load_relaxed(&pendingcalls_to_do) | \ - pending_async_exc) - -#ifdef WITH_THREAD - -#define SET_GIL_DROP_REQUEST() \ - do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 1); \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ - } while (0) - -#define RESET_GIL_DROP_REQUEST() \ - do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 0); \ - COMPUTE_EVAL_BREAKER(); \ - } while (0) - -#endif - -/* Pending calls are only modified under pending_lock */ -#define SIGNAL_PENDING_CALLS() \ - do { \ - _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ - } while (0) - -#define UNSIGNAL_PENDING_CALLS() \ - do { \ - _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \ - COMPUTE_EVAL_BREAKER(); \ - } while (0) - -#define SIGNAL_ASYNC_EXC() \ - do { \ - pending_async_exc = 1; \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ - } while (0) - -#define UNSIGNAL_ASYNC_EXC() \ - do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) - - -#ifdef WITH_THREAD - -#include "pythread.h" - -static PyThread_type_lock pending_lock = 0; /* for pending calls */ -static long main_thread = 0; -/* This single variable consolidates all requests to break out of the fast path - in the eval loop. */ -static _Py_atomic_int eval_breaker = {0}; -/* Request for dropping the GIL */ -static _Py_atomic_int gil_drop_request = {0}; -/* Request for running pending calls. */ -static _Py_atomic_int pendingcalls_to_do = {0}; -/* Request for looking at the `async_exc` field of the current thread state. - Guarded by the GIL. */ -static int pending_async_exc = 0; - -#include "ceval_gil.h" - -int -PyEval_ThreadsInitialized(void) -{ - return gil_created(); -} - -void -PyEval_InitThreads(void) -{ - if (gil_created()) - return; - create_gil(); - take_gil(PyThreadState_GET()); - main_thread = PyThread_get_thread_ident(); - if (!pending_lock) - pending_lock = PyThread_allocate_lock(); -} - -void -_PyEval_FiniThreads(void) -{ - if (!gil_created()) - return; - destroy_gil(); - assert(!gil_created()); -} - -void -PyEval_AcquireLock(void) -{ - PyThreadState *tstate = PyThreadState_GET(); - if (tstate == NULL) - Py_FatalError("PyEval_AcquireLock: current thread state is NULL"); - take_gil(tstate); -} - -void -PyEval_ReleaseLock(void) -{ - /* This function must succeed when the current thread state is NULL. - We therefore avoid PyThreadState_GET() which dumps a fatal error - in debug mode. - */ - drop_gil((PyThreadState*)_Py_atomic_load_relaxed( - &_PyThreadState_Current)); -} - -void -PyEval_AcquireThread(PyThreadState *tstate) -{ - if (tstate == NULL) - Py_FatalError("PyEval_AcquireThread: NULL new thread state"); - /* Check someone has called PyEval_InitThreads() to create the lock */ - assert(gil_created()); - take_gil(tstate); - if (PyThreadState_Swap(tstate) != NULL) - Py_FatalError( - "PyEval_AcquireThread: non-NULL old thread state"); -} - -void -PyEval_ReleaseThread(PyThreadState *tstate) -{ - if (tstate == NULL) - Py_FatalError("PyEval_ReleaseThread: NULL thread state"); - if (PyThreadState_Swap(NULL) != tstate) - Py_FatalError("PyEval_ReleaseThread: wrong thread state"); - drop_gil(tstate); -} - -/* This function is called from PyOS_AfterFork to ensure that newly - created child processes don't hold locks referring to threads which - are not running in the child process. (This could also be done using - pthread_atfork mechanism, at least for the pthreads implementation.) */ - -void -PyEval_ReInitThreads(void) -{ - _Py_IDENTIFIER(_after_fork); - PyObject *threading, *result; - PyThreadState *tstate = PyThreadState_GET(); - - if (!gil_created()) - return; - recreate_gil(); - pending_lock = PyThread_allocate_lock(); - take_gil(tstate); - main_thread = PyThread_get_thread_ident(); - - /* Update the threading module with the new state. - */ - tstate = PyThreadState_GET(); - threading = PyMapping_GetItemString(tstate->interp->modules, - "threading"); - if (threading == NULL) { - /* threading not imported */ - PyErr_Clear(); - return; - } - result = _PyObject_CallMethodId(threading, &PyId__after_fork, NULL); - if (result == NULL) - PyErr_WriteUnraisable(threading); - else - Py_DECREF(result); - Py_DECREF(threading); -} - -#else -static _Py_atomic_int eval_breaker = {0}; -static int pending_async_exc = 0; -#endif /* WITH_THREAD */ - -/* This function is used to signal that async exceptions are waiting to be - raised, therefore it is also useful in non-threaded builds. */ - -void -_PyEval_SignalAsyncExc(void) -{ - SIGNAL_ASYNC_EXC(); -} - -/* Functions save_thread and restore_thread are always defined so - dynamically loaded modules needn't be compiled separately for use - with and without threads: */ - -PyThreadState * -PyEval_SaveThread(void) -{ - PyThreadState *tstate = PyThreadState_Swap(NULL); - if (tstate == NULL) - Py_FatalError("PyEval_SaveThread: NULL tstate"); -#ifdef WITH_THREAD - if (gil_created()) - drop_gil(tstate); -#endif - return tstate; -} - -void -PyEval_RestoreThread(PyThreadState *tstate) -{ - if (tstate == NULL) - Py_FatalError("PyEval_RestoreThread: NULL tstate"); -#ifdef WITH_THREAD - if (gil_created()) { - int err = errno; - take_gil(tstate); - /* _Py_Finalizing is protected by the GIL */ - if (_Py_Finalizing && tstate != _Py_Finalizing) { - drop_gil(tstate); - PyThread_exit_thread(); - assert(0); /* unreachable */ - } - errno = err; - } -#endif - PyThreadState_Swap(tstate); -} - - -/* Mechanism whereby asynchronously executing callbacks (e.g. UNIX - signal handlers or Mac I/O completion routines) can schedule calls - to a function to be called synchronously. - The synchronous function is called with one void* argument. - It should return 0 for success or -1 for failure -- failure should - be accompanied by an exception. - - If registry succeeds, the registry function returns 0; if it fails - (e.g. due to too many pending calls) it returns -1 (without setting - an exception condition). - - Note that because registry may occur from within signal handlers, - or other asynchronous events, calling malloc() is unsafe! - -#ifdef WITH_THREAD - Any thread can schedule pending calls, but only the main thread - will execute them. - There is no facility to schedule calls to a particular thread, but - that should be easy to change, should that ever be required. In - that case, the static variables here should go into the python - threadstate. -#endif -*/ - -#ifdef WITH_THREAD - -/* The WITH_THREAD implementation is thread-safe. It allows - scheduling to be made from any thread, and even from an executing - callback. - */ - -#define NPENDINGCALLS 32 -static struct { - int (*func)(void *); - void *arg; -} pendingcalls[NPENDINGCALLS]; -static int pendingfirst = 0; -static int pendinglast = 0; - -int -Py_AddPendingCall(int (*func)(void *), void *arg) -{ - int i, j, result=0; - PyThread_type_lock lock = pending_lock; - - /* try a few times for the lock. Since this mechanism is used - * for signal handling (on the main thread), there is a (slim) - * chance that a signal is delivered on the same thread while we - * hold the lock during the Py_MakePendingCalls() function. - * This avoids a deadlock in that case. - * Note that signals can be delivered on any thread. In particular, - * on Windows, a SIGINT is delivered on a system-created worker - * thread. - * We also check for lock being NULL, in the unlikely case that - * this function is called before any bytecode evaluation takes place. - */ - if (lock != NULL) { - for (i = 0; i<100; i++) { - if (PyThread_acquire_lock(lock, NOWAIT_LOCK)) - break; - } - if (i == 100) - return -1; - } - - i = pendinglast; - j = (i + 1) % NPENDINGCALLS; - if (j == pendingfirst) { - result = -1; /* Queue full */ - } else { - pendingcalls[i].func = func; - pendingcalls[i].arg = arg; - pendinglast = j; - } - /* signal main loop */ - SIGNAL_PENDING_CALLS(); - if (lock != NULL) - PyThread_release_lock(lock); - return result; -} - -int -Py_MakePendingCalls(void) -{ - static int busy = 0; - int i; - int r = 0; - - if (!pending_lock) { - /* initial allocation of the lock */ - pending_lock = PyThread_allocate_lock(); - if (pending_lock == NULL) - return -1; - } - - /* only service pending calls on main thread */ - if (main_thread && PyThread_get_thread_ident() != main_thread) - return 0; - /* don't perform recursive pending calls */ - if (busy) - return 0; - busy = 1; - /* perform a bounded number of calls, in case of recursion */ - for (i=0; i<NPENDINGCALLS; i++) { - int j; - int (*func)(void *); - void *arg = NULL; - - /* pop one item off the queue while holding the lock */ - PyThread_acquire_lock(pending_lock, WAIT_LOCK); - j = pendingfirst; - if (j == pendinglast) { - func = NULL; /* Queue empty */ - } else { - func = pendingcalls[j].func; - arg = pendingcalls[j].arg; - pendingfirst = (j + 1) % NPENDINGCALLS; - } - if (pendingfirst != pendinglast) - SIGNAL_PENDING_CALLS(); - else - UNSIGNAL_PENDING_CALLS(); - PyThread_release_lock(pending_lock); - /* having released the lock, perform the callback */ - if (func == NULL) - break; - r = func(arg); - if (r) - break; - } - busy = 0; - return r; -} - -#else /* if ! defined WITH_THREAD */ - -/* - WARNING! ASYNCHRONOUSLY EXECUTING CODE! - This code is used for signal handling in python that isn't built - with WITH_THREAD. - Don't use this implementation when Py_AddPendingCalls() can happen - on a different thread! - - There are two possible race conditions: - (1) nested asynchronous calls to Py_AddPendingCall() - (2) AddPendingCall() calls made while pending calls are being processed. - - (1) is very unlikely because typically signal delivery - is blocked during signal handling. So it should be impossible. - (2) is a real possibility. - The current code is safe against (2), but not against (1). - The safety against (2) is derived from the fact that only one - thread is present, interrupted by signals, and that the critical - section is protected with the "busy" variable. On Windows, which - delivers SIGINT on a system thread, this does not hold and therefore - Windows really shouldn't use this version. - The two threads could theoretically wiggle around the "busy" variable. -*/ - -#define NPENDINGCALLS 32 -static struct { - int (*func)(void *); - void *arg; -} pendingcalls[NPENDINGCALLS]; -static volatile int pendingfirst = 0; -static volatile int pendinglast = 0; -static _Py_atomic_int pendingcalls_to_do = {0}; - -int -Py_AddPendingCall(int (*func)(void *), void *arg) -{ - static volatile int busy = 0; - int i, j; - /* XXX Begin critical section */ - if (busy) - return -1; - busy = 1; - i = pendinglast; - j = (i + 1) % NPENDINGCALLS; - if (j == pendingfirst) { - busy = 0; - return -1; /* Queue full */ - } - pendingcalls[i].func = func; - pendingcalls[i].arg = arg; - pendinglast = j; - - SIGNAL_PENDING_CALLS(); - busy = 0; - /* XXX End critical section */ - return 0; -} - -int -Py_MakePendingCalls(void) -{ - static int busy = 0; - if (busy) - return 0; - busy = 1; - UNSIGNAL_PENDING_CALLS(); - for (;;) { - int i; - int (*func)(void *); - void *arg; - i = pendingfirst; - if (i == pendinglast) - break; /* Queue empty */ - func = pendingcalls[i].func; - arg = pendingcalls[i].arg; - pendingfirst = (i + 1) % NPENDINGCALLS; - if (func(arg) < 0) { - busy = 0; - SIGNAL_PENDING_CALLS(); /* We're not done yet */ - return -1; - } - } - busy = 0; - return 0; -} - -#endif /* WITH_THREAD */ - - -/* The interpreter's recursion limit */ - -#ifndef Py_DEFAULT_RECURSION_LIMIT -#define Py_DEFAULT_RECURSION_LIMIT 1000 -#endif -static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT; -int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT; - -/* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall() - if the recursion_depth reaches _Py_CheckRecursionLimit. - If USE_STACKCHECK, the macro decrements _Py_CheckRecursionLimit - to guarantee that _Py_CheckRecursiveCall() is regularly called. - Without USE_STACKCHECK, there is no need for this. */ -int -_Py_CheckRecursiveCall(char *where) -{ - PyThreadState *tstate = PyThreadState_GET(); - -#ifdef USE_STACKCHECK - if (PyOS_CheckStack()) { - --tstate->recursion_depth; - PyErr_SetString(PyExc_MemoryError, "Stack overflow"); - return -1; - } -#endif - _Py_CheckRecursionLimit = recursion_limit; - if (tstate->recursion_critical) - /* Somebody asked that we don't check for recursion. */ - return 0; - if (tstate->overflowed) { - if (tstate->recursion_depth > recursion_limit + 50) { - /* Overflowing while handling an overflow. Give up. */ - Py_FatalError("Cannot recover from stack overflow."); - } - return 0; - } - if (tstate->recursion_depth > recursion_limit) { - --tstate->recursion_depth; - tstate->overflowed = 1; - PyErr_Format(PyExc_RuntimeError, - "maximum recursion depth exceeded%s", - where); - return -1; - } - return 0; -} - -/* Status code for main loop (reason for stack unwind) */ -enum why_code { - WHY_NOT = 0x0001, /* No error */ - WHY_EXCEPTION = 0x0002, /* Exception occurred */ - WHY_RERAISE = 0x0004, /* Exception re-raised by 'finally' */ - WHY_RETURN = 0x0008, /* 'return' statement */ - WHY_BREAK = 0x0010, /* 'break' statement */ - WHY_CONTINUE = 0x0020, /* 'continue' statement */ - WHY_YIELD = 0x0040, /* 'yield' operator */ - WHY_SILENCED = 0x0080 /* Exception silenced by 'with' */ -}; - -static void save_exc_state(PyThreadState *, PyFrameObject *); -static void swap_exc_state(PyThreadState *, PyFrameObject *); -static void restore_and_clear_exc_state(PyThreadState *, PyFrameObject *); -static enum why_code do_raise(PyObject *, PyObject *); -static int unpack_iterable(PyObject *, int, int, PyObject **); - -/* Records whether tracing is on for any thread. Counts the number of - threads for which tstate->c_tracefunc is non-NULL, so if the value - is 0, we know we don't have to check this thread's c_tracefunc. - This speeds up the if statement in PyEval_EvalFrameEx() after - fast_next_opcode*/ -static int _Py_TracingPossible = 0; - - - -PyObject * -PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) -{ - return PyEval_EvalCodeEx(co, - globals, locals, - (PyObject **)NULL, 0, - (PyObject **)NULL, 0, - (PyObject **)NULL, 0, - NULL, NULL); -} - - -/* Interpreter main loop */ - -PyObject * -PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) -{ - register PyObject **stack_pointer; /* Next free slot in value stack */ - register unsigned char *next_instr; - register int opcode; /* Current opcode */ - register int oparg; /* Current opcode argument, if any */ - register enum why_code why; /* Reason for block stack unwind */ - register int err; /* Error status -- nonzero if error */ - register PyObject *x; /* Result object -- NULL if error */ - register PyObject *v; /* Temporary objects popped off stack */ - register PyObject *w; - register PyObject *u; - register PyObject *t; - register PyObject **fastlocals, **freevars; - PyObject *retval = NULL; /* Return value */ - PyThreadState *tstate = PyThreadState_GET(); - PyCodeObject *co; - - /* when tracing we set things up so that - - not (instr_lb <= current_bytecode_offset < instr_ub) - - is true when the line being executed has changed. The - initial values are such as to make this false the first - time it is tested. */ - int instr_ub = -1, instr_lb = 0, instr_prev = -1; - - unsigned char *first_instr; - PyObject *names; - PyObject *consts; - -/* Computed GOTOs, or - the-optimization-commonly-but-improperly-known-as-"threaded code" - using gcc's labels-as-values extension - (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html). - - The traditional bytecode evaluation loop uses a "switch" statement, which - decent compilers will optimize as a single indirect branch instruction - combined with a lookup table of jump addresses. However, since the - indirect jump instruction is shared by all opcodes, the CPU will have a - hard time making the right prediction for where to jump next (actually, - it will be always wrong except in the uncommon case of a sequence of - several identical opcodes). - - "Threaded code" in contrast, uses an explicit jump table and an explicit - indirect jump instruction at the end of each opcode. Since the jump - instruction is at a different address for each opcode, the CPU will make a - separate prediction for each of these instructions, which is equivalent to - predicting the second opcode of each opcode pair. These predictions have - a much better chance to turn out valid, especially in small bytecode loops. - - A mispredicted branch on a modern CPU flushes the whole pipeline and - can cost several CPU cycles (depending on the pipeline depth), - and potentially many more instructions (depending on the pipeline width). - A correctly predicted branch, however, is nearly free. - - At the time of this writing, the "threaded code" version is up to 15-20% - faster than the normal "switch" version, depending on the compiler and the - CPU architecture. - - We disable the optimization if DYNAMIC_EXECUTION_PROFILE is defined, - because it would render the measurements invalid. - - - NOTE: care must be taken that the compiler doesn't try to "optimize" the - indirect jumps by sharing them between all opcodes. Such optimizations - can be disabled on gcc by using the -fno-gcse flag (or possibly - -fno-crossjumping). -*/ - -#ifdef DYNAMIC_EXECUTION_PROFILE -#undef USE_COMPUTED_GOTOS -#define USE_COMPUTED_GOTOS 0 -#endif - -#ifdef HAVE_COMPUTED_GOTOS - #ifndef USE_COMPUTED_GOTOS - #define USE_COMPUTED_GOTOS 1 - #endif -#else - #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS - #error "Computed gotos are not supported on this compiler." - #endif - #undef USE_COMPUTED_GOTOS - #define USE_COMPUTED_GOTOS 0 -#endif - -#if USE_COMPUTED_GOTOS -/* Import the static jump table */ -#include "opcode_targets.h" - -/* This macro is used when several opcodes defer to the same implementation - (e.g. SETUP_LOOP, SETUP_FINALLY) */ -#define TARGET_WITH_IMPL(op, impl) \ - TARGET_##op: \ - opcode = op; \ - if (HAS_ARG(op)) \ - oparg = NEXTARG(); \ - case op: \ - goto impl; \ - -#define TARGET(op) \ - TARGET_##op: \ - opcode = op; \ - if (HAS_ARG(op)) \ - oparg = NEXTARG(); \ - case op: - - -#define DISPATCH() \ - { \ - if (!_Py_atomic_load_relaxed(&eval_breaker)) { \ - FAST_DISPATCH(); \ - } \ - continue; \ - } - -#ifdef LLTRACE -#define FAST_DISPATCH() \ - { \ - if (!lltrace && !_Py_TracingPossible) { \ - f->f_lasti = INSTR_OFFSET(); \ - goto *opcode_targets[*next_instr++]; \ - } \ - goto fast_next_opcode; \ - } -#else -#define FAST_DISPATCH() \ - { \ - if (!_Py_TracingPossible) { \ - f->f_lasti = INSTR_OFFSET(); \ - goto *opcode_targets[*next_instr++]; \ - } \ - goto fast_next_opcode; \ - } -#endif - -#else -#define TARGET(op) \ - case op: -#define TARGET_WITH_IMPL(op, impl) \ - /* silence compiler warnings about `impl` unused */ \ - if (0) goto impl; \ - case op: -#define DISPATCH() continue -#define FAST_DISPATCH() goto fast_next_opcode -#endif - - -/* Tuple access macros */ - -#ifndef Py_DEBUG -#define GETITEM(v, i) PyTuple_GET_ITEM((PyTupleObject *)(v), (i)) -#else -#define GETITEM(v, i) PyTuple_GetItem((v), (i)) -#endif - -#ifdef WITH_TSC -/* Use Pentium timestamp counter to mark certain events: - inst0 -- beginning of switch statement for opcode dispatch - inst1 -- end of switch statement (may be skipped) - loop0 -- the top of the mainloop - loop1 -- place where control returns again to top of mainloop - (may be skipped) - intr1 -- beginning of long interruption - intr2 -- end of long interruption - - Many opcodes call out to helper C functions. In some cases, the - time in those functions should be counted towards the time for the - opcode, but not in all cases. For example, a CALL_FUNCTION opcode - calls another Python function; there's no point in charge all the - bytecode executed by the called function to the caller. - - It's hard to make a useful judgement statically. In the presence - of operator overloading, it's impossible to tell if a call will - execute new Python code or not. - - It's a case-by-case judgement. I'll use intr1 for the following - cases: - - IMPORT_STAR - IMPORT_FROM - CALL_FUNCTION (and friends) - - */ - uint64 inst0, inst1, loop0, loop1, intr0 = 0, intr1 = 0; - int ticked = 0; - - READ_TIMESTAMP(inst0); - READ_TIMESTAMP(inst1); - READ_TIMESTAMP(loop0); - READ_TIMESTAMP(loop1); - - /* shut up the compiler */ - opcode = 0; -#endif - -/* Code access macros */ - -#define INSTR_OFFSET() ((int)(next_instr - first_instr)) -#define NEXTOP() (*next_instr++) -#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) -#define PEEKARG() ((next_instr[2]<<8) + next_instr[1]) -#define JUMPTO(x) (next_instr = first_instr + (x)) -#define JUMPBY(x) (next_instr += (x)) - -/* OpCode prediction macros - Some opcodes tend to come in pairs thus making it possible to - predict the second code when the first is run. For example, - COMPARE_OP is often followed by JUMP_IF_FALSE or JUMP_IF_TRUE. And, - those opcodes are often followed by a POP_TOP. - - Verifying the prediction costs a single high-speed test of a register - variable against a constant. If the pairing was good, then the - processor's own internal branch predication has a high likelihood of - success, resulting in a nearly zero-overhead transition to the - next opcode. A successful prediction saves a trip through the eval-loop - including its two unpredictable branches, the HAS_ARG test and the - switch-case. Combined with the processor's internal branch prediction, - a successful PREDICT has the effect of making the two opcodes run as if - they were a single new opcode with the bodies combined. - - If collecting opcode statistics, your choices are to either keep the - predictions turned-on and interpret the results as if some opcodes - had been combined or turn-off predictions so that the opcode frequency - counter updates for both opcodes. - - Opcode prediction is disabled with threaded code, since the latter allows - the CPU to record separate branch prediction information for each - opcode. - -*/ - -#if defined(DYNAMIC_EXECUTION_PROFILE) || USE_COMPUTED_GOTOS -#define PREDICT(op) if (0) goto PRED_##op -#define PREDICTED(op) PRED_##op: -#define PREDICTED_WITH_ARG(op) PRED_##op: -#else -#define PREDICT(op) if (*next_instr == op) goto PRED_##op -#define PREDICTED(op) PRED_##op: next_instr++ -#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3 -#endif - - -/* Stack manipulation macros */ - -/* The stack can grow at most MAXINT deep, as co_nlocals and - co_stacksize are ints. */ -#define STACK_LEVEL() ((int)(stack_pointer - f->f_valuestack)) -#define EMPTY() (STACK_LEVEL() == 0) -#define TOP() (stack_pointer[-1]) -#define SECOND() (stack_pointer[-2]) -#define THIRD() (stack_pointer[-3]) -#define FOURTH() (stack_pointer[-4]) -#define PEEK(n) (stack_pointer[-(n)]) -#define SET_TOP(v) (stack_pointer[-1] = (v)) -#define SET_SECOND(v) (stack_pointer[-2] = (v)) -#define SET_THIRD(v) (stack_pointer[-3] = (v)) -#define SET_FOURTH(v) (stack_pointer[-4] = (v)) -#define SET_VALUE(n, v) (stack_pointer[-(n)] = (v)) -#define BASIC_STACKADJ(n) (stack_pointer += n) -#define BASIC_PUSH(v) (*stack_pointer++ = (v)) -#define BASIC_POP() (*--stack_pointer) - -#define PUSH(v) BASIC_PUSH(v) -#define POP() BASIC_POP() -#define STACKADJ(n) BASIC_STACKADJ(n) -#define EXT_POP(STACK_POINTER) (*--(STACK_POINTER)) - -/* Local variable macros */ - -#define GETLOCAL(i) (fastlocals[i]) - -/* The SETLOCAL() macro must not DECREF the local variable in-place and - then store the new value; it must copy the old value to a temporary - value, then store the new value, and then DECREF the temporary value. - This is because it is possible that during the DECREF the frame is - accessed by other code (e.g. a __del__ method or gc.collect()) and the - variable would be pointing to already-freed memory. */ -#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \ - GETLOCAL(i) = value; \ - Py_XDECREF(tmp); } while (0) - - -#define UNWIND_BLOCK(b) \ - while (STACK_LEVEL() > (b)->b_level) { \ - PyObject *v = POP(); \ - Py_XDECREF(v); \ - } - -#define UNWIND_EXCEPT_HANDLER(b) \ - { \ - PyObject *type, *value, *traceback; \ - assert(STACK_LEVEL() >= (b)->b_level + 3); \ - while (STACK_LEVEL() > (b)->b_level + 3) { \ - value = POP(); \ - Py_XDECREF(value); \ - } \ - type = tstate->exc_type; \ - value = tstate->exc_value; \ - traceback = tstate->exc_traceback; \ - tstate->exc_type = POP(); \ - tstate->exc_value = POP(); \ - tstate->exc_traceback = POP(); \ - Py_XDECREF(type); \ - Py_XDECREF(value); \ - Py_XDECREF(traceback); \ - } - -/* Start of code */ - - /* push frame */ - if (Py_EnterRecursiveCall("")) - return NULL; - - tstate->frame = f; - - if (tstate->use_tracing) { - if (tstate->c_tracefunc != NULL) { - /* tstate->c_tracefunc, if defined, is a - function that will be called on *every* entry - to a code block. Its return value, if not - None, is a function that will be called at - the start of each executed line of code. - (Actually, the function must return itself - in order to continue tracing.) The trace - functions are called with three arguments: - a pointer to the current frame, a string - indicating why the function is called, and - an argument which depends on the situation. - The global trace function is also called - whenever an exception is detected. */ - if (call_trace_protected(tstate->c_tracefunc, - tstate->c_traceobj, - f, PyTrace_CALL, Py_None)) { - /* Trace function raised an error */ - goto exit_eval_frame; - } - } - if (tstate->c_profilefunc != NULL) { - /* Similar for c_profilefunc, except it needn't - return itself and isn't called for "line" events */ - if (call_trace_protected(tstate->c_profilefunc, - tstate->c_profileobj, - f, PyTrace_CALL, Py_None)) { - /* Profile function raised an error */ - goto exit_eval_frame; - } - } - } - - co = f->f_code; - names = co->co_names; - consts = co->co_consts; - fastlocals = f->f_localsplus; - freevars = f->f_localsplus + co->co_nlocals; - first_instr = (unsigned char*) PyBytes_AS_STRING(co->co_code); - /* An explanation is in order for the next line. - - f->f_lasti now refers to the index of the last instruction - executed. You might think this was obvious from the name, but - this wasn't always true before 2.3! PyFrame_New now sets - f->f_lasti to -1 (i.e. the index *before* the first instruction) - and YIELD_VALUE doesn't fiddle with f_lasti any more. So this - does work. Promise. - - When the PREDICT() macros are enabled, some opcode pairs follow in - direct succession without updating f->f_lasti. A successful - prediction effectively links the two codes together as if they - were a single new opcode; accordingly,f->f_lasti will point to - the first code in the pair (for instance, GET_ITER followed by - FOR_ITER is effectively a single opcode and f->f_lasti will point - at to the beginning of the combined pair.) - */ - next_instr = first_instr + f->f_lasti + 1; - stack_pointer = f->f_stacktop; - assert(stack_pointer != NULL); - f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */ - - if (co->co_flags & CO_GENERATOR && !throwflag) { - if (f->f_exc_type != NULL && f->f_exc_type != Py_None) { - /* We were in an except handler when we left, - restore the exception state which was put aside - (see YIELD_VALUE). */ - swap_exc_state(tstate, f); - } - else - save_exc_state(tstate, f); - } - - why = WHY_NOT; - err = 0; - x = Py_None; /* Not a reference, just anything non-NULL */ - w = NULL; - - if (throwflag) { /* support for generator.throw() */ - why = WHY_EXCEPTION; - goto on_error; - } - - for (;;) { - assert(stack_pointer >= f->f_valuestack); /* else underflow */ - assert(STACK_LEVEL() <= co->co_stacksize); /* else overflow */ - - /* Do periodic things. Doing this every time through - the loop would add too much overhead, so we do it - only every Nth instruction. We also do it if - ``pendingcalls_to_do'' is set, i.e. when an asynchronous - event needs attention (e.g. a signal handler or - async I/O handler); see Py_AddPendingCall() and - Py_MakePendingCalls() above. */ - - if (_Py_atomic_load_relaxed(&eval_breaker)) { - if (*next_instr == SETUP_FINALLY) { - /* Make the last opcode before - a try: finally: block uninterruptible. */ - goto fast_next_opcode; - } - tstate->tick_counter++; - if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) { - if (Py_MakePendingCalls() < 0) { - why = WHY_EXCEPTION; - goto on_error; - } - } -#ifdef WITH_THREAD - if (_Py_atomic_load_relaxed(&gil_drop_request)) { - /* Give another thread a chance */ - if (PyThreadState_Swap(NULL) != tstate) - Py_FatalError("ceval: tstate mix-up"); - drop_gil(tstate); - - /* Other threads may run now */ - - take_gil(tstate); - if (PyThreadState_Swap(tstate) != NULL) - Py_FatalError("ceval: orphan tstate"); - } -#endif - /* Check for asynchronous exceptions. */ - if (tstate->async_exc != NULL) { - x = tstate->async_exc; - tstate->async_exc = NULL; - UNSIGNAL_ASYNC_EXC(); - PyErr_SetNone(x); - Py_DECREF(x); - why = WHY_EXCEPTION; - goto on_error; - } - } - - fast_next_opcode: - f->f_lasti = INSTR_OFFSET(); - - /* line-by-line tracing support */ - - if (_Py_TracingPossible && - tstate->c_tracefunc != NULL && !tstate->tracing) { - /* see maybe_call_line_trace - for expository comments */ - f->f_stacktop = stack_pointer; - - err = maybe_call_line_trace(tstate->c_tracefunc, - tstate->c_traceobj, - f, &instr_lb, &instr_ub, - &instr_prev); - /* Reload possibly changed frame fields */ - JUMPTO(f->f_lasti); - if (f->f_stacktop != NULL) { - stack_pointer = f->f_stacktop; - f->f_stacktop = NULL; - } - if (err) { - /* trace function raised an exception */ - goto on_error; - } - } - - /* Extract opcode and argument */ - - opcode = NEXTOP(); - oparg = 0; /* allows oparg to be stored in a register because - it doesn't have to be remembered across a full loop */ - if (HAS_ARG(opcode)) - oparg = NEXTARG(); - dispatch_opcode: - - /* Main switch on opcode */ - - switch (opcode) { - - /* BEWARE! - It is essential that any operation that fails sets either - x to NULL, err to nonzero, or why to anything but WHY_NOT, - and that no operation that succeeds does this! */ - - TARGET(NOP) - FAST_DISPATCH(); - - TARGET(LOAD_FAST) - x = GETLOCAL(oparg); - if (x != NULL) { - Py_INCREF(x); - PUSH(x); - FAST_DISPATCH(); - } - format_exc_check_arg(PyExc_UnboundLocalError, - UNBOUNDLOCAL_ERROR_MSG, - PyTuple_GetItem(co->co_varnames, oparg)); - break; - - TARGET(LOAD_CONST) - x = GETITEM(consts, oparg); - Py_INCREF(x); - PUSH(x); - FAST_DISPATCH(); - - PREDICTED_WITH_ARG(STORE_FAST); - TARGET(STORE_FAST) - v = POP(); - SETLOCAL(oparg, v); - FAST_DISPATCH(); - - TARGET(POP_TOP) - v = POP(); - Py_DECREF(v); - FAST_DISPATCH(); - - TARGET(ROT_TWO) - v = TOP(); - w = SECOND(); - SET_TOP(w); - SET_SECOND(v); - FAST_DISPATCH(); - - TARGET(ROT_THREE) - v = TOP(); - w = SECOND(); - x = THIRD(); - SET_TOP(w); - SET_SECOND(x); - SET_THIRD(v); - FAST_DISPATCH(); - - TARGET(DUP_TOP) - v = TOP(); - Py_INCREF(v); - PUSH(v); - FAST_DISPATCH(); - - TARGET(DUP_TOP_TWO) - x = TOP(); - Py_INCREF(x); - w = SECOND(); - Py_INCREF(w); - STACKADJ(2); - SET_TOP(x); - SET_SECOND(w); - FAST_DISPATCH(); - - TARGET(UNARY_POSITIVE) - v = TOP(); - x = PyNumber_Positive(v); - Py_DECREF(v); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(UNARY_NEGATIVE) - v = TOP(); - x = PyNumber_Negative(v); - Py_DECREF(v); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(UNARY_NOT) - v = TOP(); - err = PyObject_IsTrue(v); - Py_DECREF(v); - if (err == 0) { - Py_INCREF(Py_True); - SET_TOP(Py_True); - DISPATCH(); - } - else if (err > 0) { - Py_INCREF(Py_False); - SET_TOP(Py_False); - err = 0; - DISPATCH(); - } - STACKADJ(-1); - break; - - TARGET(UNARY_INVERT) - v = TOP(); - x = PyNumber_Invert(v); - Py_DECREF(v); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_POWER) - w = POP(); - v = TOP(); - x = PyNumber_Power(v, w, Py_None); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_MULTIPLY) - w = POP(); - v = TOP(); - x = PyNumber_Multiply(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_TRUE_DIVIDE) - w = POP(); - v = TOP(); - x = PyNumber_TrueDivide(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_FLOOR_DIVIDE) - w = POP(); - v = TOP(); - x = PyNumber_FloorDivide(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_MODULO) - w = POP(); - v = TOP(); - if (PyUnicode_CheckExact(v)) - x = PyUnicode_Format(v, w); - else - x = PyNumber_Remainder(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_ADD) - w = POP(); - v = TOP(); - if (PyUnicode_CheckExact(v) && - PyUnicode_CheckExact(w)) { - x = unicode_concatenate(v, w, f, next_instr); - /* unicode_concatenate consumed the ref to v */ - goto skip_decref_vx; - } - else { - x = PyNumber_Add(v, w); - } - Py_DECREF(v); - skip_decref_vx: - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_SUBTRACT) - w = POP(); - v = TOP(); - x = PyNumber_Subtract(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_SUBSCR) - w = POP(); - v = TOP(); - x = PyObject_GetItem(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_LSHIFT) - w = POP(); - v = TOP(); - x = PyNumber_Lshift(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_RSHIFT) - w = POP(); - v = TOP(); - x = PyNumber_Rshift(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_AND) - w = POP(); - v = TOP(); - x = PyNumber_And(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_XOR) - w = POP(); - v = TOP(); - x = PyNumber_Xor(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(BINARY_OR) - w = POP(); - v = TOP(); - x = PyNumber_Or(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(LIST_APPEND) - w = POP(); - v = PEEK(oparg); - err = PyList_Append(v, w); - Py_DECREF(w); - if (err == 0) { - PREDICT(JUMP_ABSOLUTE); - DISPATCH(); - } - break; - - TARGET(SET_ADD) - w = POP(); - v = stack_pointer[-oparg]; - err = PySet_Add(v, w); - Py_DECREF(w); - if (err == 0) { - PREDICT(JUMP_ABSOLUTE); - DISPATCH(); - } - break; - - TARGET(INPLACE_POWER) - w = POP(); - v = TOP(); - x = PyNumber_InPlacePower(v, w, Py_None); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_MULTIPLY) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceMultiply(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_TRUE_DIVIDE) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceTrueDivide(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_FLOOR_DIVIDE) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceFloorDivide(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_MODULO) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceRemainder(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_ADD) - w = POP(); - v = TOP(); - if (PyUnicode_CheckExact(v) && - PyUnicode_CheckExact(w)) { - x = unicode_concatenate(v, w, f, next_instr); - /* unicode_concatenate consumed the ref to v */ - goto skip_decref_v; - } - else { - x = PyNumber_InPlaceAdd(v, w); - } - Py_DECREF(v); - skip_decref_v: - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_SUBTRACT) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceSubtract(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_LSHIFT) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceLshift(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_RSHIFT) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceRshift(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_AND) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceAnd(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_XOR) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceXor(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(INPLACE_OR) - w = POP(); - v = TOP(); - x = PyNumber_InPlaceOr(v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(STORE_SUBSCR) - w = TOP(); - v = SECOND(); - u = THIRD(); - STACKADJ(-3); - /* v[w] = u */ - err = PyObject_SetItem(v, w, u); - Py_DECREF(u); - Py_DECREF(v); - Py_DECREF(w); - if (err == 0) DISPATCH(); - break; - - TARGET(DELETE_SUBSCR) - w = TOP(); - v = SECOND(); - STACKADJ(-2); - /* del v[w] */ - err = PyObject_DelItem(v, w); - Py_DECREF(v); - Py_DECREF(w); - if (err == 0) DISPATCH(); - break; - - TARGET(PRINT_EXPR) - v = POP(); - w = PySys_GetObject("displayhook"); - if (w == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "lost sys.displayhook"); - err = -1; - x = NULL; - } - if (err == 0) { - x = PyTuple_Pack(1, v); - if (x == NULL) - err = -1; - } - if (err == 0) { - w = PyEval_CallObject(w, x); - Py_XDECREF(w); - if (w == NULL) - err = -1; - } - Py_DECREF(v); - Py_XDECREF(x); - break; - - TARGET(RAISE_VARARGS) - v = w = NULL; - switch (oparg) { - case 2: - v = POP(); /* cause */ - case 1: - w = POP(); /* exc */ - case 0: /* Fallthrough */ - why = do_raise(w, v); - break; - default: - PyErr_SetString(PyExc_SystemError, - "bad RAISE_VARARGS oparg"); - why = WHY_EXCEPTION; - break; - } - break; - - TARGET(STORE_LOCALS) - x = POP(); - v = f->f_locals; - Py_XDECREF(v); - f->f_locals = x; - DISPATCH(); - - TARGET(RETURN_VALUE) - retval = POP(); - why = WHY_RETURN; - goto fast_block_end; - - TARGET(YIELD_VALUE) - retval = POP(); - f->f_stacktop = stack_pointer; - why = WHY_YIELD; - goto fast_yield; - - TARGET(POP_EXCEPT) - { - PyTryBlock *b = PyFrame_BlockPop(f); - if (b->b_type != EXCEPT_HANDLER) { - PyErr_SetString(PyExc_SystemError, - "popped block is not an except handler"); - why = WHY_EXCEPTION; - break; - } - UNWIND_EXCEPT_HANDLER(b); - } - DISPATCH(); - - TARGET(POP_BLOCK) - { - PyTryBlock *b = PyFrame_BlockPop(f); - UNWIND_BLOCK(b); - } - DISPATCH(); - - PREDICTED(END_FINALLY); - TARGET(END_FINALLY) - v = POP(); - if (PyLong_Check(v)) { - why = (enum why_code) PyLong_AS_LONG(v); - assert(why != WHY_YIELD); - if (why == WHY_RETURN || - why == WHY_CONTINUE) - retval = POP(); - if (why == WHY_SILENCED) { - /* An exception was silenced by 'with', we must - manually unwind the EXCEPT_HANDLER block which was - created when the exception was caught, otherwise - the stack will be in an inconsistent state. */ - PyTryBlock *b = PyFrame_BlockPop(f); - assert(b->b_type == EXCEPT_HANDLER); - UNWIND_EXCEPT_HANDLER(b); - why = WHY_NOT; - } - } - else if (PyExceptionClass_Check(v)) { - w = POP(); - u = POP(); - PyErr_Restore(v, w, u); - why = WHY_RERAISE; - break; - } - else if (v != Py_None) { - PyErr_SetString(PyExc_SystemError, - "'finally' pops bad exception"); - why = WHY_EXCEPTION; - } - Py_DECREF(v); - break; - - TARGET(LOAD_BUILD_CLASS) - x = PyDict_GetItemString(f->f_builtins, - "__build_class__"); - if (x == NULL) { - PyErr_SetString(PyExc_ImportError, - "__build_class__ not found"); - break; - } - Py_INCREF(x); - PUSH(x); - break; - - TARGET(STORE_NAME) - w = GETITEM(names, oparg); - v = POP(); - if ((x = f->f_locals) != NULL) { - if (PyDict_CheckExact(x)) - err = PyDict_SetItem(x, w, v); - else - err = PyObject_SetItem(x, w, v); - Py_DECREF(v); - if (err == 0) DISPATCH(); - break; - } - PyErr_Format(PyExc_SystemError, - "no locals found when storing %R", w); - break; - - TARGET(DELETE_NAME) - w = GETITEM(names, oparg); - if ((x = f->f_locals) != NULL) { - if ((err = PyObject_DelItem(x, w)) != 0) - format_exc_check_arg(PyExc_NameError, - NAME_ERROR_MSG, - w); - break; - } - PyErr_Format(PyExc_SystemError, - "no locals when deleting %R", w); - break; - - PREDICTED_WITH_ARG(UNPACK_SEQUENCE); - TARGET(UNPACK_SEQUENCE) - v = POP(); - if (PyTuple_CheckExact(v) && - PyTuple_GET_SIZE(v) == oparg) { - PyObject **items = \ - ((PyTupleObject *)v)->ob_item; - while (oparg--) { - w = items[oparg]; - Py_INCREF(w); - PUSH(w); - } - Py_DECREF(v); - DISPATCH(); - } else if (PyList_CheckExact(v) && - PyList_GET_SIZE(v) == oparg) { - PyObject **items = \ - ((PyListObject *)v)->ob_item; - while (oparg--) { - w = items[oparg]; - Py_INCREF(w); - PUSH(w); - } - } else if (unpack_iterable(v, oparg, -1, - stack_pointer + oparg)) { - STACKADJ(oparg); - } else { - /* unpack_iterable() raised an exception */ - why = WHY_EXCEPTION; - } - Py_DECREF(v); - break; - - TARGET(UNPACK_EX) - { - int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); - v = POP(); - - if (unpack_iterable(v, oparg & 0xFF, oparg >> 8, - stack_pointer + totalargs)) { - stack_pointer += totalargs; - } else { - why = WHY_EXCEPTION; - } - Py_DECREF(v); - break; - } - - TARGET(STORE_ATTR) - w = GETITEM(names, oparg); - v = TOP(); - u = SECOND(); - STACKADJ(-2); - err = PyObject_SetAttr(v, w, u); /* v.w = u */ - Py_DECREF(v); - Py_DECREF(u); - if (err == 0) DISPATCH(); - break; - - TARGET(DELETE_ATTR) - w = GETITEM(names, oparg); - v = POP(); - err = PyObject_SetAttr(v, w, (PyObject *)NULL); - /* del v.w */ - Py_DECREF(v); - break; - - TARGET(STORE_GLOBAL) - w = GETITEM(names, oparg); - v = POP(); - err = PyDict_SetItem(f->f_globals, w, v); - Py_DECREF(v); - if (err == 0) DISPATCH(); - break; - - TARGET(DELETE_GLOBAL) - w = GETITEM(names, oparg); - if ((err = PyDict_DelItem(f->f_globals, w)) != 0) - format_exc_check_arg( - PyExc_NameError, GLOBAL_NAME_ERROR_MSG, w); - break; - - TARGET(LOAD_NAME) - w = GETITEM(names, oparg); - if ((v = f->f_locals) == NULL) { - PyErr_Format(PyExc_SystemError, - "no locals when loading %R", w); - why = WHY_EXCEPTION; - break; - } - if (PyDict_CheckExact(v)) { - x = PyDict_GetItem(v, w); - Py_XINCREF(x); - } - else { - x = PyObject_GetItem(v, w); - if (x == NULL && PyErr_Occurred()) { - if (!PyErr_ExceptionMatches( - PyExc_KeyError)) - break; - PyErr_Clear(); - } - } - if (x == NULL) { - x = PyDict_GetItem(f->f_globals, w); - if (x == NULL) { - x = PyDict_GetItem(f->f_builtins, w); - if (x == NULL) { - format_exc_check_arg( - PyExc_NameError, - NAME_ERROR_MSG, w); - break; - } - } - Py_INCREF(x); - } - PUSH(x); - DISPATCH(); - - TARGET(LOAD_GLOBAL) - w = GETITEM(names, oparg); - if (PyUnicode_CheckExact(w)) { - /* Inline the PyDict_GetItem() calls. - WARNING: this is an extreme speed hack. - Do not try this at home. */ - Py_hash_t hash = ((PyASCIIObject *)w)->hash; - if (hash != -1) { - PyDictObject *d; - PyDictEntry *e; - d = (PyDictObject *)(f->f_globals); - e = d->ma_lookup(d, w, hash); - if (e == NULL) { - x = NULL; - break; - } - x = e->me_value; - if (x != NULL) { - Py_INCREF(x); - PUSH(x); - DISPATCH(); - } - d = (PyDictObject *)(f->f_builtins); - e = d->ma_lookup(d, w, hash); - if (e == NULL) { - x = NULL; - break; - } - x = e->me_value; - if (x != NULL) { - Py_INCREF(x); - PUSH(x); - DISPATCH(); - } - goto load_global_error; - } - } - /* This is the un-inlined version of the code above */ - x = PyDict_GetItem(f->f_globals, w); - if (x == NULL) { - x = PyDict_GetItem(f->f_builtins, w); - if (x == NULL) { - load_global_error: - format_exc_check_arg( - PyExc_NameError, - GLOBAL_NAME_ERROR_MSG, w); - break; - } - } - Py_INCREF(x); - PUSH(x); - DISPATCH(); - - TARGET(DELETE_FAST) - x = GETLOCAL(oparg); - if (x != NULL) { - SETLOCAL(oparg, NULL); - DISPATCH(); - } - format_exc_check_arg( - PyExc_UnboundLocalError, - UNBOUNDLOCAL_ERROR_MSG, - PyTuple_GetItem(co->co_varnames, oparg) - ); - break; - - TARGET(DELETE_DEREF) - x = freevars[oparg]; - if (PyCell_GET(x) != NULL) { - PyCell_Set(x, NULL); - DISPATCH(); - } - err = -1; - format_exc_unbound(co, oparg); - break; - - TARGET(LOAD_CLOSURE) - x = freevars[oparg]; - Py_INCREF(x); - PUSH(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(LOAD_DEREF) - x = freevars[oparg]; - w = PyCell_Get(x); - if (w != NULL) { - PUSH(w); - DISPATCH(); - } - err = -1; - format_exc_unbound(co, oparg); - break; - - TARGET(STORE_DEREF) - w = POP(); - x = freevars[oparg]; - PyCell_Set(x, w); - Py_DECREF(w); - DISPATCH(); - - TARGET(BUILD_TUPLE) - x = PyTuple_New(oparg); - if (x != NULL) { - for (; --oparg >= 0;) { - w = POP(); - PyTuple_SET_ITEM(x, oparg, w); - } - PUSH(x); - DISPATCH(); - } - break; - - TARGET(BUILD_LIST) - x = PyList_New(oparg); - if (x != NULL) { - for (; --oparg >= 0;) { - w = POP(); - PyList_SET_ITEM(x, oparg, w); - } - PUSH(x); - DISPATCH(); - } - break; - - TARGET(BUILD_SET) - x = PySet_New(NULL); - if (x != NULL) { - for (; --oparg >= 0;) { - w = POP(); - if (err == 0) - err = PySet_Add(x, w); - Py_DECREF(w); - } - if (err != 0) { - Py_DECREF(x); - break; - } - PUSH(x); - DISPATCH(); - } - break; - - TARGET(BUILD_MAP) - x = _PyDict_NewPresized((Py_ssize_t)oparg); - PUSH(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(STORE_MAP) - w = TOP(); /* key */ - u = SECOND(); /* value */ - v = THIRD(); /* dict */ - STACKADJ(-2); - assert (PyDict_CheckExact(v)); - err = PyDict_SetItem(v, w, u); /* v[w] = u */ - Py_DECREF(u); - Py_DECREF(w); - if (err == 0) DISPATCH(); - break; - - TARGET(MAP_ADD) - w = TOP(); /* key */ - u = SECOND(); /* value */ - STACKADJ(-2); - v = stack_pointer[-oparg]; /* dict */ - assert (PyDict_CheckExact(v)); - err = PyDict_SetItem(v, w, u); /* v[w] = u */ - Py_DECREF(u); - Py_DECREF(w); - if (err == 0) { - PREDICT(JUMP_ABSOLUTE); - DISPATCH(); - } - break; - - TARGET(LOAD_ATTR) - w = GETITEM(names, oparg); - v = TOP(); - x = PyObject_GetAttr(v, w); - Py_DECREF(v); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(COMPARE_OP) - w = POP(); - v = TOP(); - x = cmp_outcome(oparg, v, w); - Py_DECREF(v); - Py_DECREF(w); - SET_TOP(x); - if (x == NULL) break; - PREDICT(POP_JUMP_IF_FALSE); - PREDICT(POP_JUMP_IF_TRUE); - DISPATCH(); - - TARGET(IMPORT_NAME) - w = GETITEM(names, oparg); - x = PyDict_GetItemString(f->f_builtins, "__import__"); - if (x == NULL) { - PyErr_SetString(PyExc_ImportError, - "__import__ not found"); - break; - } - Py_INCREF(x); - v = POP(); - u = TOP(); - if (PyLong_AsLong(u) != -1 || PyErr_Occurred()) - w = PyTuple_Pack(5, - w, - f->f_globals, - f->f_locals == NULL ? - Py_None : f->f_locals, - v, - u); - else - w = PyTuple_Pack(4, - w, - f->f_globals, - f->f_locals == NULL ? - Py_None : f->f_locals, - v); - Py_DECREF(v); - Py_DECREF(u); - if (w == NULL) { - u = POP(); - Py_DECREF(x); - x = NULL; - break; - } - READ_TIMESTAMP(intr0); - v = x; - x = PyEval_CallObject(v, w); - Py_DECREF(v); - READ_TIMESTAMP(intr1); - Py_DECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(IMPORT_STAR) - v = POP(); - PyFrame_FastToLocals(f); - if ((x = f->f_locals) == NULL) { - PyErr_SetString(PyExc_SystemError, - "no locals found during 'import *'"); - break; - } - READ_TIMESTAMP(intr0); - err = import_all_from(x, v); - READ_TIMESTAMP(intr1); - PyFrame_LocalsToFast(f, 0); - Py_DECREF(v); - if (err == 0) DISPATCH(); - break; - - TARGET(IMPORT_FROM) - w = GETITEM(names, oparg); - v = TOP(); - READ_TIMESTAMP(intr0); - x = import_from(v, w); - READ_TIMESTAMP(intr1); - PUSH(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(JUMP_FORWARD) - JUMPBY(oparg); - FAST_DISPATCH(); - - PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); - TARGET(POP_JUMP_IF_FALSE) - w = POP(); - if (w == Py_True) { - Py_DECREF(w); - FAST_DISPATCH(); - } - if (w == Py_False) { - Py_DECREF(w); - JUMPTO(oparg); - FAST_DISPATCH(); - } - err = PyObject_IsTrue(w); - Py_DECREF(w); - if (err > 0) - err = 0; - else if (err == 0) - JUMPTO(oparg); - else - break; - DISPATCH(); - - PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); - TARGET(POP_JUMP_IF_TRUE) - w = POP(); - if (w == Py_False) { - Py_DECREF(w); - FAST_DISPATCH(); - } - if (w == Py_True) { - Py_DECREF(w); - JUMPTO(oparg); - FAST_DISPATCH(); - } - err = PyObject_IsTrue(w); - Py_DECREF(w); - if (err > 0) { - err = 0; - JUMPTO(oparg); - } - else if (err == 0) - ; - else - break; - DISPATCH(); - - TARGET(JUMP_IF_FALSE_OR_POP) - w = TOP(); - if (w == Py_True) { - STACKADJ(-1); - Py_DECREF(w); - FAST_DISPATCH(); - } - if (w == Py_False) { - JUMPTO(oparg); - FAST_DISPATCH(); - } - err = PyObject_IsTrue(w); - if (err > 0) { - STACKADJ(-1); - Py_DECREF(w); - err = 0; - } - else if (err == 0) - JUMPTO(oparg); - else - break; - DISPATCH(); - - TARGET(JUMP_IF_TRUE_OR_POP) - w = TOP(); - if (w == Py_False) { - STACKADJ(-1); - Py_DECREF(w); - FAST_DISPATCH(); - } - if (w == Py_True) { - JUMPTO(oparg); - FAST_DISPATCH(); - } - err = PyObject_IsTrue(w); - if (err > 0) { - err = 0; - JUMPTO(oparg); - } - else if (err == 0) { - STACKADJ(-1); - Py_DECREF(w); - } - else - break; - DISPATCH(); - - PREDICTED_WITH_ARG(JUMP_ABSOLUTE); - TARGET(JUMP_ABSOLUTE) - JUMPTO(oparg); -#if FAST_LOOPS - /* Enabling this path speeds-up all while and for-loops by bypassing - the per-loop checks for signals. By default, this should be turned-off - because it prevents detection of a control-break in tight loops like - "while 1: pass". Compile with this option turned-on when you need - the speed-up and do not need break checking inside tight loops (ones - that contain only instructions ending with FAST_DISPATCH). - */ - FAST_DISPATCH(); -#else - DISPATCH(); -#endif - - TARGET(GET_ITER) - /* before: [obj]; after [getiter(obj)] */ - v = TOP(); - x = PyObject_GetIter(v); - Py_DECREF(v); - if (x != NULL) { - SET_TOP(x); - PREDICT(FOR_ITER); - DISPATCH(); - } - STACKADJ(-1); - break; - - PREDICTED_WITH_ARG(FOR_ITER); - TARGET(FOR_ITER) - /* before: [iter]; after: [iter, iter()] *or* [] */ - v = TOP(); - x = (*v->ob_type->tp_iternext)(v); - if (x != NULL) { - PUSH(x); - PREDICT(STORE_FAST); - PREDICT(UNPACK_SEQUENCE); - DISPATCH(); - } - if (PyErr_Occurred()) { - if (!PyErr_ExceptionMatches( - PyExc_StopIteration)) - break; - PyErr_Clear(); - } - /* iterator ended normally */ - x = v = POP(); - Py_DECREF(v); - JUMPBY(oparg); - DISPATCH(); - - TARGET(BREAK_LOOP) - why = WHY_BREAK; - goto fast_block_end; - - TARGET(CONTINUE_LOOP) - retval = PyLong_FromLong(oparg); - if (!retval) { - x = NULL; - break; - } - why = WHY_CONTINUE; - goto fast_block_end; - - TARGET_WITH_IMPL(SETUP_LOOP, _setup_finally) - TARGET_WITH_IMPL(SETUP_EXCEPT, _setup_finally) - TARGET(SETUP_FINALLY) - _setup_finally: - /* NOTE: If you add any new block-setup opcodes that - are not try/except/finally handlers, you may need - to update the PyGen_NeedsFinalizing() function. - */ - - PyFrame_BlockSetup(f, opcode, INSTR_OFFSET() + oparg, - STACK_LEVEL()); - DISPATCH(); - - TARGET(SETUP_WITH) - { - static PyObject *exit, *enter; - w = TOP(); - x = special_lookup(w, "__exit__", &exit); - if (!x) - break; - SET_TOP(x); - u = special_lookup(w, "__enter__", &enter); - Py_DECREF(w); - if (!u) { - x = NULL; - break; - } - x = PyObject_CallFunctionObjArgs(u, NULL); - Py_DECREF(u); - if (!x) - break; - /* Setup the finally block before pushing the result - of __enter__ on the stack. */ - PyFrame_BlockSetup(f, SETUP_FINALLY, INSTR_OFFSET() + oparg, - STACK_LEVEL()); - - PUSH(x); - DISPATCH(); - } - - TARGET(WITH_CLEANUP) - { - /* At the top of the stack are 1-3 values indicating - how/why we entered the finally clause: - - TOP = None - - (TOP, SECOND) = (WHY_{RETURN,CONTINUE}), retval - - TOP = WHY_*; no retval below it - - (TOP, SECOND, THIRD) = exc_info() - (FOURTH, FITH, SIXTH) = previous exception for EXCEPT_HANDLER - Below them is EXIT, the context.__exit__ bound method. - In the last case, we must call - EXIT(TOP, SECOND, THIRD) - otherwise we must call - EXIT(None, None, None) - - In the first two cases, we remove EXIT from the - stack, leaving the rest in the same order. In the - third case, we shift the bottom 3 values of the - stack down, and replace the empty spot with NULL. - - In addition, if the stack represents an exception, - *and* the function call returns a 'true' value, we - push WHY_SILENCED onto the stack. END_FINALLY will - then not re-raise the exception. (But non-local - gotos should still be resumed.) - */ - - PyObject *exit_func; - u = TOP(); - if (u == Py_None) { - (void)POP(); - exit_func = TOP(); - SET_TOP(u); - v = w = Py_None; - } - else if (PyLong_Check(u)) { - (void)POP(); - switch(PyLong_AsLong(u)) { - case WHY_RETURN: - case WHY_CONTINUE: - /* Retval in TOP. */ - exit_func = SECOND(); - SET_SECOND(TOP()); - SET_TOP(u); - break; - default: - exit_func = TOP(); - SET_TOP(u); - break; - } - u = v = w = Py_None; - } - else { - PyObject *tp, *exc, *tb; - PyTryBlock *block; - v = SECOND(); - w = THIRD(); - tp = FOURTH(); - exc = PEEK(5); - tb = PEEK(6); - exit_func = PEEK(7); - SET_VALUE(7, tb); - SET_VALUE(6, exc); - SET_VALUE(5, tp); - /* UNWIND_EXCEPT_HANDLER will pop this off. */ - SET_FOURTH(NULL); - /* We just shifted the stack down, so we have - to tell the except handler block that the - values are lower than it expects. */ - block = &f->f_blockstack[f->f_iblock - 1]; - assert(block->b_type == EXCEPT_HANDLER); - block->b_level--; - } - /* XXX Not the fastest way to call it... */ - x = PyObject_CallFunctionObjArgs(exit_func, u, v, w, - NULL); - Py_DECREF(exit_func); - if (x == NULL) - break; /* Go to error exit */ - - if (u != Py_None) - err = PyObject_IsTrue(x); - else - err = 0; - Py_DECREF(x); - - if (err < 0) - break; /* Go to error exit */ - else if (err > 0) { - err = 0; - /* There was an exception and a True return */ - PUSH(PyLong_FromLong((long) WHY_SILENCED)); - } - PREDICT(END_FINALLY); - break; - } - - TARGET(CALL_FUNCTION) - { - PyObject **sp; - PCALL(PCALL_ALL); - sp = stack_pointer; - x = call_function(&sp, oparg); - stack_pointer = sp; - PUSH(x); - if (x != NULL) - DISPATCH(); - break; - } - - TARGET_WITH_IMPL(CALL_FUNCTION_VAR, _call_function_var_kw) - TARGET_WITH_IMPL(CALL_FUNCTION_KW, _call_function_var_kw) - TARGET(CALL_FUNCTION_VAR_KW) - _call_function_var_kw: - { - int na = oparg & 0xff; - int nk = (oparg>>8) & 0xff; - int flags = (opcode - CALL_FUNCTION) & 3; - int n = na + 2 * nk; - PyObject **pfunc, *func, **sp; - PCALL(PCALL_ALL); - if (flags & CALL_FLAG_VAR) - n++; - if (flags & CALL_FLAG_KW) - n++; - pfunc = stack_pointer - n - 1; - func = *pfunc; - - if (PyMethod_Check(func) - && PyMethod_GET_SELF(func) != NULL) { - PyObject *self = PyMethod_GET_SELF(func); - Py_INCREF(self); - func = PyMethod_GET_FUNCTION(func); - Py_INCREF(func); - Py_DECREF(*pfunc); - *pfunc = self; - na++; - /* n++; */ - } else - Py_INCREF(func); - sp = stack_pointer; - READ_TIMESTAMP(intr0); - x = ext_do_call(func, &sp, flags, na, nk); - READ_TIMESTAMP(intr1); - stack_pointer = sp; - Py_DECREF(func); - - while (stack_pointer > pfunc) { - w = POP(); - Py_DECREF(w); - } - PUSH(x); - if (x != NULL) - DISPATCH(); - break; - } - - TARGET_WITH_IMPL(MAKE_CLOSURE, _make_function) - TARGET(MAKE_FUNCTION) - _make_function: - { - int posdefaults = oparg & 0xff; - int kwdefaults = (oparg>>8) & 0xff; - int num_annotations = (oparg >> 16) & 0x7fff; - - w = POP(); /* qualname */ - v = POP(); /* code object */ - x = PyFunction_NewWithQualName(v, f->f_globals, w); - Py_DECREF(v); - Py_DECREF(w); - - if (x != NULL && opcode == MAKE_CLOSURE) { - v = POP(); - if (PyFunction_SetClosure(x, v) != 0) { - /* Can't happen unless bytecode is corrupt. */ - why = WHY_EXCEPTION; - } - Py_DECREF(v); - } - - if (x != NULL && num_annotations > 0) { - Py_ssize_t name_ix; - u = POP(); /* names of args with annotations */ - v = PyDict_New(); - if (v == NULL) { - Py_DECREF(x); - x = NULL; - break; - } - name_ix = PyTuple_Size(u); - assert(num_annotations == name_ix+1); - while (name_ix > 0) { - --name_ix; - t = PyTuple_GET_ITEM(u, name_ix); - w = POP(); - /* XXX(nnorwitz): check for errors */ - PyDict_SetItem(v, t, w); - Py_DECREF(w); - } - - if (PyFunction_SetAnnotations(x, v) != 0) { - /* Can't happen unless - PyFunction_SetAnnotations changes. */ - why = WHY_EXCEPTION; - } - Py_DECREF(v); - Py_DECREF(u); - } - - /* XXX Maybe this should be a separate opcode? */ - if (x != NULL && posdefaults > 0) { - v = PyTuple_New(posdefaults); - if (v == NULL) { - Py_DECREF(x); - x = NULL; - break; - } - while (--posdefaults >= 0) { - w = POP(); - PyTuple_SET_ITEM(v, posdefaults, w); - } - if (PyFunction_SetDefaults(x, v) != 0) { - /* Can't happen unless - PyFunction_SetDefaults changes. */ - why = WHY_EXCEPTION; - } - Py_DECREF(v); - } - if (x != NULL && kwdefaults > 0) { - v = PyDict_New(); - if (v == NULL) { - Py_DECREF(x); - x = NULL; - break; - } - while (--kwdefaults >= 0) { - w = POP(); /* default value */ - u = POP(); /* kw only arg name */ - /* XXX(nnorwitz): check for errors */ - PyDict_SetItem(v, u, w); - Py_DECREF(w); - Py_DECREF(u); - } - if (PyFunction_SetKwDefaults(x, v) != 0) { - /* Can't happen unless - PyFunction_SetKwDefaults changes. */ - why = WHY_EXCEPTION; - } - Py_DECREF(v); - } - PUSH(x); - break; - } - - TARGET(BUILD_SLICE) - if (oparg == 3) - w = POP(); - else - w = NULL; - v = POP(); - u = TOP(); - x = PySlice_New(u, v, w); - Py_DECREF(u); - Py_DECREF(v); - Py_XDECREF(w); - SET_TOP(x); - if (x != NULL) DISPATCH(); - break; - - TARGET(EXTENDED_ARG) - opcode = NEXTOP(); - oparg = oparg<<16 | NEXTARG(); - goto dispatch_opcode; - -#if USE_COMPUTED_GOTOS - _unknown_opcode: -#endif - default: - fprintf(stderr, - "XXX lineno: %d, opcode: %d\n", - PyFrame_GetLineNumber(f), - opcode); - PyErr_SetString(PyExc_SystemError, "unknown opcode"); - why = WHY_EXCEPTION; - break; - - - } /* switch */ - - on_error: - - READ_TIMESTAMP(inst1); - - /* Quickly continue if no error occurred */ - - if (why == WHY_NOT) { - if (err == 0 && x != NULL) { -#ifdef CHECKEXC - /* This check is expensive! */ - if (PyErr_Occurred()) - fprintf(stderr, - "XXX undetected error\n"); - else { -#endif - READ_TIMESTAMP(loop1); - continue; /* Normal, fast path */ -#ifdef CHECKEXC - } -#endif - } - why = WHY_EXCEPTION; - x = Py_None; - err = 0; - } - - /* Double-check exception status */ - - if (why == WHY_EXCEPTION || why == WHY_RERAISE) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_SystemError, - "error return without exception set"); - why = WHY_EXCEPTION; - } - } -#ifdef CHECKEXC - else { - /* This check is expensive! */ - if (PyErr_Occurred()) { - char buf[128]; - sprintf(buf, "Stack unwind with exception " - "set and why=%d", why); - Py_FatalError(buf); - } - } -#endif - - /* Log traceback info if this is a real exception */ - - if (why == WHY_EXCEPTION) { - PyTraceBack_Here(f); - - if (tstate->c_tracefunc != NULL) - call_exc_trace(tstate->c_tracefunc, - tstate->c_traceobj, f); - } - - /* For the rest, treat WHY_RERAISE as WHY_EXCEPTION */ - - if (why == WHY_RERAISE) - why = WHY_EXCEPTION; - - /* Unwind stacks if a (pseudo) exception occurred */ - -fast_block_end: - while (why != WHY_NOT && f->f_iblock > 0) { - /* Peek at the current block. */ - PyTryBlock *b = &f->f_blockstack[f->f_iblock - 1]; - - assert(why != WHY_YIELD); - if (b->b_type == SETUP_LOOP && why == WHY_CONTINUE) { - why = WHY_NOT; - JUMPTO(PyLong_AS_LONG(retval)); - Py_DECREF(retval); - break; - } - /* Now we have to pop the block. */ - f->f_iblock--; - - if (b->b_type == EXCEPT_HANDLER) { - UNWIND_EXCEPT_HANDLER(b); - continue; - } - UNWIND_BLOCK(b); - if (b->b_type == SETUP_LOOP && why == WHY_BREAK) { - why = WHY_NOT; - JUMPTO(b->b_handler); - break; - } - if (why == WHY_EXCEPTION && (b->b_type == SETUP_EXCEPT - || b->b_type == SETUP_FINALLY)) { - PyObject *exc, *val, *tb; - int handler = b->b_handler; - /* Beware, this invalidates all b->b_* fields */ - PyFrame_BlockSetup(f, EXCEPT_HANDLER, -1, STACK_LEVEL()); - PUSH(tstate->exc_traceback); - PUSH(tstate->exc_value); - if (tstate->exc_type != NULL) { - PUSH(tstate->exc_type); - } - else { - Py_INCREF(Py_None); - PUSH(Py_None); - } - PyErr_Fetch(&exc, &val, &tb); - /* Make the raw exception data - available to the handler, - so a program can emulate the - Python main loop. */ - PyErr_NormalizeException( - &exc, &val, &tb); - PyException_SetTraceback(val, tb); - Py_INCREF(exc); - tstate->exc_type = exc; - Py_INCREF(val); - tstate->exc_value = val; - tstate->exc_traceback = tb; - if (tb == NULL) - tb = Py_None; - Py_INCREF(tb); - PUSH(tb); - PUSH(val); - PUSH(exc); - why = WHY_NOT; - JUMPTO(handler); - break; - } - if (b->b_type == SETUP_FINALLY) { - if (why & (WHY_RETURN | WHY_CONTINUE)) - PUSH(retval); - PUSH(PyLong_FromLong((long)why)); - why = WHY_NOT; - JUMPTO(b->b_handler); - break; - } - } /* unwind stack */ - - /* End the loop if we still have an error (or return) */ - - if (why != WHY_NOT) - break; - READ_TIMESTAMP(loop1); - - } /* main loop */ - - assert(why != WHY_YIELD); - /* Pop remaining stack entries. */ - while (!EMPTY()) { - v = POP(); - Py_XDECREF(v); - } - - if (why != WHY_RETURN) - retval = NULL; - -fast_yield: - if (co->co_flags & CO_GENERATOR && (why == WHY_YIELD || why == WHY_RETURN)) { - /* The purpose of this block is to put aside the generator's exception - state and restore that of the calling frame. If the current - exception state is from the caller, we clear the exception values - on the generator frame, so they are not swapped back in latter. The - origin of the current exception state is determined by checking for - except handler blocks, which we must be in iff a new exception - state came into existence in this frame. (An uncaught exception - would have why == WHY_EXCEPTION, and we wouldn't be here). */ - int i; - for (i = 0; i < f->f_iblock; i++) - if (f->f_blockstack[i].b_type == EXCEPT_HANDLER) - break; - if (i == f->f_iblock) - /* We did not create this exception. */ - restore_and_clear_exc_state(tstate, f); - else - swap_exc_state(tstate, f); - } - - if (tstate->use_tracing) { - if (tstate->c_tracefunc) { - if (why == WHY_RETURN || why == WHY_YIELD) { - if (call_trace(tstate->c_tracefunc, - tstate->c_traceobj, f, - PyTrace_RETURN, retval)) { - Py_XDECREF(retval); - retval = NULL; - why = WHY_EXCEPTION; - } - } - else if (why == WHY_EXCEPTION) { - call_trace_protected(tstate->c_tracefunc, - tstate->c_traceobj, f, - PyTrace_RETURN, NULL); - } - } - if (tstate->c_profilefunc) { - if (why == WHY_EXCEPTION) - call_trace_protected(tstate->c_profilefunc, - tstate->c_profileobj, f, - PyTrace_RETURN, NULL); - else if (call_trace(tstate->c_profilefunc, - tstate->c_profileobj, f, - PyTrace_RETURN, retval)) { - Py_XDECREF(retval); - retval = NULL; - /* why = WHY_EXCEPTION; */ - } - } - } - - /* pop frame */ -exit_eval_frame: - Py_LeaveRecursiveCall(); - tstate->frame = f->f_back; - - return retval; -} - -static void -format_missing(const char *kind, PyCodeObject *co, PyObject *names) -{ - int err; - Py_ssize_t len = PyList_GET_SIZE(names); - PyObject *name_str, *comma, *tail, *tmp; - - assert(PyList_CheckExact(names)); - assert(len >= 1); - /* Deal with the joys of natural language. */ - switch (len) { - case 1: - name_str = PyList_GET_ITEM(names, 0); - Py_INCREF(name_str); - break; - case 2: - name_str = PyUnicode_FromFormat("%U and %U", - PyList_GET_ITEM(names, len - 2), - PyList_GET_ITEM(names, len - 1)); - break; - default: - tail = PyUnicode_FromFormat(", %U, and %U", - PyList_GET_ITEM(names, len - 2), - PyList_GET_ITEM(names, len - 1)); - /* Chop off the last two objects in the list. This shouldn't actually - fail, but we can't be too careful. */ - err = PyList_SetSlice(names, len - 2, len, NULL); - if (err == -1) { - Py_DECREF(tail); - return; - } - /* Stitch everything up into a nice comma-separated list. */ - comma = PyUnicode_FromString(", "); - if (comma == NULL) { - Py_DECREF(tail); - return; - } - tmp = PyUnicode_Join(comma, names); - Py_DECREF(comma); - if (tmp == NULL) { - Py_DECREF(tail); - return; - } - name_str = PyUnicode_Concat(tmp, tail); - Py_DECREF(tmp); - Py_DECREF(tail); - break; - } - if (name_str == NULL) - return; - PyErr_Format(PyExc_TypeError, - "%U() missing %i required %s argument%s: %U", - co->co_name, - len, - kind, - len == 1 ? "" : "s", - name_str); - Py_DECREF(name_str); -} - -static void -missing_arguments(PyCodeObject *co, int missing, int defcount, - PyObject **fastlocals) -{ - int i, j = 0; - int start, end; - int positional = defcount != -1; - const char *kind = positional ? "positional" : "keyword-only"; - PyObject *missing_names; - - /* Compute the names of the arguments that are missing. */ - missing_names = PyList_New(missing); - if (missing_names == NULL) - return; - if (positional) { - start = 0; - end = co->co_argcount - defcount; - } - else { - start = co->co_argcount; - end = start + co->co_kwonlyargcount; - } - for (i = start; i < end; i++) { - if (GETLOCAL(i) == NULL) { - PyObject *raw = PyTuple_GET_ITEM(co->co_varnames, i); - PyObject *name = PyObject_Repr(raw); - if (name == NULL) { - Py_DECREF(missing_names); - return; - } - PyList_SET_ITEM(missing_names, j++, name); - } - } - assert(j == missing); - format_missing(kind, co, missing_names); - Py_DECREF(missing_names); -} - -static void -too_many_positional(PyCodeObject *co, int given, int defcount, PyObject **fastlocals) -{ - int plural; - int kwonly_given = 0; - int i; - PyObject *sig, *kwonly_sig; - - assert((co->co_flags & CO_VARARGS) == 0); - /* Count missing keyword-only args. */ - for (i = co->co_argcount; i < co->co_argcount + co->co_kwonlyargcount; i++) - if (GETLOCAL(i) != NULL) - kwonly_given++; - if (defcount) { - int atleast = co->co_argcount - defcount; - plural = 1; - sig = PyUnicode_FromFormat("from %d to %d", atleast, co->co_argcount); - } - else { - plural = co->co_argcount != 1; - sig = PyUnicode_FromFormat("%d", co->co_argcount); - } - if (sig == NULL) - return; - if (kwonly_given) { - const char *format = " positional argument%s (and %d keyword-only argument%s)"; - kwonly_sig = PyUnicode_FromFormat(format, given != 1 ? "s" : "", kwonly_given, - kwonly_given != 1 ? "s" : ""); - if (kwonly_sig == NULL) { - Py_DECREF(sig); - return; - } - } - else { - /* This will not fail. */ - kwonly_sig = PyUnicode_FromString(""); - assert(kwonly_sig != NULL); - } - PyErr_Format(PyExc_TypeError, - "%U() takes %U positional argument%s but %d%U %s given", - co->co_name, - sig, - plural ? "s" : "", - given, - kwonly_sig, - given == 1 && !kwonly_given ? "was" : "were"); - Py_DECREF(sig); - Py_DECREF(kwonly_sig); -} - -/* This is gonna seem *real weird*, but if you put some other code between - PyEval_EvalFrame() and PyEval_EvalCodeEx() you will need to adjust - the test in the if statements in Misc/gdbinit (pystack and pystackv). */ - -PyObject * -PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, - PyObject **args, int argcount, PyObject **kws, int kwcount, - PyObject **defs, int defcount, PyObject *kwdefs, PyObject *closure) -{ - PyCodeObject* co = (PyCodeObject*)_co; - register PyFrameObject *f; - register PyObject *retval = NULL; - register PyObject **fastlocals, **freevars; - PyThreadState *tstate = PyThreadState_GET(); - PyObject *x, *u; - int total_args = co->co_argcount + co->co_kwonlyargcount; - int i; - int n = argcount; - PyObject *kwdict = NULL; - - if (globals == NULL) { - PyErr_SetString(PyExc_SystemError, - "PyEval_EvalCodeEx: NULL globals"); - return NULL; - } - - assert(tstate != NULL); - assert(globals != NULL); - f = PyFrame_New(tstate, co, globals, locals); - if (f == NULL) - return NULL; - - fastlocals = f->f_localsplus; - freevars = f->f_localsplus + co->co_nlocals; - - /* Parse arguments. */ - if (co->co_flags & CO_VARKEYWORDS) { - kwdict = PyDict_New(); - if (kwdict == NULL) - goto fail; - i = total_args; - if (co->co_flags & CO_VARARGS) - i++; - SETLOCAL(i, kwdict); - } - if (argcount > co->co_argcount) - n = co->co_argcount; - for (i = 0; i < n; i++) { - x = args[i]; - Py_INCREF(x); - SETLOCAL(i, x); - } - if (co->co_flags & CO_VARARGS) { - u = PyTuple_New(argcount - n); - if (u == NULL) - goto fail; - SETLOCAL(total_args, u); - for (i = n; i < argcount; i++) { - x = args[i]; - Py_INCREF(x); - PyTuple_SET_ITEM(u, i-n, x); - } - } - for (i = 0; i < kwcount; i++) { - PyObject **co_varnames; - PyObject *keyword = kws[2*i]; - PyObject *value = kws[2*i + 1]; - int j; - if (keyword == NULL || !PyUnicode_Check(keyword)) { - PyErr_Format(PyExc_TypeError, - "%U() keywords must be strings", - co->co_name); - goto fail; - } - /* Speed hack: do raw pointer compares. As names are - normally interned this should almost always hit. */ - co_varnames = ((PyTupleObject *)(co->co_varnames))->ob_item; - for (j = 0; j < total_args; j++) { - PyObject *nm = co_varnames[j]; - if (nm == keyword) - goto kw_found; - } - /* Slow fallback, just in case */ - for (j = 0; j < total_args; j++) { - PyObject *nm = co_varnames[j]; - int cmp = PyObject_RichCompareBool( - keyword, nm, Py_EQ); - if (cmp > 0) - goto kw_found; - else if (cmp < 0) - goto fail; - } - if (j >= total_args && kwdict == NULL) { - PyErr_Format(PyExc_TypeError, - "%U() got an unexpected " - "keyword argument '%S'", - co->co_name, - keyword); - goto fail; - } - PyDict_SetItem(kwdict, keyword, value); - continue; - kw_found: - if (GETLOCAL(j) != NULL) { - PyErr_Format(PyExc_TypeError, - "%U() got multiple " - "values for argument '%S'", - co->co_name, - keyword); - goto fail; - } - Py_INCREF(value); - SETLOCAL(j, value); - } - if (argcount > co->co_argcount && !(co->co_flags & CO_VARARGS)) { - too_many_positional(co, argcount, defcount, fastlocals); - goto fail; - } - if (argcount < co->co_argcount) { - int m = co->co_argcount - defcount; - int missing = 0; - for (i = argcount; i < m; i++) - if (GETLOCAL(i) == NULL) - missing++; - if (missing) { - missing_arguments(co, missing, defcount, fastlocals); - goto fail; - } - if (n > m) - i = n - m; - else - i = 0; - for (; i < defcount; i++) { - if (GETLOCAL(m+i) == NULL) { - PyObject *def = defs[i]; - Py_INCREF(def); - SETLOCAL(m+i, def); - } - } - } - if (co->co_kwonlyargcount > 0) { - int missing = 0; - for (i = co->co_argcount; i < total_args; i++) { - PyObject *name; - if (GETLOCAL(i) != NULL) - continue; - name = PyTuple_GET_ITEM(co->co_varnames, i); - if (kwdefs != NULL) { - PyObject *def = PyDict_GetItem(kwdefs, name); - if (def) { - Py_INCREF(def); - SETLOCAL(i, def); - continue; - } - } - missing++; - } - if (missing) { - missing_arguments(co, missing, -1, fastlocals); - goto fail; - } - } - - /* Allocate and initialize storage for cell vars, and copy free - vars into frame. */ - for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) { - PyObject *c; - int arg; - /* Possibly account for the cell variable being an argument. */ - if (co->co_cell2arg != NULL && - (arg = co->co_cell2arg[i]) != CO_CELL_NOT_AN_ARG) - c = PyCell_New(GETLOCAL(arg)); - else - c = PyCell_New(NULL); - if (c == NULL) - goto fail; - SETLOCAL(co->co_nlocals + i, c); - } - for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { - PyObject *o = PyTuple_GET_ITEM(closure, i); - Py_INCREF(o); - freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; - } - - if (co->co_flags & CO_GENERATOR) { - /* Don't need to keep the reference to f_back, it will be set - * when the generator is resumed. */ - Py_XDECREF(f->f_back); - f->f_back = NULL; - - PCALL(PCALL_GENERATOR); - - /* Create a new generator that owns the ready to run frame - * and return that as the value. */ - return PyGen_New(f); - } - - retval = PyEval_EvalFrameEx(f,0); - -fail: /* Jump here from prelude on failure */ - - /* decref'ing the frame can cause __del__ methods to get invoked, - which can call back into Python. While we're done with the - current Python frame (f), the associated C stack is still in use, - so recursion_depth must be boosted for the duration. - */ - assert(tstate != NULL); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return retval; -} - - -static PyObject * -special_lookup(PyObject *o, char *meth, PyObject **cache) -{ - PyObject *res; - res = _PyObject_LookupSpecial(o, meth, cache); - if (res == NULL && !PyErr_Occurred()) { - PyErr_SetObject(PyExc_AttributeError, *cache); - return NULL; - } - return res; -} - - -/* These 3 functions deal with the exception state of generators. */ - -static void -save_exc_state(PyThreadState *tstate, PyFrameObject *f) -{ - PyObject *type, *value, *traceback; - Py_XINCREF(tstate->exc_type); - Py_XINCREF(tstate->exc_value); - Py_XINCREF(tstate->exc_traceback); - type = f->f_exc_type; - value = f->f_exc_value; - traceback = f->f_exc_traceback; - f->f_exc_type = tstate->exc_type; - f->f_exc_value = tstate->exc_value; - f->f_exc_traceback = tstate->exc_traceback; - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); -} - -static void -swap_exc_state(PyThreadState *tstate, PyFrameObject *f) -{ - PyObject *tmp; - tmp = tstate->exc_type; - tstate->exc_type = f->f_exc_type; - f->f_exc_type = tmp; - tmp = tstate->exc_value; - tstate->exc_value = f->f_exc_value; - f->f_exc_value = tmp; - tmp = tstate->exc_traceback; - tstate->exc_traceback = f->f_exc_traceback; - f->f_exc_traceback = tmp; -} - -static void -restore_and_clear_exc_state(PyThreadState *tstate, PyFrameObject *f) -{ - PyObject *type, *value, *tb; - type = tstate->exc_type; - value = tstate->exc_value; - tb = tstate->exc_traceback; - tstate->exc_type = f->f_exc_type; - tstate->exc_value = f->f_exc_value; - tstate->exc_traceback = f->f_exc_traceback; - f->f_exc_type = NULL; - f->f_exc_value = NULL; - f->f_exc_traceback = NULL; - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(tb); -} - - -/* Logic for the raise statement (too complicated for inlining). - This *consumes* a reference count to each of its arguments. */ -static enum why_code -do_raise(PyObject *exc, PyObject *cause) -{ - PyObject *type = NULL, *value = NULL; - - if (exc == NULL) { - /* Reraise */ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *tb; - type = tstate->exc_type; - value = tstate->exc_value; - tb = tstate->exc_traceback; - if (type == Py_None) { - PyErr_SetString(PyExc_RuntimeError, - "No active exception to reraise"); - return WHY_EXCEPTION; - } - Py_XINCREF(type); - Py_XINCREF(value); - Py_XINCREF(tb); - PyErr_Restore(type, value, tb); - return WHY_RERAISE; - } - - /* We support the following forms of raise: - raise - raise <instance> - raise <type> */ - - if (PyExceptionClass_Check(exc)) { - type = exc; - value = PyObject_CallObject(exc, NULL); - if (value == NULL) - goto raise_error; - if (!PyExceptionInstance_Check(value)) { - PyErr_Format(PyExc_TypeError, - "calling %R should have returned an instance of " - "BaseException, not %R", - type, Py_TYPE(value)); - goto raise_error; - } - } - else if (PyExceptionInstance_Check(exc)) { - value = exc; - type = PyExceptionInstance_Class(exc); - Py_INCREF(type); - } - else { - /* Not something you can raise. You get an exception - anyway, just not what you specified :-) */ - Py_DECREF(exc); - PyErr_SetString(PyExc_TypeError, - "exceptions must derive from BaseException"); - goto raise_error; - } - - if (cause) { - PyObject *fixed_cause; - if (PyExceptionClass_Check(cause)) { - fixed_cause = PyObject_CallObject(cause, NULL); - if (fixed_cause == NULL) - goto raise_error; - Py_DECREF(cause); - } - else if (PyExceptionInstance_Check(cause)) { - fixed_cause = cause; - } - else { - PyErr_SetString(PyExc_TypeError, - "exception causes must derive from " - "BaseException"); - goto raise_error; - } - PyException_SetCause(value, fixed_cause); - } - - PyErr_SetObject(type, value); - /* PyErr_SetObject incref's its arguments */ - Py_XDECREF(value); - Py_XDECREF(type); - return WHY_EXCEPTION; - -raise_error: - Py_XDECREF(value); - Py_XDECREF(type); - Py_XDECREF(cause); - return WHY_EXCEPTION; -} - -/* Iterate v argcnt times and store the results on the stack (via decreasing - sp). Return 1 for success, 0 if error. - - If argcntafter == -1, do a simple unpack. If it is >= 0, do an unpack - with a variable target. -*/ - -static int -unpack_iterable(PyObject *v, int argcnt, int argcntafter, PyObject **sp) -{ - int i = 0, j = 0; - Py_ssize_t ll = 0; - PyObject *it; /* iter(v) */ - PyObject *w; - PyObject *l = NULL; /* variable list */ - - assert(v != NULL); - - it = PyObject_GetIter(v); - if (it == NULL) - goto Error; - - for (; i < argcnt; i++) { - w = PyIter_Next(it); - if (w == NULL) { - /* Iterator done, via error or exhaustion. */ - if (!PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "need more than %d value%s to unpack", - i, i == 1 ? "" : "s"); - } - goto Error; - } - *--sp = w; - } - - if (argcntafter == -1) { - /* We better have exhausted the iterator now. */ - w = PyIter_Next(it); - if (w == NULL) { - if (PyErr_Occurred()) - goto Error; - Py_DECREF(it); - return 1; - } - Py_DECREF(w); - PyErr_Format(PyExc_ValueError, "too many values to unpack " - "(expected %d)", argcnt); - goto Error; - } - - l = PySequence_List(it); - if (l == NULL) - goto Error; - *--sp = l; - i++; - - ll = PyList_GET_SIZE(l); - if (ll < argcntafter) { - PyErr_Format(PyExc_ValueError, "need more than %zd values to unpack", - argcnt + ll); - goto Error; - } - - /* Pop the "after-variable" args off the list. */ - for (j = argcntafter; j > 0; j--, i++) { - *--sp = PyList_GET_ITEM(l, ll - j); - } - /* Resize the list. */ - Py_SIZE(l) = ll - argcntafter; - Py_DECREF(it); - return 1; - -Error: - for (; i > 0; i--, sp++) - Py_DECREF(*sp); - Py_XDECREF(it); - return 0; -} - - -#ifdef LLTRACE -static int -prtrace(PyObject *v, char *str) -{ - printf("%s ", str); - if (PyObject_Print(v, stdout, 0) != 0) - PyErr_Clear(); /* Don't know what else to do */ - printf("\n"); - return 1; -} -#endif - -static void -call_exc_trace(Py_tracefunc func, PyObject *self, PyFrameObject *f) -{ - PyObject *type, *value, *traceback, *arg; - int err; - PyErr_Fetch(&type, &value, &traceback); - if (value == NULL) { - value = Py_None; - Py_INCREF(value); - } - arg = PyTuple_Pack(3, type, value, traceback); - if (arg == NULL) { - PyErr_Restore(type, value, traceback); - return; - } - err = call_trace(func, self, f, PyTrace_EXCEPTION, arg); - Py_DECREF(arg); - if (err == 0) - PyErr_Restore(type, value, traceback); - else { - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); - } -} - -static int -call_trace_protected(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, - int what, PyObject *arg) -{ - PyObject *type, *value, *traceback; - int err; - PyErr_Fetch(&type, &value, &traceback); - err = call_trace(func, obj, frame, what, arg); - if (err == 0) - { - PyErr_Restore(type, value, traceback); - return 0; - } - else { - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(traceback); - return -1; - } -} - -static int -call_trace(Py_tracefunc func, PyObject *obj, PyFrameObject *frame, - int what, PyObject *arg) -{ - register PyThreadState *tstate = frame->f_tstate; - int result; - if (tstate->tracing) - return 0; - tstate->tracing++; - tstate->use_tracing = 0; - result = func(obj, frame, what, arg); - tstate->use_tracing = ((tstate->c_tracefunc != NULL) - || (tstate->c_profilefunc != NULL)); - tstate->tracing--; - return result; -} - -PyObject * -_PyEval_CallTracing(PyObject *func, PyObject *args) -{ - PyFrameObject *frame = PyEval_GetFrame(); - PyThreadState *tstate = frame->f_tstate; - int save_tracing = tstate->tracing; - int save_use_tracing = tstate->use_tracing; - PyObject *result; - - tstate->tracing = 0; - tstate->use_tracing = ((tstate->c_tracefunc != NULL) - || (tstate->c_profilefunc != NULL)); - result = PyObject_Call(func, args, NULL); - tstate->tracing = save_tracing; - tstate->use_tracing = save_use_tracing; - return result; -} - -/* See Objects/lnotab_notes.txt for a description of how tracing works. */ -static int -maybe_call_line_trace(Py_tracefunc func, PyObject *obj, - PyFrameObject *frame, int *instr_lb, int *instr_ub, - int *instr_prev) -{ - int result = 0; - int line = frame->f_lineno; - - /* If the last instruction executed isn't in the current - instruction window, reset the window. - */ - if (frame->f_lasti < *instr_lb || frame->f_lasti >= *instr_ub) { - PyAddrPair bounds; - line = _PyCode_CheckLineNumber(frame->f_code, frame->f_lasti, - &bounds); - *instr_lb = bounds.ap_lower; - *instr_ub = bounds.ap_upper; - } - /* If the last instruction falls at the start of a line or if - it represents a jump backwards, update the frame's line - number and call the trace function. */ - if (frame->f_lasti == *instr_lb || frame->f_lasti < *instr_prev) { - frame->f_lineno = line; - result = call_trace(func, obj, frame, PyTrace_LINE, Py_None); - } - *instr_prev = frame->f_lasti; - return result; -} - -void -PyEval_SetProfile(Py_tracefunc func, PyObject *arg) -{ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *temp = tstate->c_profileobj; - Py_XINCREF(arg); - tstate->c_profilefunc = NULL; - tstate->c_profileobj = NULL; - /* Must make sure that tracing is not ignored if 'temp' is freed */ - tstate->use_tracing = tstate->c_tracefunc != NULL; - Py_XDECREF(temp); - tstate->c_profilefunc = func; - tstate->c_profileobj = arg; - /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); -} - -void -PyEval_SetTrace(Py_tracefunc func, PyObject *arg) -{ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *temp = tstate->c_traceobj; - _Py_TracingPossible += (func != NULL) - (tstate->c_tracefunc != NULL); - Py_XINCREF(arg); - tstate->c_tracefunc = NULL; - tstate->c_traceobj = NULL; - /* Must make sure that profiling is not ignored if 'temp' is freed */ - tstate->use_tracing = tstate->c_profilefunc != NULL; - Py_XDECREF(temp); - tstate->c_tracefunc = func; - tstate->c_traceobj = arg; - /* Flag that tracing or profiling is turned on */ - tstate->use_tracing = ((func != NULL) - || (tstate->c_profilefunc != NULL)); -} - -PyObject * -PyEval_GetBuiltins(void) -{ - PyFrameObject *current_frame = PyEval_GetFrame(); - if (current_frame == NULL) - return PyThreadState_GET()->interp->builtins; - else - return current_frame->f_builtins; -} - -PyObject * -PyEval_GetLocals(void) -{ - PyFrameObject *current_frame = PyEval_GetFrame(); - if (current_frame == NULL) - return NULL; - PyFrame_FastToLocals(current_frame); - return current_frame->f_locals; -} - -PyObject * -PyEval_GetGlobals(void) -{ - PyFrameObject *current_frame = PyEval_GetFrame(); - if (current_frame == NULL) - return NULL; - else - return current_frame->f_globals; -} - -PyFrameObject * -PyEval_GetFrame(void) -{ - PyThreadState *tstate = PyThreadState_GET(); - return _PyThreadState_GetFrame(tstate); -} - -int -PyEval_MergeCompilerFlags(PyCompilerFlags *cf) -{ - PyFrameObject *current_frame = PyEval_GetFrame(); - int result = cf->cf_flags != 0; - - if (current_frame != NULL) { - const int codeflags = current_frame->f_code->co_flags; - const int compilerflags = codeflags & PyCF_MASK; - if (compilerflags) { - result = 1; - cf->cf_flags |= compilerflags; - } -#if 0 /* future keyword */ - if (codeflags & CO_GENERATOR_ALLOWED) { - result = 1; - cf->cf_flags |= CO_GENERATOR_ALLOWED; - } -#endif - } - return result; -} - - -/* External interface to call any callable object. - The arg must be a tuple or NULL. The kw must be a dict or NULL. */ - -PyObject * -PyEval_CallObjectWithKeywords(PyObject *func, PyObject *arg, PyObject *kw) -{ - PyObject *result; - - if (arg == NULL) { - arg = PyTuple_New(0); - if (arg == NULL) - return NULL; - } - else if (!PyTuple_Check(arg)) { - PyErr_SetString(PyExc_TypeError, - "argument list must be a tuple"); - return NULL; - } - else - Py_INCREF(arg); - - if (kw != NULL && !PyDict_Check(kw)) { - PyErr_SetString(PyExc_TypeError, - "keyword list must be a dictionary"); - Py_DECREF(arg); - return NULL; - } - - result = PyObject_Call(func, arg, kw); - Py_DECREF(arg); - return result; -} - -const char * -PyEval_GetFuncName(PyObject *func) -{ - if (PyMethod_Check(func)) - return PyEval_GetFuncName(PyMethod_GET_FUNCTION(func)); - else if (PyFunction_Check(func)) - return _PyUnicode_AsString(((PyFunctionObject*)func)->func_name); - else if (PyCFunction_Check(func)) - return ((PyCFunctionObject*)func)->m_ml->ml_name; - else - return func->ob_type->tp_name; -} - -const char * -PyEval_GetFuncDesc(PyObject *func) -{ - if (PyMethod_Check(func)) - return "()"; - else if (PyFunction_Check(func)) - return "()"; - else if (PyCFunction_Check(func)) - return "()"; - else - return " object"; -} - -static void -err_args(PyObject *func, int flags, int nargs) -{ - if (flags & METH_NOARGS) - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%d given)", - ((PyCFunctionObject *)func)->m_ml->ml_name, - nargs); - else - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%d given)", - ((PyCFunctionObject *)func)->m_ml->ml_name, - nargs); -} - -#define C_TRACE(x, call) \ -if (tstate->use_tracing && tstate->c_profilefunc) { \ - if (call_trace(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate->frame, PyTrace_C_CALL, \ - func)) { \ - x = NULL; \ - } \ - else { \ - x = call; \ - if (tstate->c_profilefunc != NULL) { \ - if (x == NULL) { \ - call_trace_protected(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate->frame, PyTrace_C_EXCEPTION, \ - func); \ - /* XXX should pass (type, value, tb) */ \ - } else { \ - if (call_trace(tstate->c_profilefunc, \ - tstate->c_profileobj, \ - tstate->frame, PyTrace_C_RETURN, \ - func)) { \ - Py_DECREF(x); \ - x = NULL; \ - } \ - } \ - } \ - } \ -} else { \ - x = call; \ - } - -static PyObject * -call_function(PyObject ***pp_stack, int oparg -#ifdef WITH_TSC - , uint64* pintr0, uint64* pintr1 -#endif - ) -{ - int na = oparg & 0xff; - int nk = (oparg>>8) & 0xff; - int n = na + 2 * nk; - PyObject **pfunc = (*pp_stack) - n - 1; - PyObject *func = *pfunc; - PyObject *x, *w; - - /* Always dispatch PyCFunction first, because these are - presumed to be the most frequent callable object. - */ - if (PyCFunction_Check(func) && nk == 0) { - int flags = PyCFunction_GET_FLAGS(func); - PyThreadState *tstate = PyThreadState_GET(); - - PCALL(PCALL_CFUNCTION); - if (flags & (METH_NOARGS | METH_O)) { - PyCFunction meth = PyCFunction_GET_FUNCTION(func); - PyObject *self = PyCFunction_GET_SELF(func); - if (flags & METH_NOARGS && na == 0) { - C_TRACE(x, (*meth)(self,NULL)); - } - else if (flags & METH_O && na == 1) { - PyObject *arg = EXT_POP(*pp_stack); - C_TRACE(x, (*meth)(self,arg)); - Py_DECREF(arg); - } - else { - err_args(func, flags, na); - x = NULL; - } - } - else { - PyObject *callargs; - callargs = load_args(pp_stack, na); - READ_TIMESTAMP(*pintr0); - C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); - READ_TIMESTAMP(*pintr1); - Py_XDECREF(callargs); - } - } else { - if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) { - /* optimize access to bound methods */ - PyObject *self = PyMethod_GET_SELF(func); - PCALL(PCALL_METHOD); - PCALL(PCALL_BOUND_METHOD); - Py_INCREF(self); - func = PyMethod_GET_FUNCTION(func); - Py_INCREF(func); - Py_DECREF(*pfunc); - *pfunc = self; - na++; - n++; - } else - Py_INCREF(func); - READ_TIMESTAMP(*pintr0); - if (PyFunction_Check(func)) - x = fast_function(func, pp_stack, n, na, nk); - else - x = do_call(func, pp_stack, na, nk); - READ_TIMESTAMP(*pintr1); - Py_DECREF(func); - } - - /* Clear the stack of the function object. Also removes - the arguments in case they weren't consumed already - (fast_function() and err_args() leave them on the stack). - */ - while ((*pp_stack) > pfunc) { - w = EXT_POP(*pp_stack); - Py_DECREF(w); - PCALL(PCALL_POP); - } - return x; -} - -/* The fast_function() function optimize calls for which no argument - tuple is necessary; the objects are passed directly from the stack. - For the simplest case -- a function that takes only positional - arguments and is called with only positional arguments -- it - inlines the most primitive frame setup code from - PyEval_EvalCodeEx(), which vastly reduces the checks that must be - done before evaluating the frame. -*/ - -static PyObject * -fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk) -{ - PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); - PyObject *globals = PyFunction_GET_GLOBALS(func); - PyObject *argdefs = PyFunction_GET_DEFAULTS(func); - PyObject *kwdefs = PyFunction_GET_KW_DEFAULTS(func); - PyObject **d = NULL; - int nd = 0; - - PCALL(PCALL_FUNCTION); - PCALL(PCALL_FAST_FUNCTION); - if (argdefs == NULL && co->co_argcount == n && - co->co_kwonlyargcount == 0 && nk==0 && - co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { - PyFrameObject *f; - PyObject *retval = NULL; - PyThreadState *tstate = PyThreadState_GET(); - PyObject **fastlocals, **stack; - int i; - - PCALL(PCALL_FASTER_FUNCTION); - assert(globals != NULL); - /* XXX Perhaps we should create a specialized - PyFrame_New() that doesn't take locals, but does - take builtins without sanity checking them. - */ - assert(tstate != NULL); - f = PyFrame_New(tstate, co, globals, NULL); - if (f == NULL) - return NULL; - - fastlocals = f->f_localsplus; - stack = (*pp_stack) - n; - - for (i = 0; i < n; i++) { - Py_INCREF(*stack); - fastlocals[i] = *stack++; - } - retval = PyEval_EvalFrameEx(f,0); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return retval; - } - if (argdefs != NULL) { - d = &PyTuple_GET_ITEM(argdefs, 0); - nd = Py_SIZE(argdefs); - } - return PyEval_EvalCodeEx((PyObject*)co, globals, - (PyObject *)NULL, (*pp_stack)-n, na, - (*pp_stack)-2*nk, nk, d, nd, kwdefs, - PyFunction_GET_CLOSURE(func)); -} - -static PyObject * -update_keyword_args(PyObject *orig_kwdict, int nk, PyObject ***pp_stack, - PyObject *func) -{ - PyObject *kwdict = NULL; - if (orig_kwdict == NULL) - kwdict = PyDict_New(); - else { - kwdict = PyDict_Copy(orig_kwdict); - Py_DECREF(orig_kwdict); - } - if (kwdict == NULL) - return NULL; - while (--nk >= 0) { - int err; - PyObject *value = EXT_POP(*pp_stack); - PyObject *key = EXT_POP(*pp_stack); - if (PyDict_GetItem(kwdict, key) != NULL) { - PyErr_Format(PyExc_TypeError, - "%.200s%s got multiple values " - "for keyword argument '%U'", - PyEval_GetFuncName(func), - PyEval_GetFuncDesc(func), - key); - Py_DECREF(key); - Py_DECREF(value); - Py_DECREF(kwdict); - return NULL; - } - err = PyDict_SetItem(kwdict, key, value); - Py_DECREF(key); - Py_DECREF(value); - if (err) { - Py_DECREF(kwdict); - return NULL; - } - } - return kwdict; -} - -static PyObject * -update_star_args(int nstack, int nstar, PyObject *stararg, - PyObject ***pp_stack) -{ - PyObject *callargs, *w; - - callargs = PyTuple_New(nstack + nstar); - if (callargs == NULL) { - return NULL; - } - if (nstar) { - int i; - for (i = 0; i < nstar; i++) { - PyObject *a = PyTuple_GET_ITEM(stararg, i); - Py_INCREF(a); - PyTuple_SET_ITEM(callargs, nstack + i, a); - } - } - while (--nstack >= 0) { - w = EXT_POP(*pp_stack); - PyTuple_SET_ITEM(callargs, nstack, w); - } - return callargs; -} - -static PyObject * -load_args(PyObject ***pp_stack, int na) -{ - PyObject *args = PyTuple_New(na); - PyObject *w; - - if (args == NULL) - return NULL; - while (--na >= 0) { - w = EXT_POP(*pp_stack); - PyTuple_SET_ITEM(args, na, w); - } - return args; -} - -static PyObject * -do_call(PyObject *func, PyObject ***pp_stack, int na, int nk) -{ - PyObject *callargs = NULL; - PyObject *kwdict = NULL; - PyObject *result = NULL; - - if (nk > 0) { - kwdict = update_keyword_args(NULL, nk, pp_stack, func); - if (kwdict == NULL) - goto call_fail; - } - callargs = load_args(pp_stack, na); - if (callargs == NULL) - goto call_fail; -#ifdef CALL_PROFILE - /* At this point, we have to look at the type of func to - update the call stats properly. Do it here so as to avoid - exposing the call stats machinery outside ceval.c - */ - if (PyFunction_Check(func)) - PCALL(PCALL_FUNCTION); - else if (PyMethod_Check(func)) - PCALL(PCALL_METHOD); - else if (PyType_Check(func)) - PCALL(PCALL_TYPE); - else if (PyCFunction_Check(func)) - PCALL(PCALL_CFUNCTION); - else - PCALL(PCALL_OTHER); -#endif - if (PyCFunction_Check(func)) { - PyThreadState *tstate = PyThreadState_GET(); - C_TRACE(result, PyCFunction_Call(func, callargs, kwdict)); - } - else - result = PyObject_Call(func, callargs, kwdict); -call_fail: - Py_XDECREF(callargs); - Py_XDECREF(kwdict); - return result; -} - -static PyObject * -ext_do_call(PyObject *func, PyObject ***pp_stack, int flags, int na, int nk) -{ - int nstar = 0; - PyObject *callargs = NULL; - PyObject *stararg = NULL; - PyObject *kwdict = NULL; - PyObject *result = NULL; - - if (flags & CALL_FLAG_KW) { - kwdict = EXT_POP(*pp_stack); - if (!PyDict_Check(kwdict)) { - PyObject *d; - d = PyDict_New(); - if (d == NULL) - goto ext_call_fail; - if (PyDict_Update(d, kwdict) != 0) { - Py_DECREF(d); - /* PyDict_Update raises attribute - * error (percolated from an attempt - * to get 'keys' attribute) instead of - * a type error if its second argument - * is not a mapping. - */ - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Format(PyExc_TypeError, - "%.200s%.200s argument after ** " - "must be a mapping, not %.200s", - PyEval_GetFuncName(func), - PyEval_GetFuncDesc(func), - kwdict->ob_type->tp_name); - } - goto ext_call_fail; - } - Py_DECREF(kwdict); - kwdict = d; - } - } - if (flags & CALL_FLAG_VAR) { - stararg = EXT_POP(*pp_stack); - if (!PyTuple_Check(stararg)) { - PyObject *t = NULL; - t = PySequence_Tuple(stararg); - if (t == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Format(PyExc_TypeError, - "%.200s%.200s argument after * " - "must be a sequence, not %.200s", - PyEval_GetFuncName(func), - PyEval_GetFuncDesc(func), - stararg->ob_type->tp_name); - } - goto ext_call_fail; - } - Py_DECREF(stararg); - stararg = t; - } - nstar = PyTuple_GET_SIZE(stararg); - } - if (nk > 0) { - kwdict = update_keyword_args(kwdict, nk, pp_stack, func); - if (kwdict == NULL) - goto ext_call_fail; - } - callargs = update_star_args(na, nstar, stararg, pp_stack); - if (callargs == NULL) - goto ext_call_fail; -#ifdef CALL_PROFILE - /* At this point, we have to look at the type of func to - update the call stats properly. Do it here so as to avoid - exposing the call stats machinery outside ceval.c - */ - if (PyFunction_Check(func)) - PCALL(PCALL_FUNCTION); - else if (PyMethod_Check(func)) - PCALL(PCALL_METHOD); - else if (PyType_Check(func)) - PCALL(PCALL_TYPE); - else if (PyCFunction_Check(func)) - PCALL(PCALL_CFUNCTION); - else - PCALL(PCALL_OTHER); -#endif - if (PyCFunction_Check(func)) { - PyThreadState *tstate = PyThreadState_GET(); - C_TRACE(result, PyCFunction_Call(func, callargs, kwdict)); - } - else - result = PyObject_Call(func, callargs, kwdict); -ext_call_fail: - Py_XDECREF(callargs); - Py_XDECREF(kwdict); - Py_XDECREF(stararg); - return result; -} - -/* Extract a slice index from a PyInt or PyLong or an object with the - nb_index slot defined, and store in *pi. - Silently reduce values larger than PY_SSIZE_T_MAX to PY_SSIZE_T_MAX, - and silently boost values less than -PY_SSIZE_T_MAX-1 to -PY_SSIZE_T_MAX-1. - Return 0 on error, 1 on success. -*/ -/* Note: If v is NULL, return success without storing into *pi. This - is because_PyEval_SliceIndex() is called by apply_slice(), which can be - called by the SLICE opcode with v and/or w equal to NULL. -*/ -int -_PyEval_SliceIndex(PyObject *v, Py_ssize_t *pi) -{ - if (v != NULL) { - Py_ssize_t x; - if (PyIndex_Check(v)) { - x = PyNumber_AsSsize_t(v, NULL); - if (x == -1 && PyErr_Occurred()) - return 0; - } - else { - PyErr_SetString(PyExc_TypeError, - "slice indices must be integers or " - "None or have an __index__ method"); - return 0; - } - *pi = x; - } - return 1; -} - -#define CANNOT_CATCH_MSG "catching classes that do not inherit from "\ - "BaseException is not allowed" - -static PyObject * -cmp_outcome(int op, register PyObject *v, register PyObject *w) -{ - int res = 0; - switch (op) { - case PyCmp_IS: - res = (v == w); - break; - case PyCmp_IS_NOT: - res = (v != w); - break; - case PyCmp_IN: - res = PySequence_Contains(w, v); - if (res < 0) - return NULL; - break; - case PyCmp_NOT_IN: - res = PySequence_Contains(w, v); - if (res < 0) - return NULL; - res = !res; - break; - case PyCmp_EXC_MATCH: - if (PyTuple_Check(w)) { - Py_ssize_t i, length; - length = PyTuple_Size(w); - for (i = 0; i < length; i += 1) { - PyObject *exc = PyTuple_GET_ITEM(w, i); - if (!PyExceptionClass_Check(exc)) { - PyErr_SetString(PyExc_TypeError, - CANNOT_CATCH_MSG); - return NULL; - } - } - } - else { - if (!PyExceptionClass_Check(w)) { - PyErr_SetString(PyExc_TypeError, - CANNOT_CATCH_MSG); - return NULL; - } - } - res = PyErr_GivenExceptionMatches(v, w); - break; - default: - return PyObject_RichCompare(v, w, op); - } - v = res ? Py_True : Py_False; - Py_INCREF(v); - return v; -} - -static PyObject * -import_from(PyObject *v, PyObject *name) -{ - PyObject *x; - - x = PyObject_GetAttr(v, name); - if (x == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Format(PyExc_ImportError, "cannot import name %S", name); - } - return x; -} - -static int -import_all_from(PyObject *locals, PyObject *v) -{ - _Py_IDENTIFIER(__all__); - _Py_IDENTIFIER(__dict__); - PyObject *all = _PyObject_GetAttrId(v, &PyId___all__); - PyObject *dict, *name, *value; - int skip_leading_underscores = 0; - int pos, err; - - if (all == NULL) { - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) - return -1; /* Unexpected error */ - PyErr_Clear(); - dict = _PyObject_GetAttrId(v, &PyId___dict__); - if (dict == NULL) { - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) - return -1; - PyErr_SetString(PyExc_ImportError, - "from-import-* object has no __dict__ and no __all__"); - return -1; - } - all = PyMapping_Keys(dict); - Py_DECREF(dict); - if (all == NULL) - return -1; - skip_leading_underscores = 1; - } - - for (pos = 0, err = 0; ; pos++) { - name = PySequence_GetItem(all, pos); - if (name == NULL) { - if (!PyErr_ExceptionMatches(PyExc_IndexError)) - err = -1; - else - PyErr_Clear(); - break; - } - if (skip_leading_underscores && - PyUnicode_Check(name) && - PyUnicode_READY(name) != -1 && - PyUnicode_READ_CHAR(name, 0) == '_') - { - Py_DECREF(name); - continue; - } - value = PyObject_GetAttr(v, name); - if (value == NULL) - err = -1; - else if (PyDict_CheckExact(locals)) - err = PyDict_SetItem(locals, name, value); - else - err = PyObject_SetItem(locals, name, value); - Py_DECREF(name); - Py_XDECREF(value); - if (err != 0) - break; - } - Py_DECREF(all); - return err; -} - -static void -format_exc_check_arg(PyObject *exc, const char *format_str, PyObject *obj) -{ - const char *obj_str; - - if (!obj) - return; - - obj_str = _PyUnicode_AsString(obj); - if (!obj_str) - return; - - PyErr_Format(exc, format_str, obj_str); -} - -static void -format_exc_unbound(PyCodeObject *co, int oparg) -{ - PyObject *name; - /* Don't stomp existing exception */ - if (PyErr_Occurred()) - return; - if (oparg < PyTuple_GET_SIZE(co->co_cellvars)) { - name = PyTuple_GET_ITEM(co->co_cellvars, - oparg); - format_exc_check_arg( - PyExc_UnboundLocalError, - UNBOUNDLOCAL_ERROR_MSG, - name); - } else { - name = PyTuple_GET_ITEM(co->co_freevars, oparg - - PyTuple_GET_SIZE(co->co_cellvars)); - format_exc_check_arg(PyExc_NameError, - UNBOUNDFREE_ERROR_MSG, name); - } -} - -static PyObject * -unicode_concatenate(PyObject *v, PyObject *w, - PyFrameObject *f, unsigned char *next_instr) -{ - PyObject *res; - if (Py_REFCNT(v) == 2) { - /* In the common case, there are 2 references to the value - * stored in 'variable' when the += is performed: one on the - * value stack (in 'v') and one still stored in the - * 'variable'. We try to delete the variable now to reduce - * the refcnt to 1. - */ - switch (*next_instr) { - case STORE_FAST: - { - int oparg = PEEKARG(); - PyObject **fastlocals = f->f_localsplus; - if (GETLOCAL(oparg) == v) - SETLOCAL(oparg, NULL); - break; - } - case STORE_DEREF: - { - PyObject **freevars = (f->f_localsplus + - f->f_code->co_nlocals); - PyObject *c = freevars[PEEKARG()]; - if (PyCell_GET(c) == v) - PyCell_Set(c, NULL); - break; - } - case STORE_NAME: - { - PyObject *names = f->f_code->co_names; - PyObject *name = GETITEM(names, PEEKARG()); - PyObject *locals = f->f_locals; - if (PyDict_CheckExact(locals) && - PyDict_GetItem(locals, name) == v) { - if (PyDict_DelItem(locals, name) != 0) { - PyErr_Clear(); - } - } - break; - } - } - } - res = v; - PyUnicode_Append(&res, w); - return res; -} - -
--- a/cos/python/Python/errors.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,906 +0,0 @@ - -/* Error handling */ - -#include "Python.h" - -#ifndef __STDC__ -#ifndef MS_WINDOWS -extern char *strerror(int); -#endif -#endif - -#ifdef MS_WINDOWS -#include <windows.h> -#include <winbase.h> -#endif - -#include <ctype.h> - -#ifdef __cplusplus -extern "C" { -#endif - - -void -PyErr_Restore(PyObject *type, PyObject *value, PyObject *traceback) -{ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *oldtype, *oldvalue, *oldtraceback; - - if (traceback != NULL && !PyTraceBack_Check(traceback)) { - /* XXX Should never happen -- fatal error instead? */ - /* Well, it could be None. */ - Py_DECREF(traceback); - traceback = NULL; - } - - /* Save these in locals to safeguard against recursive - invocation through Py_XDECREF */ - oldtype = tstate->curexc_type; - oldvalue = tstate->curexc_value; - oldtraceback = tstate->curexc_traceback; - - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = traceback; - - Py_XDECREF(oldtype); - Py_XDECREF(oldvalue); - Py_XDECREF(oldtraceback); -} - -void -PyErr_SetObject(PyObject *exception, PyObject *value) -{ - PyThreadState *tstate = PyThreadState_GET(); - PyObject *exc_value; - PyObject *tb = NULL; - - if (exception != NULL && - !PyExceptionClass_Check(exception)) { - PyErr_Format(PyExc_SystemError, - "exception %R not a BaseException subclass", - exception); - return; - } - Py_XINCREF(value); - exc_value = tstate->exc_value; - if (exc_value != NULL && exc_value != Py_None) { - /* Implicit exception chaining */ - Py_INCREF(exc_value); - if (value == NULL || !PyExceptionInstance_Check(value)) { - /* We must normalize the value right now */ - PyObject *args, *fixed_value; - if (value == NULL || value == Py_None) - args = PyTuple_New(0); - else if (PyTuple_Check(value)) { - Py_INCREF(value); - args = value; - } - else - args = PyTuple_Pack(1, value); - fixed_value = args ? - PyEval_CallObject(exception, args) : NULL; - Py_XDECREF(args); - Py_XDECREF(value); - if (fixed_value == NULL) - return; - value = fixed_value; - } - /* Avoid reference cycles through the context chain. - This is O(chain length) but context chains are - usually very short. Sensitive readers may try - to inline the call to PyException_GetContext. */ - if (exc_value != value) { - PyObject *o = exc_value, *context; - while ((context = PyException_GetContext(o))) { - Py_DECREF(context); - if (context == value) { - PyException_SetContext(o, NULL); - break; - } - o = context; - } - PyException_SetContext(value, exc_value); - } else { - Py_DECREF(exc_value); - } - } - if (value != NULL && PyExceptionInstance_Check(value)) - tb = PyException_GetTraceback(value); - Py_XINCREF(exception); - PyErr_Restore(exception, value, tb); -} - -void -PyErr_SetNone(PyObject *exception) -{ - PyErr_SetObject(exception, (PyObject *)NULL); -} - -void -PyErr_SetString(PyObject *exception, const char *string) -{ - PyObject *value = PyUnicode_FromString(string); - PyErr_SetObject(exception, value); - Py_XDECREF(value); -} - - -PyObject * -PyErr_Occurred(void) -{ - /* If there is no thread state, PyThreadState_GET calls - Py_FatalError, which calls PyErr_Occurred. To avoid the - resulting infinite loop, we inline PyThreadState_GET here and - treat no thread as no error. */ - PyThreadState *tstate = - ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current)); - - return tstate == NULL ? NULL : tstate->curexc_type; -} - - -int -PyErr_GivenExceptionMatches(PyObject *err, PyObject *exc) -{ - if (err == NULL || exc == NULL) { - /* maybe caused by "import exceptions" that failed early on */ - return 0; - } - if (PyTuple_Check(exc)) { - Py_ssize_t i, n; - n = PyTuple_Size(exc); - for (i = 0; i < n; i++) { - /* Test recursively */ - if (PyErr_GivenExceptionMatches( - err, PyTuple_GET_ITEM(exc, i))) - { - return 1; - } - } - return 0; - } - /* err might be an instance, so check its class. */ - if (PyExceptionInstance_Check(err)) - err = PyExceptionInstance_Class(err); - - if (PyExceptionClass_Check(err) && PyExceptionClass_Check(exc)) { - int res = 0; - PyObject *exception, *value, *tb; - PyErr_Fetch(&exception, &value, &tb); - /* PyObject_IsSubclass() can recurse and therefore is - not safe (see test_bad_getattr in test.pickletester). */ - res = PyType_IsSubtype((PyTypeObject *)err, (PyTypeObject *)exc); - /* This function must not fail, so print the error here */ - if (res == -1) { - PyErr_WriteUnraisable(err); - res = 0; - } - PyErr_Restore(exception, value, tb); - return res; - } - - return err == exc; -} - - -int -PyErr_ExceptionMatches(PyObject *exc) -{ - return PyErr_GivenExceptionMatches(PyErr_Occurred(), exc); -} - - -/* Used in many places to normalize a raised exception, including in - eval_code2(), do_raise(), and PyErr_Print() - - XXX: should PyErr_NormalizeException() also call - PyException_SetTraceback() with the resulting value and tb? -*/ -void -PyErr_NormalizeException(PyObject **exc, PyObject **val, PyObject **tb) -{ - PyObject *type = *exc; - PyObject *value = *val; - PyObject *inclass = NULL; - PyObject *initial_tb = NULL; - PyThreadState *tstate = NULL; - - if (type == NULL) { - /* There was no exception, so nothing to do. */ - return; - } - - /* If PyErr_SetNone() was used, the value will have been actually - set to NULL. - */ - if (!value) { - value = Py_None; - Py_INCREF(value); - } - - if (PyExceptionInstance_Check(value)) - inclass = PyExceptionInstance_Class(value); - - /* Normalize the exception so that if the type is a class, the - value will be an instance. - */ - if (PyExceptionClass_Check(type)) { - /* if the value was not an instance, or is not an instance - whose class is (or is derived from) type, then use the - value as an argument to instantiation of the type - class. - */ - if (!inclass || !PyObject_IsSubclass(inclass, type)) { - PyObject *args, *res; - - if (value == Py_None) - args = PyTuple_New(0); - else if (PyTuple_Check(value)) { - Py_INCREF(value); - args = value; - } - else - args = PyTuple_Pack(1, value); - - if (args == NULL) - goto finally; - res = PyEval_CallObject(type, args); - Py_DECREF(args); - if (res == NULL) - goto finally; - Py_DECREF(value); - value = res; - } - /* if the class of the instance doesn't exactly match the - class of the type, believe the instance - */ - else if (inclass != type) { - Py_DECREF(type); - type = inclass; - Py_INCREF(type); - } - } - *exc = type; - *val = value; - return; -finally: - Py_DECREF(type); - Py_DECREF(value); - /* If the new exception doesn't set a traceback and the old - exception had a traceback, use the old traceback for the - new exception. It's better than nothing. - */ - initial_tb = *tb; - PyErr_Fetch(exc, val, tb); - if (initial_tb != NULL) { - if (*tb == NULL) - *tb = initial_tb; - else - Py_DECREF(initial_tb); - } - /* normalize recursively */ - tstate = PyThreadState_GET(); - if (++tstate->recursion_depth > Py_GetRecursionLimit()) { - --tstate->recursion_depth; - /* throw away the old exception... */ - Py_DECREF(*exc); - Py_DECREF(*val); - /* ... and use the recursion error instead */ - *exc = PyExc_RuntimeError; - *val = PyExc_RecursionErrorInst; - Py_INCREF(*exc); - Py_INCREF(*val); - /* just keeping the old traceback */ - return; - } - PyErr_NormalizeException(exc, val, tb); - --tstate->recursion_depth; -} - - -void -PyErr_Fetch(PyObject **p_type, PyObject **p_value, PyObject **p_traceback) -{ - PyThreadState *tstate = PyThreadState_GET(); - - *p_type = tstate->curexc_type; - *p_value = tstate->curexc_value; - *p_traceback = tstate->curexc_traceback; - - tstate->curexc_type = NULL; - tstate->curexc_value = NULL; - tstate->curexc_traceback = NULL; -} - -void -PyErr_Clear(void) -{ - PyErr_Restore(NULL, NULL, NULL); -} - -/* Convenience functions to set a type error exception and return 0 */ - -int -PyErr_BadArgument(void) -{ - PyErr_SetString(PyExc_TypeError, - "bad argument type for built-in operation"); - return 0; -} - -PyObject * -PyErr_NoMemory(void) -{ - PyErr_SetNone(PyExc_MemoryError); - return NULL; -} - -PyObject * -PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject) -{ - PyObject *message; - PyObject *v, *args; - int i = errno; -#ifdef MS_WINDOWS - WCHAR *s_buf = NULL; -#endif /* Unix/Windows */ - -#ifdef EINTR - if (i == EINTR && PyErr_CheckSignals()) - return NULL; -#endif - -#ifndef MS_WINDOWS - if (i != 0) { - char *s = strerror(i); - message = PyUnicode_DecodeLocale(s, "surrogateescape"); - } - else { - /* Sometimes errno didn't get set */ - message = PyUnicode_FromString("Error"); - } -#else - if (i == 0) - message = PyUnicode_FromString("Error"); /* Sometimes errno didn't get set */ - else - { - /* Note that the Win32 errors do not lineup with the - errno error. So if the error is in the MSVC error - table, we use it, otherwise we assume it really _is_ - a Win32 error code - */ - if (i > 0 && i < _sys_nerr) { - message = PyUnicode_FromString(_sys_errlist[i]); - } - else { - int len = FormatMessageW( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, /* no message source */ - i, - MAKELANGID(LANG_NEUTRAL, - SUBLANG_DEFAULT), - /* Default language */ - (LPWSTR) &s_buf, - 0, /* size not used */ - NULL); /* no args */ - if (len==0) { - /* Only ever seen this in out-of-mem - situations */ - s_buf = NULL; - message = PyUnicode_FromFormat("Windows Error 0x%X", i); - } else { - /* remove trailing cr/lf and dots */ - while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.')) - s_buf[--len] = L'\0'; - message = PyUnicode_FromWideChar(s_buf, len); - } - } - } -#endif /* Unix/Windows */ - - if (message == NULL) - { -#ifdef MS_WINDOWS - LocalFree(s_buf); -#endif - return NULL; - } - - if (filenameObject != NULL) - args = Py_BuildValue("(iOO)", i, message, filenameObject); - else - args = Py_BuildValue("(iO)", i, message); - Py_DECREF(message); - - if (args != NULL) { - v = PyObject_Call(exc, args, NULL); - Py_DECREF(args); - if (v != NULL) { - PyErr_SetObject((PyObject *) Py_TYPE(v), v); - Py_DECREF(v); - } - } -#ifdef MS_WINDOWS - LocalFree(s_buf); -#endif - return NULL; -} - - -PyObject * -PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename) -{ - PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; - PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); - Py_XDECREF(name); - return result; -} - -#ifdef MS_WINDOWS -PyObject * -PyErr_SetFromErrnoWithUnicodeFilename(PyObject *exc, const Py_UNICODE *filename) -{ - PyObject *name = filename ? - PyUnicode_FromUnicode(filename, wcslen(filename)) : - NULL; - PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); - Py_XDECREF(name); - return result; -} -#endif /* MS_WINDOWS */ - -PyObject * -PyErr_SetFromErrno(PyObject *exc) -{ - return PyErr_SetFromErrnoWithFilenameObject(exc, NULL); -} - -#ifdef MS_WINDOWS -/* Windows specific error code handling */ -PyObject *PyErr_SetExcFromWindowsErrWithFilenameObject( - PyObject *exc, - int ierr, - PyObject *filenameObject) -{ - int len; - WCHAR *s_buf = NULL; /* Free via LocalFree */ - PyObject *message; - PyObject *args, *v; - DWORD err = (DWORD)ierr; - if (err==0) err = GetLastError(); - len = FormatMessageW( - /* Error API error */ - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, /* no message source */ - err, - MAKELANGID(LANG_NEUTRAL, - SUBLANG_DEFAULT), /* Default language */ - (LPWSTR) &s_buf, - 0, /* size not used */ - NULL); /* no args */ - if (len==0) { - /* Only seen this in out of mem situations */ - message = PyUnicode_FromFormat("Windows Error 0x%X", err); - s_buf = NULL; - } else { - /* remove trailing cr/lf and dots */ - while (len > 0 && (s_buf[len-1] <= L' ' || s_buf[len-1] == L'.')) - s_buf[--len] = L'\0'; - message = PyUnicode_FromWideChar(s_buf, len); - } - - if (message == NULL) - { - LocalFree(s_buf); - return NULL; - } - - if (filenameObject == NULL) - filenameObject = Py_None; - /* This is the constructor signature for passing a Windows error code. - The POSIX translation will be figured out by the constructor. */ - args = Py_BuildValue("(iOOi)", 0, message, filenameObject, err); - Py_DECREF(message); - - if (args != NULL) { - v = PyObject_Call(exc, args, NULL); - Py_DECREF(args); - if (v != NULL) { - PyErr_SetObject((PyObject *) Py_TYPE(v), v); - Py_DECREF(v); - } - } - LocalFree(s_buf); - return NULL; -} - -PyObject *PyErr_SetExcFromWindowsErrWithFilename( - PyObject *exc, - int ierr, - const char *filename) -{ - PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; - PyObject *ret = PyErr_SetExcFromWindowsErrWithFilenameObject(exc, - ierr, - name); - Py_XDECREF(name); - return ret; -} - -PyObject *PyErr_SetExcFromWindowsErrWithUnicodeFilename( - PyObject *exc, - int ierr, - const Py_UNICODE *filename) -{ - PyObject *name = filename ? - PyUnicode_FromUnicode(filename, wcslen(filename)) : - NULL; - PyObject *ret = PyErr_SetExcFromWindowsErrWithFilenameObject(exc, - ierr, - name); - Py_XDECREF(name); - return ret; -} - -PyObject *PyErr_SetExcFromWindowsErr(PyObject *exc, int ierr) -{ - return PyErr_SetExcFromWindowsErrWithFilename(exc, ierr, NULL); -} - -PyObject *PyErr_SetFromWindowsErr(int ierr) -{ - return PyErr_SetExcFromWindowsErrWithFilename(PyExc_WindowsError, - ierr, NULL); -} -PyObject *PyErr_SetFromWindowsErrWithFilename( - int ierr, - const char *filename) -{ - PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; - PyObject *result = PyErr_SetExcFromWindowsErrWithFilenameObject( - PyExc_WindowsError, - ierr, name); - Py_XDECREF(name); - return result; -} - -PyObject *PyErr_SetFromWindowsErrWithUnicodeFilename( - int ierr, - const Py_UNICODE *filename) -{ - PyObject *name = filename ? - PyUnicode_FromUnicode(filename, wcslen(filename)) : - NULL; - PyObject *result = PyErr_SetExcFromWindowsErrWithFilenameObject( - PyExc_WindowsError, - ierr, name); - Py_XDECREF(name); - return result; -} -#endif /* MS_WINDOWS */ - -void -_PyErr_BadInternalCall(const char *filename, int lineno) -{ - PyErr_Format(PyExc_SystemError, - "%s:%d: bad argument to internal function", - filename, lineno); -} - -/* Remove the preprocessor macro for PyErr_BadInternalCall() so that we can - export the entry point for existing object code: */ -#undef PyErr_BadInternalCall -void -PyErr_BadInternalCall(void) -{ - PyErr_Format(PyExc_SystemError, - "bad argument to internal function"); -} -#define PyErr_BadInternalCall() _PyErr_BadInternalCall(__FILE__, __LINE__) - - - -PyObject * -PyErr_Format(PyObject *exception, const char *format, ...) -{ - va_list vargs; - PyObject* string; - -#ifdef HAVE_STDARG_PROTOTYPES - va_start(vargs, format); -#else - va_start(vargs); -#endif - - string = PyUnicode_FromFormatV(format, vargs); - PyErr_SetObject(exception, string); - Py_XDECREF(string); - va_end(vargs); - return NULL; -} - - - -PyObject * -PyErr_NewException(const char *name, PyObject *base, PyObject *dict) -{ - const char *dot; - PyObject *modulename = NULL; - PyObject *classname = NULL; - PyObject *mydict = NULL; - PyObject *bases = NULL; - PyObject *result = NULL; - dot = strrchr(name, '.'); - if (dot == NULL) { - PyErr_SetString(PyExc_SystemError, - "PyErr_NewException: name must be module.class"); - return NULL; - } - if (base == NULL) - base = PyExc_Exception; - if (dict == NULL) { - dict = mydict = PyDict_New(); - if (dict == NULL) - goto failure; - } - if (PyDict_GetItemString(dict, "__module__") == NULL) { - modulename = PyUnicode_FromStringAndSize(name, - (Py_ssize_t)(dot-name)); - if (modulename == NULL) - goto failure; - if (PyDict_SetItemString(dict, "__module__", modulename) != 0) - goto failure; - } - if (PyTuple_Check(base)) { - bases = base; - /* INCREF as we create a new ref in the else branch */ - Py_INCREF(bases); - } else { - bases = PyTuple_Pack(1, base); - if (bases == NULL) - goto failure; - } - /* Create a real class. */ - result = PyObject_CallFunction((PyObject *)&PyType_Type, "sOO", - dot+1, bases, dict); - failure: - Py_XDECREF(bases); - Py_XDECREF(mydict); - Py_XDECREF(classname); - Py_XDECREF(modulename); - return result; -} - - -/* Create an exception with docstring */ -PyObject * -PyErr_NewExceptionWithDoc(const char *name, const char *doc, - PyObject *base, PyObject *dict) -{ - int result; - PyObject *ret = NULL; - PyObject *mydict = NULL; /* points to the dict only if we create it */ - PyObject *docobj; - - if (dict == NULL) { - dict = mydict = PyDict_New(); - if (dict == NULL) { - return NULL; - } - } - - if (doc != NULL) { - docobj = PyUnicode_FromString(doc); - if (docobj == NULL) - goto failure; - result = PyDict_SetItemString(dict, "__doc__", docobj); - Py_DECREF(docobj); - if (result < 0) - goto failure; - } - - ret = PyErr_NewException(name, base, dict); - failure: - Py_XDECREF(mydict); - return ret; -} - - -/* Call when an exception has occurred but there is no way for Python - to handle it. Examples: exception in __del__ or during GC. */ -void -PyErr_WriteUnraisable(PyObject *obj) -{ - _Py_IDENTIFIER(__module__); - PyObject *f, *t, *v, *tb; - PyErr_Fetch(&t, &v, &tb); - f = PySys_GetObject("stderr"); - if (f != NULL && f != Py_None) { - PyFile_WriteString("Exception ", f); - if (t) { - PyObject* moduleName; - char* className; - assert(PyExceptionClass_Check(t)); - className = PyExceptionClass_Name(t); - if (className != NULL) { - char *dot = strrchr(className, '.'); - if (dot != NULL) - className = dot+1; - } - - moduleName = _PyObject_GetAttrId(t, &PyId___module__); - if (moduleName == NULL) - PyFile_WriteString("<unknown>", f); - else { - char* modstr = _PyUnicode_AsString(moduleName); - if (modstr && - strcmp(modstr, "builtins") != 0) - { - PyFile_WriteString(modstr, f); - PyFile_WriteString(".", f); - } - } - if (className == NULL) - PyFile_WriteString("<unknown>", f); - else - PyFile_WriteString(className, f); - if (v && v != Py_None) { - PyFile_WriteString(": ", f); - PyFile_WriteObject(v, f, 0); - } - Py_XDECREF(moduleName); - } - if (obj) { - PyFile_WriteString(" in ", f); - PyFile_WriteObject(obj, f, 0); - } - PyFile_WriteString(" ignored\n", f); - PyErr_Clear(); /* Just in case */ - } - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); -} - -extern PyObject *PyModule_GetWarningsModule(void); - - -void -PyErr_SyntaxLocation(const char *filename, int lineno) { - PyErr_SyntaxLocationEx(filename, lineno, -1); -} - - -/* Set file and line information for the current exception. - If the exception is not a SyntaxError, also sets additional attributes - to make printing of exceptions believe it is a syntax error. */ - -void -PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) -{ - PyObject *exc, *v, *tb, *tmp; - _Py_IDENTIFIER(filename); - _Py_IDENTIFIER(lineno); - _Py_IDENTIFIER(msg); - _Py_IDENTIFIER(offset); - _Py_IDENTIFIER(print_file_and_line); - _Py_IDENTIFIER(text); - - /* add attributes for the line number and filename for the error */ - PyErr_Fetch(&exc, &v, &tb); - PyErr_NormalizeException(&exc, &v, &tb); - /* XXX check that it is, indeed, a syntax error. It might not - * be, though. */ - tmp = PyLong_FromLong(lineno); - if (tmp == NULL) - PyErr_Clear(); - else { - if (_PyObject_SetAttrId(v, &PyId_lineno, tmp)) - PyErr_Clear(); - Py_DECREF(tmp); - } - if (col_offset >= 0) { - tmp = PyLong_FromLong(col_offset); - if (tmp == NULL) - PyErr_Clear(); - else { - if (_PyObject_SetAttrId(v, &PyId_offset, tmp)) - PyErr_Clear(); - Py_DECREF(tmp); - } - } - if (filename != NULL) { - tmp = PyUnicode_DecodeFSDefault(filename); - if (tmp == NULL) - PyErr_Clear(); - else { - if (_PyObject_SetAttrId(v, &PyId_filename, tmp)) - PyErr_Clear(); - Py_DECREF(tmp); - } - - tmp = PyErr_ProgramText(filename, lineno); - if (tmp) { - if (_PyObject_SetAttrId(v, &PyId_text, tmp)) - PyErr_Clear(); - Py_DECREF(tmp); - } - } - if (_PyObject_SetAttrId(v, &PyId_offset, Py_None)) { - PyErr_Clear(); - } - if (exc != PyExc_SyntaxError) { - if (!_PyObject_HasAttrId(v, &PyId_msg)) { - tmp = PyObject_Str(v); - if (tmp) { - if (_PyObject_SetAttrId(v, &PyId_msg, tmp)) - PyErr_Clear(); - Py_DECREF(tmp); - } else { - PyErr_Clear(); - } - } - if (!_PyObject_HasAttrId(v, &PyId_print_file_and_line)) { - if (_PyObject_SetAttrId(v, &PyId_print_file_and_line, - Py_None)) - PyErr_Clear(); - } - } - PyErr_Restore(exc, v, tb); -} - -/* Attempt to load the line of text that the exception refers to. If it - fails, it will return NULL but will not set an exception. - - XXX The functionality of this function is quite similar to the - functionality in tb_displayline() in traceback.c. */ - -PyObject * -PyErr_ProgramText(const char *filename, int lineno) -{ - FILE *fp; - int i; - char linebuf[1000]; - - if (filename == NULL || *filename == '\0' || lineno <= 0) - return NULL; - fp = fopen(filename, "r" PY_STDIOTEXTMODE); - if (fp == NULL) - return NULL; - for (i = 0; i < lineno; i++) { - char *pLastChar = &linebuf[sizeof(linebuf) - 2]; - do { - *pLastChar = '\0'; - if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, - fp, NULL) == NULL) - break; - /* fgets read *something*; if it didn't get as - far as pLastChar, it must have found a newline - or hit the end of the file; if pLastChar is \n, - it obviously found a newline; else we haven't - yet seen a newline, so must continue */ - } while (*pLastChar != '\0' && *pLastChar != '\n'); - } - fclose(fp); - if (i == lineno) { - char *p = linebuf; - PyObject *res; - while (*p == ' ' || *p == '\t' || *p == '\014') - p++; - res = PyUnicode_FromString(p); - if (res == NULL) - PyErr_Clear(); - return res; - } - return NULL; -} - -#ifdef __cplusplus -} -#endif
--- a/cos/python/Python/import.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4190 +0,0 @@ - -/* Module definition and import implementation */ - -#include "Python.h" - -#include "Python-ast.h" -#undef Yield /* undefine macro conflicting with winbase.h */ -#include "errcode.h" -#include "marshal.h" -#include "code.h" -#include "osdefs.h" -#include "importdl.h" - -#ifdef HAVE_FCNTL_H -#include <fcntl.h> -#endif - - -/* Magic word to reject .pyc files generated by other Python versions. - It should change for each incompatible change to the bytecode. - - The value of CR and LF is incorporated so if you ever read or write - a .pyc file in text mode the magic number will be wrong; also, the - Apple MPW compiler swaps their values, botching string constants. - - The magic numbers must be spaced apart at least 2 values, as the - -U interpeter flag will cause MAGIC+1 being used. They have been - odd numbers for some time now. - - There were a variety of old schemes for setting the magic number. - The current working scheme is to increment the previous value by - 10. - - Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic - number also includes a new "magic tag", i.e. a human readable string used - to represent the magic number in __pycache__ directories. When you change - the magic number, you must also set a new unique magic tag. Generally this - can be named after the Python major version of the magic number bump, but - it can really be anything, as long as it's different than anything else - that's come before. The tags are included in the following table, starting - with Python 3.2a0. - - Known values: - Python 1.5: 20121 - Python 1.5.1: 20121 - Python 1.5.2: 20121 - Python 1.6: 50428 - Python 2.0: 50823 - Python 2.0.1: 50823 - Python 2.1: 60202 - Python 2.1.1: 60202 - Python 2.1.2: 60202 - Python 2.2: 60717 - Python 2.3a0: 62011 - Python 2.3a0: 62021 - Python 2.3a0: 62011 (!) - Python 2.4a0: 62041 - Python 2.4a3: 62051 - Python 2.4b1: 62061 - Python 2.5a0: 62071 - Python 2.5a0: 62081 (ast-branch) - Python 2.5a0: 62091 (with) - Python 2.5a0: 62092 (changed WITH_CLEANUP opcode) - Python 2.5b3: 62101 (fix wrong code: for x, in ...) - Python 2.5b3: 62111 (fix wrong code: x += yield) - Python 2.5c1: 62121 (fix wrong lnotab with for loops and - storing constants that should have been removed) - Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp) - Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) - Python 2.6a1: 62161 (WITH_CLEANUP optimization) - Python 3000: 3000 - 3010 (removed UNARY_CONVERT) - 3020 (added BUILD_SET) - 3030 (added keyword-only parameters) - 3040 (added signature annotations) - 3050 (print becomes a function) - 3060 (PEP 3115 metaclass syntax) - 3061 (string literals become unicode) - 3071 (PEP 3109 raise changes) - 3081 (PEP 3137 make __file__ and __name__ unicode) - 3091 (kill str8 interning) - 3101 (merge from 2.6a0, see 62151) - 3103 (__file__ points to source file) - Python 3.0a4: 3111 (WITH_CLEANUP optimization). - Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT) - Python 3.1a0: 3141 (optimize list, set and dict comprehensions: - change LIST_APPEND and SET_ADD, add MAP_ADD) - Python 3.1a0: 3151 (optimize conditional branches: - introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) - Python 3.2a0: 3160 (add SETUP_WITH) - tag: cpython-32 - Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR) - tag: cpython-32 - Python 3.2a2 3180 (add DELETE_DEREF) - Python 3.3a0 3190 __class__ super closure changed - Python 3.3a0 3200 (__qualname__ added) -*/ - -/* MAGIC must change whenever the bytecode emitted by the compiler may no - longer be understood by older implementations of the eval loop (usually - due to the addition of new opcodes) - TAG must change for each major Python release. The magic number will take - care of any bytecode changes that occur during development. -*/ -#define QUOTE(arg) #arg -#define STRIFY(name) QUOTE(name) -#define MAJOR STRIFY(PY_MAJOR_VERSION) -#define MINOR STRIFY(PY_MINOR_VERSION) -#define MAGIC (3200 | ((long)'\r'<<16) | ((long)'\n'<<24)) -#define TAG "cpython-" MAJOR MINOR; -#define CACHEDIR "__pycache__" -/* Current magic word and string tag as globals. */ -static long pyc_magic = MAGIC; -static const char *pyc_tag = TAG; -#undef QUOTE -#undef STRIFY -#undef MAJOR -#undef MINOR - -/* See _PyImport_FixupExtensionObject() below */ -static PyObject *extensions = NULL; - -/* Function from Parser/tokenizer.c */ -extern char * PyTokenizer_FindEncodingFilename(int, PyObject *); - -/* This table is defined in config.c: */ -extern struct _inittab _PyImport_Inittab[]; - -struct _inittab *PyImport_Inittab = _PyImport_Inittab; - -/* these tables define the module suffixes that Python recognizes */ -struct filedescr * _PyImport_Filetab = NULL; - -static const struct filedescr _PyImport_StandardFiletab[] = { - {".py", "U", PY_SOURCE}, - {".pyc", "rb", PY_COMPILED}, - {0, 0} -}; - -static PyObject *initstr = NULL; -_Py_IDENTIFIER(__path__); - -/* Initialize things */ - -void -_PyImport_Init(void) -{ - const struct filedescr *scan; - struct filedescr *filetab; - int countD = 0; - int countS = 0; - - initstr = PyUnicode_InternFromString("__init__"); - if (initstr == NULL) - Py_FatalError("Can't initialize import variables"); - - /* prepare _PyImport_Filetab: copy entries from - _PyImport_DynLoadFiletab and _PyImport_StandardFiletab. - */ -#ifdef HAVE_DYNAMIC_LOADING - for (scan = _PyImport_DynLoadFiletab; scan->suffix != NULL; ++scan) - ++countD; -#endif - for (scan = _PyImport_StandardFiletab; scan->suffix != NULL; ++scan) - ++countS; - filetab = PyMem_NEW(struct filedescr, countD + countS + 1); - if (filetab == NULL) - Py_FatalError("Can't initialize import file table."); -#ifdef HAVE_DYNAMIC_LOADING - memcpy(filetab, _PyImport_DynLoadFiletab, - countD * sizeof(struct filedescr)); -#endif - memcpy(filetab + countD, _PyImport_StandardFiletab, - countS * sizeof(struct filedescr)); - filetab[countD + countS].suffix = NULL; - - _PyImport_Filetab = filetab; - - if (Py_OptimizeFlag) { - /* Replace ".pyc" with ".pyo" in _PyImport_Filetab */ - for (; filetab->suffix != NULL; filetab++) { - if (strcmp(filetab->suffix, ".pyc") == 0) - filetab->suffix = ".pyo"; - } - } -} - -void -_PyImportHooks_Init(void) -{ - PyObject *v, *path_hooks = NULL, *zimpimport; - int err = 0; - - /* adding sys.path_hooks and sys.path_importer_cache, setting up - zipimport */ - if (PyType_Ready(&PyNullImporter_Type) < 0) - goto error; - - if (Py_VerboseFlag) - PySys_WriteStderr("# installing zipimport hook\n"); - - v = PyList_New(0); - if (v == NULL) - goto error; - err = PySys_SetObject("meta_path", v); - Py_DECREF(v); - if (err) - goto error; - v = PyDict_New(); - if (v == NULL) - goto error; - err = PySys_SetObject("path_importer_cache", v); - Py_DECREF(v); - if (err) - goto error; - path_hooks = PyList_New(0); - if (path_hooks == NULL) - goto error; - err = PySys_SetObject("path_hooks", path_hooks); - if (err) { - error: - PyErr_Print(); - Py_FatalError("initializing sys.meta_path, sys.path_hooks, " - "path_importer_cache, or NullImporter failed" - ); - } - - zimpimport = PyImport_ImportModule("zipimport"); - if (zimpimport == NULL) { - PyErr_Clear(); /* No zip import module -- okay */ - if (Py_VerboseFlag) - PySys_WriteStderr("# can't import zipimport\n"); - } - else { - _Py_IDENTIFIER(zipimporter); - PyObject *zipimporter = _PyObject_GetAttrId(zimpimport, - &PyId_zipimporter); - Py_DECREF(zimpimport); - if (zipimporter == NULL) { - PyErr_Clear(); /* No zipimporter object -- okay */ - if (Py_VerboseFlag) - PySys_WriteStderr( - "# can't import zipimport.zipimporter\n"); - } - else { - /* sys.path_hooks.append(zipimporter) */ - err = PyList_Append(path_hooks, zipimporter); - Py_DECREF(zipimporter); - if (err) - goto error; - if (Py_VerboseFlag) - PySys_WriteStderr( - "# installed zipimport hook\n"); - } - } - Py_DECREF(path_hooks); -} - -/* Locking primitives to prevent parallel imports of the same module - in different threads to return with a partially loaded module. - These calls are serialized by the global interpreter lock. */ - -#ifdef WITH_THREAD - -#include "pythread.h" - -static PyThread_type_lock import_lock = 0; -static long import_lock_thread = -1; -static int import_lock_level = 0; - -void -_PyImport_AcquireLock(void) -{ - long me = PyThread_get_thread_ident(); - if (me == -1) - return; /* Too bad */ - if (import_lock == NULL) { - import_lock = PyThread_allocate_lock(); - if (import_lock == NULL) - return; /* Nothing much we can do. */ - } - if (import_lock_thread == me) { - import_lock_level++; - return; - } - if (import_lock_thread != -1 || !PyThread_acquire_lock(import_lock, 0)) - { - PyThreadState *tstate = PyEval_SaveThread(); - PyThread_acquire_lock(import_lock, 1); - PyEval_RestoreThread(tstate); - } - import_lock_thread = me; - import_lock_level = 1; -} - -int -_PyImport_ReleaseLock(void) -{ - long me = PyThread_get_thread_ident(); - if (me == -1 || import_lock == NULL) - return 0; /* Too bad */ - if (import_lock_thread != me) - return -1; - import_lock_level--; - if (import_lock_level == 0) { - import_lock_thread = -1; - PyThread_release_lock(import_lock); - } - return 1; -} - -/* This function is called from PyOS_AfterFork to ensure that newly - created child processes do not share locks with the parent. - We now acquire the import lock around fork() calls but on some platforms - (Solaris 9 and earlier? see isue7242) that still left us with problems. */ - -void -_PyImport_ReInitLock(void) -{ - if (import_lock != NULL) - import_lock = PyThread_allocate_lock(); - if (import_lock_level > 1) { - /* Forked as a side effect of import */ - long me = PyThread_get_thread_ident(); - PyThread_acquire_lock(import_lock, 0); - /* XXX: can the previous line fail? */ - import_lock_thread = me; - import_lock_level--; - } else { - import_lock_thread = -1; - import_lock_level = 0; - } -} - -#endif - -static PyObject * -imp_lock_held(PyObject *self, PyObject *noargs) -{ -#ifdef WITH_THREAD - return PyBool_FromLong(import_lock_thread != -1); -#else - return PyBool_FromLong(0); -#endif -} - -static PyObject * -imp_acquire_lock(PyObject *self, PyObject *noargs) -{ -#ifdef WITH_THREAD - _PyImport_AcquireLock(); -#endif - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject * -imp_release_lock(PyObject *self, PyObject *noargs) -{ -#ifdef WITH_THREAD - if (_PyImport_ReleaseLock() < 0) { - PyErr_SetString(PyExc_RuntimeError, - "not holding the import lock"); - return NULL; - } -#endif - Py_INCREF(Py_None); - return Py_None; -} - -void -_PyImport_Fini(void) -{ - Py_XDECREF(extensions); - extensions = NULL; - PyMem_DEL(_PyImport_Filetab); - _PyImport_Filetab = NULL; -#ifdef WITH_THREAD - if (import_lock != NULL) { - PyThread_free_lock(import_lock); - import_lock = NULL; - } -#endif -} - -static void -imp_modules_reloading_clear(void) -{ - PyInterpreterState *interp = PyThreadState_Get()->interp; - if (interp->modules_reloading != NULL) - PyDict_Clear(interp->modules_reloading); -} - -/* Helper for sys */ - -PyObject * -PyImport_GetModuleDict(void) -{ - PyInterpreterState *interp = PyThreadState_GET()->interp; - if (interp->modules == NULL) - Py_FatalError("PyImport_GetModuleDict: no module dictionary!"); - return interp->modules; -} - - -/* List of names to clear in sys */ -static char* sys_deletes[] = { - "path", "argv", "ps1", "ps2", - "last_type", "last_value", "last_traceback", - "path_hooks", "path_importer_cache", "meta_path", - /* misc stuff */ - "flags", "float_info", - NULL -}; - -static char* sys_files[] = { - "stdin", "__stdin__", - "stdout", "__stdout__", - "stderr", "__stderr__", - NULL -}; - - -/* Un-initialize things, as good as we can */ - -void -PyImport_Cleanup(void) -{ - Py_ssize_t pos, ndone; - PyObject *key, *value, *dict; - PyInterpreterState *interp = PyThreadState_GET()->interp; - PyObject *modules = interp->modules; - - if (modules == NULL) - return; /* Already done */ - - /* Delete some special variables first. These are common - places where user values hide and people complain when their - destructors fail. Since the modules containing them are - deleted *last* of all, they would come too late in the normal - destruction order. Sigh. */ - - value = PyDict_GetItemString(modules, "builtins"); - if (value != NULL && PyModule_Check(value)) { - dict = PyModule_GetDict(value); - if (Py_VerboseFlag) - PySys_WriteStderr("# clear builtins._\n"); - PyDict_SetItemString(dict, "_", Py_None); - } - value = PyDict_GetItemString(modules, "sys"); - if (value != NULL && PyModule_Check(value)) { - char **p; - PyObject *v; - dict = PyModule_GetDict(value); - for (p = sys_deletes; *p != NULL; p++) { - if (Py_VerboseFlag) - PySys_WriteStderr("# clear sys.%s\n", *p); - PyDict_SetItemString(dict, *p, Py_None); - } - for (p = sys_files; *p != NULL; p+=2) { - if (Py_VerboseFlag) - PySys_WriteStderr("# restore sys.%s\n", *p); - v = PyDict_GetItemString(dict, *(p+1)); - if (v == NULL) - v = Py_None; - PyDict_SetItemString(dict, *p, v); - } - } - - /* First, delete __main__ */ - value = PyDict_GetItemString(modules, "__main__"); - if (value != NULL && PyModule_Check(value)) { - if (Py_VerboseFlag) - PySys_WriteStderr("# cleanup __main__\n"); - _PyModule_Clear(value); - PyDict_SetItemString(modules, "__main__", Py_None); - } - - /* The special treatment of "builtins" here is because even - when it's not referenced as a module, its dictionary is - referenced by almost every module's __builtins__. Since - deleting a module clears its dictionary (even if there are - references left to it), we need to delete the "builtins" - module last. Likewise, we don't delete sys until the very - end because it is implicitly referenced (e.g. by print). - - Also note that we 'delete' modules by replacing their entry - in the modules dict with None, rather than really deleting - them; this avoids a rehash of the modules dictionary and - also marks them as "non existent" so they won't be - re-imported. */ - - /* Next, repeatedly delete modules with a reference count of - one (skipping builtins and sys) and delete them */ - do { - ndone = 0; - pos = 0; - while (PyDict_Next(modules, &pos, &key, &value)) { - if (value->ob_refcnt != 1) - continue; - if (PyUnicode_Check(key) && PyModule_Check(value)) { - if (PyUnicode_CompareWithASCIIString(key, "builtins") == 0) - continue; - if (PyUnicode_CompareWithASCIIString(key, "sys") == 0) - continue; - if (Py_VerboseFlag) - PySys_FormatStderr( - "# cleanup[1] %U\n", key); - _PyModule_Clear(value); - PyDict_SetItem(modules, key, Py_None); - ndone++; - } - } - } while (ndone > 0); - - /* Next, delete all modules (still skipping builtins and sys) */ - pos = 0; - while (PyDict_Next(modules, &pos, &key, &value)) { - if (PyUnicode_Check(key) && PyModule_Check(value)) { - if (PyUnicode_CompareWithASCIIString(key, "builtins") == 0) - continue; - if (PyUnicode_CompareWithASCIIString(key, "sys") == 0) - continue; - if (Py_VerboseFlag) - PySys_FormatStderr("# cleanup[2] %U\n", key); - _PyModule_Clear(value); - PyDict_SetItem(modules, key, Py_None); - } - } - - /* Next, delete sys and builtins (in that order) */ - value = PyDict_GetItemString(modules, "sys"); - if (value != NULL && PyModule_Check(value)) { - if (Py_VerboseFlag) - PySys_WriteStderr("# cleanup sys\n"); - _PyModule_Clear(value); - PyDict_SetItemString(modules, "sys", Py_None); - } - value = PyDict_GetItemString(modules, "builtins"); - if (value != NULL && PyModule_Check(value)) { - if (Py_VerboseFlag) - PySys_WriteStderr("# cleanup builtins\n"); - _PyModule_Clear(value); - PyDict_SetItemString(modules, "builtins", Py_None); - } - - /* Finally, clear and delete the modules directory */ - PyDict_Clear(modules); - interp->modules = NULL; - Py_DECREF(modules); - Py_CLEAR(interp->modules_reloading); -} - - -/* Helper for pythonrun.c -- return magic number and tag. */ - -long -PyImport_GetMagicNumber(void) -{ - return pyc_magic; -} - - -const char * -PyImport_GetMagicTag(void) -{ - return pyc_tag; -} - -/* Magic for extension modules (built-in as well as dynamically - loaded). To prevent initializing an extension module more than - once, we keep a static dictionary 'extensions' keyed by module name - (for built-in modules) or by filename (for dynamically loaded - modules), containing these modules. A copy of the module's - dictionary is stored by calling _PyImport_FixupExtensionObject() - immediately after the module initialization function succeeds. A - copy can be retrieved from there by calling - _PyImport_FindExtensionObject(). - - Modules which do support multiple initialization set their m_size - field to a non-negative number (indicating the size of the - module-specific state). They are still recorded in the extensions - dictionary, to avoid loading shared libraries twice. -*/ - -int -_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, - PyObject *filename) -{ - PyObject *modules, *dict; - struct PyModuleDef *def; - if (extensions == NULL) { - extensions = PyDict_New(); - if (extensions == NULL) - return -1; - } - if (mod == NULL || !PyModule_Check(mod)) { - PyErr_BadInternalCall(); - return -1; - } - def = PyModule_GetDef(mod); - if (!def) { - PyErr_BadInternalCall(); - return -1; - } - modules = PyImport_GetModuleDict(); - if (PyDict_SetItem(modules, name, mod) < 0) - return -1; - if (_PyState_AddModule(mod, def) < 0) { - PyDict_DelItem(modules, name); - return -1; - } - if (def->m_size == -1) { - if (def->m_base.m_copy) { - /* Somebody already imported the module, - likely under a different name. - XXX this should really not happen. */ - Py_DECREF(def->m_base.m_copy); - def->m_base.m_copy = NULL; - } - dict = PyModule_GetDict(mod); - if (dict == NULL) - return -1; - def->m_base.m_copy = PyDict_Copy(dict); - if (def->m_base.m_copy == NULL) - return -1; - } - PyDict_SetItem(extensions, filename, (PyObject*)def); - return 0; -} - -int -_PyImport_FixupBuiltin(PyObject *mod, char *name) -{ - int res; - PyObject *nameobj; - nameobj = PyUnicode_InternFromString(name); - if (nameobj == NULL) - return -1; - res = _PyImport_FixupExtensionObject(mod, nameobj, nameobj); - Py_DECREF(nameobj); - return res; -} - -PyObject * -_PyImport_FindExtensionObject(PyObject *name, PyObject *filename) -{ - PyObject *mod, *mdict; - PyModuleDef* def; - if (extensions == NULL) - return NULL; - def = (PyModuleDef*)PyDict_GetItem(extensions, filename); - if (def == NULL) - return NULL; - if (def->m_size == -1) { - /* Module does not support repeated initialization */ - if (def->m_base.m_copy == NULL) - return NULL; - mod = PyImport_AddModuleObject(name); - if (mod == NULL) - return NULL; - mdict = PyModule_GetDict(mod); - if (mdict == NULL) - return NULL; - if (PyDict_Update(mdict, def->m_base.m_copy)) - return NULL; - } - else { - if (def->m_base.m_init == NULL) - return NULL; - mod = def->m_base.m_init(); - if (mod == NULL) - return NULL; - PyDict_SetItem(PyImport_GetModuleDict(), name, mod); - Py_DECREF(mod); - } - if (_PyState_AddModule(mod, def) < 0) { - PyDict_DelItem(PyImport_GetModuleDict(), name); - Py_DECREF(mod); - return NULL; - } - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # previously loaded (%R)\n", - name, filename); - return mod; - -} - -PyObject * -_PyImport_FindBuiltin(const char *name) -{ - PyObject *res, *nameobj; - nameobj = PyUnicode_InternFromString(name); - if (nameobj == NULL) - return NULL; - res = _PyImport_FindExtensionObject(nameobj, nameobj); - Py_DECREF(nameobj); - return res; -} - -/* Get the module object corresponding to a module name. - First check the modules dictionary if there's one there, - if not, create a new one and insert it in the modules dictionary. - Because the former action is most common, THIS DOES NOT RETURN A - 'NEW' REFERENCE! */ - -PyObject * -PyImport_AddModuleObject(PyObject *name) -{ - PyObject *modules = PyImport_GetModuleDict(); - PyObject *m; - - if ((m = PyDict_GetItem(modules, name)) != NULL && - PyModule_Check(m)) - return m; - m = PyModule_NewObject(name); - if (m == NULL) - return NULL; - if (PyDict_SetItem(modules, name, m) != 0) { - Py_DECREF(m); - return NULL; - } - Py_DECREF(m); /* Yes, it still exists, in modules! */ - - return m; -} - -PyObject * -PyImport_AddModule(const char *name) -{ - PyObject *nameobj, *module; - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - return NULL; - module = PyImport_AddModuleObject(nameobj); - Py_DECREF(nameobj); - return module; -} - - -/* Remove name from sys.modules, if it's there. */ -static void -remove_module(PyObject *name) -{ - PyObject *modules = PyImport_GetModuleDict(); - if (PyDict_GetItem(modules, name) == NULL) - return; - if (PyDict_DelItem(modules, name) < 0) - Py_FatalError("import: deleting existing key in" - "sys.modules failed"); -} - -static PyObject * get_sourcefile(PyObject *filename); -static PyObject *make_source_pathname(PyObject *pathname); -static PyObject* make_compiled_pathname(PyObject *pathname, int debug); - -/* Execute a code object in a module and return the module object - * WITH INCREMENTED REFERENCE COUNT. If an error occurs, name is - * removed from sys.modules, to avoid leaving damaged module objects - * in sys.modules. The caller may wish to restore the original - * module object (if any) in this case; PyImport_ReloadModule is an - * example. - * - * Note that PyImport_ExecCodeModuleWithPathnames() is the preferred, richer - * interface. The other two exist primarily for backward compatibility. - */ -PyObject * -PyImport_ExecCodeModule(char *name, PyObject *co) -{ - return PyImport_ExecCodeModuleWithPathnames( - name, co, (char *)NULL, (char *)NULL); -} - -PyObject * -PyImport_ExecCodeModuleEx(char *name, PyObject *co, char *pathname) -{ - return PyImport_ExecCodeModuleWithPathnames( - name, co, pathname, (char *)NULL); -} - -PyObject * -PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, - char *cpathname) -{ - PyObject *m = NULL; - PyObject *nameobj, *pathobj = NULL, *cpathobj = NULL; - - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - return NULL; - - if (pathname != NULL) { - pathobj = PyUnicode_DecodeFSDefault(pathname); - if (pathobj == NULL) - goto error; - } else - pathobj = NULL; - if (cpathname != NULL) { - cpathobj = PyUnicode_DecodeFSDefault(cpathname); - if (cpathobj == NULL) - goto error; - } else - cpathobj = NULL; - m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj); -error: - Py_DECREF(nameobj); - Py_XDECREF(pathobj); - Py_XDECREF(cpathobj); - return m; -} - -PyObject* -PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, - PyObject *cpathname) -{ - PyObject *modules = PyImport_GetModuleDict(); - PyObject *m, *d, *v; - - m = PyImport_AddModuleObject(name); - if (m == NULL) - return NULL; - /* If the module is being reloaded, we get the old module back - and re-use its dict to exec the new code. */ - d = PyModule_GetDict(m); - if (PyDict_GetItemString(d, "__builtins__") == NULL) { - if (PyDict_SetItemString(d, "__builtins__", - PyEval_GetBuiltins()) != 0) - goto error; - } - /* Remember the filename as the __file__ attribute */ - if (pathname != NULL) { - v = get_sourcefile(pathname); - if (v == NULL) - PyErr_Clear(); - } - else - v = NULL; - if (v == NULL) { - v = ((PyCodeObject *)co)->co_filename; - Py_INCREF(v); - } - if (PyDict_SetItemString(d, "__file__", v) != 0) - PyErr_Clear(); /* Not important enough to report */ - Py_DECREF(v); - - /* Remember the pyc path name as the __cached__ attribute. */ - if (cpathname != NULL) - v = cpathname; - else - v = Py_None; - if (PyDict_SetItemString(d, "__cached__", v) != 0) - PyErr_Clear(); /* Not important enough to report */ - - v = PyEval_EvalCode(co, d, d); - if (v == NULL) - goto error; - Py_DECREF(v); - - if ((m = PyDict_GetItem(modules, name)) == NULL) { - PyErr_Format(PyExc_ImportError, - "Loaded module %R not found in sys.modules", - name); - return NULL; - } - - Py_INCREF(m); - - return m; - - error: - remove_module(name); - return NULL; -} - - -/* Like strrchr(string, '/') but searches for the rightmost of either SEP - or ALTSEP, if the latter is defined. -*/ -static Py_UCS4* -rightmost_sep(Py_UCS4 *s) -{ - Py_UCS4 *found, c; - for (found = NULL; (c = *s); s++) { - if (c == SEP -#ifdef ALTSEP - || c == ALTSEP -#endif - ) - { - found = s; - } - } - return found; -} - -/* Like rightmost_sep, but operate on unicode objects. */ -static Py_ssize_t -rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end) -{ - Py_ssize_t found, i; - Py_UCS4 c; - for (found = -1, i = start; i < end; i++) { - c = PyUnicode_READ_CHAR(o, i); - if (c == SEP -#ifdef ALTSEP - || c == ALTSEP -#endif - ) - { - found = i; - } - } - return found; -} - -/* Given a pathname for a Python source file, fill a buffer with the - pathname for the corresponding compiled file. Return the pathname - for the compiled file, or NULL if there's no space in the buffer. - Doesn't set an exception. - - foo.py -> __pycache__/foo.<tag>.pyc - - pathstr is assumed to be "ready". -*/ - -static PyObject* -make_compiled_pathname(PyObject *pathstr, int debug) -{ - PyObject *result; - Py_ssize_t fname, ext, len, i, pos, taglen; - Py_ssize_t pycache_len = sizeof(CACHEDIR) - 1; - int kind; - void *data; - Py_UCS4 lastsep; - - /* Compute the output string size. */ - len = PyUnicode_GET_LENGTH(pathstr); - /* If there is no separator, this returns -1, so - fname will be 0. */ - fname = rightmost_sep_obj(pathstr, 0, len) + 1; - /* Windows: re-use the last separator character (/ or \\) when - appending the __pycache__ path. */ - if (fname > 0) - lastsep = PyUnicode_READ_CHAR(pathstr, fname -1); - else - lastsep = SEP; - ext = fname - 1; - for(i = fname; i < len; i++) - if (PyUnicode_READ_CHAR(pathstr, i) == '.') - ext = i + 1; - if (ext < fname) - /* No dot in filename; use entire filename */ - ext = len; - - /* result = pathstr[:fname] + "__pycache__" + SEP + - pathstr[fname:ext] + tag + ".py[co]" */ - taglen = strlen(pyc_tag); - result = PyUnicode_New(ext + pycache_len + 1 + taglen + 4, - PyUnicode_MAX_CHAR_VALUE(pathstr)); - if (!result) - return NULL; - kind = PyUnicode_KIND(result); - data = PyUnicode_DATA(result); - PyUnicode_CopyCharacters(result, 0, pathstr, 0, fname); - pos = fname; - for (i = 0; i < pycache_len; i++) - PyUnicode_WRITE(kind, data, pos++, CACHEDIR[i]); - PyUnicode_WRITE(kind, data, pos++, lastsep); - PyUnicode_CopyCharacters(result, pos, pathstr, - fname, ext - fname); - pos += ext - fname; - for (i = 0; pyc_tag[i]; i++) - PyUnicode_WRITE(kind, data, pos++, pyc_tag[i]); - PyUnicode_WRITE(kind, data, pos++, '.'); - PyUnicode_WRITE(kind, data, pos++, 'p'); - PyUnicode_WRITE(kind, data, pos++, 'y'); - PyUnicode_WRITE(kind, data, pos++, debug ? 'c' : 'o'); - return result; -} - - -/* Given a pathname to a Python byte compiled file, return the path to the - source file, if the path matches the PEP 3147 format. This does not check - for any file existence, however, if the pyc file name does not match PEP - 3147 style, NULL is returned. buf must be at least as big as pathname; - the resulting path will always be shorter. - - (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */ - -static PyObject* -make_source_pathname(PyObject *path) -{ - Py_ssize_t left, right, dot0, dot1, len; - Py_ssize_t i, j; - PyObject *result; - int kind; - void *data; - - len = PyUnicode_GET_LENGTH(path); - if (len > MAXPATHLEN) - return NULL; - - /* Look back two slashes from the end. In between these two slashes - must be the string __pycache__ or this is not a PEP 3147 style - path. It's possible for there to be only one slash. - */ - right = rightmost_sep_obj(path, 0, len); - if (right == -1) - return NULL; - left = rightmost_sep_obj(path, 0, right); - if (left == -1) - left = 0; - else - left++; - if (right-left != sizeof(CACHEDIR)-1) - return NULL; - for (i = 0; i < sizeof(CACHEDIR)-1; i++) - if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i]) - return NULL; - - /* Now verify that the path component to the right of the last slash - has two dots in it. - */ - dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1); - if (dot0 < 0) - return NULL; - dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1); - if (dot1 < 0) - return NULL; - /* Too many dots? */ - if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1) - return NULL; - - /* This is a PEP 3147 path. Start by copying everything from the - start of pathname up to and including the leftmost slash. Then - copy the file's basename, removing the magic tag and adding a .py - suffix. - */ - result = PyUnicode_New(left + (dot0-right) + 2, - PyUnicode_MAX_CHAR_VALUE(path)); - if (!result) - return NULL; - kind = PyUnicode_KIND(result); - data = PyUnicode_DATA(result); - PyUnicode_CopyCharacters(result, 0, path, 0, (i = left)); - PyUnicode_CopyCharacters(result, left, path, right+1, - (j = dot0-right)); - PyUnicode_WRITE(kind, data, i+j, 'p'); - PyUnicode_WRITE(kind, data, i+j+1, 'y'); - return result; -} - -/* Given a pathname for a Python source file, its time of last - modification, and a pathname for a compiled file, check whether the - compiled file represents the same version of the source. If so, - return a FILE pointer for the compiled file, positioned just after - the header; if not, return NULL. - Doesn't set an exception. */ - -static FILE * -check_compiled_module(PyObject *pathname, time_t mtime, PyObject *cpathname) -{ - FILE *fp; - long magic; - long pyc_mtime; - - fp = _Py_fopen(cpathname, "rb"); - if (fp == NULL) - return NULL; - magic = PyMarshal_ReadLongFromFile(fp); - if (magic != pyc_magic) { - if (Py_VerboseFlag) - PySys_FormatStderr("# %R has bad magic\n", cpathname); - fclose(fp); - return NULL; - } - pyc_mtime = PyMarshal_ReadLongFromFile(fp); - if (pyc_mtime != mtime) { - if (Py_VerboseFlag) - PySys_FormatStderr("# %R has bad mtime\n", cpathname); - fclose(fp); - return NULL; - } - if (Py_VerboseFlag) - PySys_FormatStderr("# %R matches %R\n", cpathname, pathname); - return fp; -} - - -/* Read a code object from a file and check it for validity */ - -static PyCodeObject * -read_compiled_module(PyObject *cpathname, FILE *fp) -{ - PyObject *co; - - co = PyMarshal_ReadLastObjectFromFile(fp); - if (co == NULL) - return NULL; - if (!PyCode_Check(co)) { - PyErr_Format(PyExc_ImportError, - "Non-code object in %R", cpathname); - Py_DECREF(co); - return NULL; - } - return (PyCodeObject *)co; -} - - -/* Load a module from a compiled file, execute it, and return its - module object WITH INCREMENTED REFERENCE COUNT */ - -static PyObject * -load_compiled_module(PyObject *name, PyObject *cpathname, FILE *fp) -{ - long magic; - PyCodeObject *co; - PyObject *m; - - magic = PyMarshal_ReadLongFromFile(fp); - if (magic != pyc_magic) { - PyErr_Format(PyExc_ImportError, - "Bad magic number in %R", cpathname); - return NULL; - } - (void) PyMarshal_ReadLongFromFile(fp); - co = read_compiled_module(cpathname, fp); - if (co == NULL) - return NULL; - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # precompiled from %R\n", - name, cpathname); - m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, - cpathname, cpathname); - Py_DECREF(co); - - return m; -} - -/* Parse a source file and return the corresponding code object */ - -static PyCodeObject * -parse_source_module(PyObject *pathname, FILE *fp) -{ - PyCodeObject *co; - PyObject *pathbytes; - mod_ty mod; - PyCompilerFlags flags; - PyArena *arena; - - pathbytes = PyUnicode_EncodeFSDefault(pathname); - if (pathbytes == NULL) - return NULL; - - arena = PyArena_New(); - if (arena == NULL) { - Py_DECREF(pathbytes); - return NULL; - } - - flags.cf_flags = 0; - mod = PyParser_ASTFromFile(fp, PyBytes_AS_STRING(pathbytes), NULL, - Py_file_input, 0, 0, &flags, - NULL, arena); - if (mod != NULL) - co = PyAST_Compile(mod, PyBytes_AS_STRING(pathbytes), NULL, arena); - else - co = NULL; - Py_DECREF(pathbytes); - PyArena_Free(arena); - return co; -} - -/* Write a compiled module to a file, placing the time of last - modification of its source into the header. - Errors are ignored, if a write error occurs an attempt is made to - remove the file. */ - -static void -write_compiled_module(PyCodeObject *co, PyObject *cpathname, - struct stat *srcstat) -{ - Py_UCS4 *cpathname_ucs4; - FILE *fp; - time_t mtime = srcstat->st_mtime; - PyObject *cpathname_tmp; - mode_t dirmode = (srcstat->st_mode | - S_IXUSR | S_IXGRP | S_IXOTH | - S_IWUSR | S_IWGRP | S_IWOTH); - PyObject *dirbytes; - PyObject *cpathbytes, *cpathbytes_tmp; - int fd; - PyObject *dirname; - Py_UCS4 *dirsep; - int res, ok; - - /* Ensure that the __pycache__ directory exists. */ - cpathname_ucs4 = PyUnicode_AsUCS4Copy(cpathname); - if (!cpathname_ucs4) - return; - dirsep = rightmost_sep(cpathname_ucs4); - if (dirsep == NULL) { - if (Py_VerboseFlag) - PySys_FormatStderr("# no %s path found %R\n", CACHEDIR, cpathname); - return; - } - dirname = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, - cpathname_ucs4, - dirsep - cpathname_ucs4); - PyMem_Free(cpathname_ucs4); - if (dirname == NULL) { - PyErr_Clear(); - return; - } - - dirbytes = PyUnicode_EncodeFSDefault(dirname); - if (dirbytes == NULL) { - PyErr_Clear(); - return; - } - res = mkdir(PyBytes_AS_STRING(dirbytes), dirmode); - Py_DECREF(dirbytes); - if (0 <= res) - ok = 1; - else - ok = (errno == EEXIST); - if (!ok) { - if (Py_VerboseFlag) - PySys_FormatStderr("# cannot create cache dir %R\n", dirname); - Py_DECREF(dirname); - return; - } - Py_DECREF(dirname); - - /* We first write to a tmp file and then take advantage - of atomic renaming (which *should* be true even under Windows). - As in importlib, we use id(something) to generate a pseudo-random - filename. mkstemp() can't be used since it doesn't allow specifying - the file access permissions. - */ - cpathname_tmp = PyUnicode_FromFormat("%U.%zd", - cpathname, (Py_ssize_t) co); - if (cpathname_tmp == NULL) { - PyErr_Clear(); - return; - } - cpathbytes_tmp = PyUnicode_EncodeFSDefault(cpathname_tmp); - Py_DECREF(cpathname_tmp); - if (cpathbytes_tmp == NULL) { - PyErr_Clear(); - return; - } - cpathbytes = PyUnicode_EncodeFSDefault(cpathname); - if (cpathbytes == NULL) { - PyErr_Clear(); - return; - } - fd = open(PyBytes_AS_STRING(cpathbytes_tmp), - O_CREAT | O_EXCL | O_WRONLY, 0666); - if (0 <= fd) - fp = fdopen(fd, "wb"); - else - fp = NULL; - if (fp == NULL) { - if (Py_VerboseFlag) - PySys_FormatStderr( - "# can't create %R\n", cpathname); - Py_DECREF(cpathbytes); - Py_DECREF(cpathbytes_tmp); - return; - } - PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); - /* First write a 0 for mtime */ - PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); - PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); - fflush(fp); - /* Now write the true mtime */ - fseek(fp, 4L, 0); - assert(mtime < LONG_MAX); - PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION); - if (fflush(fp) != 0 || ferror(fp)) { - if (Py_VerboseFlag) - PySys_FormatStderr("# can't write %R\n", cpathname); - /* Don't keep partial file */ - fclose(fp); - (void) unlink(PyBytes_AS_STRING(cpathbytes_tmp)); - Py_DECREF(cpathbytes); - Py_DECREF(cpathbytes_tmp); - return; - } - fclose(fp); - /* Do a (hopefully) atomic rename */ - if (rename(PyBytes_AS_STRING(cpathbytes_tmp), - PyBytes_AS_STRING(cpathbytes))) { - if (Py_VerboseFlag) - PySys_FormatStderr("# can't write %R\n", cpathname); - /* Don't keep tmp file */ - unlink(PyBytes_AS_STRING(cpathbytes_tmp)); - Py_DECREF(cpathbytes); - Py_DECREF(cpathbytes_tmp); - return; - } - Py_DECREF(cpathbytes); - Py_DECREF(cpathbytes_tmp); - if (Py_VerboseFlag) - PySys_FormatStderr("# wrote %R\n", cpathname); -} - -static void -update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname) -{ - PyObject *constants, *tmp; - Py_ssize_t i, n; - - if (PyUnicode_Compare(co->co_filename, oldname)) - return; - - tmp = co->co_filename; - co->co_filename = newname; - Py_INCREF(co->co_filename); - Py_DECREF(tmp); - - constants = co->co_consts; - n = PyTuple_GET_SIZE(constants); - for (i = 0; i < n; i++) { - tmp = PyTuple_GET_ITEM(constants, i); - if (PyCode_Check(tmp)) - update_code_filenames((PyCodeObject *)tmp, - oldname, newname); - } -} - -static void -update_compiled_module(PyCodeObject *co, PyObject *newname) -{ - PyObject *oldname; - - if (PyUnicode_Compare(co->co_filename, newname) == 0) - return; - - oldname = co->co_filename; - Py_INCREF(oldname); - update_code_filenames(co, oldname, newname); - Py_DECREF(oldname); -} - -static PyObject * -imp_fix_co_filename(PyObject *self, PyObject *args) -{ - PyObject *co; - PyObject *file_path; - - if (!PyArg_ParseTuple(args, "OO:_fix_co_filename", &co, &file_path)) - return NULL; - - if (!PyCode_Check(co)) { - PyErr_SetString(PyExc_TypeError, - "first argument must be a code object"); - return NULL; - } - - if (!PyUnicode_Check(file_path)) { - PyErr_SetString(PyExc_TypeError, - "second argument must be a string"); - return NULL; - } - - update_compiled_module((PyCodeObject*)co, file_path); - - Py_RETURN_NONE; -} - -/* Load a source module from a given file and return its module - object WITH INCREMENTED REFERENCE COUNT. If there's a matching - byte-compiled file, use that instead. */ - -static PyObject * -load_source_module(PyObject *name, PyObject *pathname, FILE *fp) -{ - struct stat st; - FILE *fpc; - PyObject *cpathname = NULL, *cpathbytes = NULL; - PyCodeObject *co; - PyObject *m = NULL; - - if (fstat(fileno(fp), &st) != 0) { - PyErr_Format(PyExc_RuntimeError, - "unable to get file status from %R", - pathname); - goto error; - } -#if SIZEOF_TIME_T > 4 - /* Python's .pyc timestamp handling presumes that the timestamp fits - in 4 bytes. This will be fine until sometime in the year 2038, - when a 4-byte signed time_t will overflow. - */ - if (st.st_mtime >> 32) { - PyErr_SetString(PyExc_OverflowError, - "modification time overflows a 4 byte field"); - goto error; - } -#endif - if (PyUnicode_READY(pathname) < 0) - return NULL; - cpathname = make_compiled_pathname(pathname, !Py_OptimizeFlag); - - if (cpathname != NULL) - fpc = check_compiled_module(pathname, st.st_mtime, cpathname); - else - fpc = NULL; - - if (fpc) { - co = read_compiled_module(cpathname, fpc); - fclose(fpc); - if (co == NULL) - goto error; - update_compiled_module(co, pathname); - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # precompiled from %R\n", - name, cpathname); - m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, - cpathname, cpathname); - } - else { - co = parse_source_module(pathname, fp); - if (co == NULL) - goto error; - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # from %R\n", - name, pathname); - if (cpathname != NULL) { - PyObject *ro = PySys_GetObject("dont_write_bytecode"); - if (ro == NULL || !PyObject_IsTrue(ro)) - write_compiled_module(co, cpathname, &st); - } - m = PyImport_ExecCodeModuleObject(name, (PyObject *)co, - pathname, cpathname); - } - Py_DECREF(co); - -error: - Py_XDECREF(cpathbytes); - Py_XDECREF(cpathname); - return m; -} - -/* Get source file -> unicode or None - * Returns the path to the py file if available, else the given path - */ -static PyObject * -get_sourcefile(PyObject *filename) -{ - Py_ssize_t len; - Py_UCS4 *fileuni; - PyObject *py; - struct stat statbuf; - - len = PyUnicode_GET_LENGTH(filename); - if (len == 0) - Py_RETURN_NONE; - - /* don't match *.pyc or *.pyo? */ - fileuni = PyUnicode_AsUCS4Copy(filename); - if (!fileuni) - return NULL; - if (len < 5 - || fileuni[len-4] != '.' - || (fileuni[len-3] != 'p' && fileuni[len-3] != 'P') - || (fileuni[len-2] != 'y' && fileuni[len-2] != 'Y')) - goto unchanged; - - /* Start by trying to turn PEP 3147 path into source path. If that - * fails, just chop off the trailing character, i.e. legacy pyc path - * to py. - */ - py = make_source_pathname(filename); - if (py == NULL) { - PyErr_Clear(); - py = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, fileuni, len - 1); - } - if (py == NULL) - goto error; - - if (_Py_stat(py, &statbuf) == 0 && S_ISREG(statbuf.st_mode)) { - PyMem_Free(fileuni); - return py; - } - Py_DECREF(py); - goto unchanged; - -error: - PyErr_Clear(); -unchanged: - PyMem_Free(fileuni); - Py_INCREF(filename); - return filename; -} - -/* Forward */ -static PyObject *load_module(PyObject *, FILE *, PyObject *, int, PyObject *); -static struct filedescr *find_module(PyObject *, PyObject *, PyObject *, - PyObject **, FILE **, PyObject **); -static struct _frozen * find_frozen(PyObject *); - -/* Load a package and return its module object WITH INCREMENTED - REFERENCE COUNT */ - -static PyObject * -load_package(PyObject *name, PyObject *pathname) -{ - PyObject *m, *d, *bufobj; - PyObject *file = NULL, *path_list = NULL; - int err; - FILE *fp = NULL; - struct filedescr *fdp; - - m = PyImport_AddModuleObject(name); - if (m == NULL) - return NULL; - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # directory %R\n", - name, pathname); - file = get_sourcefile(pathname); - if (file == NULL) - return NULL; - path_list = Py_BuildValue("[O]", file); - if (path_list == NULL) { - Py_DECREF(file); - return NULL; - } - d = PyModule_GetDict(m); - err = PyDict_SetItemString(d, "__file__", file); - Py_DECREF(file); - if (err == 0) - err = PyDict_SetItemString(d, "__path__", path_list); - if (err != 0) { - Py_DECREF(path_list); - return NULL; - } - fdp = find_module(name, initstr, path_list, - &bufobj, &fp, NULL); - Py_DECREF(path_list); - if (fdp == NULL) { - if (PyErr_ExceptionMatches(PyExc_ImportError)) { - PyErr_Clear(); - Py_INCREF(m); - return m; - } - else - return NULL; - } - m = load_module(name, fp, bufobj, fdp->type, NULL); - Py_XDECREF(bufobj); - if (fp != NULL) - fclose(fp); - return m; -} - - -/* Helper to test for built-in module */ - -static int -is_builtin(PyObject *name) -{ - int i, cmp; - for (i = 0; PyImport_Inittab[i].name != NULL; i++) { - cmp = PyUnicode_CompareWithASCIIString(name, PyImport_Inittab[i].name); - if (cmp == 0) { - if (PyImport_Inittab[i].initfunc == NULL) - return -1; - else - return 1; - } - } - return 0; -} - - -/* Return an importer object for a sys.path/pkg.__path__ item 'p', - possibly by fetching it from the path_importer_cache dict. If it - wasn't yet cached, traverse path_hooks until a hook is found - that can handle the path item. Return None if no hook could; - this tells our caller it should fall back to the builtin - import mechanism. Cache the result in path_importer_cache. - Returns a borrowed reference. */ - -static PyObject * -get_path_importer(PyObject *path_importer_cache, PyObject *path_hooks, - PyObject *p) -{ - PyObject *importer; - Py_ssize_t j, nhooks; - - /* These conditions are the caller's responsibility: */ - assert(PyList_Check(path_hooks)); - assert(PyDict_Check(path_importer_cache)); - - nhooks = PyList_Size(path_hooks); - if (nhooks < 0) - return NULL; /* Shouldn't happen */ - - importer = PyDict_GetItem(path_importer_cache, p); - if (importer != NULL) - return importer; - - /* set path_importer_cache[p] to None to avoid recursion */ - if (PyDict_SetItem(path_importer_cache, p, Py_None) != 0) - return NULL; - - for (j = 0; j < nhooks; j++) { - PyObject *hook = PyList_GetItem(path_hooks, j); - if (hook == NULL) - return NULL; - importer = PyObject_CallFunctionObjArgs(hook, p, NULL); - if (importer != NULL) - break; - - if (!PyErr_ExceptionMatches(PyExc_ImportError)) { - return NULL; - } - PyErr_Clear(); - } - if (importer == NULL) { - importer = PyObject_CallFunctionObjArgs( - (PyObject *)&PyNullImporter_Type, p, NULL - ); - if (importer == NULL) { - if (PyErr_ExceptionMatches(PyExc_ImportError)) { - PyErr_Clear(); - return Py_None; - } - } - } - if (importer != NULL) { - int err = PyDict_SetItem(path_importer_cache, p, importer); - Py_DECREF(importer); - if (err != 0) - return NULL; - } - return importer; -} - -PyAPI_FUNC(PyObject *) -PyImport_GetImporter(PyObject *path) { - PyObject *importer=NULL, *path_importer_cache=NULL, *path_hooks=NULL; - - if ((path_importer_cache = PySys_GetObject("path_importer_cache"))) { - if ((path_hooks = PySys_GetObject("path_hooks"))) { - importer = get_path_importer(path_importer_cache, - path_hooks, path); - } - } - Py_XINCREF(importer); /* get_path_importer returns a borrowed reference */ - return importer; -} - -/* Search the path (default sys.path) for a module. Return the - corresponding filedescr struct, and (via return arguments) the - pathname and an open file. Return NULL if the module is not found. */ - -#ifdef MS_COREDLL -extern FILE *_PyWin_FindRegisteredModule(PyObject *, struct filedescr **, - PyObject **p_path); -#endif - -/* Forward */ -static int case_ok(PyObject *, Py_ssize_t, PyObject *); -static int find_init_module(PyObject *); -static struct filedescr importhookdescr = {"", "", IMP_HOOK}; - -/* Get the path of a module: get its importer and call importer.find_module() - hook, or check if the module if a package (if path/__init__.py exists). - - -1: error: a Python error occurred - 0: ignore: an error occurred because of invalid data, but the error is not - important enough to be reported. - 1: get path: module not found, but *buf contains its path - 2: found: *p_fd is the file descriptor (IMP_HOOK or PKG_DIRECTORY) - and *buf is the path */ - -static int -find_module_path(PyObject *fullname, PyObject *name, PyObject *path, - PyObject *path_hooks, PyObject *path_importer_cache, - PyObject **p_path, PyObject **p_loader, struct filedescr **p_fd) -{ - PyObject *path_unicode, *filename = NULL; - Py_ssize_t len, pos; - struct stat statbuf; - static struct filedescr fd_package = {"", "", PKG_DIRECTORY}; - int result, addsep; - - if (PyUnicode_Check(path)) { - Py_INCREF(path); - path_unicode = path; - } - else if (PyBytes_Check(path)) { - path_unicode = PyUnicode_DecodeFSDefaultAndSize( - PyBytes_AS_STRING(path), PyBytes_GET_SIZE(path)); - if (path_unicode == NULL) - return -1; - } - else - return 0; - - if (PyUnicode_READY(path_unicode)) - return -1; - - len = PyUnicode_GET_LENGTH(path_unicode); - if (PyUnicode_FindChar(path_unicode, 0, 0, len, 1) != -1) { - result = 0; - goto out; /* path contains '\0' */ - } - - /* sys.path_hooks import hook */ - if (p_loader != NULL) { - _Py_IDENTIFIER(find_module); - PyObject *importer; - - importer = get_path_importer(path_importer_cache, - path_hooks, path); - if (importer == NULL) { - result = -1; - goto out; - } - /* Note: importer is a borrowed reference */ - if (importer != Py_None) { - PyObject *loader; - loader = _PyObject_CallMethodId(importer, - &PyId_find_module, "O", fullname); - if (loader == NULL) { - result = -1; /* error */ - goto out; - } - if (loader != Py_None) { - /* a loader was found */ - *p_loader = loader; - *p_fd = &importhookdescr; - result = 2; - goto out; - } - Py_DECREF(loader); - result = 0; - goto out; - } - } - /* no hook was found, use builtin import */ - - addsep = 0; - if (len > 0 && PyUnicode_READ_CHAR(path_unicode, len-1) != SEP -#ifdef ALTSEP - && PyUnicode_READ_CHAR(path_unicode, len-1) != ALTSEP -#endif - ) - addsep = 1; - filename = PyUnicode_New(len + PyUnicode_GET_LENGTH(name) + addsep, - Py_MAX(PyUnicode_MAX_CHAR_VALUE(path_unicode), - PyUnicode_MAX_CHAR_VALUE(name))); - if (filename == NULL) { - result = -1; - goto out; - } - PyUnicode_CopyCharacters(filename, 0, path_unicode, 0, len); - pos = len; - if (addsep) - PyUnicode_WRITE(PyUnicode_KIND(filename), - PyUnicode_DATA(filename), - pos++, SEP); - PyUnicode_CopyCharacters(filename, pos, name, 0, - PyUnicode_GET_LENGTH(name)); - - /* Check for package import (buf holds a directory name, - and there's an __init__ module in that directory */ -#ifdef HAVE_STAT - if (_Py_stat(filename, &statbuf) == 0 && /* it exists */ - S_ISDIR(statbuf.st_mode)) /* it's a directory */ - { - int match; - - match = case_ok(filename, 0, name); - if (match < 0) { - result = -1; - goto out; - } - if (match) { /* case matches */ - if (find_init_module(filename)) { /* and has __init__.py */ - *p_path = filename; - filename = NULL; - *p_fd = &fd_package; - result = 2; - goto out; - } - else { - int err; - err = PyErr_WarnFormat(PyExc_ImportWarning, 1, - "Not importing directory %R: missing __init__.py", - filename); - if (err) { - result = -1; - goto out; - } - } - } - } -#endif - *p_path = filename; - filename = NULL; - result = 1; - out: - Py_DECREF(path_unicode); - Py_XDECREF(filename); - return result; -} - -/* Find a module in search_path_list. For each path, try - find_module_path() or try each _PyImport_Filetab suffix. - - If the module is found, return a file descriptor, write the path in - *p_filename, write the pointer to the file object into *p_fp, and (if - p_loader is not NULL) the loader into *p_loader. - - Otherwise, raise an exception and return NULL. */ - -static struct filedescr* -find_module_path_list(PyObject *fullname, PyObject *name, - PyObject *search_path_list, PyObject *path_hooks, - PyObject *path_importer_cache, - PyObject **p_path, FILE **p_fp, PyObject **p_loader) -{ - Py_ssize_t i, npath; - struct filedescr *fdp = NULL; - char *filemode; - FILE *fp = NULL; - PyObject *prefix, *filename; - int match; - - npath = PyList_Size(search_path_list); - for (i = 0; i < npath; i++) { - PyObject *path; - int ok; - - path = PyList_GetItem(search_path_list, i); - if (path == NULL) - return NULL; - - prefix = NULL; - ok = find_module_path(fullname, name, path, - path_hooks, path_importer_cache, - &prefix, p_loader, &fdp); - if (ok < 0) - return NULL; - if (ok == 0) - continue; - if (ok == 2) { - *p_path = prefix; - return fdp; - } - - for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) { - struct stat statbuf; - - filemode = fdp->mode; - if (filemode[0] == 'U') - filemode = "r" PY_STDIOTEXTMODE; - - filename = PyUnicode_FromFormat("%U%s", prefix, fdp->suffix); - if (filename == NULL) { - Py_DECREF(prefix); - return NULL; - } - - if (Py_VerboseFlag > 1) - PySys_FormatStderr("# trying %R\n", filename); - - if (_Py_stat(filename, &statbuf) != 0 || S_ISDIR(statbuf.st_mode)) - { - /* it doesn't exist, or it's a directory */ - Py_DECREF(filename); - continue; - } - - fp = _Py_fopen(filename, filemode); - if (fp == NULL) { - Py_DECREF(filename); - if (PyErr_Occurred()) { - Py_DECREF(prefix); - return NULL; - } - continue; - } - match = case_ok(filename, -(Py_ssize_t)strlen(fdp->suffix), name); - if (match < 0) { - Py_DECREF(prefix); - Py_DECREF(filename); - return NULL; - } - if (match) { - Py_DECREF(prefix); - *p_path = filename; - *p_fp = fp; - return fdp; - } - Py_DECREF(filename); - - fclose(fp); - fp = NULL; - } - Py_DECREF(prefix); - } - PyErr_Format(PyExc_ImportError, - "No module named %R", name); - return NULL; -} - -/* Find a module: - - - try find_module() of each sys.meta_path hook - - try find_frozen() - - try is_builtin() - - try _PyWin_FindRegisteredModule() (Windows only) - - otherwise, call find_module_path_list() with search_path_list (if not - NULL) or sys.path - - fullname can be NULL, but only if p_loader is NULL. - - Return: - - - &fd_builtin (C_BUILTIN) if it is a builtin - - &fd_frozen (PY_FROZEN) if it is frozen - - &fd_package (PKG_DIRECTORY) and write the filename into *p_path - if it is a package - - &importhookdescr (IMP_HOOK) and write the loader into *p_loader if a - importer loader was found - - a file descriptor (PY_SOURCE, PY_COMPILED, C_EXTENSION, PY_RESOURCE or - PY_CODERESOURCE: see _PyImport_Filetab), write the filename into - *p_path and the pointer to the open file into *p_fp - - NULL on error - - By default, *p_path, *p_fp and *p_loader (if set) are set to NULL. - Eg. *p_path is set to NULL for a builtin package. -*/ - -static struct filedescr * -find_module(PyObject *fullname, PyObject *name, PyObject *search_path_list, - PyObject **p_path, FILE **p_fp, PyObject **p_loader) -{ - Py_ssize_t i, npath; - static struct filedescr fd_frozen = {"", "", PY_FROZEN}; - static struct filedescr fd_builtin = {"", "", C_BUILTIN}; - PyObject *path_hooks, *path_importer_cache; - - *p_path = NULL; - *p_fp = NULL; - if (p_loader != NULL) - *p_loader = NULL; - - if (PyUnicode_GET_LENGTH(name) > MAXPATHLEN) { - PyErr_SetString(PyExc_OverflowError, - "module name is too long"); - return NULL; - } - - /* sys.meta_path import hook */ - if (p_loader != NULL) { - _Py_IDENTIFIER(find_module); - PyObject *meta_path; - - meta_path = PySys_GetObject("meta_path"); - if (meta_path == NULL || !PyList_Check(meta_path)) { - PyErr_SetString(PyExc_RuntimeError, - "sys.meta_path must be a list of " - "import hooks"); - return NULL; - } - Py_INCREF(meta_path); /* zap guard */ - npath = PyList_Size(meta_path); - for (i = 0; i < npath; i++) { - PyObject *loader; - PyObject *hook = PyList_GetItem(meta_path, i); - loader = _PyObject_CallMethodId(hook, &PyId_find_module, - "OO", fullname, - search_path_list != NULL ? - search_path_list : Py_None); - if (loader == NULL) { - Py_DECREF(meta_path); - return NULL; /* true error */ - } - if (loader != Py_None) { - /* a loader was found */ - *p_loader = loader; - Py_DECREF(meta_path); - return &importhookdescr; - } - Py_DECREF(loader); - } - Py_DECREF(meta_path); - } - - if (find_frozen(fullname) != NULL) - return &fd_frozen; - - if (search_path_list == NULL) { -#ifdef MS_COREDLL - FILE *fp; - struct filedescr *fdp; -#endif - if (is_builtin(name)) - return &fd_builtin; -#ifdef MS_COREDLL - fp = _PyWin_FindRegisteredModule(name, &fdp, p_path); - if (fp != NULL) { - *p_fp = fp; - return fdp; - } - else if (PyErr_Occurred()) - return NULL; -#endif - search_path_list = PySys_GetObject("path"); - } - - if (search_path_list == NULL || !PyList_Check(search_path_list)) { - PyErr_SetString(PyExc_RuntimeError, - "sys.path must be a list of directory names"); - return NULL; - } - - path_hooks = PySys_GetObject("path_hooks"); - if (path_hooks == NULL || !PyList_Check(path_hooks)) { - PyErr_SetString(PyExc_RuntimeError, - "sys.path_hooks must be a list of " - "import hooks"); - return NULL; - } - path_importer_cache = PySys_GetObject("path_importer_cache"); - if (path_importer_cache == NULL || - !PyDict_Check(path_importer_cache)) { - PyErr_SetString(PyExc_RuntimeError, - "sys.path_importer_cache must be a dict"); - return NULL; - } - - return find_module_path_list(fullname, name, - search_path_list, path_hooks, - path_importer_cache, - p_path, p_fp, p_loader); -} - -/* case_bytes(char* buf, Py_ssize_t len, Py_ssize_t namelen, char* name) - * The arguments here are tricky, best shown by example: - * /a/b/c/d/e/f/g/h/i/j/k/some_long_module_name.py\0 - * ^ ^ ^ ^ - * |--------------------- buf ---------------------| - * |------------------- len ------------------| - * |------ name -------| - * |----- namelen -----| - * buf is the full path, but len only counts up to (& exclusive of) the - * extension. name is the module name, also exclusive of extension. - * - * We've already done a successful stat() or fopen() on buf, so know that - * there's some match, possibly case-insensitive. - * - * case_bytes() is to return 1 if there's a case-sensitive match for - * name, else 0. case_bytes() is also to return 1 if envar PYTHONCASEOK - * exists. - * - * case_bytes() is used to implement case-sensitive import semantics even - * on platforms with case-insensitive filesystems. It's trivial to implement - * for case-sensitive filesystems. It's pretty much a cross-platform - * nightmare for systems with case-insensitive filesystems. - */ - -/* First we may need a pile of platform-specific header files; the sequence - * of #if's here should match the sequence in the body of case_bytes(). - */ -#if defined(MS_WINDOWS) -#include <windows.h> - -#elif defined(DJGPP) -#include <dir.h> - -#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) -#include <sys/types.h> -#include <dirent.h> - -#elif defined(PYOS_OS2) -#define INCL_DOS -#define INCL_DOSERRORS -#define INCL_NOPMAPI -#include <os2.h> -#endif - -#if defined(DJGPP) \ - || ((defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) \ - && defined(HAVE_DIRENT_H)) \ - || defined(PYOS_OS2) -# define USE_CASE_OK_BYTES -#endif - - -#ifdef USE_CASE_OK_BYTES -static int -case_bytes(char *buf, Py_ssize_t len, Py_ssize_t namelen, const char *name) -{ -/* Pick a platform-specific implementation; the sequence of #if's here should - * match the sequence just above. - */ - -/* DJGPP */ -#if defined(DJGPP) - struct ffblk ffblk; - int done; - - if (Py_GETENV("PYTHONCASEOK") != NULL) - return 1; - - done = findfirst(buf, &ffblk, FA_ARCH|FA_RDONLY|FA_HIDDEN|FA_DIREC); - if (done) { - PyErr_Format(PyExc_NameError, - "Can't find file for module %.100s\n(filename %.300s)", - name, buf); - return -1; - } - return strncmp(ffblk.ff_name, name, namelen) == 0; - -/* new-fangled macintosh (macosx) or Cygwin */ -#elif (defined(__MACH__) && defined(__APPLE__) || defined(__CYGWIN__)) && defined(HAVE_DIRENT_H) - DIR *dirp; - struct dirent *dp; - char dirname[MAXPATHLEN + 1]; - const int dirlen = len - namelen - 1; /* don't want trailing SEP */ - - if (Py_GETENV("PYTHONCASEOK") != NULL) - return 1; - - /* Copy the dir component into dirname; substitute "." if empty */ - if (dirlen <= 0) { - dirname[0] = '.'; - dirname[1] = '\0'; - } - else { - assert(dirlen <= MAXPATHLEN); - memcpy(dirname, buf, dirlen); - dirname[dirlen] = '\0'; - } - /* Open the directory and search the entries for an exact match. */ - dirp = opendir(dirname); - if (dirp) { - char *nameWithExt = buf + len - namelen; - while ((dp = readdir(dirp)) != NULL) { - const int thislen = -#ifdef _DIRENT_HAVE_D_NAMELEN - dp->d_namlen; -#else - strlen(dp->d_name); -#endif - if (thislen >= namelen && - strcmp(dp->d_name, nameWithExt) == 0) { - (void)closedir(dirp); - return 1; /* Found */ - } - } - (void)closedir(dirp); - } - return 0 ; /* Not found */ - -/* OS/2 */ -#elif defined(PYOS_OS2) - HDIR hdir = 1; - ULONG srchcnt = 1; - FILEFINDBUF3 ffbuf; - APIRET rc; - - if (Py_GETENV("PYTHONCASEOK") != NULL) - return 1; - - rc = DosFindFirst(buf, - &hdir, - FILE_READONLY | FILE_HIDDEN | FILE_SYSTEM | FILE_DIRECTORY, - &ffbuf, sizeof(ffbuf), - &srchcnt, - FIL_STANDARD); - if (rc != NO_ERROR) - return 0; - return strncmp(ffbuf.achName, name, namelen) == 0; - -/* assuming it's a case-sensitive filesystem, so there's nothing to do! */ -#else -# error "USE_CASE_OK_BYTES is not correctly defined" -#endif -} -#endif - -/* - * Check if a filename case matchs the name case. We've already done a - * successful stat() or fopen() on buf, so know that there's some match, - * possibly case-insensitive. - * - * case_ok() is to return 1 if there's a case-sensitive match for name, 0 if it - * the filename doesn't match, or -1 on error. case_ok() is also to return 1 - * if envar PYTHONCASEOK exists. - * - * case_ok() is used to implement case-sensitive import semantics even - * on platforms with case-insensitive filesystems. It's trivial to implement - * for case-sensitive filesystems. It's pretty much a cross-platform - * nightmare for systems with case-insensitive filesystems. - */ - -static int -case_ok(PyObject *filename, Py_ssize_t prefix_delta, PyObject *name) -{ - /* assuming it's a case-sensitive filesystem, so there's nothing to do! */ - return 1; - -} - -#ifdef HAVE_STAT - -/* Helper to look for __init__.py or __init__.py[co] in potential package. - Return 1 if __init__ was found, 0 if not, or -1 on error. */ -static int -find_init_module(PyObject *directory) -{ - struct stat statbuf; - PyObject *filename; - int match; - - filename = PyUnicode_FromFormat("%U%c__init__.py", directory, SEP); - if (filename == NULL) - return -1; - if (_Py_stat(filename, &statbuf) == 0) { - /* 3=len(".py") */ - match = case_ok(filename, -3, initstr); - if (match < 0) { - Py_DECREF(filename); - return -1; - } - if (match) { - Py_DECREF(filename); - return 1; - } - } - Py_DECREF(filename); - - filename = PyUnicode_FromFormat("%U%c__init__.py%c", - directory, SEP, Py_OptimizeFlag ? 'o' : 'c'); - if (filename == NULL) - return -1; - if (_Py_stat(filename, &statbuf) == 0) { - /* 4=len(".pyc") */ - match = case_ok(filename, -4, initstr); - if (match < 0) { - Py_DECREF(filename); - return -1; - } - if (match) { - Py_DECREF(filename); - return 1; - } - } - Py_DECREF(filename); - return 0; -} - -#endif /* HAVE_STAT */ - - -static int init_builtin(PyObject *); /* Forward */ - -static PyObject* -load_builtin(PyObject *name, int type) -{ - PyObject *m, *modules; - int err; - - if (type == C_BUILTIN) - err = init_builtin(name); - else - err = PyImport_ImportFrozenModuleObject(name); - if (err < 0) - return NULL; - if (err == 0) { - PyErr_Format(PyExc_ImportError, - "Purported %s module %R not found", - type == C_BUILTIN ? "builtin" : "frozen", - name); - return NULL; - } - - modules = PyImport_GetModuleDict(); - m = PyDict_GetItem(modules, name); - if (m == NULL) { - PyErr_Format( - PyExc_ImportError, - "%s module %R not properly initialized", - type == C_BUILTIN ? "builtin" : "frozen", - name); - return NULL; - } - Py_INCREF(m); - return m; -} - -/* Load an external module using the default search path and return - its module object WITH INCREMENTED REFERENCE COUNT */ - -static PyObject * -load_module(PyObject *name, FILE *fp, PyObject *pathname, int type, PyObject *loader) -{ - PyObject *m; - - /* First check that there's an open file (if we need one) */ - switch (type) { - case PY_SOURCE: - case PY_COMPILED: - if (fp == NULL) { - PyErr_Format(PyExc_ValueError, - "file object required for import (type code %d)", - type); - return NULL; - } - } - - switch (type) { - - case PY_SOURCE: - m = load_source_module(name, pathname, fp); - break; - - case PY_COMPILED: - m = load_compiled_module(name, pathname, fp); - break; - -#ifdef HAVE_DYNAMIC_LOADING - case C_EXTENSION: - m = _PyImport_LoadDynamicModule(name, pathname, fp); - break; -#endif - - case PKG_DIRECTORY: - m = load_package(name, pathname); - break; - - case C_BUILTIN: - case PY_FROZEN: - m = load_builtin(name, type); - break; - - case IMP_HOOK: { - _Py_IDENTIFIER(load_module); - if (loader == NULL) { - PyErr_SetString(PyExc_ImportError, - "import hook without loader"); - return NULL; - } - m = _PyObject_CallMethodId(loader, &PyId_load_module, "O", name); - break; - } - - default: - PyErr_Format(PyExc_ImportError, - "Don't know how to import %R (type code %d)", - name, type); - m = NULL; - - } - - return m; -} - - -/* Initialize a built-in module. - Return 1 for success, 0 if the module is not found, and -1 with - an exception set if the initialization failed. */ - -static int -init_builtin(PyObject *name) -{ - struct _inittab *p; - - if (_PyImport_FindExtensionObject(name, name) != NULL) - return 1; - - for (p = PyImport_Inittab; p->name != NULL; p++) { - PyObject *mod; - if (PyUnicode_CompareWithASCIIString(name, p->name) == 0) { - if (p->initfunc == NULL) { - PyErr_Format(PyExc_ImportError, - "Cannot re-init internal module %R", - name); - return -1; - } - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # builtin\n", name); - mod = (*p->initfunc)(); - if (mod == 0) - return -1; - if (_PyImport_FixupExtensionObject(mod, name, name) < 0) - return -1; - /* FixupExtension has put the module into sys.modules, - so we can release our own reference. */ - Py_DECREF(mod); - return 1; - } - } - return 0; -} - - -/* Frozen modules */ - -static struct _frozen * -find_frozen(PyObject *name) -{ - struct _frozen *p; - - if (name == NULL) - return NULL; - - for (p = PyImport_FrozenModules; ; p++) { - if (p->name == NULL) - return NULL; - if (PyUnicode_CompareWithASCIIString(name, p->name) == 0) - break; - } - return p; -} - -static PyObject * -get_frozen_object(PyObject *name) -{ - struct _frozen *p = find_frozen(name); - int size; - - if (p == NULL) { - PyErr_Format(PyExc_ImportError, - "No such frozen object named %R", - name); - return NULL; - } - if (p->code == NULL) { - PyErr_Format(PyExc_ImportError, - "Excluded frozen object named %R", - name); - return NULL; - } - size = p->size; - if (size < 0) - size = -size; - return PyMarshal_ReadObjectFromString((char *)p->code, size); -} - -static PyObject * -is_frozen_package(PyObject *name) -{ - struct _frozen *p = find_frozen(name); - int size; - - if (p == NULL) { - PyErr_Format(PyExc_ImportError, - "No such frozen object named %R", - name); - return NULL; - } - - size = p->size; - - if (size < 0) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; -} - - -/* Initialize a frozen module. - Return 1 for success, 0 if the module is not found, and -1 with - an exception set if the initialization failed. - This function is also used from frozenmain.c */ - -int -PyImport_ImportFrozenModuleObject(PyObject *name) -{ - struct _frozen *p; - PyObject *co, *m, *path; - int ispackage; - int size; - - p = find_frozen(name); - - if (p == NULL) - return 0; - if (p->code == NULL) { - PyErr_Format(PyExc_ImportError, - "Excluded frozen object named %R", - name); - return -1; - } - size = p->size; - ispackage = (size < 0); - if (ispackage) - size = -size; - if (Py_VerboseFlag) - PySys_FormatStderr("import %U # frozen%s\n", - name, ispackage ? " package" : ""); - co = PyMarshal_ReadObjectFromString((char *)p->code, size); - if (co == NULL) - return -1; - if (!PyCode_Check(co)) { - PyErr_Format(PyExc_TypeError, - "frozen object %R is not a code object", - name); - goto err_return; - } - if (ispackage) { - /* Set __path__ to the package name */ - PyObject *d, *l; - int err; - m = PyImport_AddModuleObject(name); - if (m == NULL) - goto err_return; - d = PyModule_GetDict(m); - l = PyList_New(1); - if (l == NULL) { - goto err_return; - } - Py_INCREF(name); - PyList_SET_ITEM(l, 0, name); - err = PyDict_SetItemString(d, "__path__", l); - Py_DECREF(l); - if (err != 0) - goto err_return; - } - path = PyUnicode_FromString("<frozen>"); - if (path == NULL) - goto err_return; - m = PyImport_ExecCodeModuleObject(name, co, path, NULL); - Py_DECREF(path); - if (m == NULL) - goto err_return; - Py_DECREF(co); - Py_DECREF(m); - return 1; -err_return: - Py_DECREF(co); - return -1; -} - -int -PyImport_ImportFrozenModule(char *name) -{ - PyObject *nameobj; - int ret; - nameobj = PyUnicode_InternFromString(name); - if (nameobj == NULL) - return -1; - ret = PyImport_ImportFrozenModuleObject(nameobj); - Py_DECREF(nameobj); - return ret; -} - - -/* Import a module, either built-in, frozen, or external, and return - its module object WITH INCREMENTED REFERENCE COUNT */ - -PyObject * -PyImport_ImportModule(const char *name) -{ - PyObject *pname; - PyObject *result; - - pname = PyUnicode_FromString(name); - if (pname == NULL) - return NULL; - result = PyImport_Import(pname); - Py_DECREF(pname); - return result; -} - -/* Import a module without blocking - * - * At first it tries to fetch the module from sys.modules. If the module was - * never loaded before it loads it with PyImport_ImportModule() unless another - * thread holds the import lock. In the latter case the function raises an - * ImportError instead of blocking. - * - * Returns the module object with incremented ref count. - */ -PyObject * -PyImport_ImportModuleNoBlock(const char *name) -{ - PyObject *nameobj, *modules, *result; -#ifdef WITH_THREAD - long me; -#endif - - /* Try to get the module from sys.modules[name] */ - modules = PyImport_GetModuleDict(); - if (modules == NULL) - return NULL; - - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - return NULL; - result = PyDict_GetItem(modules, nameobj); - if (result != NULL) { - Py_DECREF(nameobj); - Py_INCREF(result); - return result; - } - PyErr_Clear(); -#ifdef WITH_THREAD - /* check the import lock - * me might be -1 but I ignore the error here, the lock function - * takes care of the problem */ - me = PyThread_get_thread_ident(); - if (import_lock_thread == -1 || import_lock_thread == me) { - /* no thread or me is holding the lock */ - result = PyImport_Import(nameobj); - } - else { - PyErr_Format(PyExc_ImportError, - "Failed to import %R because the import lock" - "is held by another thread.", - nameobj); - result = NULL; - } -#else - result = PyImport_Import(nameobj); -#endif - Py_DECREF(nameobj); - return result; -} - -/* Forward declarations for helper routines */ -static PyObject *get_parent(PyObject *globals, - PyObject **p_name, - int level); -static PyObject *load_next(PyObject *mod, PyObject *altmod, - PyObject *inputname, PyObject **p_outputname, - PyObject **p_prefix); -static int mark_miss(PyObject *name); -static int ensure_fromlist(PyObject *mod, PyObject *fromlist, - PyObject *buf, int recursive); -static PyObject * import_submodule(PyObject *mod, PyObject *name, - PyObject *fullname); - -/* The Magnum Opus of dotted-name import :-) */ - -static PyObject * -import_module_level(PyObject *name, PyObject *globals, PyObject *locals, - PyObject *fromlist, int level) -{ - PyObject *parent, *next, *inputname, *outputname; - PyObject *head = NULL; - PyObject *tail = NULL; - PyObject *prefix = NULL; - PyObject *result = NULL; - Py_ssize_t sep, altsep; - - if (PyUnicode_READY(name)) - return NULL; - - sep = PyUnicode_FindChar(name, SEP, 0, PyUnicode_GET_LENGTH(name), 1); - if (sep == -2) - return NULL; -#ifdef ALTSEP - altsep = PyUnicode_FindChar(name, ALTSEP, 0, PyUnicode_GET_LENGTH(name), 1); - if (altsep == -2) - return NULL; -#else - altsep = -1; -#endif - if (sep != -1 || altsep != -1) - { - PyErr_SetString(PyExc_ImportError, - "Import by filename is not supported."); - return NULL; - } - - parent = get_parent(globals, &prefix, level); - if (parent == NULL) { - return NULL; - } - - if (PyUnicode_READY(prefix)) - return NULL; - - head = load_next(parent, level < 0 ? Py_None : parent, name, &outputname, - &prefix); - if (head == NULL) - goto out; - - tail = head; - Py_INCREF(tail); - - if (outputname != NULL) { - while (1) { - inputname = outputname; - next = load_next(tail, tail, inputname, &outputname, - &prefix); - Py_CLEAR(tail); - Py_CLEAR(inputname); - if (next == NULL) - goto out; - tail = next; - - if (outputname == NULL) { - break; - } - } - } - if (tail == Py_None) { - /* If tail is Py_None, both get_parent and load_next found - an empty module name: someone called __import__("") or - doctored faulty bytecode */ - PyErr_SetString(PyExc_ValueError, "Empty module name"); - goto out; - } - - if (fromlist != NULL) { - if (fromlist == Py_None || !PyObject_IsTrue(fromlist)) - fromlist = NULL; - } - - if (fromlist == NULL) { - result = head; - Py_INCREF(result); - goto out; - } - - if (!ensure_fromlist(tail, fromlist, prefix, 0)) - goto out; - - result = tail; - Py_INCREF(result); - out: - Py_XDECREF(head); - Py_XDECREF(tail); - Py_XDECREF(prefix); - return result; -} - -PyObject * -PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, - PyObject *locals, PyObject *fromlist, - int level) -{ - PyObject *mod; - _PyImport_AcquireLock(); - mod = import_module_level(name, globals, locals, fromlist, level); - if (_PyImport_ReleaseLock() < 0) { - Py_XDECREF(mod); - PyErr_SetString(PyExc_RuntimeError, - "not holding the import lock"); - return NULL; - } - return mod; -} - -PyObject * -PyImport_ImportModuleLevel(const char *name, PyObject *globals, PyObject *locals, - PyObject *fromlist, int level) -{ - PyObject *nameobj, *mod; - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - return NULL; - mod = PyImport_ImportModuleLevelObject(nameobj, globals, locals, - fromlist, level); - Py_DECREF(nameobj); - return mod; -} - - -/* Return the package that an import is being performed in. If globals comes - from the module foo.bar.bat (not itself a package), this returns the - sys.modules entry for foo.bar. If globals is from a package's __init__.py, - the package's entry in sys.modules is returned, as a borrowed reference. - - The name of the returned package is returned in *p_name. - - If globals doesn't come from a package or a module in a package, or a - corresponding entry is not found in sys.modules, Py_None is returned. -*/ -static PyObject * -get_parent(PyObject *globals, PyObject **p_name, int level) -{ - PyObject *nameobj; - - static PyObject *namestr = NULL; - static PyObject *pathstr = NULL; - static PyObject *pkgstr = NULL; - PyObject *pkgname, *modname, *modpath, *modules, *parent; - int orig_level = level; - - if (globals == NULL || !PyDict_Check(globals) || !level) - goto return_none; - - if (namestr == NULL) { - namestr = PyUnicode_InternFromString("__name__"); - if (namestr == NULL) - return NULL; - } - if (pathstr == NULL) { - pathstr = PyUnicode_InternFromString("__path__"); - if (pathstr == NULL) - return NULL; - } - if (pkgstr == NULL) { - pkgstr = PyUnicode_InternFromString("__package__"); - if (pkgstr == NULL) - return NULL; - } - - pkgname = PyDict_GetItem(globals, pkgstr); - - if ((pkgname != NULL) && (pkgname != Py_None)) { - /* __package__ is set, so use it */ - if (!PyUnicode_Check(pkgname)) { - PyErr_SetString(PyExc_ValueError, - "__package__ set to non-string"); - return NULL; - } - if (PyUnicode_GET_LENGTH(pkgname) == 0) { - if (level > 0) { - PyErr_SetString(PyExc_ValueError, - "Attempted relative import in non-package"); - return NULL; - } - goto return_none; - } - Py_INCREF(pkgname); - nameobj = pkgname; - } else { - /* __package__ not set, so figure it out and set it */ - modname = PyDict_GetItem(globals, namestr); - if (modname == NULL || !PyUnicode_Check(modname)) - goto return_none; - - modpath = PyDict_GetItem(globals, pathstr); - if (modpath != NULL) { - /* __path__ is set, so modname is already the package name */ - int error; - - error = PyDict_SetItem(globals, pkgstr, modname); - if (error) { - PyErr_SetString(PyExc_ValueError, - "Could not set __package__"); - return NULL; - } - Py_INCREF(modname); - nameobj = modname; - } else { - /* Normal module, so work out the package name if any */ - Py_ssize_t len; - len = PyUnicode_FindChar(modname, '.', - 0, PyUnicode_GET_LENGTH(modname), -1); - if (len == -2) - return NULL; - if (len < 0) { - if (level > 0) { - PyErr_SetString(PyExc_ValueError, - "Attempted relative import in non-package"); - return NULL; - } - if (PyDict_SetItem(globals, pkgstr, Py_None)) { - PyErr_SetString(PyExc_ValueError, - "Could not set __package__"); - return NULL; - } - goto return_none; - } - pkgname = PyUnicode_Substring(modname, 0, len); - if (pkgname == NULL) - return NULL; - if (PyDict_SetItem(globals, pkgstr, pkgname)) { - Py_DECREF(pkgname); - PyErr_SetString(PyExc_ValueError, - "Could not set __package__"); - return NULL; - } - nameobj = pkgname; - } - } - if (level > 1) { - Py_ssize_t dot, end = PyUnicode_GET_LENGTH(nameobj); - PyObject *newname; - while (--level > 0) { - dot = PyUnicode_FindChar(nameobj, '.', 0, end, -1); - if (dot == -2) { - Py_DECREF(nameobj); - return NULL; - } - if (dot < 0) { - Py_DECREF(nameobj); - PyErr_SetString(PyExc_ValueError, - "Attempted relative import beyond " - "toplevel package"); - return NULL; - } - end = dot; - } - newname = PyUnicode_Substring(nameobj, 0, end); - Py_DECREF(nameobj); - if (newname == NULL) - return NULL; - nameobj = newname; - } - - modules = PyImport_GetModuleDict(); - parent = PyDict_GetItem(modules, nameobj); - if (parent == NULL) { - int err; - - if (orig_level >= 1) { - PyErr_Format(PyExc_SystemError, - "Parent module %R not loaded, " - "cannot perform relative import", nameobj); - Py_DECREF(nameobj); - return NULL; - } - - err = PyErr_WarnFormat( - PyExc_RuntimeWarning, 1, - "Parent module %R not found while handling absolute import", - nameobj); - Py_DECREF(nameobj); - if (err) - return NULL; - - goto return_none; - } - *p_name = nameobj; - return parent; - /* We expect, but can't guarantee, if parent != None, that: - - parent.__name__ == name - - parent.__dict__ is globals - If this is violated... Who cares? */ - -return_none: - nameobj = PyUnicode_New(0, 0); - if (nameobj == NULL) - return NULL; - *p_name = nameobj; - return Py_None; -} - -/* altmod is either None or same as mod */ -static PyObject * -load_next(PyObject *mod, PyObject *altmod, - PyObject *inputname, PyObject **p_outputname, - PyObject **p_prefix) -{ - Py_ssize_t dot; - Py_ssize_t len; - PyObject *fullname, *name = NULL, *result; - - *p_outputname = NULL; - - len = PyUnicode_GET_LENGTH(inputname); - if (len == 0) { - /* completely empty module name should only happen in - 'from . import' (or '__import__("")')*/ - Py_INCREF(mod); - return mod; - } - - - dot = PyUnicode_FindChar(inputname, '.', 0, len, 1); - if (dot >= 0) { - len = dot; - if (len == 0) { - PyErr_SetString(PyExc_ValueError, - "Empty module name"); - goto error; - } - } - - /* name = inputname[:len] */ - name = PyUnicode_Substring(inputname, 0, len); - if (name == NULL) - goto error; - - if (PyUnicode_GET_LENGTH(*p_prefix)) { - /* fullname = prefix + "." + name */ - fullname = PyUnicode_FromFormat("%U.%U", *p_prefix, name); - if (fullname == NULL) - goto error; - } - else { - fullname = name; - Py_INCREF(fullname); - } - - result = import_submodule(mod, name, fullname); - Py_DECREF(*p_prefix); - /* Transfer reference. */ - *p_prefix = fullname; - if (result == Py_None && altmod != mod) { - Py_DECREF(result); - /* Here, altmod must be None and mod must not be None */ - result = import_submodule(altmod, name, name); - if (result != NULL && result != Py_None) { - if (mark_miss(*p_prefix) != 0) { - Py_DECREF(result); - goto error; - } - Py_DECREF(*p_prefix); - *p_prefix = name; - Py_INCREF(*p_prefix); - } - } - if (result == NULL) - goto error; - - if (result == Py_None) { - Py_DECREF(result); - PyErr_Format(PyExc_ImportError, - "No module named %R", inputname); - goto error; - } - - if (dot >= 0) { - *p_outputname = PyUnicode_Substring(inputname, dot+1, - PyUnicode_GET_LENGTH(inputname)); - if (*p_outputname == NULL) { - Py_DECREF(result); - goto error; - } - } - - Py_DECREF(name); - return result; - -error: - Py_XDECREF(name); - return NULL; -} - -static int -mark_miss(PyObject *name) -{ - PyObject *modules = PyImport_GetModuleDict(); - return PyDict_SetItem(modules, name, Py_None); -} - -static int -ensure_fromlist(PyObject *mod, PyObject *fromlist, PyObject *name, - int recursive) -{ - int i; - PyObject *fullname; - Py_ssize_t fromlist_len; - - if (!_PyObject_HasAttrId(mod, &PyId___path__)) - return 1; - - fromlist_len = PySequence_Size(fromlist); - - for (i = 0; i < fromlist_len; i++) { - PyObject *item = PySequence_GetItem(fromlist, i); - int hasit; - if (item == NULL) - return 0; - if (!PyUnicode_Check(item)) { - PyErr_SetString(PyExc_TypeError, - "Item in ``from list'' not a string"); - Py_DECREF(item); - return 0; - } - if (PyUnicode_READ_CHAR(item, 0) == '*') { - PyObject *all; - _Py_IDENTIFIER(__all__); - Py_DECREF(item); - /* See if the package defines __all__ */ - if (recursive) - continue; /* Avoid endless recursion */ - all = _PyObject_GetAttrId(mod, &PyId___all__); - if (all == NULL) - PyErr_Clear(); - else { - int ret = ensure_fromlist(mod, all, name, 1); - Py_DECREF(all); - if (!ret) - return 0; - } - continue; - } - hasit = PyObject_HasAttr(mod, item); - if (!hasit) { - PyObject *submod; - fullname = PyUnicode_FromFormat("%U.%U", name, item); - if (fullname != NULL) { - submod = import_submodule(mod, item, fullname); - Py_DECREF(fullname); - } - else - submod = NULL; - Py_XDECREF(submod); - if (submod == NULL) { - Py_DECREF(item); - return 0; - } - } - Py_DECREF(item); - } - - return 1; -} - -static int -add_submodule(PyObject *mod, PyObject *submod, PyObject *fullname, - PyObject *subname, PyObject *modules) -{ - if (mod == Py_None) - return 1; - /* Irrespective of the success of this load, make a - reference to it in the parent package module. A copy gets - saved in the modules dictionary under the full name, so get a - reference from there, if need be. (The exception is when the - load failed with a SyntaxError -- then there's no trace in - sys.modules. In that case, of course, do nothing extra.) */ - if (submod == NULL) { - submod = PyDict_GetItem(modules, fullname); - if (submod == NULL) - return 1; - } - if (PyModule_Check(mod)) { - /* We can't use setattr here since it can give a - * spurious warning if the submodule name shadows a - * builtin name */ - PyObject *dict = PyModule_GetDict(mod); - if (!dict) - return 0; - if (PyDict_SetItem(dict, subname, submod) < 0) - return 0; - } - else { - if (PyObject_SetAttr(mod, subname, submod) < 0) - return 0; - } - return 1; -} - -static PyObject * -import_submodule(PyObject *mod, PyObject *subname, PyObject *fullname) -{ - PyObject *modules = PyImport_GetModuleDict(); - PyObject *m = NULL, *bufobj, *path_list, *loader; - struct filedescr *fdp; - FILE *fp; - - /* Require: - if mod == None: subname == fullname - else: mod.__name__ + "." + subname == fullname - */ - - if ((m = PyDict_GetItem(modules, fullname)) != NULL) { - Py_INCREF(m); - return m; - } - - if (mod == Py_None) - path_list = NULL; - else { - path_list = _PyObject_GetAttrId(mod, &PyId___path__); - if (path_list == NULL) { - PyErr_Clear(); - Py_INCREF(Py_None); - return Py_None; - } - } - - fdp = find_module(fullname, subname, path_list, - &bufobj, &fp, &loader); - Py_XDECREF(path_list); - if (fdp == NULL) { - if (!PyErr_ExceptionMatches(PyExc_ImportError)) - return NULL; - PyErr_Clear(); - Py_INCREF(Py_None); - return Py_None; - } - m = load_module(fullname, fp, bufobj, fdp->type, loader); - Py_XDECREF(bufobj); - Py_XDECREF(loader); - if (fp) - fclose(fp); - if (m == NULL) - return NULL; - if (!add_submodule(mod, m, fullname, subname, modules)) { - Py_XDECREF(m); - return NULL; - } - return m; -} - - -/* Re-import a module of any kind and return its module object, WITH - INCREMENTED REFERENCE COUNT */ - -PyObject * -PyImport_ReloadModule(PyObject *m) -{ - PyInterpreterState *interp = PyThreadState_Get()->interp; - PyObject *modules_reloading = interp->modules_reloading; - PyObject *modules = PyImport_GetModuleDict(); - PyObject *path_list = NULL, *loader = NULL, *existing_m = NULL; - PyObject *name, *bufobj, *subname; - Py_ssize_t subname_start; - struct filedescr *fdp; - FILE *fp = NULL; - PyObject *newm = NULL; - - if (modules_reloading == NULL) { - Py_FatalError("PyImport_ReloadModule: " - "no modules_reloading dictionary!"); - return NULL; - } - - if (m == NULL || !PyModule_Check(m)) { - PyErr_SetString(PyExc_TypeError, - "reload() argument must be module"); - return NULL; - } - name = PyModule_GetNameObject(m); - if (name == NULL || PyUnicode_READY(name) == -1) - return NULL; - if (m != PyDict_GetItem(modules, name)) { - PyErr_Format(PyExc_ImportError, - "reload(): module %R not in sys.modules", - name); - Py_DECREF(name); - return NULL; - } - existing_m = PyDict_GetItem(modules_reloading, name); - if (existing_m != NULL) { - /* Due to a recursive reload, this module is already - being reloaded. */ - Py_DECREF(name); - Py_INCREF(existing_m); - return existing_m; - } - if (PyDict_SetItem(modules_reloading, name, m) < 0) { - Py_DECREF(name); - return NULL; - } - - subname_start = PyUnicode_FindChar(name, '.', 0, - PyUnicode_GET_LENGTH(name), -1); - if (subname_start == -1) { - Py_INCREF(name); - subname = name; - } - else { - PyObject *parentname, *parent; - Py_ssize_t len; - parentname = PyUnicode_Substring(name, 0, subname_start); - if (parentname == NULL) { - goto error; - } - parent = PyDict_GetItem(modules, parentname); - if (parent == NULL) { - PyErr_Format(PyExc_ImportError, - "reload(): parent %R not in sys.modules", - parentname); - Py_DECREF(parentname); - goto error; - } - Py_DECREF(parentname); - path_list = _PyObject_GetAttrId(parent, &PyId___path__); - if (path_list == NULL) - PyErr_Clear(); - subname_start++; - len = PyUnicode_GET_LENGTH(name) - (subname_start + 1); - subname = PyUnicode_Substring(name, subname_start, len); - } - if (subname == NULL) - goto error; - fdp = find_module(name, subname, path_list, - &bufobj, &fp, &loader); - Py_DECREF(subname); - Py_XDECREF(path_list); - - if (fdp == NULL) { - Py_XDECREF(loader); - goto error; - } - - newm = load_module(name, fp, bufobj, fdp->type, loader); - Py_XDECREF(bufobj); - Py_XDECREF(loader); - - if (fp) - fclose(fp); - if (newm == NULL) { - /* load_module probably removed name from modules because of - * the error. Put back the original module object. We're - * going to return NULL in this case regardless of whether - * replacing name succeeds, so the return value is ignored. - */ - PyDict_SetItem(modules, name, m); - } - -error: - imp_modules_reloading_clear(); - Py_DECREF(name); - return newm; -} - - -/* Higher-level import emulator which emulates the "import" statement - more accurately -- it invokes the __import__() function from the - builtins of the current globals. This means that the import is - done using whatever import hooks are installed in the current - environment. - A dummy list ["__doc__"] is passed as the 4th argument so that - e.g. PyImport_Import(PyUnicode_FromString("win32com.client.gencache")) - will return <module "gencache"> instead of <module "win32com">. */ - -PyObject * -PyImport_Import(PyObject *module_name) -{ - static PyObject *silly_list = NULL; - static PyObject *builtins_str = NULL; - static PyObject *import_str = NULL; - PyObject *globals = NULL; - PyObject *import = NULL; - PyObject *builtins = NULL; - PyObject *modules = NULL; - PyObject *r = NULL; - - /* Initialize constant string objects */ - if (silly_list == NULL) { - import_str = PyUnicode_InternFromString("__import__"); - if (import_str == NULL) - return NULL; - builtins_str = PyUnicode_InternFromString("__builtins__"); - if (builtins_str == NULL) - return NULL; - silly_list = PyList_New(0); - if (silly_list == NULL) - return NULL; - } - - /* Get the builtins from current globals */ - globals = PyEval_GetGlobals(); - if (globals != NULL) { - Py_INCREF(globals); - builtins = PyObject_GetItem(globals, builtins_str); - if (builtins == NULL) - goto err; - } - else { - /* No globals -- use standard builtins, and fake globals */ - builtins = PyImport_ImportModuleLevel("builtins", - NULL, NULL, NULL, 0); - if (builtins == NULL) - return NULL; - globals = Py_BuildValue("{OO}", builtins_str, builtins); - if (globals == NULL) - goto err; - } - - /* Get the __import__ function from the builtins */ - if (PyDict_Check(builtins)) { - import = PyObject_GetItem(builtins, import_str); - if (import == NULL) - PyErr_SetObject(PyExc_KeyError, import_str); - } - else - import = PyObject_GetAttr(builtins, import_str); - if (import == NULL) - goto err; - - /* Call the __import__ function with the proper argument list - Always use absolute import here. - Calling for side-effect of import. */ - r = PyObject_CallFunction(import, "OOOOi", module_name, globals, - globals, silly_list, 0, NULL); - if (r == NULL) - goto err; - Py_DECREF(r); - - modules = PyImport_GetModuleDict(); - r = PyDict_GetItem(modules, module_name); - if (r != NULL) - Py_INCREF(r); - - err: - Py_XDECREF(globals); - Py_XDECREF(builtins); - Py_XDECREF(import); - - return r; -} - - -/* Module 'imp' provides Python access to the primitives used for - importing modules. -*/ - -static PyObject * -imp_make_magic(long magic) -{ - char buf[4]; - - buf[0] = (char) ((magic >> 0) & 0xff); - buf[1] = (char) ((magic >> 8) & 0xff); - buf[2] = (char) ((magic >> 16) & 0xff); - buf[3] = (char) ((magic >> 24) & 0xff); - - return PyBytes_FromStringAndSize(buf, 4); -} - -static PyObject * -imp_get_magic(PyObject *self, PyObject *noargs) -{ - return imp_make_magic(pyc_magic); -} - -static PyObject * -imp_get_tag(PyObject *self, PyObject *noargs) -{ - return PyUnicode_FromString(pyc_tag); -} - -static PyObject * -imp_get_suffixes(PyObject *self, PyObject *noargs) -{ - PyObject *list; - struct filedescr *fdp; - - list = PyList_New(0); - if (list == NULL) - return NULL; - for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) { - PyObject *item = Py_BuildValue("ssi", - fdp->suffix, fdp->mode, fdp->type); - if (item == NULL) { - Py_DECREF(list); - return NULL; - } - if (PyList_Append(list, item) < 0) { - Py_DECREF(list); - Py_DECREF(item); - return NULL; - } - Py_DECREF(item); - } - return list; -} - -static PyObject * -call_find_module(PyObject *name, PyObject *path_list) -{ - extern int fclose(FILE *); - PyObject *fob, *ret; - PyObject *pathobj; - struct filedescr *fdp; - FILE *fp; - int fd = -1; - char *found_encoding = NULL; - char *encoding = NULL; - - if (path_list == Py_None) - path_list = NULL; - fdp = find_module(NULL, name, path_list, - &pathobj, &fp, NULL); - if (fdp == NULL) - return NULL; - if (fp != NULL) { - fd = fileno(fp); - if (fd != -1) - fd = dup(fd); - fclose(fp); - if (fd == -1) { - PyErr_SetFromErrno(PyExc_OSError); - return NULL; - } - fp = NULL; - } - if (fd != -1) { - if (strchr(fdp->mode, 'b') == NULL) { - /* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed - memory. */ - found_encoding = PyTokenizer_FindEncodingFilename(fd, pathobj); - lseek(fd, 0, 0); /* Reset position */ - if (found_encoding == NULL && PyErr_Occurred()) { - Py_XDECREF(pathobj); - return NULL; - } - encoding = (found_encoding != NULL) ? found_encoding : - (char*)PyUnicode_GetDefaultEncoding(); - } - fob = PyFile_FromFd(fd, NULL, fdp->mode, -1, - (char*)encoding, NULL, NULL, 1); - if (fob == NULL) { - Py_XDECREF(pathobj); - close(fd); - PyMem_FREE(found_encoding); - return NULL; - } - } - else { - fob = Py_None; - Py_INCREF(fob); - } - if (pathobj == NULL) { - Py_INCREF(Py_None); - pathobj = Py_None; - } - ret = Py_BuildValue("NN(ssi)", - fob, pathobj, fdp->suffix, fdp->mode, fdp->type); - PyMem_FREE(found_encoding); - - return ret; -} - -static PyObject * -imp_find_module(PyObject *self, PyObject *args) -{ - PyObject *name, *path_list = NULL; - if (!PyArg_ParseTuple(args, "U|O:find_module", - &name, &path_list)) - return NULL; - return call_find_module(name, path_list); -} - -static PyObject * -imp_init_builtin(PyObject *self, PyObject *args) -{ - PyObject *name; - int ret; - PyObject *m; - if (!PyArg_ParseTuple(args, "U:init_builtin", &name)) - return NULL; - ret = init_builtin(name); - if (ret < 0) - return NULL; - if (ret == 0) { - Py_INCREF(Py_None); - return Py_None; - } - m = PyImport_AddModuleObject(name); - Py_XINCREF(m); - return m; -} - -static PyObject * -imp_init_frozen(PyObject *self, PyObject *args) -{ - PyObject *name; - int ret; - PyObject *m; - if (!PyArg_ParseTuple(args, "U:init_frozen", &name)) - return NULL; - ret = PyImport_ImportFrozenModuleObject(name); - if (ret < 0) - return NULL; - if (ret == 0) { - Py_INCREF(Py_None); - return Py_None; - } - m = PyImport_AddModuleObject(name); - Py_XINCREF(m); - return m; -} - -static PyObject * -imp_get_frozen_object(PyObject *self, PyObject *args) -{ - PyObject *name; - - if (!PyArg_ParseTuple(args, "U:get_frozen_object", &name)) - return NULL; - return get_frozen_object(name); -} - -static PyObject * -imp_is_frozen_package(PyObject *self, PyObject *args) -{ - PyObject *name; - - if (!PyArg_ParseTuple(args, "U:is_frozen_package", &name)) - return NULL; - return is_frozen_package(name); -} - -static PyObject * -imp_is_builtin(PyObject *self, PyObject *args) -{ - PyObject *name; - if (!PyArg_ParseTuple(args, "U:is_builtin", &name)) - return NULL; - return PyLong_FromLong(is_builtin(name)); -} - -static PyObject * -imp_is_frozen(PyObject *self, PyObject *args) -{ - PyObject *name; - struct _frozen *p; - if (!PyArg_ParseTuple(args, "U:is_frozen", &name)) - return NULL; - p = find_frozen(name); - return PyBool_FromLong((long) (p == NULL ? 0 : p->size)); -} - -static FILE * -get_file(PyObject *pathname, PyObject *fob, char *mode) -{ - FILE *fp; - if (mode[0] == 'U') - mode = "r" PY_STDIOTEXTMODE; - if (fob == NULL) { - fp = _Py_fopen(pathname, mode); - } - else { - int fd = PyObject_AsFileDescriptor(fob); - if (fd == -1) - return NULL; - if (!_PyVerify_fd(fd)) - goto error; - /* the FILE struct gets a new fd, so that it can be closed - * independently of the file descriptor given - */ - fd = dup(fd); - if (fd == -1) - goto error; - fp = fdopen(fd, mode); - } - if (fp) - return fp; -error: - PyErr_SetFromErrno(PyExc_IOError); - return NULL; -} - -static PyObject * -imp_load_compiled(PyObject *self, PyObject *args) -{ - PyObject *name, *pathname; - PyObject *fob = NULL; - PyObject *m; - FILE *fp; - if (!PyArg_ParseTuple(args, "UO&|O:load_compiled", - &name, - PyUnicode_FSDecoder, &pathname, - &fob)) - return NULL; - fp = get_file(pathname, fob, "rb"); - if (fp == NULL) { - Py_DECREF(pathname); - return NULL; - } - m = load_compiled_module(name, pathname, fp); - fclose(fp); - Py_DECREF(pathname); - return m; -} - -#ifdef HAVE_DYNAMIC_LOADING - -static PyObject * -imp_load_dynamic(PyObject *self, PyObject *args) -{ - PyObject *name, *pathname, *fob = NULL, *mod; - FILE *fp; - - if (!PyArg_ParseTuple(args, "UO&|O:load_dynamic", - &name, PyUnicode_FSDecoder, &pathname, &fob)) - return NULL; - if (fob != NULL) { - fp = get_file(NULL, fob, "r"); - if (fp == NULL) { - Py_DECREF(pathname); - return NULL; - } - } - else - fp = NULL; - mod = _PyImport_LoadDynamicModule(name, pathname, fp); - Py_DECREF(pathname); - if (fp) - fclose(fp); - return mod; -} - -#endif /* HAVE_DYNAMIC_LOADING */ - -static PyObject * -imp_load_source(PyObject *self, PyObject *args) -{ - PyObject *name, *pathname; - PyObject *fob = NULL; - PyObject *m; - FILE *fp; - if (!PyArg_ParseTuple(args, "UO&|O:load_source", - &name, - PyUnicode_FSDecoder, &pathname, - &fob)) - return NULL; - fp = get_file(pathname, fob, "r"); - if (fp == NULL) { - Py_DECREF(pathname); - return NULL; - } - m = load_source_module(name, pathname, fp); - Py_DECREF(pathname); - fclose(fp); - return m; -} - -static PyObject * -imp_load_module(PyObject *self, PyObject *args) -{ - PyObject *name, *fob, *pathname, *pathname_obj, *ret; - char *suffix; /* Unused */ - char *mode; - int type; - FILE *fp; - - if (!PyArg_ParseTuple(args, "UOO(ssi):load_module", - &name, &fob, &pathname_obj, &suffix, &mode, &type)) - return NULL; - if (pathname_obj != Py_None) { - if (!PyUnicode_FSDecoder(pathname_obj, &pathname)) - return NULL; - } - else - pathname = NULL; - - if (*mode) { - /* Mode must start with 'r' or 'U' and must not contain '+'. - Implicit in this test is the assumption that the mode - may contain other modifiers like 'b' or 't'. */ - - if (!(*mode == 'r' || *mode == 'U') || strchr(mode, '+')) { - PyErr_Format(PyExc_ValueError, - "invalid file open mode %.200s", mode); - Py_XDECREF(pathname); - return NULL; - } - } - if (fob == Py_None) - fp = NULL; - else { - fp = get_file(NULL, fob, mode); - if (fp == NULL) { - Py_XDECREF(pathname); - return NULL; - } - } - ret = load_module(name, fp, pathname, type, NULL); - Py_XDECREF(pathname); - if (fp) - fclose(fp); - return ret; -} - -static PyObject * -imp_load_package(PyObject *self, PyObject *args) -{ - PyObject *name, *pathname; - PyObject * ret; - if (!PyArg_ParseTuple(args, "UO&:load_package", - &name, PyUnicode_FSDecoder, &pathname)) - return NULL; - ret = load_package(name, pathname); - Py_DECREF(pathname); - return ret; -} - -static PyObject * -imp_new_module(PyObject *self, PyObject *args) -{ - PyObject *name; - if (!PyArg_ParseTuple(args, "U:new_module", &name)) - return NULL; - return PyModule_NewObject(name); -} - -static PyObject * -imp_reload(PyObject *self, PyObject *v) -{ - return PyImport_ReloadModule(v); -} - -PyDoc_STRVAR(doc_reload, -"reload(module) -> module\n\ -\n\ -Reload the module. The module must have been successfully imported before."); - -static PyObject * -imp_cache_from_source(PyObject *self, PyObject *args, PyObject *kws) -{ - static char *kwlist[] = {"path", "debug_override", NULL}; - - PyObject *pathname, *cpathname; - PyObject *debug_override = NULL; - int debug = !Py_OptimizeFlag; - - if (!PyArg_ParseTupleAndKeywords( - args, kws, "O&|O", kwlist, - PyUnicode_FSDecoder, &pathname, &debug_override)) - return NULL; - - if (debug_override != NULL && - (debug = PyObject_IsTrue(debug_override)) < 0) { - Py_DECREF(pathname); - return NULL; - } - - if (PyUnicode_READY(pathname) < 0) - return NULL; - - cpathname = make_compiled_pathname(pathname, debug); - Py_DECREF(pathname); - - if (cpathname == NULL) { - PyErr_Format(PyExc_SystemError, "path buffer too short"); - return NULL; - } - return cpathname; -} - -PyDoc_STRVAR(doc_cache_from_source, -"cache_from_source(path, [debug_override]) -> path\n\ -Given the path to a .py file, return the path to its .pyc/.pyo file.\n\ -\n\ -The .py file does not need to exist; this simply returns the path to the\n\ -.pyc/.pyo file calculated as if the .py file were imported. The extension\n\ -will be .pyc unless __debug__ is not defined, then it will be .pyo.\n\ -\n\ -If debug_override is not None, then it must be a boolean and is taken as\n\ -the value of __debug__ instead."); - -static PyObject * -imp_source_from_cache(PyObject *self, PyObject *args, PyObject *kws) -{ - static char *kwlist[] = {"path", NULL}; - PyObject *pathname, *source; - - if (!PyArg_ParseTupleAndKeywords( - args, kws, "O&", kwlist, - PyUnicode_FSDecoder, &pathname)) - return NULL; - - source = make_source_pathname(pathname); - if (source == NULL) { - PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %R", - pathname); - Py_DECREF(pathname); - return NULL; - } - Py_DECREF(pathname); - return source; -} - -PyDoc_STRVAR(doc_source_from_cache, -"source_from_cache(path) -> path\n\ -Given the path to a .pyc./.pyo file, return the path to its .py file.\n\ -\n\ -The .pyc/.pyo file does not need to exist; this simply returns the path to\n\ -the .py file calculated to correspond to the .pyc/.pyo file. If path\n\ -does not conform to PEP 3147 format, ValueError will be raised."); - -/* Doc strings */ - -PyDoc_STRVAR(doc_imp, -"This module provides the components needed to build your own\n\ -__import__ function. Undocumented functions are obsolete."); - -PyDoc_STRVAR(doc_find_module, -"find_module(name, [path]) -> (file, filename, (suffix, mode, type))\n\ -Search for a module. If path is omitted or None, search for a\n\ -built-in, frozen or special module and continue search in sys.path.\n\ -The module name cannot contain '.'; to search for a submodule of a\n\ -package, pass the submodule name and the package's __path__."); - -PyDoc_STRVAR(doc_load_module, -"load_module(name, file, filename, (suffix, mode, type)) -> module\n\ -Load a module, given information returned by find_module().\n\ -The module name must include the full package name, if any."); - -PyDoc_STRVAR(doc_get_magic, -"get_magic() -> string\n\ -Return the magic number for .pyc or .pyo files."); - -PyDoc_STRVAR(doc_get_tag, -"get_tag() -> string\n\ -Return the magic tag for .pyc or .pyo files."); - -PyDoc_STRVAR(doc_get_suffixes, -"get_suffixes() -> [(suffix, mode, type), ...]\n\ -Return a list of (suffix, mode, type) tuples describing the files\n\ -that find_module() looks for."); - -PyDoc_STRVAR(doc_new_module, -"new_module(name) -> module\n\ -Create a new module. Do not enter it in sys.modules.\n\ -The module name must include the full package name, if any."); - -PyDoc_STRVAR(doc_lock_held, -"lock_held() -> boolean\n\ -Return True if the import lock is currently held, else False.\n\ -On platforms without threads, return False."); - -PyDoc_STRVAR(doc_acquire_lock, -"acquire_lock() -> None\n\ -Acquires the interpreter's import lock for the current thread.\n\ -This lock should be used by import hooks to ensure thread-safety\n\ -when importing modules.\n\ -On platforms without threads, this function does nothing."); - -PyDoc_STRVAR(doc_release_lock, -"release_lock() -> None\n\ -Release the interpreter's import lock.\n\ -On platforms without threads, this function does nothing."); - -static PyMethodDef imp_methods[] = { - {"find_module", imp_find_module, METH_VARARGS, doc_find_module}, - {"get_magic", imp_get_magic, METH_NOARGS, doc_get_magic}, - {"get_tag", imp_get_tag, METH_NOARGS, doc_get_tag}, - {"get_suffixes", imp_get_suffixes, METH_NOARGS, doc_get_suffixes}, - {"load_module", imp_load_module, METH_VARARGS, doc_load_module}, - {"new_module", imp_new_module, METH_VARARGS, doc_new_module}, - {"lock_held", imp_lock_held, METH_NOARGS, doc_lock_held}, - {"acquire_lock", imp_acquire_lock, METH_NOARGS, doc_acquire_lock}, - {"release_lock", imp_release_lock, METH_NOARGS, doc_release_lock}, - {"reload", imp_reload, METH_O, doc_reload}, - {"cache_from_source", (PyCFunction)imp_cache_from_source, - METH_VARARGS | METH_KEYWORDS, doc_cache_from_source}, - {"source_from_cache", (PyCFunction)imp_source_from_cache, - METH_VARARGS | METH_KEYWORDS, doc_source_from_cache}, - /* The rest are obsolete */ - {"get_frozen_object", imp_get_frozen_object, METH_VARARGS}, - {"is_frozen_package", imp_is_frozen_package, METH_VARARGS}, - {"init_builtin", imp_init_builtin, METH_VARARGS}, - {"init_frozen", imp_init_frozen, METH_VARARGS}, - {"is_builtin", imp_is_builtin, METH_VARARGS}, - {"is_frozen", imp_is_frozen, METH_VARARGS}, - {"load_compiled", imp_load_compiled, METH_VARARGS}, -#ifdef HAVE_DYNAMIC_LOADING - {"load_dynamic", imp_load_dynamic, METH_VARARGS}, -#endif - {"load_package", imp_load_package, METH_VARARGS}, - {"load_source", imp_load_source, METH_VARARGS}, - {"_fix_co_filename", imp_fix_co_filename, METH_VARARGS}, - {NULL, NULL} /* sentinel */ -}; - -static int -setint(PyObject *d, char *name, int value) -{ - PyObject *v; - int err; - - v = PyLong_FromLong((long)value); - err = PyDict_SetItemString(d, name, v); - Py_XDECREF(v); - return err; -} - -typedef struct { - PyObject_HEAD -} NullImporter; - -static int -NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds) -{ -#ifndef MS_WINDOWS - PyObject *path; - struct stat statbuf; - int rv; - - if (!_PyArg_NoKeywords("NullImporter()", kwds)) - return -1; - - if (!PyArg_ParseTuple(args, "O&:NullImporter", - PyUnicode_FSConverter, &path)) - return -1; - - if (PyBytes_GET_SIZE(path) == 0) { - Py_DECREF(path); - PyErr_SetString(PyExc_ImportError, "empty pathname"); - return -1; - } - - rv = stat(PyBytes_AS_STRING(path), &statbuf); - Py_DECREF(path); - if (rv == 0) { - /* it exists */ - if (S_ISDIR(statbuf.st_mode)) { - /* it's a directory */ - PyErr_SetString(PyExc_ImportError, "existing directory"); - return -1; - } - } -#else /* MS_WINDOWS */ - PyObject *pathobj; - DWORD rv; - wchar_t *path; - - if (!_PyArg_NoKeywords("NullImporter()", kwds)) - return -1; - - if (!PyArg_ParseTuple(args, "U:NullImporter", - &pathobj)) - return -1; - - if (PyUnicode_GET_LENGTH(pathobj) == 0) { - PyErr_SetString(PyExc_ImportError, "empty pathname"); - return -1; - } - - path = PyUnicode_AsWideCharString(pathobj, NULL); - if (path == NULL) - return -1; - /* see issue1293 and issue3677: - * stat() on Windows doesn't recognise paths like - * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs. - */ - rv = GetFileAttributesW(path); - PyMem_Free(path); - if (rv != INVALID_FILE_ATTRIBUTES) { - /* it exists */ - if (rv & FILE_ATTRIBUTE_DIRECTORY) { - /* it's a directory */ - PyErr_SetString(PyExc_ImportError, "existing directory"); - return -1; - } - } -#endif - return 0; -} - -static PyObject * -NullImporter_find_module(NullImporter *self, PyObject *args) -{ - Py_RETURN_NONE; -} - -static PyMethodDef NullImporter_methods[] = { - {"find_module", (PyCFunction)NullImporter_find_module, METH_VARARGS, - "Always return None" - }, - {NULL} /* Sentinel */ -}; - - -PyTypeObject PyNullImporter_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "imp.NullImporter", /*tp_name*/ - sizeof(NullImporter), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - 0, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_reserved*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "Null importer object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - NullImporter_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)NullImporter_init, /* tp_init */ - 0, /* tp_alloc */ - PyType_GenericNew /* tp_new */ -}; - -static struct PyModuleDef impmodule = { - PyModuleDef_HEAD_INIT, - "imp", - doc_imp, - 0, - imp_methods, - NULL, - NULL, - NULL, - NULL -}; - -PyMODINIT_FUNC -PyInit_imp(void) -{ - PyObject *m, *d; - - if (PyType_Ready(&PyNullImporter_Type) < 0) - return NULL; - - m = PyModule_Create(&impmodule); - if (m == NULL) - goto failure; - d = PyModule_GetDict(m); - if (d == NULL) - goto failure; - - if (setint(d, "SEARCH_ERROR", SEARCH_ERROR) < 0) goto failure; - if (setint(d, "PY_SOURCE", PY_SOURCE) < 0) goto failure; - if (setint(d, "PY_COMPILED", PY_COMPILED) < 0) goto failure; - if (setint(d, "C_EXTENSION", C_EXTENSION) < 0) goto failure; - if (setint(d, "PY_RESOURCE", PY_RESOURCE) < 0) goto failure; - if (setint(d, "PKG_DIRECTORY", PKG_DIRECTORY) < 0) goto failure; - if (setint(d, "C_BUILTIN", C_BUILTIN) < 0) goto failure; - if (setint(d, "PY_FROZEN", PY_FROZEN) < 0) goto failure; - if (setint(d, "PY_CODERESOURCE", PY_CODERESOURCE) < 0) goto failure; - if (setint(d, "IMP_HOOK", IMP_HOOK) < 0) goto failure; - - Py_INCREF(&PyNullImporter_Type); - PyModule_AddObject(m, "NullImporter", (PyObject *)&PyNullImporter_Type); - return m; - failure: - Py_XDECREF(m); - return NULL; -} - - -/* API for embedding applications that want to add their own entries - to the table of built-in modules. This should normally be called - *before* Py_Initialize(). When the table resize fails, -1 is - returned and the existing table is unchanged. - - After a similar function by Just van Rossum. */ - -int -PyImport_ExtendInittab(struct _inittab *newtab) -{ - static struct _inittab *our_copy = NULL; - struct _inittab *p; - int i, n; - - /* Count the number of entries in both tables */ - for (n = 0; newtab[n].name != NULL; n++) - ; - if (n == 0) - return 0; /* Nothing to do */ - for (i = 0; PyImport_Inittab[i].name != NULL; i++) - ; - - /* Allocate new memory for the combined table */ - p = our_copy; - PyMem_RESIZE(p, struct _inittab, i+n+1); - if (p == NULL) - return -1; - - /* Copy the tables into the new memory */ - if (our_copy != PyImport_Inittab) - memcpy(p, PyImport_Inittab, (i+1) * sizeof(struct _inittab)); - PyImport_Inittab = our_copy = p; - memcpy(p+i, newtab, (n+1) * sizeof(struct _inittab)); - - return 0; -} - -/* Shorthand to add a single entry given a name and a function */ - -int -PyImport_AppendInittab(const char *name, PyObject* (*initfunc)(void)) -{ - struct _inittab newtab[2]; - - memset(newtab, '\0', sizeof newtab); - - newtab[0].name = (char *)name; - newtab[0].initfunc = initfunc; - - return PyImport_ExtendInittab(newtab); -} -
--- a/cos/python/Python/pystate.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,729 +0,0 @@ - -/* Thread and interpreter state structures and their interfaces */ - -#include "Python.h" - -/* -------------------------------------------------------------------------- -CAUTION - -Always use malloc() and free() directly in this file. A number of these -functions are advertised as safe to call when the GIL isn't held, and in -a debug build Python redirects (e.g.) PyMem_NEW (etc) to Python's debugging -obmalloc functions. Those aren't thread-safe (they rely on the GIL to avoid -the expense of doing their own locking). --------------------------------------------------------------------------- */ - -#ifdef HAVE_DLOPEN -#ifdef HAVE_DLFCN_H -#include <dlfcn.h> -#endif -#ifndef RTLD_LAZY -#define RTLD_LAZY 1 -#endif -#endif - - -#ifdef WITH_THREAD -#include "pythread.h" -static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */ -#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) -#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK) -#define HEAD_UNLOCK() PyThread_release_lock(head_mutex) - - -/* The single PyInterpreterState used by this process' - GILState implementation -*/ -static PyInterpreterState *autoInterpreterState = NULL; -static int autoTLSkey = 0; -#else -#define HEAD_INIT() /* Nothing */ -#define HEAD_LOCK() /* Nothing */ -#define HEAD_UNLOCK() /* Nothing */ -#endif - -static PyInterpreterState *interp_head = NULL; - -/* Assuming the current thread holds the GIL, this is the - PyThreadState for the current thread. */ -_Py_atomic_address _PyThreadState_Current = {NULL}; -PyThreadFrameGetter _PyThreadState_GetFrame = NULL; - -#ifdef WITH_THREAD -static void _PyGILState_NoteThreadState(PyThreadState* tstate); -#endif - - -PyInterpreterState * -PyInterpreterState_New(void) -{ - PyInterpreterState *interp = (PyInterpreterState *) - malloc(sizeof(PyInterpreterState)); - - if (interp != NULL) { - HEAD_INIT(); -#ifdef WITH_THREAD - if (head_mutex == NULL) - Py_FatalError("Can't initialize threads for interpreter"); -#endif - interp->modules = NULL; - interp->modules_reloading = NULL; - interp->modules_by_index = NULL; - interp->sysdict = NULL; - interp->builtins = NULL; - interp->tstate_head = NULL; - interp->codec_search_path = NULL; - interp->codec_search_cache = NULL; - interp->codec_error_registry = NULL; - interp->codecs_initialized = 0; - interp->fscodec_initialized = 0; -#ifdef HAVE_DLOPEN -#ifdef RTLD_NOW - interp->dlopenflags = RTLD_NOW; -#else - interp->dlopenflags = RTLD_LAZY; -#endif -#endif -#ifdef WITH_TSC - interp->tscdump = 0; -#endif - - HEAD_LOCK(); - interp->next = interp_head; - interp_head = interp; - HEAD_UNLOCK(); - } - - return interp; -} - - -void -PyInterpreterState_Clear(PyInterpreterState *interp) -{ - PyThreadState *p; - HEAD_LOCK(); - for (p = interp->tstate_head; p != NULL; p = p->next) - PyThreadState_Clear(p); - HEAD_UNLOCK(); - Py_CLEAR(interp->codec_search_path); - Py_CLEAR(interp->codec_search_cache); - Py_CLEAR(interp->codec_error_registry); - Py_CLEAR(interp->modules); - Py_CLEAR(interp->modules_by_index); - Py_CLEAR(interp->modules_reloading); - Py_CLEAR(interp->sysdict); - Py_CLEAR(interp->builtins); -} - - -static void -zapthreads(PyInterpreterState *interp) -{ - PyThreadState *p; - /* No need to lock the mutex here because this should only happen - when the threads are all really dead (XXX famous last words). */ - while ((p = interp->tstate_head) != NULL) { - PyThreadState_Delete(p); - } -} - - -void -PyInterpreterState_Delete(PyInterpreterState *interp) -{ - PyInterpreterState **p; - zapthreads(interp); - HEAD_LOCK(); - for (p = &interp_head; ; p = &(*p)->next) { - if (*p == NULL) - Py_FatalError( - "PyInterpreterState_Delete: invalid interp"); - if (*p == interp) - break; - } - if (interp->tstate_head != NULL) - Py_FatalError("PyInterpreterState_Delete: remaining threads"); - *p = interp->next; - HEAD_UNLOCK(); - free(interp); -#ifdef WITH_THREAD - if (interp_head == NULL && head_mutex != NULL) { - PyThread_free_lock(head_mutex); - head_mutex = NULL; - } -#endif -} - - -/* Default implementation for _PyThreadState_GetFrame */ -static struct _frame * -threadstate_getframe(PyThreadState *self) -{ - return self->frame; -} - -static PyThreadState * -new_threadstate(PyInterpreterState *interp, int init) -{ - PyThreadState *tstate = (PyThreadState *)malloc(sizeof(PyThreadState)); - - if (_PyThreadState_GetFrame == NULL) - _PyThreadState_GetFrame = threadstate_getframe; - - if (tstate != NULL) { - tstate->interp = interp; - - tstate->frame = NULL; - tstate->recursion_depth = 0; - tstate->overflowed = 0; - tstate->recursion_critical = 0; - tstate->tracing = 0; - tstate->use_tracing = 0; - tstate->tick_counter = 0; - tstate->gilstate_counter = 0; - tstate->async_exc = NULL; -#ifdef WITH_THREAD - tstate->thread_id = PyThread_get_thread_ident(); -#else - tstate->thread_id = 0; -#endif - - tstate->dict = NULL; - - tstate->curexc_type = NULL; - tstate->curexc_value = NULL; - tstate->curexc_traceback = NULL; - - tstate->exc_type = NULL; - tstate->exc_value = NULL; - tstate->exc_traceback = NULL; - - tstate->c_profilefunc = NULL; - tstate->c_tracefunc = NULL; - tstate->c_profileobj = NULL; - tstate->c_traceobj = NULL; - - if (init) - _PyThreadState_Init(tstate); - - HEAD_LOCK(); - tstate->next = interp->tstate_head; - interp->tstate_head = tstate; - HEAD_UNLOCK(); - } - - return tstate; -} - -PyThreadState * -PyThreadState_New(PyInterpreterState *interp) -{ - return new_threadstate(interp, 1); -} - -PyThreadState * -_PyThreadState_Prealloc(PyInterpreterState *interp) -{ - return new_threadstate(interp, 0); -} - -void -_PyThreadState_Init(PyThreadState *tstate) -{ -#ifdef WITH_THREAD - _PyGILState_NoteThreadState(tstate); -#endif -} - -PyObject* -PyState_FindModule(struct PyModuleDef* m) -{ - Py_ssize_t index = m->m_base.m_index; - PyInterpreterState *state = PyThreadState_GET()->interp; - PyObject *res; - if (index == 0) - return NULL; - if (state->modules_by_index == NULL) - return NULL; - if (index > PyList_GET_SIZE(state->modules_by_index)) - return NULL; - res = PyList_GET_ITEM(state->modules_by_index, index); - return res==Py_None ? NULL : res; -} - -int -_PyState_AddModule(PyObject* module, struct PyModuleDef* def) -{ - PyInterpreterState *state = PyThreadState_GET()->interp; - if (!def) - return -1; - if (!state->modules_by_index) { - state->modules_by_index = PyList_New(0); - if (!state->modules_by_index) - return -1; - } - while(PyList_GET_SIZE(state->modules_by_index) <= def->m_base.m_index) - if (PyList_Append(state->modules_by_index, Py_None) < 0) - return -1; - Py_INCREF(module); - return PyList_SetItem(state->modules_by_index, - def->m_base.m_index, module); -} - -void -PyThreadState_Clear(PyThreadState *tstate) -{ - if (Py_VerboseFlag && tstate->frame != NULL) - fprintf(stderr, - "PyThreadState_Clear: warning: thread still has a frame\n"); - - Py_CLEAR(tstate->frame); - - Py_CLEAR(tstate->dict); - Py_CLEAR(tstate->async_exc); - - Py_CLEAR(tstate->curexc_type); - Py_CLEAR(tstate->curexc_value); - Py_CLEAR(tstate->curexc_traceback); - - Py_CLEAR(tstate->exc_type); - Py_CLEAR(tstate->exc_value); - Py_CLEAR(tstate->exc_traceback); - - tstate->c_profilefunc = NULL; - tstate->c_tracefunc = NULL; - Py_CLEAR(tstate->c_profileobj); - Py_CLEAR(tstate->c_traceobj); -} - - -/* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ -static void -tstate_delete_common(PyThreadState *tstate) -{ - PyInterpreterState *interp; - PyThreadState **p; - PyThreadState *prev_p = NULL; - if (tstate == NULL) - Py_FatalError("PyThreadState_Delete: NULL tstate"); - interp = tstate->interp; - if (interp == NULL) - Py_FatalError("PyThreadState_Delete: NULL interp"); - HEAD_LOCK(); - for (p = &interp->tstate_head; ; p = &(*p)->next) { - if (*p == NULL) - Py_FatalError( - "PyThreadState_Delete: invalid tstate"); - if (*p == tstate) - break; - /* Sanity check. These states should never happen but if - * they do we must abort. Otherwise we'll end up spinning in - * in a tight loop with the lock held. A similar check is done - * in thread.c find_key(). */ - if (*p == prev_p) - Py_FatalError( - "PyThreadState_Delete: small circular list(!)" - " and tstate not found."); - prev_p = *p; - if ((*p)->next == interp->tstate_head) - Py_FatalError( - "PyThreadState_Delete: circular list(!) and" - " tstate not found."); - } - *p = tstate->next; - HEAD_UNLOCK(); - free(tstate); -} - - -void -PyThreadState_Delete(PyThreadState *tstate) -{ - if (tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current)) - Py_FatalError("PyThreadState_Delete: tstate is still current"); - tstate_delete_common(tstate); -#ifdef WITH_THREAD - if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) - PyThread_delete_key_value(autoTLSkey); -#endif /* WITH_THREAD */ -} - - -#ifdef WITH_THREAD -void -PyThreadState_DeleteCurrent() -{ - PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( - &_PyThreadState_Current); - if (tstate == NULL) - Py_FatalError( - "PyThreadState_DeleteCurrent: no current tstate"); - _Py_atomic_store_relaxed(&_PyThreadState_Current, NULL); - tstate_delete_common(tstate); - if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate) - PyThread_delete_key_value(autoTLSkey); - PyEval_ReleaseLock(); -} -#endif /* WITH_THREAD */ - - -PyThreadState * -PyThreadState_Get(void) -{ - PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( - &_PyThreadState_Current); - if (tstate == NULL) - Py_FatalError("PyThreadState_Get: no current thread"); - - return tstate; -} - - -PyThreadState * -PyThreadState_Swap(PyThreadState *newts) -{ - PyThreadState *oldts = (PyThreadState*)_Py_atomic_load_relaxed( - &_PyThreadState_Current); - - _Py_atomic_store_relaxed(&_PyThreadState_Current, newts); - /* It should not be possible for more than one thread state - to be used for a thread. Check this the best we can in debug - builds. - */ -#if defined(Py_DEBUG) && defined(WITH_THREAD) - if (newts) { - /* This can be called from PyEval_RestoreThread(). Similar - to it, we need to ensure errno doesn't change. - */ - int err = errno; - PyThreadState *check = PyGILState_GetThisThreadState(); - if (check && check->interp == newts->interp && check != newts) - Py_FatalError("Invalid thread state for this thread"); - errno = err; - } -#endif - return oldts; -} - -/* An extension mechanism to store arbitrary additional per-thread state. - PyThreadState_GetDict() returns a dictionary that can be used to hold such - state; the caller should pick a unique key and store its state there. If - PyThreadState_GetDict() returns NULL, an exception has *not* been raised - and the caller should assume no per-thread state is available. */ - -PyObject * -PyThreadState_GetDict(void) -{ - PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed( - &_PyThreadState_Current); - if (tstate == NULL) - return NULL; - - if (tstate->dict == NULL) { - PyObject *d; - tstate->dict = d = PyDict_New(); - if (d == NULL) - PyErr_Clear(); - } - return tstate->dict; -} - - -/* Asynchronously raise an exception in a thread. - Requested by Just van Rossum and Alex Martelli. - To prevent naive misuse, you must write your own extension - to call this, or use ctypes. Must be called with the GIL held. - Returns the number of tstates modified (normally 1, but 0 if `id` didn't - match any known thread id). Can be called with exc=NULL to clear an - existing async exception. This raises no exceptions. */ - -int -PyThreadState_SetAsyncExc(long id, PyObject *exc) { - PyThreadState *tstate = PyThreadState_GET(); - PyInterpreterState *interp = tstate->interp; - PyThreadState *p; - - /* Although the GIL is held, a few C API functions can be called - * without the GIL held, and in particular some that create and - * destroy thread and interpreter states. Those can mutate the - * list of thread states we're traversing, so to prevent that we lock - * head_mutex for the duration. - */ - HEAD_LOCK(); - for (p = interp->tstate_head; p != NULL; p = p->next) { - if (p->thread_id == id) { - /* Tricky: we need to decref the current value - * (if any) in p->async_exc, but that can in turn - * allow arbitrary Python code to run, including - * perhaps calls to this function. To prevent - * deadlock, we need to release head_mutex before - * the decref. - */ - PyObject *old_exc = p->async_exc; - Py_XINCREF(exc); - p->async_exc = exc; - HEAD_UNLOCK(); - Py_XDECREF(old_exc); - _PyEval_SignalAsyncExc(); - return 1; - } - } - HEAD_UNLOCK(); - return 0; -} - - -/* Routines for advanced debuggers, requested by David Beazley. - Don't use unless you know what you are doing! */ - -PyInterpreterState * -PyInterpreterState_Head(void) -{ - return interp_head; -} - -PyInterpreterState * -PyInterpreterState_Next(PyInterpreterState *interp) { - return interp->next; -} - -PyThreadState * -PyInterpreterState_ThreadHead(PyInterpreterState *interp) { - return interp->tstate_head; -} - -PyThreadState * -PyThreadState_Next(PyThreadState *tstate) { - return tstate->next; -} - -/* The implementation of sys._current_frames(). This is intended to be - called with the GIL held, as it will be when called via - sys._current_frames(). It's possible it would work fine even without - the GIL held, but haven't thought enough about that. -*/ -PyObject * -_PyThread_CurrentFrames(void) -{ - PyObject *result; - PyInterpreterState *i; - - result = PyDict_New(); - if (result == NULL) - return NULL; - - /* for i in all interpreters: - * for t in all of i's thread states: - * if t's frame isn't NULL, map t's id to its frame - * Because these lists can mutate even when the GIL is held, we - * need to grab head_mutex for the duration. - */ - HEAD_LOCK(); - for (i = interp_head; i != NULL; i = i->next) { - PyThreadState *t; - for (t = i->tstate_head; t != NULL; t = t->next) { - PyObject *id; - int stat; - struct _frame *frame = t->frame; - if (frame == NULL) - continue; - id = PyLong_FromLong(t->thread_id); - if (id == NULL) - goto Fail; - stat = PyDict_SetItem(result, id, (PyObject *)frame); - Py_DECREF(id); - if (stat < 0) - goto Fail; - } - } - HEAD_UNLOCK(); - return result; - - Fail: - HEAD_UNLOCK(); - Py_DECREF(result); - return NULL; -} - -/* Python "auto thread state" API. */ -#ifdef WITH_THREAD - -/* Keep this as a static, as it is not reliable! It can only - ever be compared to the state for the *current* thread. - * If not equal, then it doesn't matter that the actual - value may change immediately after comparison, as it can't - possibly change to the current thread's state. - * If equal, then the current thread holds the lock, so the value can't - change until we yield the lock. -*/ -static int -PyThreadState_IsCurrent(PyThreadState *tstate) -{ - /* Must be the tstate for this thread */ - assert(PyGILState_GetThisThreadState()==tstate); - return tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current); -} - -/* Internal initialization/finalization functions called by - Py_Initialize/Py_Finalize -*/ -void -_PyGILState_Init(PyInterpreterState *i, PyThreadState *t) -{ - assert(i && t); /* must init with valid states */ - autoTLSkey = PyThread_create_key(); - if (autoTLSkey == -1) - Py_FatalError("Could not allocate TLS entry"); - autoInterpreterState = i; - assert(PyThread_get_key_value(autoTLSkey) == NULL); - assert(t->gilstate_counter == 0); - - _PyGILState_NoteThreadState(t); -} - -void -_PyGILState_Fini(void) -{ - PyThread_delete_key(autoTLSkey); - autoInterpreterState = NULL; -} - -/* Reset the TLS key - called by PyOS_AfterFork(). - * This should not be necessary, but some - buggy - pthread implementations - * don't reset TLS upon fork(), see issue #10517. - */ -void -_PyGILState_Reinit(void) -{ - PyThreadState *tstate = PyGILState_GetThisThreadState(); - PyThread_delete_key(autoTLSkey); - if ((autoTLSkey = PyThread_create_key()) == -1) - Py_FatalError("Could not allocate TLS entry"); - - /* If the thread had an associated auto thread state, reassociate it with - * the new key. */ - if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) - Py_FatalError("Couldn't create autoTLSkey mapping"); -} - -/* When a thread state is created for a thread by some mechanism other than - PyGILState_Ensure, it's important that the GILState machinery knows about - it so it doesn't try to create another thread state for the thread (this is - a better fix for SF bug #1010677 than the first one attempted). -*/ -static void -_PyGILState_NoteThreadState(PyThreadState* tstate) -{ - /* If autoTLSkey isn't initialized, this must be the very first - threadstate created in Py_Initialize(). Don't do anything for now - (we'll be back here when _PyGILState_Init is called). */ - if (!autoInterpreterState) - return; - - /* Stick the thread state for this thread in thread local storage. - - The only situation where you can legitimately have more than one - thread state for an OS level thread is when there are multiple - interpreters, when: - - a) You shouldn't really be using the PyGILState_ APIs anyway, - and: - - b) The slightly odd way PyThread_set_key_value works (see - comments by its implementation) means that the first thread - state created for that given OS level thread will "win", - which seems reasonable behaviour. - */ - if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0) - Py_FatalError("Couldn't create autoTLSkey mapping"); - - /* PyGILState_Release must not try to delete this thread state. */ - tstate->gilstate_counter = 1; -} - -/* The public functions */ -PyThreadState * -PyGILState_GetThisThreadState(void) -{ - if (autoInterpreterState == NULL) - return NULL; - return (PyThreadState *)PyThread_get_key_value(autoTLSkey); -} - -PyGILState_STATE -PyGILState_Ensure(void) -{ - int current; - PyThreadState *tcur; - /* Note that we do not auto-init Python here - apart from - potential races with 2 threads auto-initializing, pep-311 - spells out other issues. Embedders are expected to have - called Py_Initialize() and usually PyEval_InitThreads(). - */ - assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */ - tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey); - if (tcur == NULL) { - /* Create a new thread state for this thread */ - tcur = PyThreadState_New(autoInterpreterState); - if (tcur == NULL) - Py_FatalError("Couldn't create thread-state for new thread"); - /* This is our thread state! We'll need to delete it in the - matching call to PyGILState_Release(). */ - tcur->gilstate_counter = 0; - current = 0; /* new thread state is never current */ - } - else - current = PyThreadState_IsCurrent(tcur); - if (current == 0) - PyEval_RestoreThread(tcur); - /* Update our counter in the thread-state - no need for locks: - - tcur will remain valid as we hold the GIL. - - the counter is safe as we are the only thread "allowed" - to modify this value - */ - ++tcur->gilstate_counter; - return current ? PyGILState_LOCKED : PyGILState_UNLOCKED; -} - -void -PyGILState_Release(PyGILState_STATE oldstate) -{ - PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value( - autoTLSkey); - if (tcur == NULL) - Py_FatalError("auto-releasing thread-state, " - "but no thread-state for this thread"); - /* We must hold the GIL and have our thread state current */ - /* XXX - remove the check - the assert should be fine, - but while this is very new (April 2003), the extra check - by release-only users can't hurt. - */ - if (! PyThreadState_IsCurrent(tcur)) - Py_FatalError("This thread state must be current when releasing"); - assert(PyThreadState_IsCurrent(tcur)); - --tcur->gilstate_counter; - assert(tcur->gilstate_counter >= 0); /* illegal counter value */ - - /* If we're going to destroy this thread-state, we must - * clear it while the GIL is held, as destructors may run. - */ - if (tcur->gilstate_counter == 0) { - /* can't have been locked when we created it */ - assert(oldstate == PyGILState_UNLOCKED); - PyThreadState_Clear(tcur); - /* Delete the thread-state. Note this releases the GIL too! - * It's vital that the GIL be held here, to avoid shutdown - * races; see bugs 225673 and 1061968 (that nasty bug has a - * habit of coming back). - */ - PyThreadState_DeleteCurrent(); - } - /* Release the lock if necessary */ - else if (oldstate == PyGILState_UNLOCKED) - PyEval_SaveThread(); -} - -#endif /* WITH_THREAD */ - -
--- a/cos/python/Python/pythonrun.c Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2270 +0,0 @@ - -/* Python interpreter top-level routines, including init/exit */ - -#include "Python.h" - -#include "Python-ast.h" -#undef Yield /* undefine macro conflicting with winbase.h */ -#include "grammar.h" -#include "node.h" -#include "token.h" -#include "parsetok.h" -#include "errcode.h" -#include "code.h" -#include "symtable.h" -#include "ast.h" -#include "marshal.h" -#include "osdefs.h" - -#define PRINT_TOTAL_REFS() - -extern wchar_t *Py_GetPath(void); - -extern grammar _PyParser_Grammar; /* From graminit.c */ - -/* Forward */ -static void initmain(void); -static int initfsencoding(PyInterpreterState *interp); -static void initsite(void); -static int initstdio(void); -static void flush_io(void); -static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *, - PyCompilerFlags *, PyArena *); -static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, - PyCompilerFlags *); -static void err_input(perrdetail *); -static void err_free(perrdetail *); -static void initsigs(void); -static void call_py_exitfuncs(void); -static void wait_for_thread_shutdown(void); -static void call_ll_exitfuncs(void); -extern int _PyUnicode_Init(void); -extern void _PyUnicode_Fini(void); -extern int _PyLong_Init(void); -extern void PyLong_Fini(void); -extern int _PyFaulthandler_Init(void); -extern void _PyFaulthandler_Fini(void); - -#ifdef WITH_THREAD -extern void _PyGILState_Init(PyInterpreterState *, PyThreadState *); -extern void _PyGILState_Fini(void); -#endif /* WITH_THREAD */ - -int Py_DebugFlag; /* Needed by parser.c */ -int Py_VerboseFlag; /* Needed by import.c */ -int Py_QuietFlag; /* Needed by sysmodule.c */ -int Py_InteractiveFlag; /* Needed by Py_FdIsInteractive() below */ -int Py_InspectFlag; /* Needed to determine whether to exit at SystemExit */ -int Py_NoSiteFlag; /* Suppress 'import site' */ -int Py_BytesWarningFlag; /* Warn on str(bytes) and str(buffer) */ -int Py_DontWriteBytecodeFlag; /* Suppress writing bytecode files (*.py[co]) */ -int Py_UseClassExceptionsFlag = 1; /* Needed by bltinmodule.c: deprecated */ -int Py_FrozenFlag; /* Needed by getpath.c */ -int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */ -int Py_NoUserSiteDirectory = 0; /* for -s and site.py */ -int Py_UnbufferedStdioFlag = 0; /* Unbuffered binary std{in,out,err} */ - -PyThreadState *_Py_Finalizing = NULL; - -/* PyModule_GetWarningsModule is no longer necessary as of 2.6 -since _warnings is builtin. This API should not be used. */ -PyObject * -PyModule_GetWarningsModule(void) -{ - return PyImport_ImportModule("warnings"); -} - -static int initialized = 0; - -/* API to access the initialized flag -- useful for esoteric use */ - -int -Py_IsInitialized(void) -{ - return initialized; -} - -/* Global initializations. Can be undone by Py_Finalize(). Don't - call this twice without an intervening Py_Finalize() call. When - initializations fail, a fatal error is issued and the function does - not return. On return, the first thread and interpreter state have - been created. - - Locking: you must hold the interpreter lock while calling this. - (If the lock has not yet been initialized, that's equivalent to - having the lock, but you cannot use multiple threads.) - -*/ - -static int -add_flag(int flag, const char *envs) -{ - int env = atoi(envs); - if (flag < env) - flag = env; - if (flag < 1) - flag = 1; - return flag; -} - -static char* -get_codec_name(const char *encoding) -{ - char *name_utf8, *name_str; - PyObject *codec, *name = NULL; - _Py_IDENTIFIER(name); - - codec = _PyCodec_Lookup(encoding); - if (!codec) - goto error; - - name = _PyObject_GetAttrId(codec, &PyId_name); - Py_CLEAR(codec); - if (!name) - goto error; - - name_utf8 = _PyUnicode_AsString(name); - if (name_utf8 == NULL) - goto error; - name_str = strdup(name_utf8); - Py_DECREF(name); - if (name_str == NULL) { - PyErr_NoMemory(); - return NULL; - } - return name_str; - -error: - Py_XDECREF(codec); - Py_XDECREF(name); - return NULL; -} - -static char* -get_locale_encoding(void) -{ - PyErr_SetNone(PyExc_NotImplementedError); - return NULL; -} - -void -Py_InitializeEx(int install_sigs) -{ - PyInterpreterState *interp; - PyThreadState *tstate; - PyObject *bimod, *sysmod, *pstderr; - char *p; - extern void _Py_ReadyTypes(void); - - if (initialized) - return; - initialized = 1; - _Py_Finalizing = NULL; - - if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') - Py_DebugFlag = add_flag(Py_DebugFlag, p); - if ((p = Py_GETENV("PYTHONVERBOSE")) && *p != '\0') - Py_VerboseFlag = add_flag(Py_VerboseFlag, p); - if ((p = Py_GETENV("PYTHONOPTIMIZE")) && *p != '\0') - Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p); - if ((p = Py_GETENV("PYTHONDONTWRITEBYTECODE")) && *p != '\0') - Py_DontWriteBytecodeFlag = add_flag(Py_DontWriteBytecodeFlag, p); - - interp = PyInterpreterState_New(); - if (interp == NULL) - Py_FatalError("Py_Initialize: can't make first interpreter"); - - tstate = PyThreadState_New(interp); - if (tstate == NULL) - Py_FatalError("Py_Initialize: can't make first thread"); - (void) PyThreadState_Swap(tstate); - -#ifdef WITH_THREAD - /* We can't call _PyEval_FiniThreads() in Py_Finalize because - destroying the GIL might fail when it is being referenced from - another running thread (see issue #9901). - Instead we destroy the previously created GIL here, which ensures - that we can call Py_Initialize / Py_Finalize multiple times. */ - _PyEval_FiniThreads(); - - /* Auto-thread-state API */ - _PyGILState_Init(interp, tstate); -#endif /* WITH_THREAD */ - - _Py_ReadyTypes(); - - if (!_PyFrame_Init()) - Py_FatalError("Py_Initialize: can't init frames"); - - if (!_PyLong_Init()) - Py_FatalError("Py_Initialize: can't init longs"); - - if (!PyByteArray_Init()) - Py_FatalError("Py_Initialize: can't init bytearray"); - - _PyFloat_Init(); - - interp->modules = PyDict_New(); - if (interp->modules == NULL) - Py_FatalError("Py_Initialize: can't make modules dictionary"); - interp->modules_reloading = PyDict_New(); - if (interp->modules_reloading == NULL) - Py_FatalError("Py_Initialize: can't make modules_reloading dictionary"); - - /* Init Unicode implementation; relies on the codec registry */ - if (_PyUnicode_Init() < 0) - Py_FatalError("Py_Initialize: can't initialize unicode"); - - bimod = _PyBuiltin_Init(); - if (bimod == NULL) - Py_FatalError("Py_Initialize: can't initialize builtins modules"); - _PyImport_FixupBuiltin(bimod, "builtins"); - interp->builtins = PyModule_GetDict(bimod); - if (interp->builtins == NULL) - Py_FatalError("Py_Initialize: can't initialize builtins dict"); - Py_INCREF(interp->builtins); - - /* initialize builtin exceptions */ - _PyExc_Init(); - - sysmod = _PySys_Init(); - if (sysmod == NULL) - Py_FatalError("Py_Initialize: can't initialize sys"); - interp->sysdict = PyModule_GetDict(sysmod); - if (interp->sysdict == NULL) - Py_FatalError("Py_Initialize: can't initialize sys dict"); - Py_INCREF(interp->sysdict); - _PyImport_FixupBuiltin(sysmod, "sys"); - PySys_SetPath(Py_GetPath()); - PyDict_SetItemString(interp->sysdict, "modules", - interp->modules); - - /* Set up a preliminary stderr printer until we have enough - infrastructure for the io module in place. */ - pstderr = PyFile_NewStdPrinter(fileno(stderr)); - if (pstderr == NULL) - Py_FatalError("Py_Initialize: can't set preliminary stderr"); - PySys_SetObject("stderr", pstderr); - PySys_SetObject("__stderr__", pstderr); - Py_DECREF(pstderr); - - _PyImport_Init(); - - _PyImportHooks_Init(); - - /* initialize the faulthandler module */ - if (_PyFaulthandler_Init()) - Py_FatalError("Py_Initialize: can't initialize faulthandler"); - - /* Initialize _warnings. */ - _PyWarnings_Init(); - - _PyTime_Init(); - - if (initfsencoding(interp) < 0) - Py_FatalError("Py_Initialize: unable to load the file system codec"); - - if (install_sigs) - initsigs(); /* Signal handling stuff, including initintr() */ - - initmain(); /* Module __main__ */ - if (initstdio() < 0) - Py_FatalError( - "Py_Initialize: can't initialize sys standard streams"); - - /* Initialize warnings. */ - if (PySys_HasWarnOptions()) { - PyObject *warnings_module = PyImport_ImportModule("warnings"); - if (warnings_module == NULL) { - fprintf(stderr, "'import warnings' failed; traceback:\n"); - PyErr_Print(); - } - Py_XDECREF(warnings_module); - } - - if (!Py_NoSiteFlag) - initsite(); /* Module site */ -} - -void -Py_Initialize(void) -{ - Py_InitializeEx(1); -} - -/* Flush stdout and stderr */ - -static int -file_is_closed(PyObject *fobj) -{ - int r; - PyObject *tmp = PyObject_GetAttrString(fobj, "closed"); - if (tmp == NULL) { - PyErr_Clear(); - return 0; - } - r = PyObject_IsTrue(tmp); - Py_DECREF(tmp); - if (r < 0) - PyErr_Clear(); - return r > 0; -} - -static void -flush_std_files(void) -{ - PyObject *fout = PySys_GetObject("stdout"); - PyObject *ferr = PySys_GetObject("stderr"); - PyObject *tmp; - _Py_IDENTIFIER(flush); - - if (fout != NULL && fout != Py_None && !file_is_closed(fout)) { - tmp = _PyObject_CallMethodId(fout, &PyId_flush, ""); - if (tmp == NULL) - PyErr_WriteUnraisable(fout); - else - Py_DECREF(tmp); - } - - if (ferr != NULL && ferr != Py_None && !file_is_closed(ferr)) { - tmp = _PyObject_CallMethodId(ferr, &PyId_flush, ""); - if (tmp == NULL) - PyErr_Clear(); - else - Py_DECREF(tmp); - } -} - -/* Undo the effect of Py_Initialize(). - - Beware: if multiple interpreter and/or thread states exist, these - are not wiped out; only the current thread and interpreter state - are deleted. But since everything else is deleted, those other - interpreter and thread states should no longer be used. - - (XXX We should do better, e.g. wipe out all interpreters and - threads.) - - Locking: as above. - -*/ - -void -Py_Finalize(void) -{ - PyInterpreterState *interp; - PyThreadState *tstate; - - if (!initialized) - return; - - wait_for_thread_shutdown(); - - /* The interpreter is still entirely intact at this point, and the - * exit funcs may be relying on that. In particular, if some thread - * or exit func is still waiting to do an import, the import machinery - * expects Py_IsInitialized() to return true. So don't say the - * interpreter is uninitialized until after the exit funcs have run. - * Note that Threading.py uses an exit func to do a join on all the - * threads created thru it, so this also protects pending imports in - * the threads created via Threading. - */ - call_py_exitfuncs(); - - /* Get current thread state and interpreter pointer */ - tstate = PyThreadState_GET(); - interp = tstate->interp; - - /* Remaining threads (e.g. daemon threads) will automatically exit - after taking the GIL (in PyEval_RestoreThread()). */ - _Py_Finalizing = tstate; - initialized = 0; - - /* Flush stdout+stderr */ - flush_std_files(); - - /* Disable signal handling */ - PyOS_FiniInterrupts(); - - /* Clear type lookup cache */ - PyType_ClearCache(); - - /* Collect garbage. This may call finalizers; it's nice to call these - * before all modules are destroyed. - * XXX If a __del__ or weakref callback is triggered here, and tries to - * XXX import a module, bad things can happen, because Python no - * XXX longer believes it's initialized. - * XXX Fatal Python error: Interpreter not initialized (version mismatch?) - * XXX is easy to provoke that way. I've also seen, e.g., - * XXX Exception exceptions.ImportError: 'No module named sha' - * XXX in <function callback at 0x008F5718> ignored - * XXX but I'm unclear on exactly how that one happens. In any case, - * XXX I haven't seen a real-life report of either of these. - */ - PyGC_Collect(); - /* We run this while most interpreter state is still alive, so that - debug information can be printed out */ - _PyGC_Fini(); - - /* Destroy all modules */ - PyImport_Cleanup(); - - /* Flush stdout+stderr (again, in case more was printed) */ - flush_std_files(); - - /* Collect final garbage. This disposes of cycles created by - * new-style class definitions, for example. - * XXX This is disabled because it caused too many problems. If - * XXX a __del__ or weakref callback triggers here, Python code has - * XXX a hard time running, because even the sys module has been - * XXX cleared out (sys.stdout is gone, sys.excepthook is gone, etc). - * XXX One symptom is a sequence of information-free messages - * XXX coming from threads (if a __del__ or callback is invoked, - * XXX other threads can execute too, and any exception they encounter - * XXX triggers a comedy of errors as subsystem after subsystem - * XXX fails to find what it *expects* to find in sys to help report - * XXX the exception and consequent unexpected failures). I've also - * XXX seen segfaults then, after adding print statements to the - * XXX Python code getting called. - */ - - /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ - _PyImport_Fini(); - - /* unload faulthandler module */ - _PyFaulthandler_Fini(); - - /* Clear interpreter state */ - PyInterpreterState_Clear(interp); - - /* Now we decref the exception classes. After this point nothing - can raise an exception. That's okay, because each Fini() method - below has been checked to make sure no exceptions are ever - raised. - */ - - _PyExc_Fini(); - - /* Cleanup auto-thread-state */ -#ifdef WITH_THREAD - _PyGILState_Fini(); -#endif /* WITH_THREAD */ - - /* Delete current thread */ - PyThreadState_Swap(NULL); - PyInterpreterState_Delete(interp); - - /* Sundry finalizers */ - PyMethod_Fini(); - PyFrame_Fini(); - PyCFunction_Fini(); - PyTuple_Fini(); - PyList_Fini(); - PySet_Fini(); - PyBytes_Fini(); - PyByteArray_Fini(); - PyLong_Fini(); - PyFloat_Fini(); - PyDict_Fini(); - PySlice_Fini(); - - /* Cleanup Unicode implementation */ - _PyUnicode_Fini(); - - /* reset file system default encoding */ - if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) { - free((char*)Py_FileSystemDefaultEncoding); - Py_FileSystemDefaultEncoding = NULL; - } - - /* XXX Still allocated: - - various static ad-hoc pointers to interned strings - - int and float free list blocks - - whatever various modules and libraries allocate - */ - - PyGrammar_RemoveAccelerators(&_PyParser_Grammar); - - call_ll_exitfuncs(); -} - -/* Create and initialize a new interpreter and thread, and return the - new thread. This requires that Py_Initialize() has been called - first. - - Unsuccessful initialization yields a NULL pointer. Note that *no* - exception information is available even in this case -- the - exception information is held in the thread, and there is no - thread. - - Locking: as above. - -*/ - -PyThreadState * -Py_NewInterpreter(void) -{ - PyInterpreterState *interp; - PyThreadState *tstate, *save_tstate; - PyObject *bimod, *sysmod; - - if (!initialized) - Py_FatalError("Py_NewInterpreter: call Py_Initialize first"); - - interp = PyInterpreterState_New(); - if (interp == NULL) - return NULL; - - tstate = PyThreadState_New(interp); - if (tstate == NULL) { - PyInterpreterState_Delete(interp); - return NULL; - } - - save_tstate = PyThreadState_Swap(tstate); - - /* XXX The following is lax in error checking */ - - interp->modules = PyDict_New(); - interp->modules_reloading = PyDict_New(); - - bimod = _PyImport_FindBuiltin("builtins"); - if (bimod != NULL) { - interp->builtins = PyModule_GetDict(bimod); - if (interp->builtins == NULL) - goto handle_error; - Py_INCREF(interp->builtins); - } - - /* initialize builtin exceptions */ - _PyExc_Init(); - - sysmod = _PyImport_FindBuiltin("sys"); - if (bimod != NULL && sysmod != NULL) { - PyObject *pstderr; - interp->sysdict = PyModule_GetDict(sysmod); - if (interp->sysdict == NULL) - goto handle_error; - Py_INCREF(interp->sysdict); - PySys_SetPath(Py_GetPath()); - PyDict_SetItemString(interp->sysdict, "modules", - interp->modules); - /* Set up a preliminary stderr printer until we have enough - infrastructure for the io module in place. */ - pstderr = PyFile_NewStdPrinter(fileno(stderr)); - if (pstderr == NULL) - Py_FatalError("Py_Initialize: can't set preliminary stderr"); - PySys_SetObject("stderr", pstderr); - PySys_SetObject("__stderr__", pstderr); - Py_DECREF(pstderr); - - _PyImportHooks_Init(); - - if (initfsencoding(interp) < 0) - goto handle_error; - - if (initstdio() < 0) - Py_FatalError( - "Py_Initialize: can't initialize sys standard streams"); - initmain(); - if (!Py_NoSiteFlag) - initsite(); - } - - if (!PyErr_Occurred()) - return tstate; - -handle_error: - /* Oops, it didn't work. Undo it all. */ - - PyErr_PrintEx(0); - PyThreadState_Clear(tstate); - PyThreadState_Swap(save_tstate); - PyThreadState_Delete(tstate); - PyInterpreterState_Delete(interp); - - return NULL; -} - -/* Delete an interpreter and its last thread. This requires that the - given thread state is current, that the thread has no remaining - frames, and that it is its interpreter's only remaining thread. - It is a fatal error to violate these constraints. - - (Py_Finalize() doesn't have these constraints -- it zaps - everything, regardless.) - - Locking: as above. - -*/ - -void -Py_EndInterpreter(PyThreadState *tstate) -{ - PyInterpreterState *interp = tstate->interp; - - if (tstate != PyThreadState_GET()) - Py_FatalError("Py_EndInterpreter: thread is not current"); - if (tstate->frame != NULL) - Py_FatalError("Py_EndInterpreter: thread still has a frame"); - if (tstate != interp->tstate_head || tstate->next != NULL) - Py_FatalError("Py_EndInterpreter: not the last thread"); - - PyImport_Cleanup(); - PyInterpreterState_Clear(interp); - PyThreadState_Swap(NULL); - PyInterpreterState_Delete(interp); -} - -static wchar_t *progname = L"python"; - -void -Py_SetProgramName(wchar_t *pn) -{ - if (pn && *pn) - progname = pn; -} - -wchar_t * -Py_GetProgramName(void) -{ - return progname; -} - -static wchar_t *default_home = NULL; -static wchar_t env_home[PATH_MAX+1]; - -void -Py_SetPythonHome(wchar_t *home) -{ - default_home = home; -} - -wchar_t * -Py_GetPythonHome(void) -{ - wchar_t *home = default_home; - if (home == NULL && !Py_IgnoreEnvironmentFlag) { - char* chome = Py_GETENV("PYTHONHOME"); - if (chome) { - size_t r = mbstowcs(env_home, chome, PATH_MAX+1); - if (r != (size_t)-1 && r <= PATH_MAX) - home = env_home; - } - - } - return home; -} - -/* Create __main__ module */ - -static void -initmain(void) -{ - PyObject *m, *d; - m = PyImport_AddModule("__main__"); - if (m == NULL) - Py_FatalError("can't create __main__ module"); - d = PyModule_GetDict(m); - if (PyDict_GetItemString(d, "__builtins__") == NULL) { - PyObject *bimod = PyImport_ImportModule("builtins"); - if (bimod == NULL || - PyDict_SetItemString(d, "__builtins__", bimod) != 0) - Py_FatalError("can't add __builtins__ to __main__"); - Py_DECREF(bimod); - } -} - -static int -initfsencoding(PyInterpreterState *interp) -{ - PyObject *codec; - - if (Py_FileSystemDefaultEncoding == NULL) - { - Py_FileSystemDefaultEncoding = get_locale_encoding(); - if (Py_FileSystemDefaultEncoding == NULL) - Py_FatalError("Py_Initialize: Unable to get the locale encoding"); - - Py_HasFileSystemDefaultEncoding = 0; - interp->fscodec_initialized = 1; - return 0; - } - - /* the encoding is mbcs, utf-8 or ascii */ - codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); - if (!codec) { - /* Such error can only occurs in critical situations: no more - * memory, import a module of the standard library failed, - * etc. */ - return -1; - } - Py_DECREF(codec); - interp->fscodec_initialized = 1; - return 0; -} - -/* Import the site module (not into __main__ though) */ - -static void -initsite(void) -{ - PyObject *m; - m = PyImport_ImportModule("site"); - if (m == NULL) { - PyErr_Print(); - Py_Finalize(); - exit(1); - } - else { - Py_DECREF(m); - } -} - -static PyObject* -create_stdio(PyObject* io, - int fd, int write_mode, char* name, - char* encoding, char* errors) -{ - PyObject *buf = NULL, *stream = NULL, *text = NULL, *raw = NULL, *res; - const char* mode; - const char* newline; - PyObject *line_buffering; - int buffering, isatty; - _Py_IDENTIFIER(open); - _Py_IDENTIFIER(isatty); - _Py_IDENTIFIER(TextIOWrapper); - _Py_IDENTIFIER(name); - _Py_IDENTIFIER(mode); - - /* stdin is always opened in buffered mode, first because it shouldn't - make a difference in common use cases, second because TextIOWrapper - depends on the presence of a read1() method which only exists on - buffered streams. - */ - if (Py_UnbufferedStdioFlag && write_mode) - buffering = 0; - else - buffering = -1; - if (write_mode) - mode = "wb"; - else - mode = "rb"; - buf = _PyObject_CallMethodId(io, &PyId_open, "isiOOOi", - fd, mode, buffering, - Py_None, Py_None, Py_None, 0); - if (buf == NULL) - goto error; - - if (buffering) { - _Py_IDENTIFIER(raw); - raw = _PyObject_GetAttrId(buf, &PyId_raw); - if (raw == NULL) - goto error; - } - else { - raw = buf; - Py_INCREF(raw); - } - - text = PyUnicode_FromString(name); - if (text == NULL || _PyObject_SetAttrId(raw, &PyId_name, text) < 0) - goto error; - res = _PyObject_CallMethodId(raw, &PyId_isatty, ""); - if (res == NULL) - goto error; - isatty = PyObject_IsTrue(res); - Py_DECREF(res); - if (isatty == -1) - goto error; - if (isatty || Py_UnbufferedStdioFlag) - line_buffering = Py_True; - else - line_buffering = Py_False; - - Py_CLEAR(raw); - Py_CLEAR(text); - - newline = "\n"; - - stream = _PyObject_CallMethodId(io, &PyId_TextIOWrapper, "OsssO", - buf, encoding, errors, - newline, line_buffering); - Py_CLEAR(buf); - if (stream == NULL) - goto error; - - if (write_mode) - mode = "w"; - else - mode = "r"; - text = PyUnicode_FromString(mode); - if (!text || _PyObject_SetAttrId(stream, &PyId_mode, text) < 0) - goto error; - Py_CLEAR(text); - return stream; - -error: - Py_XDECREF(buf); - Py_XDECREF(stream); - Py_XDECREF(text); - Py_XDECREF(raw); - return NULL; -} - -static int -is_valid_fd(int fd) -{ - int dummy_fd; - if (fd < 0 || !_PyVerify_fd(fd)) - return 0; - dummy_fd = dup(fd); - if (dummy_fd < 0) - return 0; - close(dummy_fd); - return 1; -} - -/* Initialize sys.stdin, stdout, stderr and builtins.open */ -static int -initstdio(void) -{ - PyObject *iomod = NULL, *wrapper; - PyObject *bimod = NULL; - PyObject *m; - PyObject *std = NULL; - int status = 0, fd; - PyObject * encoding_attr; - char *encoding = NULL, *errors; - - /* Hack to avoid a nasty recursion issue when Python is invoked - in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ - if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) { - goto error; - } - Py_DECREF(m); - - if (!(m = PyImport_ImportModule("encodings.latin_1"))) { - goto error; - } - Py_DECREF(m); - - if (!(bimod = PyImport_ImportModule("builtins"))) { - goto error; - } - - if (!(iomod = PyImport_ImportModule("io"))) { - goto error; - } - if (!(wrapper = PyObject_GetAttrString(iomod, "OpenWrapper"))) { - goto error; - } - - /* Set builtins.open */ - if (PyObject_SetAttrString(bimod, "open", wrapper) == -1) { - Py_DECREF(wrapper); - goto error; - } - Py_DECREF(wrapper); - - encoding = Py_GETENV("PYTHONIOENCODING"); - errors = NULL; - if (encoding) { - encoding = strdup(encoding); - errors = strchr(encoding, ':'); - if (errors) { - *errors = '\0'; - errors++; - } - } - - /* Set sys.stdin */ - fd = fileno(stdin); - /* Under some conditions stdin, stdout and stderr may not be connected - * and fileno() may point to an invalid file descriptor. For example - * GUI apps don't have valid standard streams by default. - */ - if (!is_valid_fd(fd)) { - std = Py_None; - Py_INCREF(std); - } - else { - std = create_stdio(iomod, fd, 0, "<stdin>", encoding, errors); - if (std == NULL) - goto error; - } /* if (fd < 0) */ - PySys_SetObject("__stdin__", std); - PySys_SetObject("stdin", std); - Py_DECREF(std); - - /* Set sys.stdout */ - fd = fileno(stdout); - if (!is_valid_fd(fd)) { - std = Py_None; - Py_INCREF(std); - } - else { - std = create_stdio(iomod, fd, 1, "<stdout>", encoding, errors); - if (std == NULL) - goto error; - } /* if (fd < 0) */ - PySys_SetObject("__stdout__", std); - PySys_SetObject("stdout", std); - Py_DECREF(std); - -#if 1 /* Disable this if you have trouble debugging bootstrap stuff */ - /* Set sys.stderr, replaces the preliminary stderr */ - fd = fileno(stderr); - if (!is_valid_fd(fd)) { - std = Py_None; - Py_INCREF(std); - } - else { - std = create_stdio(iomod, fd, 1, "<stderr>", encoding, "backslashreplace"); - if (std == NULL) - goto error; - } /* if (fd < 0) */ - - /* Same as hack above, pre-import stderr's codec to avoid recursion - when import.c tries to write to stderr in verbose mode. */ - encoding_attr = PyObject_GetAttrString(std, "encoding"); - if (encoding_attr != NULL) { - const char * encoding; - encoding = _PyUnicode_AsString(encoding_attr); - if (encoding != NULL) { - _PyCodec_Lookup(encoding); - } - Py_DECREF(encoding_attr); - } - PyErr_Clear(); /* Not a fatal error if codec isn't available */ - - PySys_SetObject("__stderr__", std); - PySys_SetObject("stderr", std); - Py_DECREF(std); -#endif - - if (0) { - error: - status = -1; - } - - if (encoding) - free(encoding); - Py_XDECREF(bimod); - Py_XDECREF(iomod); - return status; -} - -/* Parse input from a file and execute it */ - -int -PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit, - PyCompilerFlags *flags) -{ - if (filename == NULL) - filename = "???"; - if (Py_FdIsInteractive(fp, filename)) { - int err = PyRun_InteractiveLoopFlags(fp, filename, flags); - if (closeit) - fclose(fp); - return err; - } - else - return PyRun_SimpleFileExFlags(fp, filename, closeit, flags); -} - -int -PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) -{ - PyObject *v; - int ret; - PyCompilerFlags local_flags; - - if (flags == NULL) { - flags = &local_flags; - local_flags.cf_flags = 0; - } - v = PySys_GetObject("ps1"); - if (v == NULL) { - PySys_SetObject("ps1", v = PyUnicode_FromString(">>> ")); - Py_XDECREF(v); - } - v = PySys_GetObject("ps2"); - if (v == NULL) { - PySys_SetObject("ps2", v = PyUnicode_FromString("... ")); - Py_XDECREF(v); - } - for (;;) { - ret = PyRun_InteractiveOneFlags(fp, filename, flags); - PRINT_TOTAL_REFS(); - if (ret == E_EOF) - return 0; - /* - if (ret == E_NOMEM) - return -1; - */ - } -} - -/* compute parser flags based on compiler flags */ -static int PARSER_FLAGS(PyCompilerFlags *flags) -{ - int parser_flags = 0; - if (!flags) - return 0; - if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) - parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; - if (flags->cf_flags & PyCF_IGNORE_COOKIE) - parser_flags |= PyPARSE_IGNORE_COOKIE; - if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) - parser_flags |= PyPARSE_BARRY_AS_BDFL; - return parser_flags; -} - -int -PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) -{ - PyObject *m, *d, *v, *w, *oenc = NULL; - mod_ty mod; - PyArena *arena; - char *ps1 = "", *ps2 = "", *enc = NULL; - int errcode = 0; - _Py_IDENTIFIER(encoding); - - if (fp == stdin) { - /* Fetch encoding from sys.stdin */ - v = PySys_GetObject("stdin"); - if (v == NULL || v == Py_None) - return -1; - oenc = _PyObject_GetAttrId(v, &PyId_encoding); - if (!oenc) - return -1; - enc = _PyUnicode_AsString(oenc); - if (enc == NULL) - return -1; - } - v = PySys_GetObject("ps1"); - if (v != NULL) { - v = PyObject_Str(v); - if (v == NULL) - PyErr_Clear(); - else if (PyUnicode_Check(v)) { - ps1 = _PyUnicode_AsString(v); - if (ps1 == NULL) { - PyErr_Clear(); - ps1 = ""; - } - } - } - w = PySys_GetObject("ps2"); - if (w != NULL) { - w = PyObject_Str(w); - if (w == NULL) - PyErr_Clear(); - else if (PyUnicode_Check(w)) { - ps2 = _PyUnicode_AsString(w); - if (ps2 == NULL) { - PyErr_Clear(); - ps2 = ""; - } - } - } - arena = PyArena_New(); - if (arena == NULL) { - Py_XDECREF(v); - Py_XDECREF(w); - Py_XDECREF(oenc); - return -1; - } - mod = PyParser_ASTFromFile(fp, filename, enc, - Py_single_input, ps1, ps2, - flags, &errcode, arena); - Py_XDECREF(v); - Py_XDECREF(w); - Py_XDECREF(oenc); - if (mod == NULL) { - PyArena_Free(arena); - if (errcode == E_EOF) { - PyErr_Clear(); - return E_EOF; - } - PyErr_Print(); - return -1; - } - m = PyImport_AddModule("__main__"); - if (m == NULL) { - PyArena_Free(arena); - return -1; - } - d = PyModule_GetDict(m); - v = run_mod(mod, filename, d, d, flags, arena); - PyArena_Free(arena); - flush_io(); - if (v == NULL) { - PyErr_Print(); - return -1; - } - Py_DECREF(v); - return 0; -} - -/* Check whether a file maybe a pyc file: Look at the extension, - the file type, and, if we may close it, at the first few bytes. */ - -static int -maybe_pyc_file(FILE *fp, const char* filename, const char* ext, int closeit) -{ - if (strcmp(ext, ".pyc") == 0 || strcmp(ext, ".pyo") == 0) - return 1; - - /* Only look into the file if we are allowed to close it, since - it then should also be seekable. */ - if (closeit) { - /* Read only two bytes of the magic. If the file was opened in - text mode, the bytes 3 and 4 of the magic (\r\n) might not - be read as they are on disk. */ - unsigned int halfmagic = PyImport_GetMagicNumber() & 0xFFFF; - unsigned char buf[2]; - /* Mess: In case of -x, the stream is NOT at its start now, - and ungetc() was used to push back the first newline, - which makes the current stream position formally undefined, - and a x-platform nightmare. - Unfortunately, we have no direct way to know whether -x - was specified. So we use a terrible hack: if the current - stream position is not 0, we assume -x was specified, and - give up. Bug 132850 on SourceForge spells out the - hopelessness of trying anything else (fseek and ftell - don't work predictably x-platform for text-mode files). - */ - int ispyc = 0; - if (ftell(fp) == 0) { - if (fread(buf, 1, 2, fp) == 2 && - ((unsigned int)buf[1]<<8 | buf[0]) == halfmagic) - ispyc = 1; - rewind(fp); - } - return ispyc; - } - return 0; -} - -int -PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, - PyCompilerFlags *flags) -{ - PyObject *m, *d, *v; - const char *ext; - int set_file_name = 0, ret; - size_t len; - - m = PyImport_AddModule("__main__"); - if (m == NULL) - return -1; - d = PyModule_GetDict(m); - if (PyDict_GetItemString(d, "__file__") == NULL) { - PyObject *f; - f = PyUnicode_DecodeFSDefault(filename); - if (f == NULL) - return -1; - if (PyDict_SetItemString(d, "__file__", f) < 0) { - Py_DECREF(f); - return -1; - } - if (PyDict_SetItemString(d, "__cached__", Py_None) < 0) { - Py_DECREF(f); - return -1; - } - set_file_name = 1; - Py_DECREF(f); - } - len = strlen(filename); - ext = filename + len - (len > 4 ? 4 : 0); - if (maybe_pyc_file(fp, filename, ext, closeit)) { - /* Try to run a pyc file. First, re-open in binary */ - if (closeit) - fclose(fp); - if ((fp = fopen(filename, "rb")) == NULL) { - fprintf(stderr, "python: Can't reopen .pyc file\n"); - ret = -1; - goto done; - } - /* Turn on optimization if a .pyo file is given */ - if (strcmp(ext, ".pyo") == 0) - Py_OptimizeFlag = 1; - v = run_pyc_file(fp, filename, d, d, flags); - } else { - v = PyRun_FileExFlags(fp, filename, Py_file_input, d, d, - closeit, flags); - } - flush_io(); - if (v == NULL) { - PyErr_Print(); - ret = -1; - goto done; - } - Py_DECREF(v); - ret = 0; - done: - if (set_file_name && PyDict_DelItemString(d, "__file__")) - PyErr_Clear(); - return ret; -} - -int -PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) -{ - PyObject *m, *d, *v; - m = PyImport_AddModule("__main__"); - if (m == NULL) - return -1; - d = PyModule_GetDict(m); - v = PyRun_StringFlags(command, Py_file_input, d, d, flags); - if (v == NULL) { - PyErr_Print(); - return -1; - } - Py_DECREF(v); - return 0; -} - -static int -parse_syntax_error(PyObject *err, PyObject **message, const char **filename, - int *lineno, int *offset, const char **text) -{ - long hold; - PyObject *v; - _Py_IDENTIFIER(msg); - _Py_IDENTIFIER(filename); - _Py_IDENTIFIER(lineno); - _Py_IDENTIFIER(offset); - _Py_IDENTIFIER(text); - - /* old style errors */ - if (PyTuple_Check(err)) - return PyArg_ParseTuple(err, "O(ziiz)", message, filename, - lineno, offset, text); - - /* new style errors. `err' is an instance */ - - if (! (v = _PyObject_GetAttrId(err, &PyId_msg))) - goto finally; - *message = v; - - if (!(v = _PyObject_GetAttrId(err, &PyId_filename))) - goto finally; - if (v == Py_None) - *filename = NULL; - else if (! (*filename = _PyUnicode_AsString(v))) - goto finally; - - Py_DECREF(v); - if (!(v = _PyObject_GetAttrId(err, &PyId_lineno))) - goto finally; - hold = PyLong_AsLong(v); - Py_DECREF(v); - v = NULL; - if (hold < 0 && PyErr_Occurred()) - goto finally; - *lineno = (int)hold; - - if (!(v = _PyObject_GetAttrId(err, &PyId_offset))) - goto finally; - if (v == Py_None) { - *offset = -1; - Py_DECREF(v); - v = NULL; - } else { - hold = PyLong_AsLong(v); - Py_DECREF(v); - v = NULL; - if (hold < 0 && PyErr_Occurred()) - goto finally; - *offset = (int)hold; - } - - if (!(v = _PyObject_GetAttrId(err, &PyId_text))) - goto finally; - if (v == Py_None) - *text = NULL; - else if (!PyUnicode_Check(v) || - !(*text = _PyUnicode_AsString(v))) - goto finally; - Py_DECREF(v); - return 1; - -finally: - Py_XDECREF(v); - return 0; -} - -void -PyErr_Print(void) -{ - PyErr_PrintEx(1); -} - -static void -print_error_text(PyObject *f, int offset, const char *text) -{ - char *nl; - if (offset >= 0) { - if (offset > 0 && offset == strlen(text) && text[offset - 1] == '\n') - offset--; - for (;;) { - nl = strchr(text, '\n'); - if (nl == NULL || nl-text >= offset) - break; - offset -= (int)(nl+1-text); - text = nl+1; - } - while (*text == ' ' || *text == '\t') { - text++; - offset--; - } - } - PyFile_WriteString(" ", f); - PyFile_WriteString(text, f); - if (*text == '\0' || text[strlen(text)-1] != '\n') - PyFile_WriteString("\n", f); - if (offset == -1) - return; - PyFile_WriteString(" ", f); - while (--offset > 0) - PyFile_WriteString(" ", f); - PyFile_WriteString("^\n", f); -} - -static void -handle_system_exit(void) -{ - PyObject *exception, *value, *tb; - int exitcode = 0; - - if (Py_InspectFlag) - /* Don't exit if -i flag was given. This flag is set to 0 - * when entering interactive mode for inspecting. */ - return; - - PyErr_Fetch(&exception, &value, &tb); - fflush(stdout); - if (value == NULL || value == Py_None) - goto done; - if (PyExceptionInstance_Check(value)) { - /* The error code should be in the `code' attribute. */ - _Py_IDENTIFIER(code); - PyObject *code = _PyObject_GetAttrId(value, &PyId_code); - if (code) { - Py_DECREF(value); - value = code; - if (value == Py_None) - goto done; - } - /* If we failed to dig out the 'code' attribute, - just let the else clause below print the error. */ - } - if (PyLong_Check(value)) - exitcode = (int)PyLong_AsLong(value); - else { - PyObject *sys_stderr = PySys_GetObject("stderr"); - if (sys_stderr != NULL && sys_stderr != Py_None) { - PyFile_WriteObject(value, sys_stderr, Py_PRINT_RAW); - } else { - PyObject_Print(value, stderr, Py_PRINT_RAW); - fflush(stderr); - } - PySys_WriteStderr("\n"); - exitcode = 1; - } - done: - /* Restore and clear the exception info, in order to properly decref - * the exception, value, and traceback. If we just exit instead, - * these leak, which confuses PYTHONDUMPREFS output, and may prevent - * some finalizers from running. - */ - PyErr_Restore(exception, value, tb); - PyErr_Clear(); - Py_Exit(exitcode); - /* NOTREACHED */ -} - -void -PyErr_PrintEx(int set_sys_last_vars) -{ - PyObject *exception, *v, *tb, *hook; - - if (PyErr_ExceptionMatches(PyExc_SystemExit)) { - handle_system_exit(); - } - PyErr_Fetch(&exception, &v, &tb); - if (exception == NULL) - return; - PyErr_NormalizeException(&exception, &v, &tb); - if (tb == NULL) { - tb = Py_None; - Py_INCREF(tb); - } - PyException_SetTraceback(v, tb); - if (exception == NULL) - return; - /* Now we know v != NULL too */ - if (set_sys_last_vars) { - PySys_SetObject("last_type", exception); - PySys_SetObject("last_value", v); - PySys_SetObject("last_traceback", tb); - } - hook = PySys_GetObject("excepthook"); - if (hook) { - PyObject *args = PyTuple_Pack(3, exception, v, tb); - PyObject *result = PyEval_CallObject(hook, args); - if (result == NULL) { - PyObject *exception2, *v2, *tb2; - if (PyErr_ExceptionMatches(PyExc_SystemExit)) { - handle_system_exit(); - } - PyErr_Fetch(&exception2, &v2, &tb2); - PyErr_NormalizeException(&exception2, &v2, &tb2); - /* It should not be possible for exception2 or v2 - to be NULL. However PyErr_Display() can't - tolerate NULLs, so just be safe. */ - if (exception2 == NULL) { - exception2 = Py_None; - Py_INCREF(exception2); - } - if (v2 == NULL) { - v2 = Py_None; - Py_INCREF(v2); - } - fflush(stdout); - PySys_WriteStderr("Error in sys.excepthook:\n"); - PyErr_Display(exception2, v2, tb2); - PySys_WriteStderr("\nOriginal exception was:\n"); - PyErr_Display(exception, v, tb); - Py_DECREF(exception2); - Py_DECREF(v2); - Py_XDECREF(tb2); - } - Py_XDECREF(result); - Py_XDECREF(args); - } else { - PySys_WriteStderr("sys.excepthook is missing\n"); - PyErr_Display(exception, v, tb); - } - Py_XDECREF(exception); - Py_XDECREF(v); - Py_XDECREF(tb); -} - -static void -print_exception(PyObject *f, PyObject *value) -{ - int err = 0; - PyObject *type, *tb; - _Py_IDENTIFIER(print_file_and_line); - - if (!PyExceptionInstance_Check(value)) { - PyFile_WriteString("TypeError: print_exception(): Exception expected for value, ", f); - PyFile_WriteString(Py_TYPE(value)->tp_name, f); - PyFile_WriteString(" found\n", f); - return; - } - - Py_INCREF(value); - fflush(stdout); - type = (PyObject *) Py_TYPE(value); - tb = PyException_GetTraceback(value); - if (tb && tb != Py_None) - err = PyTraceBack_Print(tb, f); - if (err == 0 && - _PyObject_HasAttrId(value, &PyId_print_file_and_line)) - { - PyObject *message; - const char *filename, *text; - int lineno, offset; - if (!parse_syntax_error(value, &message, &filename, - &lineno, &offset, &text)) - PyErr_Clear(); - else { - char buf[10]; - PyFile_WriteString(" File \"", f); - if (filename == NULL) - PyFile_WriteString("<string>", f); - else - PyFile_WriteString(filename, f); - PyFile_WriteString("\", line ", f); - PyOS_snprintf(buf, sizeof(buf), "%d", lineno); - PyFile_WriteString(buf, f); - PyFile_WriteString("\n", f); - if (text != NULL) - print_error_text(f, offset, text); - Py_DECREF(value); - value = message; - /* Can't be bothered to check all those - PyFile_WriteString() calls */ - if (PyErr_Occurred()) - err = -1; - } - } - if (err) { - /* Don't do anything else */ - } - else { - PyObject* moduleName; - char* className; - _Py_IDENTIFIER(__module__); - assert(PyExceptionClass_Check(type)); - className = PyExceptionClass_Name(type); - if (className != NULL) { - char *dot = strrchr(className, '.'); - if (dot != NULL) - className = dot+1; - } - - moduleName = _PyObject_GetAttrId(type, &PyId___module__); - if (moduleName == NULL || !PyUnicode_Check(moduleName)) - { - Py_XDECREF(moduleName); - err = PyFile_WriteString("<unknown>", f); - } - else { - char* modstr = _PyUnicode_AsString(moduleName); - if (modstr && strcmp(modstr, "builtins")) - { - err = PyFile_WriteString(modstr, f); - err += PyFile_WriteString(".", f); - } - Py_DECREF(moduleName); - } - if (err == 0) { - if (className == NULL) - err = PyFile_WriteString("<unknown>", f); - else - err = PyFile_WriteString(className, f); - } - } - if (err == 0 && (value != Py_None)) { - PyObject *s = PyObject_Str(value); - /* only print colon if the str() of the - object is not the empty string - */ - if (s == NULL) - err = -1; - else if (!PyUnicode_Check(s) || - PyUnicode_GetLength(s) != 0) - err = PyFile_WriteString(": ", f); - if (err == 0) - err = PyFile_WriteObject(s, f, Py_PRINT_RAW); - Py_XDECREF(s); - } - /* try to write a newline in any case */ - err += PyFile_WriteString("\n", f); - Py_XDECREF(tb); - Py_DECREF(value); - /* If an error happened here, don't show it. - XXX This is wrong, but too many callers rely on this behavior. */ - if (err != 0) - PyErr_Clear(); -} - -static const char *cause_message = - "\nThe above exception was the direct cause " - "of the following exception:\n\n"; - -static const char *context_message = - "\nDuring handling of the above exception, " - "another exception occurred:\n\n"; - -static void -print_exception_recursive(PyObject *f, PyObject *value, PyObject *seen) -{ - int err = 0, res; - PyObject *cause, *context; - - if (seen != NULL) { - /* Exception chaining */ - if (PySet_Add(seen, value) == -1) - PyErr_Clear(); - else if (PyExceptionInstance_Check(value)) { - cause = PyException_GetCause(value); - context = PyException_GetContext(value); - if (cause) { - res = PySet_Contains(seen, cause); - if (res == -1) - PyErr_Clear(); - if (res == 0) { - print_exception_recursive( - f, cause, seen); - err |= PyFile_WriteString( - cause_message, f); - } - } - else if (context) { - res = PySet_Contains(seen, context); - if (res == -1) - PyErr_Clear(); - if (res == 0) { - print_exception_recursive( - f, context, seen); - err |= PyFile_WriteString( - context_message, f); - } - } - Py_XDECREF(context); - Py_XDECREF(cause); - } - } - print_exception(f, value); - if (err != 0) - PyErr_Clear(); -} - -void -PyErr_Display(PyObject *exception, PyObject *value, PyObject *tb) -{ - PyObject *seen; - PyObject *f = PySys_GetObject("stderr"); - if (f == Py_None) { - /* pass */ - } - else if (f == NULL) { - _PyObject_Dump(value); - fprintf(stderr, "lost sys.stderr\n"); - } - else { - /* We choose to ignore seen being possibly NULL, and report - at least the main exception (it could be a MemoryError). - */ - seen = PySet_New(NULL); - if (seen == NULL) - PyErr_Clear(); - print_exception_recursive(f, value, seen); - Py_XDECREF(seen); - } -} - -PyObject * -PyRun_StringFlags(const char *str, int start, PyObject *globals, - PyObject *locals, PyCompilerFlags *flags) -{ - PyObject *ret = NULL; - mod_ty mod; - PyArena *arena = PyArena_New(); - if (arena == NULL) - return NULL; - - mod = PyParser_ASTFromString(str, "<string>", start, flags, arena); - if (mod != NULL) - ret = run_mod(mod, "<string>", globals, locals, flags, arena); - PyArena_Free(arena); - return ret; -} - -PyObject * -PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, - PyObject *locals, int closeit, PyCompilerFlags *flags) -{ - PyObject *ret; - mod_ty mod; - PyArena *arena = PyArena_New(); - if (arena == NULL) - return NULL; - - mod = PyParser_ASTFromFile(fp, filename, NULL, start, 0, 0, - flags, NULL, arena); - if (closeit) - fclose(fp); - if (mod == NULL) { - PyArena_Free(arena); - return NULL; - } - ret = run_mod(mod, filename, globals, locals, flags, arena); - PyArena_Free(arena); - return ret; -} - -static void -flush_io(void) -{ - PyObject *f, *r; - PyObject *type, *value, *traceback; - _Py_IDENTIFIER(flush); - - /* Save the current exception */ - PyErr_Fetch(&type, &value, &traceback); - - f = PySys_GetObject("stderr"); - if (f != NULL) { - r = _PyObject_CallMethodId(f, &PyId_flush, ""); - if (r) - Py_DECREF(r); - else - PyErr_Clear(); - } - f = PySys_GetObject("stdout"); - if (f != NULL) { - r = _PyObject_CallMethodId(f, &PyId_flush, ""); - if (r) - Py_DECREF(r); - else - PyErr_Clear(); - } - - PyErr_Restore(type, value, traceback); -} - -static PyObject * -run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals, - PyCompilerFlags *flags, PyArena *arena) -{ - PyCodeObject *co; - PyObject *v; - co = PyAST_Compile(mod, filename, flags, arena); - if (co == NULL) - return NULL; - v = PyEval_EvalCode((PyObject*)co, globals, locals); - Py_DECREF(co); - return v; -} - -static PyObject * -run_pyc_file(FILE *fp, const char *filename, PyObject *globals, - PyObject *locals, PyCompilerFlags *flags) -{ - PyCodeObject *co; - PyObject *v; - long magic; - long PyImport_GetMagicNumber(void); - - magic = PyMarshal_ReadLongFromFile(fp); - if (magic != PyImport_GetMagicNumber()) { - PyErr_SetString(PyExc_RuntimeError, - "Bad magic number in .pyc file"); - return NULL; - } - (void) PyMarshal_ReadLongFromFile(fp); - v = PyMarshal_ReadLastObjectFromFile(fp); - fclose(fp); - if (v == NULL || !PyCode_Check(v)) { - Py_XDECREF(v); - PyErr_SetString(PyExc_RuntimeError, - "Bad code object in .pyc file"); - return NULL; - } - co = (PyCodeObject *)v; - v = PyEval_EvalCode((PyObject*)co, globals, locals); - if (v && flags) - flags->cf_flags |= (co->co_flags & PyCF_MASK); - Py_DECREF(co); - return v; -} - -PyObject * -Py_CompileStringExFlags(const char *str, const char *filename, int start, - PyCompilerFlags *flags, int optimize) -{ - PyCodeObject *co; - mod_ty mod; - PyArena *arena = PyArena_New(); - if (arena == NULL) - return NULL; - - mod = PyParser_ASTFromString(str, filename, start, flags, arena); - if (mod == NULL) { - PyArena_Free(arena); - return NULL; - } - if (flags && (flags->cf_flags & PyCF_ONLY_AST)) { - PyObject *result = PyAST_mod2obj(mod); - PyArena_Free(arena); - return result; - } - co = PyAST_CompileEx(mod, filename, flags, optimize, arena); - PyArena_Free(arena); - return (PyObject *)co; -} - -/* For use in Py_LIMITED_API */ -#undef Py_CompileString -PyObject * -PyCompileString(const char *str, const char *filename, int start) -{ - return Py_CompileStringFlags(str, filename, start, NULL); -} - -struct symtable * -Py_SymtableString(const char *str, const char *filename, int start) -{ - struct symtable *st; - mod_ty mod; - PyCompilerFlags flags; - PyArena *arena = PyArena_New(); - if (arena == NULL) - return NULL; - - flags.cf_flags = 0; - mod = PyParser_ASTFromString(str, filename, start, &flags, arena); - if (mod == NULL) { - PyArena_Free(arena); - return NULL; - } - st = PySymtable_Build(mod, filename, 0); - PyArena_Free(arena); - return st; -} - -/* Preferred access to parser is through AST. */ -mod_ty -PyParser_ASTFromString(const char *s, const char *filename, int start, - PyCompilerFlags *flags, PyArena *arena) -{ - mod_ty mod; - PyCompilerFlags localflags; - perrdetail err; - int iflags = PARSER_FLAGS(flags); - - node *n = PyParser_ParseStringFlagsFilenameEx(s, filename, - &_PyParser_Grammar, start, &err, - &iflags); - if (flags == NULL) { - localflags.cf_flags = 0; - flags = &localflags; - } - if (n) { - flags->cf_flags |= iflags & PyCF_MASK; - mod = PyAST_FromNode(n, flags, filename, arena); - PyNode_Free(n); - } - else { - err_input(&err); - mod = NULL; - } - err_free(&err); - return mod; -} - -mod_ty -PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc, - int start, char *ps1, - char *ps2, PyCompilerFlags *flags, int *errcode, - PyArena *arena) -{ - mod_ty mod; - PyCompilerFlags localflags; - perrdetail err; - int iflags = PARSER_FLAGS(flags); - - node *n = PyParser_ParseFileFlagsEx(fp, filename, enc, - &_PyParser_Grammar, - start, ps1, ps2, &err, &iflags); - if (flags == NULL) { - localflags.cf_flags = 0; - flags = &localflags; - } - if (n) { - flags->cf_flags |= iflags & PyCF_MASK; - mod = PyAST_FromNode(n, flags, filename, arena); - PyNode_Free(n); - } - else { - err_input(&err); - if (errcode) - *errcode = err.error; - mod = NULL; - } - err_free(&err); - return mod; -} - -/* Simplified interface to parsefile -- return node or set exception */ - -node * -PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags) -{ - perrdetail err; - node *n = PyParser_ParseFileFlags(fp, filename, NULL, - &_PyParser_Grammar, - start, NULL, NULL, &err, flags); - if (n == NULL) - err_input(&err); - err_free(&err); - - return n; -} - -/* Simplified interface to parsestring -- return node or set exception */ - -node * -PyParser_SimpleParseStringFlags(const char *str, int start, int flags) -{ - perrdetail err; - node *n = PyParser_ParseStringFlags(str, &_PyParser_Grammar, - start, &err, flags); - if (n == NULL) - err_input(&err); - err_free(&err); - return n; -} - -node * -PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename, - int start, int flags) -{ - perrdetail err; - node *n = PyParser_ParseStringFlagsFilename(str, filename, - &_PyParser_Grammar, start, &err, flags); - if (n == NULL) - err_input(&err); - err_free(&err); - return n; -} - -node * -PyParser_SimpleParseStringFilename(const char *str, const char *filename, int start) -{ - return PyParser_SimpleParseStringFlagsFilename(str, filename, start, 0); -} - -/* May want to move a more generalized form of this to parsetok.c or - even parser modules. */ - -void -PyParser_ClearError(perrdetail *err) -{ - err_free(err); -} - -void -PyParser_SetError(perrdetail *err) -{ - err_input(err); -} - -static void -err_free(perrdetail *err) -{ - Py_CLEAR(err->filename); -} - -/* Set the error appropriate to the given input error code (see errcode.h) */ - -static void -err_input(perrdetail *err) -{ - PyObject *v, *w, *errtype, *errtext; - PyObject *msg_obj = NULL; - char *msg = NULL; - - errtype = PyExc_SyntaxError; - switch (err->error) { - case E_ERROR: - return; - case E_SYNTAX: - errtype = PyExc_IndentationError; - if (err->expected == INDENT) - msg = "expected an indented block"; - else if (err->token == INDENT) - msg = "unexpected indent"; - else if (err->token == DEDENT) - msg = "unexpected unindent"; - else { - errtype = PyExc_SyntaxError; - msg = "invalid syntax"; - } - break; - case E_TOKEN: - msg = "invalid token"; - break; - case E_EOFS: - msg = "EOF while scanning triple-quoted string literal"; - break; - case E_EOLS: - msg = "EOL while scanning string literal"; - break; - case E_INTR: - if (!PyErr_Occurred()) - PyErr_SetNone(PyExc_KeyboardInterrupt); - goto cleanup; - case E_NOMEM: - PyErr_NoMemory(); - goto cleanup; - case E_EOF: - msg = "unexpected EOF while parsing"; - break; - case E_TABSPACE: - errtype = PyExc_TabError; - msg = "inconsistent use of tabs and spaces in indentation"; - break; - case E_OVERFLOW: - msg = "expression too long"; - break; - case E_DEDENT: - errtype = PyExc_IndentationError; - msg = "unindent does not match any outer indentation level"; - break; - case E_TOODEEP: - errtype = PyExc_IndentationError; - msg = "too many levels of indentation"; - break; - case E_DECODE: { - PyObject *type, *value, *tb; - PyErr_Fetch(&type, &value, &tb); - msg = "unknown decode error"; - if (value != NULL) - msg_obj = PyObject_Str(value); - Py_XDECREF(type); - Py_XDECREF(value); - Py_XDECREF(tb); - break; - } - case E_LINECONT: - msg = "unexpected character after line continuation character"; - break; - - case E_IDENTIFIER: - msg = "invalid character in identifier"; - break; - default: - fprintf(stderr, "error=%d\n", err->error); - msg = "unknown parsing error"; - break; - } - /* err->text may not be UTF-8 in case of decoding errors. - Explicitly convert to an object. */ - if (!err->text) { - errtext = Py_None; - Py_INCREF(Py_None); - } else { - errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), - "replace"); - } - v = Py_BuildValue("(OiiN)", err->filename, - err->lineno, err->offset, errtext); - if (v != NULL) { - if (msg_obj) - w = Py_BuildValue("(OO)", msg_obj, v); - else - w = Py_BuildValue("(sO)", msg, v); - } else - w = NULL; - Py_XDECREF(v); - PyErr_SetObject(errtype, w); - Py_XDECREF(w); -cleanup: - Py_XDECREF(msg_obj); - if (err->text != NULL) { - PyObject_FREE(err->text); - err->text = NULL; - } -} - -/* Print fatal error message and abort */ - -void -Py_FatalError(const char *msg) -{ - const int fd = fileno(stderr); - PyThreadState *tstate; - - fprintf(stderr, "Fatal Python error: %s\n", msg); - fflush(stderr); /* it helps in Windows debug build */ - if (PyErr_Occurred()) { - PyErr_PrintEx(0); - } - else { - tstate = _Py_atomic_load_relaxed(&_PyThreadState_Current); - if (tstate != NULL) { - fputc('\n', stderr); - fflush(stderr); - _Py_DumpTracebackThreads(fd, tstate->interp, tstate); - } - _PyFaulthandler_Fini(); - } - - abort(); -} - -/* Clean up and exit */ - -#ifdef WITH_THREAD -#include "pythread.h" -#endif - -static void (*pyexitfunc)(void) = NULL; -/* For the atexit module. */ -void _Py_PyAtExit(void (*func)(void)) -{ - pyexitfunc = func; -} - -static void -call_py_exitfuncs(void) -{ - if (pyexitfunc == NULL) - return; - - (*pyexitfunc)(); - PyErr_Clear(); -} - -/* Wait until threading._shutdown completes, provided - the threading module was imported in the first place. - The shutdown routine will wait until all non-daemon - "threading" threads have completed. */ -static void -wait_for_thread_shutdown(void) -{ -#ifdef WITH_THREAD - _Py_IDENTIFIER(_shutdown); - PyObject *result; - PyThreadState *tstate = PyThreadState_GET(); - PyObject *threading = PyMapping_GetItemString(tstate->interp->modules, - "threading"); - if (threading == NULL) { - /* threading not imported */ - PyErr_Clear(); - return; - } - result = _PyObject_CallMethodId(threading, &PyId__shutdown, ""); - if (result == NULL) { - PyErr_WriteUnraisable(threading); - } - else { - Py_DECREF(result); - } - Py_DECREF(threading); -#endif -} - -#define NEXITFUNCS 32 -static void (*exitfuncs[NEXITFUNCS])(void); -static int nexitfuncs = 0; - -int Py_AtExit(void (*func)(void)) -{ - if (nexitfuncs >= NEXITFUNCS) - return -1; - exitfuncs[nexitfuncs++] = func; - return 0; -} - -static void -call_ll_exitfuncs(void) -{ - while (nexitfuncs > 0) - (*exitfuncs[--nexitfuncs])(); - - fflush(stdout); - fflush(stderr); -} - -void -Py_Exit(int sts) -{ - Py_Finalize(); - - exit(sts); -} - -static void -initsigs(void) -{ -#ifdef SIGPIPE - PyOS_setsig(SIGPIPE, SIG_IGN); -#endif -#ifdef SIGXFZ - PyOS_setsig(SIGXFZ, SIG_IGN); -#endif -#ifdef SIGXFSZ - PyOS_setsig(SIGXFSZ, SIG_IGN); -#endif - PyOS_InitInterrupts(); /* May imply initsignal() */ -} - - -/* Restore signals that the interpreter has called SIG_IGN on to SIG_DFL. - * - * All of the code in this function must only use async-signal-safe functions, - * listed at `man 7 signal` or - * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. - */ -void -_Py_RestoreSignals(void) -{ -#ifdef SIGPIPE - PyOS_setsig(SIGPIPE, SIG_DFL); -#endif -#ifdef SIGXFZ - PyOS_setsig(SIGXFZ, SIG_DFL); -#endif -#ifdef SIGXFSZ - PyOS_setsig(SIGXFSZ, SIG_DFL); -#endif -} - - -/* - * The file descriptor fd is considered ``interactive'' if either - * a) isatty(fd) is TRUE, or - * b) the -i flag was given, and the filename associated with - * the descriptor is NULL or "<stdin>" or "???". - */ -int -Py_FdIsInteractive(FILE *fp, const char *filename) -{ - if (isatty((int)fileno(fp))) - return 1; - if (!Py_InteractiveFlag) - return 0; - return (filename == NULL) || - (strcmp(filename, "<stdin>") == 0) || - (strcmp(filename, "???") == 0); -} - -/* Wrappers around sigaction() or signal(). */ - -PyOS_sighandler_t -PyOS_getsig(int sig) -{ -#ifdef HAVE_SIGACTION - struct sigaction context; - if (sigaction(sig, NULL, &context) == -1) - return SIG_ERR; - return context.sa_handler; -#else - PyOS_sighandler_t handler; - handler = signal(sig, SIG_IGN); - if (handler != SIG_ERR) - signal(sig, handler); - return handler; -#endif -} - -/* - * All of the code in this function must only use async-signal-safe functions, - * listed at `man 7 signal` or - * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. - */ -PyOS_sighandler_t -PyOS_setsig(int sig, PyOS_sighandler_t handler) -{ -#ifdef HAVE_SIGACTION - /* Some code in Modules/signalmodule.c depends on sigaction() being - * used here if HAVE_SIGACTION is defined. Fix that if this code - * changes to invalidate that assumption. - */ - struct sigaction context, ocontext; - context.sa_handler = handler; - sigemptyset(&context.sa_mask); - context.sa_flags = 0; - if (sigaction(sig, &context, &ocontext) == -1) - return SIG_ERR; - return ocontext.sa_handler; -#else - PyOS_sighandler_t oldhandler; - oldhandler = signal(sig, handler); -#ifdef HAVE_SIGINTERRUPT - siginterrupt(sig, 1); -#endif - return oldhandler; -#endif -} -
--- a/cos/utils/bin2c.py Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -#!/usr/bin/python - -import sys -print(sys.argv) -if len(sys.argv) < 2: - print('Usage: {0} binfile [headerfile]'.format(sys.argv[0])) - sys.exit(-1) - -with open(sys.argv[1], 'rb') as f: - data = f.read() - -s = ', '.join(hex(b) for b in data) -output = 'unsigned char data[] = {{{0}}};'.format(s) -if len(sys.argv) < 3: - print(output) -else: - with open(sys.argv[2], 'w') as f: - f.write(output) -
--- a/cos/utils/makeinitrd.py Fri Jul 05 13:26:51 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,34 +0,0 @@ -#!/usr/bin/python - -# Script for generation of an initial ramdisk that contains -# the first programs to run. - -import struct -import sys - -if len(sys.argv) < 3: - print('Usage: {0} output file1 [file2] ...'.format(sys.argv[0])) - sys.exit(1) - -outputfilename = sys.argv[1] -inputFileNames = sys.argv[2:] - -with open(outputfilename, 'wb') as outputFile: - # Amount of files: - outputFile.write(struct.pack('<I', 0x1337babe)) - outputFile.write(struct.pack('<I', len(inputFileNames))) - for inputFileName in inputFileNames: - # Magic: - outputFile.write(struct.pack('<I', 0xcafebabe)) - - # Filename: - fn = inputFileName.encode('ascii') - outputFile.write(struct.pack('<I', len(fn))) - outputFile.write(fn) - - # Data: - with open(inputFileName, 'rb') as inputFile: - data = inputFile.read() - outputFile.write(struct.pack('<I', len(data))) - outputFile.write(data) -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/bin2c.py Fri Jul 05 14:13:59 2013 +0200 @@ -0,0 +1,19 @@ +#!/usr/bin/python + +import sys +print(sys.argv) +if len(sys.argv) < 2: + print('Usage: {0} binfile [headerfile]'.format(sys.argv[0])) + sys.exit(-1) + +with open(sys.argv[1], 'rb') as f: + data = f.read() + +s = ', '.join(hex(b) for b in data) +output = 'unsigned char data[] = {{{0}}};'.format(s) +if len(sys.argv) < 3: + print(output) +else: + with open(sys.argv[2], 'w') as f: + f.write(output) +
--- a/python/c3/analyse.py Fri Jul 05 13:26:51 2013 +0200 +++ b/python/c3/analyse.py Fri Jul 05 14:13:59 2013 +0200 @@ -44,6 +44,7 @@ # Create subscope: if type(sym) in [Package, Function]: self.curScope = Scope(self.curScope) + sym.innerScope = self.curScope def quitScope(self, sym): # Pop out of scope:
--- a/python/c3/astnodes.py Fri Jul 05 13:26:51 2013 +0200 +++ b/python/c3/astnodes.py Fri Jul 05 14:13:59 2013 +0200 @@ -98,7 +98,7 @@ self.isReadOnly = False self.isParameter = False def __repr__(self): - return '{}'.format(self.name) + return 'Var {}'.format(self.name) # Procedure types class Function(Symbol): @@ -109,7 +109,7 @@ self.declarations = [] def __repr__(self): - return '{}'.format(self.name) + return 'Func {}'.format(self.name) # Operations / Expressions: class Unop(Node):
--- a/python/c3/codegenerator.py Fri Jul 05 13:26:51 2013 +0200 +++ b/python/c3/codegenerator.py Fri Jul 05 14:13:59 2013 +0200 @@ -3,25 +3,24 @@ from .scope import boolType class CodeGenerator: - """ Generates intermediate code from a package """ - def gencode(self, pkg): - assert type(pkg) is astnodes.Package - self.varMap = {} # Maps variables to storage locations. - self.funcMap = {} - self.builder = ir.Builder() - m = ir.Module(pkg.name) - self.builder.setModule(m) - self.genModule(pkg) - return m + """ Generates intermediate code from a package """ + def gencode(self, pkg): + assert type(pkg) is astnodes.Package + self.varMap = {} # Maps variables to storage locations. + self.funcMap = {} + self.builder = ir.Builder() + m = ir.Module(pkg.name) + self.builder.setModule(m) + self.genModule(pkg) + return m - # inner helpers: - def genModule(self, pkg): + # inner helpers: + def genModule(self, pkg): # Take care of forward declarations: - for s in pkg.scope: - if type(s) is astnodes.Function: + for s in pkg.innerScope.Functions: f = self.builder.newFunction(s.name) self.funcMap[s] = f - for s in pkg.scope: + for s in pkg.innerScope: if type(s) is astnodes.Variable: v = self.builder.newVariable(s.name) #self.builder.addIns(ir.Alloc(v)) @@ -35,7 +34,7 @@ self.builder.setBB(bb) # generate room for locals: - for sym in s.scope: + for sym in s.innerScope: #print(sym, sym.isParameter) # TODO: handle parameters different v = self.builder.newTmp(sym.name) @@ -49,7 +48,7 @@ else: print(s) - def genCode(self, code): + def genCode(self, code): if type(code) is astnodes.CompoundStatement: for s in code.statements: self.genCode(s) @@ -88,12 +87,12 @@ self.builder.setBB(bbtest) self.genCondCode(code.condition, bbdo, te) self.builder.setBB(bbdo) - self.genCode(code.dostatement) + self.genCode(code.statement) self.builder.addIns(ir.Branch(bbtest)) self.builder.setBB(te) else: print('Unknown stmt:', code) - def genCondCode(self, expr, bbtrue, bbfalse): + def genCondCode(self, expr, bbtrue, bbfalse): # Implement sequential logical operators assert expr.typ == boolType if type(expr) is astnodes.Binop: @@ -121,7 +120,7 @@ self.builder.addIns(ir.Branch(bbfalse)) else: print('Unknown cond', expr) - def genExprCode(self, expr): + def genExprCode(self, expr): if type(expr) is astnodes.Binop: ra = self.genExprCode(expr.a) rb = self.genExprCode(expr.b)
--- a/python/c3/scope.py Fri Jul 05 13:26:51 2013 +0200 +++ b/python/c3/scope.py Fri Jul 05 14:13:59 2013 +0200 @@ -1,42 +1,53 @@ from . import astnodes class Scope: - """ A scope contains all symbols in a scope """ - def __init__(self, parent=None): - self.symbols = {} - self.parent = parent - def __iter__(self): + """ A scope contains all symbols in a scope """ + def __init__(self, parent=None): + self.symbols = {} + self.parent = parent + + def __iter__(self): # Iterate in a deterministic manner: return iter(self.Constants + self.Variables + self.Functions) - @property - def Syms(self): - syms = self.symbols.values() - return sorted(syms, key=lambda v: v.name) - @property - def Constants(self): - return [s for s in self.Syms if type(s) is astnodes.Constant] - @property - def Variables(self): - return [s for s in self.Syms if type(s) is astnodes.Variable] - @property - def Functions(self): - return [s for s in self.Syms if type(s) is astnodes.Function] - def getSymbol(self, name): + + @property + def Syms(self): + syms = self.symbols.values() + return sorted(syms, key=lambda v: v.name) + + @property + def Constants(self): + return [s for s in self.Syms if type(s) is astnodes.Constant] + + @property + def Variables(self): + return [s for s in self.Syms if type(s) is astnodes.Variable] + + @property + def Functions(self): + return [s for s in self.Syms if type(s) is astnodes.Function] + + def getSymbol(self, name): if name in self.symbols: return self.symbols[name] # Look for symbol: if self.parent: return self.parent.getSymbol(name) raise CompilerException("Symbol {0} not found".format(name), name.loc) - def hasSymbol(self, name): + + def hasSymbol(self, name): if name in self.symbols: return True if self.parent: return self.parent.hasSymbol(name) return False - def addSymbol(self, sym): + + def addSymbol(self, sym): self.symbols[sym.name] = sym + def __repr__(self): + return 'Scope with {} symbols'.format(len(self.symbols)) + # buildin types: intType = astnodes.BaseType('int') doubleType = astnodes.BaseType('double')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/makeinitrd.py Fri Jul 05 14:13:59 2013 +0200 @@ -0,0 +1,34 @@ +#!/usr/bin/python + +# Script for generation of an initial ramdisk that contains +# the first programs to run. + +import struct +import sys + +if len(sys.argv) < 3: + print('Usage: {0} output file1 [file2] ...'.format(sys.argv[0])) + sys.exit(1) + +outputfilename = sys.argv[1] +inputFileNames = sys.argv[2:] + +with open(outputfilename, 'wb') as outputFile: + # Amount of files: + outputFile.write(struct.pack('<I', 0x1337babe)) + outputFile.write(struct.pack('<I', len(inputFileNames))) + for inputFileName in inputFileNames: + # Magic: + outputFile.write(struct.pack('<I', 0xcafebabe)) + + # Filename: + fn = inputFileName.encode('ascii') + outputFile.write(struct.pack('<I', len(fn))) + outputFile.write(fn) + + # Data: + with open(inputFileName, 'rb') as inputFile: + data = inputFile.read() + outputFile.write(struct.pack('<I', len(data))) + outputFile.write(data) +
--- a/python/testc3.py Fri Jul 05 13:26:51 2013 +0200 +++ b/python/testc3.py Fri Jul 05 14:13:59 2013 +0200 @@ -109,6 +109,7 @@ if not ircode: self.diag.printErrors(snippet) self.assertTrue(ircode) + return ircode def testFunctArgs(self): snippet = """ @@ -271,7 +272,8 @@ } """ - self.expectOK(snippet) + ircode = self.expectOK(snippet) + self.assertEqual(1, len(ircode.Functions)) if __name__ == '__main__': unittest.main()