changeset 13:c05fac334ab1

compute size for objects
author Thinker K.F. Li <thinker@codemud.net>
date Wed, 25 May 2011 21:47:15 +0800
parents 8101024c942b
children f5728c6868b2
files paraspace/dexfile.py
diffstat 1 files changed, 322 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/paraspace/dexfile.py	Wed May 25 01:15:07 2011 +0800
+++ b/paraspace/dexfile.py	Wed May 25 21:47:15 2011 +0800
@@ -1,3 +1,5 @@
+import itertools
+        
 class _DEX_header(object):
     magic = None                # 0x00, 8 bytes
     checksum = None             # 0x08, 4 bytes
@@ -30,6 +32,8 @@
         'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
         'dataSize dataOff'.split()
 
+    data_size = 0x70
+
     def parse(self, data):
         self.magic = data[:8]
         self.checksum = data[8: 0x0c]
@@ -111,6 +115,26 @@
     return v, nbytes
 
 
+def _to_uleb128(v):  # encode non-negative int v as a ULEB128 string
+    assert v >= 0
+    # Emit 7 bits per byte, least-significant group first.
+    data = ''
+    while True:
+        if v > 0x7f:
+            data = data + chr((v & 0x7f) | 0x80)  # 0x80: more bytes follow
+        else:
+            data = data + chr(v & 0x7f)  # last byte, high bit clear
+            break
+        v = v >> 7
+        pass
+    # data holds one character per encoded byte.
+    return data
+
+
+def _uleb128_sz(v):  # number of bytes v takes when ULEB128-encoded
+    return len(_to_uleb128(v))
+
+
 def _leb128(data):
     v, sh = _uleb128(data)
     if v & (1 << (sh * 7 - 1)):
@@ -119,6 +143,36 @@
     return v, sh
 
 
+def _to_leb128(v):  # encode signed int v as an SLEB128 string
+    data = ''
+    while True:
+        if v > 0x3f or v < ~0x3f:  # outside -64..63: needs another byte
+            data = data + chr((v & 0x7f) | 0x80)
+        else:
+            data = data + chr(v & 0x7f)  # bit 6 of last byte is the sign
+            break
+        v = v >> 7  # arithmetic shift keeps the sign of v
+        pass
+    return data
+
+
+def _leb128_sz(v):  # number of bytes v takes when SLEB128-encoded
+    return len(_to_leb128(v))
+
+
+def _compute_sz(o):  # size of o, refreshed first if o can recompute it
+    if hasattr(o, 'compute_size'):
+        o.compute_size()  # expected to update o.data_size
+        pass
+    return o.data_size
+
+
+def _sum_data_size(obj_list):  # total _compute_sz() over obj_list
+    obj_sizes = itertools.imap(_compute_sz, obj_list)  # lazy, per object
+    total = sum(obj_sizes)
+    return total
+
+
 class _DEX_MapItem(object):
     type = None                 # 2 bytes
     unused = None               # 2 bytes
@@ -266,6 +320,14 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):  # data_size = bytes of the four ULEB128 counts
+        self.data_size = \
+            _uleb128_sz(self.staticFieldsSize) + \
+            _uleb128_sz(self.instanceFieldsSize) + \
+            _uleb128_sz(self.directMethodsSize) + \
+            _uleb128_sz(self.virtualMethodsSize)
+        pass
     pass
 
 
@@ -284,6 +346,12 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):  # data_size = ULEB128 bytes of both members
+        self.data_size = \
+            _uleb128_sz(self.fieldIdx) + \
+            _uleb128_sz(self.accessFlags)
+        pass  # NOTE(review): confirm fieldIdx is the on-disk delta here
     pass
 
 
@@ -308,6 +376,13 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):  # data_size = ULEB128 bytes of three members
+        self.data_size = \
+            _uleb128_sz(self.methodIdx) + \
+            _uleb128_sz(self.accessFlags) + \
+            _uleb128_sz(self.codeOff)
+        pass  # NOTE(review): confirm methodIdx is the on-disk delta here
     pass
 
 class _DEX_ClassData(object):
@@ -320,17 +395,17 @@
     data_size = None
 
     def parse(self, data, off):
+        moff = man_off(off)
+        
         header = _DEX_ClassDataHeader()
-        header.parse(data, off)
+        header.parse(data, moff())
         self.header = header
-        
-        cur_off = [off + header.data_size]
+        moff(header.data_size)
         
         def parse_field():
             field = _DEX_Field()
-            off = cur_off[0]
-            field.parse(data, off)
-            cur_off[0] = cur_off[0] + field.data_size
+            field.parse(data, moff())
+            moff(field.data_size)
             
             #
             # field index depends previous one to reduce size
@@ -342,9 +417,8 @@
 
         def parse_method():
             method = _DEX_Method()
-            off = cur_off[0]
-            method.parse(data, off)
-            cur_off[0] = cur_off[0] + method.data_size
+            method.parse(data, moff())
+            moff(method.data_size)
 
             #
             # method index depends previous one to reduce size
@@ -367,7 +441,17 @@
         self.virtualMethods = [parse_method()
                                for i in range(header.virtualMethodsSize)]
 
-        self.data_size = cur_off[0] - off
+        self.data_size = moff() - off
+        pass
+
+    def compute_size(self):
+        sz = self.header.data_size
+        sz = sz + _sum_data_size(itertools.chain(self.staticFields,
+                                                 self.instanceFields,
+                                                 self.directMethods,
+                                                 self.virtualMethods))
+        
+        self.data_size = sz
         pass
     pass
 
@@ -405,6 +489,12 @@
         self.typeItems = typeItems
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # data_size = 4 fixed bytes + all type items
+        size = 4 + _sum_data_size(self.typeItems)
+        # NOTE(review): the 4 is presumably the item-count field -- confirm.
+        self.data_size = size
+        pass
     pass
 
 
@@ -414,19 +504,31 @@
     data_size = None
 
     def parse(self, num, data, off):
+        moff = man_off(off)
+        
         def parse():
-            off = (cur_off[0] + 3) & ~0x3 # type list must aligned for 4 bytes
+            moff.off = (moff(0) + 3) & ~0x3 # aligned for 4 bytes
             typeList = _DEX_TypeList()
-            typeList.parse(data, off)
-            cur_off[0] = off + typeList.data_size
+            typeList.parse(data, moff())
+            moff(typeList.data_size)
             
             return typeList
         
-        cur_off = [off]
         typeLists = [parse() for i in range(num)]
 
         self.typeLists = typeLists
-        self.data_size = cur_off[0] - off
+        self.data_size = moff(0) - off
+        pass
+
+    def compute_size(self):
+        def compute_align(prev, cur):
+            v = ((prev + 3) & ~0x3) + cur
+            return v
+
+        sizes = itertools.imap(_compute_sz, self.typeLists)
+        size = reduce(compute_align, sizes)
+        
+        self.data_size = size
         pass
     pass
 
@@ -456,13 +558,13 @@
     data_size = None
 
     def parse(self, data, off):
-        cur_off = off
-        self.typeIdx, sh = _uleb128(data[cur_off:cur_off + 5])
-        cur_off = cur_off + sh
-        self.address, sh = _uleb128(data[cur_off:cur_off + 5])
-        cur_off = cur_off + sh
+        moff = man_off(off)
+        self.typeIdx, sh = _uleb128(data[moff():moff() + 5])
+        moff(sh)
+        self.address, sh = _uleb128(data[moff():moff() + 5])
+        moff(sh)
 
-        self.data_size = cur_off - off
+        self.data_size = moff() - off
         pass
 
     def parse1(self, data, off):
@@ -470,6 +572,17 @@
         
         self.data_size = sh
         pass
+
+    def compute_size(self):  # data_size = optional typeIdx + address
+        if self.typeIdx is not None:
+            size = _uleb128_sz(self.typeIdx)
+        else:
+            size = 0  # no typeIdx set: only the address is counted
+            pass
+        size = size + _uleb128_sz(self.address)
+        # Both members are sized as ULEB128 values.
+        self.data_size = size
+        pass
     pass
 
 
@@ -512,6 +625,18 @@
         
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        count = len(self.handlers)
+        if self.catchesAll:
+            count = -(count - 1)
+            pass
+        count_sz = _leb128_sz(count)
+
+        handlers_size = _sum_data_size(self.handlers)
+        
+        size = count_sz + handlers_size
+        pass
     pass
 
 
@@ -568,6 +693,28 @@
         moff.off = (moff() + 3) & ~0x3 # round code item to 4 bytes
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # recompute data_size of this code item
+        size = 16 + self.insnsSize * 2
+        # 16 fixed bytes, then insnsSize units of 2 bytes each.
+        if self.triesSize > 0:
+            size = (size + 0x3) & ~0x3
+            # Pad to 4 bytes before the try items are counted.
+            try_items_size = _sum_data_size(self.try_items)
+            # Handler list: a ULEB128 count followed by the handlers.
+            catch_handler_items = self.catch_handler_items
+            catch_handler_items_cnt = len(catch_handler_items)
+            catch_handler_items_cnt_sz = _uleb128_sz(catch_handler_items_cnt)
+            catch_handler_items_sz = _sum_data_size(catch_handler_items)
+            catch_handler_items_size = \
+                catch_handler_items_cnt_sz + \
+                catch_handler_items_sz
+            # Add both tables to the padded instruction size.
+            size = size + try_items_size + catch_handler_items_size
+            pass
+        # The whole item is rounded up to a multiple of 4 bytes.
+        self.data_size = (size + 3) & ~0x3
+        pass
     pass
 
 
@@ -608,6 +755,13 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # data_size = 4 fixed bytes + self.annotations
+        annotations_size = _sum_data_size(self.annotations)
+        size = 4 + annotations_size
+        # NOTE(review): the 4 is presumably the entry-count field -- confirm.
+        self.data_size = size
+        pass
     pass
 
 
@@ -699,6 +853,17 @@
         
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # data_size = 16 fixed bytes + all item lists
+        field_anno_sz = _sum_data_size(self.fieldAnnotationsItems)
+        method_anno_sz = _sum_data_size(self.methodAnnotationsItems)
+        parameter_anno_sz = _sum_data_size(self.parameterAnnotationsItems)
+        # Field, method and parameter entries are summed together.
+        all_items_size = field_anno_sz + method_anno_sz + parameter_anno_sz
+        size = 16 + all_items_size
+        # NOTE(review): the 16 is presumably the fixed header -- confirm.
+        self.data_size = size
+        pass
     pass
 
 
@@ -831,6 +996,52 @@
         
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        if self.nameIdx is not None:
+            nameIdx_size = _uleb128_sz(self.nameIdx)
+        else:
+            nameIdx_size = 0
+            pass
+        
+        valueType = self.valueType
+        width = valueType >> self.kDexAnnotationValueArgShift
+        
+        vtype = valueType & self.kDexAnnotationValueTypeMask
+
+        if vtype in (self.kDexAnnotationByte,
+                     self.kDexAnnotationShort,
+                     self.kDexAnnotationChar,
+                     self.kDexAnnotationInt,
+                     self.kDexAnnotationLong,
+                     self.kDexAnnotationFloat,
+                     self.kDexAnnotationDouble,
+                     self.kDexAnnotationString,
+                     self.kDexAnnotationType,
+                     self.kDexAnnotationMethod,
+                     self.kDexAnnotationField,
+                     self.kDexAnnotationEnum):
+            value_size = width + 2
+            pass
+        elif vtype in (self.kDexAnnotationBoolean,
+                       self.kDexAnnotationNull):
+            value_size = 1
+            pass
+        elif vtype == self.kDexAnnotationArray:
+            array_cnt = len(self.value)
+            array_cnt_size = _uleb128_sz(array_cnt)
+            array_size = _sum_data_size(self.value)
+            value_size = 1 + array_cnt_size + array_size
+            pass
+        elif vtype == self.kDexAnnotationAnnotation:
+            value_size = 1 + _compute_sz(self.value)
+            pass
+        else:
+            raise ValueError, \
+                'Bad annotation element value byte 0x02x' % (valueType)
+        
+        self.data_size = nameIdx_size + value_size
+        pass
     pass
 
 
@@ -876,6 +1087,23 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # recompute data_size of this annotation
+        if self.visibility is not None:
+            visibility_size = 1
+        else:
+            visibility_size = 0
+            pass
+        # visibility counts 1 byte when set, nothing when it is None.
+        typeIdx_size = _uleb128_sz(self.typeIdx)
+        # Members: a ULEB128 count followed by the members themselves.
+        members_cnt_size = _uleb128_sz(len(self.members))
+        members_size = members_cnt_size + _sum_data_size(self.members)
+        # Total of the three parts.
+        size = visibility_size + typeIdx_size + members_size
+        # Store the result for _compute_sz() to return.
+        self.data_size = size
+        pass
     pass
 
 
@@ -901,6 +1129,13 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # data_size = ULEB128 count + all elements
+        elements_cnt_size = _uleb128_sz(len(self.elements))
+        size = elements_cnt_size + _sum_data_size(self.elements)
+        # Store the result for _compute_sz() to return.
+        self.data_size = size
+        pass
     pass
 
 
@@ -960,7 +1195,7 @@
                 opcodes.append((opcode, adv))
                 pass
             elif opcode == self.DBG_ADVANCE_LINE:
-                adv, sh = _uleb128(data[moff():moff() + 5])
+                adv, sh = _leb128(data[moff():moff() + 5])
                 moff(sh)
                 opcodes.append((opcode, adv))
                 pass
@@ -1003,6 +1238,53 @@
         
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):  # recompute data_size of this debug info item
+        start_line_size = _uleb128_sz(self.start_line)
+        # Parameters: a ULEB128 count, then one ULEB128 value per entry.
+        parameters_cnt_size = _uleb128_sz(len(self.parameters))
+        parameter_sizes = itertools.imap(_uleb128_sz, self.parameters)
+        parameters_size = parameters_cnt_size + sum(parameter_sizes)
+        # Size in bytes of one entry of self.opcodes (opcode first).
+        def compute_opcode_size(code):
+            opcode = code[0]
+            # Every opcode costs 1 byte; operands are added per kind.
+            if opcode == self.DBG_END_SEQUENCE:
+                size = 1
+            elif opcode == self.DBG_ADVANCE_PC:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode == self.DBG_ADVANCE_LINE:
+                size = 1 + _leb128_sz(code[1])  # operand is signed
+            elif opcode in (self.DBG_START_LOCAL,
+                            self.DBG_START_LOCAL_EXTENDED):
+                size = 1 + _uleb128_sz(code[1]) + _uleb128_sz(code[2]) + \
+                    _uleb128_sz(code[3])
+                if len(code) == 5:  # a fifth entry adds one more operand
+                    size = size + _uleb128_sz(code[4])
+                    pass
+                pass
+            elif opcode == self.DBG_END_LOCAL:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode == self.DBG_RESTART_LOCAL:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode in (self.DBG_SET_PROLOGUE_END,
+                            self.DBG_SET_EPILOGUE_BEGIN,
+                            self.DBG_SET_FILE):
+                size = 1  # NOTE(review): confirm DBG_SET_FILE has no operand
+            else:
+                size = 1  # any other opcode is counted as a single byte
+                pass
+            # size is always bound: every branch above assigns it.
+            return size
+        # List of per-opcode sizes, summed below.
+        opcode_sizes = itertools.imap(compute_opcode_size, self.opcodes)
+        opcode_sizes = [i for i in opcode_sizes]
+        opcodes_size = sum(opcode_sizes)
+        # Header fields plus the opcode stream.
+        size = start_line_size + parameters_size + opcodes_size
+        # Store the result for _compute_sz() to return.
+        self.data_size = size
+        pass
     pass
 
 
@@ -1418,35 +1700,46 @@
         pass
 
     print
-    print 'TypeLists size is %d bytes' % (dex._typeLists.data_size)
+    print 'TypeLists size is %d/%d bytes' % (dex._typeLists.data_size,
+                                             _compute_sz(dex._typeLists))
 
     bytes = sum([code.data_size for code in dex._codeItems])
+    rbytes = sum([_compute_sz(code) for code in dex._codeItems])
     print
-    print 'CodeItems size is %d bytes' % (bytes)
+    print 'CodeItems size is %d/%d bytes' % (bytes, rbytes)
     
     bytes = sum([annoset.data_size for annoset in dex._annotationSetItems])
+    rbytes = sum([_compute_sz(annoset) for annoset in dex._annotationSetItems])
     print
-    print 'AnnotationSetItems size is %d bytes' % (bytes)
+    print 'AnnotationSetItems size is %d/%d bytes' % (bytes, rbytes)
     
     bytes = sum([annodir.data_size
                  for annodir in dex._annotationsDirectoryItems])
+    rbytes = sum([_compute_sz(annodir)
+                  for annodir in dex._annotationsDirectoryItems])
     print
-    print 'AnnotationsDirtoryItems size is %d bytes' % (bytes)
+    print 'AnnotationsDirtoryItems size is %d/%d bytes' % (bytes, rbytes)
     
     bytes = sum([annoitem.data_size
                  for annoitem in dex._annotationItems])
+    rbytes = sum([_compute_sz(annoitem)
+                  for annoitem in dex._annotationItems])
     print
-    print 'AnnotationItems size is %d bytes' % (bytes)
+    print 'AnnotationItems size is %d/%d bytes' % (bytes, rbytes)
     
     bytes = sum([encodeditem.data_size
                  for encodeditem in dex._encodedArrayItems])
+    rbytes = sum([_compute_sz(encodeditem)
+                  for encodeditem in dex._encodedArrayItems])
     print
-    print 'EncodedArrayItems size is %d bytes' % (bytes)
+    print 'EncodedArrayItems size is %d/%d bytes' % (bytes, rbytes)
     
     bytes = sum([debuginfoitem.data_size
                  for debuginfoitem in dex._debugInfoItems])
+    rbytes = sum([_compute_sz(debuginfoitem)
+                  for debuginfoitem in dex._debugInfoItems])
     print
-    print 'DebugInfoItems size is %d bytes' % (bytes)
+    print 'DebugInfoItems size is %d/%d bytes' % (bytes, rbytes)
     
     print
     print 'Data maps'