# HG changeset patch
# User Thinker K.F. Li
# Date 1306331235 -28800
# Node ID c05fac334ab113953ac740f325cf3339c8369ba6
# Parent  8101024c942b5d729f6e5facf3df29ccbd180865
compute size for objects

Add compute_size() methods to the DEX item classes so that data_size can
be recomputed from the parsed fields instead of only being recorded by
parse().  _to_uleb128()/_to_leb128() encode a value so that
_uleb128_sz()/_leb128_sz() can measure its encoded width, and
_compute_sz()/_sum_data_size() recompute and add up the sizes of objects.

The parse() methods touched here track their offset with man_off instead
of a one-element cur_off list, and the DBG_ADVANCE_LINE operand is now
decoded with _leb128() instead of _uleb128().  The size report prints
the parsed size and the recomputed size side by side.

Corrections made to the exported patch (hunk line counts are unchanged):
 - the compute_size() of the class holding handlers/catchesAll now stores
   its result in self.data_size instead of dropping it in a local;
 - the "Bad annotation element value byte" message uses 0x%02x, the
   original had no conversion specifier and raised TypeError;
 - reduce() in the type lists compute_size() gets an initial value of 0
   so an empty typeLists does not raise TypeError.
Whitespace of context lines was reconstructed; apply with whitespace
ignored if exact matching fails.

diff -r 8101024c942b -r c05fac334ab1 paraspace/dexfile.py
--- a/paraspace/dexfile.py	Wed May 25 01:15:07 2011 +0800
+++ b/paraspace/dexfile.py	Wed May 25 21:47:15 2011 +0800
@@ -1,3 +1,5 @@
+import itertools
+
 class _DEX_header(object):
     magic = None # 0x00, 8 bytes
     checksum = None # 0x08, 4 bytes
@@ -30,6 +32,8 @@
         'methodIdsSize methodIdsOff classDefsSize classDefsOff ' \
         'dataSize dataOff'.split()
 
+    data_size = 0x70
+
     def parse(self, data):
         self.magic = data[:8]
         self.checksum = data[8: 0x0c]
@@ -111,6 +115,26 @@
     return v, nbytes
 
 
+def _to_uleb128(v):
+    assert v >= 0
+
+    data = ''
+    while True:
+        if v > 0x7f:
+            data = data + chr((v & 0x7f) | 0x80)
+        else:
+            data = data + chr(v & 0x7f)
+            break
+        v = v >> 7
+        pass
+
+    return data
+
+
+def _uleb128_sz(v):
+    return len(_to_uleb128(v))
+
+
 def _leb128(data):
     v, sh = _uleb128(data)
     if v & (1 << (sh * 7 - 1)):
@@ -119,6 +143,36 @@
     return v, sh
 
 
+def _to_leb128(v):
+    data = ''
+    while True:
+        if v > 0x3f or v < ~0x3f:
+            data = data + chr((v & 0x7f) | 0x80)
+        else:
+            data = data + chr(v & 0x7f)
+            break
+        v = v >> 7
+        pass
+    return data
+
+
+def _leb128_sz(v):
+    return len(_to_leb128(v))
+
+
+def _compute_sz(o):
+    if hasattr(o, 'compute_size'):
+        o.compute_size()
+        pass
+    return o.data_size
+
+
+def _sum_data_size(obj_list):
+    obj_sizes = itertools.imap(_compute_sz, obj_list)
+    total = sum(obj_sizes)
+    return total
+
+
 class _DEX_MapItem(object):
     type = None # 2 bytes
     unused = None # 2 bytes
@@ -266,6 +320,14 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):
+        self.data_size = \
+            _uleb128_sz(self.staticFieldsSize) + \
+            _uleb128_sz(self.instanceFieldsSize) + \
+            _uleb128_sz(self.directMethodsSize) + \
+            _uleb128_sz(self.virtualMethodsSize)
+        pass
     pass
 
 
@@ -284,6 +346,12 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):
+        self.data_size = \
+            _uleb128_sz(self.fieldIdx) + \
+            _uleb128_sz(self.accessFlags)
+        pass
     pass
 
 
@@ -308,6 +376,13 @@
 
         self.data_size = sz
         pass
+
+    def compute_size(self):
+        self.data_size = \
+            _uleb128_sz(self.methodIdx) + \
+            _uleb128_sz(self.accessFlags) + \
+            _uleb128_sz(self.codeOff)
+        pass
     pass
 
 class _DEX_ClassData(object):
@@ -320,17 +395,17 @@
     data_size = None
 
     def parse(self, data, off):
+        moff = man_off(off)
+
         header = _DEX_ClassDataHeader()
-        header.parse(data, off)
+        header.parse(data, moff())
         self.header = header
-
-        cur_off = [off + header.data_size]
+        moff(header.data_size)
 
         def parse_field():
             field = _DEX_Field()
-            off = cur_off[0]
-            field.parse(data, off)
-            cur_off[0] = cur_off[0] + field.data_size
+            field.parse(data, moff())
+            moff(field.data_size)
 
             #
             # field index depends previous one to reduce size
@@ -342,9 +417,8 @@
 
         def parse_method():
             method = _DEX_Method()
-            off = cur_off[0]
-            method.parse(data, off)
-            cur_off[0] = cur_off[0] + method.data_size
+            method.parse(data, moff())
+            moff(method.data_size)
 
             #
             # method index depends previous one to reduce size
@@ -367,7 +441,17 @@
         self.virtualMethods = [parse_method()
                                for i in range(header.virtualMethodsSize)]
 
-        self.data_size = cur_off[0] - off
+        self.data_size = moff() - off
+        pass
+
+    def compute_size(self):
+        sz = self.header.data_size
+        sz = sz + _sum_data_size(itertools.chain(self.staticFields,
+                                                 self.instanceFields,
+                                                 self.directMethods,
+                                                 self.virtualMethods))
+
+        self.data_size = sz
         pass
     pass
 
@@ -405,6 +489,12 @@
         self.typeItems = typeItems
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        size = 4 + _sum_data_size(self.typeItems)
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -414,19 +504,31 @@
     data_size = None
 
     def parse(self, num, data, off):
+        moff = man_off(off)
+
         def parse():
-            off = (cur_off[0] + 3) & ~0x3 # type list must aligned for 4 bytes
+            moff.off = (moff(0) + 3) & ~0x3 # aligned for 4 bytes
             typeList = _DEX_TypeList()
-            typeList.parse(data, off)
-            cur_off[0] = off + typeList.data_size
+            typeList.parse(data, moff())
+            moff(typeList.data_size)
             return typeList
 
-        cur_off = [off]
         typeLists = [parse() for i in range(num)]
 
         self.typeLists = typeLists
 
-        self.data_size = cur_off[0] - off
+        self.data_size = moff(0) - off
+        pass
+
+    def compute_size(self):
+        def compute_align(prev, cur):
+            v = ((prev + 3) & ~0x3) + cur
+            return v
+
+        sizes = itertools.imap(_compute_sz, self.typeLists)
+        size = reduce(compute_align, sizes, 0)
+
+        self.data_size = size
         pass
     pass
 
@@ -456,13 +558,13 @@
     data_size = None
 
     def parse(self, data, off):
-        cur_off = off
-        self.typeIdx, sh = _uleb128(data[cur_off:cur_off + 5])
-        cur_off = cur_off + sh
-        self.address, sh = _uleb128(data[cur_off:cur_off + 5])
-        cur_off = cur_off + sh
+        moff = man_off(off)
+        self.typeIdx, sh = _uleb128(data[moff():moff() + 5])
+        moff(sh)
+        self.address, sh = _uleb128(data[moff():moff() + 5])
+        moff(sh)
 
-        self.data_size = cur_off - off
+        self.data_size = moff() - off
         pass
 
     def parse1(self, data, off):
@@ -470,6 +572,17 @@
 
         self.data_size = sh
         pass
+
+    def compute_size(self):
+        if self.typeIdx is not None:
+            size = _uleb128_sz(self.typeIdx)
+        else:
+            size = 0
+            pass
+        size = size + _uleb128_sz(self.address)
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -512,6 +625,18 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        count = len(self.handlers)
+        if self.catchesAll:
+            count = -(count - 1)
+            pass
+        count_sz = _leb128_sz(count)
+
+        handlers_size = _sum_data_size(self.handlers)
+
+        self.data_size = count_sz + handlers_size
+        pass
     pass
 
 
@@ -568,6 +693,28 @@
         moff.off = (moff() + 3) & ~0x3 # round code item to 4 bytes
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        size = 16 + self.insnsSize * 2
+
+        if self.triesSize > 0:
+            size = (size + 0x3) & ~0x3
+
+            try_items_size = _sum_data_size(self.try_items)
+
+            catch_handler_items = self.catch_handler_items
+            catch_handler_items_cnt = len(catch_handler_items)
+            catch_handler_items_cnt_sz = _uleb128_sz(catch_handler_items_cnt)
+            catch_handler_items_sz = _sum_data_size(catch_handler_items)
+            catch_handler_items_size = \
+                catch_handler_items_cnt_sz + \
+                catch_handler_items_sz
+
+            size = size + try_items_size + catch_handler_items_size
+            pass
+
+        self.data_size = (size + 3) & ~0x3
+        pass
     pass
 
 
@@ -608,6 +755,13 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        annotations_size = _sum_data_size(self.annotations)
+        size = 4 + annotations_size
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -699,6 +853,17 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        field_anno_sz = _sum_data_size(self.fieldAnnotationsItems)
+        method_anno_sz = _sum_data_size(self.methodAnnotationsItems)
+        parameter_anno_sz = _sum_data_size(self.parameterAnnotationsItems)
+
+        all_items_size = field_anno_sz + method_anno_sz + parameter_anno_sz
+        size = 16 + all_items_size
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -831,6 +996,52 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        if self.nameIdx is not None:
+            nameIdx_size = _uleb128_sz(self.nameIdx)
+        else:
+            nameIdx_size = 0
+            pass
+
+        valueType = self.valueType
+        width = valueType >> self.kDexAnnotationValueArgShift
+
+        vtype = valueType & self.kDexAnnotationValueTypeMask
+
+        if vtype in (self.kDexAnnotationByte,
+                     self.kDexAnnotationShort,
+                     self.kDexAnnotationChar,
+                     self.kDexAnnotationInt,
+                     self.kDexAnnotationLong,
+                     self.kDexAnnotationFloat,
+                     self.kDexAnnotationDouble,
+                     self.kDexAnnotationString,
+                     self.kDexAnnotationType,
+                     self.kDexAnnotationMethod,
+                     self.kDexAnnotationField,
+                     self.kDexAnnotationEnum):
+            value_size = width + 2
+            pass
+        elif vtype in (self.kDexAnnotationBoolean,
+                       self.kDexAnnotationNull):
+            value_size = 1
+            pass
+        elif vtype == self.kDexAnnotationArray:
+            array_cnt = len(self.value)
+            array_cnt_size = _uleb128_sz(array_cnt)
+            array_size = _sum_data_size(self.value)
+            value_size = 1 + array_cnt_size + array_size
+            pass
+        elif vtype == self.kDexAnnotationAnnotation:
+            value_size = 1 + _compute_sz(self.value)
+            pass
+        else:
+            raise ValueError, \
+                'Bad annotation element value byte 0x%02x' % (valueType)
+
+        self.data_size = nameIdx_size + value_size
+        pass
     pass
 
 
@@ -876,6 +1087,23 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        if self.visibility is not None:
+            visibility_size = 1
+        else:
+            visibility_size = 0
+            pass
+
+        typeIdx_size = _uleb128_sz(self.typeIdx)
+
+        members_cnt_size = _uleb128_sz(len(self.members))
+        members_size = members_cnt_size + _sum_data_size(self.members)
+
+        size = visibility_size + typeIdx_size + members_size
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -901,6 +1129,13 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        elements_cnt_size = _uleb128_sz(len(self.elements))
+        size = elements_cnt_size + _sum_data_size(self.elements)
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -960,7 +1195,7 @@
                 opcodes.append((opcode, adv))
                 pass
             elif opcode == self.DBG_ADVANCE_LINE:
-                adv, sh = _uleb128(data[moff():moff() + 5])
+                adv, sh = _leb128(data[moff():moff() + 5])
                 moff(sh)
                 opcodes.append((opcode, adv))
                 pass
@@ -1003,6 +1238,53 @@
 
         self.data_size = moff() - off
         pass
+
+    def compute_size(self):
+        start_line_size = _uleb128_sz(self.start_line)
+
+        parameters_cnt_size = _uleb128_sz(len(self.parameters))
+        parameter_sizes = itertools.imap(_uleb128_sz, self.parameters)
+        parameters_size = parameters_cnt_size + sum(parameter_sizes)
+
+        def compute_opcode_size(code):
+            opcode = code[0]
+
+            if opcode == self.DBG_END_SEQUENCE:
+                size = 1
+            elif opcode == self.DBG_ADVANCE_PC:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode == self.DBG_ADVANCE_LINE:
+                size = 1 + _leb128_sz(code[1])
+            elif opcode in (self.DBG_START_LOCAL,
+                            self.DBG_START_LOCAL_EXTENDED):
+                size = 1 + _uleb128_sz(code[1]) + _uleb128_sz(code[2]) + \
+                    _uleb128_sz(code[3])
+                if len(code) == 5:
+                    size = size + _uleb128_sz(code[4])
+                    pass
+                pass
+            elif opcode == self.DBG_END_LOCAL:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode == self.DBG_RESTART_LOCAL:
+                size = 1 + _uleb128_sz(code[1])
+            elif opcode in (self.DBG_SET_PROLOGUE_END,
+                            self.DBG_SET_EPILOGUE_BEGIN,
+                            self.DBG_SET_FILE):
+                size = 1
+            else:
+                size = 1
+                pass
+
+            return size
+
+        opcode_sizes = itertools.imap(compute_opcode_size, self.opcodes)
+        opcode_sizes = [i for i in opcode_sizes]
+        opcodes_size = sum(opcode_sizes)
+
+        size = start_line_size + parameters_size + opcodes_size
+
+        self.data_size = size
+        pass
     pass
 
 
@@ -1418,35 +1700,46 @@
         pass
 
     print
-    print 'TypeLists size is %d bytes' % (dex._typeLists.data_size)
+    print 'TypeLists size is %d/%d bytes' % (dex._typeLists.data_size,
+                                             _compute_sz(dex._typeLists))
 
     bytes = sum([code.data_size for code in dex._codeItems])
+    rbytes = sum([_compute_sz(code) for code in dex._codeItems])
     print
-    print 'CodeItems size is %d bytes' % (bytes)
+    print 'CodeItems size is %d/%d bytes' % (bytes, rbytes)
 
     bytes = sum([annoset.data_size for annoset in dex._annotationSetItems])
+    rbytes = sum([_compute_sz(annoset) for annoset in dex._annotationSetItems])
     print
-    print 'AnnotationSetItems size is %d bytes' % (bytes)
+    print 'AnnotationSetItems size is %d/%d bytes' % (bytes, rbytes)
 
     bytes = sum([annodir.data_size
                  for annodir in dex._annotationsDirectoryItems])
+    rbytes = sum([_compute_sz(annodir)
+                  for annodir in dex._annotationsDirectoryItems])
     print
-    print 'AnnotationsDirtoryItems size is %d bytes' % (bytes)
+    print 'AnnotationsDirtoryItems size is %d/%d bytes' % (bytes, rbytes)
 
     bytes = sum([annoitem.data_size
                  for annoitem in dex._annotationItems])
+    rbytes = sum([_compute_sz(annoitem)
+                  for annoitem in dex._annotationItems])
     print
-    print 'AnnotationItems size is %d bytes' % (bytes)
+    print 'AnnotationItems size is %d/%d bytes' % (bytes, rbytes)
 
     bytes = sum([encodeditem.data_size
                  for encodeditem in dex._encodedArrayItems])
+    rbytes = sum([_compute_sz(encodeditem)
+                  for encodeditem in dex._encodedArrayItems])
     print
-    print 'EncodedArrayItems size is %d bytes' % (bytes)
+    print 'EncodedArrayItems size is %d/%d bytes' % (bytes, rbytes)
 
     bytes = sum([debuginfoitem.data_size
                  for debuginfoitem in dex._debugInfoItems])
+    rbytes = sum([_compute_sz(debuginfoitem)
+                  for debuginfoitem in dex._debugInfoItems])
     print
-    print 'DebugInfoItems size is %d bytes' % (bytes)
+    print 'DebugInfoItems size is %d/%d bytes' % (bytes, rbytes)
 
     print
     print 'Data maps'