annotate pyikriam/buildings.py @ 175:9f248c8460ce

Simplify xpath patterns. - Use the 'descendant' axis name to simplify patterns.
author Thinker K.F. Li <thinker@branda.to>
date Sun, 02 Nov 2008 09:59:51 +0800
parents 8f699a9da6c0
children 3ba3edda6d1e
rev   line source
168
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
1 from lazy.www import c
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
2 from lxml import etree
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
3 from StringIO import StringIO
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
4
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
class position(object):
    """One indexed position of a city, requested through ``index.php``.

    Stores the identifiers given to the constructor together with the
    query parameters used to request the ``buildingGround`` view for
    that position.
    """

    def __init__(self, build_type, city_id, idx, baseurl):
        """Record the identifiers and prepare the request parameters.

        Args:
            build_type: Stored unchanged as ``self.build_type``.
            city_id: Stored as ``self.city_id`` and sent as the ``id``
                query parameter.
            idx: Stored as ``self.idx`` and sent as the ``position``
                query parameter.
            baseurl: URL prefix to which ``/index.php`` is appended.
        """
        # Strip trailing slashes so a base URL such as 'http://host/'
        # does not become 'http://host//index.php'.
        self._baseurl = baseurl.rstrip('/') + '/index.php'
        self.build_type = build_type
        self.city_id = city_id
        self.idx = idx
        self._params = {'view': 'buildingGround',
                        'id': city_id,
                        'position': idx}

    def get_page(self):
        """Request the view for this position and return its content.

        Returns:
            Whatever ``get_content()`` of the ``lazy.www`` response
            yields (presumably the page markup -- confirm against
            ``lazy.www``).
        """
        return c(self._baseurl).get(self._params).get_content()
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
20
8f699a9da6c0 Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff changeset
class townhall(position):
    """A ``position`` whose build type is ``'townhall'``.

    ``xpath_patterns`` maps an attribute name to an XPath fragment.
    During a sync each fragment is appended to
    ``/html/body/descendant::`` and the first match found in the page
    is stored on the instance under that attribute name.
    """

    xpath_patterns = {
        'level': 'div[@class=\'buildingLevel\']/text()'
    }

    def __init__(self, city_id, idx, baseurl):
        """Initialise the base ``position`` with build type ``'townhall'``.

        Args:
            city_id: Forwarded to ``position.__init__``.
            idx: Forwarded to ``position.__init__``.
            baseurl: Forwarded to ``position.__init__``.
        """
        super(townhall, self).__init__('townhall', city_id, idx, baseurl)

    def _sync(self, page):
        """Parse ``page`` as HTML and copy the matched values onto ``self``.

        Args:
            page: Page markup; it is wrapped in ``StringIO`` and parsed
                with an lxml ``HTMLParser`` using ``encoding='utf8'``.

        Raises:
            IndexError: If any pattern in ``xpath_patterns`` matches
                nothing in the page. No attribute is modified then.
        """
        parser = etree.HTMLParser(encoding='utf8')
        page_dom = etree.parse(StringIO(page), parser)
        xpath_building = '/html/body/descendant::'

        # Resolve every pattern before touching the instance, so that a
        # page lacking one of the elements cannot leave the object
        # partially updated.
        found = {}
        for name, ptn in self.xpath_patterns.items():
            path = xpath_building + ptn
            matches = page_dom.xpath(path)
            if not matches:
                # Same exception type as the previous bare ``[0]`` lookup,
                # but the message says which attribute and XPath failed.
                raise IndexError(
                    'no match for %r using xpath %r' % (name, path))
            found[name] = matches[0]

        for name, value in found.items():
            setattr(self, name, value)

    def sync(self):
        """Fetch the page with ``get_page()`` and update the attributes."""
        self._sync(self.get_page())