Mercurial > eagle-eye
annotate pyikriam/buildings.py @ 175:9f248c8460ce
Simplify XPath patterns.
- Use the 'descendant' axis name to simplify patterns.
author | Thinker K.F. Li <thinker@branda.to> |
---|---|
date | Sun, 02 Nov 2008 09:59:51 +0800 |
parents | 8f699a9da6c0 |
children | 3ba3edda6d1e |
rev | line source |
---|---|
168
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
1 from lazy.www import c |
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
2 from lxml import etree |
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
3 from StringIO import StringIO |
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
4 |
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
class position(object):
    """Accessor for the page of one building position inside a city.

    The instance only stores the request target and parameters; nothing is
    fetched until ``get_page()`` is called.

    Attributes:
        build_type: building type name, stored exactly as given.
        city_id: city identifier, sent as the ``id`` request parameter.
        idx: position index, sent as the ``position`` request parameter.
    """

    def __init__(self, build_type, city_id, idx, baseurl):
        """Remember which page to request.

        Args:
            build_type: building type name kept on the instance.
            city_id: value for the ``id`` request parameter.
            idx: value for the ``position`` request parameter.
            baseurl: server base URL; ``/index.php`` is appended to it.
        """
        # Strip trailing slashes so 'http://host/' does not turn into
        # 'http://host//index.php'.
        self._baseurl = baseurl.rstrip('/') + '/index.php'
        self.build_type = build_type
        self.city_id = city_id
        self.idx = idx
        self._params = {'view': 'buildingGround',
                        'id': city_id,
                        'position': idx}

    def get_page(self):
        """Return the content of the building page.

        The request goes through ``lazy.www.c`` (presumably an HTTP GET
        carrying ``self._params`` -- confirm against that helper).
        """
        return c(self._baseurl).get(self._params).get_content()
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
20 |
8f699a9da6c0
Extract building level of townhalls.
Thinker K.F. Li <thinker@branda.to>
parents:
diff
changeset
|
class townhall(position):
    """Town hall page of a city.

    ``sync()`` fetches the building page and stores the first match of
    every entry in ``xpath_patterns`` as an attribute of the instance
    (currently only ``level``).
    """

    # Attribute name -> XPath step; ``_sync`` appends each step to the
    # ``descendant::`` axis of the page body.
    xpath_patterns = {
        'level': 'div[@class=\'buildingLevel\']/text()'
    }

    def __init__(self, city_id, idx, baseurl):
        """Create a ``position`` whose build type is ``'townhall'``."""
        super(townhall, self).__init__('townhall', city_id, idx, baseurl)

    def _sync(self, page):
        """Parse ``page`` and copy the extracted values onto ``self``.

        Args:
            page: HTML text of the building page.

        Raises:
            IndexError: if a pattern in ``xpath_patterns`` matches nothing.
                No attribute is modified in that case.
        """
        parser = etree.HTMLParser(encoding='utf8')
        page_dom = etree.parse(StringIO(page), parser)
        xpath_building = '/html/body/descendant::'

        # Extract everything before touching the instance, so a page that
        # lacks one field cannot leave the object half-updated.
        values = {}
        for name, ptn in self.xpath_patterns.items():
            path = xpath_building + ptn
            matches = page_dom.xpath(path)
            if not matches:
                # Same exception type as the former bare ``[0]`` lookup, but
                # the message now says which field was missing.
                raise IndexError(
                    'no match for %r in building page (xpath: %s)'
                    % (name, path))
            values[name] = matches[0]

        for name, value in values.items():
            setattr(self, name, value)

    def sync(self):
        """Fetch the building page and refresh the extracted attributes."""
        self._sync(self.get_page())