62
|
1 from lxml import etree
|
|
2 from cStringIO import StringIO
|
|
3
|
|
class Finder:
    """Run XPath queries against the HTML content of a working product.

    The HTML is parsed lazily: the first call to ``find`` parses
    ``working_product.content`` with lxml's HTML parser and keeps the
    resulting tree on the instance for later queries.
    """

    # Parsed tree; stays None until set_dom_tree() has run.
    dom_tree = None
    # Never assigned by this class; retained so existing attribute reads
    # (``Finder.xpath`` / ``finder.xpath``) keep working.
    xpath = None

    def __init__(self, working_product):
        """Remember the product to query.

        ``working_product`` must expose a ``content`` attribute; it is read
        the first time ``find`` is called.
        """
        # NOTE: the attribute name is misspelled ("prodcut"). It is kept
        # as-is because code outside this class may already read it.
        self.working_prodcut = working_product
        # Encoding handed to etree.HTMLParser in set_dom_tree().
        self.encoding = 'utf8'

    def find(self, express, callback=None):
        """Evaluate the XPath expression ``express`` on the parsed content.

        If ``callback`` is given (and truthy) it is called with the XPath
        result and its return value is returned; otherwise the XPath result
        itself is returned.
        """
        # Parse on first use only; later calls reuse the cached tree.
        if self.dom_tree is None:
            self.set_dom_tree(self.working_prodcut.content)

        result = self.dom_tree.xpath(express)

        if callback:
            # Call the supplied callable directly. The previous code called
            # ``self.callback``, which does not exist on Finder and raised
            # AttributeError whenever a callback was passed.
            return callback(result)
        return result

    def set_dom_tree(self, content):
        """Parse ``content`` as HTML and store the tree in ``self.dom_tree``."""
        stream = StringIO(content)
        parser = etree.HTMLParser(encoding=self.encoding)
        self.dom_tree = etree.parse(stream, parser)