62
|
1 from lxml import etree
|
|
2 from cStringIO import StringIO
|
|
3
|
|
class Finder:
    """Run XPath queries against the HTML content of a working product.

    The constructor parses ``working_product.content`` once with lxml's
    HTML parser; ``find`` then evaluates XPath expressions against the
    resulting tree.
    """

    # Class-level defaults; ``dom_tree`` is replaced per instance in __init__.
    dom_tree = None
    # Not used by any method visible in this class; kept so existing
    # references to ``Finder.xpath`` keep working.
    xpath = None

    def __init__(self, working_product):
        """Parse the product's HTML content into a DOM tree.

        :param working_product: object exposing a ``content`` attribute with
            the HTML to parse.  NOTE(review): presumably a UTF-8 encoded byte
            string, since it is wrapped in ``StringIO`` and parsed with
            ``encoding='utf8'`` -- confirm against callers.
        """
        self.working_product = working_product
        # Historical misspelling, kept as an alias so external code that
        # reads ``working_prodcut`` does not break.
        self.working_prodcut = working_product

        self.encoding = 'utf8'
        parser = etree.HTMLParser(encoding=self.encoding)
        self.dom_tree = etree.parse(StringIO(working_product.content), parser)

    def find(self, express, callback=None):
        """Evaluate an XPath expression against the parsed document.

        :param express: XPath expression passed to ``dom_tree.xpath``.
        :param callback: optional callable applied to the raw XPath result.
        :returns: the raw XPath result when ``callback`` is None, otherwise
            whatever ``callback(result)`` returns.
        """
        result = self.dom_tree.xpath(express)

        if callback is None:
            return result
        # Call the supplied callable directly; the previous
        # ``self.callback(...)`` raised AttributeError because Finder has no
        # ``callback`` attribute.
        return callback(result)
|