view pyikriam/lazy/www/work/find.py @ 365:2444685c68c9

merged multi-attacks script
author "Rex Tsai <chihchun@kalug.linux.org.tw>"
date Sun, 01 Mar 2009 11:10:32 +0800
parents 60c4b4b78a01
children
line wrap: on
line source

from lxml import etree
from cStringIO import StringIO

class Finder:
    """Run XPath queries against the HTML content of a working product.

    The HTML is parsed on the first call to ``find`` (or explicitly through
    ``set_dom_tree``) and the resulting tree is kept on the instance for
    later queries.
    """

    # Parsed document tree; stays None until set_dom_tree() has been called.
    dom_tree = None
    # Not used by any method in this class; kept for backward compatibility.
    xpath = None

    def __init__(self, working_product):
        """Remember the object whose ``content`` attribute will be parsed.

        working_product -- object exposing a ``content`` attribute holding
                           the HTML text handed to ``set_dom_tree``.
        """
        # NOTE: the attribute name is misspelled ("prodcut"); it is kept
        # as-is because code outside this class may already read it.
        self.working_prodcut = working_product

        # Encoding passed to the HTML parser in set_dom_tree().
        self.encoding = 'utf8'

    def find(self, express, callback=None):
        """Evaluate the XPath expression ``express`` on the parsed document.

        express  -- XPath expression handed to ``dom_tree.xpath``.
        callback -- optional callable; when given (and truthy) it is called
                    with the XPath result and its return value is returned
                    instead of the raw result.

        Returns the XPath result, or ``callback(result)`` when a callback
        is supplied.
        """
        # Parse lazily so the document is only built when first queried.
        if self.dom_tree is None:
            self.set_dom_tree(self.working_prodcut.content)

        result = self.dom_tree.xpath(express)

        if callback:
            # Call the argument itself: Finder has no ``callback`` attribute,
            # so the previous ``self.callback(...)`` raised AttributeError.
            return callback(result)
        return result

    def set_dom_tree(self, content):
        """Parse ``content`` as HTML and store the tree in ``self.dom_tree``.

        content -- HTML text; it is wrapped in a StringIO stream and parsed
                   with ``etree.HTMLParser`` using ``self.encoding``.
        """
        stream = StringIO(content)

        parser = etree.HTMLParser(encoding=self.encoding)
        self.dom_tree = etree.parse(stream, parser)