Mercurial > eagle-eye
view ikweb/tools/lazy/www/__init__.py @ 365:2444685c68c9
merged multi-attacks script
author | "Rex Tsai <chihchun@kalug.linux.org.tw>" |
---|---|
date | Sun, 01 Mar 2009 11:10:32 +0800 |
parents | 7747bbe5b68e |
children |
line wrap: on
line source
# -*- coding: utf-8 -*-
#
# Copyright 2008 Hsin Yi, Chen
r"""
[Note] The project is not available yet.

A web page fetching tool chain that has a jQuery-like selector and supports
chained operations.

Here is an example that shows the main idea: retrieve the content you want
from a div box in a web page, then post to (and retrieve the next content
from) another web page using the parameters you just built from the first
retrieval, and finally store the result.

    def func(s):
        return {'msg':s}

    try:
        c("http://example.tw/").get().find("////ul/text()") \
            .build_param( func ).post_to("http://example2.com") \
            .save_as('hellow.html')
    except:
        pass

A more complex example:

    try:
        c("http://example.tw/").retry(4, '5m').get() \
            .find("#id > div") \
            .build_param( func ).post_to("http://example2.com") \
            .save_as('hellow.html') \
            .end().find("#id2 > img").download('pretty-%s.jpg'). \
            tar_and_zip("pretty_girl.tar.gz")
    except NotFound:
        print "the web page is not found."
    except NoPermissionTosave:
        print "the files can not be saved due to incorrect permission."
    except Exception:
        print "unknown error."
"""
import os
import re
import sys

from lazy.www.work import WorkFlow
from lazy.www.work.fetch import Fetcher
from lazy.www.work.storage import FileStorager
from lazy.www.core import SemiProduct


def parse_scheme(scheme):
    """
    Split a target string of the form ``<type>://<rest>`` into its two parts.

    Returns the ``(type, rest)`` tuple of the first ``<type>://<rest>``
    match found in ``scheme``.

    If ``scheme`` contains no such match (or is not a string), the message
    "the scheme is not supported." is written to stdout and the process is
    terminated via ``sys.exit`` with a non-zero status.

    >>> parse_scheme('http://localhost:8080')
    ('http', 'localhost:8080')
    >>> parse_scheme('file:///tmp/page.html')
    ('file', '/tmp/page.html')
    """
    try:
        # Raw string: the pattern contains backslash escapes meant for the
        # regex engine, not for the Python string literal.
        return re.findall(r"(\w+):\/\/(.*\/?)", scheme)[0]
    except (IndexError, TypeError):
        # IndexError: no match at all; TypeError: scheme is not a string.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        sys.stdout.write("the scheme is not supported.")
        # Exit with a failure status so callers/shells can detect the error.
        sys.exit(1)


def c(scheme, worker=None):
    """
    Connect to a web page (or a local file) and start a work flow on it.

    ``scheme`` is the target, e.g. ``http://host/path`` or ``file://path``.
    ``worker`` is an optional, already constructed worker; when it is given
    it is wrapped as-is (``scheme`` is still validated by ``parse_scheme``).

    When no worker is given, a ``file`` target gets a ``FileStorager`` whose
    ``SemiProduct`` source is the part after ``file://``; every other target
    gets a ``Fetcher`` whose ``SemiProduct`` source is the full ``scheme``.

    Returns a ``WorkFlow`` wrapping the worker.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'
    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'
    """
    target_type, path = parse_scheme(scheme)

    #@todo: SemiProduct Factory.
    if worker is None:
        if target_type == 'file':
            worker = FileStorager(SemiProduct(source=path))
        else:
            worker = Fetcher(SemiProduct(source=scheme))
    return WorkFlow(worker)


if __name__ == '__main__':
    import doctest
    doctest.testmod()