Mercurial > eagle-eye
diff ikweb/tools/lazy/www/__init__.py @ 247:7747bbe5b68e
Start developing the Information Exchange Center for the Ikariam game (prototype).
author | "Hisn Yi, Chen <ossug.hychen@gmail.com>" |
---|---|
date | Mon, 01 Dec 2008 00:27:22 +0800 |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
#
# Copyright 2008 Hsin Yi, Chen
"""
    [Note] The project is not available yet.

    A web page fetching tool chain that has a jQuery-like selector and
    supports chained operations.

    Here is an example that shows the main idea: retrieve the content you
    want from a div box in a web page, then post to another web page using
    parameters built from the content you just retrieved, retrieve the next
    content from there and, finally, store the result.

        def func(s):
            return {'msg': s}

        try:
            (c("http://example.tw/").get().find("////ul/text()")
                .build_param(func).post_to("http://example2.com")
                .save_as('hellow.html'))
        except:
            pass

    A more complex example:

        try:
            (c("http://example.tw/").retry(4, '5m').get()
                .find("#id > div")
                .build_param(func).post_to("http://example2.com")
                .save_as('hellow.html')
                .end().find("#id2 > img").download('pretty-%s.jpg')
                .tar_and_zip("pretty_girl.tar.gz"))
        except NotFound:
            print("the web page is not found.")
        except NoPermissionTosave:
            print("the files can not be saved with incorrect permission.")
        except Exception:
            print("unknown error.")
"""
from lazy.www.work import WorkFlow
from lazy.www.work.fetch import Fetcher
from lazy.www.work.storage import FileStorager
from lazy.www.core import SemiProduct
import os
import sys
import re


def parse_scheme(scheme):
    """
    Split a target string of the form ``<type>://<path>`` into its parts.

    Returns the ``(type, path)`` tuple of the first match found in
    *scheme*; for example ``'http://localhost:8080'`` gives
    ``('http', 'localhost:8080')``.

    If *scheme* contains no such match, or is not a string, the process is
    terminated by raising ``SystemExit``: the message is printed to stderr
    and the exit status is 1.
    """
    try:
        # Raw string: same pattern value as before, without relying on
        # invalid string escapes such as "\w" and "\/".
        return re.findall(r"(\w+):\/\/(.*\/?)", scheme)[0]
    except (IndexError, TypeError):
        # IndexError: nothing matched. TypeError: scheme is not a string.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        sys.exit("the scheme is not supported.")


def c(scheme, worker=None):
    """
    Connect to a web page (or a local file) and return a WorkFlow for it.

    *scheme* is parsed with parse_scheme(). When *worker* is None a default
    worker is created: a FileStorager over SemiProduct(source=<path>) if
    the target type is 'file', otherwise a Fetcher over
    SemiProduct(source=<scheme>). A worker passed in by the caller is used
    as-is. The worker is wrapped in a WorkFlow, which is returned.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'

    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'
    """
    target_type, path = parse_scheme(scheme)

    #@todo: SemiProduct Factory.
    if worker is None:
        if target_type == 'file':
            product = SemiProduct(source=path)
            worker = FileStorager(product)
        else:
            # Non-file targets keep the full scheme string as the source.
            product = SemiProduct(source=scheme)
            worker = Fetcher(product)

    return WorkFlow(worker)


if __name__ == '__main__':
    import doctest
    doctest.testmod()