comparison ikweb/tools/lazy/www/__init__.py @ 247:7747bbe5b68e

Start developing the Information Exchange Center of the Ikariam game (prototype).
author "Hisn Yi, Chen <ossug.hychen@gmail.com>"
date Mon, 01 Dec 2008 00:27:22 +0800
parents
children
comparison
equal deleted inserted replaced
246:60c4b4b78a01 247:7747bbe5b68e
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright 2008 Hsin Yi, Chen
4 """
5 [Note] the project is not available yet.
6
7 A web page fetching tool chain that has a jQuery-like selector and supports chained operations.
8
9 Here is an example that shows the main idea: retrieve the content you want
10 from a div box in a web page, then post to another web page, using a param
11 built from the content of the first retrieval, and retrieve the next content.
12 Finally, store the result.
13
14 def func(s):
15 return {'msg':s}
16
17 try:
18 c("http://example.tw/").get().find("////ul/text()") \
19 .build_param( func ).post_to("http://example2.com") \
20 .save_as('hellow.html')
21 except:
22 pass
23
24 more complex example
25
26 try:
27 c("http://example.tw/").retry(4, '5m').get() \
28 .find("#id > div") \
29 .build_param( func ).post_to("http://example2.com") \
30 .save_as('hellow.html') \
31 .end().find("#id2 > img").download('pretty-%s.jpg'). \
32 tar_and_zip("pretty_girl.tar.gz")
33 except NotFound:
34 print "the web page is not found."
35 except NoPermissionTosave:
36 print "the files can not be save with incorrect permission."
37 except:
38 print "unknown error."
39 """
40 from lazy.www.work import WorkFlow
41 from lazy.www.work.fetch import Fetcher
42 from lazy.www.work.storage import FileStorager
43 from lazy.www.core import SemiProduct
44 import os
45 import sys
46 import re
47
def parse_scheme(scheme):
    """
    Split a target such as ``http://host/path`` into its two parts.

    Returns a ``(type, path)`` tuple: the word found right before ``://``
    and the rest of the line after it, e.g. ``('http', 'host/path')``.

    When the text contains no ``<word>://`` part (or is not a string), the
    message is written to stdout and the process exits with status 1.
    """
    try:
        # Raw string: same pattern as before, without the needless escaped
        # slashes and without invalid escapes in a normal string literal.
        return re.findall(r"(\w+)://(.*/?)", scheme)[0]
    except (IndexError, TypeError):
        # IndexError: nothing matched; TypeError: scheme is not text.
        # Anything else (e.g. KeyboardInterrupt) is left to propagate.
        sys.stdout.write("the scheme is not supported.")
        # Non-zero status so that callers and shells can see the failure.
        sys.exit(1)
54
def c(scheme, worker=None):
    """
    Build a WorkFlow for the target named by scheme.

    The scheme is always parsed first. If a worker is passed in, it is
    wrapped as-is. Otherwise a 'file' target gets a FileStorager working on
    the path part, and any other target gets a Fetcher working on the whole
    scheme string.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'

    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'
    """
    kind, location = parse_scheme(scheme)

    # A caller-supplied worker is used untouched.
    if worker is not None:
        return WorkFlow(worker)

    #@todo: SemiProduct Factory.
    if kind == 'file':
        return WorkFlow(FileStorager(SemiProduct(source=location)))

    return WorkFlow(Fetcher(SemiProduct(source=scheme)))
77
if '__main__' == __name__:
    # Executed as a script: run the doctest examples found in this module.
    import doctest

    doctest.testmod()