62
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright 2008 Hsin Yi, Chen
|
|
4 """
|
|
5 [Note] the project is not available yet.
|
|
6
|
|
7 A web page fetching tool chain that has a jQuery-like selector and supports method chaining.
|
|
8
|
|
9 Here is an example that shows the main idea: retrieve the content you want
|
|
10 from a div box in a web page, then post to and retrieve content from another
|
|
11 web page with the parameters you just built from the first retrieved content,
|
|
12 and finally store the result.
|
|
13
|
|
14 def func(s):
|
|
15 return {'msg':s}
|
|
16
|
|
17 try:
|
246
|
18 c("http://example.tw/").get().find("////ul/text()") \
|
62
|
19 .build_param( func ).post_to("http://example2.com") \
|
|
20 .save_as('hellow.html')
|
|
21 except:
|
|
22 pass
|
|
23
|
|
24 A more complex example:
|
|
25
|
|
26 try:
|
|
27 c("http://example.tw/").retry(4, '5m').get() \
|
|
28 .find("#id > div") \
|
|
29 .build_param( func ).post_to("http://example2.com") \
|
|
30 .save_as('hellow.html') \
|
|
31 .end().find("#id2 > img").download('pretty-%s.jpg'). \
|
|
32 tar_and_zip("pretty_girl.tar.gz")
|
|
33 except NotFound:
|
|
34 print "the web page is not found."
|
|
35 except NoPermissionTosave:
|
|
36 print "the files can not be saved due to incorrect permissions."
|
|
37 except Exception:
|
|
38 print "unknown error."
|
|
39 """
|
|
40 from lazy.www.work import WorkFlow
|
246
|
41 from lazy.www.work.fetch import Fetcher
|
|
42 from lazy.www.work.storage import FileStorager
|
62
|
43 from lazy.www.core import SemiProduct
|
246
|
44 import os
|
|
45 import sys
|
|
46 import re
|
62
|
47
|
246
|
def parse_scheme(scheme):
    """
    Split a URI-like string into its scheme and the remainder.

    The first ``<word>://<rest>`` occurrence found in *scheme* is used, so
    ``'http://example.tw/'`` yields ``('http', 'example.tw/')``.

    :param scheme: the target location, e.g. ``'file:///tmp/a.html'``.
    :returns: a ``(target_type, path)`` tuple of strings.
    :raises SystemExit: with status 1 when *scheme* is not a string or
        contains no ``<word>://`` part; a message is written to stderr
        before exiting.
    """
    try:
        # Raw string: the pattern is the same as before, minus the
        # invalid "\w" / "\/" string escapes.
        found = re.search(r"(\w+)://(.*/?)", scheme)
    except TypeError:
        # Non-string input (e.g. None) cannot be matched at all; treat it
        # like any other unsupported scheme instead of hiding every error
        # behind a bare ``except``.
        found = None

    if found is None:
        # Report failures on stderr and with a non-zero exit status so that
        # callers and shells can tell the run did not succeed.
        sys.stderr.write("the scheme is not supported.\n")
        sys.exit(1)

    return found.groups()
|
|
54
|
|
def c(scheme):
    """
    Build a WorkFlow for the location named by *scheme*.

    The scheme part decides which worker is used: ``file://`` locations
    are wrapped in a FileStorager built from the path after the scheme,
    anything else is wrapped in a Fetcher built from the full *scheme*
    string.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'

    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'
    """
    target_type, path = parse_scheme(scheme)

    #@todo: SemiProduct Factory.
    if target_type != 'file':
        # Remote (or otherwise non-file) targets keep the complete URL.
        return WorkFlow(Fetcher(SemiProduct(source=scheme)))

    # Local files only need the part after "file://".
    return WorkFlow(FileStorager(SemiProduct(source=path)))
|
62
|
75
|
|
if __name__ == '__main__':
    # When run as a script, execute the doctests embedded in this module.
    from doctest import testmod
    testmod()
|