62
|
1 # -*- coding: utf-8 -*-
|
|
2 #
|
|
3 # Copyright 2008 Hsin Yi, Chen
|
|
4 """
|
|
5 [Note] the project is not available yet.
|
|
6
|
|
7 A web page fetching tool chain that has a jQuery-like selector and supports chained operations.
|
|
8
|
|
9 Here is an example that shows the main idea: retrieve the content you want
|
|
10 from a div box in a web page, then post to and retrieve the next content from another
|
|
11 web page, using the params you just built from the content of the first retrieval.
|
|
12 Finally, store the result.
|
|
13
|
|
14 def func(s):
|
|
15 return {'msg':s}
|
|
16
|
|
17 try:
|
|
18 c("http://example.tw/").get().find("#id > div") \
|
|
19 .build_param( func ).post_to("http://example2.com") \
|
|
20 .save_as('hellow.html')
|
|
21 except:
|
|
22 pass
|
|
23
|
|
24 more complex example
|
|
25
|
|
26 try:
|
|
27 c("http://example.tw/").retry(4, '5m').get() \
|
|
28 .find("#id > div") \
|
|
29 .build_param( func ).post_to("http://example2.com") \
|
|
30 .save_as('hellow.html') \
|
|
31 .end().find("#id2 > img").download('pretty-%s.jpg'). \
|
|
32 tar_and_zip("pretty_girl.tar.gz")
|
|
33 except NotFound:
|
|
34 print "the web page is not found."
|
|
35 except NoPermissionTosave:
|
|
36 print "the files can not be saved because of incorrect permissions."
|
|
37 except Exception:
|
|
38 print "unknown error."
|
|
39 """
|
|
40 from lazy.www.work import WorkFlow
|
|
41 from lazy.www.work.fetch import Fetcher, install_opener
|
|
42 from lazy.www.core import SemiProduct
|
|
43
|
|
def c(url):
    """
    Connect to the web page at *url*.

    The url is wrapped in a ``SemiProduct`` (as its ``source``), that
    product is handed to a ``Fetcher``, and the ``WorkFlow`` built around
    the fetcher is returned.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'

    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'
    """
    fetcher = Fetcher(SemiProduct(source=url))
    return WorkFlow(fetcher)
|
|
57
|
|
def lz_install(**kwds):
    """
    Install the component selected by the ``name`` keyword argument.

    Only ``name='opener'`` is handled: ``install_opener`` is called with the
    value of the ``cookiefile`` keyword (``None`` when it was not given).
    Any other ``name``, or no ``name`` at all, does nothing.
    """
    component = kwds.get('name')
    if component != 'opener':
        return
    cookie_file = kwds.get('cookiefile')
    install_opener(cookie_file)
|
|
61
|
|
if __name__ == '__main__':
    # Executed as a script: run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()