view pyikriam/lazy/www/__init__.py @ 308:f6f56a47f383

implemented is_tavern_enough
author "Rex Tsai <chihchun@kalug.linux.org.tw>"
date Sun, 04 Jan 2009 23:49:41 +0800
parents 60c4b4b78a01
children
line wrap: on
line source

# -*- coding: utf-8 -*-
#
# Copyright 2008 Hsin Yi, Chen
"""
    [Note] the project is not available yet.

    A web page fetching tool chain that has a jQuery-like selector and supports method chaining.
    
    Here is an example that shows the main idea: retrieve the content you want
    from a div box in a web page, then post to another web page using the
    parameters you just built from that first content and retrieve the next
    content. Finally, store the result.
    
    def func(s):
        return {'msg':s}
    
    try:
        c("http://example.tw/").get().find("////ul/text()") \
            .build_param( func ).post_to("http://example2.com") \
            .save_as('hellow.html')
    except:
        pass
        
    more complex example
        
    try:
        c("http://example.tw/").retry(4, '5m').get() \
            .find("#id > div") \
            .build_param( func ).post_to("http://example2.com") \
            .save_as('hellow.html') \
            .end().find("#id2 > img").download('pretty-%s.jpg'). \
            tar_and_zip("pretty_girl.tar.gz")
    except NotFound:
        print "the web page is not found."
    except NoPermissionTosave:
        print "the files can not be save with incorrect permission."
    except Exception:
        print "unknown error."
"""
from lazy.www.work import WorkFlow
from lazy.www.work.fetch import Fetcher
from lazy.www.work.storage import FileStorager
from lazy.www.core import SemiProduct
import os
import sys
import re

def parse_scheme(scheme):
    """
    Split a target string of the form ``<type>://<rest>`` into its two parts.

    The first ``<word>://<rest>`` occurrence found in ``scheme`` is used.

    >>> parse_scheme('http://localhost:8080/')
    ('http', 'localhost:8080/')

    :param scheme: the target string, e.g. ``'http://example.tw/'`` or
        ``'file:///tmp/page.html'``.
    :returns: a ``(target_type, path)`` tuple: the word before ``://`` and
        everything after it up to the end of the line.
    :raises SystemExit: when ``scheme`` contains no ``<word>://`` part or is
        not a string; a message is written to stdout first.
    """
    try:
        # Raw string: ``\w`` and ``\/`` are regex escapes, not string escapes.
        # With two groups, findall() yields tuples; [0] is the first match.
        return re.findall(r"(\w+):\/\/(.*\/?)", scheme)[0]
    except (IndexError, TypeError):
        # IndexError: nothing matched. TypeError: ``scheme`` is not a string.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        sys.stdout.write("the scheme is not supported.")
        sys.exit()

def c(scheme):
    """
    Build a WorkFlow for the web page (or file) that ``scheme`` points to.

    ``scheme`` is split with parse_scheme(). A ``file://`` target is wrapped
    in a FileStorager working on the part after ``file://``; any other target
    is wrapped in a Fetcher working on the complete ``scheme`` string.

    The examples below presumably need an HTTP server on localhost:8080 that
    answers with 'It works!!' -- confirm before relying on them.

    >>> c('http://localhost:8080').get().worker.working_product.content
    'It works!!\\n'
    
    >>> c('http://localhost:8080').get().find('//text()')
    'It works!!\\n'    
    """
    kind, location = parse_scheme(scheme)

    # TODO: move the SemiProduct construction into a factory.
    if kind != 'file':
        # Remote target: the fetcher is given the full, unsplit scheme string.
        return WorkFlow(Fetcher(SemiProduct(source=scheme)))

    # Local target: only the part after "file://" is handed to the storager.
    return WorkFlow(FileStorager(SemiProduct(source=location)))

# When executed as a script, run the doctests embedded in this module's
# docstrings (doctest.testmod() with no arguments tests the __main__ module).
if __name__ == '__main__':
    import doctest
    doctest.testmod()