Mercurial > luasocket
comparison etc/check-links.lua @ 0:4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
author | Eric Wing <ewing . public |-at-| gmail . com> |
---|---|
date | Tue, 26 Aug 2008 18:40:01 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4b915342e2a8 |
---|---|
1 ----------------------------------------------------------------------------- | |
2 -- Little program that checks links in HTML files, using coroutines and | |
3 -- non-blocking I/O via the dispatcher module. | |
4 -- LuaSocket sample files | |
5 -- Author: Diego Nehab | |
6 -- RCS ID: $$ | |
7 ----------------------------------------------------------------------------- | |
8 local url = require("socket.url") | |
9 local dispatch = require("dispatch") | |
10 local http = require("socket.http") | |
-- Timeout setting exposed by the dispatch module.
-- NOTE(review): units are not visible from this file -- presumably seconds.
dispatch.TIMEOUT = 10

-- Make sure the user knows how to invoke us: at least one URL is required,
-- and a lone '-n' flag does not count as one.
arg = arg or {}
local nurls = #arg
if arg[1] == "-n" then nurls = nurls - 1 end
if nurls < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- There is no global 'exit' in Lua 5.x; calling it raised an error after
    -- the usage text was printed. os.exit terminates the script cleanly, and
    -- a non-zero status tells the caller the invocation was wrong.
    os.exit(1)
end
19 | |
-- Pick the dispatcher flavour. A leading '-n' requests non-blocking I/O and
-- selects the real, coroutine-based dispatcher interface; without it the
-- fake, sequential (blocking) interface is used. The flag is removed from
-- 'arg' so that only the addresses to check remain.
local mode = "sequential"
if arg[1] == "-n" then
    table.remove(arg, 1)
    mode = "coroutine"
end
-- 'handler' is a global: getstatus and the final stepping loop both use it.
handler = dispatch.newhandler(mode)

-- number of link checks started through the handler and not yet finished
local nthreads = 0
31 | |
-- Schedules a status check for one link through the dispatcher.
-- The link is parsed with "file" as the default scheme, and only links whose
-- scheme is "http" are checked; every other link is ignored. A check issues
-- a HEAD request on a socket created by the handler and writes one
-- tab-indented line to the default output: the link alone when the request
-- succeeded with code 200, otherwise the link followed by the second value
-- returned by http.request. 'nthreads' counts the checks still running.
function getstatus(link)
    local parts = url.parse(link, {scheme = "file"})
    if parts.scheme ~= "http" then return end
    nthreads = nthreads + 1
    handler:start(function()
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if not ok or code ~= 200 then
            io.write('\t', link, ': ', tostring(code), '\n')
        else
            io.write('\t', link, '\n')
        end
        -- this check is done; the main loop stops once the count is zero
        nthreads = nthreads - 1
    end)
end
49 | |
-- Reads the whole file named by a URL-escaped path.
-- Returns the file contents on success, or nil plus the message reported
-- by io.open when the file cannot be opened.
function readfile(path)
    local handle, err = io.open(url.unescape(path), "r")
    if not handle then return nil, err end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
59 | |
-- Fetches the document named by 'u', which may be an http URL or a local
-- file (the scheme defaults to "file" when the URL carries none).
-- Returns three values: the base URL to resolve links against, the document
-- body (nil on failure) and an error value (nil on success).
-- NOTE(review): this global definition replaces Lua's standard 'load'
-- function for the rest of the program.
function load(u)
    local parts = url.parse(u, { scheme = "file" })
    local scheme = parts.scheme
    if scheme == "http" then
        local base = u
        local body, code, headers = http.request(u)
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        local err
        if not body then err = code end
        return base, body, err
    end
    if scheme == "file" then
        local body, err = readfile(parts.path)
        return u, body, err
    end
    return u, nil, string.format("unhandled scheme '%s'", scheme)
end
78 | |
-- Extracts every link target (href attribute value) from an HTML document.
-- body: the HTML text to scan.
-- base: URL against which each href is made absolute.
-- Returns an array of absolute links: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones. Hrefs inside HTML comments are
-- ignored.
function getlinks(body, base)
    local links = {}
    -- Record one href and return "" so that gsub deletes the attribute from
    -- the text. Since Lua 5.1 a callback that returns nil leaves the match
    -- in place, which let the unquoted pass below pick up every quoted href
    -- a second time, quotes included.
    local function collect(href)
        links[#links + 1] = url.absolute(base, href)
        return ""
    end
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    -- extract quoted links, removing each one from the text as we go
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- what is left is unquoted: the value ends at whitespace or at the '>'
    -- that closes the tag, so following attributes are not swallowed
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>]+)", collect)
    return links
end
95 | |
-- Loads one page and schedules a status check for every link it contains.
-- When the page cannot be loaded, the error is printed and nothing is
-- checked.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
105 | |
-- Check the links of every page named on the command line. A bare path is
-- resolved against "file:" so that local files need no explicit scheme.
for i = 1, #arg do
    local page = url.absolute("file:", arg[i])
    checklinks(page)
end

-- Keep stepping the dispatcher until every started check has finished.
while nthreads > 0 do
    handler:step()
end