Mercurial > luasocket
annotate etc/check-links.lua @ 1:cf0892e34f45
Resyncing with Git repo
author | Eric Wing <ewing . public |-at-| gmail . com> |
---|---|
date | Wed, 27 Aug 2008 22:44:22 -0700 |
parents | 4b915342e2a8 |
children |
rev | line source |
---|---|
0
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
1 ----------------------------------------------------------------------------- |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
2 -- Little program that checks links in HTML files, using coroutines and |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
3 -- non-blocking I/O via the dispatcher module. |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
4 -- LuaSocket sample files |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
5 -- Author: Diego Nehab |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
6 -- RCS ID: $$ |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
7 ----------------------------------------------------------------------------- |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
8 local url = require("socket.url") |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
9 local dispatch = require("dispatch") |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
10 local http = require("socket.http") |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
11 dispatch.TIMEOUT = 10 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
12 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Make sure the user knows how to invoke us.
-- `arg` is the command-line argument table set up by the standalone
-- interpreter; fall back to an empty table when it is absent.
arg = arg or {}
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- There is no global exit() in Lua 5.x; calling it raised an error
    -- instead of terminating cleanly. Use os.exit with a failure status.
    os.exit(1)
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
19 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- A leading '-n' selects non-blocking mode: the flag is consumed from the
-- argument list and the "coroutine" dispatcher handler is used. Without it
-- the "sequential" handler is used instead.
local nonblocking = (arg[1] == "-n")
if nonblocking then
    table.remove(arg, 1)
end
handler = dispatch.newhandler(nonblocking and "coroutine" or "sequential")

-- number of status checks started but not yet finished
local nthreads = 0
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
31 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Start a status check for `link` through the dispatcher handler.
-- Only links whose scheme is "http" are checked; anything else is ignored.
-- Each check issues a HEAD request and writes one tab-indented line:
-- the link alone when the reply code is 200, otherwise the link followed
-- by ": " and the code (or error message) that came back.
function getstatus(link)
    local scheme = url.parse(link, {scheme = "file"}).scheme
    if scheme ~= "http" then return end
    -- count the pending check so the main loop knows when to stop stepping
    nthreads = nthreads + 1
    handler:start(function()
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        local suffix
        if ok and code == 200 then
            suffix = '\n'
        else
            suffix = ': ' .. tostring(code) .. '\n'
        end
        io.write('\t', link, suffix)
        nthreads = nthreads - 1
    end)
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
49 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Read the whole file named by a URL-escaped `path`.
-- Returns the file contents on success, or nil plus the message from
-- io.open when the file cannot be opened.
function readfile(path)
    local fh, err = io.open(url.unescape(path), "r")
    if not fh then
        return nil, err
    end
    local contents = fh:read("*a")
    fh:close()
    return contents
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
59 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Fetch the document at `u`, which may be an "http" or a "file" URL
-- (URLs without a scheme are treated as "file").
-- Returns three values: the base URL to resolve relative links against,
-- the document body (nil on failure), and an error value (nil on success).
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme

    if scheme == "http" then
        local body, code, headers = http.request(u)
        local base = u
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        local err
        if not body then
            -- on failure the second result of http.request is the error
            err = code
        end
        return base, body, err
    end

    if scheme == "file" then
        local body, err = readfile(parsed.path)
        return u, body, err
    end

    return u, nil, string.format("unhandled scheme '%s'", scheme)
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
78 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Extract every href target from an HTML `body` and resolve it against
-- `base` with url.absolute. Returns an array of absolute links.
-- HTML comments are stripped first. Double-quoted, single-quoted and
-- unquoted href values are handled in three successive passes.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Record one link and return "" so gsub deletes the matched attribute.
    -- In Lua 5.1+ a callback returning nil leaves the match in place, which
    -- let the unquoted pass below match the quoted hrefs a second time and
    -- produce duplicate links with the quotes still attached.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links: double-quoted, then single-quoted values
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- whatever hrefs remain are unquoted; take everything up to the next '>'
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", collect)
    return links
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
95 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Load the document at `address` and start a status check for every link
-- found in it. If the document cannot be loaded, print the error instead.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
105 |
4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff
changeset
|
-- Check each address given on the command line; arguments without a
-- scheme are resolved against "file:".
do
    local index = 1
    while arg[index] ~= nil do
        checklinks(url.absolute("file:", arg[index]))
        index = index + 1
    end
end

-- Keep stepping the handler until every started status check has finished.
while true do
    if not (nthreads > 0) then break end
    handler:step()
end