annotate etc/check-links.lua @ 2:9d9266316a91

Resync with Git
author Eric Wing <ewing . public |-at-| gmail . com>
date Fri, 29 Aug 2008 22:48:39 -0700
parents 4b915342e2a8
children
rev   line source
0
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
1 -----------------------------------------------------------------------------
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
2 -- Little program that checks links in HTML files, using coroutines and
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
3 -- non-blocking I/O via the dispatcher module.
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
4 -- LuaSocket sample files
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
5 -- Author: Diego Nehab
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
6 -- RCS ID: $$
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
7 -----------------------------------------------------------------------------
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
8 local url = require("socket.url")
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
9 local dispatch = require("dispatch")
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
10 local http = require("socket.http")
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Timeout handed to the dispatch module (presumably seconds -- confirm
-- against dispatch.lua).
dispatch.TIMEOUT = 10

-- Make sure the user knows how to invoke us.
-- `arg` is the command-line argument table supplied by the stand-alone
-- interpreter; fall back to an empty table when the host did not set it.
arg = arg or {}
-- Testing arg[1] directly works on every Lua version, unlike table.getn
-- (deprecated in 5.1, removed in 5.2).
if arg[1] == nil then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- The bare global `exit` does not exist in Lua 5.x; calling it raised
    -- "attempt to call a nil value" instead of terminating cleanly.
    os.exit(1)
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
19
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- A leading '-n' argument selects non-blocking mode.
local nonblocking = (arg[1] == "-n")
if nonblocking then
    -- Drop the flag so that only the addresses to check remain in `arg`.
    table.remove(arg, 1)
end
-- Non-blocking I/O uses the real ("coroutine") dispatcher interface;
-- blocking I/O uses the fake ("sequential") one.
handler = dispatch.newhandler(nonblocking and "coroutine" or "sequential")

-- Number of status checks that have been started but have not finished yet.
local nthreads = 0
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
31
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Start a status check for `link` through the dispatcher.
-- Only links whose scheme is "http" are checked; any other scheme is
-- silently skipped. The file-level `nthreads` counter is incremented before
-- the check is handed to the handler and decremented once the result has
-- been printed.
function getstatus(link)
    local scheme = url.parse(link, {scheme = "file"}).scheme
    if scheme ~= "http" then return end

    nthreads = nthreads + 1
    handler:start(function()
        -- HEAD is enough: only the status matters, not the body.
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        local healthy = ok and code == 200
        if healthy then
            io.write('\t', link, '\n')
        else
            -- `code` holds the HTTP status, or whatever http.request
            -- returned in second position when the request failed.
            io.write('\t', link, ': ', tostring(code), '\n')
        end
        nthreads = nthreads - 1
    end)
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
49
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Read the whole contents of a local file.
-- `path` is URL-unescaped before it is opened.
-- Returns the file contents on success, or nil plus the message from
-- io.open when the file cannot be opened.
function readfile(path)
    local handle, err = io.open(url.unescape(path), "r")
    if not handle then return nil, err end

    local contents = handle:read("*a")
    handle:close()
    return contents
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
59
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Fetch the document found at `u`.
-- URLs without a scheme are treated as "file". Supported schemes are "http"
-- (fetched with http.request) and "file" (read with readfile).
-- Returns three values: the base URL to resolve relative links against,
-- the document body (nil on failure), and an error value (nil on success).
-- NOTE(review): being a global named `load`, this replaces Lua's built-in
-- `load` for the rest of the program; renaming it would also require
-- updating the call in checklinks.
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme
    local base = u
    local body, err

    if scheme == "http" then
        local code, headers
        body, code, headers = http.request(u)
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        if not body then
            -- on failure the second result of http.request is the error
            err = code
        end
    elseif scheme == "file" then
        body, err = readfile(parsed.path)
    else
        err = string.format("unhandled scheme '%s'", scheme)
    end

    return base, body, err
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
78
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Extract every href target found in an HTML document.
-- `body` is the HTML text and `base` is the URL used to turn relative
-- hrefs into absolute ones (via url.absolute).
-- Returns an array of absolute link strings: double-quoted hrefs first,
-- then single-quoted ones, then unquoted ones.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}

    -- Record one href and erase it from the text. Returning "" matters:
    -- in Lua 5.1 a gsub callback that returns nil leaves the match in
    -- place, so the unquoted pass below would capture every quoted href a
    -- second time (quotes and trailing attributes included) and add bogus
    -- duplicate links.
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end

    -- extract links: double-quoted, single-quoted, then unquoted values
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", collect)
    return links
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
95
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Load the document at `address` and start a status check for every link
-- found in it. When the document cannot be loaded, the error is printed
-- and nothing else happens.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
105
4b915342e2a8 LuaSocket 2.0.2 + CMake build description.
Eric Wing <ewing . public |-at-| gmail . com>
parents:
diff changeset
-- Check every address given on the command line; url.absolute is called
-- with "file:" as the base for each one.
for _, target in ipairs(arg) do
    local absolute = url.absolute("file:", target)
    checklinks(absolute)
end

-- Keep stepping the dispatcher until every started status check has
-- reported back.
while nthreads > 0 do
    handler:step()
end