comparison etc/check-links.lua @ 0:4b915342e2a8

LuaSocket 2.0.2 + CMake build description.
author Eric Wing <ewing . public |-at-| gmail . com>
date Tue, 26 Aug 2008 18:40:01 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4b915342e2a8
1 -----------------------------------------------------------------------------
2 -- Little program that checks links in HTML files, using coroutines and
3 -- non-blocking I/O via the dispatcher module.
4 -- LuaSocket sample files
5 -- Author: Diego Nehab
6 -- RCS ID: $$
7 -----------------------------------------------------------------------------
8 local url = require("socket.url")
9 local dispatch = require("dispatch")
10 local http = require("socket.http")
-- timeout value handed to the dispatcher module
-- NOTE(review): presumably seconds -- confirm against the dispatch module
dispatch.TIMEOUT = 10

-- make sure the user knows how to invoke us
arg = arg or {}
-- '#' replaces the deprecated table.getn (removed in Lua 5.2)
if #arg < 1 then
    print("Usage:\n luasocket check-links.lua [-n] {<url>}")
    -- there is no global exit() in Lua; os.exit is the standard way to stop.
    -- A non-zero status tells the calling shell the invocation was wrong.
    os.exit(1)
end
19
-- Pick the dispatcher flavour from the command line. A leading '-n' asks
-- for non-blocking I/O (the real, coroutine-based dispatcher interface);
-- without it the blocking "sequential" stand-in is used.
do
    local mode = "sequential"
    if arg[1] == "-n" then
        -- drop the flag so that only addresses remain in arg
        table.remove(arg, 1)
        mode = "coroutine"
    end
    -- kept global on purpose: the rest of the script refers to it by name
    handler = dispatch.newhandler(mode)
end

-- number of status checks that were started and have not finished yet
local nthreads = 0
31
-- Schedule a status check for one link through the dispatcher.
-- Only links whose scheme is "http" are checked; anything else is ignored.
-- The check issues a HEAD request and writes one line per link: the link
-- alone when the answer is 200, otherwise the link followed by the code
-- (or error value) that came back.
function getstatus(link)
    local scheme = url.parse(link, {scheme = "file"}).scheme
    if scheme ~= "http" then return end
    -- count the check as pending until its worker function finishes
    nthreads = nthreads + 1
    local function probe()
        local body, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if body and code == 200 then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', tostring(code), '\n')
        end
        nthreads = nthreads - 1
    end
    handler:start(probe)
end
49
-- Read the whole contents of a local file.
-- path: URL-escaped file path (it is unescaped before being opened)
-- Returns the file contents as a string, or nil plus an error message
-- when the path is missing or the file cannot be opened.
function readfile(path)
    -- a bare "file:" URL parses with no path component; report it the same
    -- way as any other failure instead of crashing inside url.unescape
    if not path then
        return nil, "missing file path"
    end
    -- 'err' rather than 'error' so the builtin error() is not shadowed
    local file, err = io.open(url.unescape(path), "r")
    if not file then
        return nil, err
    end
    local body = file:read("*a")
    file:close()
    return body
end
59
-- Fetch the document behind an address.
-- u: address to fetch; a missing scheme defaults to "file"
-- Returns three values: the base URL to resolve links against, the
-- document body (nil on failure) and an error value (nil on success).
-- NOTE(review): this global replaces Lua's builtin load(); the name is kept
-- because the rest of the script calls it.
function load(u)
    local parsed = url.parse(u, { scheme = "file" })
    local scheme = parsed.scheme

    if scheme == "http" then
        local body, code, headers = http.request(u)
        local base = u
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        if body then
            return base, body, nil
        end
        -- on failure the second result of http.request is the error value
        return base, nil, code
    end

    if scheme == "file" then
        local body, err = readfile(parsed.path)
        return u, body, err
    end

    return u, nil, string.format("unhandled scheme '%s'", scheme)
end
78
-- Extract every href target from an HTML document.
-- body: HTML text
-- base: URL used to turn relative hrefs into absolute ones
-- Returns a list of absolute URLs: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Record the link and erase the attribute from the text. Returning ""
    -- matters: when a gsub callback returns nil the match is kept, and the
    -- later, looser patterns would pick up the same href a second time
    -- (quotes and trailing attributes included).
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- extract links, most specific quoting style first
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at the first whitespace or at the tag's '>'
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>]+)", collect)
    return links
end
95
-- Load one document and start a status check for every link found in it.
-- address: absolute URL of the document to inspect
-- Prints the error value and does nothing else when the document cannot
-- be loaded.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
105
-- check every address given on the command line; plain paths are resolved
-- against "file:" so they are treated as local files
for index = 1, #arg do
    checklinks(url.absolute("file:", arg[index]))
end

-- keep stepping the dispatcher until every started check has finished
while true do
    if nthreads <= 0 then break end
    handler:step()
end