Mercurial > luasocket
diff etc/check-links.lua @ 0:4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
author | Eric Wing <ewing . public |-at-| gmail . com> |
---|---|
date | Tue, 26 Aug 2008 18:40:01 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/etc/check-links.lua Tue Aug 26 18:40:01 2008 -0700 @@ -0,0 +1,112 @@ +----------------------------------------------------------------------------- +-- Little program that checks links in HTML files, using coroutines and +-- non-blocking I/O via the dispatcher module. +-- LuaSocket sample files +-- Author: Diego Nehab +-- RCS ID: $$ +----------------------------------------------------------------------------- +local url = require("socket.url") +local dispatch = require("dispatch") +local http = require("socket.http") +dispatch.TIMEOUT = 10 + +-- make sure the user knows how to invoke us +arg = arg or {} +if table.getn(arg) < 1 then + print("Usage:\n luasocket check-links.lua [-n] {<url>}") + exit() +end + +-- '-n' means we are running in non-blocking mode +if arg[1] == "-n" then + -- if non-blocking I/O was requested, use real dispatcher interface + table.remove(arg, 1) + handler = dispatch.newhandler("coroutine") +else + -- if using blocking I/O, use fake dispatcher interface + handler = dispatch.newhandler("sequential") +end + +local nthreads = 0 + +-- get the status of a URL using the dispatcher +function getstatus(link) + local parsed = url.parse(link, {scheme = "file"}) + if parsed.scheme == "http" then + nthreads = nthreads + 1 + handler:start(function() + local r, c, h, s = http.request{ + method = "HEAD", + url = link, + create = handler.tcp + } + if r and c == 200 then io.write('\t', link, '\n') + else io.write('\t', link, ': ', tostring(c), '\n') end + nthreads = nthreads - 1 + end) + end +end + +function readfile(path) + path = url.unescape(path) + local file, error = io.open(path, "r") + if file then + local body = file:read("*a") + file:close() + return body + else return nil, error end +end + +function load(u) + local parsed = url.parse(u, { scheme = "file" }) + local body, headers, code, error + local base = u + if parsed.scheme == "http" then + body, code, headers = http.request(u) + if code == 200 then + -- if there was a redirect, update base to reflect it + base = headers.location or base + end + if not body then + error = code + end + elseif parsed.scheme == "file" then + body, error = readfile(parsed.path) + else error = string.format("unhandled scheme '%s'", parsed.scheme) end + return base, body, error +end + +function getlinks(body, base) + -- get rid of comments + body = string.gsub(body, "%<%!%-%-.-%-%-%>", "") + local links = {} + -- extract links + body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', function(href) + table.insert(links, url.absolute(base, href)) + end) + body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", function(href) + table.insert(links, url.absolute(base, href)) + end) + string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", function(href) + table.insert(links, url.absolute(base, href)) + end) + return links +end + +function checklinks(address) + local base, body, error = load(address) + if not body then print(error) return end + print("Checking ", base) + local links = getlinks(body, base) + for _, link in ipairs(links) do + getstatus(link) + end +end + +for _, address in ipairs(arg) do + checklinks(url.absolute("file:", address)) +end + +while nthreads > 0 do + handler:step() +end