diff etc/check-links.lua @ 0:4b915342e2a8

LuaSocket 2.0.2 + CMake build description.
author Eric Wing <ewing . public |-at-| gmail . com>
date Tue, 26 Aug 2008 18:40:01 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/etc/check-links.lua	Tue Aug 26 18:40:01 2008 -0700
@@ -0,0 +1,112 @@
+-----------------------------------------------------------------------------
+-- Little program that checks links in HTML files, using coroutines and
+-- non-blocking I/O via the dispatcher module.
+-- LuaSocket sample files
+-- Author: Diego Nehab
+-- RCS ID: $$
+-----------------------------------------------------------------------------
+local url = require("socket.url")
+local dispatch = require("dispatch")
+local http = require("socket.http")
+-- Timeout setting handed to the dispatcher module (presumably in seconds --
+-- TODO confirm against dispatch.lua).
+dispatch.TIMEOUT = 10
+
+-- Make sure the user knows how to invoke us: at least one argument is needed.
+-- 'arg' is defaulted to an empty table in case the script is started by a
+-- host that does not set it.
+arg = arg or {}
+if #arg < 1 then
+    print("Usage:\n  luasocket check-links.lua [-n] {<url>}")
+    -- Lua 5.x has no global exit(); calling it raised "attempt to call a nil
+    -- value".  Leave through os.exit with a non-zero status instead.
+    os.exit(1)
+end
+
+-- A leading '-n' requests non-blocking I/O, served by the real dispatcher
+-- interface ("coroutine"); the flag is then dropped from 'arg' so that only
+-- addresses remain.  Without it, blocking I/O goes through the fake
+-- dispatcher interface ("sequential").
+local nonblocking = (arg[1] == "-n")
+if nonblocking then table.remove(arg, 1) end
+handler = dispatch.newhandler(nonblocking and "coroutine" or "sequential")
+
+-- number of link checks started through the handler and not yet finished
+local nthreads = 0
+
+-- Starts a status check for one link through the dispatcher.
+-- Only links whose scheme parses as "http" are checked; any other link is
+-- silently skipped.  The check is a HEAD request; when it completes, one
+-- tab-indented line is written to standard output: the link alone for a 200
+-- reply, otherwise the link followed by the second value http.request
+-- returned.
+function getstatus(link)
+    local scheme = url.parse(link, {scheme = "file"}).scheme
+    if scheme ~= "http" then return end
+    -- counted before the request starts, released when it has been reported
+    nthreads = nthreads + 1
+    handler:start(function()
+        local ok, code = http.request{
+            method = "HEAD",
+            url = link,
+            create = handler.tcp
+        }
+        if not ok or code ~= 200 then
+            io.write('\t', link, ': ', tostring(code), '\n')
+        else
+            io.write('\t', link, '\n')
+        end
+        nthreads = nthreads - 1
+    end)
+end
+
+-- Reads a whole file.  'path' is URL-unescaped before it is opened.
+-- Returns the file contents, or nil plus the message from io.open when the
+-- file cannot be opened.
+function readfile(path)
+    local handle, err = io.open(url.unescape(path), "r")
+    if not handle then return nil, err end
+    local contents = handle:read("*a")
+    handle:close()
+    return contents
+end
+
+-- Fetches the document named by 'u'; a URL without a scheme is treated as
+-- "file".  Always returns three values: the base URL to resolve relative
+-- links against, the document body (nil on failure) and an error value.
+-- NOTE(review): this global definition replaces Lua's built-in load().
+function load(u)
+    local parts = url.parse(u, { scheme = "file" })
+    local scheme = parts.scheme
+    local base = u
+    if scheme == "http" then
+        local body, code, headers = http.request(u)
+        if code == 200 then
+            -- if there was a redirect, update base to reflect it
+            base = headers.location or base
+        end
+        local err
+        -- without a body, the second result is reported as the error
+        if not body then err = code end
+        return base, body, err
+    end
+    if scheme == "file" then
+        local body, err = readfile(parts.path)
+        return base, body, err
+    end
+    return base, nil, string.format("unhandled scheme '%s'", scheme)
+end
+
+-- Extracts every HREF target from an HTML document.
+-- 'body' is the document text and 'base' the URL relative links are resolved
+-- against.  Returns a list of absolute URLs, grouped by quoting style:
+-- double-quoted values first, then single-quoted, then unquoted.
+function getlinks(body, base)
+    local links = {}
+    -- Record the link and return "" so that gsub deletes the attribute just
+    -- handled.  Since Lua 5.1 a callback returning nil keeps the match in the
+    -- string, which let the looser unquoted pattern below pick up quoted
+    -- values a second time, quotes included.
+    local function collect(href)
+        links[#links + 1] = url.absolute(base, href)
+        return ""
+    end
+    -- get rid of comments
+    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
+    -- quoted values: everything between the matching quotes
+    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
+    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
+    -- unquoted values end at the first whitespace or '>', so attributes that
+    -- follow the HREF are not swallowed into the URL
+    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>\"']+)", collect)
+    return links
+end
+
+-- Loads the document at 'address' and starts a status check for every link
+-- found in it.  When the document cannot be loaded, the error is printed
+-- and nothing is checked.
+function checklinks(address)
+    local base, body, err = load(address)
+    if body then
+        print("Checking ", base)
+        for _, link in ipairs(getlinks(body, base)) do
+            getstatus(link)
+        end
+    else
+        print(err)
+    end
+end
+
+-- check every address left on the command line; each one is first made
+-- absolute against "file:"
+for i = 1, #arg do
+    checklinks(url.absolute("file:", arg[i]))
+end
+
+-- keep stepping the handler until every started check has reported back
+while nthreads > 0 do
+    handler:step()
+end