Blame etc/check-links.lua

Packit b53373
-----------------------------------------------------------------------------
Packit b53373
-- Little program that checks links in HTML files, using coroutines and
Packit b53373
-- non-blocking I/O via the dispatcher module.
Packit b53373
-- LuaSocket sample files
Packit b53373
-- Author: Diego Nehab
Packit b53373
-----------------------------------------------------------------------------
Packit b53373
local url = require("socket.url")
Packit b53373
local dispatch = require("dispatch")
Packit b53373
local http = require("socket.http")
Packit b53373
-- timeout setting consumed by the dispatcher module
-- NOTE(review): presumably expressed in seconds -- confirm against dispatch.lua
dispatch.TIMEOUT = 10

-- make sure the user knows how to invoke us
arg = arg or {}
if #arg < 1 then
    print("Usage:\n  luasocket check-links.lua [-n] {<url>}")
    -- standard Lua has no global exit(); calling it raised a runtime error
    -- instead of terminating, so leave through the os library and signal
    -- the usage error with a non-zero status
    os.exit(1)
end
Packit b53373
Packit b53373
-- Pick the dispatcher flavour and publish it as the global 'handler'.
-- A leading '-n' argument asks for non-blocking I/O (the real, coroutine
-- based dispatcher) and is consumed here so that only addresses remain in
-- 'arg'; otherwise the fake, sequential dispatcher is used for blocking I/O.
do
    local flavour = "sequential"
    if arg[1] == "-n" then
        table.remove(arg, 1)
        flavour = "coroutine"
    end
    handler = dispatch.newhandler(flavour)
end
Packit b53373
Packit b53373
-- number of status checks that were started and have not finished yet
local nthreads = 0

-- Asks the dispatcher to run a HEAD request for 'link' and prints the
-- outcome: the bare link when the answer is 200, otherwise the link
-- followed by whatever came back in the status position.
-- Links whose scheme is not "http" are ignored.
function getstatus(link)
    local parts = url.parse(link, {scheme = "file"})
    if parts.scheme ~= "http" then return end
    -- count the check before handing it over; the job itself decrements
    nthreads = nthreads + 1
    local function probe()
        local ok, code = http.request{
            method = "HEAD",
            url = link,
            create = handler.tcp
        }
        if ok and code == 200 then
            io.write('\t', link, '\n')
        else
            io.write('\t', link, ': ', tostring(code), '\n')
        end
        nthreads = nthreads - 1
    end
    handler:start(probe)
end
Packit b53373
Packit b53373
-- Reads a whole local file.
-- 'path' is URL-escaped; it is unescaped before being opened.
-- Returns the file contents, or nil plus the message from io.open when the
-- file cannot be opened.
function readfile(path)
    local handle, err = io.open(url.unescape(path), "r")
    if not handle then return nil, err end
    local contents = handle:read("*a")
    handle:close()
    return contents
end
Packit b53373
Packit b53373
-- Fetches the document at 'u', which may use the "http" or "file" scheme
-- (a missing scheme is treated as "file").
-- Returns three values: the base address to resolve links against, the
-- document body (nil on failure) and an error value (nil on success).
-- NOTE(review): being a global named 'load', this definition replaces the
-- standard library function of the same name for the rest of the program.
function load(u)
    local parts = url.parse(u, { scheme = "file" })
    local scheme = parts.scheme
    local base = u
    local body, err
    if scheme == "http" then
        local code, headers
        body, code, headers = http.request(u)
        if code == 200 then
            -- if there was a redirect, update base to reflect it
            base = headers.location or base
        end
        -- on failure the second result of http.request is used as the error
        if not body then err = code end
    elseif scheme == "file" then
        body, err = readfile(parts.path)
    else
        err = string.format("unhandled scheme '%s'", scheme)
    end
    return base, body, err
end
Packit b53373
Packit b53373
-- Extracts every href found in an HTML document.
-- 'body' is the HTML text and 'base' the address relative links are
-- resolved against (through url.absolute).
-- Returns an array of absolute links: double-quoted hrefs first, then
-- single-quoted ones, then unquoted ones, each group in document order.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Records one href and erases the matched attribute from the text.
    -- The explicit "" matters: a gsub callback that returns nil keeps the
    -- original match, which let the unquoted pattern below pick up every
    -- quoted href a second time (quotes and trailing attributes included).
    local function collect(href)
        links[#links + 1] = url.absolute(base, href)
        return ""
    end
    -- extract links, most specific quoting style first
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- an unquoted value ends at whitespace or at the end of the tag, so
    -- attributes that follow it are not swallowed into the link
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*([^%s>]+)", collect)
    return links
end
Packit b53373
Packit b53373
-- Loads the page at 'address' and starts a status check for every link
-- found in it. When the page cannot be loaded, the error is printed and
-- nothing else happens.
function checklinks(address)
    local base, body, err = load(address)
    if body then
        print("Checking ", base)
        for _, link in ipairs(getlinks(body, base)) do
            getstatus(link)
        end
    else
        print(err)
    end
end
Packit b53373
Packit b53373
-- check every page named on the command line; each address is resolved
-- against the base "file:" before being handed to checklinks
for _, page in ipairs(arg) do
    local target = url.absolute("file:", page)
    checklinks(target)
end

-- keep stepping the dispatcher until every started check has finished
while true do
    if not (nthreads > 0) then break end
    handler:step()
end