Mercurial > luasocket
diff src/url.lua @ 0:4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
author | Eric Wing <ewing . public |-at-| gmail . com> |
---|---|
date | Tue, 26 Aug 2008 18:40:01 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/url.lua Tue Aug 26 18:40:01 2008 -0700 @@ -0,0 +1,297 @@ +----------------------------------------------------------------------------- +-- URI parsing, composition and relative URL resolution +-- LuaSocket toolkit. +-- Author: Diego Nehab +-- RCS ID: $Id: url.lua,v 1.38 2006/04/03 04:45:42 diego Exp $ +----------------------------------------------------------------------------- + +----------------------------------------------------------------------------- +-- Declare module +----------------------------------------------------------------------------- +local string = require("string") +local base = _G +local table = require("table") +module("socket.url") + +----------------------------------------------------------------------------- +-- Module version +----------------------------------------------------------------------------- +_VERSION = "URL 1.0.1" + +----------------------------------------------------------------------------- +-- Encodes a string into its escaped hexadecimal representation +-- Input +-- s: binary string to be encoded +-- Returns +-- escaped representation of string binary +----------------------------------------------------------------------------- +function escape(s) + return string.gsub(s, "([^A-Za-z0-9_])", function(c) + return string.format("%%%02x", string.byte(c)) + end) +end + +----------------------------------------------------------------------------- +-- Protects a path segment, to prevent it from interfering with the +-- url parsing. +-- Input +-- s: binary string to be encoded +-- Returns +-- escaped representation of string binary +----------------------------------------------------------------------------- +local function make_set(t) + local s = {} + for i,v in base.ipairs(t) do + s[t[i]] = 1 + end + return s +end + +-- these are allowed withing a path segment, along with alphanum +-- other characters must be escaped +local segment_set = make_set { + "-", "_", ".", "!", "~", "*", "'", "(", + ")", ":", "@", "&", "=", "+", "$", ",", +} + +local function protect_segment(s) + return string.gsub(s, "([^A-Za-z0-9_])", function (c) + if segment_set[c] then return c + else return string.format("%%%02x", string.byte(c)) end + end) +end + +----------------------------------------------------------------------------- +-- Encodes a string into its escaped hexadecimal representation +-- Input +-- s: binary string to be encoded +-- Returns +-- escaped representation of string binary +----------------------------------------------------------------------------- +function unescape(s) + return string.gsub(s, "%%(%x%x)", function(hex) + return string.char(base.tonumber(hex, 16)) + end) +end + +----------------------------------------------------------------------------- +-- Builds a path from a base path and a relative path +-- Input +-- base_path +-- relative_path +-- Returns +-- corresponding absolute path +----------------------------------------------------------------------------- +local function absolute_path(base_path, relative_path) + if string.sub(relative_path, 1, 1) == "/" then return relative_path end + local path = string.gsub(base_path, "[^/]*$", "") + path = path .. relative_path + path = string.gsub(path, "([^/]*%./)", function (s) + if s ~= "./" then return s else return "" end + end) + path = string.gsub(path, "/%.$", "/") + local reduced + while reduced ~= path do + reduced = path + path = string.gsub(reduced, "([^/]*/%.%./)", function (s) + if s ~= "../../" then return "" else return s end + end) + end + path = string.gsub(reduced, "([^/]*/%.%.)$", function (s) + if s ~= "../.." then return "" else return s end + end) + return path +end + +----------------------------------------------------------------------------- +-- Parses a url and returns a table with all its parts according to RFC 2396 +-- The following grammar describes the names given to the URL parts +-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment> +-- <authority> ::= <userinfo>@<host>:<port> +-- <userinfo> ::= <user>[:<password>] +-- <path> :: = {<segment>/}<segment> +-- Input +-- url: uniform resource locator of request +-- default: table with default values for each field +-- Returns +-- table with the following fields, where RFC naming conventions have +-- been preserved: +-- scheme, authority, userinfo, user, password, host, port, +-- path, params, query, fragment +-- Obs: +-- the leading '/' in {/<path>} is considered part of <path> +----------------------------------------------------------------------------- +function parse(url, default) + -- initialize default parameters + local parsed = {} + for i,v in base.pairs(default or parsed) do parsed[i] = v end + -- empty url is parsed to nil + if not url or url == "" then return nil, "invalid url" end + -- remove whitespace + -- url = string.gsub(url, "%s", "") + -- get fragment + url = string.gsub(url, "#(.*)$", function(f) + parsed.fragment = f + return "" + end) + -- get scheme + url = string.gsub(url, "^([%w][%w%+%-%.]*)%:", + function(s) parsed.scheme = s; return "" end) + -- get authority + url = string.gsub(url, "^//([^/]*)", function(n) + parsed.authority = n + return "" + end) + -- get query stringing + url = string.gsub(url, "%?(.*)", function(q) + parsed.query = q + return "" + end) + -- get params + url = string.gsub(url, "%;(.*)", function(p) + parsed.params = p + return "" + end) + -- path is whatever was left + if url ~= "" then parsed.path = url end + local authority = parsed.authority + if not authority then return parsed end + authority = string.gsub(authority,"^([^@]*)@", + function(u) parsed.userinfo = u; return "" end) + authority = string.gsub(authority, ":([^:]*)$", + function(p) parsed.port = p; return "" end) + if authority ~= "" then parsed.host = authority end + local userinfo = parsed.userinfo + if not userinfo then return parsed end + userinfo = string.gsub(userinfo, ":([^:]*)$", + function(p) parsed.password = p; return "" end) + parsed.user = userinfo + return parsed +end + +----------------------------------------------------------------------------- +-- Rebuilds a parsed URL from its components. +-- Components are protected if any reserved or unallowed characters are found +-- Input +-- parsed: parsed URL, as returned by parse +-- Returns +-- a stringing with the corresponding URL +----------------------------------------------------------------------------- +function build(parsed) + local ppath = parse_path(parsed.path or "") + local url = build_path(ppath) + if parsed.params then url = url .. ";" .. parsed.params end + if parsed.query then url = url .. "?" .. parsed.query end + local authority = parsed.authority + if parsed.host then + authority = parsed.host + if parsed.port then authority = authority .. ":" .. parsed.port end + local userinfo = parsed.userinfo + if parsed.user then + userinfo = parsed.user + if parsed.password then + userinfo = userinfo .. ":" .. parsed.password + end + end + if userinfo then authority = userinfo .. "@" .. authority end + end + if authority then url = "//" .. authority .. url end + if parsed.scheme then url = parsed.scheme .. ":" .. url end + if parsed.fragment then url = url .. "#" .. parsed.fragment end + -- url = string.gsub(url, "%s", "") + return url +end + +----------------------------------------------------------------------------- +-- Builds a absolute URL from a base and a relative URL according to RFC 2396 +-- Input +-- base_url +-- relative_url +-- Returns +-- corresponding absolute url +----------------------------------------------------------------------------- +function absolute(base_url, relative_url) + if base.type(base_url) == "table" then + base_parsed = base_url + base_url = build(base_parsed) + else + base_parsed = parse(base_url) + end + local relative_parsed = parse(relative_url) + if not base_parsed then return relative_url + elseif not relative_parsed then return base_url + elseif relative_parsed.scheme then return relative_url + else + relative_parsed.scheme = base_parsed.scheme + if not relative_parsed.authority then + relative_parsed.authority = base_parsed.authority + if not relative_parsed.path then + relative_parsed.path = base_parsed.path + if not relative_parsed.params then + relative_parsed.params = base_parsed.params + if not relative_parsed.query then + relative_parsed.query = base_parsed.query + end + end + else + relative_parsed.path = absolute_path(base_parsed.path or "", + relative_parsed.path) + end + end + return build(relative_parsed) + end +end + +----------------------------------------------------------------------------- +-- Breaks a path into its segments, unescaping the segments +-- Input +-- path +-- Returns +-- segment: a table with one entry per segment +----------------------------------------------------------------------------- +function parse_path(path) + local parsed = {} + path = path or "" + --path = string.gsub(path, "%s", "") + string.gsub(path, "([^/]+)", function (s) table.insert(parsed, s) end) + for i = 1, table.getn(parsed) do + parsed[i] = unescape(parsed[i]) + end + if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end + if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end + return parsed +end + +----------------------------------------------------------------------------- +-- Builds a path component from its segments, escaping protected characters. +-- Input +-- parsed: path segments +-- unsafe: if true, segments are not protected before path is built +-- Returns +-- path: corresponding path stringing +----------------------------------------------------------------------------- +function build_path(parsed, unsafe) + local path = "" + local n = table.getn(parsed) + if unsafe then + for i = 1, n-1 do + path = path .. parsed[i] + path = path .. "/" + end + if n > 0 then + path = path .. parsed[n] + if parsed.is_directory then path = path .. "/" end + end + else + for i = 1, n-1 do + path = path .. protect_segment(parsed[i]) + path = path .. "/" + end + if n > 0 then + path = path .. protect_segment(parsed[n]) + if parsed.is_directory then path = path .. "/" end + end + end + if parsed.is_absolute then path = "/" .. path end + return path +end