comparison src/url.lua @ 0:4b915342e2a8

LuaSocket 2.0.2 + CMake build description.
author Eric Wing <ewing . public |-at-| gmail . com>
date Tue, 26 Aug 2008 18:40:01 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4b915342e2a8
1 -----------------------------------------------------------------------------
2 -- URI parsing, composition and relative URL resolution
3 -- LuaSocket toolkit.
4 -- Author: Diego Nehab
5 -- RCS ID: $Id: url.lua,v 1.38 2006/04/03 04:45:42 diego Exp $
6 -----------------------------------------------------------------------------
7
8 -----------------------------------------------------------------------------
9 -- Declare module
10 -----------------------------------------------------------------------------
11 local string = require("string")
12 local base = _G
13 local table = require("table")
14 module("socket.url")
15
16 -----------------------------------------------------------------------------
17 -- Module version
18 -----------------------------------------------------------------------------
19 _VERSION = "URL 1.0.1"
20
-----------------------------------------------------------------------------
-- Encodes a string into its escaped hexadecimal representation
-- Every byte other than an ASCII letter, a digit or "_" is replaced by
-- "%" followed by its two-digit lowercase hexadecimal code.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of string binary (exactly one value)
-----------------------------------------------------------------------------
function escape(s)
    -- string.gsub also returns the number of substitutions; the outer
    -- parentheses drop it so callers that forward every result, such as
    -- table.insert(t, escape(s)), receive only the string
    return (string.gsub(s, "([^A-Za-z0-9_])", function(c)
        return string.format("%%%02x", string.byte(c))
    end))
end
33
-----------------------------------------------------------------------------
-- Turns a list into a lookup set
-- Input
--   t: array of values
-- Returns
--   table in which every listed value is a key mapped to 1
-----------------------------------------------------------------------------
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end

-- characters that may appear unescaped within a path segment, in addition
-- to alphanumerics and "_"; every other character must be escaped
local segment_set = make_set {
    "-", "_", ".", "!", "~", "*", "'", "(",
    ")", ":", "@", "&", "=", "+", "$", ",",
}

-----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with the
-- url parsing.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of string binary, followed by the match count
--   reported by string.gsub
-----------------------------------------------------------------------------
local function protect_segment(s)
    -- characters listed in segment_set pass through untouched; anything
    -- else becomes "%" plus its two-digit lowercase hexadecimal code
    local function protect_char(c)
        if not segment_set[c] then
            return string.format("%%%02x", string.byte(c))
        end
        return c
    end
    return string.gsub(s, "([^A-Za-z0-9_])", protect_char)
end
63
-----------------------------------------------------------------------------
-- Decodes an escaped hexadecimal string back into its binary form
-- Every "%" followed by two hexadecimal digits is replaced by the byte
-- with that code; any other text is left as it is.
-- Input
--   s: escaped string to be decoded
-- Returns
--   unescaped binary representation of the string (exactly one value)
-----------------------------------------------------------------------------
function unescape(s)
    -- the outer parentheses discard the substitution count that
    -- string.gsub returns as a second value
    return (string.gsub(s, "%%(%x%x)", function(hex)
        return string.char(base.tonumber(hex, 16))
    end))
end
76
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
--   base_path
--   relative_path
-- Returns
--   corresponding absolute path
-----------------------------------------------------------------------------
local function absolute_path(base_path, relative_path)
    -- a relative path that starts with "/" is returned untouched
    if string.sub(relative_path, 1, 1) == "/" then
        return relative_path
    end
    -- strip the last segment of the base and append the relative path
    local directory = string.gsub(base_path, "[^/]*$", "")
    local merged = directory .. relative_path
    -- drop every "./" segment, keeping segments that merely end in "."
    merged = string.gsub(merged, "([^/]*%./)", function (segment)
        if segment == "./" then return "" end
        return segment
    end)
    -- a trailing "/." collapses to "/"
    merged = string.gsub(merged, "/%.$", "/")
    -- repeatedly cancel "<segment>/../" pairs until nothing changes
    local previous
    repeat
        previous = merged
        merged = string.gsub(previous, "([^/]*/%.%./)", function (pair)
            if pair == "../../" then return pair end
            return ""
        end)
    until previous == merged
    -- finally cancel a trailing "<segment>/.."
    merged = string.gsub(previous, "([^/]*/%.%.)$", function (tail)
        if tail == "../.." then return tail end
        return ""
    end)
    return merged
end
105
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> :: = {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   nil followed by the message "invalid url" when url is nil or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
--   a bracketed IPv6 literal such as "[::1]" is kept, brackets included,
--   in host
-----------------------------------------------------------------------------
function parse(url, default)
    -- initialize default parameters
    local parsed = {}
    for i,v in base.pairs(default or parsed) do parsed[i] = v end
    -- empty url is parsed to nil
    if not url or url == "" then return nil, "invalid url" end
    -- get fragment
    url = string.gsub(url, "#(.*)$", function(f)
        parsed.fragment = f
        return ""
    end)
    -- get scheme
    url = string.gsub(url, "^([%w][%w%+%-%.]*)%:",
        function(s) parsed.scheme = s; return "" end)
    -- get authority
    url = string.gsub(url, "^//([^/]*)", function(n)
        parsed.authority = n
        return ""
    end)
    -- get query string
    url = string.gsub(url, "%?(.*)", function(q)
        parsed.query = q
        return ""
    end)
    -- get params
    url = string.gsub(url, "%;(.*)", function(p)
        parsed.params = p
        return ""
    end)
    -- path is whatever was left
    if url ~= "" then parsed.path = url end
    local authority = parsed.authority
    if not authority then return parsed end
    authority = string.gsub(authority,"^([^@]*)@",
        function(u) parsed.userinfo = u; return "" end)
    -- the port may not contain "]": otherwise the last colon inside a
    -- bracketed IPv6 literal ("[::1]") would be mistaken for the port
    -- separator and the host would be cut in half
    authority = string.gsub(authority, ":([^:%]]*)$",
        function(p) parsed.port = p; return "" end)
    if authority ~= "" then parsed.host = authority end
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    userinfo = string.gsub(userinfo, ":([^:]*)$",
        function(p) parsed.password = p; return "" end)
    parsed.user = userinfo
    return parsed
end
171
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- The path is split into segments and reassembled, so its segments are
-- protected if any reserved or unallowed characters are found. When a host
-- is present the authority is recomposed from host, port and user
-- information; otherwise the authority field is used as given.
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-----------------------------------------------------------------------------
function build(parsed)
    -- path first, followed by its params and query
    local segments = parse_path(parsed.path or "")
    local result = build_path(segments)
    local params, query = parsed.params, parsed.query
    if params then result = result .. ";" .. params end
    if query then result = result .. "?" .. query end
    -- host, port and user details take precedence over a raw authority
    local authority = parsed.authority
    local host = parsed.host
    if host then
        authority = host
        local port = parsed.port
        if port then authority = authority .. ":" .. port end
        -- user and password take precedence over a raw userinfo
        local credentials = parsed.userinfo
        local user = parsed.user
        if user then
            credentials = user
            local password = parsed.password
            if password then credentials = credentials .. ":" .. password end
        end
        if credentials then authority = credentials .. "@" .. authority end
    end
    if authority then result = "//" .. authority .. result end
    local scheme, fragment = parsed.scheme, parsed.fragment
    if scheme then result = scheme .. ":" .. result end
    if fragment then result = result .. "#" .. fragment end
    return result
end
204
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
--   base_url: base URL, either a string or a table as returned by parse
--   relative_url: URL string to be resolved against the base
-- Returns
--   corresponding absolute url
--   relative_url unchanged when the base cannot be parsed or when the
--   relative URL already carries a scheme; the base URL (rebuilt as a
--   string when it was given as a table) when relative_url cannot be parsed
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- declared local: an undeclared assignment would leak base_parsed out
    -- of this function as a global
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url
    elseif not relative_parsed then return base_url
    elseif relative_parsed.scheme then return relative_url
    else
        -- inherit from the base every component the relative URL lacks,
        -- stopping at the first component the relative URL does provide
        relative_parsed.scheme = base_parsed.scheme
        if not relative_parsed.authority then
            relative_parsed.authority = base_parsed.authority
            if not relative_parsed.path then
                relative_parsed.path = base_parsed.path
                if not relative_parsed.params then
                    relative_parsed.params = base_parsed.params
                    if not relative_parsed.query then
                        relative_parsed.query = base_parsed.query
                    end
                end
            else
                -- a relative path is resolved against the base path
                relative_parsed.path = absolute_path(base_parsed.path or "",
                    relative_parsed.path)
            end
        end
        return build(relative_parsed)
    end
end
244
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated as the empty string
-- Returns
--   segment: a table with one entry per segment; its is_absolute field is
--   set to 1 when the path starts with "/" and its is_directory field is
--   set to 1 when the path ends with "/"
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    -- collect every non-empty run of characters between slashes; the
    -- length operator replaces the deprecated table.getn
    for segment in string.gmatch(path, "[^/]+") do
        -- the assignment keeps only the first value unescape returns
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
264
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, optionally carrying the is_absolute and
--     is_directory fields
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    -- the length operator replaces the deprecated table.getn
    local n = #parsed
    local pieces = {}
    for i = 1, n do
        local segment = parsed[i]
        -- the assignment keeps only the first value protect_segment returns
        if not unsafe then segment = protect_segment(segment) end
        pieces[i] = segment
    end
    -- join once instead of growing the string segment by segment
    local path = table.concat(pieces, "/")
    -- a trailing slash is only added when there is at least one segment
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end