Mercurial > luasocket
comparison src/url.lua @ 0:4b915342e2a8
LuaSocket 2.0.2 + CMake build description.
author | Eric Wing <ewing . public |-at-| gmail . com> |
---|---|
date | Tue, 26 Aug 2008 18:40:01 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4b915342e2a8 |
---|---|
1 ----------------------------------------------------------------------------- | |
2 -- URI parsing, composition and relative URL resolution | |
3 -- LuaSocket toolkit. | |
4 -- Author: Diego Nehab | |
5 -- RCS ID: $Id: url.lua,v 1.38 2006/04/03 04:45:42 diego Exp $ | |
6 ----------------------------------------------------------------------------- | |
7 | |
8 ----------------------------------------------------------------------------- | |
9 -- Declare module | |
10 ----------------------------------------------------------------------------- | |
11 local string = require("string") | |
12 local base = _G | |
13 local table = require("table") | |
14 module("socket.url") | |
15 | |
16 ----------------------------------------------------------------------------- | |
17 -- Module version | |
18 ----------------------------------------------------------------------------- | |
19 _VERSION = "URL 1.0.1" | |
20 | |
-----------------------------------------------------------------------------
-- Encodes a string into its escaped hexadecimal representation.
-- Every byte other than an ASCII letter, a digit or "_" is replaced by "%"
-- followed by its two-digit lowercase hexadecimal code.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the string (exactly one value)
-----------------------------------------------------------------------------
function escape(s)
    -- string.gsub returns (result, match count); the outer parentheses keep
    -- only the result so that callers forwarding the return value, e.g.
    -- table.insert(t, escape(s)), do not receive a stray extra argument
    return (string.gsub(s, "([^A-Za-z0-9_])", function(c)
        return string.format("%%%02x", string.byte(c))
    end))
end
33 | |
-----------------------------------------------------------------------------
-- Turns an array of values into a set.
-- Input
--   t: array whose elements become the members of the set
-- Returns
--   table mapping each element of t to 1
-----------------------------------------------------------------------------
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end

-- characters allowed within a path segment, in addition to the
-- alphanumerics and "_"; every other character must be escaped
local segment_set = make_set {
    "-", "_", ".", "!",
    "~", "*", "'", "(",
    ")", ":", "@", "&",
    "=", "+", "$", ",",
}

-----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with the
-- url parsing.
-- Input
--   s: binary string to be encoded
-- Returns
--   the segment with every character outside the allowed set replaced by
--   its "%xx" escape, followed by the match count reported by string.gsub
-----------------------------------------------------------------------------
local function protect_segment(s)
    local function encode(c)
        if not segment_set[c] then
            return string.format("%%%02x", string.byte(c))
        end
        return c
    end
    return string.gsub(s, "([^A-Za-z0-9_])", encode)
end
63 | |
-----------------------------------------------------------------------------
-- Decodes a string from its escaped hexadecimal representation.
-- Every "%" followed by two hexadecimal digits is replaced by the byte with
-- that code; a "%" not followed by two hexadecimal digits is left as is.
-- Input
--   s: escaped string to be decoded
-- Returns
--   decoded binary string (exactly one value)
-----------------------------------------------------------------------------
function unescape(s)
    -- string.gsub returns (result, match count); the outer parentheses keep
    -- only the result so that callers forwarding the return value do not
    -- receive a stray extra argument
    return (string.gsub(s, "%%(%x%x)", function(hex)
        return string.char(base.tonumber(hex, 16))
    end))
end
76 | |
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
--   base_path: path the relative path is resolved against
--   relative_path: path to resolve; returned unchanged if it starts with "/"
-- Returns
--   corresponding absolute path
-----------------------------------------------------------------------------
local function absolute_path(base_path, relative_path)
    -- a path that already starts at the root needs no resolution
    if string.sub(relative_path, 1, 1) == "/" then
        return relative_path
    end
    -- drop whatever follows the last "/" of the base, then append
    local merged = string.gsub(base_path, "[^/]*$", "") .. relative_path
    -- remove "./" segments; segments that merely end in "." are kept
    merged = string.gsub(merged, "([^/]*%./)", function (segment)
        if segment == "./" then return "" end
        return segment
    end)
    -- a trailing "/." collapses to "/"
    merged = string.gsub(merged, "/%.$", "/")
    -- cancel "<segment>/../" pairs until a pass changes nothing;
    -- "../../" is left in place
    local previous
    repeat
        previous = merged
        merged = string.gsub(previous, "([^/]*/%.%./)", function (pair)
            if pair == "../../" then return pair end
            return ""
        end)
    until previous == merged
    -- cancel a final "<segment>/.." that has no trailing slash
    local resolved = string.gsub(merged, "([^/]*/%.%.)$", function (pair)
        if pair == "../.." then return pair end
        return ""
    end)
    return resolved
end
105 | |
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   or nil followed by "invalid url" when url is nil or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
-----------------------------------------------------------------------------
function parse(url, default)
    -- start from a copy of the caller's defaults, if any were given
    local parsed = {}
    if default then
        for key, value in base.pairs(default) do
            parsed[key] = value
        end
    end
    -- empty url is parsed to nil
    if not url or url == "" then
        return nil, "invalid url"
    end
    -- Removes the text matched by pattern from subject, stores the capture
    -- in parsed[field], and returns what is left of subject
    local function extract(subject, pattern, field)
        local remainder = string.gsub(subject, pattern, function (capture)
            parsed[field] = capture
            return ""
        end)
        return remainder
    end
    -- the order matters: each step works on what the previous one left
    url = extract(url, "#(.*)$", "fragment")
    url = extract(url, "^([%w][%w%+%-%.]*)%:", "scheme")
    url = extract(url, "^//([^/]*)", "authority")
    url = extract(url, "%?(.*)", "query")
    url = extract(url, "%;(.*)", "params")
    -- path is whatever was left
    if url ~= "" then parsed.path = url end
    -- split the authority into userinfo, host and port
    local authority = parsed.authority
    if not authority then return parsed end
    authority = extract(authority, "^([^@]*)@", "userinfo")
    authority = extract(authority, ":([^:]*)$", "port")
    if authority ~= "" then parsed.host = authority end
    -- split the userinfo into user and password
    local userinfo = parsed.userinfo
    if not userinfo then return parsed end
    parsed.user = extract(userinfo, ":([^:]*)$", "password")
    return parsed
end
171 | |
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- The path is split with parse_path and reassembled with build_path, so
-- its segments are protected; the other components are used as given.
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-----------------------------------------------------------------------------
function build(parsed)
    -- path first; everything else is attached around it
    local segments = parse_path(parsed.path or "")
    local result = build_path(segments)
    if parsed.params then
        result = result .. ";" .. parsed.params
    end
    if parsed.query then
        result = result .. "?" .. parsed.query
    end
    -- host/port/user/password take precedence over a pre-assembled
    -- authority; user takes precedence over a pre-assembled userinfo
    local authority
    if not parsed.host then
        authority = parsed.authority
    else
        authority = parsed.host
        if parsed.port then
            authority = authority .. ":" .. parsed.port
        end
        local userinfo
        if not parsed.user then
            userinfo = parsed.userinfo
        else
            userinfo = parsed.user
            if parsed.password then
                userinfo = userinfo .. ":" .. parsed.password
            end
        end
        if userinfo then
            authority = userinfo .. "@" .. authority
        end
    end
    if authority then
        result = "//" .. authority .. result
    end
    if parsed.scheme then
        result = parsed.scheme .. ":" .. result
    end
    if parsed.fragment then
        result = result .. "#" .. parsed.fragment
    end
    return result
end
204 | |
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to RFC 2396
-- Input
--   base_url: base URL, either a string or a table as returned by parse
--   relative_url: URL to be resolved against the base
-- Returns
--   corresponding absolute url; relative_url itself when the base cannot be
--   parsed or when relative_url already has a scheme; the base URL (as a
--   string) when relative_url cannot be parsed
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- must be local: a bare assignment would create a variable in the
    -- module environment that is shared by every call
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url
    elseif not relative_parsed then return base_url
    elseif relative_parsed.scheme then return relative_url
    else
        -- inherit from the base each leading component the relative URL
        -- lacks, stopping at the first component it does provide
        relative_parsed.scheme = base_parsed.scheme
        if not relative_parsed.authority then
            relative_parsed.authority = base_parsed.authority
            if not relative_parsed.path then
                relative_parsed.path = base_parsed.path
                if not relative_parsed.params then
                    relative_parsed.params = base_parsed.params
                    if not relative_parsed.query then
                        relative_parsed.query = base_parsed.query
                    end
                end
            else
                -- both have a path: resolve the relative one against the base
                relative_parsed.path = absolute_path(base_parsed.path or "",
                    relative_parsed.path)
            end
        end
        return build(relative_parsed)
    end
end
244 | |
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated as the empty path
-- Returns
--   segment: a table with one entry per non-empty segment, plus the fields
--     is_absolute (set to 1 when path starts with "/") and
--     is_directory (set to 1 when path ends with "/")
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    -- runs of non-"/" characters are the segments; empty segments produced
    -- by consecutive slashes are skipped.  The length operator is used
    -- instead of the deprecated table.getn.
    for segment in string.gmatch(path, "([^/]+)") do
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
264 | |
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, with the optional fields is_absolute and
--     is_directory
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    -- the length operator replaces the deprecated table.getn
    local n = #parsed
    -- collect the (optionally protected) segments and join them once,
    -- instead of growing a string piece by piece in two duplicated loops
    local segments = {}
    for i = 1, n do
        local segment = parsed[i]
        if not unsafe then segment = protect_segment(segment) end
        segments[i] = segment
    end
    local path = table.concat(segments, "/")
    -- the trailing slash is only added when there is at least one segment
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end