| 1 | ----------------------------------------------------------------------------- |
| 2 | -- URI parsing, composition and relative URL resolution |
| 3 | -- LuaSocket toolkit. |
| 4 | -- Author: Diego Nehab |
| 5 | -- RCS ID: $Id: url.lua,v 1.38 2006/04/03 04:45:42 diego Exp $ |
| 6 | ----------------------------------------------------------------------------- |
| 7 | |
| 8 | ----------------------------------------------------------------------------- |
| 9 | -- Declare module |
| 10 | ----------------------------------------------------------------------------- |
| 11 | local string = require("string") |
| 12 | local base = _G |
| 13 | local table = require("table") |
| 14 | module("socket.url") |
| 15 | |
-----------------------------------------------------------------------------
-- Module version
-- Version string for this URL module. It is assigned as a plain global
-- name after the module("socket.url") call above; under Lua 5.1 module
-- semantics that makes it a field of the module table.
-----------------------------------------------------------------------------
_VERSION = "URL 1.0.1"
| 20 | |
-----------------------------------------------------------------------------
-- Encodes a string into its escaped hexadecimal representation
-- Every byte other than an ASCII letter, a digit or "_" is replaced by "%"
-- followed by its two-digit lowercase hexadecimal code.
-- Input
--   s: binary string to be encoded
-- Returns
--   escaped representation of the binary string (exactly one value)
-----------------------------------------------------------------------------
function escape(s)
    -- the outer parentheses discard the substitution count that
    -- string.gsub returns as a second value, so it cannot leak into the
    -- caller's argument list (e.g. table.insert(t, escape(s)))
    return (string.gsub(s, "([^A-Za-z0-9_])", function(c)
        return string.format("%%%02x", string.byte(c))
    end))
end
| 33 | |
-----------------------------------------------------------------------------
-- Helper used when protecting a path segment, to prevent it from
-- interfering with the url parsing: turns a list of values into a
-- lookup set.
-- Input
--   t: array of values (traversed with ipairs)
-- Returns
--   table that maps every listed value to 1
-----------------------------------------------------------------------------
local function make_set(t)
    local set = {}
    for _, member in base.ipairs(t) do
        set[member] = 1
    end
    return set
end
| 49 | |
-- these are allowed within a path segment, along with alphanum
-- other characters must be escaped
-- (make_set turns the list into a lookup table keyed by character, which
-- protect_segment consults for each candidate character)
local segment_set = make_set {
    "-", "_", ".", "!", "~", "*", "'", "(",
    ")", ":", "@", "&", "=", "+", "$", ",",
}
| 56 | |
-----------------------------------------------------------------------------
-- Protects a path segment, to prevent it from interfering with the
-- url parsing.
-- Input
--   s: binary string to be encoded
-- Returns
--   the segment with every character that is neither alphanumeric, "_",
--   nor a member of segment_set replaced by its "%xx" escape, followed by
--   the match count reported by string.gsub
-----------------------------------------------------------------------------
local function protect_segment(s)
    -- leaves allowed characters alone, hex-escapes everything else
    local function encode_unless_allowed(c)
        if not segment_set[c] then
            return string.format("%%%02x", string.byte(c))
        end
        return c
    end
    return string.gsub(s, "([^A-Za-z0-9_])", encode_unless_allowed)
end
| 63 | |
-----------------------------------------------------------------------------
-- Decodes a string from its escaped hexadecimal representation
-- Each "%" followed by two hexadecimal digits is replaced by the byte with
-- that code; any other text is left untouched.
-- Input
--   s: escaped string to be decoded
-- Returns
--   decoded binary string (exactly one value)
-----------------------------------------------------------------------------
function unescape(s)
    -- the outer parentheses discard the substitution count that
    -- string.gsub returns as a second value
    return (string.gsub(s, "%%(%x%x)", function(hex)
        return string.char(base.tonumber(hex, 16))
    end))
end
| 76 | |
-----------------------------------------------------------------------------
-- Builds a path from a base path and a relative path
-- Input
--   base_path
--   relative_path
-- Returns
--   corresponding absolute path
-----------------------------------------------------------------------------
local function absolute_path(base_path, relative_path)
    -- a relative path that starts with "/" is returned unchanged
    if string.sub(relative_path, 1, 1) == "/" then
        return relative_path
    end
    -- drop whatever follows the last "/" of the base, then append
    local directory = string.gsub(base_path, "[^/]*$", "")
    local joined = directory .. relative_path
    -- erase "./" pieces; longer matches that merely end in "./" are kept
    joined = string.gsub(joined, "([^/]*%./)", function (piece)
        if piece == "./" then return "" end
        return piece
    end)
    -- a trailing "/." collapses to "/"
    joined = string.gsub(joined, "/%.$", "/")
    -- keep cancelling "<segment>/../" pieces until a pass changes nothing
    local previous
    repeat
        previous = joined
        joined = string.gsub(previous, "([^/]*/%.%./)", function (piece)
            if piece == "../../" then return piece end
            return ""
        end)
    until previous == joined
    -- cancel a "<segment>/.." that sits at the very end
    return (string.gsub(previous, "([^/]*/%.%.)$", function (piece)
        if piece == "../.." then return piece end
        return ""
    end))
end
| 105 | |
-----------------------------------------------------------------------------
-- Parses a url and returns a table with all its parts according to RFC 2396
-- The following grammar describes the names given to the URL parts
-- <url> ::= <scheme>://<authority>/<path>;<params>?<query>#<fragment>
-- <authority> ::= <userinfo>@<host>:<port>
-- <userinfo> ::= <user>[:<password>]
-- <path> ::= {<segment>/}<segment>
-- Input
--   url: uniform resource locator of request
--   default: table with default values for each field
-- Returns
--   table with the following fields, where RFC naming conventions have
--   been preserved:
--     scheme, authority, userinfo, user, password, host, port,
--     path, params, query, fragment
--   or nil followed by "invalid url" when url is missing or empty
-- Obs:
--   the leading '/' in {/<path>} is considered part of <path>
-----------------------------------------------------------------------------
function parse(url, default)
    -- start from a copy of the default values, when any were given
    local parsed = {}
    if default then
        for key, value in base.pairs(default) do
            parsed[key] = value
        end
    end
    -- empty url is parsed to nil
    if not url or url == "" then
        return nil, "invalid url"
    end
    -- whitespace removal is disabled: url = string.gsub(url, "%s", "")

    -- makes a gsub callback that stores its capture in parsed[field] and
    -- erases the matched text
    local function take(field)
        return function(capture)
            parsed[field] = capture
            return ""
        end
    end

    -- peel the components off the url, one after the other
    url = string.gsub(url, "#(.*)$", take("fragment"))
    url = string.gsub(url, "^([%w][%w%+%-%.]*)%:", take("scheme"))
    url = string.gsub(url, "^//([^/]*)", take("authority"))
    url = string.gsub(url, "%?(.*)", take("query"))
    url = string.gsub(url, "%;(.*)", take("params"))
    -- path is whatever was left
    if url ~= "" then
        parsed.path = url
    end

    -- break the authority (possibly one supplied by default) apart
    local authority = parsed.authority
    if authority then
        authority = string.gsub(authority, "^([^@]*)@", take("userinfo"))
        authority = string.gsub(authority, ":([^:]*)$", take("port"))
        if authority ~= "" then
            parsed.host = authority
        end
        -- and the userinfo, if there is one, into user and password
        local userinfo = parsed.userinfo
        if userinfo then
            userinfo = string.gsub(userinfo, ":([^:]*)$", take("password"))
            parsed.user = userinfo
        end
    end
    return parsed
end
| 171 | |
-----------------------------------------------------------------------------
-- Rebuilds a parsed URL from its components.
-- The path is split with parse_path and reassembled with build_path, so
-- its segments are protected if any reserved or unallowed characters are
-- found
-- Input
--   parsed: parsed URL, as returned by parse
-- Returns
--   a string with the corresponding URL
-----------------------------------------------------------------------------
function build(parsed)
    -- path first, followed by params and query
    local segments = parse_path(parsed.path or "")
    local url = build_path(segments)
    if parsed.params then url = url .. ";" .. parsed.params end
    if parsed.query then url = url .. "?" .. parsed.query end

    -- when a host is present the authority is rebuilt from host, port and
    -- user/password (or userinfo); otherwise parsed.authority is used as is
    local authority = parsed.authority
    local host = parsed.host
    if host then
        authority = host
        local port = parsed.port
        if port then authority = authority .. ":" .. port end
        local userinfo = parsed.userinfo
        local user = parsed.user
        if user then
            local password = parsed.password
            userinfo = password and (user .. ":" .. password) or user
        end
        if userinfo then authority = userinfo .. "@" .. authority end
    end

    -- wrap the remaining components around what was built so far
    if authority then url = "//" .. authority .. url end
    if parsed.scheme then url = parsed.scheme .. ":" .. url end
    if parsed.fragment then url = url .. "#" .. parsed.fragment end
    -- whitespace removal is disabled: url = string.gsub(url, "%s", "")
    return url
end
| 204 | |
-----------------------------------------------------------------------------
-- Builds an absolute URL from a base and a relative URL according to
-- RFC 2396
-- Input
--   base_url: base URL, either a string or an already parsed table
--   relative_url: URL to be resolved against the base
-- Returns
--   corresponding absolute url; relative_url itself when the base cannot
--   be parsed or when relative_url already has a scheme; the base url
--   (rebuilt with build when it was given as a table) when relative_url
--   cannot be parsed
-----------------------------------------------------------------------------
function absolute(base_url, relative_url)
    -- declared local so the parsed base is not written to a global
    local base_parsed
    if base.type(base_url) == "table" then
        base_parsed = base_url
        base_url = build(base_parsed)
    else
        base_parsed = parse(base_url)
    end
    local relative_parsed = parse(relative_url)
    if not base_parsed then return relative_url
    elseif not relative_parsed then return base_url
    elseif relative_parsed.scheme then return relative_url
    else
        -- inherit from the base every leading component the relative url
        -- lacks, stopping at the first one it provides
        relative_parsed.scheme = base_parsed.scheme
        if not relative_parsed.authority then
            relative_parsed.authority = base_parsed.authority
            if not relative_parsed.path then
                relative_parsed.path = base_parsed.path
                if not relative_parsed.params then
                    relative_parsed.params = base_parsed.params
                    if not relative_parsed.query then
                        relative_parsed.query = base_parsed.query
                    end
                end
            else
                -- both have a path: resolve the relative one against the base
                relative_parsed.path = absolute_path(base_parsed.path or "",
                    relative_parsed.path)
            end
        end
        return build(relative_parsed)
    end
end
| 244 | |
-----------------------------------------------------------------------------
-- Breaks a path into its segments, unescaping the segments
-- Input
--   path: path string; nil is treated as the empty path
-- Returns
--   segment: a table with one entry per non-empty segment, plus the field
--     is_absolute = 1 when the path starts with "/" and the field
--     is_directory = 1 when the path ends with "/"
-----------------------------------------------------------------------------
function parse_path(path)
    local parsed = {}
    path = path or ""
    -- whitespace removal is disabled: path = string.gsub(path, "%s", "")
    -- collect every non-empty run between slashes, already unescaped; the
    -- length operator replaces table.getn, which Lua 5.2 and later dropped
    for segment in string.gmatch(path, "[^/]+") do
        -- the assignment keeps only the first value unescape returns
        parsed[#parsed + 1] = unescape(segment)
    end
    if string.sub(path, 1, 1) == "/" then parsed.is_absolute = 1 end
    if string.sub(path, -1, -1) == "/" then parsed.is_directory = 1 end
    return parsed
end
| 264 | |
-----------------------------------------------------------------------------
-- Builds a path component from its segments, escaping protected characters.
-- Input
--   parsed: path segments, optionally carrying the is_absolute and
--     is_directory fields
--   unsafe: if true, segments are not protected before path is built
-- Returns
--   path: corresponding path string
-----------------------------------------------------------------------------
function build_path(parsed, unsafe)
    -- the length operator replaces table.getn, which Lua 5.2 and later
    -- dropped
    local n = #parsed
    local pieces = {}
    for i = 1, n do
        if unsafe then
            pieces[i] = parsed[i]
        else
            -- the assignment keeps only the string protect_segment returns
            pieces[i] = protect_segment(parsed[i])
        end
    end
    -- join once instead of growing the string segment by segment
    local path = table.concat(pieces, "/")
    -- the trailing slash is only added when there is at least one segment
    if n > 0 and parsed.is_directory then path = path .. "/" end
    if parsed.is_absolute then path = "/" .. path end
    return path
end