501 lines
14 KiB
D
501 lines
14 KiB
D
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/**
|
|
* URL parser.
|
|
*
|
|
* Copyright: Eugene Wissner 2017-2018.
|
|
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
|
* Mozilla Public License, v. 2.0).
|
|
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
|
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/net/uri.d,
|
|
* tanya/net/uri.d)
|
|
*/
|
|
module tanya.net.uri;
|
|
|
|
import tanya.conv;
|
|
import tanya.encoding.ascii;
|
|
import tanya.memory;
|
|
|
|
version (unittest)
|
|
{
|
|
import tanya.test.assertion;
|
|
}
|
|
|
|
/**
|
|
* Thrown if an invalid URI was specified.
|
|
*/
|
|
final class URIException : Exception
|
|
{
|
|
/**
|
|
* Params:
|
|
* msg = The message for the exception.
|
|
* file = The file where the exception occurred.
|
|
* line = The line number where the exception occurred.
|
|
* next = The previous exception in the chain of exceptions, if any.
|
|
*/
|
|
this(string msg,
|
|
string file = __FILE__,
|
|
size_t line = __LINE__,
|
|
Throwable next = null) @nogc nothrow pure @safe
|
|
{
|
|
super(msg, file, line, next);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A Unique Resource Locator.
|
|
*/
|
|
struct URL
|
|
{
|
|
/// The URL scheme.
|
|
const(char)[] scheme;
|
|
|
|
/// The username.
|
|
const(char)[] user;
|
|
|
|
/// The password.
|
|
const(char)[] pass;
|
|
|
|
/// The hostname.
|
|
const(char)[] host;
|
|
|
|
/// The port number.
|
|
ushort port;
|
|
|
|
/// The path.
|
|
const(char)[] path;
|
|
|
|
/// The query string.
|
|
const(char)[] query;
|
|
|
|
/// The anchor.
|
|
const(char)[] fragment;
|
|
|
|
/**
|
|
* Attempts to parse an URL from a string.
|
|
* Output string data (scheme, user, etc.) are just slices of input string
|
|
* (i.e., no memory allocation and copying).
|
|
*
|
|
* Params:
|
|
* source = The string containing the URL.
|
|
*
|
|
* Throws: $(D_PSYMBOL URIException) if the URL is malformed.
|
|
*/
|
|
this(const char[] source) @nogc pure
|
|
{
|
|
ptrdiff_t pos = -1, endPos = source.length, start;
|
|
|
|
foreach (i, ref c; source)
|
|
{
|
|
if (pos == -1 && c == ':')
|
|
{
|
|
pos = i;
|
|
}
|
|
if (endPos == source.length && (c == '?' || c == '#'))
|
|
{
|
|
endPos = i;
|
|
}
|
|
}
|
|
|
|
// Check if the colon is a part of the scheme or the port and parse
|
|
// the appropriate part.
|
|
if (source.length > 1 && source[0] == '/' && source[1] == '/')
|
|
{
|
|
// Relative scheme.
|
|
start = 2;
|
|
}
|
|
else if (pos > 0)
|
|
{
|
|
// Validate scheme:
|
|
// [ toLower(alpha) | digit | "+" | "-" | "." ]
|
|
foreach (ref c; source[0 .. pos])
|
|
{
|
|
if (!c.isAlphaNum && c != '+' && c != '-' && c != '.')
|
|
{
|
|
goto ParsePath;
|
|
}
|
|
}
|
|
|
|
if (source.length == pos + 1) // only "scheme:" is available.
|
|
{
|
|
this.scheme = source[0 .. $ - 1];
|
|
return;
|
|
}
|
|
else if (source.length > pos + 1 && source[pos + 1] == '/')
|
|
{
|
|
this.scheme = source[0 .. pos];
|
|
|
|
if (source.length > pos + 2 && source[pos + 2] == '/')
|
|
{
|
|
start = pos + 3;
|
|
|
|
if (source.length <= start)
|
|
{
|
|
// Only "scheme://" is available.
|
|
return;
|
|
}
|
|
if (this.scheme == "file" && source[start] == '/')
|
|
{
|
|
// Windows drive letters.
|
|
if (source.length - start > 2
|
|
&& source[start + 2] == ':')
|
|
{
|
|
++start;
|
|
}
|
|
goto ParsePath;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
start = pos + 1;
|
|
goto ParsePath;
|
|
}
|
|
}
|
|
else if (!parsePort(source[pos .. $]))
|
|
{
|
|
// Schemas like mailto: and zlib: may not have any slash after
|
|
// them.
|
|
this.scheme = source[0 .. pos];
|
|
start = pos + 1;
|
|
goto ParsePath;
|
|
}
|
|
}
|
|
else if (pos == 0 && parsePort(source[pos .. $]))
|
|
{
|
|
// An URL shouldn't begin with a port number.
|
|
throw defaultAllocator.make!URIException("URL begins with port");
|
|
}
|
|
else
|
|
{
|
|
goto ParsePath;
|
|
}
|
|
|
|
// Parse host.
|
|
pos = -1;
|
|
for (ptrdiff_t i = start; i < source.length; ++i)
|
|
{
|
|
if (source[i] == '@')
|
|
{
|
|
pos = i;
|
|
}
|
|
else if (source[i] == '/')
|
|
{
|
|
endPos = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Check for login and password.
|
|
if (pos != -1)
|
|
{
|
|
// *( unreserved / pct-encoded / sub-delims / ":" )
|
|
foreach (i, c; source[start .. pos])
|
|
{
|
|
if (c == ':')
|
|
{
|
|
if (this.user is null)
|
|
{
|
|
this.user = source[start .. start + i];
|
|
this.pass = source[start + i + 1 .. pos];
|
|
}
|
|
}
|
|
else if (!c.isAlpha() &&
|
|
!c.isDigit() &&
|
|
c != '!' &&
|
|
c != ';' &&
|
|
c != '=' &&
|
|
c != '_' &&
|
|
c != '~' &&
|
|
!(c >= '$' && c <= '.'))
|
|
{
|
|
this.scheme = this.user = this.pass = null;
|
|
throw make!URIException(defaultAllocator,
|
|
"Restricted characters in user information");
|
|
}
|
|
}
|
|
if (this.user is null)
|
|
{
|
|
this.user = source[start .. pos];
|
|
}
|
|
|
|
start = ++pos;
|
|
}
|
|
|
|
pos = endPos;
|
|
if (endPos <= 1 || source[start] != '[' || source[endPos - 1] != ']')
|
|
{
|
|
// Short circuit portscan.
|
|
// IPv6 embedded address.
|
|
for (ptrdiff_t i = endPos - 1; i >= start; --i)
|
|
{
|
|
if (source[i] == ':')
|
|
{
|
|
pos = i;
|
|
if (this.port == 0 && !parsePort(source[i .. endPos]))
|
|
{
|
|
this.scheme = this.user = this.pass = null;
|
|
throw defaultAllocator.make!URIException("Invalid port");
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if we have a valid host, if we don't reject the string as URL.
|
|
if (pos <= start)
|
|
{
|
|
this.scheme = this.user = this.pass = null;
|
|
throw defaultAllocator.make!URIException("Invalid host");
|
|
}
|
|
|
|
this.host = source[start .. pos];
|
|
|
|
if (endPos == source.length)
|
|
{
|
|
return;
|
|
}
|
|
|
|
start = endPos;
|
|
|
|
ParsePath:
|
|
endPos = source.length;
|
|
pos = -1;
|
|
foreach (i, ref c; source[start .. $])
|
|
{
|
|
if (c == '?' && pos == -1)
|
|
{
|
|
pos = start + i;
|
|
}
|
|
else if (c == '#')
|
|
{
|
|
endPos = start + i;
|
|
break;
|
|
}
|
|
}
|
|
if (pos == -1)
|
|
{
|
|
pos = endPos;
|
|
}
|
|
|
|
if (pos > start)
|
|
{
|
|
this.path = source[start .. pos];
|
|
}
|
|
if (endPos >= ++pos)
|
|
{
|
|
this.query = source[pos .. endPos];
|
|
}
|
|
if (++endPos <= source.length)
|
|
{
|
|
this.fragment = source[endPos .. $];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Attempts to parse and set the port.
|
|
*
|
|
* Params:
|
|
* port = String beginning with a colon followed by the port number and
|
|
* an optional path (query string and/or fragment), like:
|
|
* `:12345/some_path` or `:12345`.
|
|
*
|
|
* Returns: Whether the port could be found.
|
|
*/
|
|
private bool parsePort(const(char)[] port) @nogc nothrow pure @safe
|
|
{
|
|
auto unparsed = port[1 .. $];
|
|
auto parsed = readIntegral!ushort(unparsed);
|
|
if (unparsed.length == 0 || unparsed[0] == '/')
|
|
{
|
|
this.port = parsed;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
|
|
///
|
|
@nogc pure @system unittest
|
|
{
|
|
auto u = URL("example.org");
|
|
assert(u.path == "example.org");
|
|
|
|
u = URL("relative/path");
|
|
assert(u.path == "relative/path");
|
|
|
|
// Host and scheme
|
|
u = URL("https://example.org");
|
|
assert(u.scheme == "https");
|
|
assert(u.host == "example.org");
|
|
assert(u.path is null);
|
|
assert(u.port == 0);
|
|
assert(u.fragment is null);
|
|
|
|
// With user and port and path
|
|
u = URL("https://hilary:putnam@example.org:443/foo/bar");
|
|
assert(u.scheme == "https");
|
|
assert(u.host == "example.org");
|
|
assert(u.path == "/foo/bar");
|
|
assert(u.port == 443);
|
|
assert(u.user == "hilary");
|
|
assert(u.pass == "putnam");
|
|
assert(u.fragment is null);
|
|
|
|
// With query string
|
|
u = URL("https://example.org/?login=true");
|
|
assert(u.scheme == "https");
|
|
assert(u.host == "example.org");
|
|
assert(u.path == "/");
|
|
assert(u.query == "login=true");
|
|
assert(u.fragment is null);
|
|
|
|
// With query string and fragment
|
|
u = URL("https://example.org/?login=false#label");
|
|
assert(u.scheme == "https");
|
|
assert(u.host == "example.org");
|
|
assert(u.path == "/");
|
|
assert(u.query == "login=false");
|
|
assert(u.fragment == "label");
|
|
|
|
u = URL("redis://root:password@localhost:2201/path?query=value#fragment");
|
|
assert(u.scheme == "redis");
|
|
assert(u.user == "root");
|
|
assert(u.pass == "password");
|
|
assert(u.host == "localhost");
|
|
assert(u.port == 2201);
|
|
assert(u.path == "/path");
|
|
assert(u.query == "query=value");
|
|
assert(u.fragment == "fragment");
|
|
}
|
|
|
|
@nogc pure @system unittest
|
|
{
|
|
auto u = URL("127.0.0.1");
|
|
assert(u.path == "127.0.0.1");
|
|
|
|
u = URL("http://127.0.0.1");
|
|
assert(u.scheme == "http");
|
|
assert(u.host == "127.0.0.1");
|
|
|
|
u = URL("http://127.0.0.1:9000");
|
|
assert(u.scheme == "http");
|
|
assert(u.host == "127.0.0.1");
|
|
assert(u.port == 9000);
|
|
|
|
u = URL("127.0.0.1:80");
|
|
assert(u.host == "127.0.0.1");
|
|
assert(u.port == 80);
|
|
assert(u.path is null);
|
|
|
|
u = URL("//example.net");
|
|
assert(u.host == "example.net");
|
|
assert(u.scheme is null);
|
|
|
|
u = URL("//example.net?q=before:after");
|
|
assert(u.host == "example.net");
|
|
assert(u.query == "q=before:after");
|
|
|
|
u = URL("localhost:8080");
|
|
assert(u.host == "localhost");
|
|
assert(u.port == 8080);
|
|
assert(u.path is null);
|
|
|
|
u = URL("ftp:");
|
|
assert(u.scheme == "ftp");
|
|
|
|
u = URL("file:///C:\\Users");
|
|
assert(u.scheme == "file");
|
|
assert(u.path == "C:\\Users");
|
|
|
|
u = URL("localhost:66000");
|
|
assert(u.scheme == "localhost");
|
|
assert(u.path == "66000");
|
|
|
|
u = URL("file:///home/");
|
|
assert(u.scheme == "file");
|
|
assert(u.path == "/home/");
|
|
|
|
u = URL("file:///home/?q=asdf");
|
|
assert(u.scheme == "file");
|
|
assert(u.path == "/home/");
|
|
assert(u.query == "q=asdf");
|
|
|
|
u = URL("http://secret@example.org");
|
|
assert(u.scheme == "http");
|
|
assert(u.host == "example.org");
|
|
assert(u.user == "secret");
|
|
|
|
u = URL("h_tp://:80");
|
|
assert(u.path == "h_tp://:80");
|
|
assert(u.port == 0);
|
|
|
|
u = URL("zlib:/home/user/file.gz");
|
|
assert(u.scheme == "zlib");
|
|
assert(u.path == "/home/user/file.gz");
|
|
|
|
u = URL("h_tp:asdf");
|
|
assert(u.path == "h_tp:asdf");
|
|
}
|
|
|
|
@nogc pure @system unittest
|
|
{
|
|
assertThrown!URIException(() => URL("http://:80"));
|
|
assertThrown!URIException(() => URL(":80"));
|
|
assertThrown!URIException(() => URL("http://u1:p1@u2:p2@example.org"));
|
|
assertThrown!URIException(() => URL("http://blah.com:port"));
|
|
assertThrown!URIException(() => URL("http://blah.com:66000"));
|
|
}
|
|
|
|
@nogc pure @system unittest
|
|
{
|
|
auto u = URL("ftp://");
|
|
assert(u.scheme == "ftp");
|
|
}
|
|
|
|
/**
|
|
* Attempts to parse an URL from a string and returns the specified component
|
|
* of the URL or $(D_PSYMBOL URL) if no component is specified.
|
|
*
|
|
* Params:
|
|
* T = "scheme", "host", "port", "user", "pass", "path", "query",
|
|
* "fragment".
|
|
* source = The string containing the URL.
|
|
*
|
|
* Returns: Requested URL component.
|
|
*/
|
|
auto parseURL(string T)(const char[] source)
|
|
if (T == "scheme"
|
|
|| T == "host"
|
|
|| T == "user"
|
|
|| T == "pass"
|
|
|| T == "path"
|
|
|| T == "query"
|
|
|| T == "fragment"
|
|
|| T == "port")
|
|
{
|
|
auto ret = URL(source);
|
|
return mixin("ret." ~ T);
|
|
}
|
|
|
|
/// ditto
|
|
URL parseURL(const char[] source) @nogc pure
|
|
{
|
|
return URL(source);
|
|
}
|
|
|
|
///
|
|
@nogc pure @system unittest
|
|
{
|
|
auto u = parseURL("http://example.org:5326");
|
|
assert(u.scheme == parseURL!"scheme"("http://example.org:5326"));
|
|
assert(u.host == parseURL!"host"("http://example.org:5326"));
|
|
assert(u.user == parseURL!"user"("http://example.org:5326"));
|
|
assert(u.pass == parseURL!"pass"("http://example.org:5326"));
|
|
assert(u.path == parseURL!"path"("http://example.org:5326"));
|
|
assert(u.query == parseURL!"query"("http://example.org:5326"));
|
|
assert(u.fragment == parseURL!"fragment"("http://example.org:5326"));
|
|
assert(u.port == parseURL!"port"("http://example.org:5326"));
|
|
}
|