Add tanya.encoding.ascii
This commit is contained in:
parent
8d3a4860e6
commit
4612d5eb6d
501
source/tanya/encoding/ascii.d
Normal file
501
source/tanya/encoding/ascii.d
Normal file
@ -0,0 +1,501 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* Functions operating on ASCII characters.
|
||||
*
|
||||
* ASCII is $(B A)merican $(B S)tandard $(B C)ode for $(B I)nformation
|
||||
* $(B I)nterchange.
|
||||
*
|
||||
* Copyright: Eugene Wissner 2017.
|
||||
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
||||
* Mozilla Public License, v. 2.0).
|
||||
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
||||
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/encoding/ascii.d,
|
||||
* tanya/encoding/ascii.d)
|
||||
*/
|
||||
module tanya.encoding.ascii;
|
||||
|
||||
import tanya.meta.trait;
|
||||
|
||||
/// Decimal digits followed by upper- and lowercase hexadecimal letters: 0..9A..Fa..f.
const string fullHexDigits = "0123456789ABCDEFabcdef";

/// Hexadecimal digits with uppercase letters: 0..9A..F.
const string hexDigits = "0123456789ABCDEF";

/// Hexadecimal digits with lowercase letters: 0..9a..f.
const string lowerHexDigits = "0123456789abcdef";

/// Decimal digits: 0..9.
const string digits = "0123456789";

/// Octal digits: 0..7.
const string octalDigits = "01234567";

/// All ASCII letters, uppercase first: A..Za..z.
const string letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Uppercase ASCII letters: A..Z.
const string uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/// Lowercase ASCII letters: a..z.
const string lowercase = "abcdefghijklmnopqrstuvwxyz";

/**
 * ASCII whitespace characters: Horizontal Tab (HT), Line Feed (LF),
 * Vertical Tab (VT), Form Feed (FF), Carriage Return (CR) and Space.
 */
const string whitespace = "\t\n\v\f\r ";
|
||||
|
||||
/// Selects which letter case to use.
enum LetterCase : bool
{
    /// Uppercase.
    upper = false,

    /// Lowercase.
    lower = true,
}
|
||||
|
||||
/**
 * Tests whether a character is an uppercase ASCII letter (A..Z).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is an uppercase alphabetic
 *          character, $(D_KEYWORD false) otherwise.
 */
bool isUpper(C)(C c)
if (isSomeChar!C)
{
    // Anything outside the closed range ['A', 'Z'] is rejected.
    return !(c < 'A' || c > 'Z');
}

///
pure nothrow @safe @nogc unittest
{
    // Both range boundaries and a letter in between.
    assert(isUpper('A'));
    assert(isUpper('L'));
    assert(isUpper('Z'));

    // Lowercase letters and punctuation are not uppercase.
    assert(!isUpper('a'));
    assert(!isUpper('!'));
}
|
||||
|
||||
/**
 * Tests whether a character is a lowercase ASCII letter (a..z).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a lowercase alphabetic
 *          character, $(D_KEYWORD false) otherwise.
 */
bool isLower(C)(C c)
if (isSomeChar!C)
{
    return 'a' <= c && c <= 'z';
}

///
pure nothrow @safe @nogc unittest
{
    // Both range boundaries and a letter in between.
    assert(isLower('a'));
    assert(isLower('l'));
    assert(isLower('z'));

    // Uppercase letters and punctuation are not lowercase.
    assert(!isLower('A'));
    assert(!isLower('!'));
}
|
||||
|
||||
/**
 * Tests whether a character is an ASCII letter of either case.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is an alphabetic character,
 *          $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL isUpper), $(D_PSYMBOL isLower).
 */
bool isAlpha(C)(C c)
if (isSomeChar!C)
{
    if (isLower(c))
    {
        return true;
    }
    return isUpper(c);
}

///
pure nothrow @safe @nogc unittest
{
    // Uppercase letters.
    assert(isAlpha('A'));
    assert(isAlpha('L'));
    assert(isAlpha('Z'));

    // Lowercase letters.
    assert(isAlpha('a'));
    assert(isAlpha('l'));
    assert(isAlpha('z'));

    // Punctuation is not alphabetic.
    assert(!isAlpha('!'));
}
|
||||
|
||||
/**
 * Tests whether a character is a decimal digit (0..9).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a digit,
 *          $(D_KEYWORD false) otherwise.
 */
bool isDigit(C)(C c)
if (isSomeChar!C)
{
    if (c < '0')
    {
        return false;
    }
    return c <= '9';
}

///
pure nothrow @safe @nogc unittest
{
    // Every decimal digit is accepted.
    assert(isDigit('0'));
    assert(isDigit('1'));
    assert(isDigit('2'));
    assert(isDigit('3'));
    assert(isDigit('4'));
    assert(isDigit('5'));
    assert(isDigit('6'));
    assert(isDigit('7'));
    assert(isDigit('8'));
    assert(isDigit('9'));

    // Letters and punctuation are rejected.
    assert(!isDigit('a'));
    assert(!isDigit('!'));
}
|
||||
|
||||
/**
 * Tests whether a character is an ASCII letter (of either case) or a decimal
 * digit.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is an alphabetic character or a
 *          digit, $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL isAlpha), $(D_PSYMBOL isDigit).
 */
bool isAlphaNum(C)(C c)
if (isSomeChar!C)
{
    return isDigit(c) || isAlpha(c);
}

///
pure nothrow @safe @nogc unittest
{
    // Digits.
    assert(isAlphaNum('0'));
    assert(isAlphaNum('1'));
    assert(isAlphaNum('9'));

    // Uppercase letters.
    assert(isAlphaNum('A'));
    assert(isAlphaNum('L'));
    assert(isAlphaNum('Z'));

    // Lowercase letters.
    assert(isAlphaNum('a'));
    assert(isAlphaNum('l'));
    assert(isAlphaNum('z'));

    // Punctuation is neither a letter nor a digit.
    assert(!isAlphaNum('!'));
}
|
||||
|
||||
/**
 * Tests whether a character belongs to the 7-bit ASCII range
 * (code points 0x00 through 0x7f).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is an ASCII character,
 *          $(D_KEYWORD false) otherwise.
 */
bool isASCII(C)(C c)
if (isSomeChar!C)
{
    return c <= 0x7f;
}

///
pure nothrow @safe @nogc unittest
{
    // Digits, letters and punctuation are all ASCII.
    assert(isASCII('0'));
    assert(isASCII('L'));
    assert(isASCII('l'));
    assert(isASCII('!'));

    // Characters beyond 0x7f are not.
    assert(!isASCII('©'));
    assert(!isASCII('§'));
    assert(!isASCII(char.init)); // 0xFF
    assert(!isASCII(wchar.init)); // 0xFFFF
    assert(!isASCII(dchar.init)); // 0xFFFF
}
|
||||
|
||||
/**
 * Tests whether a character is an ASCII control character.
 *
 * Control characters are non-printable. Their codes are 0x00 (NUL) through
 * 0x1f (US), plus 0x7f (DEL).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a control character,
 *          $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL isPrintable), $(D_PSYMBOL isGraphical).
 */
bool isControl(C)(C c)
if (isSomeChar!C)
{
    // DEL sits apart from the contiguous 0x00..0x1f block.
    if (c == 0x7f)
    {
        return true;
    }
    return c < 0x20;
}

///
pure nothrow @safe @nogc unittest
{
    assert(isControl('\t'));
    assert(isControl('\0'));
    assert(isControl('\u007f'));

    assert(!isControl(' '));
    assert(!isControl('a'));
    assert(!isControl(char.init)); // 0xFF
    assert(!isControl(wchar.init)); // 0xFFFF
}
|
||||
|
||||
/**
 * Tests whether a character is an ASCII whitespace character.
 *
 * Whitespace characters are:
 *
 * $(UL
 *  $(LI Space)
 *  $(LI Horizontal Tab (HT))
 *  $(LI Line Feed (LF))
 *  $(LI Carriage Return (CR))
 *  $(LI Vertical Tab (VT))
 *  $(LI Form Feed (FF))
 * )
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a whitespace character,
 *          $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL whitespace).
 */
bool isWhite(C)(C c)
if (isSomeChar!C)
{
    // HT, LF, VT, FF and CR are contiguous (0x09..0x0d); the space is separate.
    return c == ' ' || ('\t' <= c && c <= '\r');
}

///
pure nothrow @safe @nogc unittest
{
    assert(isWhite(' '));
    assert(isWhite('\t'));
    assert(isWhite('\n'));
    assert(isWhite('\v'));
    assert(isWhite('\f'));
    assert(isWhite('\r'));
}
|
||||
|
||||
/**
 * Tests whether a character is a graphical ASCII character.
 *
 * Graphical characters are all printable characters except the space, i.e.
 * the codes 0x21 ('!') through 0x7e ('~').
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a graphical character,
 *          $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL isControl), $(D_PSYMBOL isWhite).
 */
bool isGraphical(C)(C c)
if (isSomeChar!C)
{
    return '!' <= c && c <= '~';
}

///
pure nothrow @safe @nogc unittest
{
    assert(isGraphical('a'));
    assert(isGraphical('0'));

    assert(!isGraphical(' '));
    assert(!isGraphical('\n'));
    assert(!isGraphical('\u007f'));
    assert(!isGraphical('§'));
}
|
||||
|
||||
/**
 * Tests whether a character is a printable ASCII character.
 *
 * Printable characters are the ASCII characters that are not control
 * characters, i.e. the codes 0x20 (space) through 0x7e ('~').
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a printable character,
 *          $(D_KEYWORD false) otherwise.
 *
 * See_Also: $(D_PSYMBOL isControl).
 */
bool isPrintable(C)(C c)
if (isSomeChar!C)
{
    return ' ' <= c && c <= '~';
}

///
pure nothrow @safe @nogc unittest
{
    assert(isPrintable('a'));
    assert(isPrintable('0'));
    assert(isPrintable(' '));

    assert(!isPrintable('\n'));
    assert(!isPrintable('\u007f'));
    assert(!isPrintable('§'));
}
|
||||
|
||||
/**
 * Tests whether a character is a hexadecimal digit: 0..9, A..F or a..f.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a hexadecimal digit,
 *          $(D_KEYWORD false) otherwise.
 */
bool isHexDigit(C)(C c)
if (isSomeChar!C)
{
    switch (c)
    {
        case '0': .. case '9':
        case 'A': .. case 'F':
        case 'a': .. case 'f':
            return true;
        default:
            return false;
    }
}

///
pure nothrow @safe @nogc unittest
{
    // Decimal digits.
    assert(isHexDigit('0'));
    assert(isHexDigit('1'));
    assert(isHexDigit('8'));
    assert(isHexDigit('9'));

    // Uppercase hexadecimal letters end at F.
    assert(isHexDigit('A'));
    assert(isHexDigit('F'));
    assert(!isHexDigit('G'));

    // Lowercase hexadecimal letters end at f.
    assert(isHexDigit('a'));
    assert(isHexDigit('f'));
    assert(!isHexDigit('g'));
}
|
||||
|
||||
/**
 * Tests whether a character is an octal digit (0..7).
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is an octal digit,
 *          $(D_KEYWORD false) otherwise.
 */
bool isOctalDigit(C)(C c)
if (isSomeChar!C)
{
    // Anything outside the closed range ['0', '7'] is rejected.
    return !(c < '0' || c > '7');
}

///
pure nothrow @safe @nogc unittest
{
    // Every octal digit is accepted.
    assert(isOctalDigit('0'));
    assert(isOctalDigit('1'));
    assert(isOctalDigit('2'));
    assert(isOctalDigit('3'));
    assert(isOctalDigit('4'));
    assert(isOctalDigit('5'));
    assert(isOctalDigit('6'));
    assert(isOctalDigit('7'));

    // 8 is a decimal digit, but not an octal one.
    assert(!isOctalDigit('8'));
}
|
||||
|
||||
/**
 * Tests whether a character is an ASCII punctuation character.
 *
 * Punctuation characters are the graphical characters that are neither
 * letters nor digits: 0x21..0x2f, 0x3a..0x40, 0x5b..0x60 and 0x7b..0x7e.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: $(D_KEYWORD true) if $(D_PARAM c) is a punctuation character,
 *          $(D_KEYWORD false) otherwise.
 */
bool isPunctuation(C)(C c)
if (isSomeChar!C)
{
    switch (c)
    {
        case '!': .. case '/': // 0x21..0x2f
        case ':': .. case '@': // 0x3a..0x40
        case '[': .. case '`': // 0x5b..0x60
        case '{': .. case '~': // 0x7b..0x7e
            return true;
        default:
            return false;
    }
}

///
pure nothrow @safe @nogc unittest
{
    // One sample from each of the four punctuation ranges.
    assert(isPunctuation('!'));
    assert(isPunctuation(':'));
    assert(isPunctuation('\\'));
    assert(isPunctuation('|'));

    // Digits and the space are not punctuation.
    assert(!isPunctuation('0'));
    assert(!isPunctuation(' '));
}
|
||||
|
||||
/**
 * Converts $(D_PARAM c) to uppercase.
 *
 * Only the lowercase ASCII letters a..z are converted; any other character
 * is returned unchanged.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: The uppercase of $(D_PARAM c) if available, just $(D_PARAM c)
 *          otherwise.
 *
 * See_Also: $(D_PSYMBOL isLower), $(D_PSYMBOL toLower).
 */
C toUpper(C)(const C c)
if (isSomeChar!C)
{
    if (!isLower(c))
    {
        return c;
    }
    // Corresponding upper- and lowercase ASCII letters are 32 positions apart.
    return cast(C) (c - ('a' - 'A'));
}

///
pure nothrow @safe @nogc unittest
{
    assert(toUpper('a') == 'A');
    assert(toUpper('A') == 'A');
    assert(toUpper('!') == '!');
}
|
||||
|
||||
/**
 * Converts $(D_PARAM c) to lowercase.
 *
 * Only the uppercase ASCII letters A..Z are converted; any other character
 * is returned unchanged.
 *
 * Params:
 *  C = Some character type.
 *  c = Some character.
 *
 * Returns: The lowercase of $(D_PARAM c) if available, just $(D_PARAM c)
 *          otherwise.
 *
 * See_Also: $(D_PSYMBOL isUpper), $(D_PSYMBOL toUpper).
 */
C toLower(C)(const C c)
if (isSomeChar!C)
{
    if (!isUpper(c))
    {
        return c;
    }
    // Corresponding upper- and lowercase ASCII letters are 32 positions apart.
    return cast(C) (c + ('a' - 'A'));
}

///
pure nothrow @safe @nogc unittest
{
    assert(toLower('A') == 'a');
    assert(toLower('a') == 'a');
    assert(toLower('!') == '!');
}
|
17
source/tanya/encoding/package.d
Normal file
17
source/tanya/encoding/package.d
Normal file
@ -0,0 +1,17 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* This package provides tools to work with text encodings.
|
||||
*
|
||||
* Copyright: Eugene Wissner 2017.
|
||||
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
||||
* Mozilla Public License, v. 2.0).
|
||||
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
||||
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/encoding/package.d,
|
||||
* tanya/encoding/package.d)
|
||||
*/
|
||||
module tanya.encoding;
|
||||
|
||||
public import tanya.encoding.ascii;
|
@ -15,9 +15,9 @@
|
||||
module tanya.math.mp;
|
||||
|
||||
import std.algorithm;
|
||||
import std.ascii;
|
||||
import std.range;
|
||||
import tanya.container.array;
|
||||
import tanya.encoding.ascii;
|
||||
import tanya.memory;
|
||||
import tanya.meta.trait;
|
||||
import tanya.meta.transform;
|
||||
|
@ -14,8 +14,7 @@
|
||||
*/
|
||||
module tanya.net.uri;
|
||||
|
||||
import std.ascii : isAlphaNum, isDigit;
|
||||
import std.uni : isAlpha, isNumber;
|
||||
import tanya.encoding.ascii;
|
||||
import tanya.memory;
|
||||
|
||||
/**
|
||||
@ -199,8 +198,8 @@ struct URL
|
||||
this.pass = source[start + i + 1 .. pos];
|
||||
}
|
||||
}
|
||||
else if (!c.isAlpha &&
|
||||
!c.isNumber &&
|
||||
else if (!c.isAlpha() &&
|
||||
!c.isDigit() &&
|
||||
c != '!' &&
|
||||
c != ';' &&
|
||||
c != '=' &&
|
||||
|
Loading…
Reference in New Issue
Block a user