Add tanya.encoding.ascii

This commit is contained in:
Eugen Wissner 2017-09-21 06:57:49 +02:00
parent 8d3a4860e6
commit 4612d5eb6d
4 changed files with 522 additions and 5 deletions

View File

@ -0,0 +1,501 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* Functions operating on ASCII characters.
*
* ASCII is $(B A)merican $(B S)tandard $(B C)ode for $(B I)nformation
* $(B I)nterchange.
*
* Copyright: Eugene Wissner 2017.
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
* Mozilla Public License, v. 2.0).
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/encoding/ascii.d,
* tanya/encoding/ascii.d)
*/
module tanya.encoding.ascii;
import tanya.meta.trait;
const string fullHexDigits = "0123456789ABCDEFabcdef"; /// 0..9A..Fa..f.
const string hexDigits = "0123456789ABCDEF"; /// 0..9A..F.
const string lowerHexDigits = "0123456789ABCDEF"; /// 0..9a..f.
const string digits = "0123456789"; /// 0..9.
const string octalDigits = "01234567"; /// 0..7.
/// A..Za..z.
const string letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const string uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; /// A..Z.
const string lowercase = "abcdefghijklmnopqrstuvwxyz"; /// a..z.
/**
* Whitespace, Horizontal Tab (HT), Line Feed (LF), Carriage Return (CR),
* Vertical Tab (VT) or Form Feed (FF).
*/
const string whitespace = "\t\n\v\f\r ";
/// Letter case specifier.
enum LetterCase : bool
{
upper, /// Uppercase.
lower, /// Lowercase.
}
/**
* Checks for an uppecase alphabetic character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is an uppercase alphabetic
* character, $(D_KEYWORD false) otherwise.
*/
bool isUpper(C)(C c)
if (isSomeChar!C)
{
return (c >= 'A') && (c <= 'Z');
}
///
pure nothrow @safe @nogc unittest
{
assert(isUpper('A'));
assert(isUpper('Z'));
assert(isUpper('L'));
assert(!isUpper('a'));
assert(!isUpper('!'));
}
/**
* Checks for a lowercase alphabetic character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a lowercase alphabetic
* character, $(D_KEYWORD false) otherwise.
*/
bool isLower(C)(C c)
if (isSomeChar!C)
{
return (c >= 'a') && (c <= 'z');
}
///
pure nothrow @safe @nogc unittest
{
assert(isLower('a'));
assert(isLower('z'));
assert(isLower('l'));
assert(!isLower('A'));
assert(!isLower('!'));
}
/**
* Checks for an alphabetic character (upper- or lowercase).
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is an alphabetic character,
* $(D_KEYWORD false) otherwise.
*/
bool isAlpha(C)(C c)
if (isSomeChar!C)
{
return isUpper(c) || isLower(c);
}
///
pure nothrow @safe @nogc unittest
{
assert(isAlpha('A'));
assert(isAlpha('Z'));
assert(isAlpha('L'));
assert(isAlpha('a'));
assert(isAlpha('z'));
assert(isAlpha('l'));
assert(!isAlpha('!'));
}
/**
* Checks for a digit.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a digit,
* $(D_KEYWORD false) otherwise.
*/
bool isDigit(C)(C c)
if (isSomeChar!C)
{
return (c >= '0') && (c <= '9');
}
///
pure nothrow @safe @nogc unittest
{
assert(isDigit('0'));
assert(isDigit('1'));
assert(isDigit('2'));
assert(isDigit('3'));
assert(isDigit('4'));
assert(isDigit('5'));
assert(isDigit('6'));
assert(isDigit('7'));
assert(isDigit('8'));
assert(isDigit('9'));
assert(!isDigit('a'));
assert(!isDigit('!'));
}
/**
* Checks for an alphabetic character (upper- or lowercase) or a digit.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is an alphabetic character or a
* digit, $(D_KEYWORD false) otherwise.
*/
bool isAlphaNum(C)(C c)
if (isSomeChar!C)
{
return isAlpha(c) || isDigit(c);
}
///
pure nothrow @safe @nogc unittest
{
assert(isAlphaNum('0'));
assert(isAlphaNum('1'));
assert(isAlphaNum('9'));
assert(isAlphaNum('A'));
assert(isAlphaNum('Z'));
assert(isAlphaNum('L'));
assert(isAlphaNum('a'));
assert(isAlphaNum('z'));
assert(isAlphaNum('l'));
assert(!isAlphaNum('!'));
}
/**
* Checks for a 7-bit ASCII character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is an ASCII character,
* $(D_KEYWORD false) otherwise.
*/
bool isASCII(C)(C c)
if (isSomeChar!C)
{
return c < 128;
}
///
pure nothrow @safe @nogc unittest
{
assert(isASCII('0'));
assert(isASCII('L'));
assert(isASCII('l'));
assert(isASCII('!'));
assert(!isASCII('©'));
assert(!isASCII('§'));
assert(!isASCII(char.init)); // 0xFF
assert(!isASCII(wchar.init)); // 0xFFFF
assert(!isASCII(dchar.init)); // 0xFFFF
}
/**
* Checks for a control character.
*
* Control characters are non-printable characters. Their ASCII codes are those
* between 0x00 (NUL) and 0x1f (US), and 0x7f (DEL).
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a control character,
* $(D_KEYWORD false) otherwise.
*
* See_Also: $(D_PSYMBOL isPrintable), $(D_PSYMBOL isGraphical).
*/
bool isControl(C)(C c)
if (isSomeChar!C)
{
return (c <= 0x1f) || (c == 0x7f);
}
///
pure nothrow @safe @nogc unittest
{
assert(isControl('\t'));
assert(isControl('\0'));
assert(isControl('\u007f'));
assert(!isControl(' '));
assert(!isControl('a'));
assert(!isControl(char.init)); // 0xFF
assert(!isControl(wchar.init)); // 0xFFFF
}
/**
* Checks for a whitespace character.
*
* Whitespace characters are:
*
* $(UL
* $(LI Whitespace)
* $(LI Horizontal Tab (HT))
* $(LI Line Feed (LF))
* $(LI Carriage Return (CR))
* $(LI Vertical Tab (VT))
* $(LI Form Feed (FF))
* )
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a whitespace character,
* $(D_KEYWORD false) otherwise.
*
* See_Also: $(D_PSYMBOL whitespace).
*/
bool isWhite(C)(C c)
if (isSomeChar!C)
{
return ((c >= 0x09) && (c <= 0x0d)) || (c == 0x20);
}
///
pure nothrow @safe @nogc unittest
{
assert(isWhite('\t'));
assert(isWhite('\n'));
assert(isWhite('\v'));
assert(isWhite('\f'));
assert(isWhite('\r'));
assert(isWhite(' '));
}
/**
* Checks for a graphical character.
*
* Graphical characters are printable characters but whitespace characters.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a control character,
* $(D_KEYWORD false) otherwise.
*
* See_Also: $(D_PSYMBOL isControl), $(D_PSYMBOL isWhite).
*/
bool isGraphical(C)(C c)
if (isSomeChar!C)
{
return (c > 0x20) && (c < 0x7f);
}
///
pure nothrow @safe @nogc unittest
{
assert(isGraphical('a'));
assert(isGraphical('0'));
assert(!isGraphical('\u007f'));
assert(!isGraphical('§'));
assert(!isGraphical('\n'));
assert(!isGraphical(' '));
}
/**
* Checks for a printable character.
*
* This is the opposite of a control character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a control character,
* $(D_KEYWORD false) otherwise.
*
* See_Also: $(D_PSYMBOL isControl).
*/
bool isPrintable(C)(C c)
if (isSomeChar!C)
{
return (c >= 0x20) && (c < 0x7f);
}
///
pure nothrow @safe @nogc unittest
{
assert(isPrintable('a'));
assert(isPrintable('0'));
assert(!isPrintable('\u007f'));
assert(!isPrintable('§'));
assert(!isPrintable('\n'));
assert(isPrintable(' '));
}
/**
* Checks for a hexadecimal digit.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a hexadecimal digit,
* $(D_KEYWORD false) otherwise.
*/
bool isHexDigit(C)(C c)
if (isSomeChar!C)
{
return ((c >= '0') && (c <= '9'))
|| ((c >= 'a') && (c <= 'f'))
|| ((c >= 'A') && (c <= 'F'));
}
///
pure nothrow @safe @nogc unittest
{
assert(isHexDigit('0'));
assert(isHexDigit('1'));
assert(isHexDigit('8'));
assert(isHexDigit('9'));
assert(isHexDigit('A'));
assert(isHexDigit('F'));
assert(!isHexDigit('G'));
assert(isHexDigit('a'));
assert(isHexDigit('f'));
assert(!isHexDigit('g'));
}
/**
* Checks for an octal character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is an octal character,
* $(D_KEYWORD false) otherwise.
*/
bool isOctalDigit(C)(C c)
if (isSomeChar!C)
{
return (c >= '0') && (c <= '7');
}
///
pure nothrow @safe @nogc unittest
{
assert(isOctalDigit('0'));
assert(isOctalDigit('1'));
assert(isOctalDigit('2'));
assert(isOctalDigit('3'));
assert(isOctalDigit('4'));
assert(isOctalDigit('5'));
assert(isOctalDigit('6'));
assert(isOctalDigit('7'));
assert(!isOctalDigit('8'));
}
/**
* Checks for a octal character.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: $(D_KEYWORD true) if $(D_PARAM c) is a octal character,
* $(D_KEYWORD false) otherwise.
*/
bool isPunctuation(C)(C c)
if (isSomeChar!C)
{
return ((c >= 0x21) && (c <= 0x2f))
|| ((c >= 0x3a) && (c <= 0x40))
|| ((c >= 0x5b) && (c <= 0x60))
|| ((c >= 0x7b) && (c <= 0x7e));
}
///
pure nothrow @safe @nogc unittest
{
assert(isPunctuation('!'));
assert(isPunctuation(':'));
assert(isPunctuation('\\'));
assert(isPunctuation('|'));
assert(!isPunctuation('0'));
assert(!isPunctuation(' '));
}
/**
* Converts $(D_PARAM c) to uppercase.
*
* If $(D_PARAM c) is not a lowercase character, $(D_PARAM c) is returned
* unchanged.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: The lowercase of $(D_PARAM c) if available, just $(D_PARAM c)
* otherwise.
*/
C toUpper(C)(const C c)
if (isSomeChar!C)
{
return isLower(c) ? (cast(C) (c - 32)) : c;
}
///
pure nothrow @safe @nogc unittest
{
assert(toUpper('a') == 'A');
assert(toUpper('A') == 'A');
assert(toUpper('!') == '!');
}
/**
* Converts $(D_PARAM c) to lowercase.
*
* If $(D_PARAM c) is not an uppercase character, $(D_PARAM c) is returned
* unchanged.
*
* Params:
* C = Some character type.
* c = Some character.
*
* Returns: The uppercase of $(D_PARAM c) if available, just $(D_PARAM c)
* otherwise.
*/
C toLower(C)(const C c)
if (isSomeChar!C)
{
return isUpper(c) ? (cast(C) (c + 32)) : c;
}
///
pure nothrow @safe @nogc unittest
{
assert(toLower('A') == 'a');
assert(toLower('a') == 'a');
assert(toLower('!') == '!');
}

View File

@ -0,0 +1,17 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* This package provides tools to work with text encodings.
*
* Copyright: Eugene Wissner 2017.
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
* Mozilla Public License, v. 2.0).
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/encoding/package.d,
* tanya/encoding/package.d)
*/
module tanya.encoding;
public import tanya.encoding.ascii;

View File

@ -15,9 +15,9 @@
module tanya.math.mp;
import std.algorithm;
import std.ascii;
import std.range;
import tanya.container.array;
import tanya.encoding.ascii;
import tanya.memory;
import tanya.meta.trait;
import tanya.meta.transform;

View File

@ -14,8 +14,7 @@
*/
module tanya.net.uri;
import std.ascii : isAlphaNum, isDigit;
import std.uni : isAlpha, isNumber;
import tanya.encoding.ascii;
import tanya.memory;
/**
@ -199,8 +198,8 @@ struct URL
this.pass = source[start + i + 1 .. pos];
}
}
else if (!c.isAlpha &&
!c.isNumber &&
else if (!c.isAlpha() &&
!c.isDigit() &&
c != '!' &&
c != ';' &&
c != '=' &&