diff --git a/source/tanya/container/slice.d b/source/tanya/container/slice.d index e0b2e5f..321acfb 100644 --- a/source/tanya/container/slice.d +++ b/source/tanya/container/slice.d @@ -1655,3 +1655,895 @@ private @safe @nogc unittest assert(i == 0); } } + +private ref const(wchar) front(const wchar[] str) +pure nothrow @safe @nogc +in +{ + assert(str.length > 0); +} +body +{ + return str[0]; +} + +private void popFront(ref const(wchar)[] str, const size_t s = 1) +pure nothrow @safe @nogc +in +{ + assert(str.length >= s); +} +body +{ + str = str[s .. $]; +} + +/** + * Thrown on encoding errors. + */ +class UTFException : Exception +{ + /** + * Params: + * msg = The message for the exception. + * file = The file where the exception occurred. + * line = The line number where the exception occurred. + * next = The previous exception in the chain of exceptions, if any. + */ + this(string msg, + string file = __FILE__, + size_t line = __LINE__, + Throwable next = null) @nogc @safe pure nothrow + { + super(msg, file, line, next); + } +} + +/** + * Iterates $(D_PSYMBOL String) by UTF-8 code unit. + * + * Params: + * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))). 
+ */ +struct ByCodeUnit(E) + if (is(Unqual!E == char)) +{ + private E* begin, end; + private alias ContainerType = CopyConstness!(E, Slice!char); + private ContainerType* container; + + invariant + { + assert(this.begin <= this.end); + assert(this.container !is null); + assert(this.begin >= this.container.data); + assert(this.end <= this.container.data + this.container.length); + } + + private this(ref ContainerType container, E* begin, E* end) @trusted + in + { + assert(begin <= end); + assert(begin >= container.data); + assert(end <= container.data + container.length); + } + body + { + this.container = &container; + this.begin = begin; + this.end = end; + } + + @disable this(); + + @property ByCodeUnit save() + { + return this; + } + + @property bool empty() const + { + return this.begin == this.end; + } + + @property size_t length() const + { + return this.end - this.begin; + } + + alias opDollar = length; + + @property ref inout(E) front() inout + in + { + assert(!empty); + } + body + { + return *this.begin; + } + + @property ref inout(E) back() inout @trusted + in + { + assert(!empty); + } + body + { + return *(this.end - 1); + } + + void popFront() @trusted + in + { + assert(!empty); + } + body + { + ++this.begin; + } + + void popBack() @trusted + in + { + assert(!empty); + } + body + { + --this.end; + } + + ref inout(E) opIndex(const size_t i) inout @trusted + in + { + assert(i < length); + } + body + { + return *(this.begin + i); + } + + ByCodeUnit opIndex() + { + return typeof(return)(*this.container, this.begin, this.end); + } + + ByCodeUnit!(const E) opIndex() const + { + return typeof(return)(*this.container, this.begin, this.end); + } + + ByCodeUnit opSlice(const size_t i, const size_t j) @trusted + in + { + assert(i <= j); + assert(j <= length); + } + body + { + return typeof(return)(*this.container, this.begin + i, this.begin + j); + } + + ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted + in + { + assert(i <= j); + 
assert(j <= length); + } + body + { + return typeof(return)(*this.container, this.begin + i, this.begin + j); + } + + inout(E[]) get() inout @trusted + { + return this.begin[0 .. length]; + } +} + +/// UTF-8 string. +alias String = Slice!char; + +/** + * UTF-8 string. + * + * Params: + * T = $(D_KEYWORD char). + */ +struct Slice(T) + if (is(T == char)) +{ + private size_t length_; + private char* data; + private size_t capacity_; + + pure nothrow @safe @nogc invariant + { + assert(this.length_ <= this.capacity_); + } + + /** + * Constructs the string from a stringish range. + * + * Params: + * R = String type. + * str = Initial string. + * allocator = Allocator. + * + * Throws: $(D_PSYMBOL UTFException). + * + * Precondition: $(D_INLINECODE allocator is null). + */ + this(R)(const R str, shared Allocator allocator = defaultAllocator) + if (!isInfinite!R + && isInputRange!R + && isSomeChar!(ElementEncodingType!R)) + { + this(allocator); + insertBack(str); + } + + /// + @safe @nogc unittest + { + auto s = String("\u10437"w); + assert("\u10437" == s.get()); + } + + /// + @safe @nogc unittest + { + auto s = String("Отказаться от вина - в этом страшная вина."d); + assert("Отказаться от вина - в этом страшная вина." == s.get()); + } + + /** + * Initializes this string from another one. + * + * If $(D_PARAM init) is passed by value, it won't be copied, but moved. + * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator), + * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s + * storage, otherwise, the storage will be allocated with + * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end. + * + * If $(D_PARAM init) is passed by reference, it will be copied. + * + * Params: + * init = Source string. + * allocator = Allocator. + * + * Precondition: $(D_INLINECODE allocator is null). 
+     */
+    this(Slice!char init, shared Allocator allocator = defaultAllocator)
+    nothrow @trusted @nogc
+    {
+        this(allocator);
+        if (allocator is init.allocator)
+        {
+            // Same allocator: take over init's storage instead of copying it.
+            this.data = init.data;
+            this.length_ = init.length_;
+            this.capacity_ = init.capacity_;
+
+            // Reset the source string, so it can't destroy the moved storage.
+            init.length_ = init.capacity_ = 0;
+            init.data = null;
+        }
+        else // Different allocators: copy; init releases its own storage.
+        {
+            reserve(init.length);
+            init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+            this.length_ = init.length;
+        }
+    }
+
+    /// Ditto.
+    this(ref const Slice!char init, shared Allocator allocator = defaultAllocator)
+    nothrow @trusted @nogc
+    {
+        this(allocator);
+        reserve(init.length);
+        init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+        this.length_ = init.length;
+    }
+
+    /// Ditto.
+    this(shared Allocator allocator) pure nothrow @safe @nogc
+    in
+    {
+        assert(allocator !is null);
+    }
+    body
+    {
+        this.allocator_ = allocator;
+    }
+
+    /**
+     * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
+     *
+     * Params:
+     *  C   = Type of the character to fill the string with.
+     *  n   = Number of characters to copy.
+     *  chr = Character to fill the string with.
+     */
+    this(C)(const size_t n, const C chr,
+            shared Allocator allocator = defaultAllocator) @trusted
+        if (isSomeChar!C)
+    {
+        this(allocator);
+        if (n == 0)
+        {
+            return;
+        }
+        insertBack(chr);
+
+        // insertBack should validate the character, so we can just copy it
+        // n - 1 times.
+        auto remaining = length * n;
+
+        reserve(remaining);
+
+        // Use a quick copy.
+        for (auto i = this.length_ * 2; i <= remaining; i *= 2)
+        {
+            this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
+            this.length_ = i;
+        }
+        remaining -= length;
+        copy(this.data[this.length_ - remaining .. this.length_],
+             this.data[this.length_ .. this.length_ + remaining]);
+        this.length_ += remaining;
+    }
+
+    private unittest
+    {
+        {
+            auto s = String(1, 'О');
+            assert(s.length == 2);
+        }
+        {
+            auto s = String(3, 'О');
+            assert(s.length == 6);
+        }
+        {
+            auto s = String(8, 'О');
+            assert(s.length == 16);
+        }
+    }
+
+    /**
+     * Destroys the string.
+     */
+    ~this() nothrow @trusted @nogc
+    {
+        allocator.deallocate(this.data[0 .. this.capacity_]);
+    }
+
+    private void write4Bytes(ref const dchar src)
+    pure nothrow @trusted @nogc
+    in
+    {
+        assert(capacity - length >= 4);
+        assert(src - 0x10000 < 0x100000);
+    }
+    body
+    {
+        auto dst = this.data + length;
+
+        *dst++ = 0xf0 | (src >> 18);
+        *dst++ = 0x80 | ((src >> 12) & 0x3f);
+        *dst++ = 0x80 | ((src >> 6) & 0x3f);
+        *dst = 0x80 | (src & 0x3f);
+
+        this.length_ += 4;
+    }
+
+    private size_t insertWideChar(C)(auto ref const C chr) @trusted
+        if (is(C == wchar) || is(C == dchar))
+    in
+    {
+        assert(capacity - length >= 3); // Up to 3 bytes are written, even for a 2-byte wchar.
+    }
+    body
+    {
+        auto dst = this.data + length;
+        if (chr < 0x80)
+        {
+            *dst = chr & 0x7f;
+            this.length_ += 1;
+            return 1;
+        }
+        else if (chr < 0x800)
+        {
+            *dst++ = 0xc0 | (chr >> 6) & 0xff;
+            *dst = 0x80 | (chr & 0x3f);
+            this.length_ += 2;
+            return 2;
+        }
+        else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff))
+        { // Explicit bounds: wchar promotes to int, so "chr - 0xe000" can't rely on wrap-around.
+            *dst++ = 0xe0 | (chr >> 12) & 0xff;
+            *dst++ = 0x80 | ((chr >> 6) & 0x3f);
+            *dst = 0x80 | (chr & 0x3f);
+            this.length_ += 3;
+            return 3;
+        }
+        return 0; // Surrogate or value above 0xffff: nothing written, the caller decides.
+    }
+
+    /**
+     * Inserts a single character at the end of the string.
+     *
+     * Params:
+     *  chr = The character should be inserted.
+     *
+     * Returns: The number of bytes inserted.
+     *
+     * Throws: $(D_PSYMBOL UTFException).
+     */
+    size_t insertBack(const char chr) @trusted @nogc
+    {
+        if ((chr & 0x80) != 0)
+        {
+            throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
+        }
+        reserve(length + 1);
+
+        *(data + length) = chr;
+        ++this.length_;
+
+        return 1;
+    }
+
+    /// Ditto.
+    size_t insertBack(const wchar chr) @trusted @nogc
+    {
+        reserve(length + 3); // A BMP character takes up to 3 UTF-8 bytes, not wchar.sizeof.
+
+        auto ret = insertWideChar(chr);
+        if (ret == 0)
+        {
+            throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+        }
+        return ret;
+    }
+
+    /// Ditto.
+    size_t insertBack(const dchar chr) @trusted @nogc
+    {
+        reserve(length + dchar.sizeof);
+
+        auto ret = insertWideChar(chr);
+        if (ret > 0)
+        {
+            return ret;
+        }
+        else if (chr - 0x10000 < 0x100000)
+        {
+            write4Bytes(chr);
+            return 4;
+        }
+        else
+        {
+            throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
+        }
+    }
+
+    /**
+     * Inserts a stringish range at the end of the string.
+     *
+     * Params:
+     *  R   = Type of the inserted string.
+     *  str = String should be inserted.
+     *
+     * Returns: The number of bytes inserted.
+     *
+     * Throws: $(D_PSYMBOL UTFException).
+     */
+    size_t insertBack(R)(R str) @trusted
+        if (!isInfinite!R
+            && isInputRange!R
+            && is(Unqual!(ElementEncodingType!R) == char))
+    {
+        size_t size;
+        static if (hasLength!R || isNarrowString!R)
+        {
+            size = str.length + length;
+            reserve(size);
+        }
+
+        static if (isNarrowString!R)
+        {
+            str.copy(this.data[length .. size]);
+            this.length_ = size;
+            return str.length;
+        }
+        else
+        {
+            size_t insertedLength;
+            while (!str.empty)
+            {
+                ubyte expectedLength;
+                if ((str.front & 0x80) == 0x00)
+                {
+                    expectedLength = 1;
+                }
+                else if ((str.front & 0xe0) == 0xc0)
+                {
+                    expectedLength = 2;
+                }
+                else if ((str.front & 0xf0) == 0xe0)
+                {
+                    expectedLength = 3;
+                }
+                else if ((str.front & 0xf8) == 0xf0)
+                {
+                    expectedLength = 4;
+                }
+                else
+                {
+                    throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+                }
+                size = length + expectedLength;
+                reserve(size);
+
+                for (; expectedLength > 0; --expectedLength)
+                {
+                    if (str.empty)
+                    {
+                        throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+                    }
+                    *(data + size - expectedLength) = str.front; // Next free slot of this sequence.
+                    str.popFront();
+                }
+                insertedLength += size - length; // expectedLength is 0 after the loop.
+                this.length_ = size;
+            }
+            return insertedLength;
+        }
+    }
+
+    /// Ditto.
+    size_t insertBack(R)(R str) @trusted
+        if (!isInfinite!R
+            && isInputRange!R
+            && is(Unqual!(ElementEncodingType!R) == wchar))
+    {
+        static if (hasLength!R || isNarrowString!R)
+        {
+            reserve(length + str.length * wchar.sizeof);
+        }
+
+        static if (isNarrowString!R)
+        {
+            const(wchar)[] range = str;
+        }
+        else
+        {
+            alias range = str;
+        }
+
+        auto oldLength = length;
+
+        while (!range.empty)
+        {
+            reserve(length + 4);
+
+            auto ret = insertWideChar(range.front);
+            if (ret > 0)
+            {
+                range.popFront();
+            }
+            else if (range.front >= 0xd800 && range.front < 0xdc00)
+            { // Surrogate pair: a high surrogate that must be followed by a low one.
+                static if (isNarrowString!R)
+                {
+                    if (range.length < 2 || range[1] < 0xdc00 || range[1] > 0xdfff)
+                    {
+                        throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+                    }
+                    dchar d = 0x10000 + ((range[0] - 0xd800) << 10) + (range[1] - 0xdc00);
+
+                    range.popFront(2);
+                }
+                else
+                {
+                    dchar d = 0x10000 + ((range.front - 0xd800) << 10); // High ten bits.
+                    range.popFront();
+
+                    if (range.empty || range.front < 0xdc00 || range.front > 0xdfff)
+                    {
+                        throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+                    }
+                    d += range.front - 0xdc00; // Low ten bits.
+
+                    range.popFront();
+                }
+                write4Bytes(d);
+            }
+            else
+            {
+                throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+            }
+        }
+        return this.length_ - oldLength;
+    }
+
+    /// Ditto.
+    size_t insertBack(R)(R str) @trusted
+        if (!isInfinite!R
+            && isInputRange!R
+            && is(Unqual!(ElementEncodingType!R) == dchar))
+    {
+        static if (hasLength!R || isSomeString!R)
+        {
+            reserve(length + str.length * 4);
+        }
+
+        size_t insertedLength;
+        foreach (const dchar c; str)
+        {
+            insertedLength += insertBack(c);
+        }
+        return insertedLength;
+    }
+
+    /// Ditto.
+    alias insert = insertBack;
+
+    /**
+     * Reserves $(D_PARAM size) bytes for the string.
+     *
+     * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
+     * function call does not cause a reallocation and the string capacity is not
+     * affected.
+     *
+     * Params:
+     *  size = Desired size in bytes.
+     */
+    void reserve(const size_t size) nothrow @trusted @nogc
+    {
+        if (this.capacity_ >= size)
+        {
+            return;
+        }
+
+        this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
+        this.capacity_ = size;
+    }
+
+    ///
+    @nogc @safe unittest
+    {
+        String s;
+        assert(s.capacity == 0);
+
+        s.reserve(3);
+        assert(s.capacity == 3);
+
+        s.reserve(3);
+        assert(s.capacity == 3);
+
+        s.reserve(1);
+        assert(s.capacity == 3);
+    }
+
+    /**
+     * Requests the string to reduce its capacity to fit the $(D_PARAM size).
+     *
+     * The request is non-binding. The string won't become smaller than the
+     * string byte length.
+ * + * Params: + * size = Desired size. + */ + void shrink(const size_t size) nothrow @trusted @nogc + { + if (this.capacity_ <= size) + { + return; + } + + const n = max(this.length_, size); + void[] buf = this.data[0 .. this.capacity_]; + if (allocator.reallocate(buf, n)) + { + this.capacity_ = n; + this.data = cast(char*) buf; + } + } + + /// + @nogc @safe unittest + { + auto s = String("Die Alten lasen laut."); + assert(s.capacity == 21); + + s.reserve(30); + s.shrink(25); + assert(s.capacity == 25); + + s.shrink(18); + assert(s.capacity == 21); + } + + /** + * Returns: String capacity in bytes. + */ + @property size_t capacity() const pure nothrow @safe @nogc + { + return this.capacity_; + } + + /// + unittest + { + auto s = String("In allem Schreiben ist Schamlosigkeit."); + assert(s.capacity == 38); + } + + /** + * Returns an array used internally by the string. + * The length of the returned array may be smaller than the size of the + * reserved memory for the string. + * + * Returns: The array representing the string. + */ + inout(char[]) get() inout pure nothrow @trusted @nogc + { + return this.data[0 .. this.length_]; + } + + /** + * Returns: The number of code units that are required to encode the string. + */ + @property size_t length() const pure nothrow @safe @nogc + { + return this.length_; + } + + /// + alias opDollar = length; + + /// + unittest + { + auto s = String("Piscis primuin a capite foetat."); + assert(s.length == 31); + assert(s[$ - 1] == '.'); + } + + /** + * Params: + * pos = Position. + * + * Returns: Byte at $(D_PARAM pos). + * + * Precondition: $(D_INLINECODE length > pos). 
+ */ + ref inout(char) opIndex(const size_t pos) inout pure nothrow @trusted @nogc + in + { + assert(length > pos); + } + body + { + return *(this.data + pos); + } + + /// + unittest + { + auto s = String("Alea iacta est."); + assert(s[0] == 'A'); + assert(s[4] == ' '); + } + + /** + * Returns: Random access range that iterates over the string by bytes, in + * forward order. + */ + ByCodeUnit!char opIndex() pure nothrow @trusted @nogc + { + return typeof(return)(this, this.data, this.data + length); + } + + /// Ditto. + ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc + { + return typeof(return)(this, this.data, this.data + length); + } + + /// + unittest + { + auto s = String("Plutarchus"); + auto r = s[]; + assert(r.front == 'P'); + assert(r.back == 's'); + + r.popFront(); + assert(r.front == 'l'); + assert(r.back == 's'); + + r.popBack(); + assert(r.front == 'l'); + assert(r.back == 'u'); + + assert(r.length == 8); + } + + /** + * Returns: $(D_KEYWORD true) if the vector is empty. + */ + @property bool empty() const pure nothrow @safe @nogc + { + return length == 0; + } + + /** + * Params: + * i = Slice start. + * j = Slice end. + * + * Returns: A range that iterates over the string by bytes from + * index $(D_PARAM i) up to (excluding) index $(D_PARAM j). + * + * Precondition: $(D_INLINECODE i <= j && j <= length). + */ + ByCodeUnit!char opSlice(const size_t i, const size_t j) + pure nothrow @trusted @nogc + in + { + assert(i <= j); + assert(j <= length); + } + body + { + return typeof(return)(this, this.data + i, this.data + j); + } + + /// Ditto. + ByCodeUnit!(const char) opSlice(const size_t i, const size_t j) + const pure nothrow @trusted @nogc + in + { + assert(i <= j); + assert(j <= length); + } + body + { + return typeof(return)(this, this.data + i, this.data + j); + } + + /// + unittest + { + auto s = String("Vladimir Soloviev"); + auto r = s[9 .. 
$]; + + assert(r.front == 'S'); + assert(r.back == 'v'); + + r.popFront(); + r.popBack(); + assert(r.front == 'o'); + assert(r.back == 'e'); + + r.popFront(); + r.popBack(); + assert(r.front == 'l'); + assert(r.back == 'i'); + + r.popFront(); + r.popBack(); + assert(r.front == 'o'); + assert(r.back == 'v'); + + r.popFront(); + r.popBack(); + assert(r.empty); + } + + mixin DefaultAllocator; +} diff --git a/source/tanya/container/string.d b/source/tanya/container/string.d deleted file mode 100644 index 4737fab..0000000 --- a/source/tanya/container/string.d +++ /dev/null @@ -1,905 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/** - * UTF-8 string. - * - * Copyright: Eugene Wissner 2017. - * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, - * Mozilla Public License, v. 2.0). - * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) - */ -module tanya.container.string; - -import core.exception; -import std.algorithm.comparison; -import std.algorithm.mutation; -import std.range; -import std.traits; -import tanya.memory; - -private ref const(wchar) front(const wchar[] str) -pure nothrow @safe @nogc -in -{ - assert(str.length > 0); -} -body -{ - return str[0]; -} - -private void popFront(ref const(wchar)[] str, const size_t s = 1) -pure nothrow @safe @nogc -in -{ - assert(str.length >= s); -} -body -{ - str = str[s .. $]; -} - -/** - * Thrown on encoding errors. - */ -class UTFException : Exception -{ - /** - * Params: - * msg = The message for the exception. - * file = The file where the exception occurred. - * line = The line number where the exception occurred. - * next = The previous exception in the chain of exceptions, if any. 
- */ - this(string msg, - string file = __FILE__, - size_t line = __LINE__, - Throwable next = null) @nogc @safe pure nothrow - { - super(msg, file, line, next); - } -} - -/** - * Iterates $(D_PSYMBOL String) by UTF-8 code unit. - * - * Params: - * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))). - */ -struct ByCodeUnit(E) - if (is(Unqual!E == char)) -{ - private E* begin, end; - private alias ContainerType = CopyConstness!(E, String); - private ContainerType* container; - - invariant - { - assert(this.begin <= this.end); - assert(this.container !is null); - assert(this.begin >= this.container.data); - assert(this.end <= this.container.data + this.container.length); - } - - private this(ref ContainerType container, E* begin, E* end) @trusted - in - { - assert(begin <= end); - assert(begin >= container.data); - assert(end <= container.data + container.length); - } - body - { - this.container = &container; - this.begin = begin; - this.end = end; - } - - @disable this(); - - @property ByCodeUnit save() - { - return this; - } - - @property bool empty() const - { - return this.begin == this.end; - } - - @property size_t length() const - { - return this.end - this.begin; - } - - alias opDollar = length; - - @property ref inout(E) front() inout - in - { - assert(!empty); - } - body - { - return *this.begin; - } - - @property ref inout(E) back() inout @trusted - in - { - assert(!empty); - } - body - { - return *(this.end - 1); - } - - void popFront() @trusted - in - { - assert(!empty); - } - body - { - ++this.begin; - } - - void popBack() @trusted - in - { - assert(!empty); - } - body - { - --this.end; - } - - ref inout(E) opIndex(const size_t i) inout @trusted - in - { - assert(i < length); - } - body - { - return *(this.begin + i); - } - - ByCodeUnit opIndex() - { - return typeof(return)(*this.container, this.begin, this.end); - } - - ByCodeUnit!(const E) opIndex() const - { - return typeof(return)(*this.container, this.begin, this.end); - } - - 
ByCodeUnit opSlice(const size_t i, const size_t j) @trusted - in - { - assert(i <= j); - assert(j <= length); - } - body - { - return typeof(return)(*this.container, this.begin + i, this.begin + j); - } - - ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted - in - { - assert(i <= j); - assert(j <= length); - } - body - { - return typeof(return)(*this.container, this.begin + i, this.begin + j); - } - - inout(E[]) get() inout @trusted - { - return this.begin[0 .. length]; - } -} - -/** - * UTF-8 string. - */ -struct String -{ - private size_t length_; - private char* data; - private size_t capacity_; - - pure nothrow @safe @nogc invariant - { - assert(this.length_ <= this.capacity_); - } - - /** - * Constructs the string from a stringish range. - * - * Params: - * R = String type. - * str = Initial string. - * allocator = Allocator. - * - * Throws: $(D_PSYMBOL UTFException). - * - * Precondition: $(D_INLINECODE allocator is null). - */ - this(R)(const R str, shared Allocator allocator = defaultAllocator) - if (!isInfinite!R - && isInputRange!R - && isSomeChar!(ElementEncodingType!R)) - { - this(allocator); - insertBack(str); - } - - /// - @safe @nogc unittest - { - auto s = String("\u10437"w); - assert("\u10437" == s.get()); - } - - /// - @safe @nogc unittest - { - auto s = String("Отказаться от вина - в этом страшная вина."d); - assert("Отказаться от вина - в этом страшная вина." == s.get()); - } - - /** - * Initializes this string from another one. - * - * If $(D_PARAM init) is passed by value, it won't be copied, but moved. - * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator), - * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s - * storage, otherwise, the storage will be allocated with - * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end. - * - * If $(D_PARAM init) is passed by reference, it will be copied. - * - * Params: - * init = Source string. - * allocator = Allocator. 
- * - * Precondition: $(D_INLINECODE allocator is null). - */ - this(String init, shared Allocator allocator = defaultAllocator) - nothrow @trusted @nogc - { - this(allocator); - if (allocator !is init.allocator) - { - // Just steal all references and the allocator. - this.data = init.data; - this.length_ = init.length_; - this.capacity_ = init.capacity_; - - // Reset the source string, so it can't destroy the moved storage. - init.length_ = init.capacity_ = 0; - init.data = null; - } - else - { - reserve(init.length); - init.data[0 .. init.length].copy(this.data[0 .. init.length]); - this.length_ = init.length; - } - } - - /// Ditto. - this(ref const String init, shared Allocator allocator = defaultAllocator) - nothrow @trusted @nogc - { - this(allocator); - reserve(init.length); - init.data[0 .. init.length].copy(this.data[0 .. init.length]); - this.length_ = init.length; - } - - /// Ditto. - this(shared Allocator allocator) pure nothrow @safe @nogc - in - { - assert(allocator !is null); - } - body - { - this.allocator_ = allocator; - } - - /** - * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr). - * - * Params: - * C = Type of the character to fill the string with. - * n = Number of characters to copy. - * chr = Character to fill the string with. - */ - this(C)(const size_t n, const C chr, - shared Allocator allocator = defaultAllocator) @trusted - if (isSomeChar!C) - { - this(allocator); - if (n == 0) - { - return; - } - insertBack(chr); - - // insertBack should validate the character, so we can just copy it - // n - 1 times. - auto remaining = length * n; - - reserve(remaining); - - // Use a quick copy. - for (auto i = this.length_ * 2; i <= remaining; i *= 2) - { - this.data[0 .. this.length_].copy(this.data[this.length_ .. i]); - this.length_ = i; - } - remaining -= length; - copy(this.data[this.length_ - remaining .. this.length_], - this.data[this.length_ .. 
this.length_ + remaining]); - this.length_ += remaining; - } - - private unittest - { - { - auto s = String(1, 'О'); - assert(s.length == 2); - } - { - auto s = String(3, 'О'); - assert(s.length == 6); - } - { - auto s = String(8, 'О'); - assert(s.length == 16); - } - } - - /** - * Destroys the string. - */ - ~this() nothrow @trusted @nogc - { - allocator.deallocate(this.data[0 .. this.capacity_]); - } - - private void write4Bytes(ref const dchar src) - pure nothrow @trusted @nogc - in - { - assert(capacity - length >= 4); - assert(src - 0x10000 < 0x100000); - } - body - { - auto dst = this.data + length; - - *dst++ = 0xf0 | (src >> 18); - *dst++ = 0x80 | ((src >> 12) & 0x3f); - *dst++ = 0x80 | ((src >> 6) & 0x3f); - *dst = 0x80 | (src & 0x3f); - - this.length_ += 4; - } - - private size_t insertWideChar(C)(auto ref const C chr) @trusted - if (is(C == wchar) || is(C == dchar)) - in - { - assert(capacity - length >= C.sizeof); - } - body - { - auto dst = this.data + length; - if (chr < 0x80) - { - *dst = chr & 0x7f; - this.length_ += 1; - return 1; - } - else if (chr < 0x800) - { - *dst++ = 0xc0 | (chr >> 6) & 0xff; - *dst = 0x80 | (chr & 0x3f); - this.length_ += 2; - return 2; - } - else if (chr < 0xd800 || chr - 0xe000 < 0x2000) - { - *dst++ = 0xe0 | (chr >> 12) & 0xff; - *dst++ = 0x80 | ((chr >> 6) & 0x3f); - *dst = 0x80 | (chr & 0x3f); - this.length_ += 3; - return 3; - } - return 0; - } - - /** - * Inserts a single character at the end of the string. - * - * Params: - * chr = The character should be inserted. - * - * Returns: The number of bytes inserted. - * - * Throws: $(D_PSYMBOL UTFException). - */ - size_t insertBack(const char chr) @trusted @nogc - { - if ((chr & 0x80) != 0) - { - throw defaultAllocator.make!UTFException("Invalid UTF-8 character"); - } - reserve(length + 1); - - *(data + length) = chr; - ++this.length_; - - return 1; - } - - /// Ditto. 
- size_t insertBack(const wchar chr) @trusted @nogc - { - reserve(length + wchar.sizeof); - - auto ret = insertWideChar(chr); - if (ret == 0) - { - throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); - } - return ret; - } - - /// Ditto. - size_t insertBack(const dchar chr) @trusted @nogc - { - reserve(length + dchar.sizeof); - - auto ret = insertWideChar(chr); - if (ret > 0) - { - return ret; - } - else if (chr - 0x10000 < 0x100000) - { - write4Bytes(chr); - return 4; - } - else - { - throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce"); - } - } - - /** - * Inserts a stringish range at the end of the string. - * - * Params: - * R = Type of the inserted string. - * str = String should be inserted. - * - * Returns: The number of bytes inserted. - * - * Throws: $(D_PSYMBOL UTFException). - */ - size_t insertBack(R)(R str) @trusted - if (!isInfinite!R - && isInputRange!R - && is(Unqual!(ElementEncodingType!R) == char)) - { - size_t size; - static if (hasLength!R || isNarrowString!R) - { - size = str.length + length; - reserve(size); - } - - static if (isNarrowString!R) - { - str.copy(this.data[length .. 
size]); - this.length_ = size; - return str.length; - } - else - { - size_t insertedLength; - while (!str.empty) - { - ubyte expectedLength; - if ((str.front & 0x80) == 0x00) - { - expectedLength = 1; - } - else if ((str.front & 0xe0) == 0xc0) - { - expectedLength = 2; - } - else if ((str.front & 0xf0) == 0xe0) - { - expectedLength = 3; - } - else if ((str.front & 0xf8) == 0xf0) - { - expectedLength = 4; - } - else - { - throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce"); - } - size = length + expectedLength; - reserve(size); - - for (; expectedLength > 0; --expectedLength) - { - if (str.empty) - { - throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce"); - } - *(data + length) = str.front; - str.popFront(); - } - insertedLength += expectedLength; - this.length_ = size; - } - return insertedLength; - } - } - - /// Ditto. - size_t insertBack(R)(R str) @trusted - if (!isInfinite!R - && isInputRange!R - && is(Unqual!(ElementEncodingType!R) == wchar)) - { - static if (hasLength!R || isNarrowString!R) - { - reserve(length + str.length * wchar.sizeof); - } - - static if (isNarrowString!R) - { - const(wchar)[] range = str; - } - else - { - alias range = str; - } - - auto oldLength = length; - - while (!range.empty) - { - reserve(length + 4); - - auto ret = insertWideChar(range.front); - if (ret > 0) - { - range.popFront(); - } - else if (range.front - 0xd800 < 2048) - { // Surrogate pair. 
- static if (isNarrowString!R) - { - if (range.length < 2 || range[1] - 0xdc00 >= 0x400) - { - throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); - } - dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10); - - range.popFront(2); - } - else - { - dchar d = range.front - 0xd800; - range.popFront(); - - if (range.empty || range.front - 0xdc00 >= 0x400) - { - throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); - } - d |= (range.front - 0xdc00) >> 10; - - range.popFront(); - } - write4Bytes(d); - } - else - { - throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); - } - } - return this.length_ - oldLength; - } - - /// Ditto. - size_t insertBack(R)(R str) @trusted - if (!isInfinite!R - && isInputRange!R - && is(Unqual!(ElementEncodingType!R) == dchar)) - { - static if (hasLength!R || isSomeString!R) - { - reserve(length + str.length * 4); - } - - size_t insertedLength; - foreach (const dchar c; str) - { - insertedLength += insertBack(c); - } - return insertedLength; - } - - /// Ditto. - alias insert = insertBack; - - /** - * Reserves $(D_PARAM size) bytes for the string. - * - * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the - * function call does not cause a reallocation and the string capacity is not - * affected. - * - * Params: - * size = Desired size in bytes. - */ - void reserve(const size_t size) nothrow @trusted @nogc - { - if (this.capacity_ >= size) - { - return; - } - - this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr; - this.capacity_ = size; - } - - /// - @nogc @safe unittest - { - String s; - assert(s.capacity == 0); - - s.reserve(3); - assert(s.capacity == 3); - - s.reserve(3); - assert(s.capacity == 3); - - s.reserve(1); - assert(s.capacity == 3); - } - - /** - * Requests the string to reduce its capacity to fit the $(D_PARAM size). - * - * The request is non-binding. The string won't become smaller than the - * string byte length. 
- * - * Params: - * size = Desired size. - */ - void shrink(const size_t size) nothrow @trusted @nogc - { - if (this.capacity_ <= size) - { - return; - } - - const n = max(this.length_, size); - void[] buf = this.data[0 .. this.capacity_]; - if (allocator.reallocate(buf, n)) - { - this.capacity_ = n; - this.data = cast(char*) buf; - } - } - - /// - @nogc @safe unittest - { - auto s = String("Die Alten lasen laut."); - assert(s.capacity == 21); - - s.reserve(30); - s.shrink(25); - assert(s.capacity == 25); - - s.shrink(18); - assert(s.capacity == 21); - } - - /** - * Returns: String capacity in bytes. - */ - @property size_t capacity() const pure nothrow @safe @nogc - { - return this.capacity_; - } - - /// - unittest - { - auto s = String("In allem Schreiben ist Schamlosigkeit."); - assert(s.capacity == 38); - } - - /** - * Returns an array used internally by the string. - * The length of the returned array may be smaller than the size of the - * reserved memory for the string. - * - * Returns: The array representing the string. - */ - inout(char[]) get() inout pure nothrow @trusted @nogc - { - return this.data[0 .. this.length_]; - } - - /** - * Returns: The number of code units that are required to encode the string. - */ - @property size_t length() const pure nothrow @safe @nogc - { - return this.length_; - } - - /// - alias opDollar = length; - - /// - unittest - { - auto s = String("Piscis primuin a capite foetat."); - assert(s.length == 31); - assert(s[$ - 1] == '.'); - } - - /** - * Params: - * pos = Position. - * - * Returns: Byte at $(D_PARAM pos). - * - * Precondition: $(D_INLINECODE length > pos). 
- */ - ref inout(char) opIndex(const size_t pos) inout pure nothrow @trusted @nogc - in - { - assert(length > pos); - } - body - { - return *(this.data + pos); - } - - /// - unittest - { - auto s = String("Alea iacta est."); - assert(s[0] == 'A'); - assert(s[4] == ' '); - } - - /** - * Returns: Random access range that iterates over the string by bytes, in - * forward order. - */ - ByCodeUnit!char opIndex() pure nothrow @trusted @nogc - { - return typeof(return)(this, this.data, this.data + length); - } - - /// Ditto. - ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc - { - return typeof(return)(this, this.data, this.data + length); - } - - /// - unittest - { - auto s = String("Plutarchus"); - auto r = s[]; - assert(r.front == 'P'); - assert(r.back == 's'); - - r.popFront(); - assert(r.front == 'l'); - assert(r.back == 's'); - - r.popBack(); - assert(r.front == 'l'); - assert(r.back == 'u'); - - assert(r.length == 8); - } - - /** - * Returns: $(D_KEYWORD true) if the vector is empty. - */ - @property bool empty() const pure nothrow @safe @nogc - { - return length == 0; - } - - /** - * Params: - * i = Slice start. - * j = Slice end. - * - * Returns: A range that iterates over the string by bytes from - * index $(D_PARAM i) up to (excluding) index $(D_PARAM j). - * - * Precondition: $(D_INLINECODE i <= j && j <= length). - */ - ByCodeUnit!char opSlice(const size_t i, const size_t j) - pure nothrow @trusted @nogc - in - { - assert(i <= j); - assert(j <= length); - } - body - { - return typeof(return)(this, this.data + i, this.data + j); - } - - /// Ditto. - ByCodeUnit!(const char) opSlice(const size_t i, const size_t j) - const pure nothrow @trusted @nogc - in - { - assert(i <= j); - assert(j <= length); - } - body - { - return typeof(return)(this, this.data + i, this.data + j); - } - - /// - unittest - { - auto s = String("Vladimir Soloviev"); - auto r = s[9 .. 
$]; - - assert(r.front == 'S'); - assert(r.back == 'v'); - - r.popFront(); - r.popBack(); - assert(r.front == 'o'); - assert(r.back == 'e'); - - r.popFront(); - r.popBack(); - assert(r.front == 'l'); - assert(r.back == 'i'); - - r.popFront(); - r.popBack(); - assert(r.front == 'o'); - assert(r.back == 'v'); - - r.popFront(); - r.popBack(); - assert(r.empty); - } - - mixin DefaultAllocator; -}