From 47ef7873538bb690cc0b2ee0bfa6b1de9d32fc9e Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Mon, 10 Apr 2017 08:10:08 +0200 Subject: [PATCH] Add missing constructors to the String --- source/tanya/container/string.d | 620 ++++++++++++++++++++++++-------- 1 file changed, 480 insertions(+), 140 deletions(-) diff --git a/source/tanya/container/string.d b/source/tanya/container/string.d index d976e34..4737fab 100644 --- a/source/tanya/container/string.d +++ b/source/tanya/container/string.d @@ -12,13 +12,35 @@ */ module tanya.container.string; -import core.checkedint; import core.exception; import std.algorithm.comparison; import std.algorithm.mutation; +import std.range; import std.traits; import tanya.memory; +private ref const(wchar) front(const wchar[] str) +pure nothrow @safe @nogc +in +{ + assert(str.length > 0); +} +body +{ + return str[0]; +} + +private void popFront(ref const(wchar)[] str, const size_t s = 1) +pure nothrow @safe @nogc +in +{ + assert(str.length >= s); +} +body +{ + str = str[s .. $]; +} + /** * Thrown on encoding errors. */ @@ -41,12 +63,12 @@ class UTFException : Exception } /** - * Byte range. + * Iterates $(D_PSYMBOL String) by UTF-8 code unit. * * Params: * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))). 
*/ -struct ByteRange(E) +struct ByCodeUnit(E) if (is(Unqual!E == char)) { private E* begin, end; @@ -77,7 +99,7 @@ struct ByteRange(E) @disable this(); - @property ByteRange save() + @property ByCodeUnit save() { return this; } @@ -144,17 +166,17 @@ struct ByteRange(E) return *(this.begin + i); } - ByteRange opIndex() + ByCodeUnit opIndex() { return typeof(return)(*this.container, this.begin, this.end); } - ByteRange!(const E) opIndex() const + ByCodeUnit!(const E) opIndex() const { return typeof(return)(*this.container, this.begin, this.end); } - ByteRange opSlice(const size_t i, const size_t j) @trusted + ByCodeUnit opSlice(const size_t i, const size_t j) @trusted in { assert(i <= j); @@ -165,7 +187,7 @@ struct ByteRange(E) return typeof(return)(*this.container, this.begin + i, this.begin + j); } - ByteRange!(const E) opSlice(const size_t i, const size_t j) const @trusted + ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted in { assert(i <= j); @@ -180,7 +202,6 @@ struct ByteRange(E) { return this.begin[0 .. length]; } - } /** @@ -192,13 +213,16 @@ struct String private char* data; private size_t capacity_; - invariant + pure nothrow @safe @nogc invariant { assert(this.length_ <= this.capacity_); } /** + * Constructs the string from a stringish range. + * * Params: + * R = String type. * str = Initial string. * allocator = Allocator. * @@ -206,127 +230,79 @@ struct String * * Precondition: $(D_INLINECODE allocator is null). */ - this(const char[] str, shared Allocator allocator = defaultAllocator) - @trusted @nogc + this(R)(const R str, shared Allocator allocator = defaultAllocator) + if (!isInfinite!R + && isInputRange!R + && isSomeChar!(ElementEncodingType!R)) { this(allocator); - reserve(str.length); - this.length_ = str.length; - str.copy(this.data[0 .. this.length_]); - } - - /// Ditto. 
- this(const wchar[] str, shared Allocator allocator = defaultAllocator) - @trusted @nogc - { - this(allocator); - reserve(str.length * 2); - - size_t s; - auto sourceLength = str.length; - for (auto c = str.ptr; sourceLength != 0; ++c, --sourceLength) - { - if (length - s < 5) // More space required. - { - bool overflow; - auto size = addu(length, str.length, overflow); - assert(!overflow); - reserve(size); - } - if (*c < 0x80) - { - this.data[s++] = *c & 0x7f; - this.length_ += 1; - } - else if (*c < 0x800) - { - this.data[s++] = 0xc0 | (*c >> 6) & 0xff; - this.data[s++] = 0x80 | (*c & 0x3f); - this.length_ += 2; - } - else if (*c < 0xd800 || *c - 0xe000 < 0x2000) - { - this.data[s++] = 0xe0 | (*c >> 12) & 0xff; - this.data[s++] = 0x80 | ((*c >> 6) & 0x3f); - this.data[s++] = 0x80 | (*c & 0x3f); - this.length_ += 3; - } - else if ((*c - 0xd800) < 2048 && sourceLength > 0 && *(c + 1) - 0xdc00 < 0x400) - { // Surrogate pair - dchar d = (*c - 0xd800) | ((*c++ - 0xdc00) >> 10); - - this.data[s++] = 0xf0 | (d >> 18); - this.data[s++] = 0x80 | ((d >> 12) & 0x3f); - this.data[s++] = 0x80 | ((d >> 6) & 0x3f); - this.data[s++] = 0x80 | (d & 0x3f); - --sourceLength; - this.length_ += 4; - } - else - { - throw defaultAllocator.make!UTFException("Wrong UTF-16 sequeunce"); - } - } + insertBack(str); } /// - unittest + @safe @nogc unittest { auto s = String("\u10437"w); assert("\u10437" == s.get()); } - /// Ditto. 
- this(const dchar[] str, shared Allocator allocator = defaultAllocator) - @trusted @nogc - { - this(allocator); - - reserve(str.length * 4); - - auto s = data; - foreach (c; str) - { - if (c < 0x80) - { - *s++ = c & 0x7f; - this.length_ += 1; - } - else if (c < 0x800) - { - *s++ = 0xc0 | (c >> 6) & 0xff; - *s++ = 0x80 | (c & 0x3f); - this.length_ += 2; - } - else if (c < 0xd800 || c - 0xe000 < 0x2000) - { - *s++ = 0xe0 | (c >> 12) & 0xff; - *s++ = 0x80 | ((c >> 6) & 0x3f); - *s++ = 0x80 | (c & 0x3f); - this.length_ += 3; - } - else if (c - 0x10000 < 0x100000) - { - *s++ = 0xf0 | (c >> 18); - *s++ = 0x80 | ((c >> 12) & 0x3f); - *s++ = 0x80 | ((c >> 6) & 0x3f); - *s++ = 0x80 | (c & 0x3f); - this.length_ += 4; - } - else - { - throw defaultAllocator.make!UTFException("Wrong UTF-32 sequeunce"); - } - } - } - /// - unittest + @safe @nogc unittest { auto s = String("Отказаться от вина - в этом страшная вина."d); assert("Отказаться от вина - в этом страшная вина." == s.get()); } + /** + * Initializes this string from another one. + * + * If $(D_PARAM init) is passed by value, it won't be copied, but moved. + * If the allocator of $(D_PARAM init) matches $(D_PARAM allocator), + * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s + * storage, otherwise, the storage will be allocated with + * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end. + * + * If $(D_PARAM init) is passed by reference, it will be copied. + * + * Params: + * init = Source string. + * allocator = Allocator. + * + * Precondition: $(D_INLINECODE allocator is null). + */ + this(String init, shared Allocator allocator = defaultAllocator) + nothrow @trusted @nogc + { + this(allocator); + if (allocator is init.allocator) + { + // Just steal all references and the allocator. + this.data = init.data; + this.length_ = init.length_; + this.capacity_ = init.capacity_; + + // Reset the source string, so it can't destroy the moved storage. 
+ init.length_ = init.capacity_ = 0; + init.data = null; + } + else + { + reserve(init.length); + init.data[0 .. init.length].copy(this.data[0 .. init.length]); + this.length_ = init.length; + } + } + + /// Ditto. + this(ref const String init, shared Allocator allocator = defaultAllocator) + nothrow @trusted @nogc + { + this(allocator); + reserve(init.length); + init.data[0 .. init.length].copy(this.data[0 .. init.length]); + this.length_ = init.length; + } + /// Ditto. this(shared Allocator allocator) pure nothrow @safe @nogc in @@ -338,6 +314,59 @@ struct String this.allocator_ = allocator; } + /** + * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr). + * + * Params: + * C = Type of the character to fill the string with. + * n = Number of characters to copy. + * chr = Character to fill the string with. + */ + this(C)(const size_t n, const C chr, + shared Allocator allocator = defaultAllocator) @trusted + if (isSomeChar!C) + { + this(allocator); + if (n == 0) + { + return; + } + insertBack(chr); + + // insertBack should validate the character, so we can just copy it + // n - 1 times. + auto remaining = length * n; + + reserve(remaining); + + // Use a quick copy. + for (auto i = this.length_ * 2; i <= remaining; i *= 2) + { + this.data[0 .. this.length_].copy(this.data[this.length_ .. i]); + this.length_ = i; + } + remaining -= length; + copy(this.data[this.length_ - remaining .. this.length_], + this.data[this.length_ .. this.length_ + remaining]); + this.length_ += remaining; + } + + private unittest + { + { + auto s = String(1, 'О'); + assert(s.length == 2); + } + { + auto s = String(3, 'О'); + assert(s.length == 6); + } + { + auto s = String(8, 'О'); + assert(s.length == 16); + } + } + /** * Destroys the string. */ @@ -346,6 +375,278 @@ struct String allocator.deallocate(this.data[0 .. 
this.capacity_]); } + private void write4Bytes(ref const dchar src) + pure nothrow @trusted @nogc + in + { + assert(capacity - length >= 4); + assert(src - 0x10000 < 0x100000); + } + body + { + auto dst = this.data + length; + + *dst++ = 0xf0 | (src >> 18); + *dst++ = 0x80 | ((src >> 12) & 0x3f); + *dst++ = 0x80 | ((src >> 6) & 0x3f); + *dst = 0x80 | (src & 0x3f); + + this.length_ += 4; + } + + private size_t insertWideChar(C)(auto ref const C chr) @trusted + if (is(C == wchar) || is(C == dchar)) + in + { + assert(capacity - length >= 3); // Up to 3 bytes are written below. + } + body + { + auto dst = this.data + length; + if (chr < 0x80) + { + *dst = chr & 0x7f; + this.length_ += 1; + return 1; + } + else if (chr < 0x800) + { + *dst++ = 0xc0 | (chr >> 6) & 0xff; + *dst = 0x80 | (chr & 0x3f); + this.length_ += 2; + return 2; + } + else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff)) + { + *dst++ = 0xe0 | (chr >> 12) & 0xff; + *dst++ = 0x80 | ((chr >> 6) & 0x3f); + *dst = 0x80 | (chr & 0x3f); + this.length_ += 3; + return 3; + } + return 0; + } + + /** + * Inserts a single character at the end of the string. + * + * Params: + * chr = The character should be inserted. + * + * Returns: The number of bytes inserted. + * + * Throws: $(D_PSYMBOL UTFException). + */ + size_t insertBack(const char chr) @trusted @nogc + { + if ((chr & 0x80) != 0) + { + throw defaultAllocator.make!UTFException("Invalid UTF-8 character"); + } + reserve(length + 1); + + *(data + length) = chr; + ++this.length_; + + return 1; + } + + /// Ditto. + size_t insertBack(const wchar chr) @trusted @nogc + { + reserve(length + 3); // A BMP code point takes up to 3 UTF-8 bytes. + + auto ret = insertWideChar(chr); + if (ret == 0) + { + throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); + } + return ret; + } + + /// Ditto. 
+ size_t insertBack(const dchar chr) @trusted @nogc + { + reserve(length + dchar.sizeof); + + auto ret = insertWideChar(chr); + if (ret > 0) + { + return ret; + } + else if (chr - 0x10000 < 0x100000) + { + write4Bytes(chr); + return 4; + } + else + { + throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce"); + } + } + + /** + * Inserts a stringish range at the end of the string. + * + * Params: + * R = Type of the inserted string. + * str = String should be inserted. + * + * Returns: The number of bytes inserted. + * + * Throws: $(D_PSYMBOL UTFException). + */ + size_t insertBack(R)(R str) @trusted + if (!isInfinite!R + && isInputRange!R + && is(Unqual!(ElementEncodingType!R) == char)) + { + size_t size; + static if (hasLength!R || isNarrowString!R) + { + size = str.length + length; + reserve(size); + } + + static if (isNarrowString!R) + { + str.copy(this.data[length .. size]); + this.length_ = size; + return str.length; + } + else + { + size_t insertedLength; + while (!str.empty) + { + ubyte expectedLength; + if ((str.front & 0x80) == 0x00) + { + expectedLength = 1; + } + else if ((str.front & 0xe0) == 0xc0) + { + expectedLength = 2; + } + else if ((str.front & 0xf0) == 0xe0) + { + expectedLength = 3; + } + else if ((str.front & 0xf8) == 0xf0) + { + expectedLength = 4; + } + else + { + throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce"); + } + size = length + expectedLength; + reserve(size); + + for (ubyte i; i < expectedLength; ++i) + { + if (str.empty) + { + throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce"); + } + *(data + length + i) = str.front; + str.popFront(); + } + insertedLength += expectedLength; + this.length_ = size; + } + return insertedLength; + } + } + + /// Ditto. 
+ size_t insertBack(R)(R str) @trusted + if (!isInfinite!R + && isInputRange!R + && is(Unqual!(ElementEncodingType!R) == wchar)) + { + static if (hasLength!R || isNarrowString!R) + { + reserve(length + str.length * wchar.sizeof); + } + + static if (isNarrowString!R) + { + const(wchar)[] range = str; + } + else + { + alias range = str; + } + + auto oldLength = length; + + while (!range.empty) + { + reserve(length + 4); + + auto ret = insertWideChar(range.front); + if (ret > 0) + { + range.popFront(); + } + else if ((range.front & 0xfc00) == 0xd800) + { // High surrogate; a low surrogate must follow. + static if (isNarrowString!R) + { + if (range.length < 2 || (range[1] & 0xfc00) != 0xdc00) + { + throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); + } + dchar d = 0x10000 + ((range[0] - 0xd800) << 10) + (range[1] - 0xdc00); + + range.popFront(2); + } + else + { + dchar d = 0x10000 + ((range.front - 0xd800) << 10); + range.popFront(); + + if (range.empty || (range.front & 0xfc00) != 0xdc00) + { + throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); + } + d += range.front - 0xdc00; + + range.popFront(); + } + write4Bytes(d); + } + else + { + throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); + } + } + return this.length_ - oldLength; + } + + /// Ditto. + size_t insertBack(R)(R str) @trusted + if (!isInfinite!R + && isInputRange!R + && is(Unqual!(ElementEncodingType!R) == dchar)) + { + static if (hasLength!R || isSomeString!R) + { + reserve(length + str.length * 4); + } + + size_t insertedLength; + foreach (const dchar c; str) + { + insertedLength += insertBack(c); + } + return insertedLength; + } + + /// Ditto. + alias insert = insertBack; + /** * Reserves $(D_PARAM size) bytes for the string. * @@ -450,7 +751,7 @@ struct String } /** - * Returns: Byte length. + * Returns: The number of code units that are required to encode the string. 
*/ @property size_t length() const pure nothrow @safe @nogc { @@ -498,17 +799,36 @@ struct String * Returns: Random access range that iterates over the string by bytes, in * forward order. */ - ByteRange!char opIndex() pure nothrow @trusted @nogc + ByCodeUnit!char opIndex() pure nothrow @trusted @nogc { return typeof(return)(this, this.data, this.data + length); } /// Ditto. - ByteRange!(const char) opIndex() const pure nothrow @trusted @nogc + ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc { return typeof(return)(this, this.data, this.data + length); } + /// + unittest + { + auto s = String("Plutarchus"); + auto r = s[]; + assert(r.front == 'P'); + assert(r.back == 's'); + + r.popFront(); + assert(r.front == 'l'); + assert(r.back == 's'); + + r.popBack(); + assert(r.front == 'l'); + assert(r.back == 'u'); + + assert(r.length == 8); + } + /** * Returns: $(D_KEYWORD true) if the vector is empty. */ @@ -518,47 +838,67 @@ struct String } /** - * Returns: The first byte. + * Params: + * i = Slice start. + * j = Slice end. * - * Precondition: $(D_INLINECODE !empty). + * Returns: A range that iterates over the string by bytes from + * index $(D_PARAM i) up to (excluding) index $(D_PARAM j). + * + * Precondition: $(D_INLINECODE i <= j && j <= length). */ - @property ref inout(char) front() inout pure nothrow @safe @nogc + ByCodeUnit!char opSlice(const size_t i, const size_t j) + pure nothrow @trusted @nogc in { - assert(!empty); + assert(i <= j); + assert(j <= length); } body { - return *this.data; + return typeof(return)(this, this.data + i, this.data + j); } - /// - @safe unittest - { - auto s = String("Vladimir Soloviev"); - assert(s.front == 'V'); - } - - /** - * Returns: The last byte. - * - * Precondition: $(D_INLINECODE !empty). - */ - @property ref inout(char) back() inout pure nothrow @trusted @nogc + /// Ditto. 
+ ByCodeUnit!(const char) opSlice(const size_t i, const size_t j) + const pure nothrow @trusted @nogc in { - assert(!empty); + assert(i <= j); + assert(j <= length); } body { - return *(this.data + length - 1); + return typeof(return)(this, this.data + i, this.data + j); } /// unittest { - auto s = String("Caesar"); - assert(s.back == 'r'); + auto s = String("Vladimir Soloviev"); + auto r = s[9 .. $]; + + assert(r.front == 'S'); + assert(r.back == 'v'); + + r.popFront(); + r.popBack(); + assert(r.front == 'o'); + assert(r.back == 'e'); + + r.popFront(); + r.popBack(); + assert(r.front == 'l'); + assert(r.back == 'i'); + + r.popFront(); + r.popBack(); + assert(r.front == 'o'); + assert(r.back == 'v'); + + r.popFront(); + r.popBack(); + assert(r.empty); } mixin DefaultAllocator;