summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2017-04-21 14:03:20 +0200
committerEugen Wissner <belka@caraus.de>2017-04-21 14:03:20 +0200
commitd629525a4b0b6568154ec642faf2596179600846 (patch)
tree5df947a3e839f8cfdaf58c2ce3f15d7f44b964aa /source
parent33d321f0d7bd564f01ff8e8050af9c7f5dba6804 (diff)
downloadtanya-d629525a4b0b6568154ec642faf2596179600846.tar.gz
Make String to be a char Slice alias
Diffstat (limited to 'source')
-rw-r--r--source/tanya/container/slice.d892
-rw-r--r--source/tanya/container/string.d905
2 files changed, 892 insertions, 905 deletions
diff --git a/source/tanya/container/slice.d b/source/tanya/container/slice.d
index e0b2e5f..321acfb 100644
--- a/source/tanya/container/slice.d
+++ b/source/tanya/container/slice.d
@@ -1655,3 +1655,895 @@ private @safe @nogc unittest
assert(i == 0);
}
}
+
+private ref const(wchar) front(const wchar[] str)
+pure nothrow @safe @nogc
+in
+{
+ assert(str.length > 0);
+}
+body
+{
+ return str[0];
+}
+
+private void popFront(ref const(wchar)[] str, const size_t s = 1)
+pure nothrow @safe @nogc
+in
+{
+ assert(str.length >= s);
+}
+body
+{
+ str = str[s .. $];
+}
+
+/**
+ * Thrown on encoding errors.
+ */
+class UTFException : Exception
+{
+ /**
+ * Params:
+ * msg = The message for the exception.
+ * file = The file where the exception occurred.
+ * line = The line number where the exception occurred.
+ * next = The previous exception in the chain of exceptions, if any.
+ */
+ this(string msg,
+ string file = __FILE__,
+ size_t line = __LINE__,
+ Throwable next = null) @nogc @safe pure nothrow
+ {
+ super(msg, file, line, next);
+ }
+}
+
+/**
+ * Iterates $(D_PSYMBOL String) by UTF-8 code unit.
+ *
+ * Params:
+ * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
+ */
+struct ByCodeUnit(E)
+ if (is(Unqual!E == char))
+{
+ private E* begin, end;
+ private alias ContainerType = CopyConstness!(E, Slice!char);
+ private ContainerType* container;
+
+ invariant
+ {
+ assert(this.begin <= this.end);
+ assert(this.container !is null);
+ assert(this.begin >= this.container.data);
+ assert(this.end <= this.container.data + this.container.length);
+ }
+
+ private this(ref ContainerType container, E* begin, E* end) @trusted
+ in
+ {
+ assert(begin <= end);
+ assert(begin >= container.data);
+ assert(end <= container.data + container.length);
+ }
+ body
+ {
+ this.container = &container;
+ this.begin = begin;
+ this.end = end;
+ }
+
+ @disable this();
+
+ @property ByCodeUnit save()
+ {
+ return this;
+ }
+
+ @property bool empty() const
+ {
+ return this.begin == this.end;
+ }
+
+ @property size_t length() const
+ {
+ return this.end - this.begin;
+ }
+
+ alias opDollar = length;
+
+ @property ref inout(E) front() inout
+ in
+ {
+ assert(!empty);
+ }
+ body
+ {
+ return *this.begin;
+ }
+
+ @property ref inout(E) back() inout @trusted
+ in
+ {
+ assert(!empty);
+ }
+ body
+ {
+ return *(this.end - 1);
+ }
+
+ void popFront() @trusted
+ in
+ {
+ assert(!empty);
+ }
+ body
+ {
+ ++this.begin;
+ }
+
+ void popBack() @trusted
+ in
+ {
+ assert(!empty);
+ }
+ body
+ {
+ --this.end;
+ }
+
+ ref inout(E) opIndex(const size_t i) inout @trusted
+ in
+ {
+ assert(i < length);
+ }
+ body
+ {
+ return *(this.begin + i);
+ }
+
+ ByCodeUnit opIndex()
+ {
+ return typeof(return)(*this.container, this.begin, this.end);
+ }
+
+ ByCodeUnit!(const E) opIndex() const
+ {
+ return typeof(return)(*this.container, this.begin, this.end);
+ }
+
+ ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
+ in
+ {
+ assert(i <= j);
+ assert(j <= length);
+ }
+ body
+ {
+ return typeof(return)(*this.container, this.begin + i, this.begin + j);
+ }
+
+ ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
+ in
+ {
+ assert(i <= j);
+ assert(j <= length);
+ }
+ body
+ {
+ return typeof(return)(*this.container, this.begin + i, this.begin + j);
+ }
+
+ inout(E[]) get() inout @trusted
+ {
+ return this.begin[0 .. length];
+ }
+}
+
+/// UTF-8 string.
+alias String = Slice!char;
+
+/**
+ * UTF-8 string.
+ *
+ * Params:
+ * T = $(D_KEYWORD char).
+ */
+struct Slice(T)
+ if (is(T == char))
+{
+ private size_t length_;
+ private char* data;
+ private size_t capacity_;
+
+ pure nothrow @safe @nogc invariant
+ {
+ assert(this.length_ <= this.capacity_);
+ }
+
+ /**
+ * Constructs the string from a stringish range.
+ *
+ * Params:
+ * R = String type.
+ * str = Initial string.
+ * allocator = Allocator.
+ *
+ * Throws: $(D_PSYMBOL UTFException).
+ *
+ * Precondition: $(D_INLINECODE allocator is null).
+ */
+ this(R)(const R str, shared Allocator allocator = defaultAllocator)
+ if (!isInfinite!R
+ && isInputRange!R
+ && isSomeChar!(ElementEncodingType!R))
+ {
+ this(allocator);
+ insertBack(str);
+ }
+
+ ///
+ @safe @nogc unittest
+ {
+ auto s = String("\u10437"w);
+ assert("\u10437" == s.get());
+ }
+
+ ///
+ @safe @nogc unittest
+ {
+ auto s = String("Отказаться от вина - в этом страшная вина."d);
+ assert("Отказаться от вина - в этом страшная вина." == s.get());
+ }
+
+ /**
+ * Initializes this string from another one.
+ *
+ * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
+ * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator),
+ * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
+ * storage, otherwise, the storage will be allocated with
+ * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
+ *
+ * If $(D_PARAM init) is passed by reference, it will be copied.
+ *
+ * Params:
+ * init = Source string.
+ * allocator = Allocator.
+ *
+ * Precondition: $(D_INLINECODE allocator is null).
+ */
+ this(Slice!char init, shared Allocator allocator = defaultAllocator)
+ nothrow @trusted @nogc
+ {
+ this(allocator);
+ if (allocator !is init.allocator)
+ {
+ // Just steal all references and the allocator.
+ this.data = init.data;
+ this.length_ = init.length_;
+ this.capacity_ = init.capacity_;
+
+ // Reset the source string, so it can't destroy the moved storage.
+ init.length_ = init.capacity_ = 0;
+ init.data = null;
+ }
+ else
+ {
+ reserve(init.length);
+ init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+ this.length_ = init.length;
+ }
+ }
+
+ /// Ditto.
+ this(ref const Slice!char init, shared Allocator allocator = defaultAllocator)
+ nothrow @trusted @nogc
+ {
+ this(allocator);
+ reserve(init.length);
+ init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+ this.length_ = init.length;
+ }
+
+ /// Ditto.
+ this(shared Allocator allocator) pure nothrow @safe @nogc
+ in
+ {
+ assert(allocator !is null);
+ }
+ body
+ {
+ this.allocator_ = allocator;
+ }
+
+ /**
+ * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
+ *
+ * Params:
+ * C = Type of the character to fill the string with.
+ * n = Number of characters to copy.
+ * chr = Character to fill the string with.
+ */
+ this(C)(const size_t n, const C chr,
+ shared Allocator allocator = defaultAllocator) @trusted
+ if (isSomeChar!C)
+ {
+ this(allocator);
+ if (n == 0)
+ {
+ return;
+ }
+ insertBack(chr);
+
+ // insertBack should validate the character, so we can just copy it
+ // n - 1 times.
+ auto remaining = length * n;
+
+ reserve(remaining);
+
+ // Use a quick copy.
+ for (auto i = this.length_ * 2; i <= remaining; i *= 2)
+ {
+ this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
+ this.length_ = i;
+ }
+ remaining -= length;
+ copy(this.data[this.length_ - remaining .. this.length_],
+ this.data[this.length_ .. this.length_ + remaining]);
+ this.length_ += remaining;
+ }
+
+ private unittest
+ {
+ {
+ auto s = String(1, 'О');
+ assert(s.length == 2);
+ }
+ {
+ auto s = String(3, 'О');
+ assert(s.length == 6);
+ }
+ {
+ auto s = String(8, 'О');
+ assert(s.length == 16);
+ }
+ }
+
+ /**
+ * Destroys the string.
+ */
+ ~this() nothrow @trusted @nogc
+ {
+ allocator.deallocate(this.data[0 .. this.capacity_]);
+ }
+
+ private void write4Bytes(ref const dchar src)
+ pure nothrow @trusted @nogc
+ in
+ {
+ assert(capacity - length >= 4);
+ assert(src - 0x10000 < 0x100000);
+ }
+ body
+ {
+ auto dst = this.data + length;
+
+ *dst++ = 0xf0 | (src >> 18);
+ *dst++ = 0x80 | ((src >> 12) & 0x3f);
+ *dst++ = 0x80 | ((src >> 6) & 0x3f);
+ *dst = 0x80 | (src & 0x3f);
+
+ this.length_ += 4;
+ }
+
+ private size_t insertWideChar(C)(auto ref const C chr) @trusted
+ if (is(C == wchar) || is(C == dchar))
+ in
+ {
+ assert(capacity - length >= C.sizeof);
+ }
+ body
+ {
+ auto dst = this.data + length;
+ if (chr < 0x80)
+ {
+ *dst = chr & 0x7f;
+ this.length_ += 1;
+ return 1;
+ }
+ else if (chr < 0x800)
+ {
+ *dst++ = 0xc0 | (chr >> 6) & 0xff;
+ *dst = 0x80 | (chr & 0x3f);
+ this.length_ += 2;
+ return 2;
+ }
+ else if (chr < 0xd800 || chr - 0xe000 < 0x2000)
+ {
+ *dst++ = 0xe0 | (chr >> 12) & 0xff;
+ *dst++ = 0x80 | ((chr >> 6) & 0x3f);
+ *dst = 0x80 | (chr & 0x3f);
+ this.length_ += 3;
+ return 3;
+ }
+ return 0;
+ }
+
+ /**
+ * Inserts a single character at the end of the string.
+ *
+ * Params:
+ * chr = The character should be inserted.
+ *
+ * Returns: The number of bytes inserted.
+ *
+ * Throws: $(D_PSYMBOL UTFException).
+ */
+ size_t insertBack(const char chr) @trusted @nogc
+ {
+ if ((chr & 0x80) != 0)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
+ }
+ reserve(length + 1);
+
+ *(data + length) = chr;
+ ++this.length_;
+
+ return 1;
+ }
+
+ /// Ditto.
+ size_t insertBack(const wchar chr) @trusted @nogc
+ {
+ reserve(length + wchar.sizeof);
+
+ auto ret = insertWideChar(chr);
+ if (ret == 0)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ return ret;
+ }
+
+ /// Ditto.
+ size_t insertBack(const dchar chr) @trusted @nogc
+ {
+ reserve(length + dchar.sizeof);
+
+ auto ret = insertWideChar(chr);
+ if (ret > 0)
+ {
+ return ret;
+ }
+ else if (chr - 0x10000 < 0x100000)
+ {
+ write4Bytes(chr);
+ return 4;
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
+ }
+ }
+
+ /**
+ * Inserts a stringish range at the end of the string.
+ *
+ * Params:
+ * R = Type of the inserted string.
+ * str = String should be inserted.
+ *
+ * Returns: The number of bytes inserted.
+ *
+ * Throws: $(D_PSYMBOL UTFException).
+ */
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == char))
+ {
+ size_t size;
+ static if (hasLength!R || isNarrowString!R)
+ {
+ size = str.length + length;
+ reserve(size);
+ }
+
+ static if (isNarrowString!R)
+ {
+ str.copy(this.data[length .. size]);
+ this.length_ = size;
+ return str.length;
+ }
+ else
+ {
+ size_t insertedLength;
+ while (!str.empty)
+ {
+ ubyte expectedLength;
+ if ((str.front & 0x80) == 0x00)
+ {
+ expectedLength = 1;
+ }
+ else if ((str.front & 0xe0) == 0xc0)
+ {
+ expectedLength = 2;
+ }
+ else if ((str.front & 0xf0) == 0xe0)
+ {
+ expectedLength = 3;
+ }
+ else if ((str.front & 0xf8) == 0xf0)
+ {
+ expectedLength = 4;
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+ }
+ size = length + expectedLength;
+ reserve(size);
+
+ for (; expectedLength > 0; --expectedLength)
+ {
+ if (str.empty)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+ }
+ *(data + length) = str.front;
+ str.popFront();
+ }
+ insertedLength += expectedLength;
+ this.length_ = size;
+ }
+ return insertedLength;
+ }
+ }
+
+ /// Ditto.
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == wchar))
+ {
+ static if (hasLength!R || isNarrowString!R)
+ {
+ reserve(length + str.length * wchar.sizeof);
+ }
+
+ static if (isNarrowString!R)
+ {
+ const(wchar)[] range = str;
+ }
+ else
+ {
+ alias range = str;
+ }
+
+ auto oldLength = length;
+
+ while (!range.empty)
+ {
+ reserve(length + 4);
+
+ auto ret = insertWideChar(range.front);
+ if (ret > 0)
+ {
+ range.popFront();
+ }
+ else if (range.front - 0xd800 < 2048)
+ { // Surrogate pair.
+ static if (isNarrowString!R)
+ {
+ if (range.length < 2 || range[1] - 0xdc00 >= 0x400)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10);
+
+ range.popFront(2);
+ }
+ else
+ {
+ dchar d = range.front - 0xd800;
+ range.popFront();
+
+ if (range.empty || range.front - 0xdc00 >= 0x400)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ d |= (range.front - 0xdc00) >> 10;
+
+ range.popFront();
+ }
+ write4Bytes(d);
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ }
+ return this.length_ - oldLength;
+ }
+
+ /// Ditto.
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == dchar))
+ {
+ static if (hasLength!R || isSomeString!R)
+ {
+ reserve(length + str.length * 4);
+ }
+
+ size_t insertedLength;
+ foreach (const dchar c; str)
+ {
+ insertedLength += insertBack(c);
+ }
+ return insertedLength;
+ }
+
+ /// Ditto.
+ alias insert = insertBack;
+
+ /**
+ * Reserves $(D_PARAM size) bytes for the string.
+ *
+ * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
+ * function call does not cause a reallocation and the string capacity is not
+ * affected.
+ *
+ * Params:
+ * size = Desired size in bytes.
+ */
+ void reserve(const size_t size) nothrow @trusted @nogc
+ {
+ if (this.capacity_ >= size)
+ {
+ return;
+ }
+
+ this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
+ this.capacity_ = size;
+ }
+
+ ///
+ @nogc @safe unittest
+ {
+ String s;
+ assert(s.capacity == 0);
+
+ s.reserve(3);
+ assert(s.capacity == 3);
+
+ s.reserve(3);
+ assert(s.capacity == 3);
+
+ s.reserve(1);
+ assert(s.capacity == 3);
+ }
+
+ /**
+ * Requests the string to reduce its capacity to fit the $(D_PARAM size).
+ *
+ * The request is non-binding. The string won't become smaller than the
+ * string byte length.
+ *
+ * Params:
+ * size = Desired size.
+ */
+ void shrink(const size_t size) nothrow @trusted @nogc
+ {
+ if (this.capacity_ <= size)
+ {
+ return;
+ }
+
+ const n = max(this.length_, size);
+ void[] buf = this.data[0 .. this.capacity_];
+ if (allocator.reallocate(buf, n))
+ {
+ this.capacity_ = n;
+ this.data = cast(char*) buf;
+ }
+ }
+
+ ///
+ @nogc @safe unittest
+ {
+ auto s = String("Die Alten lasen laut.");
+ assert(s.capacity == 21);
+
+ s.reserve(30);
+ s.shrink(25);
+ assert(s.capacity == 25);
+
+ s.shrink(18);
+ assert(s.capacity == 21);
+ }
+
+ /**
+ * Returns: String capacity in bytes.
+ */
+ @property size_t capacity() const pure nothrow @safe @nogc
+ {
+ return this.capacity_;
+ }
+
+ ///
+ unittest
+ {
+ auto s = String("In allem Schreiben ist Schamlosigkeit.");
+ assert(s.capacity == 38);
+ }
+
+ /**
+ * Returns an array used internally by the string.
+ * The length of the returned array may be smaller than the size of the
+ * reserved memory for the string.
+ *
+ * Returns: The array representing the string.
+ */
+ inout(char[]) get() inout pure nothrow @trusted @nogc
+ {
+ return this.data[0 .. this.length_];
+ }
+
+ /**
+ * Returns: The number of code units that are required to encode the string.
+ */
+ @property size_t length() const pure nothrow @safe @nogc
+ {
+ return this.length_;
+ }
+
+ ///
+ alias opDollar = length;
+
+ ///
+ unittest
+ {
+ auto s = String("Piscis primuin a capite foetat.");
+ assert(s.length == 31);
+ assert(s[$ - 1] == '.');
+ }
+
+ /**
+ * Params:
+ * pos = Position.
+ *
+ * Returns: Byte at $(D_PARAM pos).
+ *
+ * Precondition: $(D_INLINECODE length > pos).
+ */
+ ref inout(char) opIndex(const size_t pos) inout pure nothrow @trusted @nogc
+ in
+ {
+ assert(length > pos);
+ }
+ body
+ {
+ return *(this.data + pos);
+ }
+
+ ///
+ unittest
+ {
+ auto s = String("Alea iacta est.");
+ assert(s[0] == 'A');
+ assert(s[4] == ' ');
+ }
+
+ /**
+ * Returns: Random access range that iterates over the string by bytes, in
+ * forward order.
+ */
+ ByCodeUnit!char opIndex() pure nothrow @trusted @nogc
+ {
+ return typeof(return)(this, this.data, this.data + length);
+ }
+
+ /// Ditto.
+ ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
+ {
+ return typeof(return)(this, this.data, this.data + length);
+ }
+
+ ///
+ unittest
+ {
+ auto s = String("Plutarchus");
+ auto r = s[];
+ assert(r.front == 'P');
+ assert(r.back == 's');
+
+ r.popFront();
+ assert(r.front == 'l');
+ assert(r.back == 's');
+
+ r.popBack();
+ assert(r.front == 'l');
+ assert(r.back == 'u');
+
+ assert(r.length == 8);
+ }
+
+ /**
+ * Returns: $(D_KEYWORD true) if the vector is empty.
+ */
+ @property bool empty() const pure nothrow @safe @nogc
+ {
+ return length == 0;
+ }
+
+ /**
+ * Params:
+ * i = Slice start.
+ * j = Slice end.
+ *
+ * Returns: A range that iterates over the string by bytes from
+ * index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
+ *
+ * Precondition: $(D_INLINECODE i <= j && j <= length).
+ */
+ ByCodeUnit!char opSlice(const size_t i, const size_t j)
+ pure nothrow @trusted @nogc
+ in
+ {
+ assert(i <= j);
+ assert(j <= length);
+ }
+ body
+ {
+ return typeof(return)(this, this.data + i, this.data + j);
+ }
+
+ /// Ditto.
+ ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
+ const pure nothrow @trusted @nogc
+ in
+ {
+ assert(i <= j);
+ assert(j <= length);
+ }
+ body
+ {
+ return typeof(return)(this, this.data + i, this.data + j);
+ }
+
+ ///
+ unittest
+ {
+ auto s = String("Vladimir Soloviev");
+ auto r = s[9 .. $];
+
+ assert(r.front == 'S');
+ assert(r.back == 'v');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'o');
+ assert(r.back == 'e');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'l');
+ assert(r.back == 'i');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'o');
+ assert(r.back == 'v');
+
+ r.popFront();
+ r.popBack();
+ assert(r.empty);
+ }
+
+ mixin DefaultAllocator;
+}
diff --git a/source/tanya/container/string.d b/source/tanya/container/string.d
deleted file mode 100644
index 4737fab..0000000
--- a/source/tanya/container/string.d
+++ /dev/null
@@ -1,905 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/**
- * UTF-8 string.
- *
- * Copyright: Eugene Wissner 2017.
- * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
- * Mozilla Public License, v. 2.0).
- * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
- */
-module tanya.container.string;
-
-import core.exception;
-import std.algorithm.comparison;
-import std.algorithm.mutation;
-import std.range;
-import std.traits;
-import tanya.memory;
-
-private ref const(wchar) front(const wchar[] str)
-pure nothrow @safe @nogc
-in
-{
- assert(str.length > 0);
-}
-body
-{
- return str[0];
-}
-
-private void popFront(ref const(wchar)[] str, const size_t s = 1)
-pure nothrow @safe @nogc
-in
-{
- assert(str.length >= s);
-}
-body
-{
- str = str[s .. $];
-}
-
-/**
- * Thrown on encoding errors.
- */
-class UTFException : Exception
-{
- /**
- * Params:
- * msg = The message for the exception.
- * file = The file where the exception occurred.
- * line = The line number where the exception occurred.
- * next = The previous exception in the chain of exceptions, if any.
- */
- this(string msg,
- string file = __FILE__,
- size_t line = __LINE__,
- Throwable next = null) @nogc @safe pure nothrow
- {
- super(msg, file, line, next);
- }
-}
-
-/**
- * Iterates $(D_PSYMBOL String) by UTF-8 code unit.
- *
- * Params:
- * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
- */
-struct ByCodeUnit(E)
- if (is(Unqual!E == char))
-{
- private E* begin, end;
- private alias ContainerType = CopyConstness!(E, String);
- private ContainerType* container;
-
- invariant
- {
- assert(this.begin <= this.end);
- assert(this.container !is null);
- assert(this.begin >= this.container.data);
- assert(this.end <= this.container.data + this.container.length);
- }
-
- private this(ref ContainerType container, E* begin, E* end) @trusted
- in
- {
- assert(begin <= end);
- assert(begin >= container.data);
- assert(end <= container.data + container.length);
- }
- body
- {
- this.container = &container;
- this.begin = begin;
- this.end = end;
- }
-
- @disable this();
-
- @property ByCodeUnit save()
- {
- return this;
- }
-
- @property bool empty() const
- {
- return this.begin == this.end;
- }
-
- @property size_t length() const
- {
- return this.end - this.begin;
- }
-
- alias opDollar = length;
-
- @property ref inout(E) front() inout
- in
- {
- assert(!empty);
- }
- body
- {
- return *this.begin;
- }
-
- @property ref inout(E) back() inout @trusted
- in
- {
- assert(!empty);
- }
- body
- {
- return *(this.end - 1);
- }
-
- void popFront() @trusted
- in
- {
- assert(!empty);
- }
- body
- {
- ++this.begin;
- }
-
- void popBack() @trusted
- in
- {
- assert(!empty);
- }
- body
- {
- --this.end;
- }
-
- ref inout(E) opIndex(const size_t i) inout @trusted
- in
- {
- assert(i < length);
- }
- body
- {
- return *(this.begin + i);
- }
-
- ByCodeUnit opIndex()
- {
- return typeof(return)(*this.container, this.begin, this.end);
- }
-
- ByCodeUnit!(const E) opIndex() const
- {
- return typeof(return)(*this.container, this.begin, this.end);
- }
-
- ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
- in
- {
- assert(i <= j);
- assert(j <= length);
- }
- body
- {
- return typeof(return)(*this.container, this.begin + i, this.begin + j);
- }
-
- ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
- in
- {
- assert(i <= j);
- assert(j <= length);
- }
- body
- {
- return typeof(return)(*this.container, this.begin + i, this.begin + j);
- }
-
- inout(E[]) get() inout @trusted
- {
- return this.begin[0 .. length];
- }
-}
-
-/**
- * UTF-8 string.
- */
-struct String
-{
- private size_t length_;
- private char* data;
- private size_t capacity_;
-
- pure nothrow @safe @nogc invariant
- {
- assert(this.length_ <= this.capacity_);
- }
-
- /**
- * Constructs the string from a stringish range.
- *
- * Params:
- * R = String type.
- * str = Initial string.
- * allocator = Allocator.
- *
- * Throws: $(D_PSYMBOL UTFException).
- *
- * Precondition: $(D_INLINECODE allocator is null).
- */
- this(R)(const R str, shared Allocator allocator = defaultAllocator)
- if (!isInfinite!R
- && isInputRange!R
- && isSomeChar!(ElementEncodingType!R))
- {
- this(allocator);
- insertBack(str);
- }
-
- ///
- @safe @nogc unittest
- {
- auto s = String("\u10437"w);
- assert("\u10437" == s.get());
- }
-
- ///
- @safe @nogc unittest
- {
- auto s = String("Отказаться от вина - в этом страшная вина."d);
- assert("Отказаться от вина - в этом страшная вина." == s.get());
- }
-
- /**
- * Initializes this string from another one.
- *
- * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
- * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator),
- * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
- * storage, otherwise, the storage will be allocated with
- * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
- *
- * If $(D_PARAM init) is passed by reference, it will be copied.
- *
- * Params:
- * init = Source string.
- * allocator = Allocator.
- *
- * Precondition: $(D_INLINECODE allocator is null).
- */
- this(String init, shared Allocator allocator = defaultAllocator)
- nothrow @trusted @nogc
- {
- this(allocator);
- if (allocator !is init.allocator)
- {
- // Just steal all references and the allocator.
- this.data = init.data;
- this.length_ = init.length_;
- this.capacity_ = init.capacity_;
-
- // Reset the source string, so it can't destroy the moved storage.
- init.length_ = init.capacity_ = 0;
- init.data = null;
- }
- else
- {
- reserve(init.length);
- init.data[0 .. init.length].copy(this.data[0 .. init.length]);
- this.length_ = init.length;
- }
- }
-
- /// Ditto.
- this(ref const String init, shared Allocator allocator = defaultAllocator)
- nothrow @trusted @nogc
- {
- this(allocator);
- reserve(init.length);
- init.data[0 .. init.length].copy(this.data[0 .. init.length]);
- this.length_ = init.length;
- }
-
- /// Ditto.
- this(shared Allocator allocator) pure nothrow @safe @nogc
- in
- {
- assert(allocator !is null);
- }
- body
- {
- this.allocator_ = allocator;
- }
-
- /**
- * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
- *
- * Params:
- * C = Type of the character to fill the string with.
- * n = Number of characters to copy.
- * chr = Character to fill the string with.
- */
- this(C)(const size_t n, const C chr,
- shared Allocator allocator = defaultAllocator) @trusted
- if (isSomeChar!C)
- {
- this(allocator);
- if (n == 0)
- {
- return;
- }
- insertBack(chr);
-
- // insertBack should validate the character, so we can just copy it
- // n - 1 times.
- auto remaining = length * n;
-
- reserve(remaining);
-
- // Use a quick copy.
- for (auto i = this.length_ * 2; i <= remaining; i *= 2)
- {
- this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
- this.length_ = i;
- }
- remaining -= length;
- copy(this.data[this.length_ - remaining .. this.length_],
- this.data[this.length_ .. this.length_ + remaining]);
- this.length_ += remaining;
- }
-
- private unittest
- {
- {
- auto s = String(1, 'О');
- assert(s.length == 2);
- }
- {
- auto s = String(3, 'О');
- assert(s.length == 6);
- }
- {
- auto s = String(8, 'О');
- assert(s.length == 16);
- }
- }
-
- /**
- * Destroys the string.
- */
- ~this() nothrow @trusted @nogc
- {
- allocator.deallocate(this.data[0 .. this.capacity_]);
- }
-
- private void write4Bytes(ref const dchar src)
- pure nothrow @trusted @nogc
- in
- {
- assert(capacity - length >= 4);
- assert(src - 0x10000 < 0x100000);
- }
- body
- {
- auto dst = this.data + length;
-
- *dst++ = 0xf0 | (src >> 18);
- *dst++ = 0x80 | ((src >> 12) & 0x3f);
- *dst++ = 0x80 | ((src >> 6) & 0x3f);
- *dst = 0x80 | (src & 0x3f);
-
- this.length_ += 4;
- }
-
- private size_t insertWideChar(C)(auto ref const C chr) @trusted
- if (is(C == wchar) || is(C == dchar))
- in
- {
- assert(capacity - length >= C.sizeof);
- }
- body
- {
- auto dst = this.data + length;
- if (chr < 0x80)
- {
- *dst = chr & 0x7f;
- this.length_ += 1;
- return 1;
- }
- else if (chr < 0x800)
- {
- *dst++ = 0xc0 | (chr >> 6) & 0xff;
- *dst = 0x80 | (chr & 0x3f);
- this.length_ += 2;
- return 2;
- }
- else if (chr < 0xd800 || chr - 0xe000 < 0x2000)
- {
- *dst++ = 0xe0 | (chr >> 12) & 0xff;
- *dst++ = 0x80 | ((chr >> 6) & 0x3f);
- *dst = 0x80 | (chr & 0x3f);
- this.length_ += 3;
- return 3;
- }
- return 0;
- }
-
- /**
- * Inserts a single character at the end of the string.
- *
- * Params:
- * chr = The character should be inserted.
- *
- * Returns: The number of bytes inserted.
- *
- * Throws: $(D_PSYMBOL UTFException).
- */
- size_t insertBack(const char chr) @trusted @nogc
- {
- if ((chr & 0x80) != 0)
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
- }
- reserve(length + 1);
-
- *(data + length) = chr;
- ++this.length_;
-
- return 1;
- }
-
- /// Ditto.
- size_t insertBack(const wchar chr) @trusted @nogc
- {
- reserve(length + wchar.sizeof);
-
- auto ret = insertWideChar(chr);
- if (ret == 0)
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
- }
- return ret;
- }
-
- /// Ditto.
- size_t insertBack(const dchar chr) @trusted @nogc
- {
- reserve(length + dchar.sizeof);
-
- auto ret = insertWideChar(chr);
- if (ret > 0)
- {
- return ret;
- }
- else if (chr - 0x10000 < 0x100000)
- {
- write4Bytes(chr);
- return 4;
- }
- else
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
- }
- }
-
- /**
- * Inserts a stringish range at the end of the string.
- *
- * Params:
- * R = Type of the inserted string.
- * str = String should be inserted.
- *
- * Returns: The number of bytes inserted.
- *
- * Throws: $(D_PSYMBOL UTFException).
- */
- size_t insertBack(R)(R str) @trusted
- if (!isInfinite!R
- && isInputRange!R
- && is(Unqual!(ElementEncodingType!R) == char))
- {
- size_t size;
- static if (hasLength!R || isNarrowString!R)
- {
- size = str.length + length;
- reserve(size);
- }
-
- static if (isNarrowString!R)
- {
- str.copy(this.data[length .. size]);
- this.length_ = size;
- return str.length;
- }
- else
- {
- size_t insertedLength;
- while (!str.empty)
- {
- ubyte expectedLength;
- if ((str.front & 0x80) == 0x00)
- {
- expectedLength = 1;
- }
- else if ((str.front & 0xe0) == 0xc0)
- {
- expectedLength = 2;
- }
- else if ((str.front & 0xf0) == 0xe0)
- {
- expectedLength = 3;
- }
- else if ((str.front & 0xf8) == 0xf0)
- {
- expectedLength = 4;
- }
- else
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
- }
- size = length + expectedLength;
- reserve(size);
-
- for (; expectedLength > 0; --expectedLength)
- {
- if (str.empty)
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
- }
- *(data + length) = str.front;
- str.popFront();
- }
- insertedLength += expectedLength;
- this.length_ = size;
- }
- return insertedLength;
- }
- }
-
- /// Ditto.
- size_t insertBack(R)(R str) @trusted
- if (!isInfinite!R
- && isInputRange!R
- && is(Unqual!(ElementEncodingType!R) == wchar))
- {
- static if (hasLength!R || isNarrowString!R)
- {
- reserve(length + str.length * wchar.sizeof);
- }
-
- static if (isNarrowString!R)
- {
- const(wchar)[] range = str;
- }
- else
- {
- alias range = str;
- }
-
- auto oldLength = length;
-
- while (!range.empty)
- {
- reserve(length + 4);
-
- auto ret = insertWideChar(range.front);
- if (ret > 0)
- {
- range.popFront();
- }
- else if (range.front - 0xd800 < 2048)
- { // Surrogate pair.
- static if (isNarrowString!R)
- {
- if (range.length < 2 || range[1] - 0xdc00 >= 0x400)
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
- }
- dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10);
-
- range.popFront(2);
- }
- else
- {
- dchar d = range.front - 0xd800;
- range.popFront();
-
- if (range.empty || range.front - 0xdc00 >= 0x400)
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
- }
- d |= (range.front - 0xdc00) >> 10;
-
- range.popFront();
- }
- write4Bytes(d);
- }
- else
- {
- throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
- }
- }
- return this.length_ - oldLength;
- }
-
- /// Ditto.
- size_t insertBack(R)(R str) @trusted
- if (!isInfinite!R
- && isInputRange!R
- && is(Unqual!(ElementEncodingType!R) == dchar))
- {
- static if (hasLength!R || isSomeString!R)
- {
- reserve(length + str.length * 4);
- }
-
- size_t insertedLength;
- foreach (const dchar c; str)
- {
- insertedLength += insertBack(c);
- }
- return insertedLength;
- }
-
- /// Ditto.
- alias insert = insertBack;
-
- /**
- * Reserves $(D_PARAM size) bytes for the string.
- *
- * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
- * function call does not cause a reallocation and the string capacity is not
- * affected.
- *
- * Params:
- * size = Desired size in bytes.
- */
- void reserve(const size_t size) nothrow @trusted @nogc
- {
- if (this.capacity_ >= size)
- {
- return;
- }
-
- this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
- this.capacity_ = size;
- }
-
- ///
- @nogc @safe unittest
- {
- String s;
- assert(s.capacity == 0);
-
- s.reserve(3);
- assert(s.capacity == 3);
-
- s.reserve(3);
- assert(s.capacity == 3);
-
- s.reserve(1);
- assert(s.capacity == 3);
- }
-
- /**
- * Requests the string to reduce its capacity to fit the $(D_PARAM size).
- *
- * The request is non-binding. The string won't become smaller than the
- * string byte length.
- *
- * Params:
- * size = Desired size.
- */
- void shrink(const size_t size) nothrow @trusted @nogc
- {
- if (this.capacity_ <= size)
- {
- return;
- }
-
- const n = max(this.length_, size);
- void[] buf = this.data[0 .. this.capacity_];
- if (allocator.reallocate(buf, n))
- {
- this.capacity_ = n;
- this.data = cast(char*) buf;
- }
- }
-
- ///
- @nogc @safe unittest
- {
- auto s = String("Die Alten lasen laut.");
- assert(s.capacity == 21);
-
- s.reserve(30);
- s.shrink(25);
- assert(s.capacity == 25);
-
- s.shrink(18);
- assert(s.capacity == 21);
- }
-
- /**
- * Returns: String capacity in bytes.
- */
- @property size_t capacity() const pure nothrow @safe @nogc
- {
- return this.capacity_;
- }
-
- ///
- unittest
- {
- auto s = String("In allem Schreiben ist Schamlosigkeit.");
- assert(s.capacity == 38);
- }
-
- /**
- * Returns an array used internally by the string.
- * The length of the returned array may be smaller than the size of the
- * reserved memory for the string.
- *
- * Returns: The array representing the string.
- */
- inout(char[]) get() inout pure nothrow @trusted @nogc
- {
- return this.data[0 .. this.length_];
- }
-
- /**
- * Returns: The number of code units that are required to encode the string.
- */
- @property size_t length() const pure nothrow @safe @nogc
- {
- return this.length_;
- }
-
- ///
- alias opDollar = length;
-
- ///
- unittest
- {
- auto s = String("Piscis primuin a capite foetat.");
- assert(s.length == 31);
- assert(s[$ - 1] == '.');
- }
-
- /**
- * Params:
- * pos = Position.
- *
- * Returns: Byte at $(D_PARAM pos).
- *
- * Precondition: $(D_INLINECODE length > pos).
- */
- ref inout(char) opIndex(const size_t pos) inout pure nothrow @trusted @nogc
- in
- {
- assert(length > pos);
- }
- body
- {
- return *(this.data + pos);
- }
-
- ///
- unittest
- {
- auto s = String("Alea iacta est.");
- assert(s[0] == 'A');
- assert(s[4] == ' ');
- }
-
- /**
- * Returns: Random access range that iterates over the string by bytes, in
- * forward order.
- */
- ByCodeUnit!char opIndex() pure nothrow @trusted @nogc
- {
- return typeof(return)(this, this.data, this.data + length);
- }
-
- /// Ditto.
- ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
- {
- return typeof(return)(this, this.data, this.data + length);
- }
-
- ///
- unittest
- {
- auto s = String("Plutarchus");
- auto r = s[];
- assert(r.front == 'P');
- assert(r.back == 's');
-
- r.popFront();
- assert(r.front == 'l');
- assert(r.back == 's');
-
- r.popBack();
- assert(r.front == 'l');
- assert(r.back == 'u');
-
- assert(r.length == 8);
- }
-
- /**
- * Returns: $(D_KEYWORD true) if the vector is empty.
- */
- @property bool empty() const pure nothrow @safe @nogc
- {
- return length == 0;
- }
-
- /**
- * Params:
- * i = Slice start.
- * j = Slice end.
- *
- * Returns: A range that iterates over the string by bytes from
- * index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
- *
- * Precondition: $(D_INLINECODE i <= j && j <= length).
- */
- ByCodeUnit!char opSlice(const size_t i, const size_t j)
- pure nothrow @trusted @nogc
- in
- {
- assert(i <= j);
- assert(j <= length);
- }
- body
- {
- return typeof(return)(this, this.data + i, this.data + j);
- }
-
- /// Ditto.
- ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
- const pure nothrow @trusted @nogc
- in
- {
- assert(i <= j);
- assert(j <= length);
- }
- body
- {
- return typeof(return)(this, this.data + i, this.data + j);
- }
-
- ///
- unittest
- {
- auto s = String("Vladimir Soloviev");
- auto r = s[9 .. $];
-
- assert(r.front == 'S');
- assert(r.back == 'v');
-
- r.popFront();
- r.popBack();
- assert(r.front == 'o');
- assert(r.back == 'e');
-
- r.popFront();
- r.popBack();
- assert(r.front == 'l');
- assert(r.back == 'i');
-
- r.popFront();
- r.popBack();
- assert(r.front == 'o');
- assert(r.back == 'v');
-
- r.popFront();
- r.popBack();
- assert(r.empty);
- }
-
- mixin DefaultAllocator;
-}