summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2017-04-10 08:10:08 +0200
committerEugen Wissner <belka@caraus.de>2017-04-10 08:10:08 +0200
commit47ef7873538bb690cc0b2ee0bfa6b1de9d32fc9e (patch)
tree09c3ba9174b862a29ecf8960e10769d558d11917 /source
parent6436ad49dfad383e7c79fa3743cdb1b232df2f38 (diff)
downloadtanya-47ef7873538bb690cc0b2ee0bfa6b1de9d32fc9e.tar.gz
Add missing constructors to the String
Diffstat (limited to 'source')
-rw-r--r--source/tanya/container/string.d612
1 files changed, 476 insertions, 136 deletions
diff --git a/source/tanya/container/string.d b/source/tanya/container/string.d
index d976e34..4737fab 100644
--- a/source/tanya/container/string.d
+++ b/source/tanya/container/string.d
@@ -12,13 +12,35 @@
*/
module tanya.container.string;
-import core.checkedint;
import core.exception;
import std.algorithm.comparison;
import std.algorithm.mutation;
+import std.range;
import std.traits;
import tanya.memory;
+private ref const(wchar) front(const wchar[] str)
+pure nothrow @safe @nogc
+in
+{
+ assert(str.length > 0);
+}
+body
+{
+ return str[0];
+}
+
+private void popFront(ref const(wchar)[] str, const size_t s = 1)
+pure nothrow @safe @nogc
+in
+{
+ assert(str.length >= s);
+}
+body
+{
+ str = str[s .. $];
+}
+
/**
* Thrown on encoding errors.
*/
@@ -41,12 +63,12 @@ class UTFException : Exception
}
/**
- * Byte range.
+ * Iterates $(D_PSYMBOL String) by UTF-8 code unit.
*
* Params:
* E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
*/
-struct ByteRange(E)
+struct ByCodeUnit(E)
if (is(Unqual!E == char))
{
private E* begin, end;
@@ -77,7 +99,7 @@ struct ByteRange(E)
@disable this();
- @property ByteRange save()
+ @property ByCodeUnit save()
{
return this;
}
@@ -144,17 +166,17 @@ struct ByteRange(E)
return *(this.begin + i);
}
- ByteRange opIndex()
+ ByCodeUnit opIndex()
{
return typeof(return)(*this.container, this.begin, this.end);
}
- ByteRange!(const E) opIndex() const
+ ByCodeUnit!(const E) opIndex() const
{
return typeof(return)(*this.container, this.begin, this.end);
}
- ByteRange opSlice(const size_t i, const size_t j) @trusted
+ ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
in
{
assert(i <= j);
@@ -165,7 +187,7 @@ struct ByteRange(E)
return typeof(return)(*this.container, this.begin + i, this.begin + j);
}
- ByteRange!(const E) opSlice(const size_t i, const size_t j) const @trusted
+ ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
in
{
assert(i <= j);
@@ -180,7 +202,6 @@ struct ByteRange(E)
{
return this.begin[0 .. length];
}
-
}
/**
@@ -192,13 +213,16 @@ struct String
private char* data;
private size_t capacity_;
- invariant
+ pure nothrow @safe @nogc invariant
{
assert(this.length_ <= this.capacity_);
}
/**
+ * Constructs the string from a stringish range.
+ *
* Params:
+ * R = String type.
* str = Initial string.
* allocator = Allocator.
*
@@ -206,125 +230,77 @@ struct String
*
* Precondition: $(D_INLINECODE allocator is null).
*/
- this(const char[] str, shared Allocator allocator = defaultAllocator)
- @trusted @nogc
+ this(R)(const R str, shared Allocator allocator = defaultAllocator)
+ if (!isInfinite!R
+ && isInputRange!R
+ && isSomeChar!(ElementEncodingType!R))
{
this(allocator);
- reserve(str.length);
- this.length_ = str.length;
- str.copy(this.data[0 .. this.length_]);
+ insertBack(str);
}
- /// Ditto.
- this(const wchar[] str, shared Allocator allocator = defaultAllocator)
- @trusted @nogc
+ ///
+ @safe @nogc unittest
{
- this(allocator);
- reserve(str.length * 2);
-
- size_t s;
- auto sourceLength = str.length;
- for (auto c = str.ptr; sourceLength != 0; ++c, --sourceLength)
- {
- if (length - s < 5) // More space required.
- {
- bool overflow;
- auto size = addu(length, str.length, overflow);
- assert(!overflow);
- reserve(size);
- }
- if (*c < 0x80)
- {
- this.data[s++] = *c & 0x7f;
- this.length_ += 1;
- }
- else if (*c < 0x800)
- {
- this.data[s++] = 0xc0 | (*c >> 6) & 0xff;
- this.data[s++] = 0x80 | (*c & 0x3f);
- this.length_ += 2;
- }
- else if (*c < 0xd800 || *c - 0xe000 < 0x2000)
- {
- this.data[s++] = 0xe0 | (*c >> 12) & 0xff;
- this.data[s++] = 0x80 | ((*c >> 6) & 0x3f);
- this.data[s++] = 0x80 | (*c & 0x3f);
- this.length_ += 3;
- }
- else if ((*c - 0xd800) < 2048 && sourceLength > 0 && *(c + 1) - 0xdc00 < 0x400)
- { // Surrogate pair
- dchar d = (*c - 0xd800) | ((*c++ - 0xdc00) >> 10);
-
- this.data[s++] = 0xf0 | (d >> 18);
- this.data[s++] = 0x80 | ((d >> 12) & 0x3f);
- this.data[s++] = 0x80 | ((d >> 6) & 0x3f);
- this.data[s++] = 0x80 | (d & 0x3f);
- --sourceLength;
- this.length_ += 4;
- }
- else
- {
- throw defaultAllocator.make!UTFException("Wrong UTF-16 sequeunce");
- }
- }
+ auto s = String("\u10437"w);
+ assert("\u10437" == s.get());
}
///
- unittest
+ @safe @nogc unittest
{
- auto s = String("\u10437"w);
- assert("\u10437" == s.get());
+ auto s = String("Отказаться от вина - в этом страшная вина."d);
+ assert("Отказаться от вина - в этом страшная вина." == s.get());
}
- /// Ditto.
- this(const dchar[] str, shared Allocator allocator = defaultAllocator)
- @trusted @nogc
+ /**
+ * Initializes this string from another one.
+ *
+ * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
+ * If the allocator of ($D_PARAM init) matches $(D_PARAM allocator),
+ * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
+ * storage, otherwise, the storage will be allocated with
+ * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
+ *
+ * If $(D_PARAM init) is passed by reference, it will be copied.
+ *
+ * Params:
+ * init = Source string.
+ * allocator = Allocator.
+ *
+ * Precondition: $(D_INLINECODE allocator is null).
+ */
+ this(String init, shared Allocator allocator = defaultAllocator)
+ nothrow @trusted @nogc
{
this(allocator);
-
- reserve(str.length * 4);
-
- auto s = data;
- foreach (c; str)
+ if (allocator !is init.allocator)
{
- if (c < 0x80)
- {
- *s++ = c & 0x7f;
- this.length_ += 1;
- }
- else if (c < 0x800)
- {
- *s++ = 0xc0 | (c >> 6) & 0xff;
- *s++ = 0x80 | (c & 0x3f);
- this.length_ += 2;
- }
- else if (c < 0xd800 || c - 0xe000 < 0x2000)
- {
- *s++ = 0xe0 | (c >> 12) & 0xff;
- *s++ = 0x80 | ((c >> 6) & 0x3f);
- *s++ = 0x80 | (c & 0x3f);
- this.length_ += 3;
- }
- else if (c - 0x10000 < 0x100000)
- {
- *s++ = 0xf0 | (c >> 18);
- *s++ = 0x80 | ((c >> 12) & 0x3f);
- *s++ = 0x80 | ((c >> 6) & 0x3f);
- *s++ = 0x80 | (c & 0x3f);
- this.length_ += 4;
- }
- else
- {
- throw defaultAllocator.make!UTFException("Wrong UTF-32 sequeunce");
- }
+ // Just steal all references and the allocator.
+ this.data = init.data;
+ this.length_ = init.length_;
+ this.capacity_ = init.capacity_;
+
+ // Reset the source string, so it can't destroy the moved storage.
+ init.length_ = init.capacity_ = 0;
+ init.data = null;
+ }
+ else
+ {
+ reserve(init.length);
+ init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+ this.length_ = init.length;
}
}
- ///
- unittest
+ /// Ditto.
+ this(ref const String init, shared Allocator allocator = defaultAllocator)
+ nothrow @trusted @nogc
{
- auto s = String("Отказаться от вина - в этом страшная вина."d);
- assert("Отказаться от вина - в этом страшная вина." == s.get());
+ this(allocator);
+ reserve(init.length);
+ init.data[0 .. init.length].copy(this.data[0 .. init.length]);
+ this.length_ = init.length;
}
/// Ditto.
@@ -339,6 +315,59 @@ struct String
}
/**
+ * Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
+ *
+ * Params:
+ * C = Type of the character to fill the string with.
+ * n = Number of characters to copy.
+ * chr = Character to fill the string with.
+ */
+ this(C)(const size_t n, const C chr,
+ shared Allocator allocator = defaultAllocator) @trusted
+ if (isSomeChar!C)
+ {
+ this(allocator);
+ if (n == 0)
+ {
+ return;
+ }
+ insertBack(chr);
+
+ // insertBack should validate the character, so we can just copy it
+ // n - 1 times.
+ auto remaining = length * n;
+
+ reserve(remaining);
+
+ // Use a quick copy.
+ for (auto i = this.length_ * 2; i <= remaining; i *= 2)
+ {
+ this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
+ this.length_ = i;
+ }
+ remaining -= length;
+ copy(this.data[this.length_ - remaining .. this.length_],
+ this.data[this.length_ .. this.length_ + remaining]);
+ this.length_ += remaining;
+ }
+
+ private unittest
+ {
+ {
+ auto s = String(1, 'О');
+ assert(s.length == 2);
+ }
+ {
+ auto s = String(3, 'О');
+ assert(s.length == 6);
+ }
+ {
+ auto s = String(8, 'О');
+ assert(s.length == 16);
+ }
+ }
+
+ /**
* Destroys the string.
*/
~this() nothrow @trusted @nogc
@@ -346,6 +375,278 @@ struct String
allocator.deallocate(this.data[0 .. this.capacity_]);
}
+ private void write4Bytes(ref const dchar src)
+ pure nothrow @trusted @nogc
+ in
+ {
+ assert(capacity - length >= 4);
+ assert(src - 0x10000 < 0x100000);
+ }
+ body
+ {
+ auto dst = this.data + length;
+
+ *dst++ = 0xf0 | (src >> 18);
+ *dst++ = 0x80 | ((src >> 12) & 0x3f);
+ *dst++ = 0x80 | ((src >> 6) & 0x3f);
+ *dst = 0x80 | (src & 0x3f);
+
+ this.length_ += 4;
+ }
+
+ private size_t insertWideChar(C)(auto ref const C chr) @trusted
+ if (is(C == wchar) || is(C == dchar))
+ in
+ {
+ assert(capacity - length >= C.sizeof);
+ }
+ body
+ {
+ auto dst = this.data + length;
+ if (chr < 0x80)
+ {
+ *dst = chr & 0x7f;
+ this.length_ += 1;
+ return 1;
+ }
+ else if (chr < 0x800)
+ {
+ *dst++ = 0xc0 | (chr >> 6) & 0xff;
+ *dst = 0x80 | (chr & 0x3f);
+ this.length_ += 2;
+ return 2;
+ }
+ else if (chr < 0xd800 || chr - 0xe000 < 0x2000)
+ {
+ *dst++ = 0xe0 | (chr >> 12) & 0xff;
+ *dst++ = 0x80 | ((chr >> 6) & 0x3f);
+ *dst = 0x80 | (chr & 0x3f);
+ this.length_ += 3;
+ return 3;
+ }
+ return 0;
+ }
+
+ /**
+ * Inserts a single character at the end of the string.
+ *
+ * Params:
+ * chr = The character should be inserted.
+ *
+ * Returns: The number of bytes inserted.
+ *
+ * Throws: $(D_PSYMBOL UTFException).
+ */
+ size_t insertBack(const char chr) @trusted @nogc
+ {
+ if ((chr & 0x80) != 0)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
+ }
+ reserve(length + 1);
+
+ *(data + length) = chr;
+ ++this.length_;
+
+ return 1;
+ }
+
+ /// Ditto.
+ size_t insertBack(const wchar chr) @trusted @nogc
+ {
+ reserve(length + wchar.sizeof);
+
+ auto ret = insertWideChar(chr);
+ if (ret == 0)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ return ret;
+ }
+
+ /// Ditto.
+ size_t insertBack(const dchar chr) @trusted @nogc
+ {
+ reserve(length + dchar.sizeof);
+
+ auto ret = insertWideChar(chr);
+ if (ret > 0)
+ {
+ return ret;
+ }
+ else if (chr - 0x10000 < 0x100000)
+ {
+ write4Bytes(chr);
+ return 4;
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
+ }
+ }
+
+ /**
+ * Inserts a stringish range at the end of the string.
+ *
+ * Params:
+ * R = Type of the inserted string.
+ * str = String should be inserted.
+ *
+ * Returns: The number of bytes inserted.
+ *
+ * Throws: $(D_PSYMBOL UTFException).
+ */
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == char))
+ {
+ size_t size;
+ static if (hasLength!R || isNarrowString!R)
+ {
+ size = str.length + length;
+ reserve(size);
+ }
+
+ static if (isNarrowString!R)
+ {
+ str.copy(this.data[length .. size]);
+ this.length_ = size;
+ return str.length;
+ }
+ else
+ {
+ size_t insertedLength;
+ while (!str.empty)
+ {
+ ubyte expectedLength;
+ if ((str.front & 0x80) == 0x00)
+ {
+ expectedLength = 1;
+ }
+ else if ((str.front & 0xe0) == 0xc0)
+ {
+ expectedLength = 2;
+ }
+ else if ((str.front & 0xf0) == 0xe0)
+ {
+ expectedLength = 3;
+ }
+ else if ((str.front & 0xf8) == 0xf0)
+ {
+ expectedLength = 4;
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+ }
+ size = length + expectedLength;
+ reserve(size);
+
+ for (; expectedLength > 0; --expectedLength)
+ {
+ if (str.empty)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
+ }
+ *(data + length) = str.front;
+ str.popFront();
+ }
+ insertedLength += expectedLength;
+ this.length_ = size;
+ }
+ return insertedLength;
+ }
+ }
+
+ /// Ditto.
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == wchar))
+ {
+ static if (hasLength!R || isNarrowString!R)
+ {
+ reserve(length + str.length * wchar.sizeof);
+ }
+
+ static if (isNarrowString!R)
+ {
+ const(wchar)[] range = str;
+ }
+ else
+ {
+ alias range = str;
+ }
+
+ auto oldLength = length;
+
+ while (!range.empty)
+ {
+ reserve(length + 4);
+
+ auto ret = insertWideChar(range.front);
+ if (ret > 0)
+ {
+ range.popFront();
+ }
+ else if (range.front - 0xd800 < 2048)
+ { // Surrogate pair.
+ static if (isNarrowString!R)
+ {
+ if (range.length < 2 || range[1] - 0xdc00 >= 0x400)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10);
+
+ range.popFront(2);
+ }
+ else
+ {
+ dchar d = range.front - 0xd800;
+ range.popFront();
+
+ if (range.empty || range.front - 0xdc00 >= 0x400)
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ d |= (range.front - 0xdc00) >> 10;
+
+ range.popFront();
+ }
+ write4Bytes(d);
+ }
+ else
+ {
+ throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
+ }
+ }
+ return this.length_ - oldLength;
+ }
+
+ /// Ditto.
+ size_t insertBack(R)(R str) @trusted
+ if (!isInfinite!R
+ && isInputRange!R
+ && is(Unqual!(ElementEncodingType!R) == dchar))
+ {
+ static if (hasLength!R || isSomeString!R)
+ {
+ reserve(length + str.length * 4);
+ }
+
+ size_t insertedLength;
+ foreach (const dchar c; str)
+ {
+ insertedLength += insertBack(c);
+ }
+ return insertedLength;
+ }
+
+ /// Ditto.
+ alias insert = insertBack;
+
/**
* Reserves $(D_PARAM size) bytes for the string.
*
@@ -450,7 +751,7 @@ struct String
}
/**
- * Returns: Byte length.
+ * Returns: The number of code units that are required to encode the string.
*/
@property size_t length() const pure nothrow @safe @nogc
{
@@ -498,17 +799,36 @@ struct String
* Returns: Random access range that iterates over the string by bytes, in
* forward order.
*/
- ByteRange!char opIndex() pure nothrow @trusted @nogc
+ ByCodeUnit!char opIndex() pure nothrow @trusted @nogc
{
return typeof(return)(this, this.data, this.data + length);
}
/// Ditto.
- ByteRange!(const char) opIndex() const pure nothrow @trusted @nogc
+ ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
{
return typeof(return)(this, this.data, this.data + length);
}
+ ///
+ unittest
+ {
+ auto s = String("Plutarchus");
+ auto r = s[];
+ assert(r.front == 'P');
+ assert(r.back == 's');
+
+ r.popFront();
+ assert(r.front == 'l');
+ assert(r.back == 's');
+
+ r.popBack();
+ assert(r.front == 'l');
+ assert(r.back == 'u');
+
+ assert(r.length == 8);
+ }
+
/**
* Returns: $(D_KEYWORD true) if the vector is empty.
*/
@@ -518,47 +838,67 @@ struct String
}
/**
- * Returns: The first byte.
+ * Params:
+ * i = Slice start.
+ * j = Slice end.
+ *
+ * Returns: A range that iterates over the string by bytes from
+ * index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
*
- * Precondition: $(D_INLINECODE !empty).
+ * Precondition: $(D_INLINECODE i <= j && j <= length).
*/
- @property ref inout(char) front() inout pure nothrow @safe @nogc
+ ByCodeUnit!char opSlice(const size_t i, const size_t j)
+ pure nothrow @trusted @nogc
in
{
- assert(!empty);
+ assert(i <= j);
+ assert(j <= length);
}
body
{
- return *this.data;
+ return typeof(return)(this, this.data + i, this.data + j);
}
- ///
- @safe unittest
- {
- auto s = String("Vladimir Soloviev");
- assert(s.front == 'V');
- }
-
- /**
- * Returns: The last byte.
- *
- * Precondition: $(D_INLINECODE !empty).
- */
- @property ref inout(char) back() inout pure nothrow @trusted @nogc
+ /// Ditto.
+ ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
+ const pure nothrow @trusted @nogc
in
{
- assert(!empty);
+ assert(i <= j);
+ assert(j <= length);
}
body
{
- return *(this.data + length - 1);
+ return typeof(return)(this, this.data + i, this.data + j);
}
///
unittest
{
- auto s = String("Caesar");
- assert(s.back == 'r');
+ auto s = String("Vladimir Soloviev");
+ auto r = s[9 .. $];
+
+ assert(r.front == 'S');
+ assert(r.back == 'v');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'o');
+ assert(r.back == 'e');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'l');
+ assert(r.back == 'i');
+
+ r.popFront();
+ r.popBack();
+ assert(r.front == 'o');
+ assert(r.back == 'v');
+
+ r.popFront();
+ r.popBack();
+ assert(r.empty);
}
mixin DefaultAllocator;