Add missing constructors to the String

This commit is contained in:
Eugen Wissner 2017-04-10 08:10:08 +02:00
parent 6436ad49df
commit 47ef787353
1 changed files with 480 additions and 140 deletions

View File

@ -12,13 +12,35 @@
*/
module tanya.container.string;
import core.checkedint;
import core.exception;
import std.algorithm.comparison;
import std.algorithm.mutation;
import std.range;
import std.traits;
import tanya.memory;
private ref const(wchar) front(const wchar[] str)
pure nothrow @safe @nogc
in
{
assert(str.length > 0);
}
body
{
return str[0];
}
private void popFront(ref const(wchar)[] str, const size_t s = 1)
pure nothrow @safe @nogc
in
{
assert(str.length >= s);
}
body
{
str = str[s .. $];
}
/**
* Thrown on encoding errors.
*/
@ -41,12 +63,12 @@ class UTFException : Exception
}
/**
* Byte range.
* Iterates $(D_PSYMBOL String) by UTF-8 code unit.
*
* Params:
* E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
*/
struct ByteRange(E)
struct ByCodeUnit(E)
if (is(Unqual!E == char))
{
private E* begin, end;
@ -77,7 +99,7 @@ struct ByteRange(E)
@disable this();
@property ByteRange save()
@property ByCodeUnit save()
{
return this;
}
@ -144,17 +166,17 @@ struct ByteRange(E)
return *(this.begin + i);
}
ByteRange opIndex()
ByCodeUnit opIndex()
{
return typeof(return)(*this.container, this.begin, this.end);
}
ByteRange!(const E) opIndex() const
ByCodeUnit!(const E) opIndex() const
{
return typeof(return)(*this.container, this.begin, this.end);
}
ByteRange opSlice(const size_t i, const size_t j) @trusted
ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
in
{
assert(i <= j);
@ -165,7 +187,7 @@ struct ByteRange(E)
return typeof(return)(*this.container, this.begin + i, this.begin + j);
}
ByteRange!(const E) opSlice(const size_t i, const size_t j) const @trusted
ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
in
{
assert(i <= j);
@ -180,7 +202,6 @@ struct ByteRange(E)
{
return this.begin[0 .. length];
}
}
/**
@ -192,13 +213,16 @@ struct String
private char* data;
private size_t capacity_;
invariant
pure nothrow @safe @nogc invariant
{
assert(this.length_ <= this.capacity_);
}
/**
* Constructs the string from a stringish range.
*
* Params:
* R = String type.
* str = Initial string.
* allocator = Allocator.
*
@ -206,127 +230,79 @@ struct String
*
* Precondition: $(D_INLINECODE allocator is null).
*/
this(const char[] str, shared Allocator allocator = defaultAllocator)
@trusted @nogc
this(R)(const R str, shared Allocator allocator = defaultAllocator)
if (!isInfinite!R
&& isInputRange!R
&& isSomeChar!(ElementEncodingType!R))
{
this(allocator);
reserve(str.length);
this.length_ = str.length;
str.copy(this.data[0 .. this.length_]);
}
/// Ditto.
this(const wchar[] str, shared Allocator allocator = defaultAllocator)
@trusted @nogc
{
this(allocator);
reserve(str.length * 2);
size_t s;
auto sourceLength = str.length;
for (auto c = str.ptr; sourceLength != 0; ++c, --sourceLength)
{
if (length - s < 5) // More space required.
{
bool overflow;
auto size = addu(length, str.length, overflow);
assert(!overflow);
reserve(size);
}
if (*c < 0x80)
{
this.data[s++] = *c & 0x7f;
this.length_ += 1;
}
else if (*c < 0x800)
{
this.data[s++] = 0xc0 | (*c >> 6) & 0xff;
this.data[s++] = 0x80 | (*c & 0x3f);
this.length_ += 2;
}
else if (*c < 0xd800 || *c - 0xe000 < 0x2000)
{
this.data[s++] = 0xe0 | (*c >> 12) & 0xff;
this.data[s++] = 0x80 | ((*c >> 6) & 0x3f);
this.data[s++] = 0x80 | (*c & 0x3f);
this.length_ += 3;
}
else if ((*c - 0xd800) < 2048 && sourceLength > 0 && *(c + 1) - 0xdc00 < 0x400)
{ // Surrogate pair
dchar d = (*c - 0xd800) | ((*c++ - 0xdc00) >> 10);
this.data[s++] = 0xf0 | (d >> 18);
this.data[s++] = 0x80 | ((d >> 12) & 0x3f);
this.data[s++] = 0x80 | ((d >> 6) & 0x3f);
this.data[s++] = 0x80 | (d & 0x3f);
--sourceLength;
this.length_ += 4;
}
else
{
throw defaultAllocator.make!UTFException("Wrong UTF-16 sequeunce");
}
}
insertBack(str);
}
///
unittest
@safe @nogc unittest
{
auto s = String("\u10437"w);
assert("\u10437" == s.get());
}
/// Ditto.
this(const dchar[] str, shared Allocator allocator = defaultAllocator)
@trusted @nogc
{
this(allocator);
reserve(str.length * 4);
auto s = data;
foreach (c; str)
{
if (c < 0x80)
{
*s++ = c & 0x7f;
this.length_ += 1;
}
else if (c < 0x800)
{
*s++ = 0xc0 | (c >> 6) & 0xff;
*s++ = 0x80 | (c & 0x3f);
this.length_ += 2;
}
else if (c < 0xd800 || c - 0xe000 < 0x2000)
{
*s++ = 0xe0 | (c >> 12) & 0xff;
*s++ = 0x80 | ((c >> 6) & 0x3f);
*s++ = 0x80 | (c & 0x3f);
this.length_ += 3;
}
else if (c - 0x10000 < 0x100000)
{
*s++ = 0xf0 | (c >> 18);
*s++ = 0x80 | ((c >> 12) & 0x3f);
*s++ = 0x80 | ((c >> 6) & 0x3f);
*s++ = 0x80 | (c & 0x3f);
this.length_ += 4;
}
else
{
throw defaultAllocator.make!UTFException("Wrong UTF-32 sequeunce");
}
}
}
///
unittest
@safe @nogc unittest
{
auto s = String("Отказаться от вина - в этом страшная вина."d);
assert("Отказаться от вина - в этом страшная вина." == s.get());
}
/**
* Initializes this string from another one.
*
* If $(D_PARAM init) is passed by value, it won't be copied, but moved.
* If the allocator of ($D_PARAM init) matches $(D_PARAM allocator),
* $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
* storage, otherwise, the storage will be allocated with
* $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
*
* If $(D_PARAM init) is passed by reference, it will be copied.
*
* Params:
* init = Source string.
* allocator = Allocator.
*
* Precondition: $(D_INLINECODE allocator is null).
*/
this(String init, shared Allocator allocator = defaultAllocator)
nothrow @trusted @nogc
{
this(allocator);
if (allocator !is init.allocator)
{
// Just steal all references and the allocator.
this.data = init.data;
this.length_ = init.length_;
this.capacity_ = init.capacity_;
// Reset the source string, so it can't destroy the moved storage.
init.length_ = init.capacity_ = 0;
init.data = null;
}
else
{
reserve(init.length);
init.data[0 .. init.length].copy(this.data[0 .. init.length]);
this.length_ = init.length;
}
}
/// Ditto.
this(ref const String init, shared Allocator allocator = defaultAllocator)
nothrow @trusted @nogc
{
this(allocator);
reserve(init.length);
init.data[0 .. init.length].copy(this.data[0 .. init.length]);
this.length_ = init.length;
}
/// Ditto.
this(shared Allocator allocator) pure nothrow @safe @nogc
in
@ -338,6 +314,59 @@ struct String
this.allocator_ = allocator;
}
/**
* Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
*
* Params:
* C = Type of the character to fill the string with.
* n = Number of characters to copy.
* chr = Character to fill the string with.
*/
this(C)(const size_t n, const C chr,
shared Allocator allocator = defaultAllocator) @trusted
if (isSomeChar!C)
{
this(allocator);
if (n == 0)
{
return;
}
insertBack(chr);
// insertBack should validate the character, so we can just copy it
// n - 1 times.
auto remaining = length * n;
reserve(remaining);
// Use a quick copy.
for (auto i = this.length_ * 2; i <= remaining; i *= 2)
{
this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
this.length_ = i;
}
remaining -= length;
copy(this.data[this.length_ - remaining .. this.length_],
this.data[this.length_ .. this.length_ + remaining]);
this.length_ += remaining;
}
private unittest
{
{
auto s = String(1, 'О');
assert(s.length == 2);
}
{
auto s = String(3, 'О');
assert(s.length == 6);
}
{
auto s = String(8, 'О');
assert(s.length == 16);
}
}
/**
* Destroys the string.
*/
@ -346,6 +375,278 @@ struct String
allocator.deallocate(this.data[0 .. this.capacity_]);
}
private void write4Bytes(ref const dchar src)
pure nothrow @trusted @nogc
in
{
assert(capacity - length >= 4);
assert(src - 0x10000 < 0x100000);
}
body
{
auto dst = this.data + length;
*dst++ = 0xf0 | (src >> 18);
*dst++ = 0x80 | ((src >> 12) & 0x3f);
*dst++ = 0x80 | ((src >> 6) & 0x3f);
*dst = 0x80 | (src & 0x3f);
this.length_ += 4;
}
private size_t insertWideChar(C)(auto ref const C chr) @trusted
if (is(C == wchar) || is(C == dchar))
in
{
assert(capacity - length >= C.sizeof);
}
body
{
auto dst = this.data + length;
if (chr < 0x80)
{
*dst = chr & 0x7f;
this.length_ += 1;
return 1;
}
else if (chr < 0x800)
{
*dst++ = 0xc0 | (chr >> 6) & 0xff;
*dst = 0x80 | (chr & 0x3f);
this.length_ += 2;
return 2;
}
else if (chr < 0xd800 || chr - 0xe000 < 0x2000)
{
*dst++ = 0xe0 | (chr >> 12) & 0xff;
*dst++ = 0x80 | ((chr >> 6) & 0x3f);
*dst = 0x80 | (chr & 0x3f);
this.length_ += 3;
return 3;
}
return 0;
}
/**
* Inserts a single character at the end of the string.
*
* Params:
* chr = The character should be inserted.
*
* Returns: The number of bytes inserted.
*
* Throws: $(D_PSYMBOL UTFException).
*/
size_t insertBack(const char chr) @trusted @nogc
{
if ((chr & 0x80) != 0)
{
throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
}
reserve(length + 1);
*(data + length) = chr;
++this.length_;
return 1;
}
/// Ditto.
size_t insertBack(const wchar chr) @trusted @nogc
{
reserve(length + wchar.sizeof);
auto ret = insertWideChar(chr);
if (ret == 0)
{
throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
}
return ret;
}
/// Ditto.
size_t insertBack(const dchar chr) @trusted @nogc
{
reserve(length + dchar.sizeof);
auto ret = insertWideChar(chr);
if (ret > 0)
{
return ret;
}
else if (chr - 0x10000 < 0x100000)
{
write4Bytes(chr);
return 4;
}
else
{
throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
}
}
/**
* Inserts a stringish range at the end of the string.
*
* Params:
* R = Type of the inserted string.
* str = String should be inserted.
*
* Returns: The number of bytes inserted.
*
* Throws: $(D_PSYMBOL UTFException).
*/
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
&& isInputRange!R
&& is(Unqual!(ElementEncodingType!R) == char))
{
size_t size;
static if (hasLength!R || isNarrowString!R)
{
size = str.length + length;
reserve(size);
}
static if (isNarrowString!R)
{
str.copy(this.data[length .. size]);
this.length_ = size;
return str.length;
}
else
{
size_t insertedLength;
while (!str.empty)
{
ubyte expectedLength;
if ((str.front & 0x80) == 0x00)
{
expectedLength = 1;
}
else if ((str.front & 0xe0) == 0xc0)
{
expectedLength = 2;
}
else if ((str.front & 0xf0) == 0xe0)
{
expectedLength = 3;
}
else if ((str.front & 0xf8) == 0xf0)
{
expectedLength = 4;
}
else
{
throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
}
size = length + expectedLength;
reserve(size);
for (; expectedLength > 0; --expectedLength)
{
if (str.empty)
{
throw defaultAllocator.make!UTFException("Invalid UTF-8 sequeunce");
}
*(data + length) = str.front;
str.popFront();
}
insertedLength += expectedLength;
this.length_ = size;
}
return insertedLength;
}
}
/// Ditto.
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
&& isInputRange!R
&& is(Unqual!(ElementEncodingType!R) == wchar))
{
static if (hasLength!R || isNarrowString!R)
{
reserve(length + str.length * wchar.sizeof);
}
static if (isNarrowString!R)
{
const(wchar)[] range = str;
}
else
{
alias range = str;
}
auto oldLength = length;
while (!range.empty)
{
reserve(length + 4);
auto ret = insertWideChar(range.front);
if (ret > 0)
{
range.popFront();
}
else if (range.front - 0xd800 < 2048)
{ // Surrogate pair.
static if (isNarrowString!R)
{
if (range.length < 2 || range[1] - 0xdc00 >= 0x400)
{
throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
}
dchar d = (range[0] - 0xd800) | ((range[1] - 0xdc00) >> 10);
range.popFront(2);
}
else
{
dchar d = range.front - 0xd800;
range.popFront();
if (range.empty || range.front - 0xdc00 >= 0x400)
{
throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
}
d |= (range.front - 0xdc00) >> 10;
range.popFront();
}
write4Bytes(d);
}
else
{
throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
}
}
return this.length_ - oldLength;
}
/// Ditto.
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
&& isInputRange!R
&& is(Unqual!(ElementEncodingType!R) == dchar))
{
static if (hasLength!R || isSomeString!R)
{
reserve(length + str.length * 4);
}
size_t insertedLength;
foreach (const dchar c; str)
{
insertedLength += insertBack(c);
}
return insertedLength;
}
/// Ditto.
alias insert = insertBack;
/**
* Reserves $(D_PARAM size) bytes for the string.
*
@ -450,7 +751,7 @@ struct String
}
/**
* Returns: Byte length.
* Returns: The number of code units that are required to encode the string.
*/
@property size_t length() const pure nothrow @safe @nogc
{
@ -498,17 +799,36 @@ struct String
* Returns: Random access range that iterates over the string by bytes, in
* forward order.
*/
ByteRange!char opIndex() pure nothrow @trusted @nogc
ByCodeUnit!char opIndex() pure nothrow @trusted @nogc
{
return typeof(return)(this, this.data, this.data + length);
}
/// Ditto.
ByteRange!(const char) opIndex() const pure nothrow @trusted @nogc
ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
{
return typeof(return)(this, this.data, this.data + length);
}
///
unittest
{
auto s = String("Plutarchus");
auto r = s[];
assert(r.front == 'P');
assert(r.back == 's');
r.popFront();
assert(r.front == 'l');
assert(r.back == 's');
r.popBack();
assert(r.front == 'l');
assert(r.back == 'u');
assert(r.length == 8);
}
/**
* Returns: $(D_KEYWORD true) if the vector is empty.
*/
@ -518,47 +838,67 @@ struct String
}
/**
* Returns: The first byte.
* Params:
* i = Slice start.
* j = Slice end.
*
* Precondition: $(D_INLINECODE !empty).
* Returns: A range that iterates over the string by bytes from
* index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
*
* Precondition: $(D_INLINECODE i <= j && j <= length).
*/
@property ref inout(char) front() inout pure nothrow @safe @nogc
ByCodeUnit!char opSlice(const size_t i, const size_t j)
pure nothrow @trusted @nogc
in
{
assert(!empty);
assert(i <= j);
assert(j <= length);
}
body
{
return *this.data;
return typeof(return)(this, this.data + i, this.data + j);
}
///
@safe unittest
{
auto s = String("Vladimir Soloviev");
assert(s.front == 'V');
}
/**
* Returns: The last byte.
*
* Precondition: $(D_INLINECODE !empty).
*/
@property ref inout(char) back() inout pure nothrow @trusted @nogc
/// Ditto.
ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
const pure nothrow @trusted @nogc
in
{
assert(!empty);
assert(i <= j);
assert(j <= length);
}
body
{
return *(this.data + length - 1);
return typeof(return)(this, this.data + i, this.data + j);
}
///
unittest
{
auto s = String("Caesar");
assert(s.back == 'r');
auto s = String("Vladimir Soloviev");
auto r = s[9 .. $];
assert(r.front == 'S');
assert(r.back == 'v');
r.popFront();
r.popBack();
assert(r.front == 'o');
assert(r.back == 'e');
r.popFront();
r.popBack();
assert(r.front == 'l');
assert(r.back == 'i');
r.popFront();
r.popBack();
assert(r.front == 'o');
assert(r.back == 'v');
r.popFront();
r.popBack();
assert(r.empty);
}
mixin DefaultAllocator;