tanya/source/tanya/container/string.d

1668 lines
40 KiB
D
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* UTF-8 encoded string.
*
* You can create a $(D_PSYMBOL String) from a literal string, single character
* or character range. Characters can be of the type $(D_KEYWORD char),
* $(D_KEYWORD wchar) or $(D_KEYWORD dchar). Literal strings, characters and
* character ranges can also be inserted into an existing string.
*
* $(D_PSYMBOL String) is always valid UTF-8. Inserting an invalid sequence
* or working on a corrupted $(D_PSYMBOL String) causes
* $(D_PSYMBOL UTFException) to be thrown.
*
* Internally $(D_PSYMBOL String) is represented by a sequence of
* $(D_KEYWORD char)s.
*
* Copyright: Eugene Wissner 2017-2018.
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
* Mozilla Public License, v. 2.0).
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/string.d,
* tanya/container/string.d)
*/
module tanya.container.string;
import std.algorithm.mutation : bringToFront;
import tanya.algorithm.comparison;
import tanya.algorithm.mutation;
import tanya.hash.lookup;
import tanya.memory;
import tanya.meta.trait;
import tanya.meta.transform;
import tanya.range.array;
import tanya.range.primitive;
version (unittest)
{
import tanya.test.assertion;
}
/**
* Thrown on encoding errors.
*
* In this module it is raised when a character or sequence cannot be encoded
* as UTF-8 on insertion, or when a stored sequence cannot be decoded while
* iterating by code point.
*/
class UTFException : Exception
{
/**
* Creates the exception; all arguments are forwarded unchanged to the
* $(D_PSYMBOL Exception) constructor.
*
* Params:
* msg = The message for the exception.
* file = The file where the exception occurred.
* line = The line number where the exception occurred.
* next = The previous exception in the chain of exceptions, if any.
*/
this(string msg,
string file = __FILE__,
size_t line = __LINE__,
Throwable next = null) @nogc @safe pure nothrow
{
super(msg, file, line, next);
}
}
/**
 * Range over the UTF-8 code units (bytes) of a $(D_PSYMBOL String).
 *
 * The range stores a pointer to the string it was created from together
 * with the $(D_INLINECODE begin)/$(D_INLINECODE end) pointers of the window
 * it covers; the invariant requires this window to stay inside the string.
 *
 * Params:
 *  E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
 */
struct ByCodeUnit(E)
if (is(Unqual!E == char))
{
    private E* begin, end;
    private alias ContainerType = CopyConstness!(E, String);
    private ContainerType* container;

    invariant
    {
        assert(this.begin <= this.end);
        assert(this.container !is null);
        assert(this.begin >= this.container.data);
        assert(this.end <= this.container.data + this.container.length);
    }

    // Only this module creates ranges; [begin, end) must lie in container.
    private this(ref ContainerType container, E* begin, E* end) @trusted
    in
    {
        assert(begin <= end);
        assert(begin >= container.data);
        assert(end <= container.data + container.length);
    }
    do
    {
        this.begin = begin;
        this.end = end;
        this.container = &container;
    }

    @disable this();

    /// Returns: A copy of the range.
    @property ByCodeUnit save()
    {
        return this;
    }

    /// Returns: $(D_KEYWORD true) if no code units are left.
    @property bool empty() const
    {
        return this.end == this.begin;
    }

    /// Returns: The number of remaining code units.
    @property size_t length() const
    {
        return this.end - this.begin;
    }

    alias opDollar = length;

    /// Returns: The first code unit. The range must not be empty.
    @property ref inout(E) front() inout
    in
    {
        assert(!empty);
    }
    do
    {
        return *this.begin;
    }

    /// Returns: The last code unit. The range must not be empty.
    @property ref inout(E) back() inout @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        auto last = this.end - 1;
        return *last;
    }

    /// Drops the first code unit. The range must not be empty.
    void popFront() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        this.begin += 1;
    }

    /// Drops the last code unit. The range must not be empty.
    void popBack() @trusted
    in
    {
        assert(!empty);
    }
    do
    {
        this.end -= 1;
    }

    /// Returns: The code unit at offset $(D_PARAM i) from the front.
    ref inout(E) opIndex(const size_t i) inout @trusted
    in
    {
        assert(i < length);
    }
    do
    {
        return this.begin[i];
    }

    /// Returns: A range over the same window.
    ByCodeUnit opIndex()
    {
        return ByCodeUnit(*this.container, this.begin, this.end);
    }

    /// ditto
    ByCodeUnit!(const E) opIndex() const
    {
        return ByCodeUnit!(const E)(*this.container, this.begin, this.end);
    }

    /// Returns: The sub-range $(D_INLINECODE [i, j$(RPAREN)) of this range.
    ByCodeUnit opSlice(const size_t i, const size_t j) @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        auto from = this.begin + i;
        auto to = this.begin + j;
        return ByCodeUnit(*this.container, from, to);
    }

    /// ditto
    ByCodeUnit!(const E) opSlice(const size_t i, const size_t j) const @trusted
    in
    {
        assert(i <= j);
        assert(j <= length);
    }
    do
    {
        auto from = this.begin + i;
        auto to = this.begin + j;
        return ByCodeUnit!(const E)(*this.container, from, to);
    }

    /// Returns: The covered code units as a slice of the string's storage.
    inout(E)[] get() inout @trusted
    {
        return this.begin[0 .. this.end - this.begin];
    }
}
/**
* Iterates $(D_PSYMBOL String) by UTF-8 code point.
*
* The range stores a pointer to the string it was created from and the
* begin/end byte pointers of the window it covers. $(D_PSYMBOL front) decodes
* the sequence at the beginning of the window, $(D_PSYMBOL popFront) skips
* it. Both throw $(D_PSYMBOL UTFException) if the sequence announced by the
* first byte doesn't fit into the window.
*
* Params:
* E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
*/
struct ByCodePoint(E)
if (is(Unqual!E == char))
{
private E* begin, end;
private alias ContainerType = CopyConstness!(E, String);
private ContainerType* container;
invariant
{
assert(this.begin <= this.end);
assert(this.container !is null);
assert(this.begin >= this.container.data);
assert(this.end <= this.container.data + this.container.length);
}
// Only this module creates ranges; [begin, end) must lie inside container.
private this(ref ContainerType container, E* begin, E* end) @trusted
in
{
assert(begin <= end);
assert(begin >= container.data);
assert(end <= container.data + container.length);
}
do
{
this.container = &container;
this.begin = begin;
this.end = end;
}
@disable this();
// Returns a copy of the range.
@property ByCodePoint save()
{
return this;
}
// True if no bytes are left in the window.
@property bool empty() const
{
return this.begin == this.end;
}
// Decodes and returns the code point at the front without advancing.
// Throws UTFException if the first byte announces more than 4 bytes or
// more bytes than the window contains.
@property dchar front() const @trusted
in
{
assert(!empty);
}
out (chr)
{
assert(chr < 0xd800 || chr > 0xdfff);
}
do
{
dchar chr;
ubyte units;
int mask;
const(char)* it = this.begin;
// For a byte without the high bit (ASCII) units and mask both stay 0, so
// the byte is returned unchanged below.
if (*it & 0x80)
{
mask = 0xe0;
// Count the leading 1 bits of the first byte; their number is the length
// of the sequence. mask grows with them, so that ~mask keeps only the
// payload bits of the first byte.
for (units = 2; ((*it << units) & 0x80) != 0; ++units)
{
mask = (mask >> 1) | 0x80;
}
}
if (this.begin + units > end || units > 4)
{
throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
}
chr = *it++ & ~mask;
// Every following byte contributes its low 6 bits. The two top bits of
// these bytes aren't checked here.
for (; units > 1; --units)
{
chr = (chr << 6) | (*it++ & 0x3f);
}
return chr;
}
// Advances the range by one code point.
// Throws UTFException if the first byte isn't a valid start of a sequence
// or the sequence would run past the end of the window.
void popFront() @trusted
in
{
assert(!empty);
}
do
{
// The sequence length is derived from the high bits of the first byte.
// The order of the tests matters: the longest prefix is tested first.
ubyte units;
if ((*begin & 0xf0) == 0xf0)
{
units = 4;
}
else if ((*begin & 0xe0) == 0xe0)
{
units = 3;
}
else if ((*begin & 0xc0) == 0xc0)
{
units = 2;
}
else if ((*begin & 0x80) == 0)
{
units = 1;
}
// A byte of the form 10xxxxxx matches none of the branches above and
// leaves units at 0.
if (units == 0 || this.begin + units > this.end)
{
throw defaultAllocator.make!UTFException("Invalid UTF-8 character");
}
this.begin += units;
}
// Returns a range over the same window.
ByCodePoint opIndex()
{
return typeof(return)(*this.container, this.begin, this.end);
}
// ditto, for a const range; the result itself can be advanced.
ByCodePoint!(const E) opIndex() const
{
return typeof(return)(*this.container, this.begin, this.end);
}
}
/**
* UTF-8 string.
*/
struct String
{
// Number of code units (bytes) currently in use.
private size_t length_;
// Start of the storage; the first length_ bytes hold the content.
private char* data;
// Number of bytes reserved at data.
private size_t capacity_;
// The used length never exceeds the reserved capacity.
pure nothrow @safe @nogc invariant
{
assert(this.length_ <= this.capacity_);
}
/**
* Constructs the string from a stringish range.
*
* The string is bound to $(D_PARAM allocator) first, then $(D_PARAM str) is
* appended with $(D_PSYMBOL insertBack).
*
* Params:
* S = String type.
* str = Initial string.
* allocator = Allocator.
*
* Throws: $(D_PSYMBOL UTFException).
*
* Precondition: $(D_INLINECODE allocator !is null).
*/
this(S)(const S str, shared Allocator allocator = defaultAllocator)
if (!isInfinite!S
&& isInputRange!S
&& isSomeChar!(ElementType!S))
{
this(allocator);
insertBack(str);
}
///
@nogc pure @safe unittest
{
// NOTE(review): "\u" takes exactly four hex digits, so this literal is
// U+1043 followed by the digit '7' and not the supplementary code point
// U+10437; no surrogate pair is involved in this test.
auto s = String("\u10437"w);
assert(s == "\u10437");
}
///
@nogc pure @safe unittest
{
auto s = String("Отказаться от вина - в этом страшная вина."d);
assert(s == "Отказаться от вина - в этом страшная вина.");
}
/**
 * Initializes this string from another one.
 *
 * If $(D_PARAM init) is passed by value, it won't be copied, but moved.
 * If the allocator of $(D_PARAM init) matches $(D_PARAM allocator),
 * $(D_KEYWORD this) will just take the ownership over $(D_PARAM init)'s
 * storage, otherwise, the storage will be allocated with
 * $(D_PARAM allocator). $(D_PARAM init) will be destroyed at the end.
 *
 * If $(D_PARAM init) is passed by reference, it will be copied.
 *
 * Params:
 *  S         = Source string type.
 *  init      = Source string.
 *  allocator = Allocator.
 *
 * Precondition: $(D_INLINECODE allocator !is null).
 */
this(S)(S init, shared Allocator allocator = defaultAllocator) @trusted
if (is(S == String))
{
    this(allocator);
    // The storage may only change its owner if it will be released by the
    // same allocator that has allocated it.
    if (allocator is init.allocator)
    {
        // Just steal all references and the allocator.
        this.data = init.data;
        this.length_ = init.length_;
        this.capacity_ = init.capacity_;
        // Reset the source string, so it can't destroy the moved storage.
        init.length_ = init.capacity_ = 0;
        init.data = null;
    }
    else
    {
        // Different allocators: copy the bytes into storage obtained from
        // the new allocator; init releases its own storage on destruction.
        reserve(init.length);
        init.data[0 .. init.length].copy(this.data[0 .. init.length]);
        this.length_ = init.length;
    }
}
/// ditto
this(S)(ref S init, shared Allocator allocator = defaultAllocator) @trusted
if (is(Unqual!S == String))
{
this(allocator);
// A source passed by reference is always copied and stays untouched.
reserve(init.length);
init.data[0 .. init.length].copy(this.data[0 .. init.length]);
this.length_ = init.length;
}
/// ditto
this(shared Allocator allocator) @nogc nothrow pure @safe
in
{
assert(allocator !is null);
}
do
{
// Only the allocator is remembered; no storage is reserved here.
this.allocator_ = allocator;
}
/**
* Fills the string with $(D_PARAM n) consecutive copies of character $(D_PARAM chr).
*
* The character is encoded once; the encoded bytes are then replicated by
* repeatedly doubling the already filled part of the string.
*
* Params:
* C = Type of the character to fill the string with.
* n = Number of characters to copy.
* chr = Character to fill the string with.
* allocator = Allocator.
*/
this(C)(const size_t n,
const C chr,
shared Allocator allocator = defaultAllocator) @trusted
if (isSomeChar!C)
{
this(allocator);
if (n == 0)
{
return;
}
insertBack(chr);
// insertBack should validate the character, so we can just copy it
// n - 1 times.
// length is now the encoded size of one character, so this is the byte
// length of the final string.
auto remaining = length * n;
reserve(remaining);
// Use a quick copy.
// Each pass duplicates the filled part, as long as the doubled length
// still fits into the final length.
for (auto i = this.length_ * 2; i <= remaining; i *= 2)
{
this.data[0 .. this.length_].copy(this.data[this.length_ .. i]);
this.length_ = i;
}
// From here on remaining is the number of bytes still missing. It is
// smaller than the filled part (another doubling didn't fit) and a
// multiple of the encoded character size, so the tail of the filled part
// can be appended as is.
remaining -= length;
copy(this.data[this.length_ - remaining .. this.length_],
this.data[this.length_ .. this.length_ + remaining]);
this.length_ += remaining;
}
///
@nogc pure @safe unittest
{
// The fill character is encoded in 2 bytes, hence length == 2 * n.
{
auto s = String(1, 'О');
assert(s.length == 2);
}
{
auto s = String(3, 'О');
assert(s.length == 6);
}
{
auto s = String(8, 'О');
assert(s.length == 16);
}
}
// n == 0 produces an empty string.
@nogc nothrow pure @safe unittest
{
auto s = String(0, 'K');
assert(s.length == 0);
}
// Postblit: replaces the storage shared with the source by an own copy.
this(this) @nogc nothrow pure @trusted
{
// Remember the bytes still shared with the source and detach from them...
auto buf = this.data[0 .. this.length_];
this.length_ = capacity_ = 0;
this.data = null;
// ...so that insertBack reserves new storage and copies the bytes there.
insertBack(buf);
}
/**
* Destroys the string.
*/
~this() @nogc nothrow pure @trusted
{
// Resizes the whole reserved block to 0 bytes (presumably this releases
// it; allocator.resize isn't defined in this file).
allocator.resize(this.data[0 .. this.capacity_], 0);
}
// Appends src to the string encoded as a 4-byte UTF-8 sequence.
// The caller has to provide at least 4 free bytes; the second assertion
// restricts src to the code points that need 4 bytes (presumably relying on
// unsigned wrap-around for values below 0x10000).
private void write4Bytes(ref const dchar src)
@nogc nothrow pure @trusted
in
{
assert(capacity - length >= 4);
assert(src - 0x10000 < 0x100000);
}
do
{
auto dst = this.data + length;
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, most significant bits first.
*dst++ = 0xf0 | (src >> 18);
*dst++ = 0x80 | ((src >> 12) & 0x3f);
*dst++ = 0x80 | ((src >> 6) & 0x3f);
*dst = 0x80 | (src & 0x3f);
this.length_ += 4;
}
// Appends chr encoded in 1 to 3 UTF-8 bytes.
//
// The caller has to provide at least 3 free bytes. Returns the number of
// bytes written or 0, if chr is a surrogate (0xd800-0xdfff) or greater than
// 0xffff; in this case the string isn't changed.
private size_t insertWideChar(C)(auto ref const C chr) @trusted
if (is(C == wchar) || is(C == dchar))
in
{
    assert(capacity - length >= 3);
}
do
{
    auto target = this.data + length;
    size_t written;

    if (chr < 0x80)
    {
        // 0xxxxxxx
        target[0] = chr & 0x7f;
        written = 1;
    }
    else if (chr < 0x800)
    {
        // 110xxxxx 10xxxxxx
        target[0] = 0xc0 | (chr >> 6) & 0xff;
        target[1] = 0x80 | (chr & 0x3f);
        written = 2;
    }
    else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff))
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        target[0] = 0xe0 | (chr >> 12) & 0xff;
        target[1] = 0x80 | ((chr >> 6) & 0x3f);
        target[2] = 0x80 | (chr & 0x3f);
        written = 3;
    }
    this.length_ += written;
    return written;
}
/**
 * Inserts a single character at the end of the string.
 *
 * A $(D_KEYWORD char) is stored as is, wide characters are encoded as
 * UTF-8 before they are stored.
 *
 * Params:
 *  chr = The character should be inserted.
 *
 * Returns: The number of bytes inserted (1).
 */
size_t insertBack(char chr) @nogc nothrow pure @trusted
{
    reserve(length + 1);
    this.data[length] = chr;
    this.length_ += 1;
    return 1;
}

/// ditto
size_t insertBack(const wchar chr) @nogc pure @trusted
{
    // A UTF-16 code unit needs at most 3 bytes in UTF-8.
    reserve(length + 3);

    const written = insertWideChar(chr);
    if (written != 0)
    {
        return written;
    }
    throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
}

// Allocates enough space for 3-byte character.
@nogc pure @safe unittest
{
    String s;
    s.insertBack('\u8100');
}

/// ditto
size_t insertBack(const dchar chr) @nogc pure @trusted
{
    reserve(length + dchar.sizeof);

    const written = insertWideChar(chr);
    if (written > 0)
    {
        return written;
    }
    // Not encodable in up to 3 bytes: either a 4-byte code point or invalid.
    if (chr - 0x10000 >= 0x100000)
    {
        throw defaultAllocator.make!UTFException("Invalid UTF-32 sequeunce");
    }
    write4Bytes(chr);
    return 4;
}

@nogc pure @safe unittest
{
    assertThrown!UTFException(() => String(1, cast(dchar) 0xd900));
    assertThrown!UTFException(() => String(1, cast(wchar) 0xd900));
}
/**
* Inserts a stringish range at the end of the string.
*
* Params:
* R = Type of the inserted string.
* str = String should be inserted.
*
* Returns: The number of bytes inserted.
*/
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
&& isInputRange!R
&& is(Unqual!(ElementType!R) == char))
{
size_t size;
// If the length is known, reserve the storage for the result at once.
static if (hasLength!R || isNarrowString!R)
{
size = str.length + length;
reserve(size);
}
// Built-in char strings and ByCodeUnit ranges are copied as a block.
static if (isNarrowString!R)
{
str.copy(this.data[length .. size]);
this.length_ = size;
return str.length;
}
else static if (isInstanceOf!(ByCodeUnit, R))
{
str.get.copy(this.data[length .. size]);
this.length_ = size;
return str.length;
}
else
{
// Any other range is appended char by char.
size_t insertedLength;
foreach (c; str)
{
insertedLength += insertBack(c);
}
return insertedLength;
}
}
/// ditto
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
 && isInputRange!R
 && is(Unqual!(ElementType!R) == wchar))
{
    // UTF-16 input: throws UTFException on an unpaired surrogate.
    static if (hasLength!R || isNarrowString!R)
    {
        reserve(length + str.length * wchar.sizeof);
    }
    static if (isNarrowString!R)
    {
        const(wchar)[] range = str;
    }
    else
    {
        alias range = str;
    }
    auto oldLength = length;

    while (!range.empty)
    {
        // Room for the longest possible result of one iteration (4 bytes).
        reserve(length + wchar.sizeof * 2);

        if (insertWideChar(range.front) > 0)
        {
            range.popFront();
            continue;
        }
        // insertWideChar writes nothing only for surrogates
        // (0xd800-0xdfff). A valid pair is a high surrogate (0xd800-0xdbff)
        // immediately followed by a low one (0xdc00-0xdfff).
        const wchar high = range.front;
        if (high >= 0xdc00)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
        }
        range.popFront();
        if (range.empty)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
        }
        // Explicit range check: wchar - int is a signed int, so a
        // subtraction based test would accept values below 0xdc00.
        const wchar low = range.front;
        if (low < 0xdc00 || low > 0xdfff)
        {
            throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
        }
        range.popFront();

        // The high surrogate carries the upper 10 bits, the low one the
        // lower 10 bits of (code point - 0x10000).
        const dchar d = cast(dchar) (0x10000
                                   + ((high - 0xd800) << 10)
                                   + (low - 0xdc00));
        write4Bytes(d);
    }
    return this.length_ - oldLength;
}

// Surrogate pairs are combined into one 4-byte code point
@nogc pure @safe unittest
{
    auto s = String("\U00010437"w);
    assert(s.length == 4);
    assert(s == "\U00010437");
}
/// ditto
size_t insertBack(R)(R str) @trusted
if (!isInfinite!R
&& isInputRange!R
&& is(Unqual!(ElementType!R) == dchar))
{
// A code point takes at most 4 bytes in UTF-8, so this is an upper bound.
static if (hasLength!R || isSomeString!R)
{
reserve(length + str.length * 4);
}
// Each code point is validated and encoded by the dchar overload.
size_t insertedLength;
foreach (const dchar c; str)
{
insertedLength += insertBack(c);
}
return insertedLength;
}
/// ditto
alias insert = insertBack;
/**
 * Makes sure that the string can hold at least $(D_PARAM size) bytes.
 *
 * If $(D_PARAM size) is less than or equal to the $(D_PSYMBOL capacity), the
 * function call does not cause a reallocation and the string capacity is not
 * affected. Otherwise the capacity becomes exactly $(D_PARAM size).
 *
 * Params:
 *  size = Desired size in bytes.
 */
void reserve(const size_t size) @nogc nothrow pure @trusted
{
    if (size > this.capacity_)
    {
        this.data = allocator.resize(this.data[0 .. this.capacity_], size).ptr;
        this.capacity_ = size;
    }
}

///
@nogc nothrow pure @safe unittest
{
    String s;
    assert(s.capacity == 0);

    s.reserve(3);
    assert(s.capacity == 3);

    s.reserve(3);
    assert(s.capacity == 3);

    s.reserve(1);
    assert(s.capacity == 3);
}
/**
 * Requests the string to reduce its capacity to fit the $(D_PARAM size).
 *
 * The request is non-binding. The string won't become smaller than the
 * string byte length, and the capacity stays as it is if the allocator
 * can't reallocate the storage.
 *
 * Params:
 *  size = Desired size.
 */
void shrink(const size_t size) @nogc nothrow pure @trusted
{
    if (size >= this.capacity_)
    {
        return;
    }
    // Never cut into the bytes in use.
    const target = max(this.length_, size);
    void[] block = this.data[0 .. this.capacity_];

    if (!allocator.reallocate(block, target))
    {
        return;
    }
    this.capacity_ = target;
    this.data = cast(char*) block;
}

///
@nogc nothrow pure @safe unittest
{
    auto s = String("Die Alten lasen laut.");
    assert(s.capacity == 21);

    s.reserve(30);
    s.shrink(25);
    assert(s.capacity == 25);

    s.shrink(18);
    assert(s.capacity == 21);

    s.shrink(22);
    assert(s.capacity == 21);
}
/**
* Returns: String capacity in bytes, i.e. the number of bytes that can be
* stored without a reallocation.
*/
@property size_t capacity() const @nogc nothrow pure @safe
{
return this.capacity_;
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("In allem Schreiben ist Schamlosigkeit.");
assert(s.capacity == 38);
}
/**
 * Slicing assignment.
 *
 * Overwrites the bytes $(D_INLINECODE [i, j$(RPAREN)) of the string. A
 * range or an array replaces them byte by byte, a single
 * $(D_KEYWORD char) is written to each of them. The bytes are stored as
 * is; they aren't validated.
 *
 * Params:
 *  R     = $(D_KEYWORD char).
 *  value = Assigned character, range or string.
 *  i     = Slice start.
 *  j     = Slice end.
 *
 * Returns: Slice with the assigned part of the string.
 *
 * Precondition: $(D_INLINECODE i <= j && j <= length
 *                           && value.length == j - i)
 */
ByCodeUnit!char opSliceAssign(R)(ByCodeUnit!R value,
                                 const size_t i,
                                 const size_t j)
if (is(Unqual!R == char))
in
{
    assert(i <= j);
    assert(j <= length);
    assert(j - i == value.length);
}
do
{
    auto target = opSlice(i, j);
    copy(value, target);
    return target;
}

/// ditto
ByCodeUnit!char opSliceAssign(const char[] value,
                              const size_t i,
                              const size_t j)
@nogc nothrow pure @trusted
in
{
    assert(i <= j);
    assert(j <= length);
    // Same length requirement as for the range overload.
    assert(j - i == value.length);
}
do
{
    copy(value[], this.data[i .. j]);
    return opSlice(i, j);
}

/// ditto
ByCodeUnit!char opSliceAssign(const char value,
                              const size_t i,
                              const size_t j)
@nogc nothrow pure @trusted
in
{
    assert(i <= j);
    assert(j <= length);
}
do
{
    // Slice assignment of a single value fills every element of the slice.
    this.data[i .. j] = value;
    return opSlice(i, j);
}
/**
* Returns an array used internally by the string.
* The length of the returned array may be smaller than the size of the
* reserved memory for the string.
*
* The array is a slice of the string's own storage and not a copy.
*
* Returns: The array representing the string.
*/
inout(char)[] get() inout @nogc nothrow pure @trusted
{
return this.data[0 .. this.length_];
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Char array.");
assert(s.get().length == 11);
}
/**
* Returns null-terminated string. The returned string is managed by this
* object and shouldn't be freed.
*
* The terminator is written right after the last byte of the string and
* isn't counted in $(D_PSYMBOL length); the storage is enlarged by one byte
* for it if necessary.
*
* Returns: Null-terminated string.
*/
const(char)* toStringz() @nogc nothrow pure @system
{
reserve(length + 1);
this.data[length] = '\0';
return this.data;
}
///
@nogc nothrow pure @system unittest
{
auto s = String("C string.");
assert(s.toStringz()[0] == 'C');
assert(s.toStringz()[9] == '\0');
}
/**
* Returns: The number of code units that are required to encode the string.
* Since the string is stored as $(D_KEYWORD char)s, this is its
* length in bytes.
*/
@property size_t length() const @nogc nothrow pure @safe
{
return this.length_;
}
///
alias opDollar = length;
///
@nogc nothrow pure @safe unittest
{
auto s = String("Piscis primuin a capite foetat.");
assert(s.length == 31);
assert(s[$ - 1] == '.');
}
/**
* Accesses a single byte (code unit) of the string by reference.
*
* Params:
* pos = Position.
*
* Returns: Byte at $(D_PARAM pos).
*
* Precondition: $(D_INLINECODE length > pos).
*/
ref inout(char) opIndex(const size_t pos) inout @nogc nothrow pure @trusted
in
{
assert(length > pos);
}
do
{
return *(this.data + pos);
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Alea iacta est.");
assert(s[0] == 'A');
assert(s[4] == ' ');
}
/**
* Returns: Random access range that iterates over the string by bytes, in
* forward order. The range covers the whole string.
*/
ByCodeUnit!char opIndex() @nogc nothrow pure @trusted
{
return typeof(return)(this, this.data, this.data + length);
}
/// ditto
ByCodeUnit!(const char) opIndex() const pure nothrow @trusted @nogc
{
return typeof(return)(this, this.data, this.data + length);
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Plutarchus");
auto r = s[];
assert(r.front == 'P');
assert(r.back == 's');
r.popFront();
assert(r.front == 'l');
assert(r.back == 's');
r.popBack();
assert(r.front == 'l');
assert(r.back == 'u');
assert(r.length == 8);
}
///
@nogc nothrow pure @safe unittest
{
auto s = const String("Was ich vermag, soll gern geschehen. Goethe");
auto r1 = s[];
assert(r1.front == 'W');
// r2 is an independent range: advancing r1 doesn't affect it.
auto r2 = r1[];
r1.popFront();
assert(r1.front == 'a');
assert(r2.front == 'W');
}
/**
* Returns: Forward range that iterates over the string by code points.
* The range covers the whole string.
*/
ByCodePoint!char byCodePoint() @nogc nothrow pure @trusted
{
return typeof(return)(this, this.data, this.data + length);
}
/// ditto
ByCodePoint!(const char) byCodePoint() const @nogc nothrow pure @trusted
{
return typeof(return)(this, this.data, this.data + length);
}
///
@nogc pure @safe unittest
{
// Characters encoded in 2, 3 and 4 bytes.
auto s = String("Мне есть, что спеть, представ перед Всевышним.");
auto cp = s.byCodePoint();
assert(cp.front == 'М');
cp.popFront();
assert(cp.front == 'н');
s = String("€");
cp = s.byCodePoint();
assert(cp.front == '€');
assert(s.length == 3);
s = String("\U00024B62");
cp = s.byCodePoint();
assert(cp.front == '\U00024B62');
assert(s.length == 4);
}
///
@nogc pure @safe unittest
{
auto s = const String("Высоцкий");
auto cp1 = s.byCodePoint();
assert(cp1.front == 'В');
// Copies made with [] or save() keep their own position.
auto cp2 = cp1[];
cp1.popFront();
assert(cp1.front == 'ы');
assert(cp2.front == 'В');
cp2 = cp1.save();
cp1.popFront();
assert(cp1.front == 'с');
assert(cp2.front == 'ы');
}
/**
* Returns whether the string is empty, i.e. whether its length is 0.
*
* Returns: $(D_KEYWORD true) if the string is empty, $(D_KEYWORD false)
* otherwise.
*/
@property bool empty() const @nogc nothrow pure @safe
{
return length == 0;
}
///
@nogc nothrow pure @safe unittest
{
String s;
assert(s.empty);
s.insertBack('K');
assert(!s.empty);
}
/**
* Slices the string by byte positions.
*
* Params:
* i = Slice start.
* j = Slice end.
*
* Returns: A range that iterates over the string by bytes from
* index $(D_PARAM i) up to (excluding) index $(D_PARAM j).
*
* Precondition: $(D_INLINECODE i <= j && j <= length).
*/
ByCodeUnit!char opSlice(const size_t i, const size_t j)
@nogc nothrow pure @trusted
in
{
assert(i <= j);
assert(j <= length);
}
do
{
return typeof(return)(this, this.data + i, this.data + j);
}
/// ditto
ByCodeUnit!(const char) opSlice(const size_t i, const size_t j)
const @nogc nothrow pure @trusted
in
{
assert(i <= j);
assert(j <= length);
}
do
{
return typeof(return)(this, this.data + i, this.data + j);
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Vladimir Soloviev");
auto r = s[9 .. $];
assert(r.front == 'S');
assert(r.back == 'v');
r.popFront();
r.popBack();
assert(r.front == 'o');
assert(r.back == 'e');
r.popFront();
r.popBack();
assert(r.front == 'l');
assert(r.back == 'i');
r.popFront();
r.popBack();
assert(r.front == 'o');
assert(r.back == 'v');
r.popFront();
r.popBack();
assert(r.empty);
}
/**
* Assigns another string.
*
* If $(D_PARAM that) is passed by value, it won't be copied, but moved.
* This string will take the ownership over $(D_PARAM that)'s storage and
* the allocator.
*
* If $(D_PARAM that) is passed by reference, it will be copied.
*
* Params:
* S = Content type.
* that = The value should be assigned.
*
* Returns: $(D_KEYWORD this).
*/
ref String opAssign(S)(S that)
if (is(S == String))
{
// Exchange the complete state with the by-value argument; the previous
// state of this string ends up in that.
swap(this.data, that.data);
swap(this.length_, that.length_);
swap(this.capacity_, that.capacity_);
swap(this.allocator_, that.allocator_);
return this;
}
/// ditto
ref String opAssign(S)(ref S that) @trusted
if (is(Unqual!S == String))
{
// Only the bytes are copied; the allocator of this string is kept.
reserve(that.length);
that.data[0 .. that.length].copy(this.data[0 .. that.length]);
this.length_ = that.length;
return this;
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Черная, потом пропахшая выть!");
s = String("Как мне тебя не ласкать, не любить?");
}
/**
* Assigns a stringish range.
*
* The previous content is discarded, the reserved storage is kept and
* reused for the new content.
*
* Params:
* S = String type.
* that = Initial string.
*
* Returns: $(D_KEYWORD this).
*
* Throws: $(D_PSYMBOL UTFException).
*/
ref String opAssign(S)(S that)
if (!isInfinite!S
&& isInputRange!S
&& isSomeChar!(ElementType!S))
{
// Resetting the length empties the string without touching the storage.
this.length_ = 0;
insertBack(that);
return this;
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Оловом светится лужная голь...");
s = "Грустная песня, ты - русская боль.";
assert(s == "Грустная песня, ты - русская боль.");
}
/**
* Three-way comparison used for the ordering operators
* ($(D_INLINECODE <), $(D_INLINECODE <=), $(D_INLINECODE >),
* $(D_INLINECODE >=)).
*
* The bytes of both operands are passed to $(D_PSYMBOL compare); ranges are
* compared by the bytes they cover.
*
* Params:
* S = Right hand side type.
* that = The string to compare with.
*
* Returns: A positive number if $(D_KEYWORD this) is lexicographically
* greater than $(D_PARAM that), if equal `0`, else `-1`.
*/
int opCmp(S)(auto ref S that) const @trusted
if (is(Unqual!S == String))
{
return compare(this.data[0 .. length], that.data[0 .. that.length]);
}
/// ditto
int opCmp(S)(ByCodeUnit!S that) const @trusted
if (is(Unqual!S == char))
{
return compare(this.data[0 .. length],
that.begin[0 .. that.end - that.begin]);
}
/// ditto
int opCmp(S)(ByCodePoint!S that) const @trusted
if (is(Unqual!S == char))
{
return compare(this.data[0 .. length],
that.begin[0 .. that.end - that.begin]);
}
/// ditto
int opCmp()(const char[] that) const @trusted
{
return compare(this.data[0 .. length], that);
}
///
@nogc nothrow pure @safe unittest
{
assert(String("Голубая кофта.") < String("Синие глаза."));
assert(String("Никакой я правды") < String("милой не сказал")[]);
}
/**
* Comparison for equality.
*
* Two strings are compared by their bytes; capacity and allocator don't
* take part in the comparison.
*
* Params:
* S = Right hand side type.
* that = The string to compare with.
*
* Returns: $(D_KEYWORD true) if the strings are equal, $(D_KEYWORD false)
* otherwise.
*/
bool opEquals(S)(auto ref S that) const @trusted
if (is(Unqual!S == String))
{
return equal(this.data[0 .. length], that.data[0 .. that.length]);
}
/**
* Comparison for equality.
*
* The string is compared with the bytes covered by the range.
*
* Params:
* that = Right hand side string range.
*
* Returns: $(D_KEYWORD true) if the string and the range are equal,
* $(D_KEYWORD false) otherwise.
*/
bool opEquals(S)(ByCodeUnit!S that) const @trusted
if (is(Unqual!S == char))
{
return equal(this.data[0 .. length],
that.begin[0 .. that.end - that.begin]);
}
/// ditto
bool opEquals(S)(ByCodePoint!S that) const @trusted
if (is(Unqual!S == char))
{
return equal(this.data[0 .. length],
that.begin[0 .. that.end - that.begin]);
}
/// ditto
bool opEquals()(const char[] that) const @trusted
{
return equal(this.data[0 .. length], that);
}
///
@nogc nothrow pure @safe unittest
{
assert(String("Милая спросила:") != String("Крутит ли метель?"));
assert(String("Затопить бы печку,") != String("постелить постель.")[]);
assert(const String("Я ответил милой:") != String("Нынче с высоты"));
assert(String("Кто-то осыпает") != "белые цветы");
assert(const String("Затопи ты печку,") != String("постели постель,")[]);
auto s = const String("У меня на сердце");
assert(s[] != String("без тебя метель."));
assert(s == s);
assert(s == s[]);
assert(s == "У меня на сердце");
}
/**
* Assigns a value to the character with the index $(D_PARAM pos).
*
* Exactly one byte is overwritten; the value isn't validated.
*
* Params:
* value = Value.
* pos = Position.
*
* Returns: Assigned value.
*
* Precondition: $(D_INLINECODE length > pos).
*/
ref char opIndexAssign(const char value, const size_t pos)
@nogc nothrow pure @safe
{
// The precondition is checked by opIndex.
return opIndex(pos) = value;
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("alea iacta est.");
s[0] = 'A';
assert(s[0] == 'A');
}
/**
* Slicing assignment.
*
* Overwrites the whole string; equivalent to a slice assignment with the
* bounds $(D_INLINECODE 0) and $(D_PSYMBOL length).
*
* Params:
* R = $(D_KEYWORD char).
* value = Assigned character, range or string.
*
* Returns: Range over the string.
*
* Precondition: $(D_INLINECODE length == value.length).
*/
ByCodeUnit!char opIndexAssign(R)(ByCodeUnit!R value)
if (is(Unqual!R == char))
{
return opSliceAssign(value, 0, length);
}
@nogc nothrow pure @safe unittest
{
auto s1 = String("Buttercup");
auto s2 = String("Cap");
s2[] = s1[6 .. $];
assert(s2 == "cup");
}
/// ditto
ByCodeUnit!char opIndexAssign(const char value) @nogc nothrow pure @safe
{
return opSliceAssign(value, 0, length);
}
@nogc nothrow pure @safe unittest
{
auto s1 = String("Wow");
s1[] = 'a';
assert(s1 == "aaa");
}
/// ditto
ByCodeUnit!char opIndexAssign(const char[] value) @nogc nothrow pure @safe
{
return opSliceAssign(value, 0, length);
}
@nogc nothrow pure @safe unittest
{
// "ö" takes 2 bytes, as many as "oe".
auto s1 = String("ö");
s1[] = "oe";
assert(s1 == "oe");
}
/**
* Remove all characters belonging to $(D_PARAM r).
*
* The bytes following $(D_PARAM r) are moved to the position where
* $(D_PARAM r) began, and the length is reduced by the byte size of
* $(D_PARAM r). The capacity isn't changed.
*
* Params:
* R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
* r = Range originally obtained from this string.
*
* Returns: A range spanning the remaining characters in the string that
* initially were right after $(D_PARAM r).
*
* Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
*/
R remove(R)(R r) @trusted
if (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char))
in
{
assert(r.container is &this);
assert(r.begin >= this.data);
assert(r.end <= this.data + length);
}
do
{
auto end = this.data + this.length;
// Move the tail [r.end, end) to r.begin.
copy(ByCodeUnit!char(this, r.end, end), ByCodeUnit!char(this, r.begin, end));
this.length_ = length - (r.end - r.begin);
// The moved tail now starts at r.begin and reaches the new end.
return R(this, r.begin, this.data + length);
}
///
@nogc pure @safe unittest
{
import tanya.algorithm.searching : count;
auto s = String("Из пословицы слова не выкинешь.");
assert(s.remove(s[5 .. 24]).length == 33);
assert(s == "Из слова не выкинешь.");
assert(s.length == 38);
auto byCodePoint = s.byCodePoint();
popFrontN(byCodePoint, 8);
assert(s.remove(byCodePoint).count == 0);
assert(s == "Из слова");
assert(s.remove(s[]).length == 0);
assert(s.length == 0);
assert(s.remove(s[]).length == 0);
}
/**
* Inserts $(D_PARAM el) before or after $(D_PARAM r).
*
* $(D_PARAM el) is appended to the end of the string first and then rotated
* into its final position.
*
* Params:
* R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
* T = Stringish type.
* r = Range originally obtained from this string.
* el = Value(s) should be inserted.
*
* Returns: The number of bytes inserted.
*
* Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
*/
size_t insertAfter(T, R)(R r, T el) @trusted
if ((isSomeChar!T || (!isInfinite!T
&& isInputRange!T
&& isSomeChar!(ElementType!T)))
&& (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
in
{
assert(r.container is &this);
assert(r.begin >= this.data);
assert(r.end <= this.data + length);
}
do
{
const oldLength = length;
// Keep the position as an offset: insertBack may reallocate the storage.
const rangeEnd = r.end - this.data;
const inserted = insertBack(el);
auto containerEnd = this.data + oldLength;
// Swap the old tail [rangeEnd, oldLength) with the appended bytes.
bringToFront(ByCodeUnit!char(this, this.data + rangeEnd, containerEnd),
ByCodeUnit!char(this, containerEnd, this.data + length));
return inserted;
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Казнить нельзя помиловать.");
s.insertAfter(s[0 .. 27], ",");
assert(s == "Казнить нельзя, помиловать.");
s = String("Казнить нельзя помиловать.");
s.insertAfter(s[0 .. 14], ',');
assert(s == "Казнить, нельзя помиловать.");
}
/**
* Inserts $(D_PARAM el) before $(D_PARAM r).
*
* Params:
* R = $(D_PSYMBOL ByCodeUnit) or $(D_PSYMBOL ByCodePoint).
* T = Stringish type.
* r = Range originally obtained from this string.
* el = Value(s) should be inserted.
*
* Returns: The value returned by $(D_PSYMBOL insertAfter).
*
* Precondition: $(D_PARAM r) refers to a region of $(D_KEYWORD this).
*/
size_t insertBefore(T, R)(R r, T el) @trusted
if ((isSomeChar!T || (!isInfinite!T
&& isInputRange!T
&& isSomeChar!(ElementType!T)))
&& (is(R == ByCodeUnit!char) || is(R == ByCodePoint!char)))
in
{
assert(r.container is &this);
assert(r.begin >= this.data);
assert(r.end <= this.data + length);
}
do
{
// Inserting before r is inserting after everything that precedes r.
return insertAfter(R(this, this.data, r.begin), el);
}
///
@nogc nothrow pure @safe unittest
{
auto s = String("Казнить нельзя помиловать.");
s.insertBefore(s[27 .. $], ",");
assert(s == "Казнить нельзя, помиловать.");
s = String("Казнить нельзя помиловать.");
s.insertBefore(s[14 .. $], ',');
assert(s == "Казнить, нельзя помиловать.");
}
/**
* Calculates the hash value for the string.
*
* The hash is computed by $(D_PSYMBOL hash) over the bytes returned by
* $(D_PSYMBOL get).
*
* Returns: Hash value for the string.
*/
size_t toHash() const @nogc nothrow pure @safe
{
return hash(get);
}
mixin DefaultAllocator;
}
// Postblit works
// The string is passed by value through several call levels, including a
// variadic template that iterates over its arguments.
@nogc nothrow pure @safe unittest
{
void internFunc(String arg)
{
}
void middleFunc(S...)(S args)
{
foreach (arg; args)
{
internFunc(arg);
}
}
void topFunc(String args)
{
middleFunc(args);
}
topFunc(String("asdf"));
}
// Const range produces mutable ranges
// Slicing a const range yields a range that can be advanced itself, while
// its elements stay const.
@nogc pure @safe unittest
{
auto s = const String("И снизу лед, и сверху - маюсь между.");
{
const constRange = s[];
auto fromConstRange = constRange[];
fromConstRange.popFront();
assert(fromConstRange.front == s[1]);
fromConstRange = constRange[0 .. $];
fromConstRange.popFront();
assert(fromConstRange.front == s[1]);
// get() returns the string's own storage, not a copy.
assert(constRange.get() is s.get());
}
{
const constRange = s.byCodePoint();
auto fromConstRange = constRange[];
fromConstRange.popFront();
assert(fromConstRange.front == ' ');
}
}
// Can pop multibyte characters
@nogc pure @safe unittest
{
// A 4-byte character followed by a 3-byte character.
auto s = String("\U00024B62\U00002260");
auto range = s.byCodePoint();
range.popFront();
assert(!range.empty);
range.popFront();
assert(range.empty);
range = s.byCodePoint();
range.popFront();
// Replace the first byte of the trailing 3-byte character with the start
// of a 4-byte sequence: the announced sequence now exceeds the string.
s[$ - 3] = 0xf0;
assertThrown!UTFException(&(range.popFront));
}
// Inserts own char range correctly
// A ByCodeUnit range of one string is appended to another, empty string.
@nogc nothrow pure @safe unittest
{
auto s1 = String(`ü`);
String s2;
s2.insertBack(s1[]);
assert(s1 == s2);
}