Fix inserting 3 byte wchar into String

* Fix inserting 3 byte wchar into String
* Improve documentation
This commit is contained in:
Eugen Wissner 2017-07-09 15:16:06 +02:00
parent 97358ebc6c
commit 7bdc778390

View File

@ -3,7 +3,19 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/** /**
* UTF-8 string. * UTF-8 encoded string.
*
* You can create a $(D_PSYMBOL String) from a literal string, single character
* or character range. Characters can be of the type $(D_KEYWORD char),
* $(D_KEYWORD wchar) or $(D_KEYWORD dchar). Literal strings, characters and
* character ranges can also be inserted into an existing string.
*
* $(D_PSYMBOL String) is always valid UTF-8. Inserting an invalid sequence
* or working on a corrupted $(D_PSYMBOL String) causes
* $(D_PSYMBOL UTFException) to be thrown.
*
* Internally $(D_PSYMBOL String) is represented by a sequence of
* $(D_KEYWORD char)s.
* *
* Copyright: Eugene Wissner 2017. * Copyright: Eugene Wissner 2017.
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
@ -70,7 +82,7 @@ class UTFException : Exception
* E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))). * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
*/ */
struct ByCodeUnit(E) struct ByCodeUnit(E)
if (is(Unqual!E == char)) if (is(Unqual!E == char))
{ {
private E* begin, end; private E* begin, end;
private alias ContainerType = CopyConstness!(E, String); private alias ContainerType = CopyConstness!(E, String);
@ -212,7 +224,7 @@ struct ByCodeUnit(E)
* E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))). * E = Element type ($(D_KEYWORD char) or $(D_INLINECODE const(char))).
*/ */
struct ByCodePoint(E) struct ByCodePoint(E)
if (is(Unqual!E == char)) if (is(Unqual!E == char))
{ {
private E* begin, end; private E* begin, end;
private alias ContainerType = CopyConstness!(E, String); private alias ContainerType = CopyConstness!(E, String);
@ -537,7 +549,7 @@ struct String
if (is(C == wchar) || is(C == dchar)) if (is(C == wchar) || is(C == dchar))
in in
{ {
assert(capacity - length >= C.sizeof); assert(capacity - length >= 3);
} }
body body
{ {
@ -555,7 +567,7 @@ struct String
this.length_ += 2; this.length_ += 2;
return 2; return 2;
} }
else if (chr < 0xd800 || chr - 0xe000 < 0x2000) else if (chr < 0xd800 || (chr >= 0xe000 && chr <= 0xffff))
{ {
*dst++ = 0xe0 | (chr >> 12) & 0xff; *dst++ = 0xe0 | (chr >> 12) & 0xff;
*dst++ = 0x80 | ((chr >> 6) & 0x3f); *dst++ = 0x80 | ((chr >> 6) & 0x3f);
@ -593,9 +605,9 @@ struct String
/// Ditto. /// Ditto.
size_t insertBack(const wchar chr) @trusted @nogc size_t insertBack(const wchar chr) @trusted @nogc
{ {
reserve(length + wchar.sizeof); reserve(length + 3);
auto ret = insertWideChar(chr); const ret = insertWideChar(chr);
if (ret == 0) if (ret == 0)
{ {
throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce"); throw defaultAllocator.make!UTFException("Invalid UTF-16 sequeunce");
@ -603,12 +615,34 @@ struct String
return ret; return ret;
} }
// Regression test: insertBack must allocate enough space for a wchar that
// encodes to three UTF-8 bytes. 0x8100 is below the surrogate range, so it is
// a valid single UTF-16 code unit; the test passes if the call completes
// without tripping the capacity assertion or throwing.
private @safe @nogc unittest
{
String s; // NOTE(review): presumably starts empty with no reserved capacity — confirm
s.insertBack('\u8100');
}
// Constructing a String from a lone surrogate wchar (0xd900 lies within
// 0xd800..0xdfff) is expected to throw UTFException, because a surrogate on
// its own is not a valid UTF-16 sequence.
private @safe @nogc unittest
{
UTFException exception;
try
{
auto s = String(1, cast(wchar) 0xd900);
}
catch (UTFException e)
{
// Keep the exception so it can be checked and released after the try block.
exception = e;
}
// Fails if the constructor did not throw.
assert(exception !is null);
// NOTE(review): assumes the exception was created with defaultAllocator.make,
// as insertBack does for wchar — confirm the constructor goes through that path.
defaultAllocator.dispose(exception);
}
/// Ditto. /// Ditto.
size_t insertBack(const dchar chr) @trusted @nogc size_t insertBack(const dchar chr) @trusted @nogc
{ {
reserve(length + dchar.sizeof); reserve(length + dchar.sizeof);
auto ret = insertWideChar(chr); const ret = insertWideChar(chr);
if (ret > 0) if (ret > 0)
{ {
return ret; return ret;
@ -624,6 +658,21 @@ struct String
} }
} }
// Constructing a String from a dchar in the surrogate range (0xd900 lies
// within 0xd800..0xdfff) is expected to throw UTFException, because surrogate
// values are not valid Unicode code points for UTF-8 encoding.
private @safe @nogc unittest
{
UTFException exception;
try
{
auto s = String(1, cast(dchar) 0xd900);
}
catch (UTFException e)
{
// Keep the exception so it can be checked and released after the try block.
exception = e;
}
// Fails if the constructor did not throw.
assert(exception !is null);
// NOTE(review): assumes the exception was allocated with defaultAllocator — confirm.
defaultAllocator.dispose(exception);
}
/** /**
* Inserts a stringish range at the end of the string. * Inserts a stringish range at the end of the string.
* *