Make real2String more readable

2017-11-12 13:02:22 +01:00 · 2017-11-12 13:02:22 +01:00 · a38242d0ac
commit a38242d0ac
parent a84c71f26d
1 changed files with 541 additions and 0 deletions
--- a/source/tanya/format/package.d
+++ b/source/tanya/format/package.d
@ -14,9 +14,16 @@
 */
 module tanya.format;

+import tanya.container.string;
 public import tanya.format.conv;
+import tanya.math;
+import tanya.memory.op;
+import tanya.meta.metafunction;
 import tanya.meta.trait;

+// Integer and floating point to string conversion is based on stb_sprintf
+// written by Jeff Roberts.
+
 // Returns the last part of buffer with converted number.
 package(tanya) char[] integral2String(T)(T number, return ref char[21] buffer)
 if (isIntegral!T)
@ -93,3 +100,537 @@ if (isIntegral!T)
    assert(integral2String(uint.max, buf) == "4294967295");
    assert(integral2String(int.min, buf) == "-2147483648");
 }
+
+/*
+ * Double-double high-precision floating point number.
+ *
+ * The first element is a base value corresponding to the nearest approximation
+ * of the target $(D_PSYMBOL HP) value, and the second element is an offset
+ * value corresponding to the difference between the target value and the base.
+ * Thus, the $(D_PSYMBOL HP) value represented is the sum of the base and the
+ * offset.
+ */
+private struct HP
+{
+    private double base;
+    private double offset = 0.0;
+
+    private void normalize() @nogc nothrow pure @safe
+    {
+        const double target = this.base + this.offset;
+        this.offset -= target - this.base;
+        this.base = target;
+    }
+
+    private void multiply(ref const HP x, ref const HP y)
+    @nogc nothrow pure @safe
+    {
+        double ahi, bhi;
+        long bt;
+        this.base = x.base * y.base;
+        copyFp(x.base, bt);
+        bt &= (~cast(ulong) 0) << 27;
+        copyFp(bt, ahi);
+
+        double alo = x.base - ahi;
+        copyFp(y.base, bt);
+        bt &= (~cast(ulong) 0) << 27;
+        copyFp(bt, bhi);
+
+        double blo = y.base - bhi;
+        this.offset = ahi * bhi - this.base + ahi * blo + alo * bhi + alo * blo;
+        this.offset += x.base * y.offset + x.offset * y.base;
+    }
+}
+
+private enum int special = 0x7000;
+
+private static const ulong[20] powersOf10 = [
+    1,
+    10,
+    100,
+    1000,
+    10000,
+    100000,
+    1000000,
+    10000000,
+    100000000,
+    1000000000,
+    10000000000UL,
+    100000000000UL,
+    1000000000000UL,
+    10000000000000UL,
+    100000000000000UL,
+    1000000000000000UL,
+    10000000000000000UL,
+    100000000000000000UL,
+    1000000000000000000UL,
+    10000000000000000000UL,
+];
+
+private static const char[201] digitPairs =
+    "0001020304050607080910111213141516171819202122232425262728293031323334353"
+  ~ "6373839404142434445464748495051525354555657585960616263646566676869707172"
+  ~ "737475767778798081828384858687888990919293949596979899";
+
+private static const HP[23] bottom = [
+    HP(1e+000), HP(1e+001), HP(1e+002), HP(1e+003), HP(1e+004), HP(1e+005),
+    HP(1e+006), HP(1e+007), HP(1e+008), HP(1e+009), HP(1e+010), HP(1e+011),
+    HP(1e+012), HP(1e+013), HP(1e+014), HP(1e+015), HP(1e+016), HP(1e+017),
+    HP(1e+018), HP(1e+019), HP(1e+020), HP(1e+021), HP(1e+022),
+];
+
+private static const HP[22] negativeBottom = [
+    HP(1e-001, -5.551115123125783e-018),
+    HP(1e-002, -2.0816681711721684e-019),
+    HP(1e-003, -2.0816681711721686e-020),
+    HP(1e-004, -4.7921736023859299e-021),
+    HP(1e-005, -8.1803053914031305e-022),
+    HP(1e-006, 4.5251888174113741e-023),
+    HP(1e-007, 4.5251888174113739e-024),
+    HP(1e-008, -2.0922560830128471e-025),
+    HP(1e-009, -6.2281591457779853e-026),
+    HP(1e-010, -3.6432197315497743e-027),
+    HP(1e-011, 6.0503030718060191e-028),
+    HP(1e-012, 2.0113352370744385e-029),
+    HP(1e-013, -3.0373745563400371e-030),
+    HP(1e-014, 1.1806906454401013e-032),
+    HP(1e-015, -7.7705399876661076e-032),
+    HP(1e-016, 2.0902213275965398e-033),
+    HP(1e-017, -7.1542424054621921e-034),
+    HP(1e-018, -7.1542424054621926e-035),
+    HP(1e-019, 2.4754073164739869e-036),
+    HP(1e-020, 5.4846728545790429e-037),
+    HP(1e-021, 9.2462547772103625e-038),
+    HP(1e-022, -4.8596774326570872e-039),
+];
+
+private static const HP[13] top = [
+    HP(1e+023, 8388608),
+    HP(1e+046, 6.8601809640529717e+028),
+    HP(1e+069, -7.253143638152921e+052),
+    HP(1e+092, -4.3377296974619174e+075),
+    HP(1e+115, -1.5559416129466825e+098),
+    HP(1e+138, -3.2841562489204913e+121),
+    HP(1e+161, -3.7745893248228135e+144),
+    HP(1e+184, -1.7356668416969134e+167),
+    HP(1e+207, -3.8893577551088374e+190),
+    HP(1e+230, -9.9566444326005119e+213),
+    HP(1e+253, 6.3641293062232429e+236),
+    HP(1e+276, -5.2069140800249813e+259),
+    HP(1e+299, -5.2504760255204387e+282),
+];
+
+private static const HP[13] negativeTop = [
+    HP(1e-023, 3.9565301985100693e-040L),
+    HP(1e-046, -2.299904345391321e-063L),
+    HP(1e-069, 3.6506201437945798e-086L),
+    HP(1e-092, 1.1875228833981544e-109L),
+    HP(1e-115, -5.0644902316928607e-132L),
+    HP(1e-138, -6.7156837247865426e-155L),
+    HP(1e-161, -2.812077463003139e-178L),
+    HP(1e-184, -5.7778912386589953e-201L),
+    HP(1e-207, 7.4997100559334532e-224L),
+    HP(1e-230, -4.6439668915134491e-247L),
+    HP(1e-253, -6.3691100762962136e-270L),
+    HP(1e-276, -9.436808465446358e-293L),
+    HP(1e-299, 8.0970921678014997e-317L),
+];
+
+/*
+ * Copies double into long and back bitwise.
+ */
+private void copyFp(T, U)(ref const U src, ref T dest) @trusted
+if (T.sizeof == U.sizeof)
+{
+    copy((&src)[0 .. 1], (&dest)[0 .. 1]);
+}
+
+// Power can be -323 to +350.
+private HP raise2Power10(const HP value, int power)
+@nogc nothrow pure @safe
+{
+    HP result;
+    if ((power >= 0) && (power <= 22))
+    {
+        result.multiply(value, bottom[power]);
+    }
+    else
+    {
+        HP p2;
+        int e = power;
+
+        if (power < 0)
+        {
+            e = -e;
+        }
+        int et = (e * 0x2c9) >> 14; // % 23
+        if (et > 13)
+        {
+            et = 13;
+        }
+        int eb = e - (et * 23);
+
+        result = value;
+        if (power < 0)
+        {
+            if (eb != 0)
+            {
+                --eb;
+                result.multiply(value, negativeBottom[eb]);
+            }
+            if (et)
+            {
+                result.normalize();
+                --et;
+                p2.multiply(result, negativeTop[et]);
+                result = p2;
+            }
+        }
+        else
+        {
+            if (eb != 0)
+            {
+                e = eb;
+                if (eb > 22)
+                {
+                    eb = 22;
+                }
+                e -= eb;
+                result.multiply(value, bottom[eb]);
+                if (e)
+                {
+                    result.normalize();
+                    p2.multiply(result, bottom[e]);
+                    result = p2;
+                }
+            }
+            if (et != 0)
+            {
+                result.normalize();
+                --et;
+                p2.multiply(result, top[et]);
+                result = p2;
+            }
+        }
+    }
+    result.normalize();
+
+    return result;
+}
+
+/*
+ * Given a float value, returns the significant bits in bits, and the position
+ * of the decimal point in $(D_PARAM decimalPos). +/-Inf and NaN are specified
+ * by special values returned in the $(D_PARAM decimalPos). Sing bit is set in
+ * $(D_PARAM sign).
+ */
+private const(char)[] real2String(double value,
+                                  ref char[512] buffer,
+                                  out int exponent,
+                                  out bool sign) @nogc nothrow pure @trusted
+{
+    long bits;
+    copyFp(value, bits);
+
+    exponent = (bits >> 52) & 0x7ff;
+    sign = signBit(value);
+    if (sign)
+    {
+        value = -value;
+    }
+
+    if (exponent == 2047) // Is NaN or Inf?
+    {
+        exponent = special;
+        return (bits & ((1UL << 52) - 1)) != 0 ? "NaN" : "Inf";
+    }
+
+    if (exponent == 0) // Is zero or denormal?
+    {
+        if ((bits << 1) == 0) // Zero.
+        {
+            exponent = 1;
+            buffer[0] = '0';
+            return buffer[0 .. 1];
+        }
+
+        // Find the right exponent for denormals.
+        for (long cursor = 1UL << 51; (bits & cursor) == 0; cursor >>= 1)
+        {
+            --exponent;
+        }
+    }
+
+    // "617 / 2048" and "1233 / 4096" are estimations for the common logarithm
+    // (log10) of 2. Multiplied by a binary number it tells how big the number
+    // is in decimals, so it translates the binary exponent into decimal
+    // format. The estimation is tweaked to hit or undershoot by no more than
+    // 1 of log10 of all exponents 1..2046.
+    int tens = exponent - 1023; // Bias.
+    if (tens < 0)
+    {
+        tens = tens * 617 / 2048;
+    }
+    else
+    {
+        tens = tens * 1233 / 4096 + 1;
+    }
+
+    // Move the significant bits into position and stick them into an int.
+    HP p = raise2Power10(HP(value), 18 - tens);
+
+    // Get full as much precision from double-double as possible.
+    bits = cast(long) p.base;
+    double vh = cast(double) bits;
+    double ahi = p.base - vh;
+    double t = ahi - p.base;
+    double alo = p.base - ahi + t - vh - t;
+    bits += cast(long) (ahi + alo + p.offset);
+
+    // Check if we undershot (bits >= 10 ^ 19).
+    if ((cast(ulong) bits) >= 1000000000000000000UL)
+    {
+        ++tens;
+    }
+
+    // Now do the rounding in integer land.
+    enum uint fracDigits = 6;
+
+    uint dg = 1;
+    if ((cast(ulong) bits) >= powersOf10[9])
+    {
+        dg = 10;
+    }
+    uint length;
+    while ((cast(ulong) bits) >= powersOf10[dg])
+    {
+        ++dg;
+        if (dg == 20)
+        {
+            goto NoRound;
+        }
+    }
+    if (fracDigits < dg)
+    {
+        // Add 0.5 at the right position and round.
+        length = dg - fracDigits;
+        if (length >= 24)
+        {
+            goto NoRound;
+        }
+        ulong r = powersOf10[length];
+        bits = bits + (r / 2);
+        if ((cast(ulong) bits) >= powersOf10[dg])
+        {
+            ++tens;
+        }
+        bits /= r;
+    }
+NoRound:
+
+    // Kill long trailing runs of zeros.
+    if (bits)
+    {
+        for (;;)
+        {
+            if (bits <= 0xffffffff)
+            {
+                break;
+            }
+            if (bits % 1000)
+            {
+                goto Zeroed;
+            }
+            bits /= 1000;
+        }
+        auto n = cast(uint) bits;
+        while ((n % 1000) == 0)
+        {
+            n /= 1000;
+        }
+        bits = n;
+    }
+Zeroed:
+
+    // Convert to string.
+    auto result = buffer.ptr + 64;
+    length = 0;
+    while (true)
+    {
+        uint n;
+        char* o = result - 8;
+        // Do the conversion in chunks of U32s (avoid most 64-bit divides,
+        // worth it, constant denomiators be damned).
+        if (bits >= 100000000)
+        {
+            n = cast(uint) (bits % 100000000);
+            bits /= 100000000;
+        }
+        else
+        {
+            n = cast(uint) bits;
+            bits = 0;
+        }
+        while (n)
+        {
+            result -= 2;
+            *cast(ushort*) result = *cast(ushort*) &digitPairs[(n % 100) * 2];
+            n /= 100;
+            length += 2;
+        }
+        if (bits == 0)
+        {
+            if ((length != 0) && (result[0] == '0'))
+            {
+                ++result;
+                --length;
+            }
+            break;
+        }
+        for (; result !is o; ++length, --result)
+        {
+            *result = '0';
+        }
+    }
+    exponent = tens;
+    return result[0 .. length];
+}
+
+package(tanya) String format(string fmt, Args...)(Args args)
+{
+    String ret;
+
+    foreach (spec; ParseFmt!(fmt, 0))
+    {
+        static if (is(typeof(spec) == string))
+        {
+            ret.insertBack(spec);
+        }
+        else static if (isSomeChar!(Args[0]))
+        {
+            ret.insertBack(args[0]);
+        }
+        else static if (isSomeString!(Args[0]))
+        {
+            if (args[0] is null)
+            {
+                ret.insertBack("null");
+            }
+            else
+            {
+                ret.insertBack(args[0]);
+            }
+        }
+        else static if (isIntegral!(Args[0]))
+        {
+            char[21] buffer = void;
+            ret.insertBack(integral2String(args[0], buffer));
+        }
+        else static if (isFloatingPoint!(Args[0]))
+        {
+            int precision = 6;
+            bool negative;
+            int decimalPos;
+            uint l;
+            char[512] num;
+
+            // Read the double into a string
+            const(char)[] sn = real2String(args[0], num, decimalPos, negative);
+        }
+        else
+        {
+            static assert(false, "Unable to format " ~ Args[0].stringof);
+        }
+    }
+
+    return ret;
+}
+
+@nogc pure @system unittest
+{
+    // Format without arguments.
+    assert(format!""() == "");
+    assert(format!"asdfqweryxcvz"() == "asdfqweryxcvz");
+
+    // String printing.
+    assert(format!"{}"('c') == "c"); // char.
+    assert(format!"{}"('с') == "с"); // wchar.
+
+    assert(format!"{}"("Протопи ты мне баньку по-белому!")
+                    == "Протопи ты мне баньку по-белому!");
+    assert(format!"{}"(cast(string) null) == "null");
+
+    // Integer conversions.
+    assert(format!"{}"(8) == "8");
+    assert(format!"{}"(8U) == "8");
+    assert(format!"{}"(-8) == "-8");
+    assert(format!"{}"(-8L) == "-8");
+
+    // Floating point conversions.
+    auto f = format!"{}"(1.2345);
+}
+
+private struct FormatSpec
+{
+}
+
+// Returns the position of `tag` in `fmt`. If `tag` can't be found, returns the
+// length of  `fmt`.
+private size_t specPosition(string fmt, char tag)()
+{
+    foreach (i, c; fmt)
+    {
+        if (c == tag)
+        {
+            return i;
+        }
+    }
+    return fmt.length;
+}
+
+private template ParseFmt(string fmt, size_t pos = 0)
+{
+    static if (fmt.length == 0)
+    {
+        alias ParseFmt = AliasSeq!();
+    }
+    else static if (fmt[0] == '{')
+    {
+        static if (fmt.length > 1 && fmt[1] == '{')
+        {
+            enum size_t pos = specPosition!(fmt[2 .. $], '{') + 2;
+            alias ParseFmt = AliasSeq!(fmt[1 .. pos],
+                                       ParseFmt!(fmt[pos .. $], pos));
+        }
+        else
+        {
+            enum size_t pos = specPosition!(fmt[1 .. $], '}') + 1;
+            static if (pos < fmt.length)
+            {
+                alias ParseFmt = AliasSeq!(FormatSpec(),
+                                           ParseFmt!(fmt[pos + 1 .. $], pos + 1));
+            }
+            else
+            {
+                static assert(false, "Enclosing '}' is missing");
+            }
+        }
+    }
+    else
+    {
+        enum size_t pos = specPosition!(fmt, '{');
+        alias ParseFmt = AliasSeq!(fmt[0 .. pos],
+                                   ParseFmt!(fmt[pos .. $], pos));
+    }
+}
+
+@nogc nothrow pure @safe unittest
+{
+    static assert(ParseFmt!"".length == 0);
+
+    static assert(ParseFmt!"asdf".length == 1);
+    static assert(ParseFmt!"asdf"[0] == "asdf");
+
+    static assert(ParseFmt!"{}".length == 1);
+}