From 18d54b4b186c5a05b054a015c966950dfba68c19 Mon Sep 17 00:00:00 2001 From: Eugene Wissner Date: Sun, 29 Apr 2018 09:12:48 +0200 Subject: [PATCH] HashArray as an internal store for hash containers --- source/tanya/container/entry.d | 109 ++++++++++++++++++++++++---- source/tanya/container/set.d | 128 +++++++++------------------------ 2 files changed, 130 insertions(+), 107 deletions(-) diff --git a/source/tanya/container/entry.d b/source/tanya/container/entry.d index 9dc2eb0..8822924 100644 --- a/source/tanya/container/entry.d +++ b/source/tanya/container/entry.d @@ -14,6 +14,8 @@ */ module tanya.container.entry; +import tanya.algorithm.mutation; +import tanya.container.array; import tanya.meta.trait; import tanya.typecons; @@ -44,6 +46,9 @@ package enum BucketStatus : byte package struct Bucket(K, V = void) { + private alias Key = K; + private alias Value = V; + @property void key(ref K key) { this.key_ = key; @@ -108,18 +113,98 @@ package static immutable size_t[33] primes = [ 805306457, 1610612741, 3221225473 ]; -/* - * Returns bucket position for `hash`. `0` may mean the 0th position or an - * empty `buckets` array. - */ -package size_t locateBucket(T)(ref const T buckets, const size_t hash) +package struct HashArray(alias hasher, K, V = void) { - return buckets.length == 0 ? 0 : hash % buckets.length; -} + alias Bucket = .Bucket!(K, V); + alias Buckets = Array!Bucket; -package enum InsertStatus : byte -{ - found = -1, - failed = 0, - added = 1, + Array!Bucket array; + size_t lengthIndex; + + /* + * Returns bucket position for `hash`. `0` may mean the 0th position or an + * empty `buckets` array. + */ + size_t locateBucket(ref const K key) const + { + return this.array.length == 0 ? 0 : hasher(key) % this.array.length; + } + + /* + * Inserts the value in an empty or deleted bucket. If the value is + * already in there, does nothing and returns InsertStatus.found. If the + * hash array is full returns InsertStatus.failed. Otherwise, + * InsertStatus.added is returned. + */ + ref Bucket insert(ref K key) + { + while (true) + { + auto bucketPosition = locateBucket(key); + + foreach (ref e; this.array[bucketPosition .. $]) + { + if (e == key || e.status != BucketStatus.used) + { + return e; + } + } + + if (primes.length == (this.lengthIndex + 1)) + { + this.array.insertBack(Bucket(key)); + return this.array[$ - 1]; + } + if (this.rehashToSize(this.lengthIndex + 1)) + { + ++this.lengthIndex; + } + } + } + + // Takes an index in the primes array. + bool rehashToSize(const size_t n) + { + auto storage = typeof(this.array)(primes[n], this.array.allocator); + DataLoop: foreach (ref e1; this.array[]) + { + if (e1.status == BucketStatus.used) + { + auto bucketPosition = hasher(e1.key) % storage.length; + + foreach (ref e2; storage[bucketPosition .. $]) + { + if (e2.status != BucketStatus.used) // Insert the value. + { + e2 = e1; + continue DataLoop; + } + } + return false; // Rehashing failed. + } + } + move(storage, this.array); + return true; + } + + void rehash(const size_t n) + { + size_t lengthIndex; + for (; lengthIndex < primes.length; ++lengthIndex) + { + if (primes[lengthIndex] >= n) + { + break; + } + } + if (this.rehashToSize(lengthIndex)) + { + this.lengthIndex = lengthIndex; + } + } + + @property size_t capacity() const + { + return this.array.length; + } } diff --git a/source/tanya/container/set.d b/source/tanya/container/set.d index 772e9b4..7d7449c 100644 --- a/source/tanya/container/set.d +++ b/source/tanya/container/set.d @@ -16,7 +16,7 @@ module tanya.container.set; import tanya.algorithm.mutation; -import tanya.container; +import tanya.container.array; import tanya.container.entry; import tanya.hash.lookup; import tanya.memory; @@ -155,6 +155,10 @@ struct Range(E) struct Set(T, alias hasher = hash) if (is(typeof(hasher(T.init)) == size_t)) { + private HashArray!(hasher, T) data; + + private alias Buckets = typeof(this.data).Buckets; + /// The range types for $(D_PSYMBOL Set). alias Range = .Range!T; @@ -163,9 +167,9 @@ if (is(typeof(hasher(T.init)) == size_t)) invariant { - assert(this.lengthIndex < primes.length); - assert(this.data.length == 0 - || this.data.length == primes[this.lengthIndex]); + assert(this.data.lengthIndex < primes.length); + assert(this.data.array.length == 0 + || this.data.array.length == primes[this.data.lengthIndex]); } /** @@ -196,7 +200,7 @@ if (is(typeof(hasher(T.init)) == size_t)) } do { - this.data = typeof(this.data)(allocator); + this.data = typeof(this.data)(Buckets(allocator)); } /** @@ -218,7 +222,7 @@ if (is(typeof(hasher(T.init)) == size_t)) } do { - this.data = typeof(this.data)(init.data, allocator); + this.data = typeof(this.data)(Buckets(init.data, allocator)); } /// ditto @@ -230,7 +234,7 @@ if (is(typeof(hasher(T.init)) == size_t)) } do { - this.data = typeof(this.data)(move(init.data), allocator); + this.data = typeof(this.data)(Buckets(move(init.data), allocator)); this.lengthIndex = init.lengthIndex; init.lengthIndex = 0; } @@ -276,7 +280,7 @@ if (is(typeof(hasher(T.init)) == size_t)) } do { - return cast(shared Allocator) this.data.allocator; + return this.data.array.allocator; } /** @@ -290,7 +294,7 @@ if (is(typeof(hasher(T.init)) == size_t)) */ @property size_t capacity() const { - return this.data.length; + return this.data.capacity; } /// @@ -311,7 +315,7 @@ if (is(typeof(hasher(T.init)) == size_t)) @property size_t length() const { size_t count; - foreach (ref e; this.data[]) + foreach (ref e; this.data.array[]) { if (e.status == BucketStatus.used) { @@ -334,31 +338,6 @@ if (is(typeof(hasher(T.init)) == size_t)) /// The maximum number of buckets the container can have. enum size_t maxBucketCount = primes[$ - 1]; - /* - * Inserts the value in an empty or deleted bucket. If the value is - * already in there, does nothing and returns InsertStatus.found. If the - * hash array is full returns InsertStatus.failed. Otherwise, - * InsertStatus.added is returned. - */ - private InsertStatus insertInUnusedBucket(ref T value) - { - auto bucketPosition = locateBucket(this.data, hasher(value)); - - foreach (ref e; this.data[bucketPosition .. $]) - { - if (e == value) // Already in the set. - { - return InsertStatus.found; - } - else if (e.status != BucketStatus.used) // Insert the value. - { - e.key = value; - return InsertStatus.added; - } - } - return InsertStatus.failed; - } - /** * Inserts a new element. * @@ -369,17 +348,13 @@ if (is(typeof(hasher(T.init)) == size_t)) */ size_t insert(T value) { - InsertStatus status = insertInUnusedBucket(value); - for (; !status; status = insertInUnusedBucket(value)) + auto e = ((ref v) @trusted => &this.data.insert(v))(value); + if (e.status != BucketStatus.used) { - if (primes.length == (this.lengthIndex + 1)) - { - this.data.insertBack(Bucket!T(value)); - return 1; - } - rehashToSize(this.lengthIndex + 1); + e.key = value; + return 1; } - return status == InsertStatus.added; + return 0; } /// @@ -411,8 +386,8 @@ if (is(typeof(hasher(T.init)) == size_t)) */ size_t remove(T value) { - auto bucketPosition = locateBucket(this.data, hasher(value)); - foreach (ref e; this.data[bucketPosition .. $]) + auto bucketPosition = this.data.locateBucket(value); + foreach (ref e; this.data.array[bucketPosition .. $]) { if (e == value) // Found. { @@ -452,8 +427,8 @@ if (is(typeof(hasher(T.init)) == size_t)) */ bool opBinaryRight(string op : "in")(auto ref const T value) const { - auto bucketPosition = locateBucket(this.data, hasher(value)); - foreach (ref e; this.data[bucketPosition .. $]) + auto bucketPosition = this.data.locateBucket(value); + foreach (ref e; this.data.array[bucketPosition .. $]) { if (e == value) // Found. { @@ -501,42 +476,9 @@ if (is(typeof(hasher(T.init)) == size_t)) * Params: * n = Minimum number of buckets. */ - void rehash(const size_t n) + void rehash(size_t n) { - size_t lengthIndex; - for (; lengthIndex < primes.length; ++lengthIndex) - { - if (primes[lengthIndex] >= n) - { - break; - } - } - rehashToSize(lengthIndex); - } - - // Takes an index in the primes array. - private void rehashToSize(const size_t n) - { - auto storage = DataType(primes[n], allocator); - DataLoop: foreach (ref e1; this.data[]) - { - if (e1.status == BucketStatus.used) - { - auto bucketPosition = hasher(e1.key) % storage.length; - - foreach (ref e2; storage[bucketPosition .. $]) - { - if (e2.status != BucketStatus.used) // Insert the value. - { - e2 = e1; - continue DataLoop; - } - } - return; // Rehashing failed. - } - } - move(storage, this.data); - this.lengthIndex = n; + this.data.rehash(n); } /** @@ -545,17 +487,17 @@ if (is(typeof(hasher(T.init)) == size_t)) */ Range opIndex() { - return typeof(return)(this.data[]); + return typeof(return)(this.data.array[]); } /// ditto ConstRange opIndex() const { - return typeof(return)(this.data[]); + return typeof(return)(this.data.array[]); } /// - @nogc unittest + @nogc nothrow pure @safe unittest { Set!int set; assert(set[].empty); @@ -568,10 +510,6 @@ if (is(typeof(hasher(T.init)) == size_t)) set.remove(8); assert(set[].empty); } - - private alias DataType = Array!(Bucket!T); - private DataType data; - private size_t lengthIndex; } // Basic insertion logic. @@ -581,16 +519,16 @@ if (is(typeof(hasher(T.init)) == size_t)) assert(set.insert(5) == 1); assert(5 in set); - assert(set.data.length == 3); + assert(set.data.array.length == 3); assert(set.insert(5) == 0); assert(5 in set); - assert(set.data.length == 3); + assert(set.data.array.length == 3); assert(set.insert(9) == 1); assert(9 in set); assert(5 in set); - assert(set.data.length == 3); + assert(set.data.array.length == 3); assert(set.insert(7) == 1); assert(set.insert(8) == 1); @@ -598,11 +536,11 @@ if (is(typeof(hasher(T.init)) == size_t)) assert(5 in set); assert(9 in set); assert(7 in set); - assert(set.data.length == 7); + assert(set.data.array.length == 7); assert(set.insert(16) == 1); assert(16 in set); - assert(set.data.length == 7); + assert(set.data.array.length == 7); } // Static checks.