From 533fa3b023e759b1f5c18e32256bef5ade86be56 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Wed, 27 Jun 2018 05:45:53 +0200 Subject: [PATCH] container.HashTable: Fix infinite rehashing when inserting Fix #53. --- source/tanya/container/entry.d | 47 ++++++++++++++++++--------- source/tanya/container/hashtable.d | 52 +++++++++++++++++++++++++----- source/tanya/container/set.d | 26 +++++++++++---- 3 files changed, 96 insertions(+), 29 deletions(-) diff --git a/source/tanya/container/entry.d b/source/tanya/container/entry.d index ff09669..a456dd9 100644 --- a/source/tanya/container/entry.d +++ b/source/tanya/container/entry.d @@ -171,24 +171,29 @@ package struct HashArray(alias hasher, K, V = void) this.length = that.length; } + @property size_t bucketCount() const + { + return primes[this.lengthIndex]; + } + /* * Returns bucket position for `hash`. `0` may mean the 0th position or an * empty `buckets` array. */ size_t locateBucket(ref const Key key) const { - return this.array.length == 0 - ? 0 - : hasher(key) % primes[this.lengthIndex]; + return this.array.length == 0 ? 0 : hasher(key) % bucketCount; } /* - * Inserts a key into an empty or deleted bucket. If the key is - * already in there, does nothing. Returns the bucket with the key. + * If the key doesn't already exists, returns an empty bucket the key can + * be inserted in and adjusts the element count. Otherwise returns the + * bucket containing the key. */ ref Bucket insert(ref Key key) { - while ((this.lengthIndex + 1) != primes.length) + const newLengthIndex = this.lengthIndex + 1; + if (newLengthIndex != primes.length) { foreach (ref e; this.array[locateBucket(key) .. $]) { @@ -203,17 +208,29 @@ package struct HashArray(alias hasher, K, V = void) } } - if (this.rehashToSize(this.lengthIndex + 1)) + this.rehashToSize(newLengthIndex); + } + + foreach (ref e; this.array[locateBucket(key) .. $]) + { + if (e == key) { - ++this.lengthIndex; + return e; + } + else if (e.status != BucketStatus.used) + { + ++this.length; + return e; } } - this.array.insertBack(Bucket(key)); + + this.array.length = this.array.length + 1; + ++this.length; return this.array[$ - 1]; } // Takes an index in the primes array. - bool rehashToSize(const size_t n) + void rehashToSize(const size_t n) in { assert(n < primes.length); @@ -231,15 +248,15 @@ package struct HashArray(alias hasher, K, V = void) { if (e2.status != BucketStatus.used) // Insert the key { - e2 = e1; + .move(e1, e2); continue DataLoop; } } - return false; // Rehashing failed. + storage.insertBack(.move(e1)); } } .move(storage, this.array); - return true; + this.lengthIndex = n; } void rehash(const size_t n) @@ -252,9 +269,9 @@ package struct HashArray(alias hasher, K, V = void) break; } } - if (this.rehashToSize(lengthIndex)) + if (lengthIndex > this.lengthIndex) { - this.lengthIndex = lengthIndex; + this.rehashToSize(lengthIndex); } } diff --git a/source/tanya/container/hashtable.d b/source/tanya/container/hashtable.d index d25c75b..deb0586 100644 --- a/source/tanya/container/hashtable.d +++ b/source/tanya/container/hashtable.d @@ -174,8 +174,6 @@ if (is(typeof(hasher(Key.init)) == size_t)) invariant { assert(this.data.lengthIndex < primes.length); - assert(this.data.array.length == 0 - || this.data.array.length == primes[this.data.lengthIndex]); } /** @@ -440,6 +438,23 @@ if (is(typeof(hasher(Key.init)) == size_t)) assert(hashTable.empty); } + /** + * Returns current bucket count in the container. + * + * Bucket count equals to the number of the elements can be saved in the + * container in the best case scenario for key distribution, i.d. every key + * has a unique hash value. In a worse case the bucket count can be less + * than the number of elements stored in the container. + * + * Returns: Current bucket count. + * + * See_Also: $(D_PSYMBOL rehash). + */ + @property size_t bucketCount() const + { + return this.data.bucketCount; + } + /// The maximum number of buckets the container can have. enum size_t maxBucketCount = primes[$ - 1]; @@ -645,18 +660,15 @@ if (is(typeof(hasher(Key.init)) == size_t)) * Sets the number of buckets in the container to at least $(D_PARAM n) * and rearranges all the elements according to their hash values. * - * If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity) + * If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount) * and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is * forced. * * If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount), * $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets. * - * If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing - * is forced without resizing the container. - * - * If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the - * function may have no effect. + * If $(D_PARAM n) is less than or equal to the current + * $(D_PSYMBOL bucketCount), the function may have no effect. * * Rehashing is automatically performed whenever the container needs space * to insert new elements. @@ -773,3 +785,27 @@ if (is(typeof(hasher(Key.init)) == size_t)) } testFunc(hashTable); } + +// Issue 53: https://github.com/caraus-ecms/tanya/issues/53 +@nogc nothrow pure @safe unittest +{ + { + HashTable!(uint, uint) hashTable; + foreach (uint i; 0 .. 14) + { + hashTable[i + 1] = i; + } + assert(hashTable.length == 14); + } + { + HashTable!(int, int) hashtable; + + hashtable[1194250162] = 3; + hashtable[-1131293824] = 6; + hashtable[838100082] = 9; + + hashtable.rehash(11); + + assert(hashtable[-1131293824] == 6); + } +} diff --git a/source/tanya/container/set.d b/source/tanya/container/set.d index 9ee1472..d578280 100644 --- a/source/tanya/container/set.d +++ b/source/tanya/container/set.d @@ -432,6 +432,23 @@ if (is(typeof(hasher(T.init)) == size_t)) assert(set.empty); } + /** + * Returns current bucket count in the container. + * + * Bucket count equals to the number of the elements can be saved in the + * container in the best case scenario for key distribution, i.d. every key + * has a unique hash value. In a worse case the bucket count can be less + * than the number of elements stored in the container. + * + * Returns: Current bucket count. + * + * See_Also: $(D_PSYMBOL rehash). + */ + @property size_t bucketCount() const + { + return this.data.bucketCount; + } + /// The maximum number of buckets the container can have. enum size_t maxBucketCount = primes[$ - 1]; @@ -559,18 +576,15 @@ if (is(typeof(hasher(T.init)) == size_t)) * Sets the number of buckets in the container to at least $(D_PARAM n) * and rearranges all the elements according to their hash values. * - * If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity) + * If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount) * and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is * forced. * * If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount), * $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets. * - * If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing - * is forced without resizing the container. - * - * If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the - * function may have no effect. + * If $(D_PARAM n) is less than or equal to the current + * $(D_PSYMBOL bucketCount), the function may have no effect. * * Rehashing is automatically performed whenever the container needs space * to insert new elements.