summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2018-06-27 05:45:53 +0200
committerEugen Wissner <belka@caraus.de>2018-06-27 05:45:53 +0200
commit533fa3b023e759b1f5c18e32256bef5ade86be56 (patch)
treea4cf5efa8554b304f6e9390909483e0964b5ec28
parentadf2d8b689c4174590dfda78aa4b3a8b6ac4437b (diff)
downloadtanya-533fa3b023e759b1f5c18e32256bef5ade86be56.tar.gz
container.HashTable: Fix infinite rehashing when inserting
Fix #53.
-rw-r--r--source/tanya/container/entry.d47
-rw-r--r--source/tanya/container/hashtable.d52
-rw-r--r--source/tanya/container/set.d26
3 files changed, 96 insertions, 29 deletions
diff --git a/source/tanya/container/entry.d b/source/tanya/container/entry.d
index ff09669..a456dd9 100644
--- a/source/tanya/container/entry.d
+++ b/source/tanya/container/entry.d
@@ -171,24 +171,29 @@ package struct HashArray(alias hasher, K, V = void)
this.length = that.length;
}
+ @property size_t bucketCount() const
+ {
+ return primes[this.lengthIndex];
+ }
+
/*
* Returns bucket position for `hash`. `0` may mean the 0th position or an
* empty `buckets` array.
*/
size_t locateBucket(ref const Key key) const
{
- return this.array.length == 0
- ? 0
- : hasher(key) % primes[this.lengthIndex];
+ return this.array.length == 0 ? 0 : hasher(key) % bucketCount;
}
/*
- * Inserts a key into an empty or deleted bucket. If the key is
- * already in there, does nothing. Returns the bucket with the key.
+ * If the key doesn't already exists, returns an empty bucket the key can
+ * be inserted in and adjusts the element count. Otherwise returns the
+ * bucket containing the key.
*/
ref Bucket insert(ref Key key)
{
- while ((this.lengthIndex + 1) != primes.length)
+ const newLengthIndex = this.lengthIndex + 1;
+ if (newLengthIndex != primes.length)
{
foreach (ref e; this.array[locateBucket(key) .. $])
{
@@ -203,17 +208,29 @@ package struct HashArray(alias hasher, K, V = void)
}
}
- if (this.rehashToSize(this.lengthIndex + 1))
+ this.rehashToSize(newLengthIndex);
+ }
+
+ foreach (ref e; this.array[locateBucket(key) .. $])
+ {
+ if (e == key)
{
- ++this.lengthIndex;
+ return e;
+ }
+ else if (e.status != BucketStatus.used)
+ {
+ ++this.length;
+ return e;
}
}
- this.array.insertBack(Bucket(key));
+
+ this.array.length = this.array.length + 1;
+ ++this.length;
return this.array[$ - 1];
}
// Takes an index in the primes array.
- bool rehashToSize(const size_t n)
+ void rehashToSize(const size_t n)
in
{
assert(n < primes.length);
@@ -231,15 +248,15 @@ package struct HashArray(alias hasher, K, V = void)
{
if (e2.status != BucketStatus.used) // Insert the key
{
- e2 = e1;
+ .move(e1, e2);
continue DataLoop;
}
}
- return false; // Rehashing failed.
+ storage.insertBack(.move(e1));
}
}
.move(storage, this.array);
- return true;
+ this.lengthIndex = n;
}
void rehash(const size_t n)
@@ -252,9 +269,9 @@ package struct HashArray(alias hasher, K, V = void)
break;
}
}
- if (this.rehashToSize(lengthIndex))
+ if (lengthIndex > this.lengthIndex)
{
- this.lengthIndex = lengthIndex;
+ this.rehashToSize(lengthIndex);
}
}
diff --git a/source/tanya/container/hashtable.d b/source/tanya/container/hashtable.d
index d25c75b..deb0586 100644
--- a/source/tanya/container/hashtable.d
+++ b/source/tanya/container/hashtable.d
@@ -174,8 +174,6 @@ if (is(typeof(hasher(Key.init)) == size_t))
invariant
{
assert(this.data.lengthIndex < primes.length);
- assert(this.data.array.length == 0
- || this.data.array.length == primes[this.data.lengthIndex]);
}
/**
@@ -440,6 +438,23 @@ if (is(typeof(hasher(Key.init)) == size_t))
assert(hashTable.empty);
}
+ /**
+ * Returns current bucket count in the container.
+ *
+ * Bucket count equals to the number of the elements can be saved in the
+ * container in the best case scenario for key distribution, i.d. every key
+ * has a unique hash value. In a worse case the bucket count can be less
+ * than the number of elements stored in the container.
+ *
+ * Returns: Current bucket count.
+ *
+ * See_Also: $(D_PSYMBOL rehash).
+ */
+ @property size_t bucketCount() const
+ {
+ return this.data.bucketCount;
+ }
+
/// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1];
@@ -645,18 +660,15 @@ if (is(typeof(hasher(Key.init)) == size_t))
* Sets the number of buckets in the container to at least $(D_PARAM n)
* and rearranges all the elements according to their hash values.
*
- * If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
+ * If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount)
* and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
* forced.
*
* If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
* $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
*
- * If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
- * is forced without resizing the container.
- *
- * If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
- * function may have no effect.
+ * If $(D_PARAM n) is less than or equal to the current
+ * $(D_PSYMBOL bucketCount), the function may have no effect.
*
* Rehashing is automatically performed whenever the container needs space
* to insert new elements.
@@ -773,3 +785,27 @@ if (is(typeof(hasher(Key.init)) == size_t))
}
testFunc(hashTable);
}
+
+// Issue 53: https://github.com/caraus-ecms/tanya/issues/53
+@nogc nothrow pure @safe unittest
+{
+ {
+ HashTable!(uint, uint) hashTable;
+ foreach (uint i; 0 .. 14)
+ {
+ hashTable[i + 1] = i;
+ }
+ assert(hashTable.length == 14);
+ }
+ {
+ HashTable!(int, int) hashtable;
+
+ hashtable[1194250162] = 3;
+ hashtable[-1131293824] = 6;
+ hashtable[838100082] = 9;
+
+ hashtable.rehash(11);
+
+ assert(hashtable[-1131293824] == 6);
+ }
+}
diff --git a/source/tanya/container/set.d b/source/tanya/container/set.d
index 9ee1472..d578280 100644
--- a/source/tanya/container/set.d
+++ b/source/tanya/container/set.d
@@ -432,6 +432,23 @@ if (is(typeof(hasher(T.init)) == size_t))
assert(set.empty);
}
+ /**
+ * Returns current bucket count in the container.
+ *
+ * Bucket count equals to the number of the elements can be saved in the
+ * container in the best case scenario for key distribution, i.d. every key
+ * has a unique hash value. In a worse case the bucket count can be less
+ * than the number of elements stored in the container.
+ *
+ * Returns: Current bucket count.
+ *
+ * See_Also: $(D_PSYMBOL rehash).
+ */
+ @property size_t bucketCount() const
+ {
+ return this.data.bucketCount;
+ }
+
/// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1];
@@ -559,18 +576,15 @@ if (is(typeof(hasher(T.init)) == size_t))
* Sets the number of buckets in the container to at least $(D_PARAM n)
* and rearranges all the elements according to their hash values.
*
- * If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
+ * If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount)
* and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
* forced.
*
* If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
* $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
*
- * If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
- * is forced without resizing the container.
- *
- * If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
- * function may have no effect.
+ * If $(D_PARAM n) is less than or equal to the current
+ * $(D_PSYMBOL bucketCount), the function may have no effect.
*
* Rehashing is automatically performed whenever the container needs space
* to insert new elements.