container.HashTable: Fix infinite rehashing when inserting

Fix #53.
This commit is contained in:
Eugen Wissner 2018-06-27 05:45:53 +02:00
parent adf2d8b689
commit 533fa3b023
3 changed files with 96 additions and 29 deletions

View File

@ -171,24 +171,29 @@ package struct HashArray(alias hasher, K, V = void)
this.length = that.length;
}
@property size_t bucketCount() const
{
return primes[this.lengthIndex];
}
/*
* Returns bucket position for `hash`. `0` may mean the 0th position or an
* empty `buckets` array.
*/
size_t locateBucket(ref const Key key) const
{
return this.array.length == 0
? 0
: hasher(key) % primes[this.lengthIndex];
return this.array.length == 0 ? 0 : hasher(key) % bucketCount;
}
/*
* Inserts a key into an empty or deleted bucket. If the key is
* already in there, does nothing. Returns the bucket with the key.
* If the key doesn't already exists, returns an empty bucket the key can
* be inserted in and adjusts the element count. Otherwise returns the
* bucket containing the key.
*/
ref Bucket insert(ref Key key)
{
while ((this.lengthIndex + 1) != primes.length)
const newLengthIndex = this.lengthIndex + 1;
if (newLengthIndex != primes.length)
{
foreach (ref e; this.array[locateBucket(key) .. $])
{
@ -203,17 +208,29 @@ package struct HashArray(alias hasher, K, V = void)
}
}
if (this.rehashToSize(this.lengthIndex + 1))
this.rehashToSize(newLengthIndex);
}
foreach (ref e; this.array[locateBucket(key) .. $])
{
if (e == key)
{
++this.lengthIndex;
return e;
}
else if (e.status != BucketStatus.used)
{
++this.length;
return e;
}
}
this.array.insertBack(Bucket(key));
this.array.length = this.array.length + 1;
++this.length;
return this.array[$ - 1];
}
// Takes an index in the primes array.
bool rehashToSize(const size_t n)
void rehashToSize(const size_t n)
in
{
assert(n < primes.length);
@ -231,15 +248,15 @@ package struct HashArray(alias hasher, K, V = void)
{
if (e2.status != BucketStatus.used) // Insert the key
{
e2 = e1;
.move(e1, e2);
continue DataLoop;
}
}
return false; // Rehashing failed.
storage.insertBack(.move(e1));
}
}
.move(storage, this.array);
return true;
this.lengthIndex = n;
}
void rehash(const size_t n)
@ -252,9 +269,9 @@ package struct HashArray(alias hasher, K, V = void)
break;
}
}
if (this.rehashToSize(lengthIndex))
if (lengthIndex > this.lengthIndex)
{
this.lengthIndex = lengthIndex;
this.rehashToSize(lengthIndex);
}
}

View File

@ -174,8 +174,6 @@ if (is(typeof(hasher(Key.init)) == size_t))
invariant
{
assert(this.data.lengthIndex < primes.length);
assert(this.data.array.length == 0
|| this.data.array.length == primes[this.data.lengthIndex]);
}
/**
@ -440,6 +438,23 @@ if (is(typeof(hasher(Key.init)) == size_t))
assert(hashTable.empty);
}
/**
* Returns current bucket count in the container.
*
* Bucket count equals to the number of the elements can be saved in the
* container in the best case scenario for key distribution, i.d. every key
* has a unique hash value. In a worse case the bucket count can be less
* than the number of elements stored in the container.
*
* Returns: Current bucket count.
*
* See_Also: $(D_PSYMBOL rehash).
*/
@property size_t bucketCount() const
{
return this.data.bucketCount;
}
/// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1];
@ -645,18 +660,15 @@ if (is(typeof(hasher(Key.init)) == size_t))
* Sets the number of buckets in the container to at least $(D_PARAM n)
* and rearranges all the elements according to their hash values.
*
* If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
* If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount)
* and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
* forced.
*
* If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
* $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
*
* If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
* is forced without resizing the container.
*
* If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
* function may have no effect.
* If $(D_PARAM n) is less than or equal to the current
* $(D_PSYMBOL bucketCount), the function may have no effect.
*
* Rehashing is automatically performed whenever the container needs space
* to insert new elements.
@ -773,3 +785,27 @@ if (is(typeof(hasher(Key.init)) == size_t))
}
testFunc(hashTable);
}
// Issue 53: https://github.com/caraus-ecms/tanya/issues/53
@nogc nothrow pure @safe unittest
{
{
HashTable!(uint, uint) hashTable;
foreach (uint i; 0 .. 14)
{
hashTable[i + 1] = i;
}
assert(hashTable.length == 14);
}
{
HashTable!(int, int) hashtable;
hashtable[1194250162] = 3;
hashtable[-1131293824] = 6;
hashtable[838100082] = 9;
hashtable.rehash(11);
assert(hashtable[-1131293824] == 6);
}
}

View File

@ -432,6 +432,23 @@ if (is(typeof(hasher(T.init)) == size_t))
assert(set.empty);
}
/**
* Returns current bucket count in the container.
*
* Bucket count equals to the number of the elements can be saved in the
* container in the best case scenario for key distribution, i.d. every key
* has a unique hash value. In a worse case the bucket count can be less
* than the number of elements stored in the container.
*
* Returns: Current bucket count.
*
* See_Also: $(D_PSYMBOL rehash).
*/
@property size_t bucketCount() const
{
return this.data.bucketCount;
}
/// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1];
@ -559,18 +576,15 @@ if (is(typeof(hasher(T.init)) == size_t))
* Sets the number of buckets in the container to at least $(D_PARAM n)
* and rearranges all the elements according to their hash values.
*
* If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
* If $(D_PARAM n) is greater than the current $(D_PSYMBOL bucketCount)
* and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
* forced.
*
* If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
* $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
*
* If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
* is forced without resizing the container.
*
* If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
* function may have no effect.
* If $(D_PARAM n) is less than or equal to the current
* $(D_PSYMBOL bucketCount), the function may have no effect.
*
* Rehashing is automatically performed whenever the container needs space
* to insert new elements.