HashArray as an internal store for hash containers

This commit is contained in:
Eugen Wissner 2018-04-29 09:12:48 +02:00
parent 36646aa2c4
commit 18d54b4b18
2 changed files with 130 additions and 107 deletions

View File

@ -14,6 +14,8 @@
*/ */
module tanya.container.entry; module tanya.container.entry;
import tanya.algorithm.mutation;
import tanya.container.array;
import tanya.meta.trait; import tanya.meta.trait;
import tanya.typecons; import tanya.typecons;
@ -44,6 +46,9 @@ package enum BucketStatus : byte
package struct Bucket(K, V = void) package struct Bucket(K, V = void)
{ {
private alias Key = K;
private alias Value = V;
@property void key(ref K key) @property void key(ref K key)
{ {
this.key_ = key; this.key_ = key;
@ -108,18 +113,98 @@ package static immutable size_t[33] primes = [
805306457, 1610612741, 3221225473 805306457, 1610612741, 3221225473
]; ];
/* package struct HashArray(alias hasher, K, V = void)
* Returns bucket position for `hash`. `0` may mean the 0th position or an
* empty `buckets` array.
*/
package size_t locateBucket(T)(ref const T buckets, const size_t hash)
{ {
return buckets.length == 0 ? 0 : hash % buckets.length; alias Bucket = .Bucket!(K, V);
} alias Buckets = Array!Bucket;
package enum InsertStatus : byte Array!Bucket array;
{ size_t lengthIndex;
found = -1,
failed = 0, /*
added = 1, * Returns bucket position for `hash`. `0` may mean the 0th position or an
* empty `buckets` array.
*/
size_t locateBucket(ref const K key) const
{
return this.array.length == 0 ? 0 : hasher(key) % this.array.length;
}
/*
* Inserts the value in an empty or deleted bucket. If the value is
* already in there, does nothing and returns InsertStatus.found. If the
* hash array is full returns InsertStatus.failed. Otherwise,
* InsertStatus.added is returned.
*/
ref Bucket insert(ref K key)
{
while (true)
{
auto bucketPosition = locateBucket(key);
foreach (ref e; this.array[bucketPosition .. $])
{
if (e == key || e.status != BucketStatus.used)
{
return e;
}
}
if (primes.length == (this.lengthIndex + 1))
{
this.array.insertBack(Bucket(key));
return this.array[$ - 1];
}
if (this.rehashToSize(this.lengthIndex + 1))
{
++this.lengthIndex;
}
}
}
// Takes an index in the primes array.
bool rehashToSize(const size_t n)
{
auto storage = typeof(this.array)(primes[n], this.array.allocator);
DataLoop: foreach (ref e1; this.array[])
{
if (e1.status == BucketStatus.used)
{
auto bucketPosition = hasher(e1.key) % storage.length;
foreach (ref e2; storage[bucketPosition .. $])
{
if (e2.status != BucketStatus.used) // Insert the value.
{
e2 = e1;
continue DataLoop;
}
}
return false; // Rehashing failed.
}
}
move(storage, this.array);
return true;
}
void rehash(const size_t n)
{
size_t lengthIndex;
for (; lengthIndex < primes.length; ++lengthIndex)
{
if (primes[lengthIndex] >= n)
{
break;
}
}
if (this.rehashToSize(lengthIndex))
{
this.lengthIndex = lengthIndex;
}
}
@property size_t capacity() const
{
return this.array.length;
}
} }

View File

@ -16,7 +16,7 @@
module tanya.container.set; module tanya.container.set;
import tanya.algorithm.mutation; import tanya.algorithm.mutation;
import tanya.container; import tanya.container.array;
import tanya.container.entry; import tanya.container.entry;
import tanya.hash.lookup; import tanya.hash.lookup;
import tanya.memory; import tanya.memory;
@ -155,6 +155,10 @@ struct Range(E)
struct Set(T, alias hasher = hash) struct Set(T, alias hasher = hash)
if (is(typeof(hasher(T.init)) == size_t)) if (is(typeof(hasher(T.init)) == size_t))
{ {
private HashArray!(hasher, T) data;
private alias Buckets = typeof(this.data).Buckets;
/// The range types for $(D_PSYMBOL Set). /// The range types for $(D_PSYMBOL Set).
alias Range = .Range!T; alias Range = .Range!T;
@ -163,9 +167,9 @@ if (is(typeof(hasher(T.init)) == size_t))
invariant invariant
{ {
assert(this.lengthIndex < primes.length); assert(this.data.lengthIndex < primes.length);
assert(this.data.length == 0 assert(this.data.array.length == 0
|| this.data.length == primes[this.lengthIndex]); || this.data.array.length == primes[this.data.lengthIndex]);
} }
/** /**
@ -196,7 +200,7 @@ if (is(typeof(hasher(T.init)) == size_t))
} }
do do
{ {
this.data = typeof(this.data)(allocator); this.data = typeof(this.data)(Buckets(allocator));
} }
/** /**
@ -218,7 +222,7 @@ if (is(typeof(hasher(T.init)) == size_t))
} }
do do
{ {
this.data = typeof(this.data)(init.data, allocator); this.data = typeof(this.data)(Buckets(init.data, allocator));
} }
/// ditto /// ditto
@ -230,7 +234,7 @@ if (is(typeof(hasher(T.init)) == size_t))
} }
do do
{ {
this.data = typeof(this.data)(move(init.data), allocator); this.data = typeof(this.data)(Buckets(move(init.data), allocator));
this.lengthIndex = init.lengthIndex; this.lengthIndex = init.lengthIndex;
init.lengthIndex = 0; init.lengthIndex = 0;
} }
@ -276,7 +280,7 @@ if (is(typeof(hasher(T.init)) == size_t))
} }
do do
{ {
return cast(shared Allocator) this.data.allocator; return this.data.array.allocator;
} }
/** /**
@ -290,7 +294,7 @@ if (is(typeof(hasher(T.init)) == size_t))
*/ */
@property size_t capacity() const @property size_t capacity() const
{ {
return this.data.length; return this.data.capacity;
} }
/// ///
@ -311,7 +315,7 @@ if (is(typeof(hasher(T.init)) == size_t))
@property size_t length() const @property size_t length() const
{ {
size_t count; size_t count;
foreach (ref e; this.data[]) foreach (ref e; this.data.array[])
{ {
if (e.status == BucketStatus.used) if (e.status == BucketStatus.used)
{ {
@ -334,31 +338,6 @@ if (is(typeof(hasher(T.init)) == size_t))
/// The maximum number of buckets the container can have. /// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1]; enum size_t maxBucketCount = primes[$ - 1];
/*
* Inserts the value in an empty or deleted bucket. If the value is
* already in there, does nothing and returns InsertStatus.found. If the
* hash array is full returns InsertStatus.failed. Otherwise,
* InsertStatus.added is returned.
*/
private InsertStatus insertInUnusedBucket(ref T value)
{
auto bucketPosition = locateBucket(this.data, hasher(value));
foreach (ref e; this.data[bucketPosition .. $])
{
if (e == value) // Already in the set.
{
return InsertStatus.found;
}
else if (e.status != BucketStatus.used) // Insert the value.
{
e.key = value;
return InsertStatus.added;
}
}
return InsertStatus.failed;
}
/** /**
* Inserts a new element. * Inserts a new element.
* *
@ -369,17 +348,13 @@ if (is(typeof(hasher(T.init)) == size_t))
*/ */
size_t insert(T value) size_t insert(T value)
{ {
InsertStatus status = insertInUnusedBucket(value); auto e = ((ref v) @trusted => &this.data.insert(v))(value);
for (; !status; status = insertInUnusedBucket(value)) if (e.status != BucketStatus.used)
{ {
if (primes.length == (this.lengthIndex + 1)) e.key = value;
{ return 1;
this.data.insertBack(Bucket!T(value));
return 1;
}
rehashToSize(this.lengthIndex + 1);
} }
return status == InsertStatus.added; return 0;
} }
/// ///
@ -411,8 +386,8 @@ if (is(typeof(hasher(T.init)) == size_t))
*/ */
size_t remove(T value) size_t remove(T value)
{ {
auto bucketPosition = locateBucket(this.data, hasher(value)); auto bucketPosition = this.data.locateBucket(value);
foreach (ref e; this.data[bucketPosition .. $]) foreach (ref e; this.data.array[bucketPosition .. $])
{ {
if (e == value) // Found. if (e == value) // Found.
{ {
@ -452,8 +427,8 @@ if (is(typeof(hasher(T.init)) == size_t))
*/ */
bool opBinaryRight(string op : "in")(auto ref const T value) const bool opBinaryRight(string op : "in")(auto ref const T value) const
{ {
auto bucketPosition = locateBucket(this.data, hasher(value)); auto bucketPosition = this.data.locateBucket(value);
foreach (ref e; this.data[bucketPosition .. $]) foreach (ref e; this.data.array[bucketPosition .. $])
{ {
if (e == value) // Found. if (e == value) // Found.
{ {
@ -501,42 +476,9 @@ if (is(typeof(hasher(T.init)) == size_t))
* Params: * Params:
* n = Minimum number of buckets. * n = Minimum number of buckets.
*/ */
void rehash(const size_t n) void rehash(size_t n)
{ {
size_t lengthIndex; this.data.rehash(n);
for (; lengthIndex < primes.length; ++lengthIndex)
{
if (primes[lengthIndex] >= n)
{
break;
}
}
rehashToSize(lengthIndex);
}
// Takes an index in the primes array.
private void rehashToSize(const size_t n)
{
auto storage = DataType(primes[n], allocator);
DataLoop: foreach (ref e1; this.data[])
{
if (e1.status == BucketStatus.used)
{
auto bucketPosition = hasher(e1.key) % storage.length;
foreach (ref e2; storage[bucketPosition .. $])
{
if (e2.status != BucketStatus.used) // Insert the value.
{
e2 = e1;
continue DataLoop;
}
}
return; // Rehashing failed.
}
}
move(storage, this.data);
this.lengthIndex = n;
} }
/** /**
@ -545,17 +487,17 @@ if (is(typeof(hasher(T.init)) == size_t))
*/ */
Range opIndex() Range opIndex()
{ {
return typeof(return)(this.data[]); return typeof(return)(this.data.array[]);
} }
/// ditto /// ditto
ConstRange opIndex() const ConstRange opIndex() const
{ {
return typeof(return)(this.data[]); return typeof(return)(this.data.array[]);
} }
/// ///
@nogc unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(set[].empty); assert(set[].empty);
@ -568,10 +510,6 @@ if (is(typeof(hasher(T.init)) == size_t))
set.remove(8); set.remove(8);
assert(set[].empty); assert(set[].empty);
} }
private alias DataType = Array!(Bucket!T);
private DataType data;
private size_t lengthIndex;
} }
// Basic insertion logic. // Basic insertion logic.
@ -581,16 +519,16 @@ if (is(typeof(hasher(T.init)) == size_t))
assert(set.insert(5) == 1); assert(set.insert(5) == 1);
assert(5 in set); assert(5 in set);
assert(set.data.length == 3); assert(set.data.array.length == 3);
assert(set.insert(5) == 0); assert(set.insert(5) == 0);
assert(5 in set); assert(5 in set);
assert(set.data.length == 3); assert(set.data.array.length == 3);
assert(set.insert(9) == 1); assert(set.insert(9) == 1);
assert(9 in set); assert(9 in set);
assert(5 in set); assert(5 in set);
assert(set.data.length == 3); assert(set.data.array.length == 3);
assert(set.insert(7) == 1); assert(set.insert(7) == 1);
assert(set.insert(8) == 1); assert(set.insert(8) == 1);
@ -598,11 +536,11 @@ if (is(typeof(hasher(T.init)) == size_t))
assert(5 in set); assert(5 in set);
assert(9 in set); assert(9 in set);
assert(7 in set); assert(7 in set);
assert(set.data.length == 7); assert(set.data.array.length == 7);
assert(set.insert(16) == 1); assert(set.insert(16) == 1);
assert(16 in set); assert(16 in set);
assert(set.data.length == 7); assert(set.data.array.length == 7);
} }
// Static checks. // Static checks.