container.Set: Support customizable hasher

This commit is contained in:
Eugen Wissner 2018-04-28 17:49:49 +02:00
parent 55c36d22a0
commit 8733b93ca0
3 changed files with 122 additions and 155 deletions
source/tanya/container

View File

@ -35,17 +35,6 @@ package struct DEntry(T)
DEntry* next, prev; DEntry* next, prev;
} }
package struct HashEntry(K, V)
{
this(ref K key, ref V value)
{
this.pair = Pair!(K, V)(key, value);
}
Pair!(K, V) pair;
HashEntry* next;
}
package enum BucketStatus : byte package enum BucketStatus : byte
{ {
deleted = -1, deleted = -1,
@ -53,31 +42,31 @@ package enum BucketStatus : byte
used = 1, used = 1,
} }
package struct Bucket(T) package struct Bucket(K, V = void)
{ {
@property void content(ref T content) @property void key(ref K key)
{ {
this.content_ = content; this.key_ = key;
this.status = BucketStatus.used; this.status = BucketStatus.used;
} }
@property ref inout(T) content() inout @property ref inout(K) key() inout
{ {
return this.content_; return this.key_;
} }
bool opEquals(ref T content) bool opEquals(ref K key)
{ {
if (this.status == BucketStatus.used && this.content == content) if (this.status == BucketStatus.used && this.key == key)
{ {
return true; return true;
} }
return false; return false;
} }
bool opEquals(ref const T content) const bool opEquals(ref const K key) const
{ {
if (this.status == BucketStatus.used && this.content == content) if (this.status == BucketStatus.used && this.key == key)
{ {
return true; return true;
} }
@ -86,23 +75,51 @@ package struct Bucket(T)
bool opEquals(ref typeof(this) that) bool opEquals(ref typeof(this) that)
{ {
return this.content == that.content && this.status == that.status; return key == that.key && this.status == that.status;
} }
bool opEquals(ref typeof(this) that) const bool opEquals(ref typeof(this) that) const
{ {
return this.content == that.content && this.status == that.status; return key == that.key && this.status == that.status;
} }
void remove() void remove()
{ {
static if (hasElaborateDestructor!T) static if (hasElaborateDestructor!K)
{ {
destroy(this.content); destroy(key);
} }
this.status = BucketStatus.deleted; this.status = BucketStatus.deleted;
} }
T content_; private K key_;
static if (!is(V == void))
{
V value;
}
BucketStatus status = BucketStatus.empty; BucketStatus status = BucketStatus.empty;
} }
// Possible sizes for the hash-based containers.
// The table is strictly increasing, so a larger index always selects a larger
// bucket count.
// NOTE(review): the leading 0 presumably stands for "no buckets allocated yet"
// (a default-constructed Set reports a capacity of 0) - confirm against the
// code that indexes this table.
package static immutable size_t[33] primes = [
0, 3, 7, 13, 23, 37, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289,
24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
805306457, 1610612741, 3221225473
];
/*
 * Maps `hash` onto an index into `buckets` by reducing it modulo the number
 * of buckets. An empty `buckets` array yields `0`, so a result of `0` is
 * either the 0th bucket or a sign that there are no buckets at all.
 */
package size_t locateBucket(T)(ref const T buckets, const size_t hash)
{
    const bucketCount = buckets.length;
    if (bucketCount == 0)
    {
        // Nothing to index into; avoid a modulo by zero.
        return 0;
    }
    return hash % bucketCount;
}
// Outcome of an attempt to place a value into a bucket.
// `failed` is 0, so `!status` is true only when the attempt did not succeed;
// Set.insert relies on this to keep retrying after growing the storage.
package enum InsertStatus : byte
{
found = -1, // The value was already present; nothing was inserted.
failed = 0, // The value could not be placed; the caller has to react.
added = 1, // The value was written into a bucket that was not in use.
}

View File

@ -24,6 +24,7 @@ public import tanya.container.string;
* Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container * Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container
* is full. * is full.
*/ */
deprecated
class HashContainerFullException : Exception class HashContainerFullException : Exception
{ {
/** /**
@ -36,7 +37,7 @@ class HashContainerFullException : Exception
this(string msg, this(string msg,
string file = __FILE__, string file = __FILE__,
size_t line = __LINE__, size_t line = __LINE__,
Throwable next = null) @nogc @safe pure nothrow Throwable next = null) @nogc nothrow pure @safe
{ {
super(msg, file, line, next); super(msg, file, line, next);
} }

View File

@ -18,6 +18,7 @@ module tanya.container.set;
import tanya.algorithm.mutation; import tanya.algorithm.mutation;
import tanya.container; import tanya.container;
import tanya.container.entry; import tanya.container.entry;
import tanya.hash.lookup;
import tanya.memory; import tanya.memory;
import tanya.meta.trait; import tanya.meta.trait;
import tanya.meta.transform; import tanya.meta.transform;
@ -113,7 +114,7 @@ struct Range(E)
} }
do do
{ {
return dataRange.front.content; return dataRange.front.key;
} }
@property ref inout(E) back() inout @property ref inout(E) back() inout
@ -124,7 +125,7 @@ struct Range(E)
} }
do do
{ {
return dataRange.back.content; return dataRange.back.key;
} }
Range opIndex() Range opIndex()
@ -148,10 +149,11 @@ struct Range(E)
* Currently works only with integral types. * Currently works only with integral types.
* *
* Params: * Params:
* T = Element type. * T = Element type.
* hasher = Hash function for $(D_PARAM T).
*/ */
struct Set(T) struct Set(T, alias hasher = hash)
if (isIntegral!T || is(Unqual!T == bool)) if (is(typeof(hasher(T.init)) == size_t))
{ {
/// The range types for $(D_PSYMBOL Set). /// The range types for $(D_PSYMBOL Set).
alias Range = .Range!T; alias Range = .Range!T;
@ -175,7 +177,7 @@ struct Set(T)
* *
* Precondition: $(D_INLINECODE allocator !is null). * Precondition: $(D_INLINECODE allocator !is null).
*/ */
this(const size_t n, shared Allocator allocator = defaultAllocator) this(size_t n, shared Allocator allocator = defaultAllocator)
in in
{ {
assert(allocator !is null); assert(allocator !is null);
@ -197,19 +199,6 @@ struct Set(T)
this.data = typeof(this.data)(allocator); this.data = typeof(this.data)(allocator);
} }
///
unittest
{
{
auto set = Set!int(defaultAllocator);
assert(set.capacity == 0);
}
{
auto set = Set!int(8);
assert(set.capacity == 13);
}
}
/** /**
* Initializes this $(D_PARAM Set) from another one. * Initializes this $(D_PARAM Set) from another one.
* *
@ -222,7 +211,7 @@ struct Set(T)
* allocator = Allocator. * allocator = Allocator.
*/ */
this(S)(ref S init, shared Allocator allocator = defaultAllocator) this(S)(ref S init, shared Allocator allocator = defaultAllocator)
if (is(Unqual!S == Set)) if (is(Unqual!S == Set))
in in
{ {
assert(allocator !is null); assert(allocator !is null);
@ -234,7 +223,7 @@ struct Set(T)
/// ditto /// ditto
this(S)(S init, shared Allocator allocator = defaultAllocator) this(S)(S init, shared Allocator allocator = defaultAllocator)
if (is(S == Set)) if (is(S == Set))
in in
{ {
assert(allocator !is null); assert(allocator !is null);
@ -259,7 +248,7 @@ struct Set(T)
* Returns: $(D_KEYWORD this). * Returns: $(D_KEYWORD this).
*/ */
ref typeof(this) opAssign(S)(ref S that) ref typeof(this) opAssign(S)(ref S that)
if (is(Unqual!S == Set)) if (is(Unqual!S == Set))
{ {
this.data = that.data; this.data = that.data;
this.lengthIndex = that.lengthIndex; this.lengthIndex = that.lengthIndex;
@ -268,7 +257,7 @@ struct Set(T)
/// ditto /// ditto
ref typeof(this) opAssign(S)(S that) @trusted ref typeof(this) opAssign(S)(S that) @trusted
if (is(S == Set)) if (is(S == Set))
{ {
swap(this.data, that.data); swap(this.data, that.data);
swap(this.lengthIndex, that.lengthIndex); swap(this.lengthIndex, that.lengthIndex);
@ -305,7 +294,7 @@ struct Set(T)
} }
/// ///
unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(set.capacity == 0); assert(set.capacity == 0);
@ -333,7 +322,7 @@ struct Set(T)
} }
/// ///
unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(set.length == 0); assert(set.length == 0);
@ -342,56 +331,9 @@ struct Set(T)
assert(set.length == 1); assert(set.length == 1);
} }
private static const size_t[41] primes = [
3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521,
769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
12582917, 25165843, 139022417, 282312799, 573292817, 1164186217,
];
/// The maximum number of buckets the container can have. /// The maximum number of buckets the container can have.
enum size_t maxBucketCount = primes[$ - 1]; enum size_t maxBucketCount = primes[$ - 1];
static private size_t calculateHash(U)(ref const U value)
if (is(U == Unqual!T))
{
static if (isIntegral!T || isSomeChar!T || is(T == bool))
{
return (cast(size_t) value);
}
else
{
static assert(false);
}
}
static private size_t locateBucket(ref const DataType buckets,
const size_t hash)
in
{
assert(buckets.length > 0);
}
do
{
return hash % buckets.length;
}
/*
* Returns bucket position for `hash`. `0` may mean the 0th position or an
* empty `buckets` array.
*/
private size_t locateBucket(const size_t hash) const
{
return this.data.length == 0 ? 0 : locateBucket(this.data, hash);
}
private enum InsertStatus : byte
{
found = -1,
failed = 0,
added = 1,
}
/* /*
* Inserts the value in an empty or deleted bucket. If the value is * Inserts the value in an empty or deleted bucket. If the value is
* already in there, does nothing and returns InsertStatus.found. If the * already in there, does nothing and returns InsertStatus.found. If the
@ -400,7 +342,7 @@ struct Set(T)
*/ */
private InsertStatus insertInUnusedBucket(ref T value) private InsertStatus insertInUnusedBucket(ref T value)
{ {
auto bucketPosition = locateBucket(this.data, calculateHash(value)); auto bucketPosition = locateBucket(this.data, hasher(value));
foreach (ref e; this.data[bucketPosition .. $]) foreach (ref e; this.data[bucketPosition .. $])
{ {
@ -410,7 +352,7 @@ struct Set(T)
} }
else if (e.status != BucketStatus.used) // Insert the value. else if (e.status != BucketStatus.used) // Insert the value.
{ {
e.content = value; e.key = value;
return InsertStatus.added; return InsertStatus.added;
} }
} }
@ -424,23 +366,16 @@ struct Set(T)
* value = Element value. * value = Element value.
* *
* Returns: Amount of new elements inserted. * Returns: Amount of new elements inserted.
*
* Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed.
*/ */
size_t insert(T value) size_t insert(T value)
{ {
if (this.data.length == 0)
{
this.data = DataType(primes[0], allocator);
}
InsertStatus status = insertInUnusedBucket(value); InsertStatus status = insertInUnusedBucket(value);
for (; !status; status = insertInUnusedBucket(value)) for (; !status; status = insertInUnusedBucket(value))
{ {
if (this.primes.length == (this.lengthIndex + 1)) if (primes.length == (this.lengthIndex + 1))
{ {
throw make!HashContainerFullException(defaultAllocator, this.data.insertBack(Bucket!T(value));
"Set is full"); return 1;
} }
rehashToSize(this.lengthIndex + 1); rehashToSize(this.lengthIndex + 1);
} }
@ -448,7 +383,7 @@ struct Set(T)
} }
/// ///
unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(8 !in set); assert(8 !in set);
@ -476,7 +411,7 @@ struct Set(T)
*/ */
size_t remove(T value) size_t remove(T value)
{ {
auto bucketPosition = locateBucket(calculateHash(value)); auto bucketPosition = locateBucket(this.data, hasher(value));
foreach (ref e; this.data[bucketPosition .. $]) foreach (ref e; this.data[bucketPosition .. $])
{ {
if (e == value) // Found. if (e == value) // Found.
@ -493,7 +428,7 @@ struct Set(T)
} }
/// ///
@nogc unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(8 !in set); assert(8 !in set);
@ -517,7 +452,7 @@ struct Set(T)
*/ */
bool opBinaryRight(string op : "in")(auto ref const T value) const bool opBinaryRight(string op : "in")(auto ref const T value) const
{ {
auto bucketPosition = locateBucket(calculateHash(value)); auto bucketPosition = locateBucket(this.data, hasher(value));
foreach (ref e; this.data[bucketPosition .. $]) foreach (ref e; this.data[bucketPosition .. $])
{ {
if (e == value) // Found. if (e == value) // Found.
@ -533,7 +468,7 @@ struct Set(T)
} }
/// ///
@nogc unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
@ -587,14 +522,13 @@ struct Set(T)
{ {
if (e1.status == BucketStatus.used) if (e1.status == BucketStatus.used)
{ {
auto bucketPosition = locateBucket(storage, auto bucketPosition = hasher(e1.key) % storage.length;
calculateHash(e1.content));
foreach (ref e2; storage[bucketPosition .. $]) foreach (ref e2; storage[bucketPosition .. $])
{ {
if (e2.status != BucketStatus.used) // Insert the value. if (e2.status != BucketStatus.used) // Insert the value.
{ {
e2.content = e1.content; e2 = e1;
continue DataLoop; continue DataLoop;
} }
} }
@ -635,72 +569,52 @@ struct Set(T)
assert(set[].empty); assert(set[].empty);
} }
private @nogc unittest
{
const Set!int set;
assert(set[].empty);
}
private @nogc unittest
{
Set!int set;
set.insert(8);
auto r1 = set[];
auto r2 = r1.save();
r1.popFront();
assert(r1.empty);
r2.popBack();
assert(r2.empty);
}
private alias DataType = Array!(Bucket!T); private alias DataType = Array!(Bucket!T);
private DataType data; private DataType data;
private size_t lengthIndex; private size_t lengthIndex;
} }
// Basic insertion logic. // Basic insertion logic.
private @nogc unittest @nogc nothrow pure @safe unittest
{ {
Set!int set; Set!int set;
assert(set.insert(5) == 1); assert(set.insert(5) == 1);
assert(set.data[0].status == BucketStatus.empty); assert(set.data[0].status == BucketStatus.empty);
assert(set.data[1].status == BucketStatus.empty); assert(set.data[1].key == 5 && set.data[1].status == BucketStatus.used);
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); assert(set.data[2].status == BucketStatus.empty);
assert(set.data.length == 3); assert(set.data.length == 3);
assert(set.insert(5) == 0); assert(set.insert(5) == 0);
assert(set.data[0].status == BucketStatus.empty); assert(set.data[0].status == BucketStatus.empty);
assert(set.data[1].status == BucketStatus.empty); assert(set.data[1].key == 5 && set.data[1].status == BucketStatus.used);
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); assert(set.data[2].status == BucketStatus.empty);
assert(set.data.length == 3); assert(set.data.length == 3);
assert(set.insert(9) == 1); assert(set.insert(9) == 1);
assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used); assert(set.data[0].key == 9 && set.data[0].status == BucketStatus.used);
assert(set.data[1].status == BucketStatus.empty); assert(set.data[1].key == 5 && set.data[1].status == BucketStatus.used);
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); assert(set.data[2].status == BucketStatus.empty);
assert(set.data.length == 3); assert(set.data.length == 3);
assert(set.insert(7) == 1); assert(set.insert(7) == 1);
assert(set.insert(8) == 1); assert(set.insert(8) == 1);
assert(set.data[0].content == 7); assert(set.data[0].status == BucketStatus.empty);
assert(set.data[1].content == 8); assert(set.data[1].key == 8);
assert(set.data[2].content == 9); assert(set.data[2].key == 5);
assert(set.data[3].status == BucketStatus.empty); assert(set.data[3].status == BucketStatus.empty);
assert(set.data[5].content == 5); assert(set.data[4].key == 9);
assert(set.data[5].key == 7);
assert(set.data.length == 7); assert(set.data.length == 7);
assert(set.insert(16) == 1); assert(set.insert(16) == 1);
assert(set.data[2].content == 9); assert(set.data[5].key == 7);
assert(set.data[3].content == 16); assert(set.data[6].key == 16);
assert(set.data[4].status == BucketStatus.empty); assert(set.data.length == 7);
} }
// Static checks. // Static checks.
private unittest @nogc nothrow pure @safe unittest
{ {
import tanya.range.primitive; import tanya.range.primitive;
@ -717,3 +631,38 @@ private unittest
static assert(is(Set!ushort)); static assert(is(Set!ushort));
static assert(is(Set!bool)); static assert(is(Set!bool));
} }
// Slicing a const, default-initialized set compiles and gives an empty range.
@nogc nothrow pure @safe unittest
{
const Set!int set;
assert(set[].empty);
}
// A saved range advances independently of the range it was saved from: with a
// single element, one popFront empties the first range and one popBack empties
// the copy.
@nogc nothrow pure @safe unittest
{
Set!int set;
set.insert(8);
auto r1 = set[];
auto r2 = r1.save();
r1.popFront();
assert(r1.empty);
r2.popBack();
assert(r2.empty);
}
// Initial capacity is 0: constructing a set with only an allocator reserves no
// buckets.
@nogc nothrow pure @safe unittest
{
auto set = Set!int(defaultAllocator);
assert(set.capacity == 0);
}
// Capacity is set to a prime: a requested size of 8 results in a capacity of
// 13 rather than 8.
@nogc nothrow pure @safe unittest
{
auto set = Set!int(8);
assert(set.capacity == 13);
}