Implement a Set container first
This commit is contained in:
parent
ac3935d71b
commit
f013e2f1f4
@ -42,3 +42,38 @@ package struct HashEntry(K, V)
|
||||
Pair!(K, V) pair;
|
||||
HashEntry* next;
|
||||
}
|
||||
|
||||
package enum BucketStatus : byte
|
||||
{
|
||||
deleted = -1,
|
||||
empty = 0,
|
||||
used = 1,
|
||||
}
|
||||
|
||||
package struct Bucket(T)
|
||||
{
|
||||
this(ref T content)
|
||||
{
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@property void content(ref T content)
|
||||
{
|
||||
this.content_ = content;
|
||||
this.status = BucketStatus.used;
|
||||
}
|
||||
|
||||
@property ref T content()
|
||||
{
|
||||
return this.content_;
|
||||
}
|
||||
|
||||
void remove()
|
||||
{
|
||||
this.content = T.init;
|
||||
this.status = BucketStatus.deleted;
|
||||
}
|
||||
|
||||
T content_;
|
||||
BucketStatus status = BucketStatus.empty;
|
||||
}
|
||||
|
@ -1,221 +0,0 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* Hash table.
|
||||
*
|
||||
* Copyright: Eugene Wissner 2017.
|
||||
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
||||
* Mozilla Public License, v. 2.0).
|
||||
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
||||
*/
|
||||
module tanya.container.hashtable;
|
||||
|
||||
import std.algorithm.comparison;
|
||||
import std.traits;
|
||||
import tanya.container.entry;
|
||||
import tanya.memory;
|
||||
|
||||
private int compare(const(char)[] key1, const(char)[] key2)
|
||||
{
|
||||
return cmp(key1, key2);
|
||||
}
|
||||
|
||||
private int compare(K)(K key1, K key2)
|
||||
if (isIntegral!K)
|
||||
{
|
||||
return cast(int) (key1 - key2);
|
||||
}
|
||||
|
||||
struct Range(K, V)
|
||||
{
|
||||
private HashEntry!(K, V)*[] table;
|
||||
private size_t begin, end;
|
||||
|
||||
invariant
|
||||
{
|
||||
assert(this.begin <= this.end);
|
||||
}
|
||||
|
||||
private this(HashEntry!(K, V)*[] table)
|
||||
{
|
||||
this.table = table;
|
||||
}
|
||||
|
||||
@property bool empty() const
|
||||
{
|
||||
for (size_t i = this.begin; i < this.begin; ++i)
|
||||
{
|
||||
if (this.table[i] !is null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
struct HashTable(K, V)
|
||||
{
|
||||
/**
|
||||
* Create a new hashtable.
|
||||
*
|
||||
* Params:
|
||||
* size = Minimum number of initial buckets.
|
||||
* allocator = Allocator.
|
||||
*/
|
||||
this(const size_t size, shared Allocator allocator = defaultAllocator)
|
||||
in
|
||||
{
|
||||
assert(size >= 1);
|
||||
}
|
||||
body
|
||||
{
|
||||
this(allocator);
|
||||
this.table = new HashEntry!(K, V)*[size];
|
||||
}
|
||||
|
||||
/// Ditto.
|
||||
this(shared Allocator allocator)
|
||||
in
|
||||
{
|
||||
assert(allocator !is null);
|
||||
}
|
||||
body
|
||||
{
|
||||
this.allocator_ = allocator;
|
||||
}
|
||||
|
||||
private size_t calculateHash(const(char)[] key)
|
||||
{
|
||||
size_t hashval;
|
||||
|
||||
for (int i; hashval < size_t.max && i < key.length; ++i)
|
||||
{
|
||||
hashval = hashval << 8;
|
||||
hashval += key[i];
|
||||
}
|
||||
|
||||
return hashval % this.table.length;
|
||||
}
|
||||
|
||||
private size_t calculateHash()(K key)
|
||||
if (isIntegral!K)
|
||||
{
|
||||
return key % this.table.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a key-value pair from a hash table.
|
||||
*/
|
||||
V opIndex(K key)
|
||||
{
|
||||
auto bin = calculateHash(key);
|
||||
auto pair = this.table[bin];
|
||||
|
||||
while (pair !is null && compare(key, pair.pair[0]) > 0)
|
||||
{
|
||||
pair = pair.next;
|
||||
}
|
||||
|
||||
// Did we actually find anything?
|
||||
if (pair is null || compare(key, pair.pair[0]) != 0)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
else
|
||||
{
|
||||
return pair.pair[1];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a key-value pair into a hash table.
|
||||
*/
|
||||
bool insert(K key, V value)
|
||||
{
|
||||
HashEntry!(K, V)* last;
|
||||
auto bin = calculateHash(key);
|
||||
auto next = this.table[bin];
|
||||
|
||||
while (next !is null && compare(key, next.pair[0]) > 0)
|
||||
{
|
||||
last = next;
|
||||
next = next.next;
|
||||
}
|
||||
|
||||
// There's already a pair.
|
||||
if (next !is null && compare(key, next.pair[0]) == 0)
|
||||
{
|
||||
next.pair[1] = value;
|
||||
return false;
|
||||
}
|
||||
else // Nope, could't find it. Time to grow a pair.
|
||||
{
|
||||
auto newpair = new HashEntry!(K, V)(key, value);
|
||||
|
||||
// We're at the start of the linked list in this bin.
|
||||
if (next == this.table[bin])
|
||||
{
|
||||
newpair.next = next;
|
||||
this.table[bin] = newpair;
|
||||
}
|
||||
else if (next is null)
|
||||
{
|
||||
// We're at the end of the linked list in this bin.
|
||||
last.next = newpair;
|
||||
}
|
||||
else
|
||||
{
|
||||
// We're in the middle of the list.
|
||||
newpair.next = next;
|
||||
last.next = newpair;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void opIndexAssign(V value, K key)
|
||||
{
|
||||
insert(key, value);
|
||||
}
|
||||
|
||||
Range!(K, V) opIndex()
|
||||
{
|
||||
return typeof(return)(this.table);
|
||||
}
|
||||
|
||||
@property bool empty() const
|
||||
{
|
||||
foreach (entry; this.table)
|
||||
{
|
||||
if (entry !is null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private HashEntry!(K, V)*[] table;
|
||||
|
||||
mixin DefaultAllocator;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
auto ht = HashTable!(string, string)(65536);
|
||||
assert(ht.empty);
|
||||
|
||||
ht["key1"] = "inky";
|
||||
ht["key2"] = "pinky";
|
||||
ht["key3"] = "blinky";
|
||||
ht["key4"] = "floyd";
|
||||
|
||||
assert(!ht.empty);
|
||||
assert("inky" == ht["key1"]);
|
||||
assert("pinky" == ht["key2"]);
|
||||
assert("blinky" == ht["key3"]);
|
||||
assert("floyd" == ht["key4"]);
|
||||
}
|
@ -14,7 +14,29 @@ module tanya.container;
|
||||
|
||||
public import tanya.container.array;
|
||||
public import tanya.container.buffer;
|
||||
public import tanya.container.hashtable;
|
||||
public import tanya.container.set;
|
||||
public import tanya.container.list;
|
||||
public import tanya.container.string;
|
||||
public import tanya.container.queue;
|
||||
|
||||
/**
|
||||
* Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container
|
||||
* is full.
|
||||
*/
|
||||
class HashContainerFullException : Exception
|
||||
{
|
||||
/**
|
||||
* Params:
|
||||
* msg = The message for the exception.
|
||||
* file = The file where the exception occurred.
|
||||
* line = The line number where the exception occurred.
|
||||
* next = The previous exception in the chain of exceptions, if any.
|
||||
*/
|
||||
this(string msg,
|
||||
string file = __FILE__,
|
||||
size_t line = __LINE__,
|
||||
Throwable next = null) @nogc @safe pure nothrow
|
||||
{
|
||||
super(msg, file, line, next);
|
||||
}
|
||||
}
|
||||
|
262
source/tanya/container/set.d
Normal file
262
source/tanya/container/set.d
Normal file
@ -0,0 +1,262 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/**
|
||||
* This module implements a $(D_PSymbol Set) container that stores unique
|
||||
* values without any particular order.
|
||||
*
|
||||
* Copyright: Eugene Wissner 2017.
|
||||
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
||||
* Mozilla Public License, v. 2.0).
|
||||
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
||||
*/
|
||||
module tanya.container.set;
|
||||
|
||||
import std.algorithm.mutation;
|
||||
import std.traits;
|
||||
import tanya.container;
|
||||
import tanya.container.entry;
|
||||
import tanya.memory;
|
||||
|
||||
/**
|
||||
* Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values.
|
||||
*
|
||||
* Params:
|
||||
* E = Element type.
|
||||
*/
|
||||
struct Range(E)
|
||||
{
|
||||
@disable this();
|
||||
|
||||
@property Range save()
|
||||
{
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set is a data structure that stores unique values without any particular
|
||||
* order.
|
||||
*
|
||||
* This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions
|
||||
* are resolved with linear probing.
|
||||
*
|
||||
* Params:
|
||||
* T = Element type.
|
||||
*/
|
||||
struct Set(T)
|
||||
{
|
||||
invariant
|
||||
{
|
||||
assert(this.lengthIndex < primes.length);
|
||||
assert(this.data.length == 0
|
||||
|| this.data.length == primes[this.lengthIndex]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* Params:
|
||||
* allocator = Allocator.
|
||||
*
|
||||
* Precondition: $(D_INLINECODE allocator !is null).
|
||||
*/
|
||||
this(shared Allocator allocator)
|
||||
in
|
||||
{
|
||||
assert(allocator !is null);
|
||||
}
|
||||
body
|
||||
{
|
||||
this.allocator_ = allocator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum amount of elements this $(D_PSYMBOL Set) can hold without
|
||||
* resizing and rehashing. Note that it doesn't mean that the
|
||||
* $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements.
|
||||
* $(D_PSYMBOL capacity) tells the size of the container under a best-case
|
||||
* distribution of elements.
|
||||
*
|
||||
* Returns: $(D_PSYMBOL Set) capacity.
|
||||
*/
|
||||
@property size_t capacity() const
|
||||
{
|
||||
return this.data.length;
|
||||
}
|
||||
|
||||
private static const size_t[41] primes = [
|
||||
3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521,
|
||||
769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593,
|
||||
49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
|
||||
12582917, 25165843, 139022417, 282312799, 573292817, 1164186217,
|
||||
];
|
||||
|
||||
static private size_t calculateHash(ref T value)
|
||||
{
|
||||
static if (isIntegral!T || isSomeChar!T || is(T == bool))
|
||||
{
|
||||
return (cast(size_t) value);
|
||||
}
|
||||
else
|
||||
{
|
||||
static assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
static private size_t locateBucket(ref DataType buckets, size_t hash)
|
||||
{
|
||||
return hash % buckets.length;
|
||||
}
|
||||
|
||||
private enum InsertStatus : byte
|
||||
{
|
||||
found = -1,
|
||||
failed = 0,
|
||||
added = 1,
|
||||
}
|
||||
|
||||
// Inserts the value in an empty or deleted bucket. If the value is
|
||||
// already in there, does nothing and returns true. If the hash array
|
||||
// is full returns false.
|
||||
static private InsertStatus insertInUnusedBucket(ref DataType buckets,
|
||||
ref T value)
|
||||
{
|
||||
auto bucketPosition = locateBucket(buckets, calculateHash(value));
|
||||
|
||||
foreach (ref e; buckets[bucketPosition .. $])
|
||||
{
|
||||
if (e.content == value) // Already in the set.
|
||||
{
|
||||
return InsertStatus.found;
|
||||
}
|
||||
else if (e.status != BucketStatus.used) // Insert the value.
|
||||
{
|
||||
e.content = value;
|
||||
return InsertStatus.added;
|
||||
}
|
||||
}
|
||||
return InsertStatus.failed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a new element.
|
||||
*
|
||||
* Params:
|
||||
* value = Element value.
|
||||
*
|
||||
* Returns: Amount of new elements inserted.
|
||||
*
|
||||
* Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed.
|
||||
*/
|
||||
size_t insert(T value)
|
||||
{
|
||||
if (this.data.length == 0)
|
||||
{
|
||||
this.data = DataType(primes[0], allocator);
|
||||
}
|
||||
|
||||
InsertStatus status = insertInUnusedBucket(this.data, value);
|
||||
for (; !status; status = insertInUnusedBucket(this.data, value))
|
||||
{
|
||||
rehash();
|
||||
}
|
||||
return status == InsertStatus.added;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes an element.
|
||||
*
|
||||
* Params:
|
||||
* value = Element value.
|
||||
*
|
||||
* Returns: Amount of the elements removed.
|
||||
*/
|
||||
size_t remove(T value)
|
||||
{
|
||||
if (this.data.length == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto bucketPosition = locateBucket(this.data, calculateHash(value));
|
||||
foreach (ref e; this.data[bucketPosition .. $])
|
||||
{
|
||||
if (e.content == value) // Found.
|
||||
{
|
||||
e.remove();
|
||||
return 1;
|
||||
}
|
||||
else if (e.status == BucketStatus.empty) // Insert the value.
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private void rehash()
|
||||
{
|
||||
if ((this.primes.length - 1) == this.lengthIndex)
|
||||
{
|
||||
throw make!HashContainerFullException(defaultAllocator,
|
||||
"Set is full");
|
||||
}
|
||||
|
||||
auto storage = DataType(primes[this.lengthIndex + 1], allocator);
|
||||
foreach (ref e; this.data[])
|
||||
{
|
||||
if (e.status == BucketStatus.used)
|
||||
{
|
||||
insertInUnusedBucket(storage, e.content);
|
||||
}
|
||||
}
|
||||
move(storage, this.data);
|
||||
++this.lengthIndex;
|
||||
}
|
||||
|
||||
private alias DataType = Array!(Bucket!T);
|
||||
private DataType data;
|
||||
private size_t lengthIndex;
|
||||
|
||||
mixin DefaultAllocator;
|
||||
}
|
||||
|
||||
// Basic insertion logic.
|
||||
private unittest
|
||||
{
|
||||
Set!int set;
|
||||
|
||||
assert(set.insert(5) == 1);
|
||||
assert(set.data[0].status == BucketStatus.empty);
|
||||
assert(set.data[1].status == BucketStatus.empty);
|
||||
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
||||
assert(set.data.length == 3);
|
||||
|
||||
assert(set.insert(5) == 0);
|
||||
assert(set.data[0].status == BucketStatus.empty);
|
||||
assert(set.data[1].status == BucketStatus.empty);
|
||||
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
||||
assert(set.data.length == 3);
|
||||
|
||||
assert(set.insert(9) == 1);
|
||||
assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used);
|
||||
assert(set.data[1].status == BucketStatus.empty);
|
||||
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
||||
assert(set.data.length == 3);
|
||||
|
||||
assert(set.insert(7) == 1);
|
||||
assert(set.insert(8) == 1);
|
||||
assert(set.data[0].content == 7);
|
||||
assert(set.data[1].content == 8);
|
||||
assert(set.data[2].content == 9);
|
||||
assert(set.data[3].status == BucketStatus.empty);
|
||||
assert(set.data[5].content == 5);
|
||||
assert(set.data.length == 7);
|
||||
|
||||
assert(set.insert(16) == 1);
|
||||
assert(set.data[2].content == 9);
|
||||
assert(set.data[3].content == 16);
|
||||
assert(set.data[4].status == BucketStatus.empty);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user