diff --git a/source/tanya/container/entry.d b/source/tanya/container/entry.d index 3300d13..e3b6ea8 100644 --- a/source/tanya/container/entry.d +++ b/source/tanya/container/entry.d @@ -42,3 +42,38 @@ package struct HashEntry(K, V) Pair!(K, V) pair; HashEntry* next; } + +package enum BucketStatus : byte +{ + deleted = -1, + empty = 0, + used = 1, +} + +package struct Bucket(T) +{ + this(ref T content) + { + this.content = content; + } + + @property void content(ref T content) + { + this.content_ = content; + this.status = BucketStatus.used; + } + + @property ref T content() + { + return this.content_; + } + + void remove() + { + this.content = T.init; + this.status = BucketStatus.deleted; + } + + T content_; + BucketStatus status = BucketStatus.empty; +} diff --git a/source/tanya/container/hashtable.d b/source/tanya/container/hashtable.d deleted file mode 100644 index f2ddc5c..0000000 --- a/source/tanya/container/hashtable.d +++ /dev/null @@ -1,221 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/** - * Hash table. - * - * Copyright: Eugene Wissner 2017. - * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, - * Mozilla Public License, v. 2.0). - * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) - */ -module tanya.container.hashtable; - -import std.algorithm.comparison; -import std.traits; -import tanya.container.entry; -import tanya.memory; - -private int compare(const(char)[] key1, const(char)[] key2) -{ - return cmp(key1, key2); -} - -private int compare(K)(K key1, K key2) - if (isIntegral!K) -{ - return cast(int) (key1 - key2); -} - -struct Range(K, V) -{ - private HashEntry!(K, V)*[] table; - private size_t begin, end; - - invariant - { - assert(this.begin <= this.end); - } - - private this(HashEntry!(K, V)*[] table) - { - this.table = table; - } - - @property bool empty() const - { - for (size_t i = this.begin; i < this.begin; ++i) - { - if (this.table[i] !is null) - { - return false; - } - } - return true; - } -} - -struct HashTable(K, V) -{ - /** - * Create a new hashtable. - * - * Params: - * size = Minimum number of initial buckets. - * allocator = Allocator. - */ - this(const size_t size, shared Allocator allocator = defaultAllocator) - in - { - assert(size >= 1); - } - body - { - this(allocator); - this.table = new HashEntry!(K, V)*[size]; - } - - /// Ditto. - this(shared Allocator allocator) - in - { - assert(allocator !is null); - } - body - { - this.allocator_ = allocator; - } - - private size_t calculateHash(const(char)[] key) - { - size_t hashval; - - for (int i; hashval < size_t.max && i < key.length; ++i) - { - hashval = hashval << 8; - hashval += key[i]; - } - - return hashval % this.table.length; - } - - private size_t calculateHash()(K key) - if (isIntegral!K) - { - return key % this.table.length; - } - - /** - * Retrieve a key-value pair from a hash table. - */ - V opIndex(K key) - { - auto bin = calculateHash(key); - auto pair = this.table[bin]; - - while (pair !is null && compare(key, pair.pair[0]) > 0) - { - pair = pair.next; - } - - // Did we actually find anything? - if (pair is null || compare(key, pair.pair[0]) != 0) - { - return null; - } - else - { - return pair.pair[1]; - } - } - - /** - * Insert a key-value pair into a hash table. - */ - bool insert(K key, V value) - { - HashEntry!(K, V)* last; - auto bin = calculateHash(key); - auto next = this.table[bin]; - - while (next !is null && compare(key, next.pair[0]) > 0) - { - last = next; - next = next.next; - } - - // There's already a pair. - if (next !is null && compare(key, next.pair[0]) == 0) - { - next.pair[1] = value; - return false; - } - else // Nope, could't find it. Time to grow a pair. - { - auto newpair = new HashEntry!(K, V)(key, value); - - // We're at the start of the linked list in this bin. - if (next == this.table[bin]) - { - newpair.next = next; - this.table[bin] = newpair; - } - else if (next is null) - { - // We're at the end of the linked list in this bin. - last.next = newpair; - } - else - { - // We're in the middle of the list. - newpair.next = next; - last.next = newpair; - } - return true; - } - } - - void opIndexAssign(V value, K key) - { - insert(key, value); - } - - Range!(K, V) opIndex() - { - return typeof(return)(this.table); - } - - @property bool empty() const - { - foreach (entry; this.table) - { - if (entry !is null) - { - return false; - } - } - return true; - } - - private HashEntry!(K, V)*[] table; - - mixin DefaultAllocator; -} - -unittest -{ - auto ht = HashTable!(string, string)(65536); - assert(ht.empty); - - ht["key1"] = "inky"; - ht["key2"] = "pinky"; - ht["key3"] = "blinky"; - ht["key4"] = "floyd"; - - assert(!ht.empty); - assert("inky" == ht["key1"]); - assert("pinky" == ht["key2"]); - assert("blinky" == ht["key3"]); - assert("floyd" == ht["key4"]); -} diff --git a/source/tanya/container/package.d b/source/tanya/container/package.d index 04ceffb..c813f76 100644 --- a/source/tanya/container/package.d +++ b/source/tanya/container/package.d @@ -14,7 +14,29 @@ module tanya.container; public import tanya.container.array; public import tanya.container.buffer; -public import tanya.container.hashtable; +public import tanya.container.set; public import tanya.container.list; public import tanya.container.string; public import tanya.container.queue; + +/** + * Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container + * is full. + */ +class HashContainerFullException : Exception +{ + /** + * Params: + * msg = The message for the exception. + * file = The file where the exception occurred. + * line = The line number where the exception occurred. + * next = The previous exception in the chain of exceptions, if any. + */ + this(string msg, + string file = __FILE__, + size_t line = __LINE__, + Throwable next = null) @nogc @safe pure nothrow + { + super(msg, file, line, next); + } +} diff --git a/source/tanya/container/set.d b/source/tanya/container/set.d new file mode 100644 index 0000000..62c8585 --- /dev/null +++ b/source/tanya/container/set.d @@ -0,0 +1,262 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * This module implements a $(D_PSymbol Set) container that stores unique + * values without any particular order. + * + * Copyright: Eugene Wissner 2017. + * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, + * Mozilla Public License, v. 2.0). + * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) + */ +module tanya.container.set; + +import std.algorithm.mutation; +import std.traits; +import tanya.container; +import tanya.container.entry; +import tanya.memory; + +/** + * Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values. + * + * Params: + * E = Element type. + */ +struct Range(E) +{ + @disable this(); + + @property Range save() + { + return this; + } +} + +/** + * Set is a data structure that stores unique values without any particular + * order. + * + * This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions + * are resolved with linear probing. + * + * Params: + * T = Element type. + */ +struct Set(T) +{ + invariant + { + assert(this.lengthIndex < primes.length); + assert(this.data.length == 0 + || this.data.length == primes[this.lengthIndex]); + } + + /** + * Constructor. + * + * Params: + * allocator = Allocator. + * + * Precondition: $(D_INLINECODE allocator !is null). + */ + this(shared Allocator allocator) + in + { + assert(allocator !is null); + } + body + { + this.allocator_ = allocator; + } + + /** + * Maximum amount of elements this $(D_PSYMBOL Set) can hold without + * resizing and rehashing. Note that it doesn't mean that the + * $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements. + * $(D_PSYMBOL capacity) tells the size of the container under a best-case + * distribution of elements. + * + * Returns: $(D_PSYMBOL Set) capacity. + */ + @property size_t capacity() const + { + return this.data.length; + } + + private static const size_t[41] primes = [ + 3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521, + 769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593, + 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, + 12582917, 25165843, 139022417, 282312799, 573292817, 1164186217, + ]; + + static private size_t calculateHash(ref T value) + { + static if (isIntegral!T || isSomeChar!T || is(T == bool)) + { + return (cast(size_t) value); + } + else + { + static assert(false); + } + } + + static private size_t locateBucket(ref DataType buckets, size_t hash) + { + return hash % buckets.length; + } + + private enum InsertStatus : byte + { + found = -1, + failed = 0, + added = 1, + } + + // Inserts the value in an empty or deleted bucket. If the value is + // already in there, does nothing and returns true. If the hash array + // is full returns false. + static private InsertStatus insertInUnusedBucket(ref DataType buckets, + ref T value) + { + auto bucketPosition = locateBucket(buckets, calculateHash(value)); + + foreach (ref e; buckets[bucketPosition .. $]) + { + if (e.content == value) // Already in the set. + { + return InsertStatus.found; + } + else if (e.status != BucketStatus.used) // Insert the value. + { + e.content = value; + return InsertStatus.added; + } + } + return InsertStatus.failed; + } + + /** + * Inserts a new element. + * + * Params: + * value = Element value. + * + * Returns: Amount of new elements inserted. + * + * Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed. + */ + size_t insert(T value) + { + if (this.data.length == 0) + { + this.data = DataType(primes[0], allocator); + } + + InsertStatus status = insertInUnusedBucket(this.data, value); + for (; !status; status = insertInUnusedBucket(this.data, value)) + { + rehash(); + } + return status == InsertStatus.added; + } + + /** + * Removes an element. + * + * Params: + * value = Element value. + * + * Returns: Amount of the elements removed. + */ + size_t remove(T value) + { + if (this.data.length == 0) + { + return 0; + } + + auto bucketPosition = locateBucket(this.data, calculateHash(value)); + foreach (ref e; this.data[bucketPosition .. $]) + { + if (e.content == value) // Found. + { + e.remove(); + return 1; + } + else if (e.status == BucketStatus.empty) // Insert the value. + { + return 0; + } + } + return 0; + } + + private void rehash() + { + if ((this.primes.length - 1) == this.lengthIndex) + { + throw make!HashContainerFullException(defaultAllocator, + "Set is full"); + } + + auto storage = DataType(primes[this.lengthIndex + 1], allocator); + foreach (ref e; this.data[]) + { + if (e.status == BucketStatus.used) + { + insertInUnusedBucket(storage, e.content); + } + } + move(storage, this.data); + ++this.lengthIndex; + } + + private alias DataType = Array!(Bucket!T); + private DataType data; + private size_t lengthIndex; + + mixin DefaultAllocator; +} + +// Basic insertion logic. +private unittest +{ + Set!int set; + + assert(set.insert(5) == 1); + assert(set.data[0].status == BucketStatus.empty); + assert(set.data[1].status == BucketStatus.empty); + assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); + assert(set.data.length == 3); + + assert(set.insert(5) == 0); + assert(set.data[0].status == BucketStatus.empty); + assert(set.data[1].status == BucketStatus.empty); + assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); + assert(set.data.length == 3); + + assert(set.insert(9) == 1); + assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used); + assert(set.data[1].status == BucketStatus.empty); + assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); + assert(set.data.length == 3); + + assert(set.insert(7) == 1); + assert(set.insert(8) == 1); + assert(set.data[0].content == 7); + assert(set.data[1].content == 8); + assert(set.data[2].content == 9); + assert(set.data[3].status == BucketStatus.empty); + assert(set.data[5].content == 5); + assert(set.data.length == 7); + + assert(set.insert(16) == 1); + assert(set.data[2].content == 9); + assert(set.data[3].content == 16); + assert(set.data[4].status == BucketStatus.empty); +}