summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/tanya/container/entry.d35
-rw-r--r--source/tanya/container/hashtable.d221
-rw-r--r--source/tanya/container/package.d24
-rw-r--r--source/tanya/container/set.d262
4 files changed, 320 insertions, 222 deletions
diff --git a/source/tanya/container/entry.d b/source/tanya/container/entry.d
index 3300d13..e3b6ea8 100644
--- a/source/tanya/container/entry.d
+++ b/source/tanya/container/entry.d
@@ -42,3 +42,38 @@ package struct HashEntry(K, V)
Pair!(K, V) pair;
HashEntry* next;
}
+
+package enum BucketStatus : byte
+{
+ deleted = -1,
+ empty = 0,
+ used = 1,
+}
+
+package struct Bucket(T)
+{
+ this(ref T content)
+ {
+ this.content = content;
+ }
+
+ @property void content(ref T content)
+ {
+ this.content_ = content;
+ this.status = BucketStatus.used;
+ }
+
+ @property ref T content()
+ {
+ return this.content_;
+ }
+
+ void remove()
+ {
+ this.content = T.init;
+ this.status = BucketStatus.deleted;
+ }
+
+ T content_;
+ BucketStatus status = BucketStatus.empty;
+}
diff --git a/source/tanya/container/hashtable.d b/source/tanya/container/hashtable.d
deleted file mode 100644
index f2ddc5c..0000000
--- a/source/tanya/container/hashtable.d
+++ /dev/null
@@ -1,221 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/**
- * Hash table.
- *
- * Copyright: Eugene Wissner 2017.
- * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
- * Mozilla Public License, v. 2.0).
- * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
- */
-module tanya.container.hashtable;
-
-import std.algorithm.comparison;
-import std.traits;
-import tanya.container.entry;
-import tanya.memory;
-
-private int compare(const(char)[] key1, const(char)[] key2)
-{
- return cmp(key1, key2);
-}
-
-private int compare(K)(K key1, K key2)
- if (isIntegral!K)
-{
- return cast(int) (key1 - key2);
-}
-
-struct Range(K, V)
-{
- private HashEntry!(K, V)*[] table;
- private size_t begin, end;
-
- invariant
- {
- assert(this.begin <= this.end);
- }
-
- private this(HashEntry!(K, V)*[] table)
- {
- this.table = table;
- }
-
- @property bool empty() const
- {
- for (size_t i = this.begin; i < this.begin; ++i)
- {
- if (this.table[i] !is null)
- {
- return false;
- }
- }
- return true;
- }
-}
-
-struct HashTable(K, V)
-{
- /**
- * Create a new hashtable.
- *
- * Params:
- * size = Minimum number of initial buckets.
- * allocator = Allocator.
- */
- this(const size_t size, shared Allocator allocator = defaultAllocator)
- in
- {
- assert(size >= 1);
- }
- body
- {
- this(allocator);
- this.table = new HashEntry!(K, V)*[size];
- }
-
- /// Ditto.
- this(shared Allocator allocator)
- in
- {
- assert(allocator !is null);
- }
- body
- {
- this.allocator_ = allocator;
- }
-
- private size_t calculateHash(const(char)[] key)
- {
- size_t hashval;
-
- for (int i; hashval < size_t.max && i < key.length; ++i)
- {
- hashval = hashval << 8;
- hashval += key[i];
- }
-
- return hashval % this.table.length;
- }
-
- private size_t calculateHash()(K key)
- if (isIntegral!K)
- {
- return key % this.table.length;
- }
-
- /**
- * Retrieve a key-value pair from a hash table.
- */
- V opIndex(K key)
- {
- auto bin = calculateHash(key);
- auto pair = this.table[bin];
-
- while (pair !is null && compare(key, pair.pair[0]) > 0)
- {
- pair = pair.next;
- }
-
- // Did we actually find anything?
- if (pair is null || compare(key, pair.pair[0]) != 0)
- {
- return null;
- }
- else
- {
- return pair.pair[1];
- }
- }
-
- /**
- * Insert a key-value pair into a hash table.
- */
- bool insert(K key, V value)
- {
- HashEntry!(K, V)* last;
- auto bin = calculateHash(key);
- auto next = this.table[bin];
-
- while (next !is null && compare(key, next.pair[0]) > 0)
- {
- last = next;
- next = next.next;
- }
-
- // There's already a pair.
- if (next !is null && compare(key, next.pair[0]) == 0)
- {
- next.pair[1] = value;
- return false;
- }
- else // Nope, could't find it. Time to grow a pair.
- {
- auto newpair = new HashEntry!(K, V)(key, value);
-
- // We're at the start of the linked list in this bin.
- if (next == this.table[bin])
- {
- newpair.next = next;
- this.table[bin] = newpair;
- }
- else if (next is null)
- {
- // We're at the end of the linked list in this bin.
- last.next = newpair;
- }
- else
- {
- // We're in the middle of the list.
- newpair.next = next;
- last.next = newpair;
- }
- return true;
- }
- }
-
- void opIndexAssign(V value, K key)
- {
- insert(key, value);
- }
-
- Range!(K, V) opIndex()
- {
- return typeof(return)(this.table);
- }
-
- @property bool empty() const
- {
- foreach (entry; this.table)
- {
- if (entry !is null)
- {
- return false;
- }
- }
- return true;
- }
-
- private HashEntry!(K, V)*[] table;
-
- mixin DefaultAllocator;
-}
-
-unittest
-{
- auto ht = HashTable!(string, string)(65536);
- assert(ht.empty);
-
- ht["key1"] = "inky";
- ht["key2"] = "pinky";
- ht["key3"] = "blinky";
- ht["key4"] = "floyd";
-
- assert(!ht.empty);
- assert("inky" == ht["key1"]);
- assert("pinky" == ht["key2"]);
- assert("blinky" == ht["key3"]);
- assert("floyd" == ht["key4"]);
-}
diff --git a/source/tanya/container/package.d b/source/tanya/container/package.d
index 04ceffb..c813f76 100644
--- a/source/tanya/container/package.d
+++ b/source/tanya/container/package.d
@@ -14,7 +14,29 @@ module tanya.container;
public import tanya.container.array;
public import tanya.container.buffer;
-public import tanya.container.hashtable;
+public import tanya.container.set;
public import tanya.container.list;
public import tanya.container.string;
public import tanya.container.queue;
+
+/**
+ * Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container
+ * is full.
+ */
+class HashContainerFullException : Exception
+{
+ /**
+ * Params:
+ * msg = The message for the exception.
+ * file = The file where the exception occurred.
+ * line = The line number where the exception occurred.
+ * next = The previous exception in the chain of exceptions, if any.
+ */
+ this(string msg,
+ string file = __FILE__,
+ size_t line = __LINE__,
+ Throwable next = null) @nogc @safe pure nothrow
+ {
+ super(msg, file, line, next);
+ }
+}
diff --git a/source/tanya/container/set.d b/source/tanya/container/set.d
new file mode 100644
index 0000000..62c8585
--- /dev/null
+++ b/source/tanya/container/set.d
@@ -0,0 +1,262 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/**
+ * This module implements a $(D_PSymbol Set) container that stores unique
+ * values without any particular order.
+ *
+ * Copyright: Eugene Wissner 2017.
+ * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
+ * Mozilla Public License, v. 2.0).
+ * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
+ */
+module tanya.container.set;
+
+import std.algorithm.mutation;
+import std.traits;
+import tanya.container;
+import tanya.container.entry;
+import tanya.memory;
+
+/**
+ * Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values.
+ *
+ * Params:
+ * E = Element type.
+ */
+struct Range(E)
+{
+ @disable this();
+
+ @property Range save()
+ {
+ return this;
+ }
+}
+
+/**
+ * Set is a data structure that stores unique values without any particular
+ * order.
+ *
+ * This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions
+ * are resolved with linear probing.
+ *
+ * Params:
+ * T = Element type.
+ */
+struct Set(T)
+{
+ invariant
+ {
+ assert(this.lengthIndex < primes.length);
+ assert(this.data.length == 0
+ || this.data.length == primes[this.lengthIndex]);
+ }
+
+ /**
+ * Constructor.
+ *
+ * Params:
+ * allocator = Allocator.
+ *
+ * Precondition: $(D_INLINECODE allocator !is null).
+ */
+ this(shared Allocator allocator)
+ in
+ {
+ assert(allocator !is null);
+ }
+ body
+ {
+ this.allocator_ = allocator;
+ }
+
+ /**
+ * Maximum amount of elements this $(D_PSYMBOL Set) can hold without
+ * resizing and rehashing. Note that it doesn't mean that the
+ * $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements.
+ * $(D_PSYMBOL capacity) tells the size of the container under a best-case
+ * distribution of elements.
+ *
+ * Returns: $(D_PSYMBOL Set) capacity.
+ */
+ @property size_t capacity() const
+ {
+ return this.data.length;
+ }
+
+ private static const size_t[41] primes = [
+ 3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521,
+ 769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593,
+ 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
+ 12582917, 25165843, 139022417, 282312799, 573292817, 1164186217,
+ ];
+
+ static private size_t calculateHash(ref T value)
+ {
+ static if (isIntegral!T || isSomeChar!T || is(T == bool))
+ {
+ return (cast(size_t) value);
+ }
+ else
+ {
+ static assert(false);
+ }
+ }
+
+ static private size_t locateBucket(ref DataType buckets, size_t hash)
+ {
+ return hash % buckets.length;
+ }
+
+ private enum InsertStatus : byte
+ {
+ found = -1,
+ failed = 0,
+ added = 1,
+ }
+
+ // Inserts the value in an empty or deleted bucket. If the value is
+ // already in there, does nothing and returns true. If the hash array
+ // is full returns false.
+ static private InsertStatus insertInUnusedBucket(ref DataType buckets,
+ ref T value)
+ {
+ auto bucketPosition = locateBucket(buckets, calculateHash(value));
+
+ foreach (ref e; buckets[bucketPosition .. $])
+ {
+ if (e.content == value) // Already in the set.
+ {
+ return InsertStatus.found;
+ }
+ else if (e.status != BucketStatus.used) // Insert the value.
+ {
+ e.content = value;
+ return InsertStatus.added;
+ }
+ }
+ return InsertStatus.failed;
+ }
+
+ /**
+ * Inserts a new element.
+ *
+ * Params:
+ * value = Element value.
+ *
+ * Returns: Amount of new elements inserted.
+ *
+ * Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed.
+ */
+ size_t insert(T value)
+ {
+ if (this.data.length == 0)
+ {
+ this.data = DataType(primes[0], allocator);
+ }
+
+ InsertStatus status = insertInUnusedBucket(this.data, value);
+ for (; !status; status = insertInUnusedBucket(this.data, value))
+ {
+ rehash();
+ }
+ return status == InsertStatus.added;
+ }
+
+ /**
+ * Removes an element.
+ *
+ * Params:
+ * value = Element value.
+ *
+ * Returns: Amount of the elements removed.
+ */
+ size_t remove(T value)
+ {
+ if (this.data.length == 0)
+ {
+ return 0;
+ }
+
+ auto bucketPosition = locateBucket(this.data, calculateHash(value));
+ foreach (ref e; this.data[bucketPosition .. $])
+ {
+ if (e.content == value) // Found.
+ {
+ e.remove();
+ return 1;
+ }
+ else if (e.status == BucketStatus.empty) // Insert the value.
+ {
+ return 0;
+ }
+ }
+ return 0;
+ }
+
+ private void rehash()
+ {
+ if ((this.primes.length - 1) == this.lengthIndex)
+ {
+ throw make!HashContainerFullException(defaultAllocator,
+ "Set is full");
+ }
+
+ auto storage = DataType(primes[this.lengthIndex + 1], allocator);
+ foreach (ref e; this.data[])
+ {
+ if (e.status == BucketStatus.used)
+ {
+ insertInUnusedBucket(storage, e.content);
+ }
+ }
+ move(storage, this.data);
+ ++this.lengthIndex;
+ }
+
+ private alias DataType = Array!(Bucket!T);
+ private DataType data;
+ private size_t lengthIndex;
+
+ mixin DefaultAllocator;
+}
+
+// Basic insertion logic.
+private unittest
+{
+ Set!int set;
+
+ assert(set.insert(5) == 1);
+ assert(set.data[0].status == BucketStatus.empty);
+ assert(set.data[1].status == BucketStatus.empty);
+ assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
+ assert(set.data.length == 3);
+
+ assert(set.insert(5) == 0);
+ assert(set.data[0].status == BucketStatus.empty);
+ assert(set.data[1].status == BucketStatus.empty);
+ assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
+ assert(set.data.length == 3);
+
+ assert(set.insert(9) == 1);
+ assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used);
+ assert(set.data[1].status == BucketStatus.empty);
+ assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
+ assert(set.data.length == 3);
+
+ assert(set.insert(7) == 1);
+ assert(set.insert(8) == 1);
+ assert(set.data[0].content == 7);
+ assert(set.data[1].content == 8);
+ assert(set.data[2].content == 9);
+ assert(set.data[3].status == BucketStatus.empty);
+ assert(set.data[5].content == 5);
+ assert(set.data.length == 7);
+
+ assert(set.insert(16) == 1);
+ assert(set.data[2].content == 9);
+ assert(set.data[3].content == 16);
+ assert(set.data[4].status == BucketStatus.empty);
+}