Implement a Set container first
This commit is contained in:
		| @@ -42,3 +42,38 @@ package struct HashEntry(K, V) | ||||
|     Pair!(K, V) pair; | ||||
|     HashEntry* next; | ||||
| } | ||||
|  | ||||
| package enum BucketStatus : byte | ||||
| { | ||||
|     deleted = -1, | ||||
|     empty = 0, | ||||
|     used = 1, | ||||
| } | ||||
|  | ||||
| package struct Bucket(T) | ||||
| { | ||||
|     this(ref T content) | ||||
|     { | ||||
|         this.content = content; | ||||
|     } | ||||
|  | ||||
|     @property void content(ref T content) | ||||
|     { | ||||
|         this.content_ = content; | ||||
|         this.status = BucketStatus.used; | ||||
|     } | ||||
|  | ||||
|     @property ref T content() | ||||
|     { | ||||
|         return this.content_; | ||||
|     } | ||||
|  | ||||
|     void remove() | ||||
|     { | ||||
|         this.content = T.init; | ||||
|         this.status = BucketStatus.deleted; | ||||
|     } | ||||
|  | ||||
|     T content_; | ||||
|     BucketStatus status = BucketStatus.empty; | ||||
| } | ||||
|   | ||||
| @@ -1,221 +0,0 @@ | ||||
| /* This Source Code Form is subject to the terms of the Mozilla Public | ||||
|  * License, v. 2.0. If a copy of the MPL was not distributed with this | ||||
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||||
|  | ||||
| /** | ||||
|  * Hash table. | ||||
|  * | ||||
|  * Copyright: Eugene Wissner 2017. | ||||
|  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, | ||||
|  *                  Mozilla Public License, v. 2.0). | ||||
|  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) | ||||
|  */ | ||||
| module tanya.container.hashtable; | ||||
|  | ||||
| import std.algorithm.comparison; | ||||
| import std.traits; | ||||
| import tanya.container.entry; | ||||
| import tanya.memory; | ||||
|  | ||||
| private int compare(const(char)[] key1, const(char)[] key2) | ||||
| { | ||||
|     return cmp(key1, key2); | ||||
| } | ||||
|  | ||||
| private int compare(K)(K key1, K key2) | ||||
|     if (isIntegral!K) | ||||
| { | ||||
|     return cast(int) (key1 - key2); | ||||
| } | ||||
|  | ||||
| struct Range(K, V) | ||||
| { | ||||
|     private HashEntry!(K, V)*[] table; | ||||
|     private size_t begin, end; | ||||
|  | ||||
|     invariant | ||||
|     { | ||||
|         assert(this.begin <= this.end); | ||||
|     } | ||||
|  | ||||
|     private this(HashEntry!(K, V)*[] table) | ||||
|     { | ||||
|         this.table = table; | ||||
|     } | ||||
|  | ||||
|     @property bool empty() const | ||||
|     { | ||||
|         for (size_t i = this.begin; i < this.begin; ++i) | ||||
|         { | ||||
|             if (this.table[i] !is null) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct HashTable(K, V) | ||||
| { | ||||
|     /** | ||||
|      * Create a new hashtable. | ||||
|      * | ||||
|      * Params: | ||||
|      *  size      = Minimum number of initial buckets. | ||||
|      *  allocator = Allocator. | ||||
|      */ | ||||
|     this(const size_t size, shared Allocator allocator = defaultAllocator) | ||||
|     in | ||||
|     { | ||||
|         assert(size >= 1); | ||||
|     } | ||||
|     body | ||||
|     { | ||||
|         this(allocator); | ||||
|         this.table = new HashEntry!(K, V)*[size]; | ||||
|     } | ||||
|  | ||||
|     /// Ditto. | ||||
|     this(shared Allocator allocator) | ||||
|     in | ||||
|     { | ||||
|         assert(allocator !is null); | ||||
|     } | ||||
|     body | ||||
|     { | ||||
|         this.allocator_ = allocator; | ||||
|     } | ||||
|  | ||||
|     private size_t calculateHash(const(char)[] key) | ||||
|     { | ||||
|         size_t hashval; | ||||
|  | ||||
|         for (int i; hashval < size_t.max && i < key.length; ++i) | ||||
|         { | ||||
|             hashval = hashval << 8; | ||||
|             hashval += key[i]; | ||||
|         } | ||||
|  | ||||
|         return hashval % this.table.length; | ||||
|     } | ||||
|  | ||||
|     private size_t calculateHash()(K key) | ||||
|         if (isIntegral!K) | ||||
|     { | ||||
|         return key % this.table.length; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Retrieve a key-value pair from a hash table. | ||||
|      */ | ||||
|     V opIndex(K key) | ||||
|     { | ||||
|         auto bin = calculateHash(key); | ||||
|         auto pair = this.table[bin]; | ||||
|  | ||||
|         while (pair !is null && compare(key, pair.pair[0]) > 0) | ||||
|         { | ||||
|             pair = pair.next; | ||||
|         } | ||||
|  | ||||
|         // Did we actually find anything? | ||||
|         if (pair is null || compare(key, pair.pair[0]) != 0) | ||||
|         { | ||||
|             return null; | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             return pair.pair[1]; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Insert a key-value pair into a hash table. | ||||
|      */ | ||||
|     bool insert(K key, V value) | ||||
|     { | ||||
|         HashEntry!(K, V)* last; | ||||
|         auto bin = calculateHash(key); | ||||
|         auto next = this.table[bin]; | ||||
|  | ||||
|         while (next !is null && compare(key, next.pair[0]) > 0) | ||||
|         { | ||||
|             last = next; | ||||
|             next = next.next; | ||||
|         } | ||||
|  | ||||
|         // There's already a pair. | ||||
|         if (next !is null && compare(key, next.pair[0]) == 0) | ||||
|         { | ||||
|             next.pair[1] = value; | ||||
|             return false; | ||||
|         } | ||||
|         else // Nope, could't find it.  Time to grow a pair. | ||||
|         { | ||||
|             auto newpair = new HashEntry!(K, V)(key, value); | ||||
|  | ||||
|             // We're at the start of the linked list in this bin. | ||||
|             if (next == this.table[bin]) | ||||
|             { | ||||
|                 newpair.next = next; | ||||
|                 this.table[bin] = newpair; | ||||
|             } | ||||
|             else if (next is null) | ||||
|             { | ||||
|                 // We're at the end of the linked list in this bin. | ||||
|                 last.next = newpair; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 // We're in the middle of the list. | ||||
|                 newpair.next = next; | ||||
|                 last.next = newpair; | ||||
|             } | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     void opIndexAssign(V value, K key) | ||||
|     { | ||||
|         insert(key, value); | ||||
|     } | ||||
|  | ||||
|     Range!(K, V) opIndex() | ||||
|     { | ||||
|         return typeof(return)(this.table); | ||||
|     } | ||||
|  | ||||
|     @property bool empty() const | ||||
|     { | ||||
|         foreach (entry; this.table) | ||||
|         { | ||||
|             if (entry !is null) | ||||
|             { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     private HashEntry!(K, V)*[] table; | ||||
|  | ||||
|     mixin DefaultAllocator; | ||||
| } | ||||
|  | ||||
| unittest | ||||
| { | ||||
|     auto ht = HashTable!(string, string)(65536); | ||||
|     assert(ht.empty); | ||||
|  | ||||
|     ht["key1"] = "inky"; | ||||
|     ht["key2"] = "pinky"; | ||||
|     ht["key3"] = "blinky"; | ||||
|     ht["key4"] = "floyd"; | ||||
|  | ||||
|     assert(!ht.empty); | ||||
|     assert("inky" == ht["key1"]); | ||||
|     assert("pinky" == ht["key2"]); | ||||
|     assert("blinky" == ht["key3"]); | ||||
|     assert("floyd" == ht["key4"]); | ||||
| } | ||||
| @@ -14,7 +14,29 @@ module tanya.container; | ||||
|  | ||||
| public import tanya.container.array; | ||||
| public import tanya.container.buffer; | ||||
| public import tanya.container.hashtable; | ||||
| public import tanya.container.set; | ||||
| public import tanya.container.list; | ||||
| public import tanya.container.string; | ||||
| public import tanya.container.queue; | ||||
|  | ||||
| /** | ||||
|  * Thrown if $(D_PSYMBOL Set) cannot insert a new element because the container | ||||
|  * is full. | ||||
|  */ | ||||
| class HashContainerFullException : Exception | ||||
| { | ||||
|     /** | ||||
|      * Params: | ||||
|      *  msg  = The message for the exception. | ||||
|      *  file = The file where the exception occurred. | ||||
|      *  line = The line number where the exception occurred. | ||||
|      *  next = The previous exception in the chain of exceptions, if any. | ||||
|      */ | ||||
|     this(string msg, | ||||
|          string file = __FILE__, | ||||
|          size_t line = __LINE__, | ||||
|          Throwable next = null) @nogc @safe pure nothrow | ||||
|     { | ||||
|         super(msg, file, line, next); | ||||
|     } | ||||
| } | ||||
|   | ||||
							
								
								
									
										262
									
								
								source/tanya/container/set.d
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								source/tanya/container/set.d
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,262 @@ | ||||
| /* This Source Code Form is subject to the terms of the Mozilla Public | ||||
|  * License, v. 2.0. If a copy of the MPL was not distributed with this | ||||
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||||
|  | ||||
| /** | ||||
|  * This module implements a $(D_PSymbol Set) container that stores unique | ||||
|  * values without any particular order. | ||||
|  * | ||||
|  * Copyright: Eugene Wissner 2017. | ||||
|  * License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/, | ||||
|  *                  Mozilla Public License, v. 2.0). | ||||
|  * Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner) | ||||
|  */ | ||||
| module tanya.container.set; | ||||
|  | ||||
| import std.algorithm.mutation; | ||||
| import std.traits; | ||||
| import tanya.container; | ||||
| import tanya.container.entry; | ||||
| import tanya.memory; | ||||
|  | ||||
| /** | ||||
|  * Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values. | ||||
|  * | ||||
|  * Params: | ||||
|  *  E = Element type. | ||||
|  */ | ||||
| struct Range(E) | ||||
| { | ||||
|     @disable this(); | ||||
|  | ||||
|     @property Range save() | ||||
|     { | ||||
|         return this; | ||||
|     } | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Set is a data structure that stores unique values without any particular | ||||
|  * order. | ||||
|  * | ||||
|  * This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions | ||||
|  * are resolved with linear probing. | ||||
|  * | ||||
|  * Params: | ||||
|  *  T = Element type. | ||||
|  */ | ||||
| struct Set(T) | ||||
| { | ||||
|     invariant | ||||
|     { | ||||
|         assert(this.lengthIndex < primes.length); | ||||
|         assert(this.data.length == 0 | ||||
|             || this.data.length == primes[this.lengthIndex]); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Constructor. | ||||
|      * | ||||
|      * Params: | ||||
|      *  allocator = Allocator. | ||||
|      * | ||||
|      * Precondition: $(D_INLINECODE allocator !is null). | ||||
|      */ | ||||
|     this(shared Allocator allocator) | ||||
|     in | ||||
|     { | ||||
|         assert(allocator !is null); | ||||
|     } | ||||
|     body | ||||
|     { | ||||
|         this.allocator_ = allocator; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Maximum amount of elements this $(D_PSYMBOL Set) can hold without | ||||
|      * resizing and rehashing. Note that it doesn't mean that the | ||||
|      * $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements. | ||||
|      * $(D_PSYMBOL capacity) tells the size of the container under a best-case | ||||
|      * distribution of elements. | ||||
|      * | ||||
|      * Returns: $(D_PSYMBOL Set) capacity. | ||||
|      */ | ||||
|     @property size_t capacity() const | ||||
|     { | ||||
|         return this.data.length; | ||||
|     } | ||||
|  | ||||
|     private static const size_t[41] primes = [ | ||||
|         3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521, | ||||
|         769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593, | ||||
|         49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, | ||||
|         12582917, 25165843, 139022417, 282312799, 573292817, 1164186217, | ||||
|     ]; | ||||
|  | ||||
|     static private size_t calculateHash(ref T value) | ||||
|     { | ||||
|         static if (isIntegral!T || isSomeChar!T || is(T == bool)) | ||||
|         { | ||||
|             return (cast(size_t) value); | ||||
|         } | ||||
|         else | ||||
|         { | ||||
|             static assert(false); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     static private size_t locateBucket(ref DataType buckets, size_t hash) | ||||
|     { | ||||
|         return hash % buckets.length; | ||||
|     } | ||||
|  | ||||
|     private enum InsertStatus : byte | ||||
|     { | ||||
|         found = -1, | ||||
|         failed = 0, | ||||
|         added = 1, | ||||
|     } | ||||
|  | ||||
|     // Inserts the value in an empty or deleted bucket. If the value is | ||||
|     // already in there, does nothing and returns true. If the hash array | ||||
|     // is full returns false. | ||||
|     static private InsertStatus insertInUnusedBucket(ref DataType buckets, | ||||
|                                                      ref T value) | ||||
|     { | ||||
|         auto bucketPosition = locateBucket(buckets, calculateHash(value)); | ||||
|  | ||||
|         foreach (ref e; buckets[bucketPosition .. $]) | ||||
|         { | ||||
|             if (e.content == value) // Already in the set. | ||||
|             { | ||||
|                 return InsertStatus.found; | ||||
|             } | ||||
|             else if (e.status != BucketStatus.used) // Insert the value. | ||||
|             { | ||||
|                 e.content = value; | ||||
|                 return InsertStatus.added; | ||||
|             } | ||||
|         } | ||||
|         return InsertStatus.failed; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Inserts a new element. | ||||
|      * | ||||
|      * Params: | ||||
|      *  value = Element value. | ||||
|      * | ||||
|      * Returns: Amount of new elements inserted. | ||||
|      * | ||||
|      * Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed. | ||||
|      */ | ||||
|     size_t insert(T value) | ||||
|     { | ||||
|         if (this.data.length == 0) | ||||
|         { | ||||
|             this.data = DataType(primes[0], allocator); | ||||
|         } | ||||
|  | ||||
|         InsertStatus status = insertInUnusedBucket(this.data, value); | ||||
|         for (; !status; status = insertInUnusedBucket(this.data, value)) | ||||
|         { | ||||
|             rehash(); | ||||
|         } | ||||
|         return status == InsertStatus.added; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Removes an element. | ||||
|      * | ||||
|      * Params: | ||||
|      *  value = Element value. | ||||
|      * | ||||
|      * Returns: Amount of the elements removed. | ||||
|      */ | ||||
|     size_t remove(T value) | ||||
|     { | ||||
|         if (this.data.length == 0) | ||||
|         { | ||||
|             return 0; | ||||
|         } | ||||
|  | ||||
|         auto bucketPosition = locateBucket(this.data, calculateHash(value)); | ||||
|         foreach (ref e; this.data[bucketPosition .. $]) | ||||
|         { | ||||
|             if (e.content == value) // Found. | ||||
|             { | ||||
|                 e.remove(); | ||||
|                 return 1; | ||||
|             } | ||||
|             else if (e.status == BucketStatus.empty) // Insert the value. | ||||
|             { | ||||
|                 return 0; | ||||
|             } | ||||
|         } | ||||
|         return 0; | ||||
|     } | ||||
|  | ||||
|     private void rehash() | ||||
|     { | ||||
|         if ((this.primes.length - 1) == this.lengthIndex) | ||||
|         { | ||||
|             throw make!HashContainerFullException(defaultAllocator, | ||||
|                                                   "Set is full"); | ||||
|         } | ||||
|  | ||||
|         auto storage = DataType(primes[this.lengthIndex + 1], allocator); | ||||
|         foreach (ref e; this.data[]) | ||||
|         { | ||||
|             if (e.status == BucketStatus.used) | ||||
|             { | ||||
|                 insertInUnusedBucket(storage, e.content); | ||||
|             } | ||||
|         } | ||||
|         move(storage, this.data); | ||||
|         ++this.lengthIndex; | ||||
|     } | ||||
|  | ||||
|     private alias DataType = Array!(Bucket!T); | ||||
|     private DataType data; | ||||
|     private size_t lengthIndex; | ||||
|  | ||||
|     mixin DefaultAllocator; | ||||
| } | ||||
|  | ||||
| // Basic insertion logic. | ||||
| private unittest | ||||
| { | ||||
|     Set!int set; | ||||
|  | ||||
|     assert(set.insert(5) == 1); | ||||
|     assert(set.data[0].status == BucketStatus.empty); | ||||
|     assert(set.data[1].status == BucketStatus.empty); | ||||
|     assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); | ||||
|     assert(set.data.length == 3); | ||||
|  | ||||
|     assert(set.insert(5) == 0); | ||||
|     assert(set.data[0].status == BucketStatus.empty); | ||||
|     assert(set.data[1].status == BucketStatus.empty); | ||||
|     assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); | ||||
|     assert(set.data.length == 3); | ||||
|  | ||||
|     assert(set.insert(9) == 1); | ||||
|     assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used); | ||||
|     assert(set.data[1].status == BucketStatus.empty); | ||||
|     assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used); | ||||
|     assert(set.data.length == 3); | ||||
|  | ||||
|     assert(set.insert(7) == 1); | ||||
|     assert(set.insert(8) == 1); | ||||
|     assert(set.data[0].content == 7); | ||||
|     assert(set.data[1].content == 8); | ||||
|     assert(set.data[2].content == 9); | ||||
|     assert(set.data[3].status == BucketStatus.empty); | ||||
|     assert(set.data[5].content == 5); | ||||
|     assert(set.data.length == 7); | ||||
|  | ||||
|     assert(set.insert(16) == 1); | ||||
|     assert(set.data[2].content == 9); | ||||
|     assert(set.data[3].content == 16); | ||||
|     assert(set.data[4].status == BucketStatus.empty); | ||||
| } | ||||
		Reference in New Issue
	
	Block a user