tanya/source/tanya/container/hashtable.d

531 lines
13 KiB
D
Raw Normal View History

2018-04-28 18:15:39 +02:00
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* Hash table.
*
* Copyright: Eugene Wissner 2018.
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
* Mozilla Public License, v. 2.0).
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/hashtable.d,
* tanya/container/hashtable.d)
*/
module tanya.container.hashtable;
import tanya.container.array;
import tanya.container.entry;
import tanya.hash.lookup;
import tanya.memory;
2018-05-01 15:43:38 +02:00
import tanya.meta.trait;
import tanya.meta.transform;
2018-05-11 05:44:46 +02:00
import tanya.typecons;
2018-04-28 18:15:39 +02:00
2018-05-01 15:43:38 +02:00
/**
 * Bidirectional range whose element type is a tuple of a key and the
 * respective value.
 *
 * The range walks the bucket array of the hash storage and exposes only the
 * buckets whose status is $(D_INLINECODE BucketStatus.used).
 *
 * Params:
 *  T = Type of the internal hash storage.
 */
struct Range(T)
{
    private alias KV = CopyConstness!(T, T.Bucket.KV);

    // Use the mutable or the constant range of the underlying bucket array,
    // depending on whether the storage type itself is mutable.
    static if (isMutable!T)
    {
        private alias DataRange = T.array.Range;
    }
    else
    {
        private alias DataRange = T.array.ConstRange;
    }
    private DataRange dataRange;

    @disable this();

    /*
     * Wraps a range over the bucket array.
     *
     * Buckets that are not in use are trimmed from both ends, so that a
     * non-empty range always starts and ends with a used bucket.
     */
    private this(DataRange dataRange)
    {
        while (!dataRange.empty && dataRange.front.status != BucketStatus.used)
        {
            dataRange.popFront();
        }
        while (!dataRange.empty && dataRange.back.status != BucketStatus.used)
        {
            dataRange.popBack();
        }
        this.dataRange = dataRange;
    }

    /**
     * Returns: A copy of this range.
     */
    @property Range save()
    {
        return this;
    }

    /**
     * Returns: $(D_KEYWORD true) if there are no more elements to iterate
     *          over, $(D_KEYWORD false) otherwise.
     */
    @property bool empty() const
    {
        return this.dataRange.empty();
    }

    /**
     * Advances the front of the range to the next used bucket.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    @property void popFront()
    in
    {
        assert(!empty);
        assert(this.dataRange.front.status == BucketStatus.used);
    }
    out
    {
        // After advancing, the range is either exhausted or its new front
        // has to be a used bucket.
        assert(empty || this.dataRange.front.status == BucketStatus.used);
    }
    do
    {
        // Drop the current element, then skip every bucket that isn't in use.
        do
        {
            this.dataRange.popFront();
        }
        while (!empty && dataRange.front.status != BucketStatus.used);
    }

    /**
     * Moves the back of the range to the previous used bucket.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    @property void popBack()
    in
    {
        assert(!empty);
        assert(this.dataRange.back.status == BucketStatus.used);
    }
    out
    {
        assert(empty || this.dataRange.back.status == BucketStatus.used);
    }
    do
    {
        // Drop the current element, then skip every bucket that isn't in use.
        do
        {
            this.dataRange.popBack();
        }
        while (!empty && dataRange.back.status != BucketStatus.used);
    }

    /**
     * Returns: The first key/value pair of the range.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    @property ref inout(KV) front() inout
    in
    {
        assert(!empty);
        assert(this.dataRange.front.status == BucketStatus.used);
    }
    do
    {
        return this.dataRange.front.kv;
    }

    /**
     * Returns: The last key/value pair of the range.
     *
     * Precondition: $(D_INLINECODE !empty).
     */
    @property ref inout(KV) back() inout
    in
    {
        assert(!empty);
        assert(this.dataRange.back.status == BucketStatus.used);
    }
    do
    {
        return this.dataRange.back.kv;
    }

    /**
     * Returns: A range over the same elements as this one.
     */
    Range opIndex()
    {
        return typeof(return)(this.dataRange[]);
    }

    /// ditto
    Range!(const T) opIndex() const
    {
        return typeof(return)(this.dataRange[]);
    }
}
/**
 * Hash table is a data structure that stores pairs of keys and values without
 * any particular order.
 *
 * This $(D_PSYMBOL HashTable) is implemented using closed hashing. Hash
 * collisions are resolved with linear probing.
 *
 * $(D_PARAM Key) should be hashable with $(D_PARAM hasher). $(D_PARAM hasher)
 * is a callable that accepts an argument of type $(D_PARAM Key) and returns a
 * hash value for it ($(D_KEYWORD size_t)).
 *
 * Params:
 *  Key    = Key type.
 *  Value  = Value type.
 *  hasher = Hash function for $(D_PARAM Key).
 */
struct HashTable(Key, Value, alias hasher = hash)
if (is(typeof(hasher(Key.init)) == size_t))
{
    private alias HashArray = .HashArray!(hasher, Key, Value);
    private alias Buckets = HashArray.Buckets;

    private HashArray data;

    /// The range types for $(D_PSYMBOL HashTable).
    alias Range = .Range!HashArray;

    /// ditto
    alias ConstRange = .Range!(const HashArray);

    /**
     * Constructor.
     *
     * Params:
     *  n         = Minimum number of buckets.
     *  allocator = Allocator.
     *
     * Precondition: $(D_INLINECODE allocator !is null).
     */
    this(size_t n, shared Allocator allocator = defaultAllocator)
    in
    {
        assert(allocator !is null);
    }
    do
    {
        this(allocator);
        rehash(n);
    }

    /// ditto
    this(shared Allocator allocator)
    in
    {
        assert(allocator !is null);
    }
    do
    {
        this.data = HashArray(Buckets(allocator));
    }

    /**
     * Initializes this $(D_PARAM HashTable) from another one.
     *
     * If $(D_PARAM init) is passed by reference, it will be copied.
     * If $(D_PARAM init) is passed by value, it will be moved.
     *
     * Params:
     *  S         = Source hash table type.
     *  init      = Source hash table.
     *  allocator = Allocator.
     *
     * Precondition: $(D_INLINECODE allocator !is null).
     */
    this(S)(ref S init, shared Allocator allocator = defaultAllocator)
    if (is(Unqual!S == HashTable))
    in
    {
        assert(allocator !is null);
    }
    do
    {
        // The buckets live in the "array" member of the hash storage; the
        // storage itself cannot be used to construct a bucket array.
        this.data = HashArray(Buckets(init.data.array, allocator));
        // Keep the size bookkeeping consistent with the copied buckets.
        this.data.lengthIndex = init.data.lengthIndex;
        // NOTE(review): if the hash storage tracks the element count in a
        // separate field, it has to be copied here as well - confirm against
        // the HashArray definition.
    }

    /// ditto
    this(S)(S init, shared Allocator allocator = defaultAllocator)
    if (is(S == HashTable))
    in
    {
        assert(allocator !is null);
    }
    do
    {
        // NOTE(review): "move" has to be in scope; none of the modules
        // imported by this file obviously provides it - confirm.
        this.data = HashArray(Buckets(move(init.data.array), allocator));
        // "lengthIndex" is a member of the hash storage, not of the table.
        this.data.lengthIndex = init.data.lengthIndex;
        init.data.lengthIndex = 0;
        // NOTE(review): if the hash storage tracks the element count in a
        // separate field, it has to be transferred here as well - confirm
        // against the HashArray definition.
    }

    /**
     * Assigns another hash table.
     *
     * If $(D_PARAM that) is passed by reference, it will be copied.
     * If $(D_PARAM that) is passed by value, it will be moved.
     *
     * Params:
     *  S    = Content type.
     *  that = The value that should be assigned.
     *
     * Returns: $(D_KEYWORD this).
     */
    ref typeof(this) opAssign(S)(ref S that)
    if (is(Unqual!S == HashTable))
    {
        this.data = that.data;
        this.data.lengthIndex = that.data.lengthIndex;
        return this;
    }

    /// ditto
    ref typeof(this) opAssign(S)(S that) @trusted
    if (is(S == HashTable))
    {
        // Exchanging the whole storage exchanges "lengthIndex" as well, since
        // it is a member of the storage; no separate swap is needed.
        // NOTE(review): "swap" has to be in scope; none of the modules
        // imported by this file obviously provides it - confirm.
        swap(this.data, that.data);
        return this;
    }

    /**
     * Returns: Used allocator.
     *
     * Postcondition: $(D_INLINECODE allocator !is null)
     */
    @property shared(Allocator) allocator() const
    out (allocator)
    {
        assert(allocator !is null);
    }
    do
    {
        return this.data.array.allocator;
    }

    /**
     * Maximum amount of elements this $(D_PSYMBOL HashTable) can hold without
     * resizing and rehashing. Note that it doesn't mean that the
     * $(D_PSYMBOL HashTable) will hold $(I exactly) $(D_PSYMBOL capacity)
     * elements. $(D_PSYMBOL capacity) tells the size of the container under a
     * best-case distribution of elements.
     *
     * Returns: $(D_PSYMBOL HashTable) capacity.
     */
    @property size_t capacity() const
    {
        return this.data.capacity;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        HashTable!(string, int) hashTable;
        assert(hashTable.capacity == 0);

        hashTable["eight"] = 8;
        assert(hashTable.capacity == 3);
    }

    /**
     * Returns the number of elements in the container.
     *
     * Returns: The number of elements in the container.
     */
    @property size_t length() const
    {
        return this.data.length;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        HashTable!(string, int) hashTable;
        assert(hashTable.length == 0);

        hashTable["eight"] = 8;
        assert(hashTable.length == 1);
    }

    /**
     * Tells whether the container contains any elements.
     *
     * Returns: Whether the container is empty.
     */
    @property bool empty() const
    {
        return length == 0;
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        HashTable!(string, int) hashTable;
        assert(hashTable.empty);

        hashTable["five"] = 5;
        assert(!hashTable.empty);
    }

    /**
     * Removes all elements.
     */
    void clear()
    {
        this.data.clear();
    }

    ///
    @nogc nothrow pure @safe unittest
    {
        HashTable!(string, int) hashTable;
        hashTable["five"] = 5;
        assert(!hashTable.empty);

        hashTable.clear();
        assert(hashTable.empty);
    }

    /// The maximum number of buckets the container can have.
    enum size_t maxBucketCount = primes[$ - 1];

    /**
     * Inserts a new value at $(D_PARAM key) or reassigns the element if
     * $(D_PARAM key) already exists in the hash table.
     *
     * Params:
     *  key   = The key to insert the value at.
     *  value = The value to be inserted.
     *
     * Returns: Just inserted element.
     */
    ref Value opIndexAssign(Value value, Key key)
    {
        auto e = ((ref v) @trusted => &this.data.insert(v))(key);
        // A bucket that isn't in use yet doesn't carry the key; store it.
        if (e.status != BucketStatus.used)
        {
            e.key = key;
        }
        e.kv.value = value;
        return e.kv.value;
    }

    /**
     * Finds the element with the key $(D_PARAM key).
     *
     * Params:
     *  key = The key to look up.
     *
     * Returns: The value associated with $(D_PARAM key).
     *
     * Precondition: Element with $(D_PARAM key) is in this hash table.
     */
    ref Value opIndex(Key key)
    {
        const code = this.data.locateBucket(key);

        for (auto range = this.data.array[code .. $]; !range.empty; range.popFront())
        {
            // Only buckets that are in use hold a live key. A bucket that is
            // not in use may still carry a key that compares equal (e.g. a
            // default-initialized one) and must not be treated as a match.
            if (range.front.status == BucketStatus.used
             && key == range.front.key)
            {
                return range.front.kv.value;
            }
        }
        assert(false, "Range violation");
    }

    /**
     * Removes the element with the key $(D_PARAM key).
     *
     * The method returns the number of elements removed. Since
     * the hash table contains only unique keys, $(D_PSYMBOL remove) always
     * returns `1` if an element with the $(D_PARAM key) was found, `0`
     * otherwise.
     *
     * Params:
     *  key = The key to be removed.
     *
     * Returns: Number of the removed elements.
     */
    size_t remove(Key key)
    {
        return this.data.remove(key);
    }

    /**
     * Looks for $(D_PARAM key) in this hash table.
     *
     * Params:
     *  key = The key to look for.
     *
     * Returns: $(D_KEYWORD true) if $(D_PARAM key) exists in the hash table,
     *          $(D_KEYWORD false) otherwise.
     */
    bool opBinaryRight(string op : "in")(Key key)
    {
        return this.data.canFind(key);
    }

    /**
     * Sets the number of buckets in the container to at least $(D_PARAM n)
     * and rearranges all the elements according to their hash values.
     *
     * If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
     * and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
     * forced.
     *
     * If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
     * $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
     *
     * If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
     * is forced without resizing the container.
     *
     * If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
     * function may have no effect.
     *
     * Rehashing is automatically performed whenever the container needs space
     * to insert new elements.
     *
     * Params:
     *  n = Minimum number of buckets.
     */
    void rehash(size_t n)
    {
        this.data.rehash(n);
    }

    /**
     * Returns a bidirectional range whose element type is a tuple of a key and
     * the respective value.
     *
     * Returns: A bidirectional range that iterates over the container.
     */
    Range opIndex()
    {
        return typeof(return)(this.data.array[]);
    }

    /// ditto
    ConstRange opIndex() const
    {
        return typeof(return)(this.data.array[]);
    }
}
// Exercises construction with a bucket hint, insertion, lookup by key, the
// "in" operator and clearing.
@nogc nothrow pure @safe unittest
{
    auto table = HashTable!(string, int)(17);
    assert(table.empty);

    // Fill the table.
    table["Euoplocephalus"] = 6;
    table["Triceratops"] = 7;
    table["Pachycephalosaurus"] = 6;
    table["Shantungosaurus"] = 15;
    table["Ornithominus"] = 4;
    table["Tyrannosaurus"] = 12;
    table["Deinonychus"] = 3;
    table["Iguanodon"] = 9;
    table["Stegosaurus"] = 6;
    table["Brachiosaurus"] = 25;

    // Every inserted key is counted exactly once.
    assert(table.length == 10);

    // Each key maps to the value it was inserted with.
    assert(table["Iguanodon"] == 9);
    assert(table["Ornithominus"] == 4);
    assert(table["Stegosaurus"] == 6);
    assert(table["Euoplocephalus"] == 6);
    assert(table["Deinonychus"] == 3);
    assert(table["Tyrannosaurus"] == 12);
    assert(table["Pachycephalosaurus"] == 6);
    assert(table["Shantungosaurus"] == 15);
    assert(table["Triceratops"] == 7);
    assert(table["Brachiosaurus"] == 25);

    // Membership test for a present and for a missing key.
    assert("Shantungosaurus" in table);
    assert("Ceratopsia" !in table);

    // Clearing removes everything.
    table.clear();
    assert(table.empty);
}
2018-05-11 05:44:46 +02:00
// Compile-time checks: the table can be instantiated, also as a constant, and
// its range satisfies the forward range requirements.
@nogc nothrow pure @safe unittest
{
    import tanya.range.primitive : isForwardRange;

    alias Table = HashTable!(string, int);

    static assert(is(Table a));
    static assert(is(const Table));
    static assert(isForwardRange!(Table.Range));
}