720 lines
18 KiB
D
720 lines
18 KiB
D
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/**
|
|
* This module implements a $(D_PSYMBOL Set) container that stores unique
|
|
* values without any particular order.
|
|
*
|
|
* Copyright: Eugene Wissner 2017.
|
|
* License: $(LINK2 https://www.mozilla.org/en-US/MPL/2.0/,
|
|
* Mozilla Public License, v. 2.0).
|
|
* Authors: $(LINK2 mailto:info@caraus.de, Eugene Wissner)
|
|
* Source: $(LINK2 https://github.com/caraus-ecms/tanya/blob/master/source/tanya/container/set.d,
|
|
* tanya/container/set.d)
|
|
*/
|
|
module tanya.container.set;
|
|
|
|
import std.algorithm.mutation;
|
|
import tanya.container;
|
|
import tanya.container.entry;
|
|
import tanya.memory;
|
|
import tanya.meta.trait;
|
|
import tanya.meta.transform;
|
|
|
|
/**
|
|
* Bidirectional range that iterates over the $(D_PSYMBOL Set)'s values.
|
|
*
|
|
* Params:
|
|
* E = Element type.
|
|
*/
|
|
struct Range(E)
|
|
{
|
|
static if (isMutable!E)
|
|
{
|
|
private alias DataRange = Array!(Bucket!(Unqual!E)).Range;
|
|
}
|
|
else
|
|
{
|
|
private alias DataRange = Array!(Bucket!(Unqual!E)).ConstRange;
|
|
}
|
|
private DataRange dataRange;
|
|
|
|
@disable this();
|
|
|
|
private this(DataRange dataRange)
|
|
{
|
|
while (!dataRange.empty && dataRange.front.status != BucketStatus.used)
|
|
{
|
|
dataRange.popFront();
|
|
}
|
|
while (!dataRange.empty && dataRange.back.status != BucketStatus.used)
|
|
{
|
|
dataRange.popBack();
|
|
}
|
|
this.dataRange = dataRange;
|
|
}
|
|
|
|
@property Range save()
|
|
{
|
|
return this;
|
|
}
|
|
|
|
@property bool empty() const
|
|
{
|
|
return this.dataRange.empty();
|
|
}
|
|
|
|
@property void popFront()
|
|
in
|
|
{
|
|
assert(!this.dataRange.empty);
|
|
assert(this.dataRange.front.status == BucketStatus.used);
|
|
}
|
|
out
|
|
{
|
|
assert(this.dataRange.empty
|
|
|| this.dataRange.back.status == BucketStatus.used);
|
|
}
|
|
body
|
|
{
|
|
do
|
|
{
|
|
dataRange.popFront();
|
|
}
|
|
while (!dataRange.empty && dataRange.front.status != BucketStatus.used);
|
|
}
|
|
|
|
@property void popBack()
|
|
in
|
|
{
|
|
assert(!this.dataRange.empty);
|
|
assert(this.dataRange.back.status == BucketStatus.used);
|
|
}
|
|
out
|
|
{
|
|
assert(this.dataRange.empty
|
|
|| this.dataRange.back.status == BucketStatus.used);
|
|
}
|
|
body
|
|
{
|
|
do
|
|
{
|
|
dataRange.popBack();
|
|
}
|
|
while (!dataRange.empty && dataRange.back.status != BucketStatus.used);
|
|
}
|
|
|
|
@property ref inout(E) front() inout
|
|
in
|
|
{
|
|
assert(!this.dataRange.empty);
|
|
assert(this.dataRange.front.status == BucketStatus.used);
|
|
}
|
|
body
|
|
{
|
|
return dataRange.front.content;
|
|
}
|
|
|
|
@property ref inout(E) back() inout
|
|
in
|
|
{
|
|
assert(!this.dataRange.empty);
|
|
assert(this.dataRange.back.status == BucketStatus.used);
|
|
}
|
|
body
|
|
{
|
|
return dataRange.back.content;
|
|
}
|
|
|
|
Range opIndex()
|
|
{
|
|
return typeof(return)(this.dataRange[]);
|
|
}
|
|
|
|
Range!(const E) opIndex() const
|
|
{
|
|
return typeof(return)(this.dataRange[]);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Set is a data structure that stores unique values without any particular
|
|
* order.
|
|
*
|
|
* This $(D_PSYMBOL Set) is implemented using closed hashing. Hash collisions
|
|
* are resolved with linear probing.
|
|
*
|
|
* Currently works only with integral types.
|
|
*
|
|
* Params:
|
|
* T = Element type.
|
|
*/
|
|
struct Set(T)
|
|
if (isIntegral!T || is(Unqual!T == bool))
|
|
{
|
|
/// The range types for $(D_PSYMBOL Set).
|
|
alias Range = .Range!T;
|
|
|
|
/// ditto
|
|
alias ConstRange = .Range!(const T);
|
|
|
|
invariant
|
|
{
|
|
assert(this.lengthIndex < primes.length);
|
|
assert(this.data.length == 0
|
|
|| this.data.length == primes[this.lengthIndex]);
|
|
}
|
|
|
|
/**
|
|
* Constructor.
|
|
*
|
|
* Params:
|
|
* n = Minimum number of buckets.
|
|
* allocator = Allocator.
|
|
*
|
|
* Precondition: $(D_INLINECODE allocator !is null).
|
|
*/
|
|
this(const size_t n, shared Allocator allocator = defaultAllocator)
|
|
in
|
|
{
|
|
assert(allocator !is null);
|
|
}
|
|
body
|
|
{
|
|
this(allocator);
|
|
rehash(n);
|
|
}
|
|
|
|
/// ditto
|
|
this(shared Allocator allocator)
|
|
in
|
|
{
|
|
assert(allocator !is null);
|
|
}
|
|
body
|
|
{
|
|
this.data = typeof(this.data)(allocator);
|
|
}
|
|
|
|
///
|
|
unittest
|
|
{
|
|
{
|
|
auto set = Set!int(defaultAllocator);
|
|
assert(set.capacity == 0);
|
|
}
|
|
{
|
|
auto set = Set!int(8);
|
|
assert(set.capacity == 13);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Initializes this $(D_PARAM Set) from another one.
|
|
*
|
|
* If $(D_PARAM init) is passed by reference, it will be copied.
|
|
* If $(D_PARAM init) is passed by value, it will be moved.
|
|
*
|
|
* Params:
|
|
* S = Source set type.
|
|
* init = Source set.
|
|
* allocator = Allocator.
|
|
*/
|
|
this(S)(ref S init, shared Allocator allocator = defaultAllocator)
|
|
if (is(Unqual!S == Set))
|
|
in
|
|
{
|
|
assert(allocator !is null);
|
|
}
|
|
body
|
|
{
|
|
this.data = typeof(this.data)(init.data, allocator);
|
|
}
|
|
|
|
/// ditto
|
|
this(S)(S init, shared Allocator allocator = defaultAllocator)
|
|
if (is(S == Set))
|
|
in
|
|
{
|
|
assert(allocator !is null);
|
|
}
|
|
body
|
|
{
|
|
this.data = typeof(this.data)(move(init.data), allocator);
|
|
this.lengthIndex = init.lengthIndex;
|
|
init.lengthIndex = 0;
|
|
}
|
|
|
|
/**
|
|
* Assigns another set.
|
|
*
|
|
* If $(D_PARAM that) is passed by reference, it will be copied.
|
|
* If $(D_PARAM that) is passed by value, it will be moved.
|
|
*
|
|
* Params:
|
|
* S = Content type.
|
|
* that = The value should be assigned.
|
|
*
|
|
* Returns: $(D_KEYWORD this).
|
|
*/
|
|
ref typeof(this) opAssign(S)(ref S that)
|
|
if (is(Unqual!S == Set))
|
|
{
|
|
this.data = that.data;
|
|
this.lengthIndex = that.lengthIndex;
|
|
return this;
|
|
}
|
|
|
|
/// ditto
|
|
ref typeof(this) opAssign(S)(S that) @trusted
|
|
if (is(S == Set))
|
|
{
|
|
swap(this.data, that.data);
|
|
swap(this.lengthIndex, that.lengthIndex);
|
|
return this;
|
|
}
|
|
|
|
/**
|
|
* Returns: Used allocator.
|
|
*
|
|
* Postcondition: $(D_INLINECODE allocator !is null)
|
|
*/
|
|
@property shared(Allocator) allocator() const
|
|
out (allocator)
|
|
{
|
|
assert(allocator !is null);
|
|
}
|
|
body
|
|
{
|
|
return cast(shared Allocator) this.data.allocator;
|
|
}
|
|
|
|
/**
|
|
* Maximum amount of elements this $(D_PSYMBOL Set) can hold without
|
|
* resizing and rehashing. Note that it doesn't mean that the
|
|
* $(D_PSYMBOL Set) will hold $(I exactly) $(D_PSYMBOL capacity) elements.
|
|
* $(D_PSYMBOL capacity) tells the size of the container under a best-case
|
|
* distribution of elements.
|
|
*
|
|
* Returns: $(D_PSYMBOL Set) capacity.
|
|
*/
|
|
@property size_t capacity() const
|
|
{
|
|
return this.data.length;
|
|
}
|
|
|
|
///
|
|
unittest
|
|
{
|
|
Set!int set;
|
|
assert(set.capacity == 0);
|
|
|
|
set.insert(8);
|
|
assert(set.capacity == 3);
|
|
}
|
|
|
|
/**
|
|
* Iterates over the $(D_PSYMBOL Set) and counts the elements.
|
|
*
|
|
* Returns: Count of elements within the $(D_PSYMBOL Set).
|
|
*/
|
|
@property size_t length() const
|
|
{
|
|
size_t count;
|
|
foreach (ref e; this.data[])
|
|
{
|
|
if (e.status == BucketStatus.used)
|
|
{
|
|
++count;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
///
|
|
unittest
|
|
{
|
|
Set!int set;
|
|
assert(set.length == 0);
|
|
|
|
set.insert(8);
|
|
assert(set.length == 1);
|
|
}
|
|
|
|
private static const size_t[41] primes = [
|
|
3, 7, 13, 23, 29, 37, 53, 71, 97, 131, 163, 193, 239, 293, 389, 521,
|
|
769, 919, 1103, 1327, 1543, 2333, 3079, 4861, 6151, 12289, 24593,
|
|
49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
|
|
12582917, 25165843, 139022417, 282312799, 573292817, 1164186217,
|
|
];
|
|
|
|
/// The maximum number of buckets the container can have.
|
|
enum size_t maxBucketCount = primes[$ - 1];
|
|
|
|
static private size_t calculateHash(U)(ref const U value)
|
|
if (is(U == Unqual!T))
|
|
{
|
|
static if (isIntegral!T || isSomeChar!T || is(T == bool))
|
|
{
|
|
return (cast(size_t) value);
|
|
}
|
|
else
|
|
{
|
|
static assert(false);
|
|
}
|
|
}
|
|
|
|
static private size_t locateBucket(ref const DataType buckets,
|
|
const size_t hash)
|
|
in
|
|
{
|
|
assert(buckets.length > 0);
|
|
}
|
|
body
|
|
{
|
|
return hash % buckets.length;
|
|
}
|
|
|
|
/*
|
|
* Returns bucket position for `hash`. `0` may mean the 0th position or an
|
|
* empty `buckets` array.
|
|
*/
|
|
private size_t locateBucket(const size_t hash) const
|
|
{
|
|
return this.data.length == 0 ? 0 : locateBucket(this.data, hash);
|
|
}
|
|
|
|
private enum InsertStatus : byte
|
|
{
|
|
found = -1,
|
|
failed = 0,
|
|
added = 1,
|
|
}
|
|
|
|
/*
|
|
* Inserts the value in an empty or deleted bucket. If the value is
|
|
* already in there, does nothing and returns InsertStatus.found. If the
|
|
* hash array is full returns InsertStatus.failed. Otherwise,
|
|
* InsertStatus.added is returned.
|
|
*/
|
|
private InsertStatus insertInUnusedBucket(ref T value)
|
|
{
|
|
auto bucketPosition = locateBucket(this.data, calculateHash(value));
|
|
|
|
foreach (ref e; this.data[bucketPosition .. $])
|
|
{
|
|
if (e == value) // Already in the set.
|
|
{
|
|
return InsertStatus.found;
|
|
}
|
|
else if (e.status != BucketStatus.used) // Insert the value.
|
|
{
|
|
e.content = value;
|
|
return InsertStatus.added;
|
|
}
|
|
}
|
|
return InsertStatus.failed;
|
|
}
|
|
|
|
/**
|
|
* Inserts a new element.
|
|
*
|
|
* Params:
|
|
* value = Element value.
|
|
*
|
|
* Returns: Amount of new elements inserted.
|
|
*
|
|
* Throws: $(D_PSYMBOL HashContainerFullException) if the insertion failed.
|
|
*/
|
|
size_t insert(T value)
|
|
{
|
|
if (this.data.length == 0)
|
|
{
|
|
this.data = DataType(primes[0], allocator);
|
|
}
|
|
|
|
InsertStatus status = insertInUnusedBucket(value);
|
|
for (; !status; status = insertInUnusedBucket(value))
|
|
{
|
|
if (this.primes.length == (this.lengthIndex + 1))
|
|
{
|
|
throw make!HashContainerFullException(defaultAllocator,
|
|
"Set is full");
|
|
}
|
|
rehashToSize(this.lengthIndex + 1);
|
|
}
|
|
return status == InsertStatus.added;
|
|
}
|
|
|
|
///
|
|
unittest
|
|
{
|
|
Set!int set;
|
|
assert(8 !in set);
|
|
|
|
assert(set.insert(8) == 1);
|
|
assert(set.length == 1);
|
|
assert(8 in set);
|
|
|
|
assert(set.insert(8) == 0);
|
|
assert(set.length == 1);
|
|
assert(8 in set);
|
|
|
|
assert(set.remove(8));
|
|
assert(set.insert(8) == 1);
|
|
}
|
|
|
|
/**
|
|
* Removes an element.
|
|
*
|
|
* Params:
|
|
* value = Element value.
|
|
*
|
|
* Returns: Number of elements removed, which is in the container with
|
|
* unique values `1` if an element existed, and `0` otherwise.
|
|
*/
|
|
size_t remove(T value)
|
|
{
|
|
auto bucketPosition = locateBucket(calculateHash(value));
|
|
foreach (ref e; this.data[bucketPosition .. $])
|
|
{
|
|
if (e == value) // Found.
|
|
{
|
|
e.remove();
|
|
return 1;
|
|
}
|
|
else if (e.status == BucketStatus.empty)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
///
|
|
@nogc unittest
|
|
{
|
|
Set!int set;
|
|
assert(8 !in set);
|
|
|
|
set.insert(8);
|
|
assert(8 in set);
|
|
|
|
assert(set.remove(8) == 1);
|
|
assert(set.remove(8) == 0);
|
|
assert(8 !in set);
|
|
}
|
|
|
|
/**
|
|
* $(D_KEYWORD in) operator.
|
|
*
|
|
* Params:
|
|
* value = Element to be searched for.
|
|
*
|
|
* Returns: $(D_KEYWORD true) if the given element exists in the container,
|
|
* $(D_KEYWORD false) otherwise.
|
|
*/
|
|
bool opBinaryRight(string op : "in")(auto ref const T value) const
|
|
{
|
|
auto bucketPosition = locateBucket(calculateHash(value));
|
|
foreach (ref e; this.data[bucketPosition .. $])
|
|
{
|
|
if (e == value) // Found.
|
|
{
|
|
return true;
|
|
}
|
|
else if (e.status == BucketStatus.empty)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
///
|
|
@nogc unittest
|
|
{
|
|
Set!int set;
|
|
|
|
assert(5 !in set);
|
|
set.insert(5);
|
|
assert(5 in set);
|
|
assert(8 !in set);
|
|
}
|
|
|
|
/**
|
|
* Sets the number of buckets in the container to at least $(D_PARAM n)
|
|
* and rearranges all the elements according to their hash values.
|
|
*
|
|
* If $(D_PARAM n) is greater than the current $(D_PSYMBOL capacity)
|
|
* and lower than or equal to $(D_PSYMBOL maxBucketCount), a rehash is
|
|
* forced.
|
|
*
|
|
* If $(D_PARAM n) is greater than $(D_PSYMBOL maxBucketCount),
|
|
* $(D_PSYMBOL maxBucketCount) is used instead as a new number of buckets.
|
|
*
|
|
* If $(D_PARAM n) is equal to the current $(D_PSYMBOL capacity), rehashing
|
|
* is forced without resizing the container.
|
|
*
|
|
* If $(D_PARAM n) is lower than the current $(D_PSYMBOL capacity), the
|
|
* function may have no effect.
|
|
*
|
|
* Rehashing is automatically performed whenever the container needs space
|
|
* to insert new elements.
|
|
*
|
|
* Params:
|
|
* n = Minimum number of buckets.
|
|
*/
|
|
void rehash(const size_t n)
|
|
{
|
|
size_t lengthIndex;
|
|
for (; lengthIndex < primes.length; ++lengthIndex)
|
|
{
|
|
if (primes[lengthIndex] >= n)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
rehashToSize(lengthIndex);
|
|
}
|
|
|
|
// Takes an index in the primes array.
|
|
private void rehashToSize(const size_t n)
|
|
{
|
|
auto storage = DataType(primes[n], allocator);
|
|
DataLoop: foreach (ref e1; this.data[])
|
|
{
|
|
if (e1.status == BucketStatus.used)
|
|
{
|
|
auto bucketPosition = locateBucket(storage,
|
|
calculateHash(e1.content));
|
|
|
|
foreach (ref e2; storage[bucketPosition .. $])
|
|
{
|
|
if (e2.status != BucketStatus.used) // Insert the value.
|
|
{
|
|
e2.content = e1.content;
|
|
continue DataLoop;
|
|
}
|
|
}
|
|
return; // Rehashing failed.
|
|
}
|
|
}
|
|
move(storage, this.data);
|
|
this.lengthIndex = n;
|
|
}
|
|
|
|
/**
|
|
* Returns: A bidirectional range that iterates over the $(D_PSYMBOL Set)'s
|
|
* elements.
|
|
*/
|
|
Range opIndex()
|
|
{
|
|
return typeof(return)(this.data[]);
|
|
}
|
|
|
|
/// ditto
|
|
ConstRange opIndex() const
|
|
{
|
|
return typeof(return)(this.data[]);
|
|
}
|
|
|
|
///
|
|
@nogc unittest
|
|
{
|
|
Set!int set;
|
|
assert(set[].empty);
|
|
|
|
set.insert(8);
|
|
assert(!set[].empty);
|
|
assert(set[].front == 8);
|
|
assert(set[].back == 8);
|
|
|
|
set.remove(8);
|
|
assert(set[].empty);
|
|
}
|
|
|
|
private @nogc unittest
|
|
{
|
|
const Set!int set;
|
|
assert(set[].empty);
|
|
}
|
|
|
|
private @nogc unittest
|
|
{
|
|
Set!int set;
|
|
set.insert(8);
|
|
|
|
auto r1 = set[];
|
|
auto r2 = r1.save();
|
|
|
|
r1.popFront();
|
|
assert(r1.empty);
|
|
|
|
r2.popBack();
|
|
assert(r2.empty);
|
|
}
|
|
|
|
private alias DataType = Array!(Bucket!T);
|
|
private DataType data;
|
|
private size_t lengthIndex;
|
|
}
|
|
|
|
// Basic insertion logic.
|
|
private @nogc unittest
|
|
{
|
|
Set!int set;
|
|
|
|
assert(set.insert(5) == 1);
|
|
assert(set.data[0].status == BucketStatus.empty);
|
|
assert(set.data[1].status == BucketStatus.empty);
|
|
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
|
assert(set.data.length == 3);
|
|
|
|
assert(set.insert(5) == 0);
|
|
assert(set.data[0].status == BucketStatus.empty);
|
|
assert(set.data[1].status == BucketStatus.empty);
|
|
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
|
assert(set.data.length == 3);
|
|
|
|
assert(set.insert(9) == 1);
|
|
assert(set.data[0].content == 9 && set.data[0].status == BucketStatus.used);
|
|
assert(set.data[1].status == BucketStatus.empty);
|
|
assert(set.data[2].content == 5 && set.data[2].status == BucketStatus.used);
|
|
assert(set.data.length == 3);
|
|
|
|
assert(set.insert(7) == 1);
|
|
assert(set.insert(8) == 1);
|
|
assert(set.data[0].content == 7);
|
|
assert(set.data[1].content == 8);
|
|
assert(set.data[2].content == 9);
|
|
assert(set.data[3].status == BucketStatus.empty);
|
|
assert(set.data[5].content == 5);
|
|
assert(set.data.length == 7);
|
|
|
|
assert(set.insert(16) == 1);
|
|
assert(set.data[2].content == 9);
|
|
assert(set.data[3].content == 16);
|
|
assert(set.data[4].status == BucketStatus.empty);
|
|
}
|
|
|
|
// Static checks.
|
|
private unittest
|
|
{
|
|
import tanya.range.primitive;
|
|
|
|
static assert(isBidirectionalRange!(Set!int.ConstRange));
|
|
static assert(isBidirectionalRange!(Set!int.Range));
|
|
|
|
static assert(!isInfinite!(Set!int.Range));
|
|
static assert(!hasLength!(Set!int.Range));
|
|
|
|
static assert(is(Set!uint));
|
|
static assert(is(Set!long));
|
|
static assert(is(Set!ulong));
|
|
static assert(is(Set!short));
|
|
static assert(is(Set!ushort));
|
|
static assert(is(Set!bool));
|
|
}
|