316 lines
8.2 KiB
C++
316 lines
8.2 KiB
C++
#include <array>
|
|
#include <cstdint>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <algorithm>
|
|
#include <bitset>
|
|
#include "huffman.hpp"
|
|
#include <arpa/inet.h>
|
|
|
|
huffman_tree::huffman_tree(const std::string& value)
|
|
: _value(value), _left(nullptr), _right(nullptr)
|
|
{
|
|
}
|
|
|
|
// Builds an inner node from two subtrees. The node's own value is the empty
// string.
//
// Both parameters are taken by value, so they are moved into the members
// instead of being copied a second time (a shared_ptr copy costs an atomic
// reference-count update).
huffman_tree::huffman_tree(std::shared_ptr<huffman_tree> left, std::shared_ptr<huffman_tree> right)
    : _value(""), _left(std::move(left)), _right(std::move(right))
{
}
|
|
|
|
std::uint8_t huffman_tree::value() const
|
|
{
|
|
return _value.front();
|
|
}
|
|
|
|
std::shared_ptr<huffman_tree> huffman_tree::left() const
|
|
{
|
|
return _left;
|
|
}
|
|
|
|
std::shared_ptr<huffman_tree> huffman_tree::right() const
|
|
{
|
|
return _right;
|
|
}
|
|
|
|
bool huffman_tree::is_leaf() const
|
|
{
|
|
return !(this->left() || this->right());
|
|
}
|
|
|
|
huffman_probability::huffman_probability(const std::string& bytes, double probability)
|
|
: bytes(bytes), probability(probability), tree(std::make_shared<huffman_tree>(bytes))
|
|
{
|
|
}
|
|
|
|
// Merges two entries: the bytes are concatenated, the probabilities are added
// and the resulting tree has this entry's tree as its left child and `that`'s
// tree as its right child.
huffman_probability huffman_probability::operator+(const huffman_probability& that) const
{
    huffman_probability merged{ bytes + that.bytes, probability + that.probability };

    // The constructor attached a childless node; replace it with the node that
    // joins both operands.
    merged.tree = std::make_shared<huffman_tree>(tree, that.tree);

    return merged;
}
|
|
|
|
bool huffman_probability::operator<(const huffman_probability& that) const
|
|
{
|
|
return this->probability < that.probability;
|
|
}
|
|
|
|
bool huffman_probability::operator>(const huffman_probability& that) const
|
|
{
|
|
return this->probability > that.probability;
|
|
}
|
|
|
|
bool huffman_probability::operator<=(const huffman_probability& that) const
|
|
{
|
|
return this->probability <= that.probability;
|
|
}
|
|
|
|
bool huffman_probability::operator>=(const huffman_probability& that) const
|
|
{
|
|
return this->probability >= that.probability;
|
|
}
|
|
|
|
std::pair<std::bitset<8>, std::size_t> encode_coding(const coding& input, std::ostream& output,
|
|
std::bitset<8> bitset, std::size_t index)
|
|
{
|
|
for (const bool bit: input)
|
|
{
|
|
bitset[index] = bit;
|
|
|
|
if (index == 7)
|
|
{
|
|
output.put(static_cast<std::ostream::char_type>(bitset.to_ulong()));
|
|
index = 0;
|
|
bitset = std::bitset<8>();
|
|
}
|
|
else
|
|
{
|
|
++index;
|
|
}
|
|
}
|
|
return { bitset, index };
|
|
}
|
|
|
|
void huffman_table::create_table(std::shared_ptr<huffman_tree> tree, coding path)
|
|
{
|
|
if (tree->is_leaf())
|
|
{
|
|
insert(tree->value(), std::move(path));
|
|
}
|
|
else
|
|
{
|
|
coding new_path;
|
|
|
|
new_path = path;
|
|
new_path.push_back(false);
|
|
create_table(tree->left(), std::move(new_path));
|
|
|
|
new_path = path;
|
|
new_path.push_back(true);
|
|
create_table(tree->right(), std::move(new_path));
|
|
}
|
|
}
|
|
|
|
// Builds the table from a tree by recording the path to every leaf, starting
// with an empty path at the root.
//
// `tree` is a by-value parameter that is not used again, so it is moved into
// create_table() instead of being copied once more.
huffman_table::huffman_table(std::shared_ptr<huffman_tree> tree)
{
    create_table(std::move(tree), coding());
}
|
|
|
|
// Reads a serialized table from `coding_stream` and leaves the iterator on the
// first byte after the table.
//
// Layout: one byte with the number of entries, then per entry the coded symbol
// (one byte), the code length in bits (one byte) and the code bits packed
// least-significant-bit first, padded to a whole byte.
//
// The entry count is a single byte, so a full table of 256 entries is stored as
// 0. A count of 0 is therefore read as 256, and the loop counters are wide
// enough to reach that value.
//
// NOTE(review): no end iterator is available here, so the caller has to make
// sure the stream really contains the complete table.
huffman_table::huffman_table(std::vector<std::uint8_t>::const_iterator& coding_stream)
{
    constexpr std::size_t bits_per_byte{ 8 };
    constexpr std::size_t full_table_size{ 256 };

    std::size_t table_size = *coding_stream;

    ++coding_stream;
    if (table_size == 0)
    {
        table_size = full_table_size;
    }
    for (std::size_t i = 0; i < table_size; ++i)
    {
        const auto coded_symbol = static_cast<std::byte>(*coding_stream);

        ++coding_stream;
        const std::size_t bits_in_coding = *coding_stream;

        ++coding_stream;
        coding current_coding;

        for (std::size_t j = 0; j < bits_in_coding; ++j)
        {
            const std::size_t current_bit = j % bits_per_byte;
            const std::bitset<8> current_bitset{ *coding_stream };

            current_coding.push_back(current_bitset[current_bit]);

            if (current_bit == bits_per_byte - 1)
            {
                ++coding_stream;
            }
        }
        // Skip the partially used byte that holds the last bits of the code.
        if (bits_in_coding % bits_per_byte != 0)
        {
            ++coding_stream;
        }
        this->payload.insert({ coded_symbol, std::move(current_coding) });
    }
}
|
|
|
|
void huffman_table::insert(std::uint8_t byte, coding&& path)
|
|
{
|
|
this->payload.insert({ static_cast<std::byte>(byte), std::move(path) });
|
|
}
|
|
|
|
// Returns the code stored for `byte`. The lookup uses at(), so a byte without
// an entry raises std::out_of_range.
const coding& huffman_table::operator[](std::uint8_t byte) const
{
    const auto symbol = static_cast<std::byte>(byte);

    return payload.at(symbol);
}
|
|
|
|
std::size_t huffman_table::size() const
|
|
{
|
|
return this->payload.size();
|
|
}
|
|
|
|
void huffman_table::encode(std::ostream& output) const
|
|
{
|
|
// The first byte is the table size.
|
|
output.put(static_cast<std::ostream::char_type>(size()));
|
|
|
|
for (const std::pair<std::byte, coding>& entry: this->payload)
|
|
{
|
|
// For each entry write the byte encoded.
|
|
output.put(static_cast<std::ostream::char_type>(entry.first));
|
|
output.put(static_cast<std::ostream::char_type>(entry.second.size()));
|
|
|
|
auto rest = encode_coding(entry.second, output, {}, 0);
|
|
if (rest.second > 0)
|
|
{
|
|
output << static_cast<std::ostream::char_type>(rest.first.to_ulong());
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns an iterator to the first (symbol, code) entry of the table.
std::unordered_map<std::byte, coding>::iterator huffman_table::begin()
{
    return payload.begin();
}
|
|
|
|
// Returns the past-the-end iterator of the table's entries.
std::unordered_map<std::byte, coding>::iterator huffman_table::end()
{
    return payload.end();
}
|
|
|
|
// Returns a read-only iterator to the first (symbol, code) entry of the table.
std::unordered_map<std::byte, coding>::const_iterator huffman_table::begin() const
{
    // `payload` is const here, so begin() already yields a const_iterator.
    return payload.begin();
}
|
|
|
|
// Returns the read-only past-the-end iterator of the table's entries.
std::unordered_map<std::byte, coding>::const_iterator huffman_table::end() const
{
    // `payload` is const here, so end() already yields a const_iterator.
    return payload.end();
}
|
|
|
|
// Reduces `probabilities` to a single entry and returns that entry's tree.
//
// In every round the list is sorted in descending order, the last entry is
// removed and merged into the entry that is last afterwards, which becomes the
// left child of the merged node. The list is modified in place and holds one
// element on return.
//
// `probabilities` must not be empty: the front element is read unconditionally.
std::shared_ptr<huffman_tree> create_tree(probability_list& probabilities)
{
    while (probabilities.size() >= 2)
    {
        std::sort(probabilities.begin(), probabilities.end(), std::greater<>{});

        const auto smallest = probabilities.back();

        probabilities.pop_back();

        auto& merge_target = probabilities.back();

        merge_target = merge_target + smallest;
    }
    return probabilities.front().tree;
}
|
|
|
|
// Counts how often every byte value occurs in `input` and returns one entry per
// byte value that occurs at least once, in ascending order of the byte value.
//
// The weight of an entry is its count divided by the constant 256.0, not by the
// length of the input, so the weights are not normalized to sum to one.
probability_list adapt_probabilities(const std::vector<std::uint8_t>& input)
{
    std::array<std::size_t, 256> occurrences = {};

    for (const std::uint8_t symbol: input)
    {
        ++occurrences[symbol];
    }
    probability_list result;

    for (std::size_t symbol = 0; symbol < occurrences.size(); ++symbol)
    {
        const std::size_t occurrence_count = occurrences[symbol];

        if (occurrence_count == 0)
        {
            continue;
        }
        result.push_back({
            std::string(1, static_cast<char>(symbol)),
            occurrence_count / 256.0
        });
    }
    return result;
}
|
|
|
|
void encode(const huffman_table& table, const std::vector<std::uint8_t>& input, std::ostream& output)
|
|
{
|
|
std::pair<std::bitset<8>, std::size_t> rest{ {}, 0 };
|
|
auto input_size = htonl(input.size());
|
|
|
|
// Write the number of elements.
|
|
output.write(reinterpret_cast<const char *>(&input_size), 4);
|
|
|
|
table.encode(output);
|
|
for (const std::uint8_t character: input)
|
|
{
|
|
rest = encode_coding(table[character], output, rest.first, rest.second);
|
|
}
|
|
if (rest.second > 0)
|
|
{
|
|
output << static_cast<std::ostream::char_type>(rest.first.to_ulong());
|
|
}
|
|
}
|
|
|
|
void compress(const std::vector<std::uint8_t>& input, std::ostream& output)
|
|
{
|
|
probability_list probabilities = adapt_probabilities(input);
|
|
std::shared_ptr<huffman_tree> tree = create_tree(probabilities);
|
|
huffman_table table{ tree };
|
|
|
|
encode(table, input, output);
|
|
}
|
|
|
|
void decompress(const std::vector<std::uint8_t>& input, std::ostream& output)
|
|
{
|
|
auto input_size = ntohl(*reinterpret_cast<const std::uint32_t *>(input.data()));
|
|
auto table_iterator = std::cbegin(input) + 4;
|
|
huffman_table table{ table_iterator };
|
|
std::unordered_map<coding, std::byte> reverse_table;
|
|
|
|
for (auto entry: table)
|
|
{
|
|
reverse_table.insert({ entry.second, entry.first });
|
|
}
|
|
std::vector<bool> bit_input;
|
|
std::size_t element_count{ 0 };
|
|
|
|
while (table_iterator != std::cend(input))
|
|
{
|
|
std::bitset<8> current_bitset{ *table_iterator };
|
|
|
|
for (auto j = 0; j < current_bitset.size(); ++j)
|
|
{
|
|
bit_input.push_back(current_bitset[j]);
|
|
auto lookup_result = reverse_table.find(bit_input);
|
|
|
|
if (lookup_result != std::cend(reverse_table))
|
|
{
|
|
output << static_cast<unsigned char>(lookup_result->second);
|
|
bit_input.clear();
|
|
if (++element_count == input_size)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
std::advance(table_iterator, 1);
|
|
}
|
|
}
|