1
0
Files

316 lines
8.2 KiB
C++

#include <algorithm>
#include <array>
#include <bitset>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "huffman.hpp"
#include <arpa/inet.h>
huffman_tree::huffman_tree(const std::string& value)
: _value(value), _left(nullptr), _right(nullptr)
{
}
/**
 * Builds an inner node that joins two subtrees and stores no symbol of its own.
 *
 * @param left  Subtree reached by the `false` branch.
 * @param right Subtree reached by the `true` branch.
 */
huffman_tree::huffman_tree(std::shared_ptr<huffman_tree> left, std::shared_ptr<huffman_tree> right)
    // The parameters are sinks taken by value: move them into the members
    // instead of copying, which avoids an extra reference-count round trip.
    : _value(""), _left(std::move(left)), _right(std::move(right))
{
}
/**
 * Returns the first character of the stored symbol string as a byte.
 *
 * NOTE(review): nodes built from two subtrees store an empty string, so this
 * is only meaningful for leaves -- callers should check is_leaf() first.
 */
std::uint8_t huffman_tree::value() const
{
    const char symbol = _value.front();
    return static_cast<std::uint8_t>(symbol);
}
std::shared_ptr<huffman_tree> huffman_tree::left() const
{
return _left;
}
std::shared_ptr<huffman_tree> huffman_tree::right() const
{
return _right;
}
/** A node is a leaf exactly when it has neither a left nor a right child. */
bool huffman_tree::is_leaf() const
{
    const bool has_left = static_cast<bool>(this->left());
    const bool has_right = static_cast<bool>(this->right());
    return !has_left && !has_right;
}
huffman_probability::huffman_probability(const std::string& bytes, double probability)
: bytes(bytes), probability(probability), tree(std::make_shared<huffman_tree>(bytes))
{
}
/**
 * Merges two entries: symbol strings are concatenated, weights are summed and
 * the resulting tree is a new inner node with this entry's tree on the left
 * and the other entry's tree on the right.
 */
huffman_probability huffman_probability::operator+(const huffman_probability& that) const
{
    const std::string joined_bytes = bytes + that.bytes;
    const double joined_weight = probability + that.probability;

    huffman_probability merged{ joined_bytes, joined_weight };
    // The constructor created a leaf; replace it with the joined subtree.
    merged.tree = std::make_shared<huffman_tree>(tree, that.tree);
    return merged;
}
/** Orders entries by weight only; the symbol strings are not compared. */
bool huffman_probability::operator<(const huffman_probability& that) const
{
    return probability < that.probability;
}
/** True when this entry's weight is strictly greater than the other's. */
bool huffman_probability::operator>(const huffman_probability& that) const
{
    return that.probability < probability;
}
/** True when this entry's weight is less than or equal to the other's. */
bool huffman_probability::operator<=(const huffman_probability& that) const
{
    return probability <= that.probability;
}
/** True when this entry's weight is greater than or equal to the other's. */
bool huffman_probability::operator>=(const huffman_probability& that) const
{
    return that.probability <= probability;
}
/**
 * Appends the bits of one code to a partially filled byte and writes every
 * byte that becomes full to the stream.
 *
 * Bits are stored starting at bit position 0 of each byte.
 *
 * @param input  Code whose bits are appended in iteration order.
 * @param output Stream that receives completed bytes.
 * @param bitset Byte under construction carried over from the previous call.
 * @param index  Position in `bitset` that receives the next bit.
 * @return The still incomplete byte and the position of its next free bit;
 *         a position of 0 means nothing is pending.
 */
std::pair<std::bitset<8>, std::size_t> encode_coding(const coding& input, std::ostream& output,
    std::bitset<8> bitset, std::size_t index)
{
    for (const bool bit : input)
    {
        bitset[index] = bit;
        if (index != 7)
        {
            ++index;
            continue;
        }
        // The byte is complete: emit it and start over with an empty one.
        output.put(static_cast<std::ostream::char_type>(bitset.to_ulong()));
        bitset.reset();
        index = 0;
    }
    return { bitset, index };
}
void huffman_table::create_table(std::shared_ptr<huffman_tree> tree, coding path)
{
if (tree->is_leaf())
{
insert(tree->value(), std::move(path));
}
else
{
coding new_path;
new_path = path;
new_path.push_back(false);
create_table(tree->left(), std::move(new_path));
new_path = path;
new_path.push_back(true);
create_table(tree->right(), std::move(new_path));
}
}
/** Builds the symbol-to-code table from a tree, starting with an empty path at the root. */
huffman_table::huffman_table(std::shared_ptr<huffman_tree> tree)
{
    coding root_path{};
    this->create_table(tree, root_path);
}
/**
 * Reads a serialized table from a byte stream.
 *
 * Layout consumed here: one byte with the number of entries, then per entry
 * one symbol byte, one byte with the code length in bits, and the code bits
 * packed from bit position 0 upwards, padded to a whole byte.
 *
 * @param coding_stream Iterator at the first table byte. It is advanced past
 *                      the table and is left on the byte that follows it.
 *
 * NOTE(review): there is no end iterator, so a truncated or malformed stream
 * is read past its end -- the caller must guarantee well-formed input.
 */
huffman_table::huffman_table(std::vector<std::uint8_t>::const_iterator& coding_stream)
{
    const std::size_t entry_count = *coding_stream;
    ++coding_stream;

    for (std::size_t entry = 0; entry < entry_count; ++entry)
    {
        const auto symbol = static_cast<std::byte>(*coding_stream);
        ++coding_stream;
        const std::size_t bit_count = *coding_stream;
        ++coding_stream;

        coding code;
        for (std::size_t bit = 0; bit < bit_count; ++bit)
        {
            const std::size_t offset = bit % 8;
            const std::bitset<8> packed{ *coding_stream };
            code.push_back(packed[offset]);
            if (offset == 7)
            {
                // Last bit of this byte consumed; continue with the next one.
                ++coding_stream;
            }
        }
        if (bit_count % 8 != 0)
        {
            // Skip the partially used byte that holds the padding bits.
            ++coding_stream;
        }
        this->payload.insert({ symbol, code });
    }
}
void huffman_table::insert(std::uint8_t byte, coding&& path)
{
this->payload.insert({ static_cast<std::byte>(byte), std::move(path) });
}
/**
 * Looks up the code of a symbol.
 *
 * @throws std::out_of_range (from the map's at()) when the symbol has no entry.
 */
const coding& huffman_table::operator[](std::uint8_t byte) const
{
    const auto key = static_cast<std::byte>(byte);
    return payload.at(key);
}
std::size_t huffman_table::size() const
{
return this->payload.size();
}
/**
 * Serializes the table: one byte with the number of entries, then for each
 * entry the symbol byte, the code length in bits as one byte, and the code
 * bits packed from bit position 0 upwards and padded to a whole byte.
 *
 * NOTE(review): both the entry count and the code length are truncated to a
 * single byte, so a table with 256 entries is written with a count of 0.
 * Lifting this limit requires a matching change in the reader.
 *
 * @param output Stream that receives the serialized table.
 */
void huffman_table::encode(std::ostream& output) const
{
    // The first byte is the table size.
    output.put(static_cast<std::ostream::char_type>(size()));
    // Bind by `const auto&`: the map's element type has a const key, so naming
    // a pair with a non-const key here would copy every entry.
    for (const auto& entry : this->payload)
    {
        // For each entry write the byte encoded, then the length of its code.
        output.put(static_cast<std::ostream::char_type>(entry.first));
        output.put(static_cast<std::ostream::char_type>(entry.second.size()));
        const auto rest = encode_coding(entry.second, output, {}, 0);
        if (rest.second > 0)
        {
            // Flush the last, partially filled byte. Use unformatted output so
            // stream width/fill settings cannot alter the binary data.
            output.put(static_cast<std::ostream::char_type>(rest.first.to_ulong()));
        }
    }
}
/** Mutable iterator to the first table entry (iteration order is that of the underlying map). */
std::unordered_map<std::byte, coding>::iterator huffman_table::begin()
{
    return payload.begin();
}
/** Mutable past-the-end iterator of the table entries. */
std::unordered_map<std::byte, coding>::iterator huffman_table::end()
{
    return payload.end();
}
/** Read-only iterator to the first table entry. */
std::unordered_map<std::byte, coding>::const_iterator huffman_table::begin() const
{
    // `this` is const here, so begin() already yields a const_iterator.
    return payload.begin();
}
/** Read-only past-the-end iterator of the table entries. */
std::unordered_map<std::byte, coding>::const_iterator huffman_table::end() const
{
    // `this` is const here, so end() already yields a const_iterator.
    return payload.end();
}
/**
 * Builds a Huffman tree by repeatedly merging the two lowest-weighted entries
 * until a single entry is left.
 *
 * @param probabilities Weighted entries; the list is consumed and reduced to
 *                      at most one element.
 * @return Root of the resulting tree, or a null pointer when the list is empty.
 */
std::shared_ptr<huffman_tree> create_tree(probability_list& probabilities)
{
    if (probabilities.empty())
    {
        // There is nothing to merge; reading front() below would be undefined.
        return nullptr;
    }
    while (probabilities.size() > 1)
    {
        // Sort descending so that the two smallest weights sit at the back.
        std::sort(std::begin(probabilities), std::end(probabilities), std::greater());
        auto last = probabilities.back();
        probabilities.pop_back();
        // Replace the second smallest entry by the merge of both.
        probabilities.back() = probabilities.back() + last;
    }
    return probabilities.front().tree;
}
/**
 * Counts how often each byte value occurs and returns one weighted entry per
 * byte value that is present, in ascending byte order.
 *
 * The weight is the occurrence count divided by 256.0 (not by the input
 * length), so it is a relative weight rather than a normalized probability.
 *
 * @param input Bytes to analyse.
 * @return Entries for all byte values with a non-zero count.
 */
probability_list adapt_probabilities(const std::vector<std::uint8_t>& input)
{
    std::array<std::size_t, 256> histogram{};
    for (const std::uint8_t symbol : input)
    {
        histogram[symbol] += 1;
    }

    probability_list result;
    for (std::size_t symbol = 0; symbol < histogram.size(); ++symbol)
    {
        const std::size_t occurrences = histogram[symbol];
        if (occurrences == 0)
        {
            continue;
        }
        result.push_back({
            std::string(1, static_cast<char>(symbol)),
            occurrences / 256.0
        });
    }
    return result;
}
/**
 * Writes a complete compressed stream: the number of input bytes as a 32-bit
 * integer in network byte order, the serialized table, and then the codes of
 * all input bytes packed back to back, padded to a whole byte.
 *
 * @param table  Code table; it must contain every byte value found in `input`
 *               (the lookup throws std::out_of_range otherwise).
 * @param input  Bytes to encode.
 * @param output Stream that receives the compressed data.
 * @throws std::length_error when `input` has more elements than the 32-bit
 *         count field can represent.
 */
void encode(const huffman_table& table, const std::vector<std::uint8_t>& input, std::ostream& output)
{
    // The header stores the element count in 32 bits. Refuse larger inputs
    // rather than silently truncating the count and writing an undecodable stream.
    if (input.size() > std::numeric_limits<std::uint32_t>::max())
    {
        throw std::length_error("huffman: input is too large for the 32-bit element count");
    }

    // Write the number of elements.
    const std::uint32_t input_size = htonl(static_cast<std::uint32_t>(input.size()));
    output.write(reinterpret_cast<const char *>(&input_size), sizeof(input_size));

    table.encode(output);

    std::pair<std::bitset<8>, std::size_t> rest{ {}, 0 };
    for (const std::uint8_t character : input)
    {
        rest = encode_coding(table[character], output, rest.first, rest.second);
    }
    if (rest.second > 0)
    {
        // Flush the last, partially filled byte. Use unformatted output so
        // stream width/fill settings cannot alter the binary data.
        output.put(static_cast<std::ostream::char_type>(rest.first.to_ulong()));
    }
}
/**
 * Compresses a byte buffer: derives symbol weights from the input, builds the
 * Huffman tree and code table, and writes header, table and payload.
 *
 * @param input  Bytes to compress; may be empty.
 * @param output Stream that receives the compressed data.
 */
void compress(const std::vector<std::uint8_t>& input, std::ostream& output)
{
    if (input.empty())
    {
        // Without symbols there are no weights and no tree can be built.
        // Emit the smallest well-formed stream instead: a zero element count
        // (4 bytes) followed by a zero table size (1 byte).
        const char empty_stream[5] = {};
        output.write(empty_stream, sizeof(empty_stream));
        return;
    }

    probability_list probabilities = adapt_probabilities(input);
    std::shared_ptr<huffman_tree> tree = create_tree(probabilities);
    huffman_table table{ tree };
    encode(table, input, output);
}
/**
 * Decodes a stream laid out as: 32-bit element count in network byte order,
 * serialized code table, packed code bits.
 *
 * Decoding stops as soon as the declared number of elements has been written,
 * so padding bits and any trailing bytes are ignored. If the payload ends
 * early, fewer elements than declared are written.
 *
 * @param input  Complete compressed stream.
 * @param output Stream that receives the decoded bytes.
 * @throws std::runtime_error when `input` is too short to hold the element
 *         count and the table size byte.
 *
 * NOTE(review): the table reader receives no end iterator, so a stream whose
 * table is truncated is still read past its end.
 */
void decompress(const std::vector<std::uint8_t>& input, std::ostream& output)
{
    // Four bytes of element count plus the table size byte are the minimum.
    if (input.size() < 5)
    {
        throw std::runtime_error("huffman: compressed stream is truncated");
    }

    // Assemble the big-endian (network order) count byte by byte; this avoids
    // an unaligned, type-punned read of the buffer.
    const std::uint32_t input_size =
        (std::uint32_t{ input[0] } << 24) |
        (std::uint32_t{ input[1] } << 16) |
        (std::uint32_t{ input[2] } << 8) |
        std::uint32_t{ input[3] };

    // The table constructor advances the iterator to the first payload byte.
    auto table_iterator = std::cbegin(input) + 4;
    huffman_table table{ table_iterator };

    // Invert the table so that a bit sequence maps back to its symbol.
    std::unordered_map<coding, std::byte> reverse_table;
    for (const auto& entry : table)
    {
        reverse_table.insert({ entry.second, entry.first });
    }

    std::vector<bool> bit_input;
    std::size_t element_count{ 0 };
    while (element_count < input_size && table_iterator != std::cend(input))
    {
        const std::bitset<8> current_bitset{ *table_iterator };
        for (std::size_t j = 0; j < current_bitset.size() && element_count < input_size; ++j)
        {
            bit_input.push_back(current_bitset[j]);
            const auto lookup_result = reverse_table.find(bit_input);
            if (lookup_result != std::cend(reverse_table))
            {
                output.put(static_cast<std::ostream::char_type>(lookup_result->second));
                bit_input.clear();
                ++element_count;
            }
        }
        ++table_iterator;
    }
}