#include #include #include #include #include #include #include #include #include "huffman.hpp" #include huffman_tree::huffman_tree(const std::string& value) : _value(value), _left(nullptr), _right(nullptr) { } huffman_tree::huffman_tree(std::shared_ptr left, std::shared_ptr right) : _value(""), _left(left), _right(right) { } std::uint8_t huffman_tree::value() const { return _value.front(); } std::shared_ptr huffman_tree::left() const { return _left; } std::shared_ptr huffman_tree::right() const { return _right; } bool huffman_tree::is_leaf() const { return !(this->left() || this->right()); } huffman_probability::huffman_probability(const std::string& bytes, double probability) : bytes(bytes), probability(probability), tree(std::make_shared(bytes)) { } huffman_probability huffman_probability::operator+(const huffman_probability& that) const { huffman_probability result{ this->bytes + that.bytes, this->probability + that.probability }; result.tree = std::make_shared(this->tree, that.tree); return result; } bool huffman_probability::operator<(const huffman_probability& that) const { return this->probability < that.probability; } bool huffman_probability::operator>(const huffman_probability& that) const { return this->probability > that.probability; } bool huffman_probability::operator<=(const huffman_probability& that) const { return this->probability <= that.probability; } bool huffman_probability::operator>=(const huffman_probability& that) const { return this->probability >= that.probability; } std::pair, std::size_t> encode_coding(const coding& input, std::ostream& output, std::bitset<8> bitset, std::size_t index) { for (const bool bit: input) { bitset[index] = bit; if (index == 7) { output.put(static_cast(bitset.to_ulong())); index = 0; bitset = std::bitset<8>(); } else { ++index; } } return { bitset, index }; } void huffman_table::create_table(std::shared_ptr tree, coding path) { if (tree->is_leaf()) { insert(tree->value(), std::move(path)); } else { coding new_path; new_path = path; new_path.push_back(false); create_table(tree->left(), std::move(new_path)); new_path = path; new_path.push_back(true); create_table(tree->right(), std::move(new_path)); } } huffman_table::huffman_table(std::shared_ptr tree) { create_table(tree, coding()); } huffman_table::huffman_table(std::vector::const_iterator& coding_stream) { std::size_t table_size = *coding_stream; std::advance(coding_stream, 1); for (std::uint8_t i = 0; i < table_size; ++i) { auto coded_symbol = static_cast(*coding_stream); std::advance(coding_stream, 1); std::uint8_t bits_in_coding = *coding_stream; std::advance(coding_stream, 1); coding current_coding; std::size_t current_bit{ 0 }; for (std::uint8_t j = 0; j < bits_in_coding; ++j) { std::bitset<8> current_bitset{ *coding_stream }; current_coding.push_back(current_bitset[current_bit]); if (current_bit == 7) { current_bit = 0; std::advance(coding_stream, 1); } else { ++current_bit; } } if (current_bit != 0) { std::advance(coding_stream, 1); } this->payload.insert({ coded_symbol, current_coding }); } } void huffman_table::insert(std::uint8_t byte, coding&& path) { this->payload.insert({ static_cast(byte), std::move(path) }); } const coding& huffman_table::operator[](std::uint8_t byte) const { return this->payload.at(static_cast(byte)); } std::size_t huffman_table::size() const { return this->payload.size(); } void huffman_table::encode(std::ostream& output) const { // The first byte is the table size. output.put(static_cast(size())); for (const std::pair& entry: this->payload) { // For each entry write the byte encoded. output.put(static_cast(entry.first)); output.put(static_cast(entry.second.size())); auto rest = encode_coding(entry.second, output, {}, 0); if (rest.second > 0) { output << static_cast(rest.first.to_ulong()); } } } std::unordered_map::iterator huffman_table::begin() { return this->payload.begin(); } std::unordered_map::iterator huffman_table::end() { return this->payload.end(); } std::unordered_map::const_iterator huffman_table::begin() const { return this->payload.cbegin(); } std::unordered_map::const_iterator huffman_table::end() const { return this->payload.cend(); } std::shared_ptr create_tree(probability_list& probabilities) { while (probabilities.size() > 1) { std::sort(std::begin(probabilities), std::end(probabilities), std::greater()); auto last = probabilities.back(); probabilities.pop_back(); probabilities.back() = probabilities.back() + last; } return probabilities.front().tree; } probability_list adapt_probabilities(const std::vector& input) { std::array counts = {}; probability_list result; for (auto byte: input) { ++counts[byte]; } for (std::array::const_iterator count = std::cbegin(counts); count != std::cend(counts); ++count) { if (*count > 0) { result.push_back({ std::string(1, static_cast(std::distance(std::cbegin(counts), count))), *count / 256.0 }); } } return result; } void encode(const huffman_table& table, const std::vector& input, std::ostream& output) { std::pair, std::size_t> rest{ {}, 0 }; auto input_size = htonl(input.size()); // Write the number of elements. output.write(reinterpret_cast(&input_size), 4); table.encode(output); for (const std::uint8_t character: input) { rest = encode_coding(table[character], output, rest.first, rest.second); } if (rest.second > 0) { output << static_cast(rest.first.to_ulong()); } } void compress(const std::vector& input, std::ostream& output) { probability_list probabilities = adapt_probabilities(input); std::shared_ptr tree = create_tree(probabilities); huffman_table table{ tree }; encode(table, input, output); } void decompress(const std::vector& input, std::ostream& output) { auto input_size = ntohl(*reinterpret_cast(input.data())); auto table_iterator = std::cbegin(input) + 4; huffman_table table{ table_iterator }; std::unordered_map reverse_table; for (auto entry: table) { reverse_table.insert({ entry.second, entry.first }); } std::vector bit_input; std::size_t element_count{ 0 }; while (table_iterator != std::cend(input)) { std::bitset<8> current_bitset{ *table_iterator }; for (auto j = 0; j < current_bitset.size(); ++j) { bit_input.push_back(current_bitset[j]); auto lookup_result = reverse_table.find(bit_input); if (lookup_result != std::cend(reverse_table)) { output << static_cast(lookup_result->second); bit_input.clear(); if (++element_count == input_size) { break; } } } std::advance(table_iterator, 1); } }