#include #include #include #include "utf8.h" #include "context.hpp" #include "utf8/checked.h" static context_model process_input() { std::array buffer; std::ptrdiff_t left_over{ 0 }; context_model context; while (!std::cin.eof()) { std::cin.read(buffer.data() + left_over, buffer.size() - left_over); std::array::iterator octet_iterator = std::begin(buffer); std::array::iterator start_iterator = octet_iterator; std::array::iterator end_iterator = octet_iterator + std::cin.gcount() + left_over; try { while (start_iterator != end_iterator) { utf8::utfchar32_t character = utf8::next(start_iterator, end_iterator); context.add(character); octet_iterator = start_iterator; } left_over = 0; } catch (const utf8::not_enough_room&) { std::copy(octet_iterator, end_iterator, std::begin(buffer)); left_over = std::distance(octet_iterator, end_iterator); } } return context; } /* * Reads a UTF-8 file from stdin, analyzes it and prints context information. */ int main(int argc, char **argv) { context_model context = process_input(); std::size_t i{ 0 }; for (auto [context_characters, occurrence]: context) { ++i; std::cout << i << ". " << context_characters << ": "; for (auto [character, count]: occurrence) { std::string character_bytes; utf8::append(character, character_bytes); std::cout << character_bytes << " - " << count << "; "; } std::cout << std::endl; } return EXIT_SUCCESS; }