68 lines
1.8 KiB
C++
68 lines
1.8 KiB
C++
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <string>

#include "utf8.h"
#include "context.hpp"
#include "utf8/checked.h"
|
|
|
|
static
|
|
context_model process_input()
|
|
{
|
|
std::array<char, 4096> buffer;
|
|
std::ptrdiff_t left_over{ 0 };
|
|
context_model context;
|
|
|
|
while (!std::cin.eof())
|
|
{
|
|
std::cin.read(buffer.data() + left_over, buffer.size() - left_over);
|
|
|
|
std::array<char, 4096>::iterator octet_iterator = std::begin(buffer);
|
|
std::array<char, 4096>::iterator start_iterator = octet_iterator;
|
|
std::array<char, 4096>::iterator end_iterator = octet_iterator + std::cin.gcount() + left_over;
|
|
|
|
try
|
|
{
|
|
while (start_iterator != end_iterator)
|
|
{
|
|
utf8::utfchar32_t character = utf8::next(start_iterator, end_iterator);
|
|
|
|
context.add(character);
|
|
octet_iterator = start_iterator;
|
|
}
|
|
left_over = 0;
|
|
}
|
|
catch (const utf8::not_enough_room&)
|
|
{
|
|
std::copy(octet_iterator, end_iterator, std::begin(buffer));
|
|
left_over = std::distance(octet_iterator, end_iterator);
|
|
}
|
|
}
|
|
return context;
|
|
}
|
|
|
|
/*
|
|
* Reads a UTF-8 file from stdin, analyzes it and prints context information.
|
|
*/
|
|
int main(int argc, char **argv)
|
|
{
|
|
context_model context = process_input();
|
|
|
|
std::size_t i{ 0 };
|
|
for (auto [context_characters, occurrence]: context)
|
|
{
|
|
++i;
|
|
std::cout << i << ". " << context_characters << ": ";
|
|
|
|
for (auto [character, count]: occurrence)
|
|
{
|
|
std::string character_bytes;
|
|
utf8::append(character, character_bytes);
|
|
|
|
std::cout << character_bytes << " - " << count << "; ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|