68 lines
1.6 KiB
C++
68 lines
1.6 KiB
C++
#include "utf8.h"
|
|
#include "context.hpp"
|
|
|
|
/// Seeds the per-character table with `character` at a tally of 1.
///
/// NOTE(review): the aggregate `count` member is not assigned here, so its
/// starting value comes from the class declaration — confirm it is initialised.
///
/// @param character Code point to register as the first observation.
context_occurrence::context_occurrence(utf8::utfchar32_t character)
{
    counts.emplace(character, 1u);
}
|
|
|
|
/// Records one more observation of `character`.
///
/// Increments the aggregate `count` and the tally stored for `character` in
/// `counts`; a character that has no entry yet ends up with a tally of 1.
///
/// @param character Code point to tally.
void context_occurrence::add(utf8::utfchar32_t character)
{
    ++this->count;

    // operator[] value-initialises a missing entry to 0 before the increment,
    // so one hashed lookup handles both the "new" and "already present" cases
    // (the previous find() + insert() pair hashed the key twice).
    ++this->counts[character];
}
|
|
|
|
/// @return Iterator to the first (character, tally) entry of `counts`.
std::unordered_map<utf8::utfchar32_t, unsigned int>::iterator context_occurrence::begin()
{
    return counts.begin();
}
|
|
|
|
/// @return Past-the-end iterator of `counts`, for pairing with begin().
std::unordered_map<utf8::utfchar32_t, unsigned int>::iterator context_occurrence::end()
{
    return counts.end();
}
|
|
|
|
/// Feeds the next character into the model.
///
/// While fewer than two characters are buffered in `current_context`, the
/// character is only appended to the buffer. Otherwise the buffered characters
/// are appended (via utf8::append) into a std::string key, `character` is
/// recorded under that key in `occurrences`, the oldest buffered character is
/// dropped, and `character` is appended to the buffer.
///
/// @param character Code point to add.
void context_model::add(utf8::utfchar32_t character)
{
    auto& window = this->current_context;

    // Not enough history yet: just extend the buffer.
    if (window.size() < 2)
    {
        window.push_back(character);
        return;
    }

    // Build the lookup key from everything currently buffered.
    std::string key;
    for (const auto buffered : window)
    {
        utf8::append(buffered, key);
    }

    const auto existing = this->occurrences.find(key);
    if (existing != this->occurrences.end())
    {
        existing->second.add(character);
    }
    else
    {
        this->occurrences.emplace(key, context_occurrence(character));
    }

    // Slide the buffer forward by one character.
    window.pop_front();
    window.push_back(character);
}
|
|
|
|
/// @return Iterator to the first (key, context_occurrence) entry of `occurrences`.
std::unordered_map<std::string, context_occurrence>::iterator context_model::begin()
{
    return occurrences.begin();
}
|
|
|
|
/// @return Past-the-end iterator of `occurrences`, for pairing with begin().
std::unordered_map<std::string, context_occurrence>::iterator context_model::end()
{
    return occurrences.end();
}
|