|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef LM_BHIKSHA_H |
|
#define LM_BHIKSHA_H |
|
|
|
#include "lm/model_type.hh" |
|
#include "lm/trie.hh" |
|
#include "util/bit_packing.hh" |
|
#include "util/sorted_uniform.hh" |
|
|
|
#include <algorithm> |
|
#include <stdint.h> |
|
#include <cassert> |
|
|
|
namespace lm { |
|
namespace ngram { |
|
struct Config; |
|
class BinaryFormat; |
|
|
|
namespace trie { |
|
|
|
class DontBhiksha { |
|
public: |
|
static const ModelType kModelTypeAdd = static_cast<ModelType>(0); |
|
|
|
static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &) {} |
|
|
|
static uint64_t Size(uint64_t , uint64_t , const Config &) { return 0; } |
|
|
|
static uint8_t InlineBits(uint64_t , uint64_t max_next, const Config &) { |
|
return util::RequiredBits(max_next); |
|
} |
|
|
|
DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config); |
|
|
|
void ReadNext(const void *base, uint64_t bit_offset, uint64_t , uint8_t total_bits, NodeRange &out) const { |
|
out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask); |
|
out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask); |
|
|
|
} |
|
|
|
void WriteNext(void *base, uint64_t bit_offset, uint64_t , uint64_t value) { |
|
util::WriteInt57(base, bit_offset, next_.bits, value); |
|
} |
|
|
|
void FinishedLoading(const Config &) {} |
|
|
|
uint8_t InlineBits() const { return next_.bits; } |
|
|
|
private: |
|
util::BitsMask next_; |
|
}; |
|
|
|
class ArrayBhiksha { |
|
public: |
|
static const ModelType kModelTypeAdd = kArrayAdd; |
|
|
|
static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config); |
|
|
|
static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config); |
|
|
|
static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config); |
|
|
|
ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config); |
|
|
|
void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const { |
|
|
|
|
|
|
|
|
|
const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1; |
|
|
|
|
|
const uint64_t *end_it; |
|
for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {} |
|
|
|
--end_it; |
|
|
|
out.begin = ((begin_it - offset_begin_) << next_inline_.bits) | |
|
util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask); |
|
out.end = ((end_it - offset_begin_) << next_inline_.bits) | |
|
util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask); |
|
|
|
assert(out.end >= out.begin); |
|
} |
|
|
|
void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) { |
|
uint64_t encode = value >> next_inline_.bits; |
|
for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index; |
|
util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask); |
|
} |
|
|
|
void FinishedLoading(const Config &config); |
|
|
|
uint8_t InlineBits() const { return next_inline_.bits; } |
|
|
|
private: |
|
const util::BitsMask next_inline_; |
|
|
|
const uint64_t *const offset_begin_; |
|
const uint64_t *const offset_end_; |
|
|
|
uint64_t *write_to_; |
|
|
|
void *original_base_; |
|
}; |
|
|
|
} |
|
} |
|
} |
|
|
|
#endif |
|
|