File size: 1,471 Bytes
8652957 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#ifndef LM_COMMON_PRINT_H
#define LM_COMMON_PRINT_H
#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <vector>
namespace util { namespace stream { class ChainPositions; }}
// Warning: PrintARPA routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm {
// Index-to-string lookup table for vocabulary words.
//
// map_ holds one pointer per word plus one trailing entry, so the number of
// addressable words is map_.size() - 1 (see Size()).  The constructor is
// defined out of line; presumably it fills map_ with pointers into memory_
// that was read from fd -- confirm against the implementation.
class VocabReconstitute {
  public:
    // fd must be alive for life of this object; does not take ownership.
    explicit VocabReconstitute(int fd);

    // Returns the pointer stored for index.
    // Precondition: index < Size().  Checked with assert only, so builds
    // with NDEBUG perform no bounds check.
    const char *Lookup(WordIndex index) const {
      assert(InRange(index));
      return map_[index];
    }

    // Returns the same word as Lookup(index), but as a StringPiece.  The
    // length is the distance to the next entry minus one byte (presumably
    // the terminator separating consecutive words -- confirm).
    // Precondition: index < Size().  This reads map_[index + 1], which is why
    // map_ carries one extra trailing entry.
    StringPiece LookupPiece(WordIndex index) const {
      assert(InRange(index));
      return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
    }

    // Number of words that can be looked up.
    // Assumes map_ is non-empty (i.e. the trailing entry is present); on an
    // empty map_ the unsigned subtraction would wrap around.
    std::size_t Size() const {
      // There's an extra entry to support StringPiece lengths.
      return map_.size() - 1;
    }

  private:
    // True when index addresses a real word, i.e. not the trailing entry.
    // The explicit empty() test keeps map_.size() - 1 from wrapping around,
    // which would otherwise make the check pass for every index.
    bool InRange(WordIndex index) const {
      return !map_.empty() && index < map_.size() - 1;
    }

    util::scoped_memory memory_;
    std::vector<const char*> map_;
};
// Holds the inputs needed for ARPA printing: a vocabulary file descriptor, an
// output file descriptor and a vector of counts (presumably one entry per
// n-gram order -- confirm against the caller).  The work itself is done by
// Run(), which is defined out of line.
class PrintARPA {
  public:
    // Records both descriptors and keeps its own copy of counts.
    // Does not take ownership of vocab_fd or out_fd.
    explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
      : vocab_fd_(vocab_fd),
        out_fd_(out_fd),
        counts_(counts) {}

    // Entry point; takes the chain positions to operate on.
    void Run(const util::stream::ChainPositions &positions);

  private:
    // Vocabulary file descriptor; not owned.
    int vocab_fd_;
    // Output file descriptor; not owned.
    int out_fd_;
    // Copy of the counts passed to the constructor.
    std::vector<uint64_t> counts_;
};
} // namespace lm
#endif // LM_COMMON_PRINT_H
|