// ctranslate2/tokenizer/mod.rs

// Feature-gated submodules: each one is compiled only when the Cargo feature
// named in its `cfg` attribute is enabled, so a build with no features enabled
// declares none of them.
// NOTE(review): presumably each submodule provides a concrete tokenizer
// backend — confirm against the module sources.

// `bpe` and `hf` share the single `tokenizers` feature.
#[cfg(feature = "tokenizers")]
pub mod bpe;
#[cfg(feature = "tokenizers")]
pub mod hf;
#[cfg(feature = "rust_tokenizers")]
pub mod rust_tokenizers;
#[cfg(feature = "sentencepiece")]
pub mod sentencepiece;
9
10pub trait Tokenizer {
11    /// Encodes a given string into a sequence of tokens
12    fn encode(&self, input: &str) -> anyhow::Result<Vec<String>>;
13
14    /// Decodes a given sequence of tokens back into a single string
15    fn decode(&self, tokens: Vec<String>) -> anyhow::Result<String>;
16}