Skip to content

Commit 4da2344

Browse files
authored
Consolidate TokenIndex definition (#84)
Summary: Trying to include the sentencepiece tokenizer in the LLM runner results in a duplicated definition. Differential Revision: D76631817
1 parent 3d67b29 commit 4da2344

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

include/pytorch/tokenizers/llama2c_tokenizer.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212

1313
namespace tokenizers {
1414

15-
struct TokenIndex {
16-
const char* str;
17-
int32_t id;
18-
};
19-
2015
// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
2116
// won't work with this class, it needs to go through tokenizer.py first.
2217
class Llama2cTokenizer : public Tokenizer {

include/pytorch/tokenizers/sentencepiece.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@
1616
#include "sentencepiece_processor.h"
1717
namespace tokenizers {
1818

19-
struct TokenIndex {
20-
const char* str;
21-
int32_t id;
22-
};
23-
2419
class SPTokenizer : public Tokenizer {
2520
public:
2621
explicit SPTokenizer();

include/pytorch/tokenizers/tokenizer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020

2121
namespace tokenizers {
2222

23+
struct TokenIndex {
24+
const char* str;
25+
int32_t id;
26+
};
27+
2328
class Tokenizer {
2429
public:
2530
explicit Tokenizer() {}

0 commit comments

Comments
 (0)