Skip to content

Commit 4da2344

Browse files
authored
Consolidate TokenIndex definition (#84)
Summary: Trying to include the sentencepiece tokenizer in the LLM runner results in a duplicated definition. Differential Revision: D76631817
1 parent 3d67b29 commit 4da2344

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

include/pytorch/tokenizers/llama2c_tokenizer.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212

1313
namespace tokenizers {
1414

15-
struct TokenIndex {
16-
const char* str;
17-
int32_t id;
18-
};
19-
2015
// A simple Byte Pair Encoding (BPE) Tokenizer. Note that the current C++ code
2116
// won't work with this class, it needs to go through tokenizer.py first.
2217
class Llama2cTokenizer : public Tokenizer {

include/pytorch/tokenizers/sentencepiece.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@
1616
#include "sentencepiece_processor.h"
1717
namespace tokenizers {
1818

19-
struct TokenIndex {
20-
const char* str;
21-
int32_t id;
22-
};
23-
2419
class SPTokenizer : public Tokenizer {
2520
public:
2621
explicit SPTokenizer();

include/pytorch/tokenizers/tokenizer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020

2121
namespace tokenizers {
2222

23+
struct TokenIndex {
24+
const char* str;
25+
int32_t id;
26+
};
27+
2328
class Tokenizer {
2429
public:
2530
explicit Tokenizer() {}

0 commit comments

Comments
 (0)