diff --git a/cli/pawls/preprocessors/tesseract.py b/cli/pawls/preprocessors/tesseract.py index 0c48f257..d0bd49b2 100644 --- a/cli/pawls/preprocessors/tesseract.py +++ b/cli/pawls/preprocessors/tesseract.py @@ -40,7 +40,7 @@ def extract_page_tokens( gp["width"].max(), gp["height"].max(), gp["conf"].mean(), - gp["text"].str.cat(sep=" "), + gp["text"].astype(str).str.cat(sep=" "), ] ) )