From 82d29e53131df5543dac0d5c463dd6cc3ef5658a Mon Sep 17 00:00:00 2001 From: "Stephen M. Coakley" Date: Fri, 14 Sep 2018 13:06:10 -0500 Subject: [PATCH] Ensure streams support marks during detection Ensure we pass in a rewindable InputStream to Tika so that we can start from the beginning of the stream when we do the actual file parsing. Fixes #20. --- .../java/com/widen/tabitha/RowReaderFactory.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/widen/tabitha/RowReaderFactory.java b/src/main/java/com/widen/tabitha/RowReaderFactory.java index 5ea4d3e..b40da5b 100644 --- a/src/main/java/com/widen/tabitha/RowReaderFactory.java +++ b/src/main/java/com/widen/tabitha/RowReaderFactory.java @@ -1,13 +1,12 @@ package com.widen.tabitha; -import com.widen.tabitha.formats.delimited.DelimitedRowReader; import com.widen.tabitha.formats.delimited.DelimitedFormat; -import com.widen.tabitha.formats.excel.WorkbookRowReader; +import com.widen.tabitha.formats.delimited.DelimitedRowReader; import com.widen.tabitha.formats.excel.XLSRowReader; import com.widen.tabitha.formats.excel.XLSXRowReader; -import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.tika.Tika; +import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -69,6 +68,9 @@ public static Optional<RowReader> open(InputStream inputStream) throws IOExcepti * @return A row reader if the stream is in a supported format. */ public static Optional<RowReader> open(InputStream inputStream, String filename) throws IOException { + // If our input stream supports marks, Tika will rewind the stream back to the start for us after detecting the + // format, so ensure our input stream supports it. 
+ inputStream = createRewindableInputStream(inputStream); String mimeType = tika.detect(inputStream, filename); switch (mimeType) { @@ -90,6 +92,10 @@ public static Optional<RowReader> open(InputStream inputStream, String filename) return Optional.empty(); } + private static InputStream createRewindableInputStream(InputStream inputStream) { + return inputStream.markSupported() ? inputStream : new BufferedInputStream(inputStream); + } + // Apache Tika instance for detecting MIME types. private static final Tika tika = new Tika(); }