41
41
import org .apache .tika .batch .fs .FSUtil ;
42
42
import org .apache .tika .batch .fs .RecursiveParserWrapperFSConsumer ;
43
43
import org .apache .tika .config .TikaConfig ;
44
+ import org .apache .tika .sax .BasicContentHandlerFactory ;
44
45
import org .apache .tika .sax .ContentHandlerFactory ;
45
46
import org .apache .tika .util .ClassLoaderUtil ;
46
47
import org .apache .tika .util .PropsUtil ;
@@ -125,7 +126,9 @@ public ConsumersManager build(Node node, Map<String, String> runtimeAttributes,
125
126
}
126
127
ContentHandlerFactory contentHandlerFactory = getContentHandlerFactory (contentHandlerFactoryNode , runtimeAttributes );
127
128
ParserFactory parserFactory = getParserFactory (parserFactoryNode , runtimeAttributes );
128
- OutputStreamFactory outputStreamFactory = getOutputStreamFactory (outputStreamFactoryNode , runtimeAttributes );
129
+ OutputStreamFactory outputStreamFactory = getOutputStreamFactory (
130
+ outputStreamFactoryNode , runtimeAttributes ,
131
+ contentHandlerFactory , recursiveParserWrapper );
129
132
130
133
if (recursiveParserWrapper ) {
131
134
for (int i = 0 ; i < numConsumers ; i ++) {
@@ -147,7 +150,6 @@ public ConsumersManager build(Node node, Map<String, String> runtimeAttributes,
147
150
return manager ;
148
151
}
149
152
150
-
151
153
private ContentHandlerFactory getContentHandlerFactory (Node node , Map <String , String > runtimeAttributes ) {
152
154
153
155
Map <String , String > localAttrs = XMLDOMUtil .mapifyAttrs (node , runtimeAttributes );
@@ -166,7 +168,10 @@ private ParserFactory getParserFactory(Node node, Map<String, String> runtimeAtt
166
168
return builder .build (node , runtimeAttributes );
167
169
}
168
170
169
- private OutputStreamFactory getOutputStreamFactory (Node node , Map <String , String > runtimeAttributes ) {
171
+ private OutputStreamFactory getOutputStreamFactory (Node node ,
172
+ Map <String , String > runtimeAttributes ,
173
+ ContentHandlerFactory contentHandlerFactory ,
174
+ boolean useRecursiveParserWrapper ) {
170
175
Map <String , String > attrs = XMLDOMUtil .mapifyAttrs (node , runtimeAttributes );
171
176
172
177
Path outputDir = PropsUtil .getPath (attrs .get ("outputDir" ), null );
@@ -196,6 +201,17 @@ private OutputStreamFactory getOutputStreamFactory(Node node, Map<String, String
196
201
compression = FSOutputStreamFactory .COMPRESSION .ZIP ;
197
202
}
198
203
String suffix = attrs .get ("outputSuffix" );
204
+ //suffix should not start with "."
205
+ if (suffix == null ) {
206
+ StringBuilder sb = new StringBuilder ();
207
+ if (useRecursiveParserWrapper ) {
208
+ sb .append ("json" );
209
+ } else if (contentHandlerFactory instanceof BasicContentHandlerFactory ) {
210
+ appendSuffix (((BasicContentHandlerFactory ) contentHandlerFactory ).getType (), sb );
211
+ }
212
+ appendCompression (compression , sb );
213
+ suffix = sb .toString ();
214
+ }
199
215
200
216
//TODO: possibly open up the different handle-existings in the future
201
217
//but for now, lock it down to require skip. Too dangerous otherwise
@@ -204,4 +220,33 @@ private OutputStreamFactory getOutputStreamFactory(Node node, Map<String, String
204
220
compression , suffix );
205
221
}
206
222
223
+ private void appendCompression (FSOutputStreamFactory .COMPRESSION compression , StringBuilder sb ) {
224
+ switch (compression ) {
225
+ case NONE :
226
+ break ;
227
+ case ZIP :
228
+ sb .append (".zip" );
229
+ break ;
230
+ case BZIP2 :
231
+ sb .append (".bz2" );
232
+ break ;
233
+ case GZIP :
234
+ sb .append (".gz" );
235
+ break ;
236
+ }
237
+ }
238
+
239
+ private void appendSuffix (BasicContentHandlerFactory .HANDLER_TYPE type , StringBuilder sb ) {
240
+ switch (type ) {
241
+ case XML :
242
+ sb .append ("xml" );
243
+ break ;
244
+ case HTML :
245
+ sb .append ("html" );
246
+ break ;
247
+ default :
248
+ sb .append ("txt" );
249
+ }
250
+ }
251
+
207
252
}
0 commit comments