Skip to content

Commit 6c6c17d

Browse files
committed
TIKA-293: XWPFWordExtractorDecorator does not extract bookmarks
Patch by Maxim Valyanskiy. git-svn-id: https://svn.apache.org/repos/asf/lucene/tika/trunk@820962 13f79535-47bb-0310-9956-ffa450edef68
1 parent 0297f44 commit 6c6c17d

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java

+8 -1
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
2929
import org.apache.tika.sax.XHTMLContentHandler;
3030
import org.apache.xmlbeans.XmlException;
31+
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
3132
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
3233
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
3334
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
@@ -70,9 +71,15 @@ protected void buildXHTML(XHTMLContentHandler xhtml)
7071
new XWPFHeaderFooterPolicy(document, ctSectPr);
7172
extractHeaders(xhtml, headerFooterPolicy);
7273
}
73-
74+
7475
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
7576
new XWPFHyperlinkDecorator(paragraph, null, true));
77+
78+
CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
79+
for (CTBookmark bookmark : bookmarks) {
80+
xhtml.element("p", bookmark.getName());
81+
}
82+
7683
xhtml.element("p", decorator.getText());
7784

7885
if (ctSectPr != null) {

0 commit comments

Comments
 (0)