java.io.Closeable, java.lang.AutoCloseable
public final class WordExtractor extends POIOLE2TextExtractor
document
Constructor | Description |
---|---|
WordExtractor(java.io.InputStream is) |
Create a new Word Extractor
|
WordExtractor(HWPFDocument doc) |
Create a new Word Extractor
|
WordExtractor(DirectoryNode dir) |
|
WordExtractor(DirectoryNode dir, POIFSFileSystem fs) |
Deprecated. Use WordExtractor(DirectoryNode) instead.
|
WordExtractor(POIFSFileSystem fs) |
Create a new Word Extractor
|
Modifier and Type | Method | Description |
---|---|---|
java.lang.String[] |
getCommentsText() |
|
java.lang.String[] |
getEndnoteText() |
|
java.lang.String |
getFooterText() |
Deprecated.
|
java.lang.String[] |
getFootnoteText() |
|
java.lang.String |
getHeaderText() |
Deprecated.
|
java.lang.String[] |
getMainTextboxText() |
|
java.lang.String[] |
getParagraphText() |
Get the text from the word file, as an array with one String per
paragraph
|
protected static java.lang.String[] |
getParagraphText(Range r) |
|
java.lang.String |
getText() |
Grab the text, based on the WordToTextConverter.
|
java.lang.String |
getTextFromPieces() |
Grab the text out of the text pieces.
|
static void |
main(java.lang.String[] args) |
Command line extractor, so people will stop moaning that they can't just
run this.
|
static java.lang.String |
stripFields(java.lang.String text) |
Removes any fields (eg macros, page markers etc) from the string.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getDocSummaryInformation, getFileSystem, getMetadataTextExtractor, getRoot, getSummaryInformation
close
public WordExtractor(java.io.InputStream is) throws java.io.IOException
is
- InputStream containing the word file
Throws: java.io.IOException
public WordExtractor(POIFSFileSystem fs) throws java.io.IOException
fs
- POIFSFileSystem containing the word file
Throws: java.io.IOException
@Deprecated public WordExtractor(DirectoryNode dir, POIFSFileSystem fs) throws java.io.IOException
Deprecated. Use WordExtractor(DirectoryNode) instead.
Throws: java.io.IOException
public WordExtractor(DirectoryNode dir) throws java.io.IOException
Throws: java.io.IOException
public WordExtractor(HWPFDocument doc)
doc
- The HWPFDocument to extract from
public static void main(java.lang.String[] args) throws java.io.IOException
Throws: java.io.IOException
public java.lang.String[] getParagraphText()
public java.lang.String[] getFootnoteText()
public java.lang.String[] getMainTextboxText()
public java.lang.String[] getEndnoteText()
public java.lang.String[] getCommentsText()
protected static java.lang.String[] getParagraphText(Range r)
@Deprecated public java.lang.String getHeaderText()
@Deprecated public java.lang.String getFooterText()
public java.lang.String getTextFromPieces()
public java.lang.String getText()
getText
in class POITextExtractor
public static java.lang.String stripFields(java.lang.String text)
Copyright 2018 The Apache Software Foundation or its licensors, as applicable.