public class HtmlUtils extends Object
htmlUtils.
Modifier and Type | Class | Description |
---|---|---|
static class |
HtmlUtils.IndexPair |
Modifier and Type | Method | Description |
---|---|---|
static String |
combineTextWithExceptionInfo(String text,
Exception ex) |
|
static String |
element(String name,
String content) |
|
static String |
element(String name,
Map<String,String> attributes,
String content) |
|
static int |
endOfText(String html) |
|
static String |
extractRawBody(String text) |
|
static Element |
getCurrentLinkElement(HTMLDocument doc,
int pos) |
|
static HtmlUtils |
getInstance() |
|
static int |
getMaximalOriginalPosition(int pI,
ArrayList<HtmlUtils.IndexPair> pListOfIndices) |
|
static int |
getMinimalOriginalPosition(int pI,
ArrayList<HtmlUtils.IndexPair> pListOfIndices) |
|
static String |
getReplaceResult(Pattern pattern,
String text,
String replacement) |
Replaces text in node content without replacing tags.
|
static String |
getURLOfExistingLink(HTMLDocument doc,
int pos) |
Gets the string URL of an existing link, or null if none.
|
static String |
htmlToPlain(String text) |
equivalent to htmlToPlain(text, strictHTMLOnly=true, removeNewLines=true)
|
static String |
htmlToPlain(String text,
boolean strictHTMLOnly) |
equivalent to htmlToPlain(text, strictHTMLOnly, removeNewLines=true)
|
static String |
htmlToPlain(String text,
boolean strictHTMLOnly,
boolean removeNewLines) |
Removes HTML markup and entities, where appropriate replacing them with plaintext equivalents such as
<li> → '*'.
|
static boolean |
isEmpty(String newText) |
|
static boolean |
isHtmlNode(String text) |
|
static String |
join(String... texts) |
Join arbitrary texts to html.
|
static String |
plainToHTML(String text) |
Replaces &, <, >, \n and whitespace with their HTML counterparts and
encloses the whole text in <html><body><p>...</p></body></html>.
|
static String |
removeAllTagsFromString(String text) |
|
static String |
removeHtmlTagsFromString(String text) |
Removes all tags (<..>) from a string if it starts with "<html>..." to
make it comparable.
|
static String |
toHtml(String xhtmlText) |
|
static String |
toHTMLEscapedText(String s) |
|
static String |
toXhtml(String htmlText) |
|
static String |
toXMLEscapedText(String text) |
|
static String |
toXMLEscapedTextExpandingWhitespace(String text) |
|
static String |
toXMLUnescapedText(String text) |
|
static String |
unescapeHTMLUnicodeEntity(String text) |
|
static String |
unicodeToHTMLUnicodeEntity(String text) |
public static HtmlUtils getInstance()
public static String htmlToPlain(String text)
htmlToPlain(String, boolean, boolean)
public static String htmlToPlain(String text, boolean strictHTMLOnly)
htmlToPlain(String, boolean, boolean)
public static String htmlToPlain(String text, boolean strictHTMLOnly, boolean removeNewLines)
strictHTMLOnly
- if true, does nothing unless the text starts with <html>
removeNewLines
- set to false to keep all blank lines.
public static boolean isHtmlNode(String text)
public static String plainToHTML(String text)
Replaces &, <, >, \n and whitespace with their HTML counterparts and
encloses the whole text in <html><body><p>...</p></body></html>.
public static String removeHtmlTagsFromString(String text)
public static String toXMLEscapedTextExpandingWhitespace(String text)
public static int getMaximalOriginalPosition(int pI, ArrayList<HtmlUtils.IndexPair> pListOfIndices)
public static int getMinimalOriginalPosition(int pI, ArrayList<HtmlUtils.IndexPair> pListOfIndices)
public static String getReplaceResult(Pattern pattern, String text, String replacement)
public static int endOfText(String html)
public static String combineTextWithExceptionInfo(String text, Exception ex)
public static String getURLOfExistingLink(HTMLDocument doc, int pos)
public static Element getCurrentLinkElement(HTMLDocument doc, int pos)
public static boolean isEmpty(String newText)
public static String join(String... texts)
Joins arbitrary texts to HTML. Plain-text arguments are converted via plainToHTML(String),
i.e. newlines and other special characters will
be translated to their HTML counterparts and wrapped in a paragraph (<p></p>).
// plain + html -> <html><body><p>text1</p>text2</body></html>
HtmlUtils.join("text1", "", "<html><body>text2</body></html>");
// insert an empty paragraph (<p></p>) between two strings:
HtmlUtils.join("text1", "", "text2");
// this will insert two paragraphs:
HtmlUtils.join("text1", "\n", "text2");
texts
- either html (starting with <HTML> or <html>) or plain text.