public class ContentExtractor extends Object
| 限定符和类型 | 字段和说明 |
|---|---|
| `protected org.jsoup.nodes.Document` | `doc` |
| `protected HashMap<org.jsoup.nodes.Element,cn.edu.hfut.dmic.contentextractor.ContentExtractor.CountInfo>` | `infoMap` |
| `static org.slf4j.Logger` | `LOG` |

| 限定符和类型 | 方法和说明 |
|---|---|
| `protected void` | `clean()` |
| `protected cn.edu.hfut.dmic.contentextractor.ContentExtractor.CountInfo` | `computeInfo(org.jsoup.nodes.Node node)` |
| `protected double` | `computeScore(org.jsoup.nodes.Element tag)` |
| `protected double` | `computeVar(ArrayList<Integer> data)` |
| `protected int` | `editDistance(String word1, String word2)` |
| `static String` | `getContentByDoc(org.jsoup.nodes.Document doc)` |
| `static String` | `getContentByHtml(String html)` |
| `static String` | `getContentByHtml(String html, String url)` |
| `static String` | `getContentByUrl(String url)` |
| `org.jsoup.nodes.Element` | `getContentElement()` |
| `static org.jsoup.nodes.Element` | `getContentElementByDoc(org.jsoup.nodes.Document doc)` |
| `static org.jsoup.nodes.Element` | `getContentElementByHtml(String html)` |
| `static org.jsoup.nodes.Element` | `getContentElementByHtml(String html, String url)` |
| `static org.jsoup.nodes.Element` | `getContentElementByUrl(String url)` |
| `protected String` | `getDate(org.jsoup.nodes.Element contentElement)` |
| `News` | `getNews()` |
| `static News` | `getNewsByDoc(org.jsoup.nodes.Document doc)` |
| `static News` | `getNewsByHtml(String html)` |
| `static News` | `getNewsByHtml(String html, String url)` |
| `static News` | `getNewsByUrl(String url)` |
| `protected String` | `getTime(org.jsoup.nodes.Element contentElement)` |
| `protected String` | `getTitle(org.jsoup.nodes.Element contentElement)` |
| `protected String` | `getTitleByEditDistance(org.jsoup.nodes.Element contentElement)` |
| `protected int` | `lcs(String x, String y)` |
| `static void` | `main(String[] args)` |
| `protected double` | `strSim(String a, String b)` |
public static final org.slf4j.Logger LOG
protected org.jsoup.nodes.Document doc
protected HashMap<org.jsoup.nodes.Element,cn.edu.hfut.dmic.contentextractor.ContentExtractor.CountInfo> infoMap
protected void clean()
protected cn.edu.hfut.dmic.contentextractor.ContentExtractor.CountInfo computeInfo(org.jsoup.nodes.Node node)
protected double computeScore(org.jsoup.nodes.Element tag)
protected String getTime(org.jsoup.nodes.Element contentElement) throws Exception
抛出: Exception
protected String getDate(org.jsoup.nodes.Element contentElement) throws Exception
抛出: Exception
protected String getTitle(org.jsoup.nodes.Element contentElement) throws Exception
抛出: Exception
protected String getTitleByEditDistance(org.jsoup.nodes.Element contentElement) throws Exception
抛出: Exception
public static org.jsoup.nodes.Element getContentElementByDoc(org.jsoup.nodes.Document doc) throws Exception
抛出: Exception
public static org.jsoup.nodes.Element getContentElementByHtml(String html) throws Exception
抛出: Exception
public static org.jsoup.nodes.Element getContentElementByHtml(String html, String url) throws Exception
抛出: Exception
public static org.jsoup.nodes.Element getContentElementByUrl(String url) throws Exception
抛出: Exception
public static String getContentByDoc(org.jsoup.nodes.Document doc) throws Exception
抛出: Exception
public static String getContentByHtml(String html, String url) throws Exception
抛出: Exception
public static News getNewsByDoc(org.jsoup.nodes.Document doc) throws Exception
抛出: Exception
public static News getNewsByHtml(String html, String url) throws Exception
抛出: Exception

Copyright © 2017. All Rights Reserved.