一、使用 dom4j 解析 XML:
需要两个 jar 包:dom4j-1.6.1.jar 和 jaxen-1.1-beta-6.jar
view plainprint?
/** * java解析xml文件各个节点信息* * @author Jeelon * @param string * :解析的文件名*/ private static void getXmlInfo(String string) { SAXReader reader = new SAXReader();InputStream in = Thread.currentThread()。getContextClassLoader()。getResourceAsStream(string);try { Document doc = reader.read(in);Element driverNameEls = (Element) doc.selectObject("/config/db-info/driver-name");Element urlEls = (Element) doc.selectObject("/config/db-info/url");Element userNameEls = (Element) doc.selectObject("/config/db-info/user-name");Element passwordEls = (Element) doc.selectObject("/config/db-info/password");
String driverName = driverNameEls.getStringValue();String url = urlEls.getStringValue();String userName = userNameEls.getStringValue();String password = passwordEls.getStringValue();
System.out.println("====================================");System.out.println("驱动名:" + driverName);System.out.println("URL地址:" + url);System.out.println("用户名:" + userName);System.out.println("密码:" + password);System.out.println("====================================");} catch (DocumentException e) { e.printStackTrace();}
}
二、使用 Java 解析 HTML,需要的 jar 包:jsoup-1.6.0.jar
view plainprint?
/** * 提取HTML文件的文本内容* * @author Jeelon * @param html * 提取的html文件名* @return 返回提取内容String */ private static String getDocument(File html) { String text = "";try { // 设置编码集org.jsoup.nodes.Document doc = Jsoup.parse(html, "UTF-8");// 提取标题信息Elements title = doc.select("title");for (org.jsoup.nodes.Element link : title) { text += link.text() + " ";} // 提取table中的文本信息Elements links = doc.select("table");for (org.jsoup.nodes.Element link : links) { text += link.text() + " ";} // 提取div中的文本信息Elements divs = doc.select("div[class=post]");for (org.jsoup.nodes.Element link : divs) { text += link.text() + " ";} } catch (IOException e) { e.printStackTrace();}
return text;}
view plainprint?
// Parses a.xml with the JAXP DOM API, prints the root element's name, and for every
// child of the root named "Account" prints its "type" attribute followed by the text
// of its code / pass / name / money children.
Element element = null;
File f = new File("a.xml");
DocumentBuilder db = null; // DocumentBuilder is abstract; it is obtained from the factory below
DocumentBuilderFactory dbf = null;
try {
    dbf = DocumentBuilderFactory.newInstance();
    db = dbf.newDocumentBuilder();

    Document dt = db.parse(f);
    element = dt.getDocumentElement();
    System.out.println("根元素:" + element.getNodeName());

    NodeList childNodes = element.getChildNodes();
    for (int i = 0; i < childNodes.getLength(); i++) {
        Node node1 = childNodes.item(i);
        if (!"Account".equals(node1.getNodeName())) {
            continue;
        }

        // A missing "type" attribute is printed as an empty string instead of raising a
        // NullPointerException that would abort the whole traversal.
        Node typeAttr =
                node1.getAttributes() == null ? null : node1.getAttributes().getNamedItem("type");
        String type = typeAttr == null ? "" : typeAttr.getNodeValue();
        System.out.println(" 找到一篇账号。 所属区域: " + type + ". ");

        NodeList nodeDetail = node1.getChildNodes();
        for (int j = 0; j < nodeDetail.getLength(); j++) {
            Node detail = nodeDetail.item(j);
            String detailName = detail.getNodeName();
            if ("code".equals(detailName)) {
                System.out.println("卡号: " + detail.getTextContent());
            } else if ("pass".equals(detailName)) {
                System.out.println("密码: " + detail.getTextContent());
            } else if ("name".equals(detailName)) {
                System.out.println("姓名: " + detail.getTextContent());
            } else if ("money".equals(detailName)) {
                System.out.println("余额: " + detail.getTextContent());
            }
        }
    }
} catch (Exception e) {
    // Broad catch kept from the original snippet: any parser or I/O failure is printed.
    System.out.println(e);
}