123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- package com.sinosoft.am.Splider.test;
- import java.io.IOException;
- import java.sql.Connection;
- import java.sql.DriverManager;
- import java.sql.PreparedStatement;
- import java.sql.ResultSet;
- import java.sql.SQLException;
- import java.sql.Statement;
- import java.util.List;
- import java.util.Map;
- import nl.justobjects.pushlet.util.Sys;
- import org.apache.log4j.Logger;
- import org.jsoup.Jsoup;
- import org.jsoup.nodes.Document;
- import org.jsoup.nodes.Element;
- import org.jsoup.select.Elements;
- import org.pdfbox.examples.fdf.PrintFields;
- import com.esri.arcgis.system.Array;
- import com.persistence.DBdll.SysOperator;
- import com.persistence.service.PersistenceFactory;
- import com.persistence.service.SysPersistence;
- import com.persistence.service.exception.PersistenceException;
- import com.sinosoft.am.Splider.bean.LinkTypeData;
- import com.sinosoft.am.Splider.core.ExtracNewsInfoService;
- import com.sinosoft.am.Splider.core.ExtractService;
- import com.sinosoft.am.Splider.dao.daoServer;
- import com.sinosoft.am.Splider.ruleider.rule.Rule;
- import com.sinosoft.am.plan.dao.ContingencyPlanHuiZongDao;
- import com.sysmodel.datamodel.xmlmodel.ModelFactory;
- import com.sysmodel.datamodel.xmlmodel.able.SysModel;
- import flex.messaging.io.ArrayList;
- public class Test {
-
- //从EMC_AM_CONFIGURATION表里取URL链接的集合。
-
-
- /**
- * 抓取广东人民政府应急管理办公室的新闻链接,只设置url和关键字与返回类型 http://www.gdemo.gov.cn/gzyw/
- * searchword2 http://news.baidu.com/ns
- * @throws SQLException
- */
- // @org.junit.Test
- public void getDatasByCssQueryUserBaidu() throws SQLException
- {
- Rule rule = new Rule("http://www.gdemo.gov.cn/gzyw/", new String[] {
- "title","searchword2" }, new String[] { "应急预案","国内" },
- null, -1, Rule.GET);
- // List<LinkTypeData> extracts = ExtractService.extract(rule);
- // //抓取到的新闻链接为后面那一部分。需要用前面部分来拼接一下字符串
- // printf(extracts);
- }
- /**
- * 抓取广东人民政府应急管理办公室的新闻链接,只设置url和关键字与返回类型
- * 广东三防返回的数据链接需要拼:http://www.gd3f.gov.cn/ 这个字符串
- *
- * @throws SQLException
- */
- //@org.junit.Test
- public void getDatasByCssQueryUserGuangdsf() throws SQLException {
-
- // http://www.gd3f.gov.cn/xgxw/snxw/index.shtm
- Rule rule = new Rule("http://www.gd3f.gov.cn",
- new String[] { "1111111" }, new String[] { "广东三防" }, null, -1,
- Rule.GET);
- // List<LinkTypeData> extracts = ExtractService.extract(rule);
- // 抓取到的新闻链接为后面那一部分。需要用前面部分来拼接一下字符串
- // printf(extracts);
- }
-
- //@org.junit.Test
- public void getNewsInfo() throws SQLException {
- List<String[]> newsUrl =queryNewsURL();//初始化新闻 链接
- String url = "";
- String title = "";
- if(newsUrl.size()>0){
- for(int i=0; i<newsUrl.size(); i++){
- url = newsUrl.get(i)[0];
- title = newsUrl.get(i)[1];
- // getDatasByCssQueryUserGuangdsf(url, title);
- blog(url,title);
- }
- }
- }
-
-
- public void getDatasByCssQueryUserGuangdsf(String url,String title) throws SQLException{
- Rule rule = new Rule(url,new String[] { "1111111" }, new String[] { "广东三防" }, null, -1,Rule.GET);
- List<LinkTypeData> extracts = ExtracNewsInfoService.extract(rule);
- System.out.println("******DB11111111111*********");
- String content = null;
- for (LinkTypeData data : extracts) {
- content += data.getContent()+"/n";
- }
- System.out.println(content);
-
- System.out.println("******DB11111111111*********");
- System.out.println(title+","+url+","+content);
- conDb1(title,url,content);
- }
-
-
-
- public void printf(List<LinkTypeData> datas) throws SQLException {
- System.out.println(datas);
- for (LinkTypeData data : datas) {
- String title = data.getLinkText();
- String href = "http://www.gd3f.gov.cn" + data.getLinkHref();
- String content = data.getContent();
- //conDb1(title, href,content);
- System.out.println(content);
- }
- }
-
- /**
- * 获取指定博客文章的内容
- * @throws SQLException
- */
- /*@org.junit.Test*/
- public void blog(String url,String title) throws SQLException {
- System.out.println("11111111111111111");
- Document doc;
- String nn="";//取内容
- String pushTime="";//取发布时间
- try {
- doc = Jsoup.connect(url).get();
- Elements ListDiv = doc.getElementsByAttributeValue("class","tf24");
- Elements ListpushTime = doc.getElementsByAttributeValue("class","tf23");
- for (Element element :ListDiv) {
- nn+=element.text();
- //System.out.println(element.html());
- }
- for (Element element :ListpushTime) {
- nn=element.text();
- //System.out.println(element.html());
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
-
- int nnLength = nn.length();
- pushTime = nn.substring(nnLength-36,nnLength-17);
-
- System.out.println(pushTime);
- // conDb1(title,url,nn);
-
- }
-
-
-
- /***
- * 判断返回来的title和herf是纯列表标签还是新闻正文。
- * 如果返回来的是列表标签,继续调用getDatasByCssQueryUserGuangdsf方法,进入详情
- * 页面抓取新闻内容。
- *
- * 抓取新闻内容:从库表里读取链接,获取新闻链接,将链接传入getDatasByCssQueryUserGuangdsf方法。
- * getDatasByCssQueryUserGuangdsf方法需要重写一个带接受参数的方法。
- *
- * 调用ExtractService类。List<LinkTypeData> extracts = ExtractService.extract(rule);
- * 重写一下这个类,新闻内容只抓取带<p>标签的
- *
- * 将返回的<p>标签内容作为变量记录下来存库。
- * @return
- **/
-
- /* public String getNewsInfo(List<LinkTypeData> datas) throws SQLException {
-
- for (LinkTypeData data : datas) {
- data = new LinkTypeData();
-
- String title = data.getLinkText();
- String href = "http://www.gd3f.gov.cn" + data.getLinkHref();
- //conDb(title, href);
- System.out.println(href);
- System.out.println(title);
- System.out.println("***********************************");
- }
- return null;
- }*/
-
- // 到数据库表插入一条数据新闻链接和新闻标题的数据
- public List<String[]> queryNewsURL() throws SQLException {
- Connection con = null;
- Statement stmt = null;
- String FD_OBJECTID="";
- String SOURCE_URL="";
- List<String[]> list = new ArrayList();
- try {
- Class.forName("dm.jdbc.driver.DmDriver");
- String url = "jdbc:dm://192.168.1.19:5236";
- String username = "NWYJ";
- String password = "NWYJ123456";
- con = DriverManager.getConnection(url, username, password);
- System.out.println("");
- /* String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,IS_DEL) values(sys_guid(),'"
- + title + "','" + URL + "','0')";*/
-
- String sql = "select FD_OBJECTID,SOURCE_URL,NAME,TITLE,NEWSTIME,UPDATEDATE,IS_DEL FROM EMC_AM_CONFIGURATION WHERE IS_DEL='0'";
- stmt = con.createStatement();
- ResultSet rs=stmt.executeQuery(sql);
-
- while(rs.next()){
- String[]arr=new String[2];
- arr[0]=rs.getString("SOURCE_URL");
- arr[1]=rs.getString("TITLE");
- list.add(arr);
- // FD_OBJECTID+=rs.getString("FD_OBJECTID") + "||";
- // SOURCE_URL+=rs.getString("SOURCE_URL") + "||";
- }
- System.out.println(FD_OBJECTID);
- System.out.println(SOURCE_URL);
- } catch (SQLException se) {
- System.out.println("数据库连接失败!");
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } finally {
- stmt.close();
- con.close();
- }
- return list;
- }
-
-
-
- // 到数据库表插入一条数据新闻内容和新闻标题的数据
- public void conDb1(String title, String URL,String new_content) throws SQLException {
- Connection con = null;
- Statement stmt = null;
- try {
- Class.forName("dm.jdbc.driver.DmDriver");
- String url = "jdbc:dm://192.168.1.19:5236";
- String username = "NWYJ";
- String password = "NWYJ123456";
- con = DriverManager.getConnection(url, username, password);
- String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,NEW_CONTENT,IS_DEL) values(sys_guid(),'"+ title + "','" + URL + "','"+new_content+"','0')";
- stmt = con.createStatement();
- stmt.execute(sql);
- } catch (SQLException se) {
- System.out.println("数据库连接失败!");
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } finally {
- stmt.close();
- con.close();
- }
- }
- // 到数据库表插入一条数据新闻链接和新闻标题的数据
- public void conDb(String title, String URL) throws SQLException {
- Connection con = null;
- Statement stmt = null;
- try {
- Class.forName("dm.jdbc.driver.DmDriver");
- String url = "jdbc:dm://192.168.1.19:5236";
- String username = "NWYJ";
- String password = "NWYJ123456";
- con = DriverManager.getConnection(url, username, password);
- /* String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,NEW_CONTENT,IS_DEL) values(sys_guid(),'"
- + title + "','" + URL + "',+NEW_CONTENT+'0')";*/
-
- String sql = "insert into EMC_AM_CONFIGURATION(FD_OBJECTID,SOURCE_URL,TITLE,IS_DEL) values(sys_guid(),'"
- + URL + "','" + title + "','0')";
- stmt = con.createStatement();
- stmt.execute(sql);
- } catch (SQLException se) {
- System.out.println("数据库连接失败!");
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } finally {
- stmt.close();
- con.close();
- }
- }
- }
|