d4ba1ecd72384b220103da860a642d2bee108a20.svn-base 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. package com.sinosoft.am.Splider.test;
  2. import java.io.IOException;
  3. import java.sql.Connection;
  4. import java.sql.DriverManager;
  5. import java.sql.PreparedStatement;
  6. import java.sql.ResultSet;
  7. import java.sql.SQLException;
  8. import java.sql.Statement;
  9. import java.util.List;
  10. import java.util.Map;
  11. import nl.justobjects.pushlet.util.Sys;
  12. import org.apache.log4j.Logger;
  13. import org.jsoup.Jsoup;
  14. import org.jsoup.nodes.Document;
  15. import org.jsoup.nodes.Element;
  16. import org.jsoup.select.Elements;
  17. import org.pdfbox.examples.fdf.PrintFields;
  18. import com.esri.arcgis.system.Array;
  19. import com.persistence.DBdll.SysOperator;
  20. import com.persistence.service.PersistenceFactory;
  21. import com.persistence.service.SysPersistence;
  22. import com.persistence.service.exception.PersistenceException;
  23. import com.sinosoft.am.Splider.bean.LinkTypeData;
  24. import com.sinosoft.am.Splider.core.ExtracNewsInfoService;
  25. import com.sinosoft.am.Splider.core.ExtractService;
  26. import com.sinosoft.am.Splider.dao.daoServer;
  27. import com.sinosoft.am.Splider.ruleider.rule.Rule;
  28. import com.sinosoft.am.plan.dao.ContingencyPlanHuiZongDao;
  29. import com.sysmodel.datamodel.xmlmodel.ModelFactory;
  30. import com.sysmodel.datamodel.xmlmodel.able.SysModel;
  31. import flex.messaging.io.ArrayList;
  32. public class Test {
  33. //从EMC_AM_CONFIGURATION表里取URL链接的集合。
  34. /**
  35. * 抓取广东人民政府应急管理办公室的新闻链接,只设置url和关键字与返回类型 http://www.gdemo.gov.cn/gzyw/
  36. * searchword2 http://news.baidu.com/ns
  37. * @throws SQLException
  38. */
  39. // @org.junit.Test
  40. public void getDatasByCssQueryUserBaidu() throws SQLException
  41. {
  42. Rule rule = new Rule("http://www.gdemo.gov.cn/gzyw/", new String[] {
  43. "title","searchword2" }, new String[] { "应急预案","国内" },
  44. null, -1, Rule.GET);
  45. // List<LinkTypeData> extracts = ExtractService.extract(rule);
  46. // //抓取到的新闻链接为后面那一部分。需要用前面部分来拼接一下字符串
  47. // printf(extracts);
  48. }
  49. /**
  50. * 抓取广东人民政府应急管理办公室的新闻链接,只设置url和关键字与返回类型
  51. * 广东三防返回的数据链接需要拼:http://www.gd3f.gov.cn/ 这个字符串
  52. *
  53. * @throws SQLException
  54. */
  55. //@org.junit.Test
  56. public void getDatasByCssQueryUserGuangdsf() throws SQLException {
  57. // http://www.gd3f.gov.cn/xgxw/snxw/index.shtm
  58. Rule rule = new Rule("http://www.gd3f.gov.cn",
  59. new String[] { "1111111" }, new String[] { "广东三防" }, null, -1,
  60. Rule.GET);
  61. // List<LinkTypeData> extracts = ExtractService.extract(rule);
  62. // 抓取到的新闻链接为后面那一部分。需要用前面部分来拼接一下字符串
  63. // printf(extracts);
  64. }
  65. //@org.junit.Test
  66. public void getNewsInfo() throws SQLException {
  67. List<String[]> newsUrl =queryNewsURL();//初始化新闻 链接
  68. String url = "";
  69. String title = "";
  70. if(newsUrl.size()>0){
  71. for(int i=0; i<newsUrl.size(); i++){
  72. url = newsUrl.get(i)[0];
  73. title = newsUrl.get(i)[1];
  74. // getDatasByCssQueryUserGuangdsf(url, title);
  75. blog(url,title);
  76. }
  77. }
  78. }
  79. public void getDatasByCssQueryUserGuangdsf(String url,String title) throws SQLException{
  80. Rule rule = new Rule(url,new String[] { "1111111" }, new String[] { "广东三防" }, null, -1,Rule.GET);
  81. List<LinkTypeData> extracts = ExtracNewsInfoService.extract(rule);
  82. System.out.println("******DB11111111111*********");
  83. String content = null;
  84. for (LinkTypeData data : extracts) {
  85. content += data.getContent()+"/n";
  86. }
  87. System.out.println(content);
  88. System.out.println("******DB11111111111*********");
  89. System.out.println(title+","+url+","+content);
  90. conDb1(title,url,content);
  91. }
  92. public void printf(List<LinkTypeData> datas) throws SQLException {
  93. System.out.println(datas);
  94. for (LinkTypeData data : datas) {
  95. String title = data.getLinkText();
  96. String href = "http://www.gd3f.gov.cn" + data.getLinkHref();
  97. String content = data.getContent();
  98. //conDb1(title, href,content);
  99. System.out.println(content);
  100. }
  101. }
  102. /**
  103. * 获取指定博客文章的内容
  104. * @throws SQLException
  105. */
  106. /*@org.junit.Test*/
  107. public void blog(String url,String title) throws SQLException {
  108. System.out.println("11111111111111111");
  109. Document doc;
  110. String nn="";//取内容
  111. String pushTime="";//取发布时间
  112. try {
  113. doc = Jsoup.connect(url).get();
  114. Elements ListDiv = doc.getElementsByAttributeValue("class","tf24");
  115. Elements ListpushTime = doc.getElementsByAttributeValue("class","tf23");
  116. for (Element element :ListDiv) {
  117. nn+=element.text();
  118. //System.out.println(element.html());
  119. }
  120. for (Element element :ListpushTime) {
  121. nn=element.text();
  122. //System.out.println(element.html());
  123. }
  124. } catch (IOException e) {
  125. // TODO Auto-generated catch block
  126. e.printStackTrace();
  127. }
  128. int nnLength = nn.length();
  129. pushTime = nn.substring(nnLength-36,nnLength-17);
  130. System.out.println(pushTime);
  131. // conDb1(title,url,nn);
  132. }
  133. /***
  134. * 判断返回来的title和herf是纯列表标签还是新闻正文。
  135. * 如果返回来的是列表标签,继续调用getDatasByCssQueryUserGuangdsf方法,进入详情
  136. * 页面抓取新闻内容。
  137. *
  138. * 抓取新闻内容:从库表里读取链接,获取新闻链接,将链接传入getDatasByCssQueryUserGuangdsf方法。
  139. * getDatasByCssQueryUserGuangdsf方法需要重写一个带接受参数的方法。
  140. *
  141. * 调用ExtractService类。List<LinkTypeData> extracts = ExtractService.extract(rule);
  142. * 重写一下这个类,新闻内容只抓取带<p>标签的
  143. *
  144. * 将返回的<p>标签内容作为变量记录下来存库。
  145. * @return
  146. **/
  147. /* public String getNewsInfo(List<LinkTypeData> datas) throws SQLException {
  148. for (LinkTypeData data : datas) {
  149. data = new LinkTypeData();
  150. String title = data.getLinkText();
  151. String href = "http://www.gd3f.gov.cn" + data.getLinkHref();
  152. //conDb(title, href);
  153. System.out.println(href);
  154. System.out.println(title);
  155. System.out.println("***********************************");
  156. }
  157. return null;
  158. }*/
  159. // 到数据库表插入一条数据新闻链接和新闻标题的数据
  160. public List<String[]> queryNewsURL() throws SQLException {
  161. Connection con = null;
  162. Statement stmt = null;
  163. String FD_OBJECTID="";
  164. String SOURCE_URL="";
  165. List<String[]> list = new ArrayList();
  166. try {
  167. Class.forName("dm.jdbc.driver.DmDriver");
  168. String url = "jdbc:dm://192.168.1.19:5236";
  169. String username = "NWYJ";
  170. String password = "NWYJ123456";
  171. con = DriverManager.getConnection(url, username, password);
  172. System.out.println("");
  173. /* String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,IS_DEL) values(sys_guid(),'"
  174. + title + "','" + URL + "','0')";*/
  175. String sql = "select FD_OBJECTID,SOURCE_URL,NAME,TITLE,NEWSTIME,UPDATEDATE,IS_DEL FROM EMC_AM_CONFIGURATION WHERE IS_DEL='0'";
  176. stmt = con.createStatement();
  177. ResultSet rs=stmt.executeQuery(sql);
  178. while(rs.next()){
  179. String[]arr=new String[2];
  180. arr[0]=rs.getString("SOURCE_URL");
  181. arr[1]=rs.getString("TITLE");
  182. list.add(arr);
  183. // FD_OBJECTID+=rs.getString("FD_OBJECTID") + "||";
  184. // SOURCE_URL+=rs.getString("SOURCE_URL") + "||";
  185. }
  186. System.out.println(FD_OBJECTID);
  187. System.out.println(SOURCE_URL);
  188. } catch (SQLException se) {
  189. System.out.println("数据库连接失败!");
  190. } catch (Exception e) {
  191. // TODO Auto-generated catch block
  192. e.printStackTrace();
  193. } finally {
  194. stmt.close();
  195. con.close();
  196. }
  197. return list;
  198. }
  199. // 到数据库表插入一条数据新闻内容和新闻标题的数据
  200. public void conDb1(String title, String URL,String new_content) throws SQLException {
  201. Connection con = null;
  202. Statement stmt = null;
  203. try {
  204. Class.forName("dm.jdbc.driver.DmDriver");
  205. String url = "jdbc:dm://192.168.1.19:5236";
  206. String username = "NWYJ";
  207. String password = "NWYJ123456";
  208. con = DriverManager.getConnection(url, username, password);
  209. String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,NEW_CONTENT,IS_DEL) values(sys_guid(),'"+ title + "','" + URL + "','"+new_content+"','0')";
  210. stmt = con.createStatement();
  211. stmt.execute(sql);
  212. } catch (SQLException se) {
  213. System.out.println("数据库连接失败!");
  214. } catch (Exception e) {
  215. // TODO Auto-generated catch block
  216. e.printStackTrace();
  217. } finally {
  218. stmt.close();
  219. con.close();
  220. }
  221. }
  222. // 到数据库表插入一条数据新闻链接和新闻标题的数据
  223. public void conDb(String title, String URL) throws SQLException {
  224. Connection con = null;
  225. Statement stmt = null;
  226. try {
  227. Class.forName("dm.jdbc.driver.DmDriver");
  228. String url = "jdbc:dm://192.168.1.19:5236";
  229. String username = "NWYJ";
  230. String password = "NWYJ123456";
  231. con = DriverManager.getConnection(url, username, password);
  232. /* String sql = "insert into emc_am_news(FD_OBJECEID,NEW_TITLE,NEW_URL,NEW_CONTENT,IS_DEL) values(sys_guid(),'"
  233. + title + "','" + URL + "',+NEW_CONTENT+'0')";*/
  234. String sql = "insert into EMC_AM_CONFIGURATION(FD_OBJECTID,SOURCE_URL,TITLE,IS_DEL) values(sys_guid(),'"
  235. + URL + "','" + title + "','0')";
  236. stmt = con.createStatement();
  237. stmt.execute(sql);
  238. } catch (SQLException se) {
  239. System.out.println("数据库连接失败!");
  240. } catch (Exception e) {
  241. // TODO Auto-generated catch block
  242. e.printStackTrace();
  243. } finally {
  244. stmt.close();
  245. con.close();
  246. }
  247. }
  248. }