using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace NewsCrawler
{
    /// <summary>
    /// Main window of the crawler. It scrapes headline lists from several
    /// web pages (KIND, DART, asiae.co.kr, etoday.co.kr) and shows each
    /// headline as a row of the <c>lvList</c> list view.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Delegate with the same signature as <see cref="InsertItem"/>; used to
        /// re-invoke it through <c>Control.Invoke</c> when called off the UI thread.
        /// </summary>
        delegate void InsertListView(bool bInitial, string strTitle, DateTime time, string strURL, string strRef);

        // Timer whose Elapsed event is wired to CrawlTimer_Tick in the constructor.
        System.Timers.Timer m_CrawlTimer = new System.Timers.Timer();

        // Value assigned to m_CrawlTimer.Interval (milliseconds).
        int m_iCrawlInterval = 500;

        /// <summary>
        /// Builds the form, performs one synchronous read of every source,
        /// sorts the list, selects the last row and starts the crawl timer.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            wbView.ScriptErrorsSuppressed = false;

            // Initial load. These calls are synchronous web requests, so the
            // constructor blocks until each one finishes or fails.
            ReadKIND(true);
            ReadDart(true);
            ReadAsiaE(true);
            ReadEtoday(true);

            // The sorter is installed only after the initial load, then applied once.
            lvList.ListViewItemSorter = new ListViewItemComparer(0, SortOrder.Ascending);
            lvList.Sorting = SortOrder.Ascending;
            lvList.Sort();

            if (lvList.Items.Count > 0)
            {
                lvList.Items[lvList.Items.Count - 1].Selected = true;
                lvList.Select();
                if (lvList.SelectedItems.Count > 0)
                {
                    lvList.SelectedItems[0].EnsureVisible();
                }
            }

            m_CrawlTimer.Elapsed += CrawlTimer_Tick;
            m_CrawlTimer.Interval = m_iCrawlInterval;
            m_CrawlTimer.Start();
        }

        /// <summary>
        /// Adds one headline row to <c>lvList</c> unless a row with the same URL
        /// already exists. Re-invokes itself through <c>Invoke</c> when
        /// <c>InvokeRequired</c> is true. Any exception is written to the console
        /// and swallowed.
        /// </summary>
        /// <param name="bInitial">When false, the list is re-sorted after the insert.</param>
        /// <param name="strTitle">Headline text shown in the second column.</param>
        /// <param name="time">Headline time; displayed as <c>HH:mm:ss</c>.</param>
        /// <param name="strURL">Article link; also the key used for duplicate detection.</param>
        /// <param name="strRef">Name of the source the headline came from.</param>
        private void InsertItem(bool bInitial, string strTitle, DateTime time, string strURL, string strRef)
        {
            // Debug trace for headlines that arrive without a title.
            if (strTitle == "")
            {
                Console.WriteLine("break");
            }

            try
            {
                if (this.InvokeRequired)
                {
                    this.Invoke(new InsertListView(InsertItem), bInitial, strTitle, time, strURL, strRef);
                    return;
                }

                // Skip headlines whose link is already listed.
                foreach (ListViewItem item in lvList.Items)
                {
                    if (item.SubItems[chLink.Index].Text == strURL)
                    {
                        return;
                    }
                }

                // Sub-item order: time, title, two empty cells, source, link.
                lvList.Items.Add(new ListViewItem(new string[] { time.ToString("HH:mm:ss"), strTitle, "", "", strRef, strURL }));

                if (chAutoSelect.Checked == true)
                {
                    lvList.Items[lvList.Items.Count - 1].Selected = true;
                    lvList.Select();
                    if (lvList.SelectedItems.Count > 0)
                    {
                        lvList.SelectedItems[0].EnsureVisible();
                    }
                }

                if (bInitial == false)
                {
                    lvList.Sort();
                }

                // -2 asks the list view to auto-size each column.
                foreach (ColumnHeader col in lvList.Columns)
                {
                    col.Width = -2;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Downloads <paramref name="strRequestURL"/> with default credentials and a
        /// 2000 ms timeout, decodes the body with <paramref name="encoding"/> and
        /// returns it after HTML-entity decoding.
        /// </summary>
        private static string DownloadDecodedHtml(string strRequestURL, Encoding encoding)
        {
            WebRequest request = WebRequest.Create(strRequestURL);
            request.Credentials = CredentialCache.DefaultCredentials;
            request.Timeout = 2000;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream, encoding))
            {
                return WebUtility.HtmlDecode(reader.ReadToEnd());
            }
        }

        /// <summary>
        /// Parses a scraped <c>HH:mm</c> time cell. Returns false for text that
        /// contains a <c>digits/digits</c> date (an entry from a previous day) and
        /// for text that is not a valid <c>HH:mm</c> time.
        /// </summary>
        private static bool TryParseHeadlineTime(string strTime, out DateTime time)
        {
            time = default(DateTime);
            if (strTime == null)
            {
                return false;
            }

            strTime = strTime.Trim();

            // Entries from a previous day carry a date such as "12/31" instead of a time.
            if (Regex.IsMatch(strTime, @"\d+/\d+"))
            {
                return false;
            }

            // Scraped text is machine data: parse with the invariant culture so that
            // ':' is matched literally regardless of the user's regional settings.
            if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
            {
                Console.WriteLine("Unparsable headline time: " + strTime);
                return false;
            }

            return true;
        }

        /// <summary>
        /// Reads the asiae.co.kr breaking-news list and inserts each entry of the
        /// current day via <see cref="InsertItem"/>. Entries that lack a link, a
        /// time cell or a parsable time are skipped individually.
        /// </summary>
        /// <param name="bInitial">Forwarded to <see cref="InsertItem"/>.</param>
        /// <returns>
        /// Always false: <c>bHasNew</c> is never set after initialization.
        /// </returns>
        bool ReadAsiaE(bool bInitial = false)
        {
            bool bHasNew = false;
            try
            {
                string strServerURL = "http://www.asiae.co.kr";
                string strHtml = DownloadDecodedHtml("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm", Encoding.GetEncoding("EUC-KR"));

                // Collapse doubled quotes before handing the markup to the parser.
                strHtml = strHtml.Replace("\"\"", "\"");

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'txtform')]/ul/li");
                if (lists == null)
                {
                    return bHasNew;
                }

                foreach (var item in lists)
                {
                    var linkNode = item.SelectSingleNode(".//a");
                    var timeNode = item.SelectSingleNode(".//span");
                    if (linkNode == null || timeNode == null)
                    {
                        continue;
                    }

                    string strTitle = linkNode.GetAttributeValue("title", "");
                    string strURL = strServerURL + linkNode.GetAttributeValue("href", "");

                    DateTime time;
                    if (!TryParseHeadlineTime(timeNode.InnerText, out time))
                    {
                        continue;
                    }

                    InsertItem(bInitial, strTitle, time, strURL, "아시아경제");
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            return bHasNew;
        }

        /// <summary>
        /// Reads the etoday.co.kr flash-news list and inserts each entry of the
        /// current day via <see cref="InsertItem"/>. The article link is rebuilt
        /// from the digits found in the entry's <c>href</c>. Entries that lack a
        /// link, a time cell or a parsable time are skipped individually.
        /// </summary>
        /// <param name="bInitial">Forwarded to <see cref="InsertItem"/>.</param>
        /// <returns>
        /// Always false: <c>bHasNew</c> is never set after initialization.
        /// </returns>
        bool ReadEtoday(bool bInitial = false)
        {
            bool bHasNew = false;
            try
            {
                string strHtml = DownloadDecodedHtml("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1", Encoding.UTF8);

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'flash_tab_lst')]/ul/li");
                if (lists == null)
                {
                    return bHasNew;
                }

                foreach (var item in lists)
                {
                    var linkNode = item.SelectSingleNode(".//a");
                    var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
                    if (linkNode == null || timeNode == null)
                    {
                        continue;
                    }

                    string strTitle = linkNode.InnerText;

                    // Keep only the digits of the href and use them as the article id.
                    string strURL = linkNode.GetAttributeValue("href", "");
                    strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=" + Regex.Replace(strURL, @"\D", "");

                    DateTime time;
                    if (!TryParseHeadlineTime(timeNode.InnerText, out time))
                    {
                        continue;
                    }

                    InsertItem(bInitial, strTitle, time, strURL, "이투데이");
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            return bHasNew;
        }

        /// <summary>
        /// Reads the DART disclosure table and inserts each row of the current day
        /// via <see cref="InsertItem"/>. Rows with fewer than three cells, or
        /// without a link, a time cell or a parsable time, are skipped individually.
        /// </summary>
        /// <param name="bInitial">Forwarded to <see cref="InsertItem"/>.</param>
        /// <returns>
        /// Always false: <c>bHasNew</c> is never set after initialization.
        /// </returns>
        bool ReadDart(bool bInitial=false)
        {
            bool bHasNew = false;
            try
            {
                string strServerURL = "https://dart.fss.or.kr";
                string strHtml = DownloadDecodedHtml("https://dart.fss.or.kr/dsac001/mainAll.do", Encoding.UTF8);

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                var lists = doc.DocumentNode.SelectNodes("//div[@id='listContents']/div[contains(@class, 'table_list')]/table/tr");
                if (lists == null)
                {
                    return bHasNew;
                }

                foreach (var item in lists)
                {
                    // Rows without any <td> yield null here; skip them like short rows.
                    var rows = item.SelectNodes(".//td");
                    if (rows == null || rows.Count < 3)
                    {
                        continue;
                    }

                    var timeNode = item.SelectSingleNode(".//td[contains(@class, 'cen_txt')]");
                    var linkNode = rows[2].SelectSingleNode(".//a");
                    if (timeNode == null || linkNode == null)
                    {
                        continue;
                    }

                    string strTitle = rows[2].InnerText.Trim();
                    string strURL = strServerURL + linkNode.GetAttributeValue("href", "").Trim();

                    DateTime time;
                    if (!TryParseHeadlineTime(timeNode.InnerText, out time))
                    {
                        continue;
                    }

                    InsertItem(bInitial, strTitle, time, strURL, "DART");
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            return bHasNew;
        }

        // NOTE(review): the "¤tPageSize" text in the request URL below looks like a
        // garbled "&currentPageSize" - confirm against the KIND RSS endpoint.
        bool ReadKIND(bool bInitial=false)
        {
            bool bHasNew = false;
            try
            {
                //string strServerURL = "https://dart.fss.or.kr";
                WebRequest request = WebRequest.Create("http://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=¤tPageSize=1000");
                request.Credentials=CredentialCache.DefaultCredentials;
                request.Timeout=2000;
                int iCDATALen = "