using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace NewsCrawler
{
    /// <summary>
    /// Main form of the crawler. A timer periodically scrapes the breaking-news
    /// lists of asiae.co.kr and etoday.co.kr, collects headlines not seen before
    /// (keyed by article URL) and, whenever something new arrived, prints the
    /// time-sorted list to the console and appends it to <c>output.txt</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>A single scraped headline.</summary>
        class NEWS_ITEM
        {
            /// <summary>Headline text.</summary>
            public string m_strTitle;

            /// <summary>Time parsed from the page's "HH:mm" cell; the date part is the day of parsing.</summary>
            public DateTime m_Time;

            /// <summary>Absolute article URL; also used as the de-duplication key.</summary>
            public string m_strURL;
        }

        /// <summary>Format of the time cell on both scraped pages.</summary>
        const string TimeFormat = "HH:mm";

        /// <summary>File the collected headlines are appended to.</summary>
        const string OutputPath = "output.txt";

        /// <summary>Strips everything but digits; used to pull the article id out of an etoday link.</summary>
        static readonly Regex s_NonDigits = new Regex(@"\D");

        // NOTE(review): this is a Windows Forms timer, so Timer_Tick (including the
        // blocking HTTP requests it makes) runs on the UI thread every 500 ms.
        Timer m_Timer = new Timer();

        /// <summary>Every headline collected so far.</summary>
        List<NEWS_ITEM> m_ItemList = new List<NEWS_ITEM>();

        /// <summary>Initializes the form and starts the polling timer.</summary>
        public Form1()
        {
            InitializeComponent();

            m_Timer.Tick += Timer_Tick;
            m_Timer.Interval = 500;
            m_Timer.Start();
        }

        /// <summary>
        /// Downloads <paramref name="strPageURL"/>, decodes the body with
        /// <paramref name="encoding"/> and parses it into an HTML document.
        /// </summary>
        /// <param name="strPageURL">Absolute URL of the page to download.</param>
        /// <param name="encoding">Character encoding of the response body.</param>
        /// <returns>The parsed document.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        static HtmlAgilityPack.HtmlDocument LoadDocument(string strPageURL, Encoding encoding)
        {
            WebRequest request = WebRequest.Create(strPageURL);
            request.Credentials = CredentialCache.DefaultCredentials;

            // using-statements release the connection and streams even when
            // reading or parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                Console.WriteLine(response.StatusDescription);

                using (Stream dataStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(dataStream, encoding))
                {
                    string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());

                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(responseFromServer);
                    return doc;
                }
            }
        }

        /// <summary>
        /// Evaluates <paramref name="strXPath"/> against <paramref name="doc"/>.
        /// </summary>
        /// <returns>The matching nodes, or an empty sequence when nothing matches.</returns>
        static IEnumerable<HtmlAgilityPack.HtmlNode> SelectItems(HtmlAgilityPack.HtmlDocument doc, string strXPath)
        {
            // SelectNodes yields null (not an empty collection) when there is no match.
            HtmlAgilityPack.HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(strXPath);
            if (nodes == null)
            {
                return Enumerable.Empty<HtmlAgilityPack.HtmlNode>();
            }

            return nodes;
        }

        /// <summary>
        /// Stores a headline unless one with the same URL is already known.
        /// </summary>
        /// <param name="strTitle">Headline text.</param>
        /// <param name="strTime">Time cell text, expected as "HH:mm" (surrounding whitespace is ignored).</param>
        /// <param name="strURL">Absolute article URL.</param>
        /// <returns><c>true</c> when a new item was added; <c>false</c> for duplicates and unparseable times.</returns>
        bool TryAddNews(string strTitle, string strTime, string strURL)
        {
            if (m_ItemList.Any(c => c.m_strURL == strURL))
            {
                return false;
            }

            // The pages emit a fixed machine format, so parse culture-independently;
            // a malformed cell skips this one entry instead of aborting the whole poll.
            DateTime time;
            if (!DateTime.TryParseExact(strTime.Trim(), TimeFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
            {
                Console.WriteLine("Skipping item with unparseable time '" + strTime + "': " + strURL);
                return false;
            }

            NEWS_ITEM news = new NEWS_ITEM();
            news.m_strTitle = strTitle;
            news.m_Time = time;
            news.m_strURL = strURL;
            m_ItemList.Add(news);
            return true;
        }

        /// <summary>
        /// Scrapes the asiae.co.kr breaking-news list (EUC-KR encoded).
        /// </summary>
        /// <returns><c>true</c> when at least one previously unseen headline was added.</returns>
        bool ReadAsiaE()
        {
            const string strServerURL = "http://www.asiae.co.kr";

            HtmlAgilityPack.HtmlDocument doc = LoadDocument(
                "http://www.asiae.co.kr/news/sokbo/sokbo_left.htm",
                Encoding.GetEncoding("EUC-KR"));

            bool bHasNew = false;
            foreach (var item in SelectItems(doc, "//div[contains(@class, 'txtform')]/ul/li"))
            {
                var link = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span");
                if (link == null || timeNode == null)
                {
                    // Not a headline row (no link or no time cell).
                    continue;
                }

                string strTitle = link.GetAttributeValue("title", "");
                string strURL = strServerURL + link.GetAttributeValue("href", "");

                if (TryAddNews(strTitle, timeNode.InnerText, strURL))
                {
                    bHasNew = true;
                }
            }

            return bHasNew;
        }

        /// <summary>
        /// Scrapes the etoday.co.kr flash-news list (UTF-8 encoded).
        /// </summary>
        /// <returns><c>true</c> when at least one previously unseen headline was added.</returns>
        bool ReadEtoday()
        {
            HtmlAgilityPack.HtmlDocument doc = LoadDocument(
                "http://www.etoday.co.kr/news/flash/flash_list.php?tab=2",
                Encoding.GetEncoding("utf-8"));

            bool bHasNew = false;
            foreach (var item in SelectItems(doc, "//div[contains(@class, 'flash_tab_lst')]/ul/li"))
            {
                var link = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
                if (link == null || timeNode == null)
                {
                    // Not a headline row (no link or no time cell).
                    continue;
                }

                string strTitle = link.InnerText;

                // Only the digits of the href are kept and used as the idxno of the article view URL.
                string strHref = link.GetAttributeValue("href", "");
                string strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=" + s_NonDigits.Replace(strHref, "");

                if (TryAddNews(strTitle, timeNode.InnerText, strURL))
                {
                    bHasNew = true;
                }
            }

            return bHasNew;
        }

        /// <summary>
        /// Runs one source reader, logging network/stream failures instead of
        /// letting them escape the timer callback.
        /// </summary>
        /// <param name="reader">The source reader to run.</param>
        /// <param name="strSource">Name used in the log message.</param>
        /// <returns>The reader's result, or <c>false</c> when it failed.</returns>
        static bool TryRead(Func<bool> reader, string strSource)
        {
            try
            {
                return reader();
            }
            catch (WebException ex)
            {
                Console.WriteLine(strSource + " request failed: " + ex.Message);
            }
            catch (IOException ex)
            {
                Console.WriteLine(strSource + " read failed: " + ex.Message);
            }

            return false;
        }

        /// <summary>
        /// Timer callback: polls both sources and, when either produced a new
        /// headline, sorts the collected items by time and writes them out.
        /// </summary>
        private void Timer_Tick(object sender, EventArgs e)
        {
            // Poll each source independently so a failure of one does not skip the other.
            bool bHasNew1 = TryRead(ReadAsiaE, "AsiaE");
            bool bHasNew2 = TryRead(ReadEtoday, "Etoday");

            if (!bHasNew1 && !bHasNew2)
            {
                return;
            }

            m_ItemList.Sort((NEWS_ITEM news1, NEWS_ITEM news2) => news1.m_Time.CompareTo(news2.m_Time));

            // NOTE(review): the complete list (not only the new items) is appended on
            // every update, so output.txt accumulates repeated entries - confirm intended.
            StringBuilder output = new StringBuilder();
            foreach (var item in m_ItemList)
            {
                string strResult = string.Format("[{0}] {1} ({2})", item.m_Time, item.m_strTitle, item.m_strURL);
                Console.WriteLine(strResult);
                output.Append(strResult).Append("\n");
            }

            // One append per update instead of reopening the file for every line.
            System.IO.File.AppendAllText(OutputPath, output.ToString(), Encoding.GetEncoding("EUC-KR"));
        }
    }
}