From 08e6ff57cdb350e7c29af787e7f4644f0403f8da Mon Sep 17 00:00:00 2001 From: mjjo Date: Tue, 2 Aug 2016 11:56:12 +0900 Subject: [PATCH] =?UTF-8?q?-=20=EC=A0=84=EC=9E=90=EA=B3=B5=EC=8B=9C=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20-=20=ED=83=80=EC=9E=84=EC=95=84=EC=9B=83?= =?UTF-8?q?=20=EC=A0=81=EC=9A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Form1.cs | 241 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 162 insertions(+), 79 deletions(-) diff --git a/Form1.cs b/Form1.cs index bab798b..d0589de 100644 --- a/Form1.cs +++ b/Form1.cs @@ -49,54 +49,61 @@ namespace NewsCrawler { bool bHasNew = false; - string strServerURL = "http://www.asiae.co.kr"; - WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm"); - //request.Method = "POST"; - //request. - request.Credentials=CredentialCache.DefaultCredentials; - - HttpWebResponse response = (HttpWebResponse)request.GetResponse(); - Console.WriteLine(response.StatusDescription); - - Stream dataStream = response.GetResponseStream(); - StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")); - string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd()); - - HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); - doc.LoadHtml(responseFromServer); - string strXPath = "//div[contains(@class, 'txtform')]/ul/li"; - var lists = doc.DocumentNode.SelectNodes(strXPath); - foreach(var item in lists) + try { - string strTitle = item.SelectSingleNode(".//a").GetAttributeValue("title", ""); - string strTime = item.SelectSingleNode(".//span").InnerText; - string strURL = strServerURL+item.SelectSingleNode(".//a").GetAttributeValue("href", ""); + string strServerURL = "http://www.asiae.co.kr"; + WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm"); + request.Credentials=CredentialCache.DefaultCredentials; + request.Timeout=2000; - - if (Regex.IsMatch(strTime, @"\d+/\d+") == true) - { - //Console.WriteLine("어제 기사 : " + item.InnerHtml); - continue; - } - - - if (m_ItemList.Any(c => c.m_strURL==strURL)==false) + using(HttpWebResponse response = (HttpWebResponse)request.GetResponse()) { - NEWS_ITEM news = new NEWS_ITEM(); - news.m_strTitle=strTitle; - news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture); - news.m_strURL=strURL; - news.m_strReference = "아시아경제"; + using(Stream dataStream = response.GetResponseStream()) + { + using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR"))) + { + string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd()); - m_ItemList.Add(news); + HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(responseFromServer); + string strXPath = "//div[contains(@class, 'txtform')]/ul/li"; + var lists = doc.DocumentNode.SelectNodes(strXPath); + foreach(var item in lists) + { + string strTitle = item.SelectSingleNode(".//a").GetAttributeValue("title", ""); + string strTime = item.SelectSingleNode(".//span").InnerText; + string strURL = strServerURL+item.SelectSingleNode(".//a").GetAttributeValue("href", ""); - bHasNew=true; + + if(Regex.IsMatch(strTime, @"\d+/\d+")==true) + { + //Console.WriteLine("어제 기사 : " + item.InnerHtml); + continue; + } + + + if(m_ItemList.Any(c => c.m_strURL==strURL)==false) + { + NEWS_ITEM news = new NEWS_ITEM(); + news.m_strTitle=strTitle; + news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture); + news.m_strURL=strURL; + news.m_strReference="아시아경제"; + + m_ItemList.Add(news); + + bHasNew=true; + } + } + + } + } } } - - reader.Close(); - dataStream.Close(); - response.Close(); + catch(Exception e) + { + Console.WriteLine(e.ToString()); + } return bHasNew; } @@ -105,51 +112,126 @@ namespace NewsCrawler { bool bHasNew = false; - //string strServerURL = "http://www.etoday.co.kr"; - WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=2"); - request.Credentials=CredentialCache.DefaultCredentials; - - HttpWebResponse response = (HttpWebResponse)request.GetResponse(); - Console.WriteLine(response.StatusDescription); - - Stream dataStream = response.GetResponseStream(); - StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")); - string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd()); - - HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); - doc.LoadHtml(responseFromServer); - string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li"; - var lists = doc.DocumentNode.SelectNodes(strXPath); - foreach(var item in lists) + try { - string strTitle = item.SelectSingleNode(".//a").InnerText; - string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText; - string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", ""); - strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", ""); + //string strServerURL = "http://www.etoday.co.kr"; + WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1"); + request.Credentials=CredentialCache.DefaultCredentials; + request.Timeout=2000; - if (Regex.IsMatch(strTime, @"\d+/\d+") == true) - { - //Console.WriteLine("어제 기사 : " + item.InnerHtml); - continue; - } - - if (m_ItemList.Any(c => c.m_strURL==strURL)==false) + using(HttpWebResponse response = (HttpWebResponse)request.GetResponse()) { - NEWS_ITEM news = new NEWS_ITEM(); - news.m_strTitle=strTitle; - news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture); - news.m_strURL=strURL; - news.m_strReference = "이투데이"; + using(Stream dataStream = response.GetResponseStream()) + { + using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8"))) + { + string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd()); - m_ItemList.Add(news); + HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(responseFromServer); + string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li"; + var lists = doc.DocumentNode.SelectNodes(strXPath); + foreach(var item in lists) + { + string strTitle = item.SelectSingleNode(".//a").InnerText; + string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText; + string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", ""); + strURL="http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", ""); - bHasNew=true; + if(Regex.IsMatch(strTime, @"\d+/\d+")==true) + { + //Console.WriteLine("어제 기사 : " + item.InnerHtml); + continue; + } + + if(m_ItemList.Any(c => c.m_strURL==strURL)==false) + { + NEWS_ITEM news = new NEWS_ITEM(); + news.m_strTitle=strTitle; + news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture); + news.m_strURL=strURL; + news.m_strReference="이투데이"; + + m_ItemList.Add(news); + + bHasNew=true; + } + } + } + } } } + catch(Exception e) + { + Console.WriteLine(e.ToString()); + } - reader.Close(); - dataStream.Close(); - response.Close(); + return bHasNew; + } + + bool ReadDart() + { + bool bHasNew = false; + + try + { + string strServerURL = "https://dart.fss.or.kr"; + WebRequest request = WebRequest.Create("https://dart.fss.or.kr/dsac001/mainAll.do"); + request.Credentials=CredentialCache.DefaultCredentials; + request.Timeout=2000; + + using(HttpWebResponse response = (HttpWebResponse)request.GetResponse()) + { + using(Stream dataStream = response.GetResponseStream()) + { + using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8"))) + { + string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd()); + + HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); + doc.LoadHtml(responseFromServer); + + string strXPath = "//div[@id='listContents']/div[contains(@class, 'table_list')]/table/tr"; + var lists = doc.DocumentNode.SelectNodes(strXPath); + foreach(var item in lists) + { + var rows = item.SelectNodes(".//td"); + + string strTitle = rows[2].InnerText; + strTitle=strTitle.Trim(); + string strTime = item.SelectSingleNode(".//td[contains(@class, 'cen_txt')]").InnerText; + strTime=strTime.Trim(); + string strURL = rows[2].SelectSingleNode(".//a").GetAttributeValue("href", ""); + strURL=strURL.Trim(); + strURL =strServerURL+strURL; + + if(Regex.IsMatch(strTime, @"\d+/\d+")==true) + { + //Console.WriteLine("어제 기사 : " + item.InnerHtml); + continue; + } + + if(m_ItemList.Any(c => c.m_strURL==strURL)==false) + { + NEWS_ITEM news = new NEWS_ITEM(); + news.m_strTitle=strTitle; + news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture); + news.m_strURL=strURL; + news.m_strReference="전자공시"; + + m_ItemList.Add(news); + + bHasNew=true; + } + } + } + } + } + } + catch(Exception e) + { + Console.WriteLine(e.ToString()); + } return bHasNew; } @@ -160,7 +242,8 @@ namespace NewsCrawler if(chAutoReload.Checked == true) { - ReadAsiaE(); + ReadDart(); + ReadAsiaE(); ReadEtoday(); }