- 전자공시 추가

- 타임아웃 적용
This commit is contained in:
mjjo
2016-08-02 11:56:12 +09:00
parent b35ec4e09a
commit 08e6ff57cd

241
Form1.cs
View File

@@ -49,54 +49,61 @@ namespace NewsCrawler
{
bool bHasNew = false;
string strServerURL = "http://www.asiae.co.kr";
WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm");
//request.Method = "POST";
//request.
request.Credentials=CredentialCache.DefaultCredentials;
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
Console.WriteLine(response.StatusDescription);
Stream dataStream = response.GetResponseStream();
StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR"));
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(responseFromServer);
string strXPath = "//div[contains(@class, 'txtform')]/ul/li";
var lists = doc.DocumentNode.SelectNodes(strXPath);
foreach(var item in lists)
try
{
string strTitle = item.SelectSingleNode(".//a").GetAttributeValue("title", "");
string strTime = item.SelectSingleNode(".//span").InnerText;
string strURL = strServerURL+item.SelectSingleNode(".//a").GetAttributeValue("href", "");
string strServerURL = "http://www.asiae.co.kr";
WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm");
request.Credentials=CredentialCache.DefaultCredentials;
request.Timeout=2000;
if (Regex.IsMatch(strTime, @"\d+/\d+") == true)
{
//Console.WriteLine("어제 기사 : " + item.InnerHtml);
continue;
}
if (m_ItemList.Any(c => c.m_strURL==strURL)==false)
using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
NEWS_ITEM news = new NEWS_ITEM();
news.m_strTitle=strTitle;
news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture);
news.m_strURL=strURL;
news.m_strReference = "아시아경제";
using(Stream dataStream = response.GetResponseStream())
{
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
{
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
m_ItemList.Add(news);
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(responseFromServer);
string strXPath = "//div[contains(@class, 'txtform')]/ul/li";
var lists = doc.DocumentNode.SelectNodes(strXPath);
foreach(var item in lists)
{
string strTitle = item.SelectSingleNode(".//a").GetAttributeValue("title", "");
string strTime = item.SelectSingleNode(".//span").InnerText;
string strURL = strServerURL+item.SelectSingleNode(".//a").GetAttributeValue("href", "");
bHasNew=true;
if(Regex.IsMatch(strTime, @"\d+/\d+")==true)
{
//Console.WriteLine("어제 기사 : " + item.InnerHtml);
continue;
}
if(m_ItemList.Any(c => c.m_strURL==strURL)==false)
{
NEWS_ITEM news = new NEWS_ITEM();
news.m_strTitle=strTitle;
news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture);
news.m_strURL=strURL;
news.m_strReference="아시아경제";
m_ItemList.Add(news);
bHasNew=true;
}
}
}
}
}
}
reader.Close();
dataStream.Close();
response.Close();
catch(Exception e)
{
Console.WriteLine(e.ToString());
}
return bHasNew;
}
@@ -105,51 +112,126 @@ namespace NewsCrawler
{
bool bHasNew = false;
//string strServerURL = "http://www.etoday.co.kr";
WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=2");
request.Credentials=CredentialCache.DefaultCredentials;
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
Console.WriteLine(response.StatusDescription);
Stream dataStream = response.GetResponseStream();
StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8"));
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(responseFromServer);
string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li";
var lists = doc.DocumentNode.SelectNodes(strXPath);
foreach(var item in lists)
try
{
string strTitle = item.SelectSingleNode(".//a").InnerText;
string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText;
string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", "");
strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
//string strServerURL = "http://www.etoday.co.kr";
WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1");
request.Credentials=CredentialCache.DefaultCredentials;
request.Timeout=2000;
if (Regex.IsMatch(strTime, @"\d+/\d+") == true)
{
//Console.WriteLine("어제 기사 : " + item.InnerHtml);
continue;
}
if (m_ItemList.Any(c => c.m_strURL==strURL)==false)
using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
NEWS_ITEM news = new NEWS_ITEM();
news.m_strTitle=strTitle;
news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture);
news.m_strURL=strURL;
news.m_strReference = "이투데이";
using(Stream dataStream = response.GetResponseStream())
{
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
{
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
m_ItemList.Add(news);
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(responseFromServer);
string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li";
var lists = doc.DocumentNode.SelectNodes(strXPath);
foreach(var item in lists)
{
string strTitle = item.SelectSingleNode(".//a").InnerText;
string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText;
string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", "");
strURL="http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
bHasNew=true;
if(Regex.IsMatch(strTime, @"\d+/\d+")==true)
{
//Console.WriteLine("어제 기사 : " + item.InnerHtml);
continue;
}
if(m_ItemList.Any(c => c.m_strURL==strURL)==false)
{
NEWS_ITEM news = new NEWS_ITEM();
news.m_strTitle=strTitle;
news.m_Time=DateTime.ParseExact(strTime, "HH:mm", CultureInfo.CurrentCulture);
news.m_strURL=strURL;
news.m_strReference="이투데이";
m_ItemList.Add(news);
bHasNew=true;
}
}
}
}
}
}
catch(Exception e)
{
Console.WriteLine(e.ToString());
}
reader.Close();
dataStream.Close();
response.Close();
return bHasNew;
}
/// <summary>
/// Downloads the DART listing page (https://dart.fss.or.kr/dsac001/mainAll.do), walks the
/// rows of its result table and appends every entry whose URL is not yet in m_ItemList.
/// </summary>
/// <returns>true when at least one new item was added to m_ItemList; otherwise false.</returns>
/// <remarks>
/// Does not throw: any failure (network error, 2 second timeout, unexpected markup) is written
/// to the console and the items collected so far are kept. Rows that do not have the expected
/// shape are skipped individually so that one odd row does not discard the rest of the page.
/// </remarks>
bool ReadDart()
{
    bool bHasNew = false;

    try
    {
        string strServerURL = "https://dart.fss.or.kr";
        WebRequest request = WebRequest.Create("https://dart.fss.or.kr/dsac001/mainAll.do");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;

        // Read the whole body first so the connection is released before parsing starts.
        string responseFromServer;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
        }

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(responseFromServer);

        string strXPath = "//div[@id='listContents']/div[contains(@class, 'table_list')]/table/tr";
        var lists = doc.DocumentNode.SelectNodes(strXPath);
        if (lists == null)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches.
            return bHasNew;
        }

        foreach (var item in lists)
        {
            // The title/link is read from the third cell; rows without it (e.g. a header
            // row made of <th> cells) carry no entry and are skipped.
            var rows = item.SelectNodes(".//td");
            if (rows == null || rows.Count < 3)
            {
                continue;
            }

            var timeNode = item.SelectSingleNode(".//td[contains(@class, 'cen_txt')]");
            var linkNode = rows[2].SelectSingleNode(".//a");
            if (timeNode == null || linkNode == null)
            {
                continue;
            }

            string strTitle = rows[2].InnerText.Trim();
            string strTime = timeNode.InnerText.Trim();
            string strURL = strServerURL + linkNode.GetAttributeValue("href", "").Trim();

            // A "digits/digits" value is a date rather than a time of day; such rows are
            // treated as older entries and ignored.
            if (Regex.IsMatch(strTime, @"\d+/\d+"))
            {
                continue;
            }

            if (m_ItemList.Any(c => c.m_strURL == strURL))
            {
                continue;
            }

            // The page emits a fixed "HH:mm" text, so parse it culture-independently and
            // skip the row instead of aborting the whole page when it is malformed.
            DateTime parsedTime;
            if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out parsedTime))
            {
                continue;
            }

            NEWS_ITEM news = new NEWS_ITEM();
            news.m_strTitle = strTitle;
            news.m_Time = parsedTime;
            news.m_strURL = strURL;
            news.m_strReference = "전자공시";
            m_ItemList.Add(news);
            bHasNew = true;
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return bHasNew;
}
@@ -160,7 +242,8 @@ namespace NewsCrawler
if(chAutoReload.Checked == true)
{
ReadAsiaE();
ReadDart();
ReadAsiaE();
ReadEtoday();
}