391 lines
12 KiB
C#
391 lines
12 KiB
C#
using System;
|
|
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.ComponentModel;
|
|
using System.Data;
|
|
using System.Drawing;
|
|
using System.Globalization;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Net;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using System.Windows.Forms;
|
|
|
|
namespace NewsCrawler
|
|
{
|
|
public partial class Form1 : Form
|
|
{
|
|
/// <summary>
/// Delegate with the same signature as InsertItem; InsertItem wraps itself in
/// this delegate when it has to re-dispatch the call through Control.Invoke.
/// </summary>
private delegate void InsertListView(bool bInitial, string strTitle, DateTime time, string strURL, string strRef);

/// <summary>Timer whose Elapsed event is wired to CrawlTimer_Tick in the constructor.</summary>
private System.Timers.Timer m_CrawlTimer = new System.Timers.Timer();

/// <summary>Value assigned to the crawl timer's Interval (milliseconds); defaults to 500.</summary>
private int m_iCrawlInterval = 500;
|
|
/// <summary>
/// Builds the form, runs one initial pass over every news source, sorts the
/// list ascending on its first column, selects the last row and finally starts
/// the crawl timer.
/// </summary>
public Form1()
{
    InitializeComponent();

    wbView.ScriptErrorsSuppressed = false;

    // Initial pass: every reader is called with bInitial = true.
    ReadKIND(true);
    ReadDart(true);
    ReadAsiaE(true);
    ReadEtoday(true);

    // Sort once, after all initial rows have been added.
    lvList.ListViewItemSorter = new ListViewItemComparer(0, SortOrder.Ascending);
    lvList.Sorting = SortOrder.Ascending;
    lvList.Sort();

    // Select the last row (if any) and scroll it into view.
    int iLastRow = lvList.Items.Count - 1;
    if (iLastRow >= 0)
    {
        lvList.Items[iLastRow].Selected = true;
        lvList.Select();
        if (lvList.SelectedItems.Count > 0)
        {
            lvList.SelectedItems[0].EnsureVisible();
        }
    }

    // NOTE(review): the timer is started before the form is shown; confirm the
    // first tick cannot reach InsertItem before the window handle exists.
    m_CrawlTimer.Interval = m_iCrawlInterval;
    m_CrawlTimer.Elapsed += CrawlTimer_Tick;
    m_CrawlTimer.Start();
}
|
|
|
|
/// <summary>
/// Adds one row to lvList unless a row with the same link already exists.
/// When called from a thread other than the one that owns the form, the call
/// is re-dispatched through Invoke.
/// </summary>
/// <param name="bInitial">When false, the list is re-sorted after the row is added.</param>
/// <param name="strTitle">Text for the title column.</param>
/// <param name="time">Time stamp; shown as HH:mm:ss in the first column.</param>
/// <param name="strURL">Link of the entry; also the key used for duplicate detection.</param>
/// <param name="strRef">Name of the source the entry came from.</param>
private void InsertItem(bool bInitial, string strTitle, DateTime time, string strURL, string strRef)
{
    // Debug aid kept from the original: flags entries that arrive without a title.
    if (strTitle == "")
    {
        Console.WriteLine("break");
    }

    try
    {
        if (this.InvokeRequired)
        {
            this.Invoke(new InsertListView(InsertItem), bInitial, strTitle, time, strURL, strRef);
            return;
        }

        // Skip entries whose link is already present in the list.
        bool bAlreadyListed = lvList.Items
            .Cast<ListViewItem>()
            .Any(existing => existing.SubItems[chLink.Index].Text == strURL);
        if (bAlreadyListed)
        {
            return;
        }

        string[] astrCells = { time.ToString("HH:mm:ss"), strTitle, "", "", strRef, strURL };
        lvList.Items.Add(new ListViewItem(astrCells));

        if (chAutoSelect.Checked)
        {
            int iLastRow = lvList.Items.Count - 1;
            lvList.Items[iLastRow].Selected = true;
            lvList.Select();
            if (lvList.SelectedItems.Count > 0)
            {
                lvList.SelectedItems[0].EnsureVisible();
            }
        }

        if (!bInitial)
        {
            lvList.Sort();
        }

        // A width of -2 asks the ListView to auto-size the column.
        foreach (ColumnHeader header in lvList.Columns)
        {
            header.Width = -2;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}
|
|
|
|
/// <summary>
/// Downloads the Asia Economy breaking-news list (EUC-KR encoded), extracts
/// title, time and link from every list entry and hands them to InsertItem.
/// Any exception is written to the console and otherwise ignored.
/// </summary>
/// <param name="bInitial">Forwarded unchanged to InsertItem.</param>
/// <returns>
/// The value of bHasNew. Nothing in this method ever sets it, so the result is
/// currently always false.
/// </returns>
bool ReadAsiaE(bool bInitial = false)
{
    bool bHasNew = false;

    try
    {
        string strServerURL = "http://www.asiae.co.kr";
        WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            // Collapse doubled quotes in the markup before it is parsed.
            responseFromServer = responseFromServer.Replace("\"\"", "\"");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);
            string strXPath = "//div[contains(@class, 'txtform')]/ul/li";
            var lists = doc.DocumentNode.SelectNodes(strXPath);
            if (lists == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return bHasNew;
            }

            foreach (var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span");
                if (anchor == null || timeNode == null)
                {
                    // A malformed entry must not abort the remaining entries.
                    continue;
                }

                string strTitle = anchor.GetAttributeValue("title", "");
                string strTime = timeNode.InnerText.Trim();
                string strURL = strServerURL + anchor.GetAttributeValue("href", "");

                // A time cell that looks like a date (digits/digits) marks an
                // entry from a previous day; those are skipped.
                if (Regex.IsMatch(strTime, @"\d+/\d+"))
                {
                    continue;
                }

                // "HH:mm" is a fixed wire format, so parse it culture-independently.
                DateTime time;
                if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
                {
                    Console.WriteLine("ReadAsiaE: skipped entry with unparsable time '" + strTime + "'");
                    continue;
                }

                InsertItem(bInitial, strTitle, time, strURL, "아시아경제");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return bHasNew;
}
|
|
|
|
/// <summary>
/// Downloads the Etoday flash-news list (UTF-8), extracts title, time and
/// article id from every list entry and hands them to InsertItem.
/// Any exception is written to the console and otherwise ignored.
/// </summary>
/// <param name="bInitial">Forwarded unchanged to InsertItem.</param>
/// <returns>
/// The value of bHasNew. Nothing in this method ever sets it, so the result is
/// currently always false.
/// </returns>
bool ReadEtoday(bool bInitial = false)
{
    bool bHasNew = false;

    try
    {
        WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);
            string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li";
            var lists = doc.DocumentNode.SelectNodes(strXPath);
            if (lists == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return bHasNew;
            }

            foreach (var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
                if (anchor == null || timeNode == null)
                {
                    // A malformed entry must not abort the remaining entries.
                    continue;
                }

                string strTitle = anchor.InnerText;
                string strTime = timeNode.InnerText.Trim();

                // Only the digits of the href are kept and used as the article id.
                string strURL = anchor.GetAttributeValue("href", "");
                strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=" + Regex.Replace(strURL, @"\D", "");

                // A time cell that looks like a date (digits/digits) marks an
                // entry from a previous day; those are skipped.
                if (Regex.IsMatch(strTime, @"\d+/\d+"))
                {
                    continue;
                }

                // "HH:mm" is a fixed wire format, so parse it culture-independently.
                DateTime time;
                if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
                {
                    Console.WriteLine("ReadEtoday: skipped entry with unparsable time '" + strTime + "'");
                    continue;
                }

                InsertItem(bInitial, strTitle, time, strURL, "이투데이");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return bHasNew;
}
|
|
|
|
/// <summary>
/// Downloads the DART disclosure list (UTF-8), extracts title, time and link
/// from every table row and hands them to InsertItem.
/// Any exception is written to the console and otherwise ignored.
/// </summary>
/// <param name="bInitial">Forwarded unchanged to InsertItem.</param>
/// <returns>
/// The value of bHasNew. Nothing in this method ever sets it, so the result is
/// currently always false.
/// </returns>
bool ReadDart(bool bInitial = false)
{
    bool bHasNew = false;

    try
    {
        string strServerURL = "https://dart.fss.or.kr";
        WebRequest request = WebRequest.Create("https://dart.fss.or.kr/dsac001/mainAll.do");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            string strXPath = "//div[@id='listContents']/div[contains(@class, 'table_list')]/table/tr";
            var lists = doc.DocumentNode.SelectNodes(strXPath);
            if (lists == null)
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                return bHasNew;
            }

            foreach (var item in lists)
            {
                // Rows without any <td> make SelectNodes return null, so both
                // "no cells" and "too few cells" have to be handled here.
                var rows = item.SelectNodes(".//td");
                if (rows == null || rows.Count < 3)
                {
                    continue;
                }

                var timeNode = item.SelectSingleNode(".//td[contains(@class, 'cen_txt')]");
                var anchor = rows[2].SelectSingleNode(".//a");
                if (timeNode == null || anchor == null)
                {
                    // A malformed row must not abort the remaining rows.
                    continue;
                }

                string strTitle = rows[2].InnerText.Trim();
                string strTime = timeNode.InnerText.Trim();
                string strURL = strServerURL + anchor.GetAttributeValue("href", "").Trim();

                // A time cell that looks like a date (digits/digits) marks an
                // entry from a previous day; those are skipped.
                if (Regex.IsMatch(strTime, @"\d+/\d+"))
                {
                    continue;
                }

                // "HH:mm" is a fixed wire format, so parse it culture-independently.
                DateTime time;
                if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
                {
                    Console.WriteLine("ReadDart: skipped row with unparsable time '" + strTime + "'");
                    continue;
                }

                InsertItem(bInitial, strTitle, time, strURL, "DART");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return bHasNew;
}
|
|
|
|
/// <summary>
/// Downloads the KIND "today" RSS feed (UTF-8), extracts title, publication
/// time and link from every item element and hands them to InsertItem.
/// Any exception is written to the console and otherwise ignored.
/// </summary>
/// <remarks>
/// The previous version formatted the publication time as "HH:mm:ss" and then
/// re-parsed that string with the pattern "HH:mm", which always failed, so no
/// item was ever inserted. The parsed pubDate value is now passed on directly.
/// The date-pattern check on the formatted time could never match and was dropped.
/// </remarks>
/// <param name="bInitial">Forwarded unchanged to InsertItem.</param>
/// <returns>
/// The value of bHasNew. Nothing in this method ever sets it, so the result is
/// currently always false.
/// </returns>
bool ReadKIND(bool bInitial = false)
{
    bool bHasNew = false;

    try
    {
        // The last query parameter is "currentPageSize"; the "&curren" prefix had
        // been turned into the currency sign by an HTML entity decode.
        WebRequest request = WebRequest.Create("http://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            string strXPath = "//item";
            var lists = doc.DocumentNode.SelectNodes(strXPath);
            if (lists == null)
            {
                return false;
            }

            foreach (var item in lists)
            {
                var titleNode = item.SelectSingleNode(".//title");
                var dateNode = item.ChildNodes["pubDate"];
                var linkNode = item.ChildNodes["link"];
                // The link text is read from the node that follows the link
                // element, exactly as the original code did.
                var linkTextNode = linkNode == null ? null : linkNode.NextSibling;
                if (titleNode == null || dateNode == null || linkTextNode == null)
                {
                    // A malformed item must not abort the remaining items.
                    continue;
                }

                string strTitle = StripCData(titleNode.InnerText);
                string strURL = StripCData(linkTextNode.InnerText);
                string strTime = dateNode.InnerText.Trim();

                // Same culture as the original Convert.ToDateTime call first,
                // then the invariant culture as a fallback.
                DateTime time;
                if (!DateTime.TryParse(strTime, CultureInfo.CurrentCulture, DateTimeStyles.None, out time)
                    && !DateTime.TryParse(strTime, CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
                {
                    Console.WriteLine("ReadKIND: skipped item with unparsable pubDate '" + strTime + "'");
                    continue;
                }

                InsertItem(bInitial, strTitle, time, strURL, "KIND");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    return bHasNew;
}

/// <summary>
/// Removes a surrounding CDATA wrapper from the text when one is present;
/// otherwise returns the trimmed text unchanged.
/// </summary>
private static string StripCData(string strText)
{
    const string strOpen = "<![CDATA[";
    const string strClose = "]]>";

    string strTrimmed = strText.Trim();
    bool bWrapped = strTrimmed.Length >= strOpen.Length + strClose.Length
        && strTrimmed.StartsWith(strOpen, StringComparison.Ordinal)
        && strTrimmed.EndsWith(strClose, StringComparison.Ordinal);

    return bWrapped
        ? strTrimmed.Substring(strOpen.Length, strTrimmed.Length - strOpen.Length - strClose.Length)
        : strTrimmed;
}
|
|
|
|
/// <summary>
/// Elapsed handler of the crawl timer. Pauses the timer, runs one pass over
/// all sources when auto-reload is checked, then applies the current interval
/// and re-enables the timer.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void CrawlTimer_Tick(object sender, EventArgs e)
{
    m_CrawlTimer.Enabled = false;

    try
    {
        if (chAutoReload.Checked)
        {
            ReadKIND();
            ReadDart();
            ReadAsiaE();
            ReadEtoday();
        }
    }
    finally
    {
        // Always re-arm the timer: if an exception escaped above without this,
        // the timer would stay disabled and crawling would stop for good.
        m_CrawlTimer.Interval = m_iCrawlInterval;
        m_CrawlTimer.Enabled = true;
    }
}
|
|
|
|
/// <summary>
/// Navigates the embedded browser to the link stored in the first selected
/// row. Does nothing when no row is selected.
/// </summary>
private void lvList_SelectedIndexChanged(object sender, EventArgs e)
{
    var selection = lvList.SelectedItems;
    if (selection.Count > 0)
    {
        string strTarget = selection[0].SubItems[chLink.Index].Text;

        wbView.ScriptErrorsSuppressed = true;
        wbView.Navigate(strTarget);
    }
}
|
|
|
|
/// <summary>
/// On Enter, reads the crawl interval (in seconds) from tbInterval, stores it
/// in milliseconds in m_iCrawlInterval and writes the normalised value back
/// into the text box followed by "초".
/// </summary>
/// <remarks>
/// The previous pattern <c>\D\.</c> only removed "a non-digit followed by a
/// dot", so the unit suffix this handler itself appends stayed in the text and
/// the next Enter made float.Parse throw. Everything except digits and dots is
/// now removed, and unparsable input leaves the current interval unchanged.
/// </remarks>
private void tbInterval_KeyPress(object sender, KeyPressEventArgs e)
{
    if (e.KeyChar != Convert.ToChar(Keys.Enter))
    {
        return;
    }

    // Keep digits and the decimal point only (drops the unit suffix, spaces, ...).
    string strNumber = Regex.Replace(tbInterval.Text, @"[^\d.]", "");

    // The pattern above hard-codes '.', so parse and format with the invariant culture.
    float fSeconds;
    if (float.TryParse(strNumber, NumberStyles.Float, CultureInfo.InvariantCulture, out fSeconds))
    {
        m_iCrawlInterval = (int)(fSeconds * 1000);
        if (m_iCrawlInterval < 1)
        {
            m_iCrawlInterval = 500;
        }
    }

    tbInterval.Text = (m_iCrawlInterval / (float)1000).ToString("##0.0", CultureInfo.InvariantCulture) + "초";
}
|
|
|
|
/// <summary>
/// Re-sorts the list on the clicked column, flipping the direction: a list
/// that is currently descending or unsorted becomes ascending, anything else
/// becomes descending.
/// </summary>
private void lvList_ColumnClick(object sender, ColumnClickEventArgs e)
{
    SortOrder nextOrder;
    switch (lvList.Sorting)
    {
        case SortOrder.Descending:
        case SortOrder.None:
            nextOrder = SortOrder.Ascending;
            break;
        default:
            nextOrder = SortOrder.Descending;
            break;
    }

    lvList.ListViewItemSorter = new ListViewItemComparer(e.Column, nextOrder);
    lvList.Sorting = nextOrder;
    lvList.Sort();
}
|
|
}
|
|
|
|
|
|
/// <summary>
/// Orders ListViewItem instances by the text of one sub-item column, either
/// ascending or (for any other SortOrder value) reversed.
/// </summary>
class ListViewItemComparer : IComparer
{
    // Index of the sub-item whose text is compared.
    int m_iColumn = 0;
    // Ascending compares x to y; every other value compares y to x.
    SortOrder m_Order = SortOrder.Ascending;

    /// <summary>Creates a comparer for the given column index and direction.</summary>
    public ListViewItemComparer(int column, SortOrder Order)
    {
        m_Order = Order;
        m_iColumn = column;
    }

    /// <summary>
    /// Compares the column text of two ListViewItem objects with string.Compare.
    /// </summary>
    /// <param name="x">First item; must be a ListViewItem.</param>
    /// <param name="y">Second item; must be a ListViewItem.</param>
    /// <returns>The string.Compare result, with the operands swapped unless the order is Ascending.</returns>
    public int Compare(object x, object y)
    {
        ListViewItem first = (ListViewItem)x;
        ListViewItem second = (ListViewItem)y;

        bool bAscending = m_Order == SortOrder.Ascending;
        ListViewItem left = bAscending ? first : second;
        ListViewItem right = bAscending ? second : first;

        return string.Compare(left.SubItems[m_iColumn].Text, right.SubItems[m_iColumn].Text);
    }
}
|
|
|
|
}
|