Files
NewsCrawler/Form1.cs
mjjo ebcdf9b7ea 리스트뷰 추가
뷰어 추가
쓰레드 처리
2016-07-15 00:10:00 +09:00

209 lines
6.0 KiB
C#

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace NewsCrawler
{
public partial class Form1 : Form
{
/// <summary>
/// One crawled headline: its title, time, article URL and the name of the
/// site it came from.
/// </summary>
class NEWS_ITEM
{
/// <summary>Headline text.</summary>
public string m_strTitle;
/// <summary>Time parsed from the site's "HH:mm" text.</summary>
public DateTime m_Time;
/// <summary>Article URL; the readers compare it to detect items already collected.</summary>
public string m_strURL;
/// <summary>Display name of the source site.</summary>
public string m_strReference;
}
// Timer that drives crawling; its Elapsed event is wired to CrawlTimer_Tick in the constructor.
System.Timers.Timer m_CrawlTimer = new System.Timers.Timer();
// Crawl interval in milliseconds; applied to m_CrawlTimer after each crawl and
// updated from the tbInterval text box (entered in seconds).
int m_iCrawlInterval = 500;
// Windows Forms timer that refreshes the list view (see UITimer_Tick).
Timer m_UITimer = new Timer();
// Every news item collected so far, from all sources.
List<NEWS_ITEM> m_ItemList = new List<NEWS_ITEM>();
// m_ItemList.Count at the time of the last list view refresh; used to detect new items.
int m_iLastItemCnt = 0;
/// <summary>
/// Builds the form, then configures and starts the crawl timer and the
/// UI refresh timer.
/// </summary>
public Form1()
{
    InitializeComponent();
    wbView.ScriptErrorsSuppressed = false;

    // Crawl timer: first tick after one second; CrawlTimer_Tick re-arms it
    // with m_iCrawlInterval afterwards.
    m_CrawlTimer.Interval = 1000;
    m_CrawlTimer.Elapsed += new System.Timers.ElapsedEventHandler(CrawlTimer_Tick);

    // UI timer: pushes newly collected items into the list view.
    m_UITimer.Interval = m_iCrawlInterval;
    m_UITimer.Tick += new EventHandler(UITimer_Tick);

    m_CrawlTimer.Start();
    m_UITimer.Start();
}
/// <summary>
/// Downloads the Asia Economy breaking-news list and appends every headline
/// whose URL is not yet in <c>m_ItemList</c>.
/// </summary>
/// <returns><c>true</c> when at least one new item was added.</returns>
/// <remarks>
/// Network failures (e.g. <see cref="WebException"/>) propagate to the caller.
/// List entries that lack a link or time element, or whose time is not in
/// "HH:mm" form, are skipped instead of aborting the whole read.
/// </remarks>
bool ReadAsiaE()
{
    const string strServerURL = "http://www.asiae.co.kr";
    const string strXPath = "//div[contains(@class, 'txtform')]/ul/li";

    WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm");
    request.Credentials = CredentialCache.DefaultCredentials;

    // using-blocks release the connection even when reading throws.
    string strHtml;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
    {
        Console.WriteLine(response.StatusDescription);
        strHtml = reader.ReadToEnd();
    }

    // Parse the raw markup and decode entities per extracted value; decoding
    // the whole page first would turn e.g. &quot; inside title="..." into a
    // real quote and truncate the attribute.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(strHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var lists = doc.DocumentNode.SelectNodes(strXPath);
    if (lists == null)
        return false;

    bool bHasNew = false;
    foreach (var item in lists)
    {
        var anchor = item.SelectSingleNode(".//a");
        var timeNode = item.SelectSingleNode(".//span");
        if (anchor == null || timeNode == null)
            continue;

        // The time text is machine-formatted, so parse it culture-independently.
        DateTime time;
        string strTime = WebUtility.HtmlDecode(timeNode.InnerText).Trim();
        if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
            continue;

        string strTitle = WebUtility.HtmlDecode(anchor.GetAttributeValue("title", ""));
        string strURL = strServerURL + WebUtility.HtmlDecode(anchor.GetAttributeValue("href", ""));

        // This method runs on the crawl timer's thread while the UI thread
        // reads the same list, so guard the check-and-add. Other code touching
        // m_ItemList should take the same lock.
        lock (m_ItemList)
        {
            if (m_ItemList.Any(c => c.m_strURL == strURL))
                continue;

            m_ItemList.Add(new NEWS_ITEM
            {
                m_strTitle = strTitle,
                m_Time = time,
                m_strURL = strURL,
                m_strReference = "아시아경제"
            });
        }
        bHasNew = true;
    }
    return bHasNew;
}
/// <summary>
/// Downloads the Etoday flash-news list and appends every headline whose
/// URL is not yet in <c>m_ItemList</c>.
/// </summary>
/// <returns><c>true</c> when at least one new item was added.</returns>
/// <remarks>
/// Network failures (e.g. <see cref="WebException"/>) propagate to the caller.
/// List entries that lack a link or time element, or whose time is not in
/// "HH:mm" form, are skipped instead of aborting the whole read.
/// </remarks>
bool ReadEtoday()
{
    const string strXPath = "//div[contains(@class, 'flash_tab_lst')]/ul/li";
    const string strArticleURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=";

    WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=2");
    request.Credentials = CredentialCache.DefaultCredentials;

    // using-blocks release the connection even when reading throws.
    string strHtml;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
    {
        Console.WriteLine(response.StatusDescription);
        strHtml = reader.ReadToEnd();
    }

    // Parse the raw markup and decode entities per extracted value; decoding
    // the whole page first can inject markup characters and corrupt parsing.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(strHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var lists = doc.DocumentNode.SelectNodes(strXPath);
    if (lists == null)
        return false;

    bool bHasNew = false;
    foreach (var item in lists)
    {
        var anchor = item.SelectSingleNode(".//a");
        var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
        if (anchor == null || timeNode == null)
            continue;

        // The time text is machine-formatted, so parse it culture-independently.
        DateTime time;
        string strTime = WebUtility.HtmlDecode(timeNode.InnerText).Trim();
        if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
            continue;

        string strTitle = WebUtility.HtmlDecode(anchor.InnerText);
        // Only the digits of the href are kept and appended to the article viewer URL.
        string strURL = strArticleURL + Regex.Replace(anchor.GetAttributeValue("href", ""), @"\D", "");

        // This method runs on the crawl timer's thread while the UI thread
        // reads the same list, so guard the check-and-add. Other code touching
        // m_ItemList should take the same lock.
        lock (m_ItemList)
        {
            if (m_ItemList.Any(c => c.m_strURL == strURL))
                continue;

            m_ItemList.Add(new NEWS_ITEM
            {
                m_strTitle = strTitle,
                m_Time = time,
                m_strURL = strURL,
                m_strReference = "이투데이"
            });
        }
        bHasNew = true;
    }
    return bHasNew;
}
/// <summary>
/// Crawl timer callback: pauses the timer, reads every news source when
/// auto-reload is checked, then re-arms the timer with the current interval.
/// </summary>
/// <param name="sender">The crawl timer.</param>
/// <param name="e">Event data (unused).</param>
private void CrawlTimer_Tick(object sender, EventArgs e)
{
    // Pause while crawling so a slow download cannot overlap the next tick.
    m_CrawlTimer.Enabled = false;
    try
    {
        if (chAutoReload.Checked)
        {
            // Each source is read independently so that one failing site
            // (timeout, DNS error, changed markup) does not skip the other.
            foreach (Func<bool> readSource in new Func<bool>[] { ReadAsiaE, ReadEtoday })
            {
                try
                {
                    readSource();
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Crawl failed: " + ex.Message);
                }
            }
        }
    }
    finally
    {
        // Always re-arm; otherwise a single exception would leave the timer
        // disabled and crawling would never resume.
        m_CrawlTimer.Interval = m_iCrawlInterval;
        m_CrawlTimer.Enabled = true;
    }
}
/// <summary>
/// UI timer callback: when new items have been collected, sorts the item
/// list by time and inserts only the not-yet-displayed items into the list
/// view, so existing rows (and the current selection) are left in place and
/// no row is shown twice.
/// </summary>
/// <param name="sender">The UI timer.</param>
/// <param name="e">Event data (unused).</param>
private void UITimer_Tick(object sender, EventArgs e)
{
    // The crawl timer adds to m_ItemList from another thread, so sort and
    // copy under a lock and then work on the private copy.
    List<NEWS_ITEM> snapshot;
    lock (m_ItemList)
    {
        if (m_iLastItemCnt == m_ItemList.Count)
            return;
        m_ItemList.Sort((NEWS_ITEM news1, NEWS_ITEM news2) => news1.m_Time.CompareTo(news2.m_Time));
        snapshot = new List<NEWS_ITEM>(m_ItemList);
        m_iLastItemCnt = m_ItemList.Count;
    }

    // Rows are keyed by URL through ListViewItem.Name.
    HashSet<string> shown = new HashSet<string>();
    foreach (ListViewItem row in lvList.Items)
        shown.Add(row.Name);

    lvList.BeginUpdate();
    try
    {
        for (int i = 0; i < snapshot.Count; i++)
        {
            NEWS_ITEM item = snapshot[i];
            if (!shown.Add(item.m_strURL))
                continue; // already displayed

            ListViewItem row = new ListViewItem(new string[] { item.m_Time.ToString("HH:mm:ss"), item.m_strTitle, "", "", item.m_strReference, item.m_strURL });
            row.Name = item.m_strURL;
            // Insert at the item's position in the sorted snapshot to keep
            // the rows in time order.
            lvList.Items.Insert(Math.Min(i, lvList.Items.Count), row);
        }

        // -2 auto-sizes each column to its header/content.
        foreach (ColumnHeader col in lvList.Columns)
            col.Width = -2;
    }
    finally
    {
        lvList.EndUpdate();
    }

    if (chAutoSelect.Checked && lvList.Items.Count > 0)
    {
        ListViewItem last = lvList.Items[lvList.Items.Count - 1];
        last.Selected = true;
        lvList.Select();
        last.EnsureVisible();
    }
}
/// <summary>
/// Opens the article of the first selected row in the embedded browser.
/// Does nothing while no row is selected.
/// </summary>
/// <param name="sender">The list view.</param>
/// <param name="e">Event data (unused).</param>
private void lvList_SelectedIndexChanged(object sender, EventArgs e)
{
    if (lvList.SelectedItems.Count > 0)
    {
        // The link column holds the article URL.
        ListViewItem firstSelected = lvList.SelectedItems[0];
        string target = firstSelected.SubItems[chLink.Index].Text;

        wbView.ScriptErrorsSuppressed = true;
        wbView.Navigate(target);
    }
}
/// <summary>
/// On Enter, reads the crawl interval (in seconds) from the text box,
/// stores it in milliseconds and rewrites the box as "0.0초".
/// </summary>
/// <param name="sender">The interval text box.</param>
/// <param name="e">Key data; only the Enter key is handled.</param>
private void tbInterval_KeyPress(object sender, KeyPressEventArgs e)
{
    if (e.KeyChar != Convert.ToChar(Keys.Enter))
        return;

    // Keep digits and '.' only. This must also strip the "초" suffix this
    // handler appends itself, or pressing Enter twice would fail to parse.
    string strNumber = Regex.Replace(tbInterval.Text, @"[^\d.]", "");

    // '.' is the only separator kept above, so parse and format with the
    // invariant culture. Unparseable input keeps the previous interval.
    float fSeconds;
    if (float.TryParse(strNumber, NumberStyles.Float, CultureInfo.InvariantCulture, out fSeconds))
    {
        double dMillis = Math.Round((double)fSeconds * 1000.0);
        // Below 1 ms or beyond int range falls back to the 500 ms default.
        m_iCrawlInterval = (dMillis >= 1 && dMillis <= int.MaxValue) ? (int)dMillis : 500;
    }

    tbInterval.Text = (m_iCrawlInterval / (float)1000).ToString("##0.0", CultureInfo.InvariantCulture) + "초";
}
}
}