initial commit
This commit is contained in:
150
Form1.cs
Normal file
150
Form1.cs
Normal file
@@ -0,0 +1,150 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace NewsCrawler
|
||||
{
|
||||
public partial class Form1 : Form
|
||||
{
|
||||
/// <summary>
/// One crawled headline: its title, the time shown next to it on the list page,
/// and the article URL. The URL is what the crawler compares to detect duplicates.
/// </summary>
class NEWS_ITEM
{
    /// <summary>Article URL; used as the identity of the item.</summary>
    public string m_strURL;

    /// <summary>Headline text.</summary>
    public string m_strTitle;

    /// <summary>Time parsed from the list page's "HH:mm" text.</summary>
    public DateTime m_Time;
}
|
||||
|
||||
|
||||
// Every item collected so far, across all sources; grows for the lifetime of the form.
private List<NEWS_ITEM> m_ItemList = new List<NEWS_ITEM>();

// UI-thread timer that drives the periodic crawl (see Timer_Tick).
private System.Windows.Forms.Timer m_Timer = new System.Windows.Forms.Timer();
|
||||
|
||||
/// <summary>
/// Builds the designer-generated UI and starts the polling timer,
/// which raises <c>Timer_Tick</c> every 500 ms.
/// </summary>
public Form1()
{
    InitializeComponent();

    // Configure the timer first, then attach the handler and switch it on.
    m_Timer.Interval = 500;
    m_Timer.Tick += new EventHandler(Timer_Tick);
    m_Timer.Enabled = true;
}
|
||||
|
||||
/// <summary>
/// Downloads the asiae.co.kr list page, extracts each list entry (title, time, link)
/// and appends every entry whose URL is not yet in <c>m_ItemList</c>.
/// </summary>
/// <returns><c>true</c> if at least one new item was added; otherwise <c>false</c>.</returns>
/// <exception cref="WebException">The HTTP request failed.</exception>
/// <remarks>
/// Entries that lack a link or time element, or whose time text is not "HH:mm",
/// are skipped instead of aborting the whole crawl.
/// </remarks>
bool ReadAsiaE()
{
    const string strServerURL = "http://www.asiae.co.kr";
    const string strListURL = "http://www.asiae.co.kr/news/sokbo/sokbo_left.htm";

    WebRequest request = WebRequest.Create(strListURL);
    request.Credentials = CredentialCache.DefaultCredentials;

    // 'using' guarantees the connection, stream and reader are released even when
    // reading throws; the original only closed them on the success path.
    string strHtml;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
    {
        Console.WriteLine(response.StatusDescription);
        strHtml = reader.ReadToEnd();
    }

    // Parse the raw markup. Entities are decoded per field below: decoding the whole
    // document first would turn escaped quotes/angle brackets into real markup and
    // corrupt attribute values such as the title.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(strHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'txtform')]/ul/li");
    if (lists == null)
    {
        return false;
    }

    bool bHasNew = false;
    foreach (var item in lists)
    {
        var anchor = item.SelectSingleNode(".//a");
        var timeNode = item.SelectSingleNode(".//span");
        if (anchor == null || timeNode == null)
        {
            continue;
        }

        string strURL = strServerURL + WebUtility.HtmlDecode(anchor.GetAttributeValue("href", ""));
        if (m_ItemList.Any(c => c.m_strURL == strURL))
        {
            continue;
        }

        // Invariant culture: the page's "HH:mm" format is fixed and must not depend
        // on the time separator of the user's regional settings.
        DateTime time;
        string strTime = WebUtility.HtmlDecode(timeNode.InnerText).Trim();
        if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
        {
            continue;
        }

        NEWS_ITEM news = new NEWS_ITEM();
        news.m_strTitle = WebUtility.HtmlDecode(anchor.GetAttributeValue("title", ""));
        news.m_Time = time;
        news.m_strURL = strURL;
        m_ItemList.Add(news);

        bHasNew = true;
    }

    return bHasNew;
}
|
||||
|
||||
/// <summary>
/// Downloads the etoday.co.kr flash-news list page, extracts each list entry
/// (title, time, article id) and appends every entry whose URL is not yet in
/// <c>m_ItemList</c>.
/// </summary>
/// <returns><c>true</c> if at least one new item was added; otherwise <c>false</c>.</returns>
/// <exception cref="WebException">The HTTP request failed.</exception>
/// <remarks>
/// Entries that lack a link or time element, or whose time text is not "HH:mm",
/// are skipped instead of aborting the whole crawl.
/// </remarks>
bool ReadEtoday()
{
    const string strListURL = "http://www.etoday.co.kr/news/flash/flash_list.php?tab=2";
    const string strArticleURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=";

    WebRequest request = WebRequest.Create(strListURL);
    request.Credentials = CredentialCache.DefaultCredentials;

    // 'using' guarantees the connection, stream and reader are released even when
    // reading throws; the original only closed them on the success path.
    string strHtml;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
    {
        Console.WriteLine(response.StatusDescription);
        strHtml = reader.ReadToEnd();
    }

    // Parse the raw markup. Entities are decoded per field below: decoding the whole
    // document first would turn escaped quotes/angle brackets in headlines into real
    // markup and corrupt the parsed tree.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(strHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'flash_tab_lst')]/ul/li");
    if (lists == null)
    {
        return false;
    }

    bool bHasNew = false;
    foreach (var item in lists)
    {
        var anchor = item.SelectSingleNode(".//a");
        var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
        if (anchor == null || timeNode == null)
        {
            continue;
        }

        // The article URL is rebuilt from the digits found in the link's href.
        string strHref = WebUtility.HtmlDecode(anchor.GetAttributeValue("href", ""));
        string strURL = strArticleURL + Regex.Replace(strHref, @"\D", "");
        if (m_ItemList.Any(c => c.m_strURL == strURL))
        {
            continue;
        }

        // Invariant culture: the page's "HH:mm" format is fixed and must not depend
        // on the time separator of the user's regional settings.
        DateTime time;
        string strTime = WebUtility.HtmlDecode(timeNode.InnerText).Trim();
        if (!DateTime.TryParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out time))
        {
            continue;
        }

        NEWS_ITEM news = new NEWS_ITEM();
        news.m_strTitle = WebUtility.HtmlDecode(anchor.InnerText);
        news.m_Time = time;
        news.m_strURL = strURL;
        m_ItemList.Add(news);

        bHasNew = true;
    }

    return bHasNew;
}
|
||||
|
||||
/// <summary>
/// Timer callback: polls both news sources and, when either produced a new item,
/// sorts the collected items by time, prints them to the console and appends them
/// to <c>output.txt</c> (EUC-KR encoded).
/// </summary>
/// <param name="sender">The timer raising the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Timer_Tick(object sender, EventArgs e)
{
    // Both sources are always polled; a failure in one must not prevent the other.
    bool bHasNew1 = TryReadSource(ReadAsiaE, "asiae.co.kr");
    bool bHasNew2 = TryReadSource(ReadEtoday, "etoday.co.kr");

    if (!bHasNew1 && !bHasNew2)
    {
        return;
    }

    m_ItemList.Sort((NEWS_ITEM news1, NEWS_ITEM news2) => news1.m_Time.CompareTo(news2.m_Time));

    // Collect the lines first so the file is opened once per tick, not once per item.
    StringBuilder sbOutput = new StringBuilder();
    foreach (var item in m_ItemList)
    {
        string strResult = string.Format("[{0}] {1} ({2})", item.m_Time, item.m_strTitle, item.m_strURL);
        Console.WriteLine(strResult);
        sbOutput.Append(strResult).Append("\n");
    }

    // NOTE(review): the complete list is appended on every update, so items seen in
    // earlier ticks are written to output.txt again. Confirm whether the file is meant
    // to be a running log of snapshots or should be overwritten / receive new items only.
    File.AppendAllText("output.txt", sbOutput.ToString(), Encoding.GetEncoding("EUC-KR"));
}

/// <summary>
/// Runs one source reader and reports a network or stream failure on the console
/// instead of letting it escape from the UI timer handler.
/// </summary>
/// <param name="readSource">The reader to invoke; returns true when it added items.</param>
/// <param name="strSourceName">Label used in the failure message.</param>
/// <returns>The reader's result, or <c>false</c> if it failed.</returns>
private bool TryReadSource(Func<bool> readSource, string strSourceName)
{
    try
    {
        return readSource();
    }
    catch (WebException ex)
    {
        Console.WriteLine("{0}: request failed - {1}", strSourceName, ex.Message);
    }
    catch (IOException ex)
    {
        Console.WriteLine("{0}: read failed - {1}", strSourceName, ex.Message);
    }

    return false;
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user