Files
NewsCrawler/NewsForm.cs
mjjo 9ceb78f872 - 이데일리, 머니투데이, 파이낸셜뉴스 추가
- 리스트창에 종목명, 종목코드 삭제
- NewtonSoft.Json 추가
2016-12-06 06:05:36 +09:00

679 lines
20 KiB
C#

using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace NewsCrawler
{
public partial class NewsForm : Form
{
// Project helper instance; constructed here but not referenced elsewhere in this file.
CybosHelper m_CybosHelper = new CybosHelper();
// Code lookup used by ProcessSearchAndBuy (SearchCode) and refreshed through the Apply*/OnManualCodeClick methods.
CodeList m_CodeList = null;
// Keyword matcher used by ProcessSearchAndBuy (Match) and refreshed through ApplyPositive/ApplyManual/ApplyNegative.
TextCondition m_Condition = null;
// Settings window created in the constructor and shown from btnConfig_Click.
ConfigForm m_ConfigForm = null;
// Delegate type InsertItem uses to re-invoke itself through Control.Invoke when called off the UI thread.
delegate void InsertListView(string strTitle, DateTime time, string strURL, string strRef, bool bInitial);
// Timer whose Elapsed event drives CrawlTimer_Tick.
System.Timers.Timer m_CrawlTimer = new System.Timers.Timer();
// Value assigned to m_CrawlTimer.Interval (milliseconds); updated from tbInterval_KeyPress.
int m_iCrawlInterval = 500;
/// <summary>
/// Creates the form: initialises the designer components, calls Config.Init and
/// Util.SetLogView, creates the config form and the code/keyword helpers, runs one
/// initial read of every source (bInitial = true), sorts the list by column 0 ascending,
/// selects the last row, starts the crawl timer and finally calls Test().
/// </summary>
public NewsForm()
{
    InitializeComponent();
    Config.Init();
    Util.SetLogView(tbLog);

    m_ConfigForm = new ConfigForm(this);
    m_CodeList = new CodeList();
    m_Condition = new TextCondition();
    wbView.ScriptErrorsSuppressed = false;

    // Initial population. With bInitial = true InsertItem neither sorts per item
    // nor runs ProcessSearchAndBuy.
    ReadKIND(true);
    ReadDart(true);
    ReadEtoday(true);
    ReadAsiaE(true);
    ReadEdaily(true);
    ReadMoneyToday(true);
    ReadFinacialNews(true);

    // One sort for everything that was just loaded.
    lvList.ListViewItemSorter = new ListViewItemComparer(0, SortOrder.Ascending);
    lvList.Sorting = SortOrder.Ascending;
    lvList.Sort();

    // Select and reveal the last row, if there is one.
    int iLastIndex = lvList.Items.Count - 1;
    if(iLastIndex >= 0)
    {
        lvList.Items[iLastIndex].Selected = true;
        lvList.Select();
        if(lvList.SelectedItems.Count > 0)
        {
            lvList.SelectedItems[0].EnsureVisible();
        }
    }

    // Start periodic crawling.
    m_CrawlTimer.Elapsed += CrawlTimer_Tick;
    m_CrawlTimer.Interval = m_iCrawlInterval;
    m_CrawlTimer.Start();

    Test();
}
/// <summary>
/// When Util.IsDebugging() is true, inserts three fixed sample headlines through
/// InsertItem (with bInitial = false, so they also go through ProcessSearchAndBuy).
/// Does nothing otherwise.
/// </summary>
void Test()
{
    if(!Util.IsDebugging())
    {
        return;
    }

    const string strSource = "asiae";
    InsertItem("[test] 덕산하이메탈, 덕산네오룩스 66만여주 취득14:38",
        new DateTime(2016, 11, 30, 14, 38, 0),
        "http://www.asiae.co.kr/news/sokbo/sokbo_view.htm?idxno=2016112914371817318",
        strSource, false);
    InsertItem("[test] 자연과환경, 12월15일~22일 주주명부폐쇄14:19",
        new DateTime(2016, 11, 30, 14, 19, 0),
        "http://www.asiae.co.kr/news/sokbo/sokbo_view.htm?idxno=2016112914193170301",
        strSource, false);
    InsertItem("[test] 이엠코리아, 한국항공우주산업과 3억원 규모 공급계약14:06",
        new DateTime(2016, 11, 30, 14, 6, 0),
        "http://www.asiae.co.kr/news/sokbo/sokbo_view.htm?idxno=2016112914055964082",
        strSource, false);
}
/// <summary>
/// Looks up a code for the headline and, if one is found, classifies the headline with
/// the keyword matcher and writes a log entry for the result. The visible code only logs;
/// it places no order.
/// </summary>
/// <param name="strTitle">Headline text used for both the code search and the keyword match.</param>
/// <param name="strRef">Name of the news source; only used in the log message.</param>
void ProcessSearchAndBuy(string strTitle, string strRef)
{
    CodeList.CODE_VALUE Code = m_CodeList.SearchCode(strTitle);
    if(Code == null)
        return;

    TextCondition.RESULT MatchResult = m_Condition.Match(strTitle);
    string strMessage = string.Format("[{0}] {1} (keyword:{2}, code:{3})", strRef, strTitle, MatchResult.m_strKeyword, Code.ToString());

    switch(MatchResult.m_enType)
    {
        case TextCondition.TYPE.NEGATIVE:
            Util.Log(Util.LOG_TYPE.NEGATIVE, strMessage);
            break;

        case TextCondition.TYPE.POSITIVE:
            // Each flag is tested against its own mask. Previously the DUPLICATED and
            // MANUAL tests masked with DENIAL, so those branches could not be selected
            // as intended. NOTE(review): assumes CODE_TYPE is a bit-flag enum, as the
            // existing '&' test implies - confirm against CodeList.
            if((Code.m_enType & CodeList.CODE_TYPE.DENIAL) == CodeList.CODE_TYPE.DENIAL)
                Util.Log(Util.LOG_TYPE.DENIAL, strMessage);
            else if((Code.m_enType & CodeList.CODE_TYPE.DUPLICATED) == CodeList.CODE_TYPE.DUPLICATED)
                Util.Log(Util.LOG_TYPE.DUPLICATED, strMessage);
            else if((Code.m_enType & CodeList.CODE_TYPE.MANUAL) == CodeList.CODE_TYPE.MANUAL)
                Util.Log(Util.LOG_TYPE.MANUAL_CODE, strMessage);
            else
                Util.Log(Util.LOG_TYPE.POSITIVE, strMessage);
            break;

        case TextCondition.TYPE.MANUAL:
            Util.Log(Util.LOG_TYPE.MANUAL_KEYWORD, strMessage);
            break;

        case TextCondition.TYPE.NOT_MATCHED:
            Util.Log(Util.LOG_TYPE.DEBUG, string.Format("[NOT_MATCHED] [{0}] {1}({2})", strRef, strTitle, Code.ToString()));
            break;
    }
}
// URLs already added to lvList by InsertItem. Only touched on the UI thread
// (the non-Invoke branch of InsertItem), so no locking is used.
readonly HashSet<string> m_InsertedURLs = new HashSet<string>();

/// <summary>
/// Adds one headline row (time, title, source, URL) to lvList unless a row with the same
/// URL was already added. Calls made from another thread are marshalled to the UI thread
/// with Control.Invoke. Any exception is caught and logged.
/// </summary>
/// <param name="strTitle">Headline text.</param>
/// <param name="time">Article time; shown as HH:mm:ss.</param>
/// <param name="strURL">Article URL; also the duplicate-detection key.</param>
/// <param name="strRef">Display name of the news source.</param>
/// <param name="bInitial">
/// When true (initial load) the per-item sort and ProcessSearchAndBuy are skipped.
/// </param>
private void InsertItem(string strTitle, DateTime time, string strURL, string strRef, bool bInitial)
{
    try
    {
        if(this.InvokeRequired)
        {
            this.Invoke(new InsertListView(InsertItem), strTitle, time, strURL, strRef, bInitial);
            return;
        }

        // Duplicate check by hash lookup instead of scanning every row: each crawl
        // pass re-offers every item of every feed, so a linear scan per item was
        // quadratic work on the UI thread.
        if(m_InsertedURLs.Contains(strURL))
            return;

        lvList.Items.Add(new ListViewItem(new string[] { time.ToString("HH:mm:ss"), strTitle, strRef, strURL }));
        m_InsertedURLs.Add(strURL);

        if(chAutoSelect.Checked)
        {
            lvList.Items[lvList.Items.Count - 1].Selected = true;
            lvList.Select();
            if(lvList.SelectedItems.Count > 0)
                lvList.SelectedItems[0].EnsureVisible();
        }

        // Width -2 asks the ListView to auto-size the column.
        foreach(ColumnHeader col in lvList.Columns)
            col.Width = -2;

        if(!bInitial)
        {
            lvList.Sort();
            ProcessSearchAndBuy(strTitle, strRef);
        }
    }
    catch(Exception ex)
    {
        Util.Log(Util.LOG_TYPE.ERROR, ex.Message);
    }
}
/// <summary>
/// Downloads the AsiaE breaking-news list page (EUC-KR), extracts each list entry's title,
/// time and link, and passes today's entries to InsertItem with source "아시아경제".
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadAsiaE(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        string strServerURL = "http://www.asiae.co.kr";
        WebRequest request = WebRequest.Create("http://www.asiae.co.kr/news/sokbo/sokbo_left.htm");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            // Collapse doubled quotes before handing the markup to the parser.
            responseFromServer = responseFromServer.Replace("\"\"", "\"");
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'txtform')]/ul/li");
            // SelectNodes yields null when nothing matches; treat that as "no items",
            // the same way ReadKIND does, instead of failing with a NullReferenceException.
            if(lists == null)
                return bHasNew;

            foreach(var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span");
                // An entry without link or time must not abort the remaining entries.
                if(anchor == null || timeNode == null)
                    continue;

                // Use whichever is longer: the title attribute or the visible link text.
                string strTitle1 = anchor.GetAttributeValue("title", "");
                string strTitle2 = anchor.FirstChild != null ? anchor.FirstChild.InnerText : "";
                string strTitle = (strTitle1.Length > strTitle2.Length ? strTitle1 : strTitle2);
                string strTime = timeNode.InnerText;
                string strURL = strServerURL + anchor.GetAttributeValue("href", "");

                // A time cell of the form "digits/digits" is a date, i.e. not today's article.
                if(Regex.IsMatch(strTime, @"\d+/\d+"))
                    continue;

                // InvariantCulture: in a custom format ':' stands for the culture's time
                // separator, so CurrentCulture could reject "14:38" on some systems.
                InsertItem(strTitle, DateTime.ParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture), strURL, "아시아경제", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Downloads the Etoday flash-news list (UTF-8), extracts each entry's title, time and
/// article number, and passes today's entries to InsertItem with source "이투데이".
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadEtoday(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        WebRequest request = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//div[contains(@class, 'flash_tab_lst')]/ul/li");
            // SelectNodes yields null when nothing matches; treat that as "no items",
            // the same way ReadKIND does, instead of failing with a NullReferenceException.
            if(lists == null)
                return bHasNew;

            foreach(var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]");
                // An entry without link or time must not abort the remaining entries.
                if(anchor == null || timeNode == null)
                    continue;

                string strTitle = anchor.InnerText;
                string strTime = timeNode.InnerText;
                // Keep only the digits of the href and use them as the article number.
                string strURL = anchor.GetAttributeValue("href", "");
                strURL = "http://www.etoday.co.kr/news/section/newsview.php?idxno=" + Regex.Replace(strURL, @"\D", "");

                // A time cell of the form "digits/digits" is a date, i.e. not today's article.
                if(Regex.IsMatch(strTime, @"\d+/\d+"))
                    continue;

                // InvariantCulture: in a custom format ':' stands for the culture's time
                // separator, so CurrentCulture could reject "14:38" on some systems.
                InsertItem(strTitle, DateTime.ParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture), strURL, "이투데이", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Downloads the DART disclosure list (UTF-8), reads title, time and link from each table
/// row, and passes today's rows to InsertItem with source "DART".
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadDart(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        string strServerURL = "https://dart.fss.or.kr";
        WebRequest request = WebRequest.Create("https://dart.fss.or.kr/dsac001/mainAll.do");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//div[@id='listContents']/div[contains(@class, 'table_list')]/table/tr");
            // SelectNodes yields null when nothing matches; treat that as "no items",
            // the same way ReadKIND does, instead of failing with a NullReferenceException.
            if(lists == null)
                return bHasNew;

            foreach(var item in lists)
            {
                // Rows without <td> cells (e.g. a header row) give a null collection.
                var rows = item.SelectNodes(".//td");
                if(rows == null || rows.Count < 3)
                    continue;

                var timeNode = item.SelectSingleNode(".//td[contains(@class, 'cen_txt')]");
                var anchor = rows[2].SelectSingleNode(".//a");
                // A row without time cell or link must not abort the remaining rows.
                if(timeNode == null || anchor == null)
                    continue;

                string strTitle = rows[2].InnerText.Trim();
                string strTime = timeNode.InnerText.Trim();
                string strURL = strServerURL + anchor.GetAttributeValue("href", "").Trim();

                // A time cell of the form "digits/digits" is a date, i.e. not today's entry.
                if(Regex.IsMatch(strTime, @"\d+/\d+"))
                    continue;

                // InvariantCulture: in a custom format ':' stands for the culture's time
                // separator, so CurrentCulture could reject "14:38" on some systems.
                InsertItem(strTitle, DateTime.ParseExact(strTime, "HH:mm", CultureInfo.InvariantCulture), strURL, "DART", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Returns the text between "&lt;![CDATA[" and "]]&gt;" when <paramref name="strText"/> is
/// wrapped that way; otherwise returns the text unchanged.
/// </summary>
static string StripCData(string strText)
{
    const string strOpen = "<![CDATA[";
    const string strClose = "]]>";
    if(strText != null
        && strText.Length >= strOpen.Length + strClose.Length
        && strText.StartsWith(strOpen, StringComparison.Ordinal)
        && strText.EndsWith(strClose, StringComparison.Ordinal))
    {
        return strText.Substring(strOpen.Length, strText.Length - strOpen.Length - strClose.Length);
    }
    return strText;
}

/// <summary>
/// Downloads the KIND RSS feed (UTF-8), reads title, pubDate and link of every
/// &lt;item&gt;, and passes each one to InsertItem with source "KIND". Only the time of day
/// of pubDate is kept; the date part is replaced by today's date.
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadKIND(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        WebRequest request = WebRequest.Create("http://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//item");
            if(lists == null)
                return false;

            foreach(var item in lists)
            {
                var titleNode = item.SelectSingleNode(".//title");
                var dateNode = item.ChildNodes["pubDate"];
                var linkNode = item.ChildNodes["link"];
                // The link text is read from the node following <link>, as before.
                var linkTextNode = linkNode != null ? linkNode.NextSibling : null;
                // An item missing any of these must not abort the remaining items.
                if(titleNode == null || dateNode == null || linkTextNode == null)
                    continue;

                // Previously the CDATA wrapper was cut off with fixed offsets, which
                // threw or mangled the text whenever the wrapper was absent.
                string strTitle = StripCData(titleNode.InnerText);
                string strURL = StripCData(linkTextNode.InnerText);

                // Keep hour/minute/second of pubDate on today's date. This yields the same
                // value as the former format-to-"HH:mm:ss"-and-reparse round trip, without
                // depending on the current culture's time separator.
                DateTime published = Convert.ToDateTime(dateNode.InnerText);
                DateTime time = DateTime.Today.Add(new TimeSpan(published.Hour, published.Minute, published.Second));

                InsertItem(strTitle, time, strURL, "KIND", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Downloads the Edaily real-time news list (EUC-KR), reads title, time and id of each
/// entry, and passes them to InsertItem with source "이데일리". Reading stops at the first
/// entry whose time is later than the previous entry's time (initially the current time).
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadEdaily(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        const string strIDPrefix = "NewsIcon_";
        string strServerURL = "http://www.edaily.co.kr/news/realtime/realtime_NewsRead.asp";
        WebRequest request = WebRequest.Create("http://www.edaily.co.kr/news/realtime/realtime_NewsList_1.asp");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        DateTime PrevTime = DateTime.Now;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//li[contains(@id, 'NewsIcon')]");
            // SelectNodes yields null when nothing matches; treat that as "no items",
            // the same way ReadKIND does, instead of failing with a NullReferenceException.
            if(lists == null)
                return bHasNew;

            foreach(var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span");
                string strID = item.GetAttributeValue("id", "");
                // Skip entries that lack a link, a time, or an id of the form "NewsIcon_<id>".
                if(anchor == null || timeNode == null
                    || strID.Length <= strIDPrefix.Length
                    || !strID.StartsWith(strIDPrefix, StringComparison.Ordinal))
                    continue;

                string strTitle = anchor.GetAttributeValue("title", "");
                string strURL = strServerURL + "?newsid=" + strID.Substring(strIDPrefix.Length);

                // The parse result is now checked. Previously a failed parse left Time at
                // DateTime.MinValue, which inserted a 00:00:00 row and reset PrevTime so
                // that the next valid entry ended the loop.
                DateTime Time;
                if(!DateTime.TryParseExact(timeNode.InnerText.Trim(), "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out Time))
                    continue;

                // A time later than the previous entry's ends the read.
                // NOTE(review): presumably the list is newest-first and this marks the
                // wrap into the previous day - confirm against the page.
                if(Time > PrevTime)
                    break;
                PrevTime = Time;

                InsertItem(strTitle, Time, strURL, "이데일리", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Downloads the MoneyToday news-flash page (EUC-KR), reads title, time and article id of
/// each entry, and passes them to InsertItem with source "머니투데이". The article id is
/// the second single-quoted value inside the link's href. Reading stops at the first entry
/// whose time is later than the previous entry's time (initially the current time).
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadMoneyToday(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        string strServerURL = "http://news.mt.co.kr/mtview.php?no=";
        WebRequest request = WebRequest.Create("http://news.mt.co.kr/newsflash/newsflash.html");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        DateTime PrevTime = DateTime.Now;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("EUC-KR")))
        {
            string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(responseFromServer);

            var lists = doc.DocumentNode.SelectNodes("//div[@id='articleList']//li[@class='bundle']");
            // SelectNodes yields null when nothing matches; treat that as "no items",
            // the same way ReadKIND does, instead of failing with a NullReferenceException.
            if(lists == null)
                return bHasNew;

            foreach(var item in lists)
            {
                var anchor = item.SelectSingleNode(".//a");
                var timeNode = item.SelectSingleNode(".//span");
                // An entry without link or time must not abort the remaining entries.
                if(anchor == null || timeNode == null)
                    continue;

                string strTitle = anchor.InnerText;
                string strHref = anchor.GetAttributeValue("href", "");

                // Locate the 3rd and 4th single quotes: they enclose the article id.
                int iQuote1 = strHref.IndexOf('\'');
                int iQuote2 = iQuote1 < 0 ? -1 : strHref.IndexOf('\'', iQuote1 + 1);
                int iStart = iQuote2 < 0 ? -1 : strHref.IndexOf('\'', iQuote2 + 1);
                int iEnd = iStart < 0 ? -1 : strHref.IndexOf('\'', iStart + 1);
                if(iEnd < 0)
                    continue;

                // Length is the distance between the quotes minus one. The former
                // "iEnd-iStart+1" also copied the closing quote and the character after it.
                string strID = strHref.Substring(iStart + 1, iEnd - iStart - 1);
                string strURL = strServerURL + strID;

                // The parse result is now checked. Previously a failed parse left Time at
                // DateTime.MinValue, which inserted a 00:00:00 row and reset PrevTime so
                // that the next valid entry ended the loop.
                DateTime Time;
                if(!DateTime.TryParseExact(timeNode.InnerText.Trim(), "HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out Time))
                    continue;

                // A time later than the previous entry's ends the read.
                // NOTE(review): presumably the list is newest-first and this marks the
                // wrap into the previous day - confirm against the page.
                if(Time > PrevTime)
                    break;
                PrevTime = Time;

                InsertItem(strTitle, Time, strURL, "머니투데이", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Downloads the Financial News flash feed as JSON (UTF-8) and passes every entry dated
/// today to InsertItem with source "파이낸셜뉴스". Each entry supplies "title", "date"
/// (format "yyyy.MM.dd HH:mm") and "code"; the article URL is the server URL plus the code.
/// Exceptions are caught and logged.
/// </summary>
/// <param name="bInitial">Forwarded to InsertItem.</param>
/// <returns>
/// The value of bHasNew, which is never set to true in this method, so the result is always false.
/// </returns>
bool ReadFinacialNews(bool bInitial = false)
{
    bool bHasNew = false;
    try
    {
        string strServerURL = "http://www.fnnews.com/news/";
        WebRequest request = WebRequest.Create("http://www.fnnews.com/newsflash/today/data?type=json");
        request.Credentials = CredentialCache.DefaultCredentials;
        request.Timeout = 2000;
        DateTime Today = DateTime.Now;
        using(HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using(Stream dataStream = response.GetResponseStream())
        using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("UTF-8")))
        {
            // Parse the raw JSON first. HTML-decoding the whole document beforehand could
            // turn an entity such as &quot; into a bare quote and make the JSON invalid.
            string responseFromServer = reader.ReadToEnd();
            dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
            if(jObj == null)
                return bHasNew;

            foreach(var data in jObj)
            {
                string strTitle = data["title"];
                string strTime = data["date"];
                string strCode = data["code"];

                // Skip entries whose date cannot be parsed. Previously the result was
                // ignored and such entries carried DateTime.MinValue.
                DateTime Time;
                if(!DateTime.TryParseExact(strTime, "yyyy.MM.dd HH:mm", CultureInfo.InvariantCulture, DateTimeStyles.None, out Time))
                    continue;

                // Compare full calendar dates. DayOfYear alone ignores the year.
                if(Time.Date != Today.Date)
                    continue;

                // Entities in the headline are decoded per value, after parsing.
                InsertItem(WebUtility.HtmlDecode(strTitle), Time, strServerURL + strCode, "파이낸셜뉴스", bInitial);
            }
        }
    }
    catch(Exception e)
    {
        Util.Log(Util.LOG_TYPE.ERROR, e.ToString());
    }
    return bHasNew;
}
/// <summary>
/// Elapsed handler of m_CrawlTimer. Disables the timer, reads every source once when the
/// auto-reload box is checked, then applies the current m_iCrawlInterval and re-enables
/// the timer.
/// </summary>
private void CrawlTimer_Tick(object sender, EventArgs e)
{
    // Keep the timer off while this pass is running.
    m_CrawlTimer.Enabled = false;

    bool bReload = chAutoReload.Checked;
    if(bReload)
    {
        ReadKIND();
        ReadDart();
        ReadAsiaE();
        ReadEtoday();
        ReadEdaily();
        ReadMoneyToday();
        ReadFinacialNews();
    }

    // Pick up an interval changed through tbInterval_KeyPress, then resume.
    m_CrawlTimer.Interval = m_iCrawlInterval;
    m_CrawlTimer.Enabled = true;
}
/// <summary>
/// When a row is selected, navigates the embedded browser to the URL stored in the row's
/// link column, with script error dialogs suppressed.
/// </summary>
private void lvList_SelectedIndexChanged(object sender, EventArgs e)
{
    if(lvList.SelectedItems.Count > 0)
    {
        ListViewItem selected = lvList.SelectedItems[0];
        string strURL = selected.SubItems[chLink.Index].Text;
        wbView.ScriptErrorsSuppressed = true;
        wbView.Navigate(strURL);
    }
}
/// <summary>
/// On Enter, reads the crawl interval in seconds from tbInterval, stores it in
/// m_iCrawlInterval as milliseconds, and rewrites the box as "0.0초". Input that is empty,
/// not a number, below one millisecond or too large for an int falls back to 500 ms.
/// CrawlTimer_Tick applies the new value to the timer.
/// </summary>
private void tbInterval_KeyPress(object sender, KeyPressEventArgs e)
{
    if(e.KeyChar != Convert.ToChar(Keys.Enter))
        return;

    const int iDefaultInterval = 500;

    // Keep only ASCII digits and '.'. The former pattern @"\D\." matched "a non-digit
    // followed by a dot", so the "초" suffix written below stayed in the text and the
    // next Enter made float.Parse throw.
    string strNumber = Regex.Replace(tbInterval.Text, @"[^0-9.]", "");

    m_iCrawlInterval = iDefaultInterval;
    float fSeconds;
    // InvariantCulture matches the '.' that the filter above lets through.
    if(float.TryParse(strNumber, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out fSeconds))
    {
        float fMilliseconds = fSeconds * 1000;
        // Only convert values that fit an int and are at least 1 ms.
        if(fMilliseconds >= 1 && fMilliseconds < int.MaxValue)
            m_iCrawlInterval = (int)fMilliseconds;
    }
    if(m_iCrawlInterval < 1)
        m_iCrawlInterval = iDefaultInterval;

    // Written with InvariantCulture so that the text parses again on the next Enter.
    tbInterval.Text = (m_iCrawlInterval / (float)1000).ToString("##0.0", CultureInfo.InvariantCulture) + "초";
}
/// <summary>
/// Re-sorts the list by the clicked column. The direction is the opposite of the list's
/// current Sorting value: Descending or None becomes Ascending, anything else becomes
/// Descending.
/// </summary>
private void lvList_ColumnClick(object sender, ColumnClickEventArgs e)
{
    SortOrder Order;
    switch(lvList.Sorting)
    {
        case SortOrder.Descending:
        case SortOrder.None:
            Order = SortOrder.Ascending;
            break;
        default:
            Order = SortOrder.Descending;
            break;
    }

    lvList.ListViewItemSorter = new ListViewItemComparer(e.Column, Order);
    lvList.Sorting = Order;
    lvList.Sort();
}
/// <summary>
/// Shows the config form if it is not among Application.OpenForms; otherwise brings the
/// already open config form to the front.
/// </summary>
private void btnConfig_Click(object sender, EventArgs e)
{
    bool bOpen = Application.OpenForms
        .Cast<Form>()
        .Any(form => form == m_ConfigForm);

    if(bOpen)
    {
        m_ConfigForm.BringToFront();
    }
    else
    {
        m_ConfigForm.Show();
    }
}
/// <summary>Forwards <paramref name="iPrice"/> to m_CodeList.MakeManualList.</summary>
public void OnManualCodeClick(int iPrice)
{
    this.m_CodeList.MakeManualList(iPrice);
}
/// <summary>Calls m_CodeList.LoadDenialList.</summary>
public void ApplyDenialCode()
{
    this.m_CodeList.LoadDenialList();
}
/// <summary>Calls m_CodeList.LoadDuplicatedList.</summary>
public void ApplyDuplicatedCode()
{
    this.m_CodeList.LoadDuplicatedList();
}
/// <summary>Calls m_Condition.LoadPositive.</summary>
public void ApplyPositive()
{
    this.m_Condition.LoadPositive();
}
/// <summary>Calls m_Condition.LoadManual.</summary>
public void ApplyManual()
{
    this.m_Condition.LoadManual();
}
/// <summary>Calls m_Condition.LoadNegative.</summary>
public void ApplyNegative()
{
    this.m_Condition.LoadNegative();
}
/// <summary>Calls m_CodeList.LoadSynonym.</summary>
public void ApplySynonym()
{
    this.m_CodeList.LoadSynonym();
}
/// <summary>FormClosing handler; calls Util.Clear.</summary>
private void NewsForm_FormClosing(object sender, FormClosingEventArgs e)
{
    // The only shutdown step performed here.
    Util.Clear();
}
}
/// <summary>
/// Orders ListViewItem objects by the text of one sub-item column, ascending or descending,
/// using string.Compare.
/// </summary>
class ListViewItemComparer : IComparer
{
    // Index of the sub-item whose text is compared.
    int m_iColumn = 0;
    // Ascending compares x to y; any other value compares y to x.
    SortOrder m_Order = SortOrder.Ascending;

    /// <summary>Creates a comparer for the given column index and sort direction.</summary>
    public ListViewItemComparer(int column, SortOrder Order)
    {
        m_iColumn = column;
        m_Order = Order;
    }

    /// <summary>
    /// Compares two ListViewItem objects by the text in the configured column.
    /// </summary>
    /// <param name="x">First item; must be a ListViewItem.</param>
    /// <param name="y">Second item; must be a ListViewItem.</param>
    /// <returns>The string.Compare result, with the operands swapped unless the order is Ascending.</returns>
    public int Compare(object x, object y)
    {
        ListViewItem first = (ListViewItem)x;
        ListViewItem second = (ListViewItem)y;

        bool bAscending = (m_Order == SortOrder.Ascending);
        ListViewItem left = bAscending ? first : second;
        ListViewItem right = bAscending ? second : first;

        return string.Compare(left.SubItems[m_iColumn].Text, right.SubItems[m_iColumn].Text);
    }
}
}