fix DartAPI
This commit is contained in:
73
Crawler.cs
73
Crawler.cs
@@ -5,6 +5,8 @@ using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
@@ -15,7 +17,7 @@ namespace NewsCrawler
|
||||
{
|
||||
NewsForm m_Listener = null;
|
||||
DateTime m_Today = DateTime.Now;
|
||||
int m_iDartAPIRetry = 5;
|
||||
int m_iDartAPIRetry = 10;
|
||||
Regex m_ProfitChange = new Regex("30%.*이상.*(변경|변동)");
|
||||
|
||||
public Crawler(NewsForm Listener)
|
||||
@@ -164,7 +166,7 @@ namespace NewsCrawler
|
||||
string strTitle = item.SelectSingleNode(".//a").InnerText.Trim();
|
||||
string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText.Trim();
|
||||
string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", "");
|
||||
strURL="http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
|
||||
strURL="https://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
|
||||
|
||||
if(Regex.IsMatch(strTime, @"\d+/\d+")==true)
|
||||
{
|
||||
@@ -193,7 +195,7 @@ namespace NewsCrawler
|
||||
{
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1") as HttpWebRequest;
|
||||
HttpWebRequest HttpReq = WebRequest.Create("https://www.etoday.co.kr//news/flashnews/flash_list") as HttpWebRequest;
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -382,7 +384,7 @@ namespace NewsCrawler
|
||||
HttpWebRequest HttpReq = State.m_HTTPReq;
|
||||
bool bInitial = State.m_bInitial;
|
||||
State.m_Timer.Stop();
|
||||
|
||||
string responseFromServer = "";
|
||||
try
|
||||
{
|
||||
using(HttpWebResponse response = (HttpWebResponse)HttpReq.GetResponse())
|
||||
@@ -391,11 +393,11 @@ namespace NewsCrawler
|
||||
{
|
||||
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
|
||||
{
|
||||
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
|
||||
|
||||
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
|
||||
string strBody = jObj["reportBody"];
|
||||
strBody = strBody.Replace("\\\"", "\"");
|
||||
responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
|
||||
|
||||
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
|
||||
string strBody = jObj["reportBody"];
|
||||
strBody = strBody.Replace("\\\"", "\"");
|
||||
strBody = strBody.Replace("\r\n", "");
|
||||
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
|
||||
doc.LoadHtml(strBody);
|
||||
@@ -434,6 +436,7 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -456,8 +459,8 @@ namespace NewsCrawler
|
||||
REQUEST_STATUS State = (REQUEST_STATUS)result.AsyncState;
|
||||
HttpWebRequest HttpReq = State.m_HTTPReq;
|
||||
bool bInitial = State.m_bInitial;
|
||||
State.m_Timer.Stop();
|
||||
|
||||
State.m_Timer.Stop();
|
||||
string responseFromServer = "";
|
||||
try
|
||||
{
|
||||
using (HttpWebResponse response = (HttpWebResponse)HttpReq.GetResponse())
|
||||
@@ -466,11 +469,10 @@ namespace NewsCrawler
|
||||
{
|
||||
using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
|
||||
{
|
||||
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
|
||||
|
||||
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
|
||||
string strBody = jObj["reportBody"];
|
||||
strBody = strBody.Replace("\\\"", "\"");
|
||||
responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
|
||||
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
|
||||
string strBody = jObj["reportBody"];
|
||||
strBody = strBody.Replace("\\\"", "\"");
|
||||
strBody = strBody.Replace("\r\n", "");
|
||||
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
|
||||
doc.LoadHtml(strBody);
|
||||
@@ -511,6 +513,7 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -583,6 +586,7 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -656,6 +660,7 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -740,6 +745,7 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -770,21 +776,21 @@ namespace NewsCrawler
|
||||
{
|
||||
using(Stream dataStream = response.GetResponseStream())
|
||||
{
|
||||
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
|
||||
using(StreamReader reader = new StreamReader(dataStream, true))
|
||||
{
|
||||
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
|
||||
string responseFromServer = reader.ReadToEnd();
|
||||
|
||||
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
|
||||
string strErrCode = jObj["err_code"];
|
||||
if(strErrCode == "020")
|
||||
string strStatus = jObj["status"];
|
||||
if(strStatus == "020")
|
||||
{
|
||||
Config.SetDartAPIKeyLimit(State.m_strAuthKey);
|
||||
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["err_msg"], State.m_strAuthKey, m_iDartAPIRetry));
|
||||
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["message"], State.m_strAuthKey, m_iDartAPIRetry));
|
||||
return;
|
||||
}
|
||||
else if(strErrCode != "000")
|
||||
else if(strStatus != "000")
|
||||
{
|
||||
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["err_msg"], State.m_strAuthKey, m_iDartAPIRetry));
|
||||
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["message"], State.m_strAuthKey, m_iDartAPIRetry));
|
||||
m_iDartAPIRetry--;
|
||||
return;
|
||||
}
|
||||
@@ -793,10 +799,11 @@ namespace NewsCrawler
|
||||
foreach(var data in List)
|
||||
{
|
||||
string strTime = "00:00";
|
||||
string strTitle = data["rpt_nm"];
|
||||
string strCodeName = data["crp_nm"];
|
||||
string strURL = "http://m.dart.fss.or.kr/html_mdart/MD1007.html?rcpNo=" + data["rcp_no"];
|
||||
string strViewURL = "http://m.dart.fss.or.kr/viewer/main.st?rcpNo=" + data["rcp_no"];
|
||||
string strTitle = data["report_nm"];
|
||||
string strCodeName = data["corp_name"];
|
||||
string strCode = data["stock_code"];
|
||||
string strURL = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=" + data["rcept_no"];
|
||||
string strViewURL = "http://m.dart.fss.or.kr/viewer/main.st?rcpNo=" + data["rcept_no"];
|
||||
|
||||
TextCondition.RESULT MatchResult = m_Listener.CheckMatch(strTitle);
|
||||
if(MatchResult.m_enType != TextCondition.TYPE.NEGATIVE)
|
||||
@@ -841,10 +848,13 @@ namespace NewsCrawler
|
||||
try
|
||||
{
|
||||
string strAuthKey = Config.GetDartAPIKey();
|
||||
string strURL = "http://dart.fss.or.kr/api/search.json?auth="+strAuthKey+"&end_dt="+m_Today.ToString("yyyyMMdd")+"&sort=date&series=desc&page_set=100";
|
||||
string date = m_Today.ToString("yyyyMMdd");
|
||||
string strURL = $"https://opendart.fss.or.kr/api/list.json?crtfc_key={strAuthKey}&bgn_de={date}&end_de={date}&corp_cls=Y&sort=date&page_no=1&page_count=100";
|
||||
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
HttpReq.Method = "GET";
|
||||
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
|
||||
//HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 10000;
|
||||
|
||||
REQUEST_STATUS State = new REQUEST_STATUS();
|
||||
State.m_HTTPReq = HttpReq;
|
||||
@@ -953,7 +963,7 @@ namespace NewsCrawler
|
||||
{
|
||||
try
|
||||
{
|
||||
HttpWebRequest HttpReq = WebRequest.Create("http://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000") as HttpWebRequest;
|
||||
HttpWebRequest HttpReq = WebRequest.Create("https://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000") as HttpWebRequest;
|
||||
HttpReq.Credentials = CredentialCache.DefaultCredentials;
|
||||
HttpReq.Timeout = 2000;
|
||||
|
||||
@@ -961,6 +971,7 @@ namespace NewsCrawler
|
||||
State.m_HTTPReq = HttpReq;
|
||||
State.m_bInitial = bInitial;
|
||||
State.m_Timer.Start();
|
||||
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;
|
||||
|
||||
HttpReq.BeginGetResponse(new AsyncCallback(ResponseKIND), State);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user