fix DartAPI

This commit is contained in:
mjjo
2021-07-15 14:03:28 +09:00
parent cb4d22014c
commit f27351c335
44 changed files with 9536 additions and 27929 deletions

View File

@@ -5,6 +5,8 @@ using System.Globalization;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
@@ -15,7 +17,7 @@ namespace NewsCrawler
{
NewsForm m_Listener = null;
DateTime m_Today = DateTime.Now;
int m_iDartAPIRetry = 5;
int m_iDartAPIRetry = 10;
Regex m_ProfitChange = new Regex("30%.*이상.*(변경|변동)");
public Crawler(NewsForm Listener)
@@ -164,7 +166,7 @@ namespace NewsCrawler
string strTitle = item.SelectSingleNode(".//a").InnerText.Trim();
string strTime = item.SelectSingleNode(".//span[contains(@class, 'flash_press')]").InnerText.Trim();
string strURL = item.SelectSingleNode(".//a").GetAttributeValue("href", "");
strURL="http://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
strURL="https://www.etoday.co.kr/news/section/newsview.php?idxno="+Regex.Replace(strURL, @"\D", "");
if(Regex.IsMatch(strTime, @"\d+/\d+")==true)
{
@@ -193,7 +195,7 @@ namespace NewsCrawler
{
try
{
HttpWebRequest HttpReq = WebRequest.Create("http://www.etoday.co.kr/news/flash/flash_list.php?tab=1") as HttpWebRequest;
HttpWebRequest HttpReq = WebRequest.Create("https://www.etoday.co.kr//news/flashnews/flash_list") as HttpWebRequest;
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -382,7 +384,7 @@ namespace NewsCrawler
HttpWebRequest HttpReq = State.m_HTTPReq;
bool bInitial = State.m_bInitial;
State.m_Timer.Stop();
string responseFromServer = "";
try
{
using(HttpWebResponse response = (HttpWebResponse)HttpReq.GetResponse())
@@ -391,11 +393,11 @@ namespace NewsCrawler
{
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
{
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
string strBody = jObj["reportBody"];
strBody = strBody.Replace("\\\"", "\"");
responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
string strBody = jObj["reportBody"];
strBody = strBody.Replace("\\\"", "\"");
strBody = strBody.Replace("\r\n", "");
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(strBody);
@@ -434,6 +436,7 @@ namespace NewsCrawler
try
{
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -456,8 +459,8 @@ namespace NewsCrawler
REQUEST_STATUS State = (REQUEST_STATUS)result.AsyncState;
HttpWebRequest HttpReq = State.m_HTTPReq;
bool bInitial = State.m_bInitial;
State.m_Timer.Stop();
State.m_Timer.Stop();
string responseFromServer = "";
try
{
using (HttpWebResponse response = (HttpWebResponse)HttpReq.GetResponse())
@@ -466,11 +469,10 @@ namespace NewsCrawler
{
using (StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
{
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
string strBody = jObj["reportBody"];
strBody = strBody.Replace("\\\"", "\"");
responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
string strBody = jObj["reportBody"];
strBody = strBody.Replace("\\\"", "\"");
strBody = strBody.Replace("\r\n", "");
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(strBody);
@@ -511,6 +513,7 @@ namespace NewsCrawler
try
{
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -583,6 +586,7 @@ namespace NewsCrawler
try
{
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -656,6 +660,7 @@ namespace NewsCrawler
try
{
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -740,6 +745,7 @@ namespace NewsCrawler
try
{
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -770,21 +776,21 @@ namespace NewsCrawler
{
using(Stream dataStream = response.GetResponseStream())
{
using(StreamReader reader = new StreamReader(dataStream, Encoding.GetEncoding("utf-8")))
using(StreamReader reader = new StreamReader(dataStream, true))
{
string responseFromServer = WebUtility.HtmlDecode(reader.ReadToEnd());
string responseFromServer = reader.ReadToEnd();
dynamic jObj = Newtonsoft.Json.JsonConvert.DeserializeObject(responseFromServer);
string strErrCode = jObj["err_code"];
if(strErrCode == "020")
string strStatus = jObj["status"];
if(strStatus == "020")
{
Config.SetDartAPIKeyLimit(State.m_strAuthKey);
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["err_msg"], State.m_strAuthKey, m_iDartAPIRetry));
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["message"], State.m_strAuthKey, m_iDartAPIRetry));
return;
}
else if(strErrCode != "000")
else if(strStatus != "000")
{
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["err_msg"], State.m_strAuthKey, m_iDartAPIRetry));
Util.Log(Util.LOG_TYPE.ERROR, string.Format("[DartAPI] {0} (key:{1}, retry:{2})", jObj["message"], State.m_strAuthKey, m_iDartAPIRetry));
m_iDartAPIRetry--;
return;
}
@@ -793,10 +799,11 @@ namespace NewsCrawler
foreach(var data in List)
{
string strTime = "00:00";
string strTitle = data["rpt_nm"];
string strCodeName = data["crp_nm"];
string strURL = "http://m.dart.fss.or.kr/html_mdart/MD1007.html?rcpNo=" + data["rcp_no"];
string strViewURL = "http://m.dart.fss.or.kr/viewer/main.st?rcpNo=" + data["rcp_no"];
string strTitle = data["report_nm"];
string strCodeName = data["corp_name"];
string strCode = data["stock_code"];
string strURL = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=" + data["rcept_no"];
string strViewURL = "http://m.dart.fss.or.kr/viewer/main.st?rcpNo=" + data["rcept_no"];
TextCondition.RESULT MatchResult = m_Listener.CheckMatch(strTitle);
if(MatchResult.m_enType != TextCondition.TYPE.NEGATIVE)
@@ -841,10 +848,13 @@ namespace NewsCrawler
try
{
string strAuthKey = Config.GetDartAPIKey();
string strURL = "http://dart.fss.or.kr/api/search.json?auth="+strAuthKey+"&end_dt="+m_Today.ToString("yyyyMMdd")+"&sort=date&series=desc&page_set=100";
string date = m_Today.ToString("yyyyMMdd");
string strURL = $"https://opendart.fss.or.kr/api/list.json?crtfc_key={strAuthKey}&bgn_de={date}&end_de={date}&corp_cls=Y&sort=date&page_no=1&page_count=100";
HttpWebRequest HttpReq = WebRequest.Create(strURL) as HttpWebRequest;
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
HttpReq.Method = "GET";
HttpReq.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0)";
//HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 10000;
REQUEST_STATUS State = new REQUEST_STATUS();
State.m_HTTPReq = HttpReq;
@@ -953,7 +963,7 @@ namespace NewsCrawler
{
try
{
HttpWebRequest HttpReq = WebRequest.Create("http://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000") as HttpWebRequest;
HttpWebRequest HttpReq = WebRequest.Create("https://kind.krx.co.kr/disclosure/rsstodaydistribute.do?method=searchRssTodayDistribute&repIsuSrtCd=&mktTpCd=0&searchCorpName=&currentPageSize=1000") as HttpWebRequest;
HttpReq.Credentials = CredentialCache.DefaultCredentials;
HttpReq.Timeout = 2000;
@@ -961,6 +971,7 @@ namespace NewsCrawler
State.m_HTTPReq = HttpReq;
State.m_bInitial = bInitial;
State.m_Timer.Start();
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;
HttpReq.BeginGetResponse(new AsyncCallback(ResponseKIND), State);
}