new crawler 수정

filterprocess spammer 그래프쪽 버그 수정


git-svn-id: svn://192.168.0.12/source@283 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-07-22 08:51:34 +00:00
parent 8212a845d0
commit 55512fe694
3 changed files with 187 additions and 21 deletions

View File

@@ -184,7 +184,7 @@ void SCrawler::UpdateError(QString _strError)
void SCrawler::saveResult(bool ok) void SCrawler::saveResult(bool ok)
{ {
qDebug() << "saveResult"; //qDebug() << "saveResult";
if (!ok) if (!ok)
{ {
@@ -781,6 +781,7 @@ bool SCrawler::saveFrameUrl(QWebFrame *frame)
void SCrawler::reloadPage() void SCrawler::reloadPage()
{ {
//qDebug() << "reloadPage called";
saveResult(true); saveResult(true);
} }
@@ -1758,15 +1759,23 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
{ {
if (m_bUse) return true; if (m_bUse) return true;
static bool bReplyDone = false; static bool bReplyDone = false;
static int reply_index = 0; //static int reply_index = 0;
static int iLoaded = 0;
static bool bProcessed = false;
qDebug() << frame->baseUrl().toString(); if (bProcessed)
qDebug() << "executed"; return false;
bProcessed = true;
//qDebug() << frame->baseUrl().toString();
//qDebug() << "executed";
if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment")) if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment"))
{ {
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list"))); m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list")));
bProcessed = false;
return false; return false;
} }
@@ -1774,16 +1783,75 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
if(frame->baseUrl().toString().contains("sports") && !frame->baseUrl().toString().contains("m_view=1")) if(frame->baseUrl().toString().contains("sports") && !frame->baseUrl().toString().contains("m_view=1"))
{ {
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString() + "&m_view=1")); m_page->mainFrame()->load(QUrl(frame->baseUrl().toString() + "&m_view=1"));
bProcessed = false;
return false; return false;
} }
if(m_nRetryCount < RETRY_MAX && !bReplyDone)
{
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
if (u_cbox_paginate.isNull())
{
++m_nRetryCount;
// qDebug() << m_nRetryCount;
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
bProcessed = false;
return false;
}
else
{
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button"); QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
if(!a.isNull())
{
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
// qDebug() << "load comments";
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
// qDebug() << lis.count();
if (lis.count() != iLoaded)
{
iLoaded = lis.count();
bProcessed = false;
return false;
}
else
{
bReplyDone = true;
}
}
}
}
/*
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
//qDebug() << lis.count();
if (!u_cbox_paginate.isNull())
{
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
//QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
if(!a.isNull()) if(!a.isNull())
{ {
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load comments";
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
qDebug() << lis.count();
if (lis.count() != iLoaded)
{
iLoaded = lis.count();
bProcessed = false;
return false;
}
}
//return false;
while(!bReplyDone) while(!bReplyDone)
{ {
QWebElement current = Find(a, "em", "class", "u_cbox_page_on __cbox_page_current"); QWebElement current = Find(a, "em", "class", "u_cbox_page_on __cbox_page_current");
@@ -1806,11 +1874,12 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
break; break;
} }
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);"); a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(100, this, SLOT(reloadPage())); QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load comments"; qDebug() << "load comments";
return false; return false;
} }
QWebElementCollection reply_btns = frame->findAllElements("a[class='u_cbox_btn_reply']"); QWebElementCollection reply_btns = frame->findAllElements("a[class='u_cbox_btn_reply']");
for(;reply_index < reply_btns.count() ; reply_index++) for(;reply_index < reply_btns.count() ; reply_index++)
{ {
@@ -1820,12 +1889,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
else else
{ {
btn.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);"); btn.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(100, this, SLOT(reloadPage())); QTimer::singleShot(200, this, SLOT(reloadPage()));
//reply_index += 1; //reply_index += 1;
qDebug() << reply_index; qDebug() << reply_index;
return false; return false;
} }
} }
*/
/* /*
foreach(QWebElement a, reply_btns) foreach(QWebElement a, reply_btns)
{ {
@@ -1841,8 +1912,10 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
} }
} }
*/
} }
*/
/*
else else
{ {
if(m_nRetryCount < RETRY_MAX) if(m_nRetryCount < RETRY_MAX)
@@ -1850,15 +1923,106 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
m_nRetryCount++; m_nRetryCount++;
qDebug() << m_nRetryCount; qDebug() << m_nRetryCount;
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage())); QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
bProcessed = false;
return false; return false;
} }
*/
/*
else else
{ {
m_bUse = true; m_bUse = true;
return true; return true;
} }
*/
//}
/*
{
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
foreach (QWebElement li, lis)
{
QWebElement btn = li.findFirst("span[class='u_cbox_reply_cnt']");
QWebElement atag = li.findFirst("a[class='u_cbox_btn_reply']");
if (!btn.isNull() && !atag.isNull())
{
atag.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "click reply:" << btn.toPlainText();
bProcessed = false;
return false;
}
QWebElement div_load_more = li.findFirst("div[class='u_cbox_paginate']");
if (!div_load_more.isNull())
{
QWebElement load_more = div_load_more.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
if (!load_more.isNull())
{
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load more reply";
bProcessed = false;
return false;
}
}
}
}
*/
QWebElementCollection reply_btns = frame->findAllElements("a[class^='u_cbox_btn_reply']");
foreach (QWebElement ele, reply_btns)
{
QWebElement btn = ele.findFirst("span[class='u_cbox_reply_cnt']");
if ((ele.attribute("class") == "u_cbox_btn_reply") && !btn.isNull())
{
ele.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(250, this, SLOT(reloadPage()));
//qDebug() << "click reply:" << btn.toPlainText();
bProcessed = false;
return false;
}
} }
QWebElementCollection allPaginate = frame->documentElement().findAll("div[class='u_cbox_paginate']");
foreach (QWebElement ele, allPaginate)
{
QWebElement load_more = ele.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
if (!load_more.isNull())
{
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(250, this, SLOT(reloadPage()));
//qDebug() << "load more reply";
bProcessed = false;
return false;
}
}
/*
//for(;reply_index < reply_btns.count() ;)
for (int k = 0; k < reply_btns.count(); ++k)
{
//QWebElement btn = Find(reply_btns[reply_index], "span", "class", "u_cbox_reply_cnt");
QWebElement btn = Find(reply_btns[k], "span", "class", "u_cbox_reply_cnt");
//reply_index += 1;
if(btn.isNull())
continue;
else
{
//QWebElement btnA = Find(reply_btns[reply_index - 1], "a", "class", "u_cbox_btn_reply");
reply_btns[k].evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
qDebug() << "load??????????????";
qDebug() << reply_btns.count();
QTimer::singleShot(300, this, SLOT(reloadPage()));
bProcessed = false;
return false;
}
}
*/
{ {
QWebElement logo = Find(frame->documentElement(),"div","class","press_logo"); QWebElement logo = Find(frame->documentElement(),"div","class","press_logo");
QString strPlatID, strPlatTitle; QString strPlatID, strPlatTitle;
@@ -1877,7 +2041,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
int order = 0; int order = 0;
foreach(QWebElement li, lis) foreach(QWebElement li, lis)
{ {
qDebug() << "li"; //qDebug() << "li";
QWebElement comment_box = li.findFirst("div[class='u_cbox_comment_box']"); QWebElement comment_box = li.findFirst("div[class='u_cbox_comment_box']");
QString strParent; QString strParent;
{ {
@@ -1913,14 +2077,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
} }
else if(strDate.contains("")) else if(strDate.contains(""))
{ {
current_time = current_time.addDays(-(60 * strTime.toInt())); current_time = current_time.addSecs(-(60 * strTime.toInt()));
} }
else else
{ {
; ;
} }
strDate = current_time.toString("yyyy-MM-dd hh:mm:ss"); strDate = current_time.toString("yyyy-MM-dd hh:mm:ss");
qDebug() << strDate; // qDebug() << strDate;
} }
{ {
QSqlQuery query; QSqlQuery query;
@@ -1973,7 +2137,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
} }
else if(strDate.contains("")) else if(strDate.contains(""))
{ {
current_time = current_time.addDays(-(60 * strTime.toInt())); current_time = current_time.addSecs(-(60 * strTime.toInt()));
} }
else else
{ {
@@ -2006,8 +2170,11 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
qDebug() << "lis count: " << lis.count(); qDebug() << "lis count: " << lis.count();
} }
//Debug("c:\\data\\replytest.html", frame->toHtml()); //Debug("c:\\data\\replytest.html", frame->toHtml());
m_bUse = true; m_bUse = true;
bProcessed = false;
return true; return true;
} }

View File

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject> <!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 3.3.0, 2016-07-18T16:46:48. --> <!-- Written by QtCreator 3.3.0, 2016-07-20T11:36:02. -->
<qtcreator> <qtcreator>
<data> <data>
<variable>EnvironmentId</variable> <variable>EnvironmentId</variable>
@@ -787,7 +787,7 @@
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">sfilterprocess</value> <value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">sfilterprocess</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value> <value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/sfilterprocess/sfilterprocess.pro</value> <value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/sfilterprocess/sfilterprocess.pro</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;527&quot; &quot;testall2&quot;</value> <value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;530&quot; &quot;testall2&quot;</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">sfilterprocess.pro</value> <value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">sfilterprocess.pro</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value> <value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value> <value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>

View File

@@ -1159,7 +1159,7 @@ void SSpammer::SStatsJson::makeTableRank(SDatagroup &_datagroup, const QMap<int,
if (!mapScore.contains(strKey)) if (!mapScore.contains(strKey))
{ {
mapScore.insert(strKey, stSpammerScore()); mapScore.insert(strKey, stSpammerScore());
mapScore[strKey].id_num = _datagroup.getstBody(iterPos1.key())->id_num; mapScore[strKey].id_num = _datagroup.getstReply(iterPos1.key(), iterPos1.value())->id_num;
} }
mapScore[strKey].reply++; mapScore[strKey].reply++;
mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed()); mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed());
@@ -1272,7 +1272,7 @@ void SSpammer::SStatsJson::makeTableRank(SDatagroup &_datagroup, const QMap<int,
if (!mapScore.contains(strKey)) if (!mapScore.contains(strKey))
{ {
mapScore.insert(strKey, stSpammerScore()); mapScore.insert(strKey, stSpammerScore());
mapScore[strKey].id_num = _datagroup.getstBody(iterPos1.key())->id_num; mapScore[strKey].id_num = _datagroup.getstReply(iterPos1.key(), iterPos1.value())->id_num;
} }
mapScore[strKey].reply++; mapScore[strKey].reply++;
mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed()); mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed());
@@ -1358,7 +1358,6 @@ void SSpammer::SStatsJson::makeTable(SDatagroup &_datagroup, const QMap<int, stR
} }
bool SSpammer::SStatsJson::uploadData(QSqlDatabase &_db, const int &_nCompany) bool SSpammer::SStatsJson::uploadData(QSqlDatabase &_db, const int &_nCompany)
{ {
QSqlQuery query(_db); QSqlQuery query(_db);