new crawler 수정

filterprocess spammer 그래프쪽 버그 수정


git-svn-id: svn://192.168.0.12/source@283 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2016-07-22 08:51:34 +00:00
parent 8212a845d0
commit 55512fe694
3 changed files with 187 additions and 21 deletions

View File

@@ -184,7 +184,7 @@ void SCrawler::UpdateError(QString _strError)
void SCrawler::saveResult(bool ok)
{
qDebug() << "saveResult";
//qDebug() << "saveResult";
if (!ok)
{
@@ -781,6 +781,7 @@ bool SCrawler::saveFrameUrl(QWebFrame *frame)
// Timer callback used by the retry / "load more" logic in this file: it is the
// target of the QTimer::singleShot(..., this, SLOT(reloadPage())) calls.
// Despite the name, the body does not load anything itself; it only calls
// saveResult() with ok = true.
// NOTE(review): presumably saveResult(true) re-runs the frame-saving handlers
// on the already-loaded page - confirm against the full saveResult() body.
void SCrawler::reloadPage()
{
//qDebug() << "reloadPage called";
saveResult(true);
}
@@ -1758,15 +1759,23 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
{
if (m_bUse) return true;
static bool bReplyDone = false;
static int reply_index = 0;
//static int reply_index = 0;
static int iLoaded = 0;
static bool bProcessed = false;
qDebug() << frame->baseUrl().toString();
qDebug() << "executed";
if (bProcessed)
return false;
bProcessed = true;
//qDebug() << frame->baseUrl().toString();
//qDebug() << "executed";
if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment"))
{
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list")));
bProcessed = false;
return false;
}
@@ -1774,16 +1783,75 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
if(frame->baseUrl().toString().contains("sports") && !frame->baseUrl().toString().contains("m_view=1"))
{
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString() + "&m_view=1"));
bProcessed = false;
return false;
}
if(m_nRetryCount < RETRY_MAX && !bReplyDone)
{
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
if (u_cbox_paginate.isNull())
{
++m_nRetryCount;
// qDebug() << m_nRetryCount;
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
bProcessed = false;
return false;
}
else
{
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
if(!a.isNull())
{
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
// qDebug() << "load comments";
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
// qDebug() << lis.count();
if (lis.count() != iLoaded)
{
iLoaded = lis.count();
bProcessed = false;
return false;
}
else
{
bReplyDone = true;
}
}
}
}
/*
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
//qDebug() << lis.count();
if (!u_cbox_paginate.isNull())
{
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
//QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
if(!a.isNull())
{
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load comments";
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
qDebug() << lis.count();
if (lis.count() != iLoaded)
{
iLoaded = lis.count();
bProcessed = false;
return false;
}
}
//return false;
while(!bReplyDone)
{
QWebElement current = Find(a, "em", "class", "u_cbox_page_on __cbox_page_current");
@@ -1806,11 +1874,12 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
break;
}
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(100, this, SLOT(reloadPage()));
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load comments";
return false;
}
QWebElementCollection reply_btns = frame->findAllElements("a[class='u_cbox_btn_reply']");
for(;reply_index < reply_btns.count() ; reply_index++)
{
@@ -1820,12 +1889,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
else
{
btn.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(100, this, SLOT(reloadPage()));
QTimer::singleShot(200, this, SLOT(reloadPage()));
//reply_index += 1;
qDebug() << reply_index;
return false;
}
}
*/
/*
foreach(QWebElement a, reply_btns)
{
@@ -1841,8 +1912,10 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
}
}
*/
}
*/
/*
else
{
if(m_nRetryCount < RETRY_MAX)
@@ -1850,15 +1923,106 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
m_nRetryCount++;
qDebug() << m_nRetryCount;
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
bProcessed = false;
return false;
}
*/
/*
else
{
m_bUse = true;
return true;
}
*/
//}
/*
{
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
foreach (QWebElement li, lis)
{
QWebElement btn = li.findFirst("span[class='u_cbox_reply_cnt']");
QWebElement atag = li.findFirst("a[class='u_cbox_btn_reply']");
if (!btn.isNull() && !atag.isNull())
{
atag.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "click reply:" << btn.toPlainText();
bProcessed = false;
return false;
}
QWebElement div_load_more = li.findFirst("div[class='u_cbox_paginate']");
if (!div_load_more.isNull())
{
QWebElement load_more = div_load_more.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
if (!load_more.isNull())
{
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(300, this, SLOT(reloadPage()));
qDebug() << "load more reply";
bProcessed = false;
return false;
}
}
}
}
*/
QWebElementCollection reply_btns = frame->findAllElements("a[class^='u_cbox_btn_reply']");
foreach (QWebElement ele, reply_btns)
{
QWebElement btn = ele.findFirst("span[class='u_cbox_reply_cnt']");
if ((ele.attribute("class") == "u_cbox_btn_reply") && !btn.isNull())
{
ele.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(250, this, SLOT(reloadPage()));
//qDebug() << "click reply:" << btn.toPlainText();
bProcessed = false;
return false;
}
}
QWebElementCollection allPaginate = frame->documentElement().findAll("div[class='u_cbox_paginate']");
foreach (QWebElement ele, allPaginate)
{
QWebElement load_more = ele.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
if (!load_more.isNull())
{
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
QTimer::singleShot(250, this, SLOT(reloadPage()));
//qDebug() << "load more reply";
bProcessed = false;
return false;
}
}
/*
//for(;reply_index < reply_btns.count() ;)
for (int k = 0; k < reply_btns.count(); ++k)
{
//QWebElement btn = Find(reply_btns[reply_index], "span", "class", "u_cbox_reply_cnt");
QWebElement btn = Find(reply_btns[k], "span", "class", "u_cbox_reply_cnt");
//reply_index += 1;
if(btn.isNull())
continue;
else
{
//QWebElement btnA = Find(reply_btns[reply_index - 1], "a", "class", "u_cbox_btn_reply");
reply_btns[k].evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
qDebug() << "load??????????????";
qDebug() << reply_btns.count();
QTimer::singleShot(300, this, SLOT(reloadPage()));
bProcessed = false;
return false;
}
}
*/
{
QWebElement logo = Find(frame->documentElement(),"div","class","press_logo");
QString strPlatID, strPlatTitle;
@@ -1877,7 +2041,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
int order = 0;
foreach(QWebElement li, lis)
{
qDebug() << "li";
//qDebug() << "li";
QWebElement comment_box = li.findFirst("div[class='u_cbox_comment_box']");
QString strParent;
{
@@ -1913,14 +2077,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
}
else if(strDate.contains(""))
{
current_time = current_time.addDays(-(60 * strTime.toInt()));
current_time = current_time.addSecs(-(60 * strTime.toInt()));
}
else
{
;
}
strDate = current_time.toString("yyyy-MM-dd hh:mm:ss");
qDebug() << strDate;
// qDebug() << strDate;
}
{
QSqlQuery query;
@@ -1973,7 +2137,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
}
else if(strDate.contains(""))
{
current_time = current_time.addDays(-(60 * strTime.toInt()));
current_time = current_time.addSecs(-(60 * strTime.toInt()));
}
else
{
@@ -2006,8 +2170,11 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
qDebug() << "lis count: " << lis.count();
}
//Debug("c:\\data\\replytest.html", frame->toHtml());
m_bUse = true;
bProcessed = false;
return true;
}

View File

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE QtCreatorProject>
<!-- Written by QtCreator 3.3.0, 2016-07-18T16:46:48. -->
<!-- Written by QtCreator 3.3.0, 2016-07-20T11:36:02. -->
<qtcreator>
<data>
<variable>EnvironmentId</variable>
@@ -787,7 +787,7 @@
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">sfilterprocess</value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:C:/source/sfilterprocess/sfilterprocess.pro</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;527&quot; &quot;testall2&quot;</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments">&quot;530&quot; &quot;testall2&quot;</value>
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">sfilterprocess.pro</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseTerminal">true</value>

View File

@@ -1159,7 +1159,7 @@ void SSpammer::SStatsJson::makeTableRank(SDatagroup &_datagroup, const QMap<int,
if (!mapScore.contains(strKey))
{
mapScore.insert(strKey, stSpammerScore());
mapScore[strKey].id_num = _datagroup.getstBody(iterPos1.key())->id_num;
mapScore[strKey].id_num = _datagroup.getstReply(iterPos1.key(), iterPos1.value())->id_num;
}
mapScore[strKey].reply++;
mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed());
@@ -1272,7 +1272,7 @@ void SSpammer::SStatsJson::makeTableRank(SDatagroup &_datagroup, const QMap<int,
if (!mapScore.contains(strKey))
{
mapScore.insert(strKey, stSpammerScore());
mapScore[strKey].id_num = _datagroup.getstBody(iterPos1.key())->id_num;
mapScore[strKey].id_num = _datagroup.getstReply(iterPos1.key(), iterPos1.value())->id_num;
}
mapScore[strKey].reply++;
mapScore[strKey].nickname.insert((*mapReply)[iterPos1.key()][iterPos1.value()][anColumn[SInitializer::E_DATA_article_nickname]].trimmed());
@@ -1358,7 +1358,6 @@ void SSpammer::SStatsJson::makeTable(SDatagroup &_datagroup, const QMap<int, stR
}
bool SSpammer::SStatsJson::uploadData(QSqlDatabase &_db, const int &_nCompany)
{
QSqlQuery query(_db);