new crawler 수정
filterprocess spammer 그래프 쪽 버그 수정
git-svn-id: svn://192.168.0.12/source@283 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
@@ -184,7 +184,7 @@ void SCrawler::UpdateError(QString _strError)
|
||||
|
||||
void SCrawler::saveResult(bool ok)
|
||||
{
|
||||
qDebug() << "saveResult";
|
||||
//qDebug() << "saveResult";
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
@@ -781,6 +781,7 @@ bool SCrawler::saveFrameUrl(QWebFrame *frame)
|
||||
|
||||
// Slot named in the QTimer::singleShot(..., this, SLOT(reloadPage())) calls
// made by saveFrameNewsComment. Its whole body is a single call that
// re-enters saveResult().
void SCrawler::reloadPage()
|
||||
{
|
||||
//qDebug() << "reloadPage called";
|
||||
// Passes ok == true unconditionally; no load or status check happens here.
saveResult(true);
|
||||
}
|
||||
|
||||
@@ -1758,15 +1759,23 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
{
|
||||
if (m_bUse) return true;
|
||||
static bool bReplyDone = false;
|
||||
static int reply_index = 0;
|
||||
//static int reply_index = 0;
|
||||
static int iLoaded = 0;
|
||||
static bool bProcessed = false;
|
||||
|
||||
qDebug() << frame->baseUrl().toString();
|
||||
qDebug() << "executed";
|
||||
if (bProcessed)
|
||||
return false;
|
||||
bProcessed = true;
|
||||
|
||||
|
||||
//qDebug() << frame->baseUrl().toString();
|
||||
//qDebug() << "executed";
|
||||
|
||||
|
||||
if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment"))
|
||||
{
|
||||
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list")));
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1774,16 +1783,75 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
if(frame->baseUrl().toString().contains("sports") && !frame->baseUrl().toString().contains("m_view=1"))
|
||||
{
|
||||
m_page->mainFrame()->load(QUrl(frame->baseUrl().toString() + "&m_view=1"));
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
|
||||
|
||||
if(!a.isNull())
|
||||
if(m_nRetryCount < RETRY_MAX && !bReplyDone)
|
||||
{
|
||||
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
|
||||
if (u_cbox_paginate.isNull())
|
||||
{
|
||||
++m_nRetryCount;
|
||||
// qDebug() << m_nRetryCount;
|
||||
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
|
||||
if(!a.isNull())
|
||||
{
|
||||
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
// qDebug() << "load comments";
|
||||
|
||||
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
|
||||
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
|
||||
|
||||
// qDebug() << lis.count();
|
||||
if (lis.count() != iLoaded)
|
||||
{
|
||||
iLoaded = lis.count();
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
bReplyDone = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
QWebElement u_cbox_paginate = Find(frame->documentElement(), "div", "class", "u_cbox_paginate");
|
||||
//qDebug() << lis.count();
|
||||
|
||||
if (!u_cbox_paginate.isNull())
|
||||
{
|
||||
QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
|
||||
//QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
|
||||
|
||||
if(!a.isNull())
|
||||
{
|
||||
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
qDebug() << "load comments";
|
||||
|
||||
//QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button");
|
||||
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
|
||||
|
||||
qDebug() << lis.count();
|
||||
if (lis.count() != iLoaded)
|
||||
{
|
||||
iLoaded = lis.count();
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//return false;
|
||||
while(!bReplyDone)
|
||||
{
|
||||
QWebElement current = Find(a, "em", "class", "u_cbox_page_on __cbox_page_current");
|
||||
@@ -1806,11 +1874,12 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
break;
|
||||
}
|
||||
a.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(100, this, SLOT(reloadPage()));
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
qDebug() << "load comments";
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
QWebElementCollection reply_btns = frame->findAllElements("a[class='u_cbox_btn_reply']");
|
||||
for(;reply_index < reply_btns.count() ; reply_index++)
|
||||
{
|
||||
@@ -1820,12 +1889,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
else
|
||||
{
|
||||
btn.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(100, this, SLOT(reloadPage()));
|
||||
QTimer::singleShot(200, this, SLOT(reloadPage()));
|
||||
//reply_index += 1;
|
||||
qDebug() << reply_index;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
foreach(QWebElement a, reply_btns)
|
||||
{
|
||||
@@ -1841,8 +1912,10 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
*/
|
||||
/*
|
||||
else
|
||||
{
|
||||
if(m_nRetryCount < RETRY_MAX)
|
||||
@@ -1850,15 +1923,106 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
m_nRetryCount++;
|
||||
qDebug() << m_nRetryCount;
|
||||
QTimer::singleShot(RETRY_INTERVAL, this, SLOT(reloadPage()));
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
/*
|
||||
else
|
||||
{
|
||||
m_bUse = true;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
//}
|
||||
/*
|
||||
{
|
||||
QWebElementCollection lis = frame->findAllElements("div[class='u_cbox_content_wrap']>ul[class='u_cbox_list']>li");
|
||||
foreach (QWebElement li, lis)
|
||||
{
|
||||
QWebElement btn = li.findFirst("span[class='u_cbox_reply_cnt']");
|
||||
QWebElement atag = li.findFirst("a[class='u_cbox_btn_reply']");
|
||||
if (!btn.isNull() && !atag.isNull())
|
||||
{
|
||||
atag.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
qDebug() << "click reply:" << btn.toPlainText();
|
||||
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
QWebElement div_load_more = li.findFirst("div[class='u_cbox_paginate']");
|
||||
if (!div_load_more.isNull())
|
||||
{
|
||||
QWebElement load_more = div_load_more.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
|
||||
if (!load_more.isNull())
|
||||
{
|
||||
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
qDebug() << "load more reply";
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
QWebElementCollection reply_btns = frame->findAllElements("a[class^='u_cbox_btn_reply']");
|
||||
|
||||
foreach (QWebElement ele, reply_btns)
|
||||
{
|
||||
QWebElement btn = ele.findFirst("span[class='u_cbox_reply_cnt']");
|
||||
|
||||
if ((ele.attribute("class") == "u_cbox_btn_reply") && !btn.isNull())
|
||||
{
|
||||
ele.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(250, this, SLOT(reloadPage()));
|
||||
//qDebug() << "click reply:" << btn.toPlainText();
|
||||
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
QWebElementCollection allPaginate = frame->documentElement().findAll("div[class='u_cbox_paginate']");
|
||||
foreach (QWebElement ele, allPaginate)
|
||||
{
|
||||
QWebElement load_more = ele.findFirst("a[class='u_cbox_btn_more __cbox_page_button']");
|
||||
if (!load_more.isNull())
|
||||
{
|
||||
load_more.evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
QTimer::singleShot(250, this, SLOT(reloadPage()));
|
||||
//qDebug() << "load more reply";
|
||||
bProcessed = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
//for(;reply_index < reply_btns.count() ;)
|
||||
for (int k = 0; k < reply_btns.count(); ++k)
|
||||
{
|
||||
//QWebElement btn = Find(reply_btns[reply_index], "span", "class", "u_cbox_reply_cnt");
|
||||
QWebElement btn = Find(reply_btns[k], "span", "class", "u_cbox_reply_cnt");
|
||||
//reply_index += 1;
|
||||
|
||||
if(btn.isNull())
|
||||
continue;
|
||||
else
|
||||
{
|
||||
//QWebElement btnA = Find(reply_btns[reply_index - 1], "a", "class", "u_cbox_btn_reply");
|
||||
reply_btns[k].evaluateJavaScript("var evObj = new Event('click', {bubbles: true, cancelable: true, view: window}); this.dispatchEvent(evObj);");
|
||||
qDebug() << "load??????????????";
|
||||
|
||||
qDebug() << reply_btns.count();
|
||||
QTimer::singleShot(300, this, SLOT(reloadPage()));
|
||||
bProcessed = false;
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*/
|
||||
{
|
||||
QWebElement logo = Find(frame->documentElement(),"div","class","press_logo");
|
||||
QString strPlatID, strPlatTitle;
|
||||
@@ -1877,7 +2041,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
int order = 0;
|
||||
foreach(QWebElement li, lis)
|
||||
{
|
||||
qDebug() << "li";
|
||||
//qDebug() << "li";
|
||||
QWebElement comment_box = li.findFirst("div[class='u_cbox_comment_box']");
|
||||
QString strParent;
|
||||
{
|
||||
@@ -1913,14 +2077,14 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
}
|
||||
else if(strDate.contains("분"))
|
||||
{
|
||||
current_time = current_time.addDays(-(60 * strTime.toInt()));
|
||||
current_time = current_time.addSecs(-(60 * strTime.toInt()));
|
||||
}
|
||||
else
|
||||
{
|
||||
;
|
||||
}
|
||||
strDate = current_time.toString("yyyy-MM-dd hh:mm:ss");
|
||||
qDebug() << strDate;
|
||||
// qDebug() << strDate;
|
||||
}
|
||||
{
|
||||
QSqlQuery query;
|
||||
@@ -1973,7 +2137,7 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
}
|
||||
else if(strDate.contains("분"))
|
||||
{
|
||||
current_time = current_time.addDays(-(60 * strTime.toInt()));
|
||||
current_time = current_time.addSecs(-(60 * strTime.toInt()));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2006,8 +2170,11 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame)
|
||||
qDebug() << "lis count: " << lis.count();
|
||||
}
|
||||
|
||||
|
||||
//Debug("c:\\data\\replytest.html", frame->toHtml());
|
||||
m_bUse = true;
|
||||
bProcessed = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user