diff --git a/CrawlerProcess/scrawler.cpp b/CrawlerProcess/scrawler.cpp index 70772d2..4f8dbee 100644 --- a/CrawlerProcess/scrawler.cpp +++ b/CrawlerProcess/scrawler.cpp @@ -1558,7 +1558,51 @@ void SCrawler::saveFrameNewsUrl(QWebFrame *frame) //Debug("out.html",m_page->mainFrame()->toHtml()); } + + //sport + { + if(strTitle.isEmpty()) + { + strTitle = Find(frame->documentElement(),"h4","class","title").toPlainText(); + } + if(strData.isEmpty()) + { + strData = Find(frame->documentElement(), "div", "id", "newsEndContents").toPlainText(); + QString strSpam = Find(frame->documentElement(), "div", "class", "link_news").toPlainText(); + QString strSource = Find(frame->documentElement(), "p", "class", "source").toPlainText(); + strData = strData.left(strData.length() - strSpam.length() - strSource.length()); + } + if(strDate.isEmpty()) + { + strDate = frame->documentElement().findFirst("div[class='info']>span").toPlainText(); + QRegExp reDate("([\\d]{4}).([\\d]{2}).([\\d]{2})"); + QRegExp reTime("([\\d]{2}):([\\d]{2})"); + int pos = 0; + QString date; + QString time; + while((pos = reDate.indexIn(strDate, pos)) != -1) + { + date = reDate.cap(1) + "-" + reDate.cap(2) + "-" + reDate.cap(3); + break; + } + pos = 0; + while((pos = reTime.indexIn(strDate, pos)) != -1) + { + if(strDate.contains("오후") && (reTime.cap(1) != "12")) + time = QString::number(reTime.cap(1).toInt() + 12) + ":" + reTime.cap(2); + else + time = reTime.cap(1) + ":" + reTime.cap(2); + break; + } + + strDate = date + " " + time + ":00"; + } + } + + + element = Find(frame->documentElement(),"div","class","press_logo"); + if(!element.isNull()) { strPlatID = Find(element,"a").attribute("href"); strPlatTitle = Find(element,"img").attribute("alt"); @@ -1569,6 +1613,20 @@ void SCrawler::saveFrameNewsUrl(QWebFrame *frame) strPlatID = strlistPlat.at(1); } } + else //sports + { + element = frame->documentElement().findFirst("span[class='logo']>img"); + strPlatTitle = element.attribute("alt"); + QWebElement link = Find(frame->documentElement(), "a", "class", "press_link"); + QString strLink = link.attribute("href"); + strPlatID = strLink.left(strLink.mid(8).indexOf('/') + 7); + QStringList strlistPlat = strPlatID.split("."); + if(strlistPlat.size() > 2) + { + if (strlistPlat.at(0) == QString("http://www")) + strPlatID = strlistPlat.at(1); + } + } } bodydata.setTable(m_strTable); bodydata.setData(bodydata.GetSafeUtf(strTitle), SCrawlerData::ARTICLE_TITLE); @@ -1590,11 +1648,26 @@ bool SCrawler::saveFrameNewsComment(QWebFrame *frame) { if (m_bUse) return true; static bool bReplyDone = false; - static bool bReplyReplyDone = false; static int reply_index = 0; + qDebug() << frame->baseUrl().toString(); qDebug() << "executed"; + if(frame->baseUrl().toString().contains("entertain") && !frame->baseUrl().toString().contains("comment")) + { + m_page->mainFrame()->load(QUrl(frame->baseUrl().toString().replace("read", "comment/list"))); + return false; + } + + + if(frame->baseUrl().toString().contains("sports") && !frame->baseUrl().toString().contains("m_view=1")) + { + m_page->mainFrame()->load(QUrl(frame->baseUrl().toString() + "&m_view=1")); + return false; + } + + + QWebElement a = Find(frame->documentElement(), "a", "class", "u_cbox_btn_more __cbox_page_button"); if(!a.isNull())