From 9b05f4b83390777e7d870ef964a3380ff875004e Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 31 Aug 2015 08:29:25 +0000 Subject: [PATCH] spmmer filter git-svn-id: svn://192.168.0.12/source@178 8346c931-da38-4b9b-9d4c-e48b93cbd075 --- FilterProcess/sfilterprocess.cpp | 153 +++++++++++++++++++++++++------ FilterProcess/sfilterprocess.h | 1 + 2 files changed, 125 insertions(+), 29 deletions(-) diff --git a/FilterProcess/sfilterprocess.cpp b/FilterProcess/sfilterprocess.cpp index bf45473..01aea96 100644 --- a/FilterProcess/sfilterprocess.cpp +++ b/FilterProcess/sfilterprocess.cpp @@ -1300,13 +1300,22 @@ bool QSortSpamer(const SSpamer &st1, const SSpamer &st2) return st1.m_dSpammerValue > st2.m_dSpammerValue; } +struct SSpammerMap +{ + int m_nReply_num; + int m_nBody_num; + int m_nId_num; +}; + void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum) { QMap mapSpamer; + ReloadData(); InsertLog("Spamer Data Filter Run Start "); DataFilter(_strFilter); InsertLog("Spamer Data Filter Run End "); + QVector vecSpammerMap[E_ARTICLE_MAX]; { InsertLog("Spamer Rank Count Make Start "); if (m_bDataFlag[E_ARTICLE_BODY] == true) @@ -1325,6 +1334,12 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum mapSpamer.insert(strKey,stSpamer); } mapSpamer[strKey].m_dBodycount+=1.0; + + SSpammerMap stSpammerMap; + QString strURL = strlist[m_nUrlColumn].trimmed(); + stSpammerMap.m_nBody_num = m_mapData[strURL].m_nNum; + stSpammerMap.m_nId_num = m_mapID[strlist[m_anData[E_DATA_article_id]].trimmed()].m_nNum; + vecSpammerMap[E_ARTICLE_BODY].push_back(stSpammerMap); } } @@ -1344,39 +1359,63 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum mapSpamer.insert(strKey,stSpamer); } mapSpamer[strKey].m_dReplycount+=1.0; + + SSpammerMap stSpammerMap; + QString strURL = strlist[m_nUrlColumn].trimmed(); + stSpammerMap.m_nBody_num = m_mapData[strURL].m_nNum; + QString strSpammerReplyKey = QString::number(stSpammerMap.m_nBody_num) + "_" + strlist[m_anData[E_DATA_article_order]].trimmed(); + stSpammerMap.m_nReply_num = m_mapSpammerReply[strSpammerReplyKey]; + stSpammerMap.m_nId_num = m_mapID[strlist[m_anData[E_DATA_article_id]].trimmed()].m_nNum; + vecSpammerMap[E_ARTICLE_REPLY].push_back(stSpammerMap); } } - foreach(SBody stBody, m_mapData.values()) + + if (!m_bDataFlag[E_ARTICLE_BODY] && !m_bDataFlag[E_ARTICLE_REPLY]) { - if (stBody.m_strlist[m_anData[E_DATA_article_id]].trimmed().isEmpty()) continue; - QString strKey = stBody.m_strlist[m_anData[E_DATA_platform_name]].trimmed() + QString(","); - strKey += stBody.m_strlist[m_anData[E_DATA_platform_form]].trimmed() + QString(","); - strKey += stBody.m_strlist[m_anData[E_DATA_article_id]].trimmed(); - if (mapSpamer.contains(strKey) == false) + foreach(SBody stBody, m_mapData.values()) { - SSpamer stSpamer; - stSpamer.m_strKey = strKey; - stSpamer.m_strNickname = stBody.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); - mapSpamer.insert(strKey,stSpamer); - } - //mapSpamer[strKey].m_dBodycount+=1.0; - mapSpamer[strKey].m_setNick.insert(stBody.m_strlist[m_anData[E_DATA_article_nickname]].trimmed()); - foreach(SReply stReply , stBody.m_vecReply.values()) - { - QString strKey = stReply.m_strlist[m_anData[E_DATA_platform_name]].trimmed() + QString(","); - strKey += stReply.m_strlist[m_anData[E_DATA_platform_form]].trimmed() + QString(","); - strKey += stReply.m_strlist[m_anData[E_DATA_article_id]].trimmed(); + if (stBody.m_strlist[m_anData[E_DATA_article_id]].trimmed().isEmpty()) continue; + QString strKey = stBody.m_strlist[m_anData[E_DATA_platform_name]].trimmed() + QString(","); + strKey += stBody.m_strlist[m_anData[E_DATA_platform_form]].trimmed() + QString(","); + strKey += stBody.m_strlist[m_anData[E_DATA_article_id]].trimmed(); if (mapSpamer.contains(strKey) == false) { SSpamer stSpamer; stSpamer.m_strKey = strKey; - stSpamer.m_strNickname = stReply.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); + stSpamer.m_strNickname = stBody.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); mapSpamer.insert(strKey,stSpamer); } - //mapSpamer[strKey].m_dReplycount+=1.0; - QString strReKey = stReply.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); - if (mapSpamer[strKey].m_setNick.contains(strReKey) == false) - mapSpamer[strKey].m_setNick.insert(strReKey); + + SSpammerMap stSpammerMap; + stSpammerMap.m_nBody_num = stBody.m_nNum; + stSpammerMap.m_nId_num = m_mapID[stBody.m_strlist[m_anData[E_DATA_article_id]].trimmed()].m_nNum; + vecSpammerMap[E_ARTICLE_BODY].push_back(stSpammerMap); + + mapSpamer[strKey].m_setNick.insert(stBody.m_strlist[m_anData[E_DATA_article_nickname]].trimmed()); + foreach(SReply stReply , stBody.m_vecReply.values()) + { + QString strKey = stReply.m_strlist[m_anData[E_DATA_platform_name]].trimmed() + QString(","); + strKey += stReply.m_strlist[m_anData[E_DATA_platform_form]].trimmed() + QString(","); + strKey += stReply.m_strlist[m_anData[E_DATA_article_id]].trimmed(); + if (mapSpamer.contains(strKey) == false) + { + SSpamer stSpamer; + stSpamer.m_strKey = strKey; + stSpamer.m_strNickname = stReply.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); + mapSpamer.insert(strKey,stSpamer); + } + + SSpammerMap stSpammerMap; + stSpammerMap.m_nBody_num = stBody.m_nNum; + stSpammerMap.m_nId_num = m_mapID[stReply.m_strlist[m_anData[E_DATA_article_id]].trimmed()].m_nNum; + QString strSpammerReplyKey = QString::number(stSpammerMap.m_nBody_num) + "_" + stReply.m_strlist[m_anData[E_DATA_article_order]].trimmed(); + stSpammerMap.m_nReply_num = m_mapSpammerReply[strSpammerReplyKey]; + vecSpammerMap[E_ARTICLE_REPLY].push_back(stSpammerMap); + + QString strReKey = stReply.m_strlist[m_anData[E_DATA_article_nickname]].trimmed(); + if (mapSpamer[strKey].m_setNick.contains(strReKey) == false) + mapSpamer[strKey].m_setNick.insert(strReKey); + } } } @@ -1421,6 +1460,12 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum strQuery = "delete from spammer where company_num = " + strCompanyNum; if(query.exec(strQuery.toUtf8()) == false) {InsertLog(query.lastError().text());return;} + strQuery = "delete from body_spammer_map_" + _strCompanyNum + "_update"; + if(query.exec(strQuery.toUtf8()) == false) {InsertLog(query.lastError().text());return;} + + strQuery = "delete from reply_spammer_map_" + _strCompanyNum + "_update"; + if(query.exec(strQuery.toUtf8()) == false) {InsertLog(query.lastError().text());return;} + QMap mapPlatformnameCount; foreach(QString str,m_mapPlatformname.keys()) @@ -1429,6 +1474,7 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum UpdateDBInfoState(m_db,_strCompanyNum,"Spammer (Insert)"); int nCount = 1; + QSet setId; foreach(SSpamer stIn,list) { QStringList strList = stIn.m_strKey.split(","); @@ -1436,8 +1482,11 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum QString strPlatformname = strList[0].trimmed(); if (strID.isEmpty()) continue; if (strPlatformname.isEmpty()) continue; + int nID = m_mapID[strID].m_nNum; QString strValue; - strValue += QString::number(m_mapID[strID].m_nNum) + ",'"; + if (setId.contains(nID)==false) + setId.insert(nID); + strValue += QString::number(nID) + ",'"; strValue += strID + "',"; strValue += QString::number(stIn.m_setNick.size()) + ","; strValue += QString::number((int)stIn.m_dBodycount) + ","; @@ -1469,6 +1518,47 @@ void SFilterProcess::DataSpammersCount(QString _strFilter,QString _strCompanyNum } nCount++; } + { + nCount=1; + foreach(SSpammerMap stMap,vecSpammerMap[E_ARTICLE_BODY]) + { + if(setId.contains(stMap.m_nId_num)==false) continue; + strQuery = "insert into body_spammer_map_"; + strQuery += _strCompanyNum + "_update"; + strQuery += " (num,body_num,id_num) VALUES ("; + strQuery += QString::number(nCount++) + ","; + strQuery += QString::number(stMap.m_nBody_num) + ","; + strQuery += QString::number(stMap.m_nId_num) + ")"; + if(query.exec(strQuery.toUtf8()) == false) + { + qDebug() << strQuery;InsertLog(query.lastError().text()); + InsertLog(strQuery); + } + } + } + + { + nCount=1; + foreach(SSpammerMap stMap,vecSpammerMap[E_ARTICLE_REPLY]) + { + if(setId.contains(stMap.m_nId_num)==false) continue; + strQuery = "insert into reply_spammer_map_"; + strQuery += _strCompanyNum + "_update"; + strQuery += " (num,body_num,reply_num,id_num) VALUES ("; + strQuery += QString::number(nCount++) + ","; + strQuery += QString::number(stMap.m_nBody_num) + ","; + strQuery += QString::number(stMap.m_nReply_num) + ","; + strQuery += QString::number(stMap.m_nId_num) + ")"; + if(query.exec(strQuery.toUtf8()) == false) + { + qDebug() << strQuery;InsertLog(query.lastError().text()); + InsertLog(strQuery); + } + } + } + + RenameTable("body_spammer_map_" + _strCompanyNum,"body_spammer_map_" + _strCompanyNum + "_update","body_spammer_map_" + _strCompanyNum +"_temp"); + RenameTable("reply_spammer_map_" + _strCompanyNum,"reply_spammer_map_" + _strCompanyNum + "_update","reply_spammer_map_" + _strCompanyNum +"_temp"); //influencercount strQuery = "update dbinfo set " @@ -1907,7 +1997,7 @@ void SFilterProcess::DataPut(QString _strCompanyNum) strQuery = "delete from reply_" + _strCompanyNum + "_update"; if(query.exec(strQuery.toUtf8()) == false) {InsertLog(query.lastError().text());return;} - strQuery = "delete from body_category_map_" + _strCompanyNum + "_update"; + strQuery = "delete from body_consumer_map_" + _strCompanyNum + "_update"; if(query.exec(strQuery.toUtf8()) == false) {InsertLog(query.lastError().text());return;} strQuery = "delete from id_" + _strCompanyNum + "_update"; @@ -1922,7 +2012,7 @@ void SFilterProcess::DataPut(QString _strCompanyNum) QString strReplyQuery = QString("insert into reply_" + _strCompanyNum + "_update (num,id_num,nickname_num,community_num,id_id,parent,data,date,body_num,realbycategorynum) VALUES (:NUM,:ID_NUM,:NICKNAME_NUM,:COMMUNITY_NUM,:ID_ID,:PARENT,:DATA,:DATE,:BODY_NUM,:REALBYCATEGORYNUM)").toUtf8(); QString strBodyQuery = QString("insert into body_" + _strCompanyNum + "_update(num,id_num,nickname_num,community_num,id_id,platformname_num,platformform_num, title, data, url, view, date, reply_startnum, reply_endnum, reply_count, lastupdate, realbycategorynum)" " VALUES (:NUM,:ID_NUM,:NICKNAME_NUM,:COMMUNITY_NUM,:ID_ID,:PLATFORMNAME_NUM,:PLATFORMFORM_NUM,:TITLE,:DATA,:URL,:VIEW,:DATE,:REPLY_STARTNUM,:REPLY_ENDNUM,:REPLY_COUNT,:LASTUPDATE,:REALBYCATEGORYNUM)").toUtf8(); - QString strMapQuery = QString("insert into body_category_map_" + _strCompanyNum + "_update (num,body_num,category_num,realin) VALUES (:NUM,:BODY_NUM,:CATEGORY_NUM,:REALIN)").toUtf8(); + QString strMapQuery = QString("insert into body_consumer_map_" + _strCompanyNum + "_update (num,body_num,category_num,realin) VALUES (:NUM,:BODY_NUM,:CATEGORY_NUM,:REALIN)").toUtf8(); QString strCommunityQuery = QString("insert into community_" + _strCompanyNum + "_update (num,id,title,url,exid,platformname_num,platformform_num) values ("); @@ -1938,7 +2028,8 @@ void SFilterProcess::DataPut(QString _strCompanyNum) strQuery += "," + QString::number(stCommunity.m_nPlatformname); strQuery += "," + QString::number(stCommunity.m_nPlatformform); strQuery += ")"; - if(query.exec(strQuery.toUtf8()) == false) { + if(query.exec(strQuery.toUtf8()) == false) + { InsertLog(query.lastError().text());m_db.close();qDebug() << strQuery;return; } } @@ -1985,7 +2076,7 @@ void SFilterProcess::DataPut(QString _strCompanyNum) foreach(SReply stReply,m_mapData[strUrl].m_vecReply) { query.prepare(strReplyQuery); - query.bindValue(":NUM",nReplyCount); + query.bindValue(":NUM",nReplyCount); query.bindValue(":ID_NUM",m_mapID[stReply.m_strlist[m_anData[E_DATA_article_id]].trimmed()].m_nNum); query.bindValue(":NICKNAME_NUM",m_mapNickname[stReply.m_strlist[m_anData[E_DATA_article_nickname]].trimmed()].m_nNum); query.bindValue(":COMMUNITY_NUM",m_mapCommunity[GetCommunityKey(m_mapData[strUrl].m_strlist)].m_nNum); @@ -1998,6 +2089,10 @@ void SFilterProcess::DataPut(QString _strCompanyNum) if (query.exec()==false){ InsertLog(query.lastError().text());m_db.close();return; } + { + QString strSpammerReplyKey = QString::number(m_mapData[strUrl].m_nNum) + "_" + stReply.m_strlist[m_anData[E_DATA_article_order]].trimmed(); + m_mapSpammerReply.insert(strSpammerReplyKey,nReplyCount); + } nReplyCount++; } query.prepare(strBodyQuery); @@ -2118,7 +2213,7 @@ void SFilterProcess::DataPut(QString _strCompanyNum) RenameTable("body_" + _strCompanyNum,"body_" + _strCompanyNum + "_update","body_" + _strCompanyNum +"_temp"); RenameTable("reply_" + _strCompanyNum,"reply_" + _strCompanyNum + "_update","reply_" + _strCompanyNum + "_temp"); - RenameTable("body_category_map_" + _strCompanyNum,"body_category_map_" + _strCompanyNum + "_update","body_category_map_" + _strCompanyNum + "_temp"); + RenameTable("body_consumer_map_" + _strCompanyNum,"body_consumer_map_" + _strCompanyNum + "_update","body_consumer_map_" + _strCompanyNum + "_temp"); RenameTable("id_" + _strCompanyNum,"id_" + _strCompanyNum + "_update","id_" + _strCompanyNum + "_temp"); RenameTable("nickname_" + _strCompanyNum,"nickname_" + _strCompanyNum + "_update","nickname_" + _strCompanyNum + "_temp"); RenameTable("community_" + _strCompanyNum,"community_" + _strCompanyNum + "_update","community_" + _strCompanyNum + "_temp"); diff --git a/FilterProcess/sfilterprocess.h b/FilterProcess/sfilterprocess.h index 9ff0f40..ce215bd 100644 --- a/FilterProcess/sfilterprocess.h +++ b/FilterProcess/sfilterprocess.h @@ -259,6 +259,7 @@ private: int m_nCutDays; QVector m_vecKeyword; + QMap m_mapSpammerReply; private: bool LockTable(QString _strTable);