dictionary 추가

git-svn-id: svn://192.168.0.12/source@95 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-05-07 02:12:44 +00:00
parent 0f7f868b96
commit b29e12595c
10 changed files with 206 additions and 158 deletions

View File

@@ -18,7 +18,8 @@ SOURCES += main.cpp\
stable.cpp \
../Json/sjson.cpp \
sanaly1.cpp \
sanalyzer.cpp
sanalyzer.cpp \
sdictionary.cpp
HEADERS += mainwindow.h \
widget.h \
@@ -54,6 +55,7 @@ HEADERS += mainwindow.h \
viterbi.h \
winmain.h \
writer.h \
sanalyzer.h
sanalyzer.h \
sdictionary.h
FORMS += mainwindow.ui

View File

@@ -1,99 +1,5 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_DICTIONARY_H_
#define MECAB_DICTIONARY_H_
#ifndef DICTIONARY
#define DICTIONARY
#include "mecab.h"
#include "mmap.h"
#include "darts.h"
#include "char_property.h"
#endif // DICTIONARY
namespace MeCab {
class Param;
// One lexicon entry as stored in the token table that Dictionary::token() indexes.
// NOTE(review): Dictionary hands out `const Token*` into a table it does not
// build in this header, so the member order and widths presumably mirror the
// compiled dictionary format — confirm before reordering or resizing anything.
struct Token {
unsigned short lcAttr;  // presumably the left-context attribute id — TODO confirm
unsigned short rcAttr;  // presumably the right-context attribute id — TODO confirm
unsigned short posid;  // presumably a part-of-speech id — TODO confirm
short wcost;  // presumably the (signed) word cost — TODO confirm
unsigned int feature;  // offset into the feature string table (see Dictionary::feature())
unsigned int compound;  // meaning not visible in this header — TODO confirm
};
// Read-only accessor for one compiled dictionary file.
//
// Lookups go through a Darts double array (da_); each lookup result packs a
// token-table index in the upper bits of `value` and a token count in the
// low 8 bits (see token() / token_size()).
//
// All scalar header fields are zero-initialized by the constructor so that
// isCompatible(), version(), size(), type(), lsize() and rsize() are well
// defined even before open() has been called (or after it has failed).
class Dictionary {
 public:
  typedef Darts::DoubleArray::result_pair_type result_type;

  // Opens `filename` with the given mode; returns false on failure
  // (details are then available through what()).
  bool open(const char *filename, const char *mode = "r");
  void close();

  // Forwards to the double array; note that the double array takes
  // (key, result, result_len, key_len), i.e. `rlen` and `len` are swapped
  // relative to this method's parameter order.
  size_t commonPrefixSearch(const char* key, size_t len,
                            result_type *result,
                            size_t rlen) const {
    return da_.commonPrefixSearch(key, result, rlen, len);
  }

  // Returns the double-array result for an exact match of `key`.
  result_type exactMatchSearch(const char* key) const {
    result_type n;
    da_.exactMatchSearch(key, n);
    return n;
  }

  // Two dictionaries are compatible when version, left/right context sizes
  // and (decoded) charset all agree.
  bool isCompatible(const Dictionary &d) const {
    return(version_ == d.version_ &&
           lsize_  == d.lsize_   &&
           rsize_  == d.rsize_   &&
           decode_charset(charset_) ==
           decode_charset(d.charset_));
  }

  const char *filename() const { return filename_.c_str(); }
  const char *charset() const { return const_cast<const char*>(charset_); }
  unsigned short version() const { return version_; }
  size_t  size() const { return static_cast<size_t>(lexsize_); }
  int type() const { return static_cast<int>(type_);  }
  size_t lsize() const { return static_cast<size_t>(lsize_); }
  size_t rsize() const { return static_cast<size_t>(rsize_); }

  // Upper bits of `n.value` are the index of the first token for the entry.
  const Token *token(const result_type &n) const {
    return token_ +(n.value >> 8);
  }
  // Low 8 bits of `n.value` are the number of tokens for the entry.
  size_t token_size(const result_type &n) const { return 0xff & n.value; }
  // `t.feature` is an offset into the feature string table.
  const char  *feature(const Token &t) const { return feature_ + t.feature; }

  static bool compile(const Param &param,
                      const std::vector<std::string> &dics,
                      const char *output);  // outputs

  static bool assignUserDictionaryCosts(
      const Param &param,
      const std::vector<std::string> &dics,
      const char *output);  // outputs

  const char *what() { return what_.str(); }

  // Initializers are listed in member declaration order.
  explicit Dictionary(): dmmap_(new Mmap<char>), token_(0),
                         feature_(0), charset_(0),
                         version_(0), type_(0), lexsize_(0),
                         lsize_(0), rsize_(0) {}
  virtual ~Dictionary() { this->close(); }

 private:
  scoped_ptr<Mmap<char> > dmmap_;
  const Token        *token_;
  const char         *feature_;
  const char         *charset_;
  unsigned int        version_;
  unsigned int        type_;
  unsigned int        lexsize_;
  unsigned int        lsize_;
  unsigned int        rsize_;
  std::string         filename_;
  whatlog             what_;
  Darts::DoubleArray  da_;
};
}
#endif // MECAB_DICTIONARY_H_

View File

@@ -44,18 +44,20 @@
#include "widget.h"
#include "stable.h"
#include "sanaly1.h"
#include "sdictionary.h"
//! [0]
MainWindow::MainWindow()
{
QWidget *widget = new QWidget;
setCentralWidget(widget);
p_qwDB = new Widget;
p_qwFile = new QWidget;
p_qwAnalyzed1 = new QWidget;
p_qwText = new QWidget;
//p_qwFile = new QWidget;
//p_qwAnalyzed1 = new QWidget;
//p_qwText = new QWidget;
p_qwAnalyzer1 = new SAnaly1;
p_qwAnalyzer2 = new QWidget;
//p_qwAnalyzer2 = new QWidget;
p_qwDict = new SDictionary;
m_ptwSource = new QTabWidget;
@@ -239,7 +241,7 @@ void MainWindow::createActions()
actDictionary = new QAction(tr("Dictionary"), this);
actAnalyze->setStatusTip(tr("Execute Dictionary Widget"));
connect(actAnalyze, SIGNAL(triggered()), this, SLOT(slotDictionary()));
connect(actDictionary, SIGNAL(triggered()), this, SLOT(slotDictionary()));
/*
//!
newAct = new QAction(tr("&New"), this);
@@ -451,7 +453,9 @@ void MainWindow::exportDB()
// Handler for importing morpheme results: asks the analyzer widget to load a
// file, then calls ViewResult().
// NOTE(review): ViewResult() is not visible here; presumably it redraws the
// result tabs from the data FileImport() just loaded — confirm.
void MainWindow::importMorphere()
{
qDebug() << "import is executed";
// Must run before ViewResult() so the view is built from the imported data.
p_qwAnalyzer1->FileImport();
ViewResult();
}
void MainWindow::exportMorphere()
@@ -561,11 +565,13 @@ void MainWindow::slotAnalyze()
QPushButton *pbTitle = msg.addButton("Title",QMessageBox::ActionRole);
QPushButton *pbBody = msg.addButton("Body",QMessageBox::ActionRole);
QPushButton *pbAll = msg.addButton("ALL",QMessageBox::ActionRole);
QPushButton *pbPlatformTitle = msg.addButton("Platform Title",QMessageBox::ActionRole);
int setbody;
msg.exec();
if (msg.clickedButton() == pbTitle) { setbody = 0; }
else if (msg.clickedButton() == pbBody) { setbody = 1; }
else if (msg.clickedButton() == pbAll) { setbody = 2; }
else if (msg.clickedButton() == pbPlatformTitle) { setbody = 3; }
QMessageBox msg2;
msg2.setText("Please wait...");
@@ -591,7 +597,8 @@ void MainWindow::slotAnalyze()
// Slot for the "Dictionary" action: makes the dictionary widget visible and
// repaints it immediately.
void MainWindow::slotDictionary()
{
p_qwDict->show();
// repaint() paints synchronously instead of waiting for the next paint event.
p_qwDict->repaint();
}
void MainWindow::ExecThread(int _setBodyTitle)
@@ -606,6 +613,7 @@ void MainWindow::ExecThread(int _setBodyTitle)
m_pThread[i]->setPosBody(p_qwDB->getBodyPosition());
m_pThread[i]->setPosDate(p_qwDB->getDatePosition());
m_pThread[i]->setPosTitle(p_qwDB->getTitlePosition());
m_pThread[i]->setPosPlatformTitle(p_qwDB->getPlatformTitlePosition());
m_pThread[i]->setTitleBody(_setBodyTitle);
}
for(int i=0;i<p_qwAnalyzer1->getThread();i++)

View File

@@ -47,6 +47,7 @@
#include "widget.h"
#include "sanaly1.h"
#include <QList>
#include "sdictionary.h"
QT_BEGIN_NAMESPACE
class QAction;
@@ -152,7 +153,7 @@ private:
QWidget *p_qwFile;
QWidget *p_qwAnalyzed1;
QWidget *p_qwText;
SDictionary *p_qwDict;
SAnaly1 *p_qwAnalyzer1;
QWidget *p_qwAnalyzer2;

View File

@@ -33,7 +33,7 @@ SAnaly1::SAnaly1(QWidget *parent)
{
m_strlistMorphere << "NNG" <<"NNP"<<"NNB"<<"NNBC"<<"NR"<<"NP"<<"VV"<<"VA"<<"VX"<<"VCP"<<"VCN"<<"MM"<<"MAG"<<"MAJ"<<"IC"<<"JKS"<<"JKC"<<"JKG"<<"JKO"<<"JKB"<<"JKV"<<"JKQ"<<"JX"<<"JC"<<"EP"<<"EF"<<"EC"<<"ETN"<<"ETM"<<"XPN"<<"XSN"<<"XSV"<<"XSA"<<"XR"<<"SF"<<"SE"<<"SSO"<<"SSC"<<"SC"<<"SY"<<"SL"<<"SH"<<"SN";
m_strlistMorphereko << "일반 명사(NNG)"<<"고유 명사(NNP)"<<"의존 명사(NNB)"<<"단위를 나타내는 명사(NNBC)"<<"수사(NR)"<<"대명사(NP)"<<"동사(VV)"<<"형용사(VA)"<<"보조 용언(VX)"<<"긍정 지정사(VCP)"<<"부정 지정사(VCN)"<<"관형사(MM)"<<"일반 부사(MAG)"<<"접속 부사(MAJ)"<<"감탄사(IC)"<<"주격 조사(JKS)"<<"보격 조사(JKC)"<<"관형격 조사(JKG)"<<"목적격 조사(JKO)"<<"부사격 조사(JKB)"<<"호격 조사(JKV)"<<"인용격 조사(JKQ)"<<"보조사(JX)"<<"접속 조사(JC)"<<"선어말 어미(EP)"<<"종결 어미(EF)"<<"연결 어미(EC)"<<"명사형 전성 어미(ETN)"<<"관형형 전성 어미(ETM)"<<"체언 접두사(XPN)"<<"명사 파생 접미사(XSN)"<<"동사 파생 접미사(XSV)"<<"형용사 파생 접미사(XSA)"<<"어근(XR)"<<"마침표, 물음표, 느낌표(SF)"<<"줄임표 …(SE)"<<"여는 괄호 (, [(SSO)"<<"닫는 괄호 ), ](SSC)"<<"구분자 , · / : (SC)"<<"SY(SY)"<<"외국어(SL)"<<"한자(SH)"<<"숫자(SN)";
qDebug() << m_strlistMorphere.size();
// qDebug() << m_strlistMorphere.size();
qDebug() << m_strlistMorphereko.size();
QVBoxLayout *vMainLayout = new QVBoxLayout;
vMainLayout->setAlignment(Qt::AlignVCenter);
@@ -45,12 +45,14 @@ SAnaly1::SAnaly1(QWidget *parent)
QGroupBox *gOther = setOther();
QGroupBox *gSort = setSort();
QHBoxLayout *hboxlayout = new QHBoxLayout;
QGroupBox *gDictionary = setDictionary();
hboxlayout->addWidget(gBox,5);
vboxlayout->addWidget(gDate,4);
vboxlayout->addWidget(gDate,3);
//vboxlayout->addWidget(gFilter,3);
vboxlayout->addWidget(gSort,3);
vboxlayout->addWidget(gOther,3);
vboxlayout->addWidget(gSort,2);
vboxlayout->addWidget(gOther,2);
vboxlayout->addWidget(gDictionary,2);
hboxlayout->addLayout(vboxlayout,5);
vMainLayout->addLayout(hboxlayout, 2);
@@ -233,6 +235,77 @@ QGroupBox *SAnaly1::setSort()
return groupBox;
}
// Builds the checkable "User Dictionary" group box: an editable combo box
// listing dictionary files, followed by "+" and "-" buttons wired to
// AddDictionary() and DelDictionary().
// Returns the newly created group box; the caller places it in a layout.
QGroupBox *SAnaly1::setDictionary()
{
    QHBoxLayout *pRowLayout = new QHBoxLayout;

    // Combo box that stores the user dictionary paths.
    m_pcbDictionary = new QComboBox;
    m_pcbDictionary->setEditable(true);
    m_pcbDictionary->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Preferred);
    pRowLayout->addWidget(m_pcbDictionary);

    // "+" button: add dictionary files.
    QPushButton *pbAdd = new QPushButton;
    pbAdd->setText(tr("+"));
    pRowLayout->addWidget(pbAdd);

    // "-" button: remove a dictionary entry.
    QPushButton *pbDel = new QPushButton;
    pbDel->setText(tr("-"));
    pRowLayout->addWidget(pbDel);

    connect(pbAdd, SIGNAL(released()), this, SLOT(AddDictionary()));
    connect(pbDel, SIGNAL(released()), this, SLOT(DelDictionary()));

    QGroupBox *pGroup = new QGroupBox(tr("User Dictionary"));
    pGroup->setCheckable(true);
    pGroup->setLayout(pRowLayout);
    return pGroup;
}
void SAnaly1::AddDictionary()
{
QStringList strFilenames = QFileDialog::getOpenFileNames(0,"Dictionary file",QDir::currentPath(),
"dic files (*.dic);;All files (*.*)",new QString("Dictionary files (*.dic)"));
foreach(QString strFilename, strFilenames)
{
QFile file(strFilename);
if (!file.open(QIODevice::ReadOnly)) return;
}
for(int i = 0; i < m_pcbDictionary->count(); i++)
{
if(strFilenames.contains(m_pcbDictionary->itemText(i)))
strFilenames.removeOne(m_pcbDictionary->itemText(i));
}
m_pcbDictionary->addItems(strFilenames);
}
// Returns every entry of the dictionary combo box joined into one
// comma-separated string (empty string when the combo box has no entries).
QString SAnaly1::getUserDictList()
{
    const int nEntries = m_pcbDictionary->count();
    QStringList dictPaths;
    int nIndex = 0;
    while (nIndex < nEntries)
    {
        dictPaths.append(m_pcbDictionary->itemText(nIndex));
        ++nIndex;
    }
    return dictPaths.join(",");
}
// Slot for the "-" button: removes the entry currently selected in the
// dictionary combo box (previously the first entry was removed regardless of
// the selection). Does nothing when the combo box has no current entry.
void SAnaly1::DelDictionary()
{
    // currentIndex() is -1 when the combo box is empty or nothing is selected.
    const int nCurrent = m_pcbDictionary->currentIndex();
    if (nCurrent >= 0)
        m_pcbDictionary->removeItem(nCurrent);
    qDebug() << getUserDictList();
}
bool SAnaly1::isSortMorphereDateChecked()
{
return m_rbMorphereDate->isChecked();
@@ -281,6 +354,7 @@ QTableWidget *SAnaly1::AddTable(QString _str)
void SAnaly1::CloseTab(int index)
{
((STable*)(m_ptwData->widget(index)))->clear();
m_ptwData->removeTab(index);
}
@@ -1238,53 +1312,63 @@ void SAnaly1::FileImport()
QFile file(strFilename);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) return;
STable *pNew = new STable;
QTextCodec *codec = QTextCodec::codecForName("eucKR");
m_pProgress->setRange(0,file.size());
int ncRow=0;
int nCount=0;
bool bQuit = true;
int nRead = 0;
while(bQuit)
m_TotalResult.clear();
m_mapViewResult.clear();
//qDebug() << "clear is complete";
QTextStream in(&file);
while(!in.atEnd())
{
QString strLine;
{
QByteArray byte = file.readLine(196608);
if (byte.isEmpty()) { bQuit = false; continue; }
strLine = codec->toUnicode(byte);
nRead += byte.size();
}
if (nCount == 0)
{
QStringList strings = strLine.split(",");
if (strings.at(0) == QString("#Head#"))
{
pNew->setColumnCount(strings.size()-1);
for (int i = 1; i < strings.size();i++)
pNew->setHorizontalHeaderItem(i-1,new QTableWidgetItem(strings.at(i)));
nCount++;
QString strLine = in.readLine();
strLine = strLine.replace("\"","");
//qDebug() << strLine;
QStringList strListLine = strLine.split(",");
if(strListLine.size() != 4)
continue;
//qDebug() << strListLine.at(4).trimmed().toInt();
QString strKey = strListLine.at(0).trimmed() + "~!@" + strListLine.at(1).trimmed() + "~!@" + strListLine.at(2).trimmed();
m_TotalResult.insert(strKey, strListLine.at(3).trimmed().toInt());
}
file.close();
for(QMap<QString, int>::iterator iterPos = m_TotalResult.begin(); iterPos != m_TotalResult.end(); iterPos++)
{
QString strkey = iterPos.key();
int count = iterPos.value();
QStringList strlistKey = strkey.split("~!@");
QString strDate = strlistKey.at(0);
QString strMorphere = strlistKey.at(1);
QString strKeyword = strlistKey.at(2);
if(m_mapViewResult.contains(strDate))
{
if(m_mapViewResult.value(strDate).contains(strMorphere))
{
m_mapViewResult[(strDate)][(strMorphere)].insertMulti(count, strKeyword);
}
else
pNew->setColumnCount(strings.size());
}
strLine = strLine.replace("\"","");
QStringList strings = strLine.split(",");
int ncCol=0;
pNew->setRowCount(strings.size());
foreach(QString str,strings)
{
pNew->setItem(ncRow,ncCol,new QTableWidgetItem(QString(" " + str + " ")));
ncCol++;
QMap<int, QString> qLast;
qLast.insert(count, strKeyword);
m_mapViewResult[(strDate)].insert(strMorphere, qLast);
}
ncRow++;
m_pProgress->setValue(nRead);
m_pProgress->repaint();
}
pNew->setRowCount(pNew->rowCount()-1);
file.close();
m_ptwData->addTab(pNew,"import");
m_ptwData->setCurrentIndex(m_ptwData->count()-1);
else
{
QMap<int, QString> qLast;
qLast.insert(count , strKeyword);
QMap<QString, QMap<int, QString> > qMedium;
qMedium.insert(strMorphere, qLast);
m_mapViewResult.insert(strDate, qMedium);
}
}
}
void SAnaly1::FileExport()
@@ -1438,7 +1522,6 @@ bool SAnaly1::getDateAll()
QTabWidget* SAnaly1::getQTabWidget()
{
return m_ptwData;
}
@@ -1453,3 +1536,4 @@ void SAnaly1::MemClear()
m_TotalResult.clear();
m_mapViewResult.clear();
}

View File

@@ -48,6 +48,9 @@ public:
int getPeriod();
QStringList getMorphereList();
bool getDateAll();
QString getUserDictList();
private:
// Data
QListWidget *m_plwData;
@@ -112,7 +115,7 @@ private:
QRadioButton *m_rbMorphereDate;
QButtonGroup *m_bgRadioGroup;
QComboBox *m_pcbDictionary;
public:
struct m_mapKey
{
@@ -141,6 +144,7 @@ private:
QGroupBox *setFilter();
QGroupBox *setOther();
QGroupBox *setSort();
QGroupBox *setDictionary();
QTableWidget *AddTable(QString _str);
@@ -180,6 +184,8 @@ public slots:
void MemClear();
void MorphereListAdd();
void MorphereListDel();
void AddDictionary();
void DelDictionary();
QTabWidget* getQTabWidget();

View File

@@ -124,22 +124,42 @@ void AnalyzerThread::setPosTitle(const int _nPos)
m_nPosTitle = _nPos;
}
// Stores the table column index from which run() reads the platform-title text.
// _nPos: column index; it is stored as-is, without range checking.
void AnalyzerThread::setPosPlatformTitle(const int _nPos)
{
m_nPosPlatformTitle = _nPos;
}
void AnalyzerThread::run()
{
typedef mecab_t* (*mecab_new_fun)(int,char**);
typedef const char* (*mecab_sparse_tostr_fun)(mecab_t *mecab, const char *str);
typedef void (*mecab_destroy_fun)(mecab_t *mecab);
char *t[] = {"RRR","-d","dic"};
int int_t = 3;
// Create tagger object
mecab_t *mecab;
mecab_new_fun mecab_new = (mecab_new_fun)QLibrary::resolve("libmecab.dll","mecab_new");
mecab_sparse_tostr_fun mecab_sparse_tostr = (mecab_sparse_tostr_fun)QLibrary::resolve("libmecab.dll","mecab_sparse_tostr");
mecab = mecab_new(int_t, t);
QString userdict = pWidget->getUserDictList();
if(userdict.length() < 1)
{
char *t[] = {"RRR","-d","dic"};
int int_t = 3;
mecab = mecab_new(int_t, t);
}
else
{
char *cstr = new char[userdict.toStdString().length() + 1];
strcpy(cstr, userdict.toStdString().c_str());
char *t[] = {"RRR","-d","dic","-u",cstr};
int int_t = 5;
mecab = mecab_new(int_t, t);
delete[] cstr;
}
mecab_destroy_fun mecab_destroy = (mecab_destroy_fun)QLibrary::resolve("libmecab.dll","mecab_destroy");
@@ -154,7 +174,7 @@ void AnalyzerThread::run()
QString strBody;
QString strDate;
QString strData;
QString strPlatformTitle;
{
mutex->lock();
if(*nCount >= pCurrent->rowCount())
@@ -164,7 +184,9 @@ void AnalyzerThread::run()
}
strTitle = pCurrent->item((*nCount),m_nPosTitle)->text();
strDate = pCurrent->item((*nCount),m_nPosDate)->text();
strPlatformTitle = pCurrent->item((*nCount),m_nPosPlatformTitle)->text();
strBody = pCurrent->item((*nCount)++,m_nPosBody)->text();
mutex->unlock();
}
@@ -186,9 +208,13 @@ void AnalyzerThread::run()
strData = strTitle + "\n" + strBody;
break;
}
case 3:
strData = strPlatformTitle;
break;
}
strTitle.clear();
strBody.clear();
strPlatformTitle.clear();
}
if(strDate.length() < 11)
@@ -338,7 +364,6 @@ QString AnalyzerThread::getWeeksInMonth(unsigned int _nDate)
week = 0;
}
if((firstWeekDays < qToday.day()) && (qToday.day() <= (thisLastDay - thisLastDayofWeek)))
{
week = week + ((qToday.day() - firstWeekDays + WEEK - 1)/WEEK);

View File

@@ -55,6 +55,7 @@ private:
int m_nPosDate;
int m_nPosBody;
int m_nPosTitle;
int m_nPosPlatformTitle;
QMutex *mutex;
SAnaly1 *pWidget;
QTabWidget *m_pTabWidget;
@@ -88,6 +89,7 @@ public:
void setPosDate(const int _nPos);
void setPosBody(const int _nPos);
void setPosTitle(const int _nPos);
void setPosPlatformTitle(const int _nPos);
QString getWeeksInMonth(unsigned int _nDate);
void setAnalyzerThread(QMutex &_mutex, int number, int &_nCount);
};

View File

@@ -1135,6 +1135,7 @@ bool Widget::ReloadColumn()
m_nColumn = -1;
m_nTitle = -1;
m_nBody = -1;
m_nPlatformTitle = -1;
foreach(QStringList strList,m_vecColumn)
{
if (strList.at(E_COLUMN_DATE).trimmed() == QString("o") )
@@ -1160,6 +1161,14 @@ bool Widget::ReloadColumn()
}
m_nBody++;
foreach(QStringList strList,m_vecColumn)
{
if (strList.at(E_COLUMN_DATABASE).trimmed() == QString("platform_title") )
break;
else
m_nPlatformTitle++;
}
m_nPlatformTitle++;
return true;
}

View File

@@ -55,6 +55,10 @@ public:
{
return m_nBody;
}
// Returns the column index of the platform-title column (m_nPlatformTitle).
// NOTE(review): judging from the visible part of Widget::ReloadColumn(), this
// appears to equal the number of columns when no "platform_title" column
// exists — callers should range-check before indexing a table with it; confirm.
int getPlatformTitlePosition()
{
return m_nPlatformTitle;
}
private:
// Data
@@ -68,6 +72,7 @@ private:
int m_nColumn;
int m_nTitle;
int m_nBody;
int m_nPlatformTitle;
// Keyword
QComboBox *m_pcbCatalog;
QComboBox *m_pcbKeyword;