MorphereAnalyzer

git-svn-id: svn://192.168.0.12/source@76 8346c931-da38-4b9b-9d4c-e48b93cbd075
This commit is contained in:
admin
2015-04-17 02:44:11 +00:00
parent ed59f23ac7
commit 2e7d343f4a
47 changed files with 167852 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
#-------------------------------------------------
#
# Project created by QtCreator 2015-04-06T11:16:08
#
#-------------------------------------------------
# Qt modules the application is built against.
QT += core gui sql
# Add the widgets module when building against Qt major version 5 or newer.
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
# Produce an application binary named MorphereAnalyzer.
TARGET = MorphereAnalyzer
TEMPLATE = app
# Translation units compiled into the target (one of them lives in ../Json).
SOURCES += main.cpp\
mainwindow.cpp \
widget.cpp \
stable.cpp \
../Json/sjson.cpp \
sanaly1.cpp \
sanalyzer.cpp
# Headers of the project. Both ../common.h and the local common.h are listed;
# NOTE(review): confirm that both files are really needed and that includes
# of "common.h" resolve to the intended one.
HEADERS += mainwindow.h \
widget.h \
stable.h \
../Json/sjson.h \
../common.h \
sanaly1.h \
char_property.h \
common.h \
connector.h \
context_id.h \
darts.h \
dictionary.h \
dictionary_rewriter.h \
feature_index.h \
freelist.h \
iconv_utils.h \
lbfgs.h \
learner_node.h \
learner_tagger.h \
mecab.h \
mmap.h \
nbest_generator.h \
param.h \
scoped_ptr.h \
stream_wrapper.h \
string_buffer.h \
thread.h \
tokenizer.h \
ucs.h \
ucstable.h \
utils.h \
viterbi.h \
winmain.h \
writer.h \
sanalyzer.h
# Qt Designer form processed by uic.
FORMS += mainwindow.ui

View File

@@ -0,0 +1,92 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_CHARACTER_CATEGORY_H_
#define MECAB_CHARACTER_CATEGORY_H_
#include "mmap.h"
#include "scoped_ptr.h"
#include "ucs.h"
#include "utils.h"
namespace MeCab {
class Param;
// Per-character record packed into five bit-fields (18 + 8 + 4 + 1 + 1 bits).
// All fields start out as zero.
struct CharInfo {
  unsigned int type:         18;
  unsigned int default_type: 8;
  unsigned int length:       4;
  unsigned int group:        1;
  unsigned int invoke:       1;

  CharInfo()
      : type(0),
        default_type(0),
        length(0),
        group(0),
        invoke(0) {}

  // True when this record and |c| have at least one bit of `type` in common.
  bool isKindOf(CharInfo c) const {
    const unsigned int shared_bits = type & c.type;
    return shared_bits != 0;
  }
};
// Maps characters to CharInfo records through a table (map_) that is indexed
// by the 16-bit code produced by the converter selected with charset_.
// The table and the category names are set up by open(), which is defined
// outside this header.
class CharProperty {
 public:
  bool open(const Param &);
  bool open(const char*);
  void close();
  size_t size() const;
  void set_charset(const char *charset);
  int id(const char *) const;
  const char *name(size_t i) const;

  // Text accumulated in the error log of this object.
  const char *what() { return what_.str(); }

  // Scans forward from |begin| while each decoded character is "kind of" the
  // previously accepted one (the first comparison is made against |c|).
  //
  //   fail  - receives the CharInfo of the last character that was decoded,
  //           including the one that stopped the scan.
  //   mblen - receives the byte length reported by the decoder for the last
  //           decoded character.
  //   clen  - receives the number of characters that were accepted.
  //
  // Returns the position of the first character that was not accepted, or
  // |end|. When begin == end nothing is decoded, so *fail and *mblen are left
  // untouched and *clen is 0.
  inline const char *seekToOtherType(const char *begin, const char *end,
                                     CharInfo c, CharInfo *fail,
                                     size_t *mblen, size_t *clen) const {
    // `register` was dropped here: the storage class is ill-formed since
    // C++17 and never affected behaviour.
    const char *p = begin;
    *clen = 0;
    while (p != end && c.isKindOf(*fail = getCharInfo(p, end, mblen))) {
      p += *mblen;
      ++(*clen);
      c = *fail;
    }
    return p;
  }

  // Decodes one character at |begin| with the converter that matches
  // charset_ (UTF-8 is the fallback for unknown values), stores its byte
  // length in *mblen and returns the table entry for the decoded code.
  inline CharInfo getCharInfo(const char *begin,
                              const char *end,
                              size_t *mblen) const {
    unsigned short int t = 0;
#ifndef MECAB_USE_UTF8_ONLY
    switch (charset_) {
      case EUC_JP:  t = euc_to_ucs2(begin, end, mblen); break;
      case CP932:   t = cp932_to_ucs2(begin, end, mblen); break;
      case UTF8:    t = utf8_to_ucs2(begin, end, mblen); break;
      case UTF16:   t = utf16_to_ucs2(begin, end, mblen); break;
      case UTF16LE: t = utf16le_to_ucs2(begin, end, mblen); break;
      case UTF16BE: t = utf16be_to_ucs2(begin, end, mblen); break;
      case ASCII:   t = ascii_to_ucs2(begin, end, mblen); break;
      default:      t = utf8_to_ucs2(begin, end, mblen); break;
    }
#else
    switch (charset_) {
      case UTF8:    t = utf8_to_ucs2(begin, end, mblen); break;
      case UTF16:   t = utf16_to_ucs2(begin, end, mblen); break;
      case UTF16LE: t = utf16le_to_ucs2(begin, end, mblen); break;
      case UTF16BE: t = utf16be_to_ucs2(begin, end, mblen); break;
      default:      t = utf8_to_ucs2(begin, end, mblen); break;
    }
#endif
    return map_[t];
  }

  // Direct table access by code; |id| is not range-checked.
  inline CharInfo getCharInfo(size_t id) const { return map_[id]; }

  static bool compile(const char *, const char *, const char*);

  CharProperty(): cmmap_(new Mmap<char>), map_(0), charset_(0) {}
  virtual ~CharProperty() { this->close(); }

 private:
  scoped_ptr<Mmap<char> > cmmap_;
  std::vector<const char *> clist_;
  const CharInfo *map_;   // null until a table is attached
  int charset_;
  whatlog what_;
};
}
#endif // MECAB_CHARACTER_CATEGORY_H_

134
MorphereAnalyzer/common.h Normal file
View File

@@ -0,0 +1,134 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_COMMON_H_
#define MECAB_COMMON_H_
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <string>
#include <iostream>
#include <sstream>
// Cygwin-only define; NOTE(review): presumably a libstdc++ workaround --
// confirm it is still required.
#ifdef __CYGWIN__
#define _GLIBCXX_EXPORT_TEMPLATE
#endif
// Pull in the generated build configuration when the build system provides it.
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
// MSVC / Cygwin: define NOMINMAX and route snprintf to _snprintf.
#if defined(_MSC_VER) || defined(__CYGWIN__)
#define NOMINMAX
#define snprintf _snprintf
#endif
// Banner text (the literal continues on the next line).
#define COPYRIGHT "MeCab: Yet Another Part-of-Speech and Morphological Analyzer\n\
\nCopyright(C) 2001-2012 Taku Kudo \nCopyright(C) 2004-2008 Nippon Telegraph and Telephone Corporation\n"
// File names used for dictionary, model and definition files.
#define SYS_DIC_FILE "sys.dic"
#define UNK_DEF_FILE "unk.def"
#define UNK_DIC_FILE "unk.dic"
#define MATRIX_DEF_FILE "matrix.def"
#define MATRIX_FILE "matrix.bin"
#define CHAR_PROPERTY_DEF_FILE "char.def"
#define CHAR_PROPERTY_FILE "char.bin"
#define FEATURE_FILE "feature.def"
#define REWRITE_FILE "rewrite.def"
#define LEFT_ID_FILE "left-id.def"
#define RIGHT_ID_FILE "right-id.def"
#define POS_ID_FILE "pos-id.def"
#define MODEL_DEF_FILE "model.def"
#define MODEL_FILE "model.bin"
#define DICRC "dicrc"
// Key string for the BOS/EOS entry.
#define BOS_KEY "BOS/EOS"
#define DEFAULT_MAX_GROUPING_SIZE 24
// Built-in default contents for char.def, unk.def and matrix.def
// (going by the macro names; the users are outside this header).
#define CHAR_PROPERTY_DEF_DEFAULT "DEFAULT 1 0 0\nSPACE 0 1 0\n0x0020 SPACE\n"
#define UNK_DEF_DEFAULT "DEFAULT,0,0,0,*\nSPACE,0,0,0,*\n"
#define MATRIX_DEF_DEFAULT "1 1\n0 0 0\n"
// Default charset: forced to UTF-8 in UTF-8-only builds; otherwise, unless
// already defined, SHIFT-JIS on native Windows and EUC-JP elsewhere.
#ifdef MECAB_USE_UTF8_ONLY
#define MECAB_DEFAULT_CHARSET "UTF-8"
#endif
#ifndef MECAB_DEFAULT_CHARSET
#if defined(_WIN32) && !defined(__CYGWIN__)
#define MECAB_DEFAULT_CHARSET "SHIFT-JIS"
#else
#define MECAB_DEFAULT_CHARSET "EUC-JP"
#endif
#endif
// Size limits and buffer sizes.
#define NBEST_MAX 512
#define NODE_FREELIST_SIZE 512
#define PATH_FREELIST_SIZE 2048
#define MIN_INPUT_BUFFER_SIZE 8192
#define MAX_INPUT_BUFFER_SIZE (8192*640)
#define BUF_SIZE 8192
// Fallbacks in case <cstdlib> did not define the exit codes.
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif
#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif
// WPATH(path): on native Windows the path is converted with
// MeCab::Utf8ToWide; elsewhere it is passed through unchanged.
// The Windows form yields a pointer into a temporary string, so it is only
// valid until the end of the full expression that uses it.
#if defined(_WIN32) && !defined(__CYGWIN__)
#define WPATH(path) (MeCab::Utf8ToWide(path).c_str())
#else
#define WPATH(path) (path)
#endif
namespace MeCab {
// Helper used by CHECK_DIE: a temporary of this type is created after the
// diagnostic has been streamed to std::cerr, and its destructor ends the
// process.
class die {
 public:
  die() {}

  // Finishes the current std::cerr line (newline + flush), then terminates
  // the process with exit status -1.
  ~die() {
    std::cerr << std::endl;
    exit(-1);
  }

  // Absorbs the stream expression on the right-hand side; always yields 0 so
  // that both arms of the conditional in CHECK_DIE have type int.
  int operator&(std::ostream &) {
    const int ignored = 0;
    return ignored;
  }
};
// Error-message sink: text is streamed into stream_ and read back via str().
struct whatlog {
  std::ostringstream stream_;
  std::string str_;

  // Snapshots the text written so far into str_ and returns it as a C string.
  // The pointer stays valid until the next call to str() or until str_ is
  // modified.
  const char *str() {
    str_.assign(stream_.str());
    return str_.c_str();
  }
};
// Helper used by CHECK_FALSE: binds to a whatlog and turns the streamed
// diagnostic expression into the boolean `false`.
class wlog {
 public:
  // Resets the error-state flags of the log stream. This is
  // std::ios::clear(); text that is already in the stream is kept.
  wlog(whatlog *what) : what_(what) {
    std::ostringstream &log_stream = what_->stream_;
    log_stream.clear();
  }

  // Absorbs the stream expression on the right-hand side; always false.
  bool operator&(std::ostream &) {
    const bool failed_result = false;
    return failed_result;
  }

 private:
  whatlog *what_;
};
} // MeCab
// WHAT: the error-message stream of the `what_` member that must be in scope
// where the macro is used.
#define WHAT what_.stream_
// CHECK_FALSE(cond) << "message";
// When |cond| is false, writes "<file>(<line>) [<cond>] " plus anything
// streamed after the macro into what_.stream_ and returns the result of
// wlog::operator& (false) from the enclosing function. When |cond| is true
// the trailing stream expression is not evaluated.
#define CHECK_FALSE(condition) \
if (condition) {} else return \
wlog(&what_) & what_.stream_ << \
__FILE__ << "(" << __LINE__ << ") [" << #condition << "] "
// CHECK_DIE(cond) << "message";
// When |cond| is false, writes "<file>(<line>) [<cond>] " plus the message to
// std::cerr; the temporary `die` object is destroyed at the end of the full
// expression, which prints a newline and calls exit(-1).
#define CHECK_DIE(condition) \
(condition) ? 0 : die() & std::cerr << __FILE__ << \
"(" << __LINE__ << ") [" << #condition << "] "
#endif // MECAB_COMMON_H_

View File

@@ -0,0 +1,80 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_CONNECTOR_H_
#define MECAB_CONNECTOR_H_
#include "mecab.h"
#include "mmap.h"
#include "common.h"
#include "scoped_ptr.h"
namespace MeCab {
class Param;
// Connection-cost matrix between context ids. The matrix is a flat array of
// short values addressed as matrix_[rcAttr + lsize_ * lcAttr].
class Connector {
 private:
  scoped_ptr<Mmap<short> > cmmap_;
  short          *matrix_;
  unsigned short  lsize_;
  unsigned short  rsize_;
  whatlog         what_;

  // mecab-ko
  // Pairs a part-of-speech id with a penalty cost.
  class SpacePenalty {
   public:
    unsigned short posid_;
    int penalty_cost_;

    SpacePenalty(unsigned short posid, int penalty_cost)
        : posid_(posid), penalty_cost_(penalty_cost) {}
  };
  std::vector<SpacePenalty> left_space_penalty_factor_;

  void set_left_space_penalty_factor(const char *factor_str);
  int get_space_penalty_cost(const Node *rNode) const;

 public:
  bool open(const Param &param);
  void close();
  void clear() {}

  // Text accumulated in the error log of this object.
  const char *what() { return what_.str(); }

  size_t left_size() const { return static_cast<size_t>(lsize_); }
  size_t right_size() const { return static_cast<size_t>(rsize_); }

  // The sizes are stored as unsigned short, so larger values are truncated.
  void set_left_size(size_t lsize) {
    lsize_ = static_cast<unsigned short>(lsize);
  }
  void set_right_size(size_t rsize) {
    rsize_ = static_cast<unsigned short>(rsize);
  }

  // Raw matrix lookup; the attributes are not range-checked.
  inline int transition_cost(unsigned short rcAttr,
                             unsigned short lcAttr) const {
    const int cell = rcAttr + lsize_ * lcAttr;
    return matrix_[cell];
  }

  int cost(const Node *lNode, const Node *rNode) const;

  // access to raw matrix
  short *mutable_matrix() { return matrix_; }
  const short *matrix() const { return matrix_; }

  bool openText(const char *filename);
  bool open(const char *filename, const char *white_space_penalty_info = "", const char *mode = "r");

  // Note the crossing: |lid| is checked against rsize_ and |rid| against
  // lsize_. Both arguments are unsigned, so only the upper bounds matter.
  bool is_valid(size_t lid, size_t rid) const {
    if (lid >= rsize_) return false;
    return rid < lsize_;
  }

  static bool compile(const char *, const char *);

  explicit Connector()
      : cmmap_(new Mmap<short>), matrix_(0), lsize_(0), rsize_(0) {}
  virtual ~Connector() { this->close(); }
};
}
#endif // MECAB_CONNECTOR_H_

View File

@@ -0,0 +1,50 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_CONTEXT_ID_H
#define MECAB_CONTEXT_ID_H
#include <map>
#include <string>
#include <vector>
namespace MeCab {
class Param;
class Iconv;

// Two string -> int tables, one for "left" and one for "right" context
// strings, plus the BOS strings of each side. The non-inline members are
// defined outside this header.
class ContextID {
 private:
  std::map<std::string, int> left_;
  std::map<std::string, int> right_;
  std::string left_bos_;
  std::string right_bos_;

 public:
  void clear();
  void add(const char *l, const char *r);
  void addBOS(const char *l, const char *r);
  bool save(const char* lfile,
            const char* rfile);
  bool build();
  bool open(const char *lfile,
            const char *rfile,
            Iconv *iconv = 0);
  int lid(const char *l) const;
  int rid(const char *r) const;

  // Number of entries in the left / right table.
  size_t left_size() const { return left_.size(); }
  size_t right_size() const { return right_.size(); }

  // Read-only access to the underlying tables.
  const std::map<std::string, int>& left_ids() const { return left_; }
  const std::map<std::string, int>& right_ids() const { return right_; }

  // True when |lid| is below left_size() and |rid| is below right_size().
  // Now const-qualified so it can be called through a const ContextID, like
  // the other accessors. The arguments are unsigned, so no lower-bound test
  // is needed.
  bool is_valid(size_t lid, size_t rid) const {
    return lid < left_size() && rid < right_size();
  }
};
}
#endif

518
MorphereAnalyzer/darts.h Normal file
View File

@@ -0,0 +1,518 @@
/*
Darts -- Double-ARray Trie System
Copyright(C) 2001-2007 Taku Kudo <taku@chasen.org>
*/
#ifndef DARTS_H_
#define DARTS_H_
#define DARTS_VERSION "0.31"
#include <vector>
#include <cstring>
#include <cstdio>
#ifdef HAVE_ZLIB_H
namespace zlib {
#include <zlib.h>
}
#define SH(p)((unsigned short)(unsigned char)((p)[0]) | ((unsigned short)(unsigned char)((p)[1]) << 8))
#define LG(p)((unsigned long)(SH(p)) |((unsigned long)(SH((p)+2)) << 16))
#endif
namespace MeCab {
namespace Darts {
// Returns |x| when it compares greater than |y|, otherwise |y|.
template <class T> inline T _max(T x, T y) {
  if (x > y) return x;
  return y;
}
// Reallocates an array that was created with new[].
//
//   ptr - current array (may be null when n is 0); it is deleted.
//   n   - number of valid elements in |ptr|.
//   l   - length of the new array.
//   v   - value used for every element that is not copied from |ptr|.
//
// Returns the new array of |l| elements. At most min(n, l) elements are
// copied: the previous version copied |n| elements unconditionally and wrote
// past the end of the new array whenever it was asked to shrink.
template <class T> inline T* _resize(T* ptr, size_t n, size_t l, T v) {
  T *tmp = new T[l];
  const size_t keep = (n < l) ? n : l;
  for (size_t i = 0; i < keep; ++i) tmp[i] = ptr[i];
  for (size_t i = keep; i < l; ++i) tmp[i] = v;
  delete [] ptr;
  return tmp;
}
// Functor that returns the number of elements in front of the first
// zero-valued element of |key|.
template <class T>
class Length {
 public:
  size_t operator()(const T *key) const {
    size_t count = 0;
    while (key[count] != static_cast<T>(0)) ++count;
    return count;
  }
};

// char keys are measured with std::strlen.
template <> class Length<char> {
 public:
  size_t operator()(const char *key) const {
    return std::strlen(key);
  }
};
template <class node_type_, class node_u_type_,
class array_type_, class array_u_type_,
class length_func_ = Length<node_type_> >
class DoubleArrayImpl {
private:
struct node_t {
array_u_type_ code;
size_t depth;
size_t left;
size_t right;
};
struct unit_t {
array_type_ base;
array_u_type_ check;
};
unit_t *array_;
unsigned char *used_;
size_t size_;
size_t alloc_size_;
node_type_ **key_;
size_t key_size_;
size_t *length_;
array_type_ *value_;
size_t progress_;
size_t next_check_pos_;
bool no_delete_;
int error_;
int (*progress_func_)(size_t, size_t);
size_t resize(const size_t new_size) {
unit_t tmp;
tmp.base = 0;
tmp.check = 0;
array_ = _resize(array_, alloc_size_, new_size, tmp);
used_ = _resize(used_, alloc_size_, new_size,
static_cast<unsigned char>(0));
alloc_size_ = new_size;
return new_size;
}
size_t fetch(const node_t &parent, std::vector <node_t> &siblings) {
if (error_ < 0) return 0;
array_u_type_ prev = 0;
for (size_t i = parent.left; i < parent.right; ++i) {
if ((length_ ? length_[i] : length_func_()(key_[i])) < parent.depth)
continue;
const node_u_type_ *tmp = reinterpret_cast<node_u_type_ *>(key_[i]);
array_u_type_ cur = 0;
if ((length_ ? length_[i] : length_func_()(key_[i])) != parent.depth)
cur = (array_u_type_)tmp[parent.depth] + 1;
if (prev > cur) {
error_ = -3;
return 0;
}
if (cur != prev || siblings.empty()) {
node_t tmp_node;
tmp_node.depth = parent.depth + 1;
tmp_node.code = cur;
tmp_node.left = i;
if (!siblings.empty()) siblings[siblings.size()-1].right = i;
siblings.push_back(tmp_node);
}
prev = cur;
}
if (!siblings.empty())
siblings[siblings.size()-1].right = parent.right;
return siblings.size();
}
// Places the nodes in |siblings| into the double array and recurses into
// their children. Returns the chosen base offset (`begin`), or 0 when
// error_ is negative on entry or a value check fails (error_ = -2).
// |siblings| must not be empty: siblings[0] is read unconditionally.
size_t insert(const std::vector <node_t> &siblings) {
if (error_ < 0) return 0;
size_t begin = 0;
// Start searching at the larger of next_check_pos_ and the first code.
size_t pos = _max((size_t)siblings[0].code + 1, next_check_pos_) - 1;
size_t nonzero_num = 0;
int first = 0;
if (alloc_size_ <= pos) resize(pos + 1);
// Search for a `begin` such that begin + code is free for every sibling.
while (true) {
next:
++pos;
if (alloc_size_ <= pos) resize(pos + 1);
if (array_[pos].check) {
++nonzero_num;
continue;
} else if (!first) {
// Remember the first free slot seen during this search.
next_check_pos_ = pos;
first = 1;
}
begin = pos - siblings[0].code;
// Grow the arrays when the last sibling would fall outside of them.
// NOTE(review): progress_ is 0 until the first leaf has been stored, which
// makes the ratio below infinite if this branch is taken that early --
// confirm it cannot happen with the initial allocation made by build().
if (alloc_size_ <= (begin + siblings[siblings.size()-1].code))
resize(static_cast<size_t>(alloc_size_ *
_max(1.05, 1.0 * key_size_ / progress_)));
// This base offset is already taken by another node set.
if (used_[begin]) continue;
// Any occupied target slot restarts the search at the next position.
for (size_t i = 1; i < siblings.size(); ++i)
if (array_[begin + siblings[i].code].check != 0) goto next;
break;
}
// -- Simple heuristics --
// if the percentage of non-empty contents in check between the index
// 'next_check_pos' and 'check' is greater than some constant
// value(e.g. 0.9),
// new 'next_check_pos' index is written by 'check'.
if (1.0 * nonzero_num/(pos - next_check_pos_ + 1) >= 0.95)
next_check_pos_ = pos;
used_[begin] = 1;
// Track the highest index in use (+1).
size_ = _max(size_,
begin +
static_cast<size_t>(siblings[siblings.size() - 1].code + 1));
// Claim the slots: every sibling's check field points back to `begin`.
for (size_t i = 0; i < siblings.size(); ++i)
array_[begin + siblings[i].code].check = begin;
for (size_t i = 0; i < siblings.size(); ++i) {
std::vector <node_t> new_siblings;
if (!fetch(siblings[i], new_siblings)) {
// No children: store the value (or the key index when value_ is null)
// encoded as the negative number -v-1.
array_[begin + siblings[i].code].base =
value_ ?
static_cast<array_type_>(-value_[siblings[i].left]-1) :
static_cast<array_type_>(-siblings[i].left-1);
// The encoded value must be negative; otherwise the value is rejected.
if (value_ && (array_type_)(-value_[siblings[i].left]-1) >= 0) {
error_ = -2;
return 0;
}
++progress_;
if (progress_func_)(*progress_func_)(progress_, key_size_);
} else {
// Children exist: place them recursively and link to their base offset.
size_t h = insert(new_siblings);
array_[begin + siblings[i].code].base = h;
}
}
return begin;
}
public:
typedef array_type_ value_type;
typedef node_type_ key_type;
typedef array_type_ result_type; // for compatibility
struct result_pair_type {
value_type value;
size_t length;
};
// Creates an empty double array. Every member is given a defined value; the
// build-time members (key_, key_size_, length_, value_, progress_,
// next_check_pos_, progress_func_) used to be left uninitialised until
// build() was called.
explicit DoubleArrayImpl(): array_(0), used_(0),
                            size_(0), alloc_size_(0),
                            key_(0), key_size_(0),
                            length_(0), value_(0),
                            progress_(0), next_check_pos_(0),
                            no_delete_(false), error_(0),
                            progress_func_(0) {}

// Releases the arrays through clear().
~DoubleArrayImpl() { clear(); }
// Stores a search hit in a plain value result; the length is dropped.
void set_result(value_type& x, value_type r, size_t /*length*/) const {
  x = r;
}

// Stores a search hit together with the matched length.
void set_result(result_pair_type& x, value_type r, size_t l) const {
  x.length = l;
  x.value = r;
}
// Attaches an externally owned unit array. The current contents are released
// first; the attached memory is marked as not owned, so clear() will not
// delete it. |size| is stored as the number of units.
void set_array(void *ptr, size_t size = 0) {
  clear();
  size_ = size;
  no_delete_ = true;
  array_ = static_cast<unit_t *>(ptr);
}
// Read-only, untyped view of the unit array (null when empty).
const void *array() const {
  const void *raw_units = array_;
  return raw_units;
}
// Releases the storage and returns to the empty state. array_ is deleted
// only when this object owns it (no_delete_ is false); used_ is always
// deleted.
void clear() {
  if (!no_delete_) {
    delete [] array_;
  }
  delete [] used_;

  used_ = 0;
  array_ = 0;
  size_ = 0;
  alloc_size_ = 0;
  no_delete_ = false;
}
size_t unit_size() const { return sizeof(unit_t); }
size_t size() const { return size_; }
size_t total_size() const { return size_ * sizeof(unit_t); }
size_t nonzero_size() const {
size_t result = 0;
for (size_t i = 0; i < size_; ++i)
if (array_[i].check) ++result;
return result;
}
int build(size_t key_size,
key_type **key,
size_t *length = 0,
value_type *value = 0,
int (*progress_func)(size_t, size_t) = 0) {
if (!key_size || !key) return 0;
progress_func_ = progress_func;
key_ = key;
length_ = length;
key_size_ = key_size;
value_ = value;
progress_ = 0;
resize(8192);
array_[0].base = 1;
next_check_pos_ = 0;
node_t root_node;
root_node.left = 0;
root_node.right = key_size;
root_node.depth = 0;
std::vector <node_t> siblings;
fetch(root_node, siblings);
insert(siblings);
size_ += (1 << 8 * sizeof(key_type)) + 1;
if (size_ >= alloc_size_) resize(size_);
delete [] used_;
used_ = 0;
return error_;
}
// Loads the unit array from |file|.
//
//   mode   - fopen mode, "rb" by default.
//   offset - byte position at which the array starts.
//   size   - number of bytes to load; 0 means "everything from |offset| to
//            the end of the file".
//
// Returns 0 on success and -1 on any failure. The file handle is closed on
// every path (it used to leak on each error return). The default size is
// now measured from |offset| instead of from the start of the file, and a
// failed read leaves the object empty rather than half initialised. If
// opening or seeking fails, the current contents are left untouched.
int open(const char *file,
         const char *mode = "rb",
         size_t offset = 0,
         size_t size = 0) {
  std::FILE *fp = std::fopen(file, mode);
  if (!fp) return -1;

  bool ok = std::fseek(fp, static_cast<long>(offset), SEEK_SET) == 0;
  if (ok && !size) {
    ok = std::fseek(fp, 0L, SEEK_END) == 0;
    const long end_pos = ok ? std::ftell(fp) : -1L;
    ok = ok && end_pos >= 0 && static_cast<size_t>(end_pos) >= offset;
    if (ok) {
      size = static_cast<size_t>(end_pos) - offset;
      ok = std::fseek(fp, static_cast<long>(offset), SEEK_SET) == 0;
    }
  }
  if (!ok) {
    std::fclose(fp);
    return -1;
  }

  clear();
  size_ = size / sizeof(unit_t);
  array_ = new unit_t[size_];

  const size_t units_read = std::fread(array_, sizeof(unit_t), size_, fp);
  std::fclose(fp);

  if (units_read != size_) {
    clear();
    return -1;
  }
  return 0;
}
// Writes the unit array to |file|.
//
//   mode   - fopen mode, "wb" by default.
//   offset - accepted for interface compatibility; it is not used.
//
// Returns 0 on success; -1 when the array is empty, the file cannot be
// opened, not all units could be written, or closing (and thereby flushing)
// the file fails. The file handle is closed on every path; it used to leak
// after a short write.
int save(const char *file,
         const char *mode = "wb",
         size_t offset = 0) {
  (void)offset;
  if (!size_) return -1;

  std::FILE *fp = std::fopen(file, mode);
  if (!fp) return -1;

  const size_t units_written =
      std::fwrite(array_, sizeof(unit_t), size_, fp);
  const bool closed_ok = (std::fclose(fp) == 0);

  return (units_written == size_ && closed_ok) ? 0 : -1;
}
#ifdef HAVE_ZLIB_H
// Loads the unit array from a gzip-compressed |file|.
//
//   mode   - mode passed to fopen and zlib's gzopen, "rb" by default.
//   offset - position in the uncompressed data at which the array starts.
//   size   - number of units to load. When 0, the count is derived from
//            the 32-bit value read from the last four bytes of the file,
//            divided by the unit size.
//            NOTE(review): unlike open(), an explicit |size| is not divided
//            by sizeof(unit_t) here -- kept as is, confirm with callers.
//
// Returns 0 on success and -1 on failure; after a failure the object is
// empty.
//
// Fixes over the previous version:
//  * gzseek() returns the resulting offset, not 0, so every call with a
//    non-zero |offset| was reported as a failure; only -1 is an error.
//  * the gzFile handle is closed on the error path as well.
//  * a read error reported by gzread() is no longer ignored.
int gzopen(const char *file,
           const char *mode = "rb",
           size_t offset = 0,
           size_t size = 0) {
  std::FILE *fp = std::fopen(file, mode);
  if (!fp) return -1;

  clear();
  size_ = size;
  if (!size_) {
    if (std::fseek(fp, -8, SEEK_END) == 0) {
      char buf[8];
      if (std::fread(buf, 1, 8, fp) != sizeof(buf)) {
        std::fclose(fp);
        return -1;
      }
      size_ = LG(buf+4);
      size_ /= sizeof(unit_t);
    }
  }
  std::fclose(fp);
  if (!size_) return -1;

  zlib::gzFile gzfp = zlib::gzopen(file, mode);
  if (!gzfp) {
    clear();
    return -1;
  }

  array_ = new unit_t[size_];
  bool ok = (zlib::gzseek(gzfp, offset, SEEK_SET) != -1);
  if (ok) {
    const int bytes_read =
        zlib::gzread(gzfp, array_,
                     static_cast<unsigned>(sizeof(unit_t) * size_));
    ok = (bytes_read >= 0);
  }
  zlib::gzclose(gzfp);

  if (!ok) {
    clear();
    return -1;
  }
  return 0;
}
// Writes the unit array to |file| through zlib.
//
//   mode   - mode passed to zlib's gzopen, "wb" by default.
//   offset - accepted for interface compatibility; it is not used.
//
// Returns 0 on success and -1 when the file cannot be opened, when gzwrite()
// stores fewer bytes than requested, or when gzclose() reports an error.
// Both results used to be ignored, so a failed write still returned 0.
int gzsave(const char *file, const char *mode = "wb",
           size_t offset = 0) {
  (void)offset;
  zlib::gzFile gzfp = zlib::gzopen(file, mode);
  if (!gzfp) return -1;

  const size_t byte_count = sizeof(unit_t) * size_;
  const int bytes_written =
      zlib::gzwrite(gzfp, array_, static_cast<unsigned>(byte_count));
  const int close_status = zlib::gzclose(gzfp);  // 0 means success

  if (bytes_written < 0 ||
      static_cast<size_t>(bytes_written) != byte_count) {
    return -1;
  }
  return (close_status == 0) ? 0 : -1;
}
#endif
template <class T>
inline void exactMatchSearch(const key_type *key,
T & result,
size_t len = 0,
size_t node_pos = 0) const {
result = exactMatchSearch<T>(key, len, node_pos);
return;
}
template <class T>
inline T exactMatchSearch(const key_type *key,
size_t len = 0,
size_t node_pos = 0) const {
if (!len) len = length_func_()(key);
T result;
set_result(result, -1, 0);
register array_type_ b = array_[node_pos].base;
register array_u_type_ p;
for (register size_t i = 0; i < len; ++i) {
p = b +(node_u_type_)(key[i]) + 1;
if (static_cast<array_u_type_>(b) == array_[p].check)
b = array_[p].base;
else
return result;
}
p = b;
array_type_ n = array_[p].base;
if (static_cast<array_u_type_>(b) == array_[p].check && n < 0)
set_result(result, -n-1, len);
return result;
}
template <class T>
size_t commonPrefixSearch(const key_type *key,
T* result,
size_t result_len,
size_t len = 0,
size_t node_pos = 0) const {
if (!len) len = length_func_()(key);
register array_type_ b = array_[node_pos].base;
register size_t num = 0;
register array_type_ n;
register array_u_type_ p;
for (register size_t i = 0; i < len; ++i) {
p = b; // + 0;
n = array_[p].base;
if ((array_u_type_) b == array_[p].check && n < 0) {
// result[num] = -n-1;
if (num < result_len) set_result(result[num], -n-1, i);
++num;
}
p = b +(node_u_type_)(key[i]) + 1;
if ((array_u_type_) b == array_[p].check)
b = array_[p].base;
else
return num;
}
p = b;
n = array_[p].base;
if ((array_u_type_)b == array_[p].check && n < 0) {
if (num < result_len) set_result(result[num], -n-1, len);
++num;
}
return num;
}
// Resumable walk through the trie.
//
//   node_pos - in: unit to start from; out: last unit that was reached.
//   key_pos  - in: first key position to consume; out: position at which
//              the walk stopped.
//   len      - key length; 0 means "measure it with the length functor".
//
// Returns the stored value when the walk ends on a key, -1 when the path
// exists but carries no value, and -2 when a transition is missing.
//
// The `register` storage class was removed from the locals: it is
// ill-formed since C++17 and had no effect on behaviour.
value_type traverse(const key_type *key,
                    size_t &node_pos,
                    size_t &key_pos,
                    size_t len = 0) const {
  if (!len) len = length_func_()(key);

  array_type_ b = array_[node_pos].base;
  array_u_type_ p;

  for (; key_pos < len; ++key_pos) {
    p = b +(node_u_type_)(key[key_pos]) + 1;
    if (static_cast<array_u_type_>(b) == array_[p].check) {
      node_pos = p;
      b = array_[p].base;
    } else {
      return -2;  // no node
    }
  }

  p = b;
  array_type_ n = array_[p].base;
  if (static_cast<array_u_type_>(b) == array_[p].check && n < 0)
    return -n-1;

  return -1;  // found, but no value
}
};
// Selection of the DoubleArray typedef. The literal 4 on the left of each
// comparison is a value baked into this copy of the header
// (NOTE(review): presumably the configured element size in bytes -- confirm).
// With the value 4 only the int / unsigned int variant below is compiled;
// the other branches are inactive.
#if 4 == 2
typedef Darts::DoubleArrayImpl<char, unsigned char, short,
unsigned short> DoubleArray;
#define DARTS_ARRAY_SIZE_IS_DEFINED 1
#endif
// Active branch: int base values, unsigned int check values.
#if 4 == 4 && !defined(DARTS_ARRAY_SIZE_IS_DEFINED)
typedef Darts::DoubleArrayImpl<char, unsigned char, int,
unsigned int> DoubleArray;
#define DARTS_ARRAY_SIZE_IS_DEFINED 1
#endif
// Never taken here: the branch above has already defined the guard macro.
#if 4 == 4 && !defined(DARTS_ARRAY_SIZE_IS_DEFINED)
typedef Darts::DoubleArrayImpl<char, unsigned char, long,
unsigned long> DoubleArray;
#define DARTS_ARRAY_SIZE_IS_DEFINED 1
#endif
#if 4 == 8 && !defined(DARTS_ARRAY_SIZE_IS_DEFINED)
typedef Darts::DoubleArrayImpl<char, unsigned char, long long,
unsigned long long> DoubleArray;
#endif
}
}
#endif

25
MorphereAnalyzer/dicrc Normal file
View File

@@ -0,0 +1,25 @@
;
; Configuration file of mecab-ko-dic
;
# 비용 값으로 변환할 때 사용하는 배율 팩터입니다. 700에서 800 사이의 값이면 문제가 없습니다.
cost-factor = 800
# 문장의 시작, 문장 끝에 대한 소성(素性)입니다. CSV로 표현합니다.
bos-feature = BOS/EOS,*,*,*,*,*,*,*
# 알려진 단어의 경우 소성(素性)의 처음부터 몇 개까지 일치하면 정답으로
# 인정하는지를 지정합니다. 일반적으로 알려진 단어는 품사 활용 등의 정보만
# 맞추면 되기 때문에, "읽기", "발음" 소성(素性)은 무시하도록 합니다.
# 여기에서는 4가지가 평가됩니다.
eval-size = 4
# 알 수 없는 단어의 경우
# 소성의 처음부터 몇 개까지 일치하면 정답으로 인정할지를 지정합니다.
unk-eval-size = 2
# dicrc, char.def, unk.def, pos-id.def 파일의 문자 코드셋입니다.
config-charset = UTF-8
# 좌측에 공백을 포함하는 품사의 연접 비용을 늘리기 위한 설정입니다.
# mecab-ko에서만 사용되는 설정입니다. 다음과 같은 형식을 가집니다.
# <posid 1>,<posid 1 penalty cost>,<posid 2>,<posid 2 penalty cost>...
#
# 예) 120,6000 => posid가 120인 품사(조사)의 좌측에 공백을 포함할 경우
# 연접 비용을 6000만큼 늘림
left-space-penalty-factor = 100,3000,120,6000,172,3000,183,3000,184,3000,185,3000,200,3000,210,6000,220,3000,221,3000,222,3000,230,3000

View File

@@ -0,0 +1,99 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_DICTIONARY_H_
#define MECAB_DICTIONARY_H_
#include "mecab.h"
#include "mmap.h"
#include "darts.h"
#include "char_property.h"
namespace MeCab {
class Param;
// One dictionary entry record. Dictionary::token() returns pointers into an
// array of these, and Dictionary::feature() adds `feature` to the base of
// the feature string area.
struct Token {
// NOTE(review): presumably the left / right context attribute ids -- confirm.
unsigned short lcAttr;
unsigned short rcAttr;
// NOTE(review): presumably the part-of-speech id -- confirm.
unsigned short posid;
// NOTE(review): presumably the word cost -- confirm.
short wcost;
// Offset of this entry's feature string (see Dictionary::feature()).
unsigned int feature;
// Not used anywhere in this header.
unsigned int compound;
};
// Read-only view of a compiled dictionary: a double-array index (da_) over
// the keys, a token array and a feature string area. The data is attached by
// open(), which is defined outside this header.
class Dictionary {
 public:
  typedef Darts::DoubleArray::result_pair_type result_type;

  bool open(const char *filename, const char *mode = "r");
  void close();

  // Finds all entries whose key is a prefix of the first |len| bytes of
  // |key|. At most |rlen| hits are stored in |result|; the return value is
  // the number of hits found.
  size_t commonPrefixSearch(const char* key, size_t len,
                            result_type *result,
                            size_t rlen) const {
    return da_.commonPrefixSearch(key, result, rlen, len);
  }

  // Looks up |key| (NUL-terminated) as a whole; value is -1 when not found.
  result_type exactMatchSearch(const char* key) const {
    result_type n;
    da_.exactMatchSearch(key, n);
    return n;
  }

  // True when version, left/right sizes and decoded charset all agree.
  bool isCompatible(const Dictionary &d) const {
    return(version_ == d.version_ &&
           lsize_  == d.lsize_   &&
           rsize_  == d.rsize_   &&
           decode_charset(charset_) ==
           decode_charset(d.charset_));
  }

  const char *filename() const { return filename_.c_str(); }
  const char *charset() const { return const_cast<const char*>(charset_); }
  unsigned short version() const { return version_; }
  size_t  size() const { return static_cast<size_t>(lexsize_); }
  int type() const { return static_cast<int>(type_);  }
  size_t lsize() const { return static_cast<size_t>(lsize_); }
  size_t rsize() const { return static_cast<size_t>(rsize_); }

  // A search value packs the index of the first token in its upper bits and
  // the number of tokens in its low 8 bits.
  const Token *token(const result_type &n) const {
    return token_ +(n.value >> 8);
  }
  size_t token_size(const result_type &n) const { return 0xff & n.value; }

  // Feature string of |t|: its offset is relative to the feature area.
  const char *feature(const Token &t) const { return feature_ + t.feature; }

  static bool compile(const Param &param,
                      const std::vector<std::string> &dics,
                      const char *output);  // outputs

  static bool assignUserDictionaryCosts(
      const Param &param,
      const std::vector<std::string> &dics,
      const char *output);  // outputs

  // Text accumulated in the error log of this object.
  const char *what() { return what_.str(); }

  // All header fields start at zero. version_, type_, lexsize_, lsize_ and
  // rsize_ used to be left uninitialised, so version(), size(), type(),
  // lsize(), rsize() and isCompatible() read indeterminate values on an
  // object that had not been opened.
  explicit Dictionary(): dmmap_(new Mmap<char>), token_(0),
                         feature_(0), charset_(0),
                         version_(0), type_(0), lexsize_(0),
                         lsize_(0), rsize_(0) {}
  virtual ~Dictionary() { this->close(); }

 private:
  scoped_ptr<Mmap<char> > dmmap_;
  const Token        *token_;
  const char         *feature_;
  const char         *charset_;
  unsigned int        version_;
  unsigned int        type_;
  unsigned int        lexsize_;
  unsigned int        lsize_;
  unsigned int        rsize_;
  std::string         filename_;
  whatlog             what_;
  Darts::DoubleArray  da_;
};
}
#endif // MECAB_DICTIONARY_H_

View File

@@ -0,0 +1,75 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_DICTIONARY_REWRITER_H
#define MECAB_DICTIONARY_REWRITER_H
#include <vector>
#include <string>
#include <map>
#include "common.h"
#include "mecab.h"
#include "freelist.h"
namespace MeCab {
class Iconv;
// One rewrite rule, held as two string lists (spat_ and dpat_) that
// set_pattern() fills from |src| and |dst|. The member functions are defined
// outside this header.
// NOTE(review): presumably spat_ is the source pattern and dpat_ the
// destination pattern -- confirm against the implementation.
class RewritePattern {
private:
std::vector<std::string> spat_;
std::vector<std::string> dpat_;
public:
bool set_pattern(const char *src, const char *dst);
// Takes |size| input strings and writes to |output|; the bool result is
// presumably "the rule applied" -- confirm against the implementation.
bool rewrite(size_t size,
const char **input,
std::string *output) const;
};
// A list of RewritePattern objects. It derives publicly from std::vector, so
// all vector operations are available on it.
class RewriteRules: public std::vector<RewritePattern> {
public:
// Same signature as RewritePattern::rewrite; defined outside this header.
// NOTE(review): presumably tries the contained patterns in order -- confirm.
bool rewrite(size_t size, const char **input,
std::string *output) const;
};
// Three feature strings kept together; used as the mapped type of
// DictionaryRewriter::cache_.
// NOTE(review): the names suggest unigram / left / right features -- confirm.
struct FeatureSet {
std::string ufeature;
std::string lfeature;
std::string rfeature;
};
// Holds three rule sets (unigram, left, right) and a cache that maps a
// feature string to a FeatureSet. The member functions are defined outside
// this header.
class DictionaryRewriter {
private:
RewriteRules unigram_rewrite_;
RewriteRules left_rewrite_;
RewriteRules right_rewrite_;
std::map<std::string, FeatureSet> cache_;
public:
// |iconv| is optional (defaults to null).
bool open(const char *filename,
Iconv *iconv = 0);
void clear();
// Const variant: takes |feature| and writes three output strings.
bool rewrite(const std::string &feature,
std::string *ufeature,
std::string *lfeature,
std::string *rfeature) const;
// Non-const variant with the same parameters.
// NOTE(review): presumably the one that uses cache_ -- confirm.
bool rewrite2(const std::string &feature,
std::string *ufeature,
std::string *lfeature,
std::string *rfeature);
};
// Produces an integer id for a key string with the help of a rule set.
// open() and id() are defined outside this header.
class POSIDGenerator {
private:
RewriteRules rewrite_;
public:
// |iconv| is optional (defaults to null).
bool open(const char *filename,
Iconv *iconv = 0);
// Drops all loaded rules.
void clear() { rewrite_.clear(); }
int id(const char *key) const;
};
}
#endif

View File

@@ -0,0 +1,115 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_FEATUREINDEX_H_
#define MECAB_FEATUREINDEX_H_
#include <map>
#include <vector>
#include "mecab.h"
#include "mmap.h"
#include "darts.h"
#include "freelist.h"
#include "common.h"
#include "learner_node.h"
#include "string_buffer.h"
#include "dictionary_rewriter.h"
namespace MeCab {
class Param;
// Abstract base of the feature indexes. Derived classes implement open(),
// clear(), close(), buildFeature() and id(); everything that is not defined
// inline here lives outside this header.
class FeatureIndex {
public:
virtual bool open(const Param &param) = 0;
virtual void clear() = 0;
virtual void close() = 0;
virtual bool buildFeature(LearnerPath *path) = 0;
void set_alpha(const double *alpha);
// Returns maxid_.
size_t size() const { return maxid_; }
bool buildUnigramFeature(LearnerPath *, const char *);
bool buildBigramFeature(LearnerPath *, const char *, const char*);
void calcCost(LearnerPath *path);
void calcCost(LearnerNode *node);
// Member function that shares its name with the C library strdup.
// NOTE(review): presumably copies |str| into char_freelist_ -- confirm.
const char *strdup(const char *str);
static bool convert(const Param &param,
const char *text_filename, std::string *output);
static bool compile(const Param &param,
const char *text_filename, const char *binary_filename);
// Both free lists are created with a size argument of 8192 * 32.
explicit FeatureIndex(): feature_freelist_(8192 * 32),
char_freelist_(8192 * 32),
maxid_(0), alpha_(0) {}
virtual ~FeatureIndex() {}
protected:
std::vector<int> feature_;
ChunkFreeList<int> feature_freelist_;
ChunkFreeList<char> char_freelist_;
std::vector<const char*> unigram_templs_;
std::vector<const char*> bigram_templs_;
DictionaryRewriter rewrite_;
StringBuffer os_;
size_t maxid_;
// Raw pointer stored by set_alpha(); nothing in this header deletes it.
const double *alpha_;
virtual int id(const char *key) = 0;
const char* getIndex(char **, char **, size_t);
bool openTemplate(const Param &param);
};
// FeatureIndex implementation that keeps its feature dictionary in a
// std::map. The member functions are defined outside this header.
// NOTE(review): the name suggests it is the training-side index -- confirm.
class EncoderFeatureIndex: public FeatureIndex {
public:
// Implementations of the pure virtual functions of FeatureIndex.
bool open(const Param &param);
void close();
void clear();
bool reopen(const char *filename,
const char *charset,
std::vector<double> *alpha,
Param *param);
bool save(const char *filename, const char *header) const;
void shrink(size_t freq,
std::vector<double> *observed);
bool buildFeature(LearnerPath *path);
void clearcache();
private:
// Feature string -> integer.
std::map<std::string, int> dic_;
// String -> (pointer to an int sequence, count).
std::map<std::string, std::pair<const int*, size_t> > feature_cache_;
int id(const char *key);
};
// FeatureIndex implementation that reads a model through a memory map or a
// string buffer. The member functions other than charset() are defined
// outside this header.
class DecoderFeatureIndex: public FeatureIndex {
 public:
  // key_ and charset_ start out null. The class used to have no constructor,
  // which left both pointers indeterminate, so charset() returned garbage on
  // an object that had not been opened.
  DecoderFeatureIndex(): key_(0), charset_(0) {}

  // Implementations of the pure virtual functions of FeatureIndex.
  bool open(const Param &param);
  void clear();
  void close();
  bool buildFeature(LearnerPath *path);

  // Charset name pointer; null until one has been set.
  const char *charset() const {
    return charset_;
  }

 private:
  bool openFromArray(const char *begin, const char *end);
  bool openBinaryModel(const Param &param);
  bool openTextModel(const Param &param);
  int id(const char *key);

  Mmap<char> mmap_;
  std::string model_buffer_;
  const uint64_t *key_;
  const char *charset_;
};
}
#endif

View File

@@ -0,0 +1,85 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_FREELIST_H
#define MECAB_FREELIST_H
#include <vector>
#include <algorithm>
#include "utils.h"
#include "common.h"
namespace MeCab {
// Pool that hands out single T objects from arrays of `size` elements.
// Objects are never returned one by one: free() rewinds the pool so the same
// storage is handed out again, and the destructor releases all arrays.
template <class T> class FreeList {
 private:
  std::vector<T *> freeList;  // allocated arrays
  size_t pi_;                 // next unused slot in the current array
  size_t li_;                 // index of the current array
  size_t size;                // elements per array

 public:
  // Rewinds to the first slot of the first array; nothing is deallocated.
  void free() {
    pi_ = 0;
    li_ = 0;
  }

  // Returns the next slot, moving on to (and if necessary allocating) the
  // next array when the current one is used up.
  T* alloc() {
    if (pi_ == size) {
      ++li_;
      pi_ = 0;
    }
    if (li_ == freeList.size()) {
      freeList.push_back(new T[size]);
    }
    T *slot = freeList[li_] + pi_;
    ++pi_;
    return slot;
  }

  explicit FreeList(size_t _size): pi_(0), li_(0), size(_size) {}

  virtual ~FreeList() {
    for (li_ = 0; li_ < freeList.size(); ++li_) {
      T *chunk = freeList[li_];
      delete [] chunk;
    }
  }
};
// Pool that hands out runs of T objects carved from larger chunks. Each
// chunk is stored with its capacity. free() rewinds the pool so the chunks
// are reused; the destructor releases them.
template <class T> class ChunkFreeList {
 private:
  std::vector<std::pair<size_t, T *> > freelist_;  // (capacity, storage)
  size_t pi_;           // next unused position in the current chunk
  size_t li_;           // index of the current chunk
  size_t default_size;  // minimum capacity of a new chunk

 public:
  // Rewinds to the start of the first chunk; nothing is deallocated.
  void free() {
    pi_ = 0;
    li_ = 0;
  }

  // Allocates one element and copy-assigns *src to it.
  T* alloc(T *src) {
    T *copy = alloc(1);
    *copy = *src;
    return copy;
  }

  // Returns a run of |req| consecutive elements. A run fits into a chunk
  // only when it ends strictly before the chunk's capacity; otherwise the
  // following chunks are tried, and finally a new chunk of
  // max(req, default_size) elements is appended.
  T* alloc(size_t req = 1) {
    for (; li_ < freelist_.size(); ++li_, pi_ = 0) {
      std::pair<size_t, T *> &chunk = freelist_[li_];
      if ((pi_ + req) < chunk.first) {
        T *run = chunk.second + pi_;
        pi_ += req;
        return run;
      }
    }

    const size_t capacity = std::max(req, default_size);
    freelist_.push_back(std::make_pair(capacity, new T[capacity]));
    li_ = freelist_.size() - 1;
    pi_ += req;
    return freelist_[li_].second;
  }

  explicit ChunkFreeList(size_t _size):
      pi_(0), li_(0), default_size(_size) {}

  virtual ~ChunkFreeList() {
    for (li_ = 0; li_ < freelist_.size(); ++li_) {
      T *storage = freelist_[li_].second;
      delete [] storage;
    }
  }
};
}
#endif

View File

@@ -0,0 +1,40 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_ICONV_H
#define MECAB_ICONV_H
#if defined HAVE_ICONV
#include <iconv.h>
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
#include "windows.h"
#endif
namespace MeCab {
// Character-set converter. What it holds depends on the build: an iconv_t
// handle when HAVE_ICONV is defined, a plain int otherwise, and on native
// Windows two additional DWORD code-page values. The member functions are
// defined outside this header.
class Iconv {
private:
#ifdef HAVE_ICONV
iconv_t ic_;
#else
int ic_;
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
// NOTE(review): presumably the source and target Windows code pages -- confirm.
DWORD from_cp_;
DWORD to_cp_;
#endif
public:
explicit Iconv();
virtual ~Iconv();
// Selects the conversion by charset names.
bool open(const char *from, const char *to);
// Takes the string by pointer.
// NOTE(review): presumably converts it in place -- confirm.
bool convert(std::string *);
};
}
#endif

71
MorphereAnalyzer/lbfgs.h Normal file
View File

@@ -0,0 +1,71 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_LBFGS_H_
#define MECAB_LBFGS_H_
#include <vector>
#include <iostream>
namespace MeCab {
// Driver around lbfgs_optimize(), which is defined outside this header
// together with clear() and the Mcsrch helper class. The caller invokes
// optimize() repeatedly, supplying a fresh function value and gradient each
// time.
class LBFGS {
 public:
  explicit LBFGS()
      : iflag_(0), iscn(0), nfev(0), iycn(0),
        point(0), npt(0), iter(0), info(0),
        ispt(0), isyt(0), iypt(0), maxfev(0),
        stp(0.0), stp1(0.0), mcsrch_(0) {}

  virtual ~LBFGS() { clear(); }

  void clear();

  // Runs one optimisation step on the |size| parameters in |x|, given the
  // function value |f| and gradient |g|; |orthant| and |C| are forwarded to
  // lbfgs_optimize().
  //
  // Returns  1 when the caller should evaluate f and g again and call back,
  //          0 when the optimiser has finished (its state is cleared),
  //         -1 on error: |size| differs from the first call, or the routine
  //            reported a negative status. Both errors are written to
  //            std::cerr.
  int optimize(size_t size, double *x, double f, double *g,
               bool orthant, double C) {
    static const int msize = 5;

    const bool first_call = w_.empty();
    if (first_call) {
      // Size the work buffers on the first call.
      iflag_ = 0;
      w_.resize(size * (2 * msize + 1) + 2 * msize);
      diag_.resize(size);
    } else if (diag_.size() != size) {
      std::cerr << "size of array is different" << std::endl;
      return -1;
    }

    lbfgs_optimize(static_cast<int>(size),
                   msize, x, f, g, &diag_[0], &w_[0], orthant, C, &iflag_);

    if (iflag_ < 0) {
      std::cerr << "routine stops with unexpected error" << std::endl;
      return -1;
    }

    if (iflag_ != 0) {
      return 1;   // evaluate next f and g
    }

    clear();
    return 0;   // terminate
  }

 private:
  class Mcsrch;

  int iflag_, iscn, nfev, iycn, point, npt;
  int iter, info, ispt, isyt, iypt, maxfev;
  double stp, stp1;
  std::vector <double> diag_;
  std::vector <double> w_;
  Mcsrch *mcsrch_;

  void lbfgs_optimize(int size,
                      int msize,
                      double *x,
                      double f,
                      const double *g,
                      double *diag,
                      double *w, bool orthant, double C, int *iflag);
};
}
#endif

View File

@@ -0,0 +1,134 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_LEARNER_NODE_H_
#define MECAB_LEARNER_NODE_H_
#include <cstring>
#include "mecab.h"
#include "common.h"
#include "utils.h"
// One edge of the learner lattice, linking a left node to a right node.
// Edges are chained into two singly linked lists through lnext and rnext;
// those chains start at a node's lpath / rpath (see calc_alpha / calc_beta).
struct mecab_learner_path_t {
// Node at the right end of the edge.
struct mecab_learner_node_t* rnode;
// Next edge in the chain that starts at a node's rpath.
struct mecab_learner_path_t* rnext;
// Node at the left end of the edge.
struct mecab_learner_node_t* lnode;
// Next edge in the chain that starts at a node's lpath.
struct mecab_learner_path_t* lnext;
// Edge score; summed with node alpha/beta values in calc_alpha, calc_beta
// and calc_expectation.
double cost;
// Feature ids of the edge; the array is terminated by -1.
const int *fvector;
};
// One node of the learner lattice.
// NOTE(review): only the fields used by the helpers in this header are
// described; the remaining fields presumably mirror mecab_node_t in
// mecab.h -- confirm there before relying on them.
struct mecab_learner_node_t {
struct mecab_learner_node_t *prev;
struct mecab_learner_node_t *next;
struct mecab_learner_node_t *enext;
struct mecab_learner_node_t *bnext;
// Head of the edge chain walked through rnext (used by calc_beta).
struct mecab_learner_path_t *rpath;
// Head of the edge chain walked through lnext (used by calc_alpha).
struct mecab_learner_path_t *lpath;
struct mecab_learner_node_t *anext;
// Surface text; node_cmp_eq compares `length` bytes of it.
const char *surface;
// NUL-terminated, comma-separated feature string (see node_cmp_eq).
const char *feature;
unsigned int id;
unsigned short length;
unsigned short rlength;
unsigned short rcAttr;
unsigned short lcAttr;
unsigned short posid;
unsigned char char_type;
// Node kind; compared against MECAB_UNK_NODE / MECAB_BOS_NODE /
// MECAB_EOS_NODE in this header.
unsigned char stat;
unsigned char isbest;
// Values written by calc_alpha() / calc_beta().
double alpha;
double beta;
short wcost2;
double wcost;
double cost;
// Feature ids of the node; the array is terminated by -1.
const int *fvector;
struct mecab_token_t *token;
};
namespace MeCab {
typedef struct mecab_learner_path_t LearnerPath;
typedef struct mecab_learner_node_t LearnerNode;
// Finds the n-th occurrence of `v` in [b, e).
//
// Returns an iterator to that occurrence, `e` when fewer than n occurrences
// exist, and `b` when n is 0. T1 must support `iterator + 1`.
template <class T1, class T2> T1 repeat_find_if(T1 b, T1 e,
                                                const T2& v, size_t n) {
  T1 match = b;
  for (size_t left = n; left != 0; --left) {
    match = std::find(b, e, v);
    if (match == e) {
      return e;
    }
    b = match + 1;  // resume just past the hit
  }
  return match;
}
// NOTE: first argument: answer,
//       second argument: system output
//
// Returns true when both nodes have the same surface (same length and
// bytes) and their feature strings agree up to the `size`-th comma.
// When the system node is an unknown word, `unk_size` is used instead of
// `size`. If a feature string has fewer commas than requested, the whole
// string takes part in the comparison.
inline bool node_cmp_eq(const LearnerNode &node1,
                        const LearnerNode &node2,
                        size_t size, size_t unk_size) {
  if (node1.length != node2.length ||
      strncmp(node1.surface, node2.surface, node1.length) != 0) {
    return false;
  }

  const char *const answer = node1.feature;
  const char *const system = node2.feature;

  // There is NO case when node1 becomes MECAB_UNK_NODE.
  // The system cannot output other extra information for unknown words.
  const size_t fields = (node2.stat == MECAB_UNK_NODE) ? unk_size : size;

  const size_t answer_len = static_cast<size_t>(
      repeat_find_if(answer, answer + std::strlen(answer), ',', fields) -
      answer);
  const size_t system_len = static_cast<size_t>(
      repeat_find_if(system, system + std::strlen(system), ',', fields) -
      system);

  return answer_len == system_len &&
         std::strncmp(answer, system, answer_len) == 0;
}
// True when the edge cannot contribute to the lattice: its right node has
// no rpath chain and is not EOS, or its left node has no lpath chain and
// is not BOS.
inline bool is_empty(LearnerPath *path) {
  const LearnerNode *const right = path->rnode;
  if (!right->rpath && right->stat != MECAB_EOS_NODE) {
    return true;
  }
  const LearnerNode *const left = path->lnode;
  return !left->lpath && left->stat != MECAB_BOS_NODE;
}
// Adds this edge's weight to `expected` for every feature id on the edge
// and, unless the right node is EOS, for every feature id on the right
// node. The weight is exp(left alpha + edge cost + right beta - Z).
// Edges rejected by is_empty() are ignored. Feature arrays end at -1.
inline void calc_expectation(LearnerPath *path, double *expected, double Z) {
  if (is_empty(path)) {
    return;
  }

  const LearnerNode *const right = path->rnode;
  const double weight = std::exp(path->lnode->alpha +
                                 path->cost +
                                 right->beta - Z);

  const int *id = path->fvector;
  while (*id != -1) {
    expected[*id] += weight;
    ++id;
  }

  if (right->stat == MECAB_EOS_NODE) {
    return;
  }
  for (id = right->fvector; *id != -1; ++id) {
    expected[*id] += weight;
  }
}
// Sets n->alpha by folding logsumexp() over the edge chain that starts at
// n->lpath; each edge contributes its cost plus its left node's alpha.
// The third logsumexp argument is true only for the first edge.
// alpha stays 0.0 when the chain is empty.
inline void calc_alpha(LearnerNode *n) {
  n->alpha = 0.0;
  LearnerPath *const head = n->lpath;
  for (LearnerPath *edge = head; edge; edge = edge->lnext) {
    const bool is_first = (edge == head);
    n->alpha = logsumexp(n->alpha,
                         edge->cost + edge->lnode->alpha,
                         is_first);
  }
}
// Sets n->beta by folding logsumexp() over the edge chain that starts at
// n->rpath; each edge contributes its cost plus its right node's beta.
// The third logsumexp argument is true only for the first edge.
// beta stays 0.0 when the chain is empty.
inline void calc_beta(LearnerNode *n) {
  n->beta = 0.0;
  LearnerPath *const head = n->rpath;
  for (LearnerPath *edge = head; edge; edge = edge->rnext) {
    const bool is_first = (edge == head);
    n->beta = logsumexp(n->beta,
                        edge->cost + edge->rnode->beta,
                        is_first);
  }
}
}
#endif // MECAB_LEARNER_NODE_H_

View File

@@ -0,0 +1,80 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_TAGGER_H
#define MECAB_TAGGER_H
#include <vector>
#include "mecab.h"
#include "freelist.h"
#include "feature_index.h"
#include "tokenizer.h"
#include "scoped_ptr.h"
namespace MeCab {
class FeatureIndex;
class LearnerTagger {
public:
bool empty() const { return (len_ == 0); }
void close() {}
void clear() {}
explicit LearnerTagger(): tokenizer_(0), path_allocator_(0),
feature_index_(0), begin_(0), end_(0), len_(0) {}
virtual ~LearnerTagger() {}
protected:
Tokenizer<LearnerNode, LearnerPath> *tokenizer_;
Allocator<LearnerNode, LearnerPath> *allocator_;
FreeList<LearnerPath> *path_allocator_;
FeatureIndex *feature_index_;
scoped_string begin_data_;
const char *begin_;
const char *end_;
size_t len_;
std::vector<LearnerNode *> begin_node_list_;
std::vector<LearnerNode *> end_node_list_;
LearnerNode *lookup(size_t);
bool connect(size_t, LearnerNode *);
bool viterbi();
bool buildLattice();
bool initList();
};
// Tagger variant with training-side operations (read, eval, gradient).
// All member functions except the constructor and destructor are defined
// out of line, so their exact behaviour is not visible in this header.
class EncoderLearnerTagger: public LearnerTagger {
public:
// Takes the tokenizer, allocator and feature index by raw pointer along
// with two evaluation sizes (presumably stored in eval_size_ and
// unk_eval_size_ -- confirm in the .cpp).
bool open(Tokenizer<LearnerNode, LearnerPath> *tokenzier,
Allocator<LearnerNode, LearnerPath> *allocator,
FeatureIndex *feature_index,
size_t eval_size, size_t unk_eval_size);
bool read(std::istream *, std::vector<double> *);
int eval(size_t *, size_t *, size_t *) const;
double gradient(double *expected);
// Both evaluation sizes default to 1024.
explicit EncoderLearnerTagger(): eval_size_(1024), unk_eval_size_(1024) {}
// close() is not redeclared here, so this calls the empty
// LearnerTagger::close().
virtual ~EncoderLearnerTagger() { close(); }
private:
size_t eval_size_;
size_t unk_eval_size_;
std::vector<LearnerPath *> ans_path_list_;
};
// Tagger variant that is configured from a Param and parses a stream.
// It holds its tokenizer, allocator and feature index in scoped_ptr members.
class DecoderLearnerTagger: public LearnerTagger {
public:
// Both are defined out of line.
bool open(const Param &);
bool parse(std::istream *, std::ostream *);
// close() is not redeclared here, so this calls the empty
// LearnerTagger::close().
virtual ~DecoderLearnerTagger() { close(); }
private:
scoped_ptr<Tokenizer<LearnerNode, LearnerPath> > tokenizer_data_;
scoped_ptr<Allocator<LearnerNode, LearnerPath> > allocator_data_;
scoped_ptr<FeatureIndex> feature_index_data_;
};
}
#endif

Binary file not shown.

11
MorphereAnalyzer/main.cpp Normal file
View File

@@ -0,0 +1,11 @@
#include "mainwindow.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
return a.exec();
}

View File

@@ -0,0 +1,770 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of the examples of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:BSD$
** You may use this file under the terms of the BSD license as follows:
**
** "Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are
** met:
** * Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** * Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in
** the documentation and/or other materials provided with the
** distribution.
** * Neither the name of Digia Plc and its Subsidiary(-ies) nor the names
** of its contributors may be used to endorse or promote products derived
** from this software without specific prior written permission.
**
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include <QtWidgets>
#include "mainwindow.h"
#include "widget.h"
#include "stable.h"
#include "sanaly1.h"
//! [0]
// Builds the main window: a central widget with two tab widgets side by
// side (data sources on the left, analysis results on the right), then the
// actions, the menus and the status-bar message.
MainWindow::MainWindow()
{
    QWidget *widget = new QWidget;
    setCentralWidget(widget);

    // Give every pointer member a defined value. These three were never
    // assigned here and stayed indeterminate until first use.
    m_qwMain = widget;
    m_pThread = 0;
    m_ptwTable = 0;

    p_qwDB = new Widget;
    p_qwAnalyzer1 = new SAnaly1;

    // Placeholder pages that are not added to any tab yet. They are parented
    // to the window so they are destroyed with it (they used to be parentless
    // and were never deleted), and hidden so that they are not drawn as stray
    // child widgets.
    p_qwFile = new QWidget(this);
    p_qwAnalyzed1 = new QWidget(this);
    p_qwText = new QWidget(this);
    p_qwAnalyzer2 = new QWidget(this);
    p_qwFile->hide();
    p_qwAnalyzed1->hide();
    p_qwText->hide();
    p_qwAnalyzer2->hide();

    m_ptwSource = new QTabWidget;
    m_ptwSource->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding);
    m_ptwSource->addTab(p_qwDB,"DB");
    //m_ptwSource->addTab(p_qwFile,"File");
    //m_ptwSource->addTab(p_qwText,"Text");
    //m_ptwSource->addTab(p_qwAnalyzed1,"Analyzed1");

    m_ptwResult = new QTabWidget;
    m_ptwResult->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding);
    m_ptwResult->addTab(p_qwAnalyzer1, "Analysis1");
    //m_ptwResult->addTab(p_qwAnalyzer2, "Analysis2");

    QHBoxLayout *layout = new QHBoxLayout;
    // setContentsMargins() replaces the obsolete QLayout::setMargin().
    layout->setContentsMargins(5, 5, 5, 5);
    layout->addWidget(m_ptwSource);
    layout->addWidget(m_ptwResult);
    widget->setLayout(layout);

    createActions();
    createMenus();

    QString message = tr("A context menu is available by right-clicking");
    statusBar()->showMessage(message);

    setWindowTitle(tr("Morphere Analyzer"));
    setMinimumSize(800, 600);
    resize(800, 600);
    //(STable*)(p_qwDB->GetTabWidget()->widget(1));
}
//! [2]
//! [3]
/*
void MainWindow::contextMenuEvent(QContextMenuEvent *event)
{
QMenu menu(this);
menu.addAction(cutAct);
menu.addAction(copyAct);
menu.addAction(pasteAct);
menu.exec(event->globalPos());
}
*/
//! [3]
/*
void MainWindow::newFile()
{
}
void MainWindow::open()
{
}
void MainWindow::save()
{
}
void MainWindow::print()
{
}
void MainWindow::undo()
{
}
void MainWindow::redo()
{
}
void MainWindow::cut()
{
}
void MainWindow::copy()
{
}
void MainWindow::paste()
{
}
void MainWindow::bold()
{
}
void MainWindow::italic()
{
}
void MainWindow::leftAlign()
{
}
void MainWindow::rightAlign()
{
}
void MainWindow::justify()
{
}
void MainWindow::center()
{
}
void MainWindow::setLineSpacing()
{
}
void MainWindow::setParagraphSpacing()
{
}
void MainWindow::about()
{
QMessageBox::about(this, tr("About Menu"),
tr("The <b>Menu</b> example shows how to create "
"menu-bar menus and context menus."));
}
void MainWindow::aboutQt()
{
}
*/
//! [4]
// Creates every QAction of the window, sets its status-bar tip and connects
// its triggered() signal to the matching slot. The actions are owned by the
// window (it is passed as their parent).
void MainWindow::createActions()
{
    actDBNew = new QAction(tr("&New DB "), this);
    actDBNew->setStatusTip(tr("Create a new DB"));
    connect(actDBNew, SIGNAL(triggered()), this, SLOT(newDB()));

    actDBCsvImport = new QAction(tr("DB - CSV Import"), this);
    actDBCsvImport->setStatusTip(tr("Import DB as a csv file"));
    connect(actDBCsvImport, SIGNAL(triggered()), this, SLOT(importDB()));

    actDBCsvExport = new QAction(tr("DB - CSV Export"), this);
    actDBCsvExport->setStatusTip(tr("Export DB as a csv file"));
    connect(actDBCsvExport, SIGNAL(triggered()), this, SLOT(exportDB()));

    actMorphereOriginalExport = new QAction(tr("Morphere - CSV Export(Raw Result)"), this);
    actMorphereOriginalExport->setStatusTip(tr("Export Morphere as a csv file"));
    connect(actMorphereOriginalExport, SIGNAL(triggered()), this, SLOT(exportOriginalMorphere()));

    actMorphereExport = new QAction(tr("Morphere - CSV Export"), this);
    actMorphereExport->setStatusTip(tr("Export Morphere as a csv file"));
    connect(actMorphereExport, SIGNAL(triggered()), this, SLOT(exportMorphere()));

    actMorphereImport = new QAction(tr("Morphere - CSV Import"), this);
    actMorphereImport->setStatusTip(tr("Import Morphere as a csv file"));
    connect(actMorphereImport, SIGNAL(triggered()), this, SLOT(importMorphere()));

    actExit = new QAction(tr("Exit"), this);
    actExit->setStatusTip(tr("Exit the application"));
    connect(actExit, SIGNAL(triggered()), this, SLOT(close()));

    actAnalyze = new QAction(tr("Analyze"), this);
    actAnalyze->setStatusTip(tr("Analyze"));
    connect(actAnalyze, SIGNAL(triggered()), this, SLOT(slotAnalyze()));

    // The tip and the connection belong to the dictionary action. They were
    // applied to actAnalyze, which overwrote its "Analyze" tip, made Analyze
    // also invoke slotDictionary(), and left the Dictionary menu entry
    // unconnected.
    actDictionary = new QAction(tr("Dictionary"), this);
    actDictionary->setStatusTip(tr("Execute Dictionary Widget"));
    connect(actDictionary, SIGNAL(triggered()), this, SLOT(slotDictionary()));
}
//! [7]
//! [8]
//!
void MainWindow::createMenus()
{
//! [9] //! [10]
menuFile = menuBar()->addMenu(tr("File"));
menuFile->addAction(actDBNew);
menuFile->addSeparator();
//! [9]
menuFile->addAction(actDBCsvImport);
//! [10]
menuFile->addAction(actDBCsvExport);
menuFile->addSeparator();
menuFile->addAction(actMorphereImport);
menuFile->addAction(actMorphereExport);
menuFile->addAction(actMorphereOriginalExport);
menuFile->addSeparator();
menuFile->addAction(actExit);
//! [11]
//! [11]
menuDictionary = menuBar()->addMenu(tr("Dictionary"));
menuDictionary->addAction(actDictionary);
menuAnalyze = menuBar()->addMenu(tr("Analyze"));
menuAnalyze->addAction(actAnalyze);
/*
editMenu = menuBar()->addMenu(tr("&Edit"));
editMenu->addAction(undoAct);
editMenu->addAction(redoAct);
editMenu->addSeparator();
editMenu->addAction(cutAct);
editMenu->addAction(copyAct);
editMenu->addAction(pasteAct);
editMenu->addSeparator();
helpMenu = menuBar()->addMenu(tr("&Help"));
helpMenu->addAction(aboutAct);
helpMenu->addAction(aboutQtAct);
//! [8]
//! [12]
formatMenu = editMenu->addMenu(tr("&Format"));
formatMenu->addAction(boldAct);
formatMenu->addAction(italicAct);
formatMenu->addSeparator()->setText(tr("Alignment"));
formatMenu->addAction(leftAlignAct);
formatMenu->addAction(rightAlignAct);
formatMenu->addAction(justifyAct);
formatMenu->addAction(centerAct);
formatMenu->addSeparator();
formatMenu->addAction(setLineSpacingAct);
formatMenu->addAction(setParagraphSpacingAct);
*/
}
//! [12]
// Slot for the "New DB" action: forwards to FileNew() on the DB tab widget.
void MainWindow::newDB()
{
p_qwDB->FileNew();
}
// Slot for the "DB - CSV Import" action: forwards to FileImport() on the DB
// tab widget.
void MainWindow::importDB()
{
p_qwDB->FileImport();
}
// Slot for the "DB - CSV Export" action: forwards to FileExport() on the DB
// tab widget.
void MainWindow::exportDB()
{
p_qwDB->FileExport();
}
// Slot for the "Morphere - CSV Import" action: forwards to FileImport() on
// the analysis widget.
void MainWindow::importMorphere()
{
p_qwAnalyzer1->FileImport();
}
// Slot for the "Morphere - CSV Export" action: forwards to FileExport() on
// the analysis widget.
void MainWindow::exportMorphere()
{
p_qwAnalyzer1->FileExport();
}
void MainWindow::exportOriginalMorphere()
{
QString strFilename = QFileDialog::getSaveFileName(0,"Exoprt file",QDir::currentPath(),
"csv files (*.csv);;All files (*.*)",new QString("Text files (*.csv)"));
if (strFilename.toLower().right(4) != QString(".csv"))
strFilename += ".csv";
QFile file(strFilename);
if(!file.open(QFile::WriteOnly | QFile::Text)) return;
QTextStream out(&file);
/*
out << "#Head#,";
for (int nCount = 0;nCount < pCurrent->columnCount() ;nCount++ )
out << pCurrent->horizontalHeaderItem(nCount)->text() << ",";
m_pProgress->setRange(0,pCurrent->rowCount()-1);
*/
for( QMap<QString,QMap<QString,QMap<int, QString> > >::iterator iterPos = p_qwAnalyzer1->m_mapViewResult.begin(); iterPos != p_qwAnalyzer1->m_mapViewResult.end(); iterPos++)
{
for(QMap<QString, QMap<int, QString> >::iterator iterPos2 = iterPos.value().begin(); iterPos2 != iterPos.value().end(); iterPos2++)
{
QMapIterator<int, QString> i(p_qwAnalyzer1->m_mapViewResult[iterPos.key()][iterPos2.key()]);
i.toBack();
while(i.hasPrevious())
{
i.previous();
out << "\"" << iterPos.key() << "\"" << ",";
out << "\"" << iterPos2.key() << "\"" << ",";
out << "\"" << i.value() << "\"" << ",";
out << "\"" << i.key() << "\"" << "\n";
}
}
}
file.close();
/*
for( QMap<QString,QMap<QString,QMap<int, QString> > >::iterator iterPos = p_qwAnalyzer1->m_mapViewResult.begin(); iterPos != p_qwAnalyzer1->m_mapViewResult.end(); iterPos++)
{
for(QMap<QString, QMap<int, QString> >::iterator iterPos2 = iterPos.value().begin(); iterPos2 != iterPos.value().end(); iterPos2++)
{
for(QMap<int, QString>::iterator iterPos3 = iterPos2.value().begin();iterPos3 != iterPos2.value().end(); iterPos3++)
{
out2 << iterPos.key() << "," << iterPos2.key() << "," << iterPos3.key() << "," << iterPos3.value() << "\n";
}
}
}
for( QMap<QString,QMap<QString,QMap<int, QString> > >::iterator iterPos = p_qwAnalyzer1->m_mapViewResult.begin(); iterPos != p_qwAnalyzer1->m_mapViewResult.end(); iterPos++)
{
for(QMap<QString, QMap<int, QString> >::iterator iterPos2 = iterPos.value().begin(); iterPos2 != iterPos.value().end(); iterPos2++)
{
QMapIterator<int, QString> i(p_qwAnalyzer1->m_mapViewResult[iterPos.key()][iterPos2.key()]);
i.toBack();
while(i.hasPrevious())
{
i.previous();
out2 << iterPos.key() << "," << iterPos2.key() << "," << i.key() << "," << i.value() << "\n";
}
}
}
QFile file("result.txt");
if(!file.open(QIODevice::WriteOnly | QIODevice::Text)) {
qDebug() << "File read error";
return ;
}
QTextStream out(&file);
for(QMap<QString, int>::iterator iterPos = p_qwAnalyzer1->m_TotalResult.begin(); iterPos != p_qwAnalyzer1->m_TotalResult.end(); iterPos++)
{
QString strkey = iterPos.key();
QStringList strlistKey = strkey.split("~!@");
foreach(QString str, strlistKey)
{
out << str << ",";
}
out << iterPos.value() << "\n";
}
file.close();
*/
}
void MainWindow::slotAnalyze()
{
QMessageBox msg;
msg.setText("Please choose...");
msg.setModal(true);
QPushButton *pbTitle = msg.addButton("Title",QMessageBox::ActionRole);
QPushButton *pbBody = msg.addButton("Body",QMessageBox::ActionRole);
QPushButton *pbAll = msg.addButton("ALL",QMessageBox::ActionRole);
int setbody;
msg.exec();
if (msg.clickedButton() == pbTitle) { setbody = 0; }
else if (msg.clickedButton() == pbBody) { setbody = 1; }
else if (msg.clickedButton() == pbAll) { setbody = 2; }
QMessageBox msg2;
msg2.setText("Please wait...");
msg2.setVisible(true);
for(int i = 0; i < 16;i++)
{
p_qwAnalyzer1->m_HashResult[i].clear();
}
//p_qwAnalyzer1->MemClear();
p_qwAnalyzer1->m_mapViewResult.clear();
p_qwAnalyzer1->m_TotalResult.clear();
if(m_ptwResult->currentIndex() == 0)
{
ExecThread(setbody);
EmergeThreadResult();
SortViewResult();
ViewResult();
}
msg2.setVisible(false);
}
// Handler reserved for the dictionary feature. The body is empty, so
// invoking this slot currently does nothing.
void MainWindow::slotDictionary()
{
}
void MainWindow::ExecThread(int _setBodyTitle)
{
int nCount = 0;
m_pThread = new AnalyzerThread*[p_qwAnalyzer1->getThread()];
for(int i=0;i<p_qwAnalyzer1->getThread();i++)
{
m_pThread[i] = new AnalyzerThread(mutex, i, nCount);
m_pThread[i]->setParametersfromWidget(p_qwAnalyzer1,m_ptwResult->currentIndex());
m_pThread[i]->setWidget(p_qwDB);
m_pThread[i]->setPosBody(p_qwDB->getBodyPosition());
m_pThread[i]->setPosDate(p_qwDB->getDatePosition());
m_pThread[i]->setPosTitle(p_qwDB->getTitlePosition());
m_pThread[i]->setTitleBody(_setBodyTitle);
}
for(int i=0;i<p_qwAnalyzer1->getThread();i++)
{
m_pThread[i]->start();
}
for(int i=0;i<p_qwAnalyzer1->getThread();i++)
{
m_pThread[i]->wait();
}
for(int i=0;i<p_qwAnalyzer1->getThread();i++)
{
delete m_pThread[i];
}
delete[] m_pThread;
}
void MainWindow::EmergeThreadResult()
{
for(int i=0;i<p_qwAnalyzer1->getThread();i++)
{
for(QHash<QString, int>::iterator iterPos = p_qwAnalyzer1->m_HashResult[i].begin(); iterPos != p_qwAnalyzer1->m_HashResult[i].end();iterPos++)
{
if(p_qwAnalyzer1->m_TotalResult.contains(iterPos.key()))
{
p_qwAnalyzer1->m_TotalResult[iterPos.key()] += iterPos.value();
}
else
{
p_qwAnalyzer1->m_TotalResult.insert(iterPos.key(), iterPos.value());
}
}
p_qwAnalyzer1->m_HashResult[i].clear();
}
}
void MainWindow::SortViewResult()
{
if(p_qwAnalyzer1->isSortDateMorphereChecked())
{
for(QMap<QString, int>::iterator iterPos = p_qwAnalyzer1->m_TotalResult.begin(); iterPos != p_qwAnalyzer1->m_TotalResult.end(); iterPos++)
{
QString strkey = iterPos.key();
int count = iterPos.value();
QStringList strlistKey = strkey.split("~!@");
QString strDate = strlistKey.at(0);
QString strMorphere = strlistKey.at(1);
QString strKeyword = strlistKey.at(2);
if(p_qwAnalyzer1->m_mapViewResult.contains(strDate))
{
if(p_qwAnalyzer1->m_mapViewResult.value(strDate).contains(strMorphere))
{
p_qwAnalyzer1->m_mapViewResult[(strDate)][(strMorphere)].insertMulti(count, strKeyword);
}
else
{
QMap<int, QString> qLast;
qLast.insert(count, strKeyword);
p_qwAnalyzer1->m_mapViewResult[(strDate)].insert(strMorphere, qLast);
}
}
else
{
QMap<int, QString> qLast;
qLast.insert(count , strKeyword);
QMap<QString, QMap<int, QString> > qMedium;
qMedium.insert(strMorphere, qLast);
p_qwAnalyzer1->m_mapViewResult.insert(strDate, qMedium);
}
}
}
else
{
for(QMap<QString, int>::iterator iterPos = p_qwAnalyzer1->m_TotalResult.begin(); iterPos != p_qwAnalyzer1->m_TotalResult.end(); iterPos++)
{
QString strkey = iterPos.key();
int count = iterPos.value();
QStringList strlistKey = strkey.split("~!@");
QString strDate = strlistKey.at(0);
QString strMorphere = strlistKey.at(1);
QString strKeyword = strlistKey.at(2);
if(p_qwAnalyzer1->m_mapViewResult.contains(strMorphere))
{
if(p_qwAnalyzer1->m_mapViewResult.value(strMorphere).contains(strDate))
{
p_qwAnalyzer1->m_mapViewResult[(strMorphere)][(strDate)].insertMulti(count, strKeyword);
}
else
{
QMap<int, QString> qLast;
qLast.insert(count, strKeyword);
p_qwAnalyzer1->m_mapViewResult[(strMorphere)].insert(strDate, qLast);
}
}
else
{
QMap<int, QString> qLast;
qLast.insert(count , strKeyword);
QMap<QString, QMap<int, QString> > qMedium;
qMedium.insert(strDate, qLast);
p_qwAnalyzer1->m_mapViewResult.insert(strMorphere, qMedium);
}
}
}
}
void MainWindow::ViewResult()
{
foreach(STable* ptable, m_lTable)
{
ptable->clear();
delete ptable;
}
m_lTable.clear();
foreach(QTabWidget* pwidget, m_lTabWidget)
{
pwidget->clear();
delete pwidget;
}
m_lTabWidget.clear();
m_ptwTable = p_qwAnalyzer1->getQTabWidget();
m_ptwTable->clear();
for( QMap<QString,QMap<QString,QMap<int, QString> > >::iterator iterPos = p_qwAnalyzer1->m_mapViewResult.begin(); iterPos != p_qwAnalyzer1->m_mapViewResult.end(); iterPos++)
{
QTabWidget* temp = new QTabWidget;
for(QMap<QString, QMap<int, QString> >::iterator iterPos2 = iterPos.value().begin(); iterPos2 != iterPos.value().end(); iterPos2++)
{
int ncRow = 0;
STable *pNew = new STable;
pNew->setColumnCount(2);
pNew->setRowCount(p_qwAnalyzer1->m_mapViewResult[iterPos.key()][iterPos2.key()].size());
pNew->setHorizontalHeaderItem(0 ,new QTableWidgetItem("Keyword"));
pNew->setHorizontalHeaderItem(1 ,new QTableWidgetItem("Count"));
QMapIterator<int, QString> i(p_qwAnalyzer1->m_mapViewResult[iterPos.key()][iterPos2.key()]);
i.toBack();
while(i.hasPrevious())
{
i.previous();
pNew->setItem(ncRow,0,new QTableWidgetItem(QString(i.value())));
pNew->setItem(ncRow,1,new QTableWidgetItem(QString::number(i.key())));
ncRow++;
}
temp->addTab(pNew, iterPos2.key());
m_lTable << pNew;
}
m_ptwTable->addTab(temp, iterPos.key());
m_lTabWidget << temp;
}
}

View File

@@ -0,0 +1,185 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of the examples of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:BSD$
** You may use this file under the terms of the BSD license as follows:
**
** "Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are
** met:
** * Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** * Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in
** the documentation and/or other materials provided with the
** distribution.
** * Neither the name of Digia Plc and its Subsidiary(-ies) nor the names
** of its contributors may be used to endorse or promote products derived
** from this software without specific prior written permission.
**
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
#include <QTabWidget>
#include "sanalyzer.h"
#include "widget.h"
#include "sanaly1.h"
#include <QList>
QT_BEGIN_NAMESPACE
class QAction;
class QActionGroup;
class QLabel;
class QMenu;
QT_END_NAMESPACE
//! [0]
// Main application window: a source tab widget and a result tab widget side
// by side, plus the File / Dictionary / Analyze menus.
class MainWindow : public QMainWindow
{
Q_OBJECT
public:
MainWindow();
// Analysis pipeline; slotAnalyze() calls these four in this order.
void ExecThread(int _setBodyTitle);
void EmergeThreadResult();
void SortViewResult();
void ViewResult();
protected:
//void contextMenuEvent(QContextMenuEvent *event) Q_DECL_OVERRIDE;
//! [0]
//! [1]
private slots:
/*
void newFile();
void open();
void save();
void print();
void undo();
void redo();
void cut();
void copy();
void paste();
void bold();
void italic();
void leftAlign();
void rightAlign();
void justify();
void center();
void setLineSpacing();
void setParagraphSpacing();
void about();
void aboutQt();
*/
// Handlers for the menu actions (connected in createActions()).
void newDB();
void importDB();
void exportDB();
void exportMorphere();
void importMorphere();
void slotAnalyze();
void slotDictionary();
void exportOriginalMorphere();
//! [1]
//! [2]
private:
// Called once from the constructor.
void createActions();
void createMenus();
//! [2]
//! [3]
//!
//!
/*
* QMenu *fileMenu;
QMenu *editMenu;
QMenu *formatMenu;
QMenu *helpMenu;
QActionGroup *alignmentGroup;
QAction *newAct;
QAction *openAct;
QAction *saveAct;
QAction *printAct;
QAction *exitAct;
QAction *undoAct;
QAction *redoAct;
QAction *cutAct;
QAction *copyAct;
QAction *pasteAct;
QAction *boldAct;
QAction *italicAct;
QAction *leftAlignAct;
QAction *rightAlignAct;
QAction *justifyAct;
QAction *centerAct;
QAction *setLineSpacingAct;
QAction *setParagraphSpacingAct;
QAction *aboutAct;
QAction *aboutQtAct;
QLabel *infoLabel;
*/
// Left (data source) and right (analysis result) tab widgets.
QTabWidget *m_ptwSource;
QTabWidget *m_ptwResult;
QWidget *m_qwMain;
// Pages for the tab widgets; only p_qwDB and p_qwAnalyzer1 are added to a
// tab in the constructor.
Widget *p_qwDB;
QWidget *p_qwFile;
QWidget *p_qwAnalyzed1;
QWidget *p_qwText;
SAnaly1 *p_qwAnalyzer1;
QWidget *p_qwAnalyzer2;
// Menu actions, created in createActions().
QAction *actDBNew;
QAction *actDBCsvImport;
QAction *actDBCsvExport;
QAction *actMorphereExport;
QAction *actMorphereImport;
QAction *actMorphereOriginalExport;
QAction *actExit;
QAction *actAnalyze;
QAction *actDictionary;
// Menus, created in createMenus().
QMenu *menuFile;
QMenu *menuAnalyze;
QMenu *menuDictionary;
// Passed to every AnalyzerThread created in ExecThread().
QMutex mutex;
// Array of worker threads; allocated and freed inside ExecThread().
AnalyzerThread **m_pThread;
//AnalyzerThread m_pThread[16];
// Widgets created by ViewResult(); it deletes them on its next call.
QList<STable*> m_lTable;
QList<QTabWidget*> m_lTabWidget;
// Result tab widget obtained from the analysis widget in ViewResult().
QTabWidget* m_ptwTable;
};
//! [3]
#endif

View File

@@ -0,0 +1,24 @@
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow" >
<property name="geometry" >
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle" >
<string>MainWindow</string>
</property>
<widget class="QMenuBar" name="menuBar" />
<widget class="QToolBar" name="mainToolBar" />
<widget class="QWidget" name="centralWidget" />
<widget class="QStatusBar" name="statusBar" />
</widget>
<layoutDefault spacing="6" margin="11" />
<pixmapfunction></pixmapfunction>
<resources/>
<connections/>
</ui>

1508
MorphereAnalyzer/mecab.h Normal file

File diff suppressed because it is too large Load Diff

214
MorphereAnalyzer/mmap.h Normal file
View File

@@ -0,0 +1,214 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_MMAP_H
#define MECAB_MMAP_H
#include <errno.h>
#include <string>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
extern "C" {
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
#ifdef HAVE_WINDOWS_H
#include <windows.h>
#endif
#else
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#endif
}
#include "common.h"
#include "utils.h"
#include <winnt.h>
#ifndef O_BINARY
#define O_BINARY 0
#endif
namespace MeCab {
template <class T> class Mmap {
private:
T *text;
size_t length;
std::string fileName;
whatlog what_;
#if defined(_WIN32) && !defined(__CYGWIN__)
HANDLE hFile;
HANDLE hMap;
#else
int fd;
int flag;
#endif
public:
T& operator[](size_t n) { return *(text + n); }
const T& operator[](size_t n) const { return *(text + n); }
T* begin() { return text; }
const T* begin() const { return text; }
T* end() { return text + size(); }
const T* end() const { return text + size(); }
size_t size() { return length/sizeof(T); }
const char *what() { return what_.str(); }
const char *file_name() { return fileName.c_str(); }
size_t file_size() { return length; }
bool empty() { return(length == 0); }
// This code is imported from sufary, develoved by
// TATUO Yamashita <yto@nais.to> Thanks!
#if defined(_WIN32) && !defined(__CYGWIN__)
bool open(const char *filename, const char *mode = "r") {
this->close();
unsigned long mode1, mode2, mode3;
fileName = std::string(filename);
if (std::strcmp(mode, "r") == 0) {
mode1 = GENERIC_READ;
mode2 = PAGE_READONLY;
mode3 = FILE_MAP_READ;
} else if (std::strcmp(mode, "r+") == 0) {
mode1 = GENERIC_READ | GENERIC_WRITE;
mode2 = PAGE_READWRITE;
mode3 = FILE_MAP_ALL_ACCESS;
} else {
CHECK_FALSE(false) << "unknown open mode:" << filename;
}
hFile = ::CreateFileW(WPATH(filename), mode1, FILE_SHARE_READ, 0,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
CHECK_FALSE(hFile != INVALID_HANDLE_VALUE)
<< "CreateFile() failed: " << filename;
length = ::GetFileSize(hFile, 0);
hMap = ::CreateFileMapping(hFile, 0, mode2, 0, 0, 0);
CHECK_FALSE(hMap) << "CreateFileMapping() failed: " << filename;
text = reinterpret_cast<T *>(::MapViewOfFile(hMap, mode3, 0, 0, 0));
CHECK_FALSE(text) << "MapViewOfFile() failed: " << filename;
return true;
}
void close() {
if (text) { ::UnmapViewOfFile(text); }
if (hFile != INVALID_HANDLE_VALUE) {
::CloseHandle(hFile);
hFile = INVALID_HANDLE_VALUE;
}
if (hMap) {
::CloseHandle(hMap);
hMap = 0;
}
text = 0;
}
Mmap(): text(0), hFile(INVALID_HANDLE_VALUE), hMap(0) {}
#else
bool open(const char *filename, const char *mode = "r") {
this->close();
struct stat st;
fileName = std::string(filename);
if (std::strcmp(mode, "r") == 0)
flag = O_RDONLY;
else if (std::strcmp(mode, "r+") == 0)
flag = O_RDWR;
else
CHECK_FALSE(false) << "unknown open mode: " << filename;
CHECK_FALSE((fd = ::open(filename, flag | O_BINARY)) >= 0)
<< "open failed: " << filename;
CHECK_FALSE(::fstat(fd, &st) >= 0)
<< "failed to get file size: " << filename;
length = st.st_size;
#ifdef HAVE_MMAP
int prot = PROT_READ;
if (flag == O_RDWR) prot |= PROT_WRITE;
char *p;
CHECK_FALSE((p = reinterpret_cast<char *>
(::mmap(0, length, prot, MAP_SHARED, fd, 0)))
!= MAP_FAILED)
<< "mmap() failed: " << filename;
text = reinterpret_cast<T *>(p);
#else
text = new T[length];
CHECK_FALSE(::read(fd, text, length) >= 0)
<< "read() failed: " << filename;
#endif
::close(fd);
fd = -1;
return true;
}
void close() {
if (fd >= 0) {
::close(fd);
fd = -1;
}
if (text) {
#ifdef HAVE_MMAP
::munmap(reinterpret_cast<char *>(text), length);
text = 0;
#else
if (flag == O_RDWR) {
int fd2;
if ((fd2 = ::open(fileName.c_str(), O_RDWR)) >= 0) {
::write(fd2, text, length);
::close(fd2);
}
}
delete [] text;
#endif
}
text = 0;
}
Mmap() : text(0), fd(-1) {}
#endif
virtual ~Mmap() { this->close(); }
};
}
#endif

View File

@@ -0,0 +1,43 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_NBEST_GENERATOR_H_
#define MECAB_NBEST_GENERATOR_H_
#include <queue>
#include "mecab.h"
#include "freelist.h"
namespace MeCab {
// Enumerates analysis results of a Lattice one at a time: set() selects the
// lattice and each next() call advances to the following result.
// The agenda is a priority queue ordered by the fx cost of its elements.
class NBestGenerator {
 private:
  struct QueueElement {
    Node *node;
    QueueElement *next;
    long fx;  // f(x) = h(x) + g(x): cost function for A* search
    long gx;  // g(x)
  };

  // Orders the priority queue so that the element with the smallest fx is
  // on top. The call operator is const so that it can be invoked through a
  // const comparator object, as the standard Compare requirements expect.
  class QueueElementComp {
   public:
    bool operator()(const QueueElement *q1, const QueueElement *q2) const {
      return (q1->fx > q2->fx);
    }
  };

  std::priority_queue<QueueElement *, std::vector<QueueElement *>,
                      QueueElementComp> agenda_;
  FreeList <QueueElement> freelist_;

 public:
  explicit NBestGenerator() : freelist_(512) {}
  virtual ~NBestGenerator() {}
  bool set(Lattice *lattice);
  bool next();
};
}
#endif // MECAB_NBEST_GENERATOR_H_

92
MorphereAnalyzer/param.h Normal file
View File

@@ -0,0 +1,92 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_PARAM_H
#define MECAB_PARAM_H
#include <map>
#include <string>
#include <vector>
#include <sstream>
#include "scoped_ptr.h"
#include "common.h"
namespace {
// Converts arg to Target by writing it to a string stream and reading it
// back. If the conversion fails, or anything other than whitespace is left
// over after the value, a value-initialised Target is returned instead
// (0 for arithmetic types, an empty string for std::string, ...).
template <class Target, class Source>
Target lexical_cast(Source arg) {
  std::stringstream interpreter;
  Target result;
  if (!(interpreter << arg) || !(interpreter >> result) ||
      !(interpreter >> std::ws).eof()) {
    // Value-initialise directly; no heap allocation is needed for this.
    return Target();
  }
  return result;
}

// string -> string must not go through the stream, which would stop at the
// first whitespace character; return the argument unchanged.
template <>
std::string lexical_cast<std::string, std::string>(std::string arg) {
  return arg;
}
}
namespace MeCab {
// Description of one option; Param::open() receives a pointer to these.
// NOTE(review): the field meanings below are inferred from the names only;
// confirm against the implementation of Param::open().
struct Option {
const char *name;             // presumably the long option name
char short_name;              // presumably the single-character alias
const char *default_value;    // presumably the value used when not given
const char *arg_description;  // presumably the argument placeholder for help text
const char *description;      // presumably the help text for the option
};
// String key/value configuration store. Values are kept as strings in
// conf_ and converted on access with lexical_cast.
class Param {
 private:
  std::map<std::string, std::string> conf_;  // key -> value as text
  std::vector<std::string> rest_;
  std::string system_name_;
  std::string help_;
  std::string version_;
  whatlog what_;

 public:
  bool open(int argc, char **argv, const Option *opt);
  bool open(const char *arg, const Option *opt);
  bool load(const char *filename);
  void clear();
  const std::vector<std::string>& rest_args() const { return rest_; }
  const char* program_name() const { return system_name_.c_str(); }
  const char *what() { return what_.str(); }
  const char* help() const { return help_.c_str(); }
  const char* version() const { return version_.c_str(); }
  int help_version() const;

  // Returns the value stored under key converted to T. A missing key, or a
  // value that cannot be converted, yields a value-initialised T.
  template <class T>
  T get(const char *key) const {
    std::map<std::string, std::string>::const_iterator it = conf_.find(key);
    if (it == conf_.end()) {
      return T();  // no heap allocation needed for a default value
    }
    return lexical_cast<T, std::string>(it->second);
  }

  // Stores value under key. With rewrite == false an existing entry is kept
  // and only a missing key is inserted.
  template <class T>
  void set(const char* key, const T &value,
           bool rewrite = true) {
    const std::string key2(key);
    if (rewrite || conf_.find(key2) == conf_.end())
      conf_[key2] = lexical_cast<std::string, T>(value);
  }

  void dump_config(std::ostream *os) const;

  explicit Param() {}
  virtual ~Param() {}
};
}
#endif

1455
MorphereAnalyzer/sanaly1.cpp Normal file

File diff suppressed because it is too large Load Diff

192
MorphereAnalyzer/sanaly1.h Normal file
View File

@@ -0,0 +1,192 @@
#ifndef SANALY1
#define SANALY1
#include <QWidget>
#include <QCalendarWidget>
#include <QDateEdit>
#include <QPushButton>
#include <QHBoxLayout>
#include <QComboBox>
#include <QLineEdit>
#include <QGroupBox>
#include <QListWidget>
#include <QTabWidget>
#include <QRadioButton>
#include <QButtonGroup>
#include <QTableWidget>
#include <QProgressBar>
#include <QMenuBar>
#include <QSqlDatabase>
#include <QListWidgetItem>
#include "stable.h"
// Widget holding the analysis settings and results.
// AnalyzerThread::setParametersfromWidget() reads the thread count, period,
// date range and morpheme list through the public getters below, and
// AnalyzerThread::run() writes its counts into m_HashResult[<thread index>].
class SAnaly1 : public QWidget
{
// Column indices.
enum E_COLUMN
{
E_COLUMN_DATABASE=0,
E_COLUMN_NAME,
E_COLUMN_DATE,
E_COLUMN_COUNT,
};
// Comparison kinds for the length filter.
enum E_LENGTH_COMP
{
E_LENGTH_COMP_GREATER = 0,
E_LENGTH_COMP_LESS,
E_LENGTH_COMP_EQUAL,
};
Q_OBJECT
public:
SAnaly1(QWidget *parent = 0);
~SAnaly1();
// Settings read by AnalyzerThread::setParametersfromWidget().
int getThread();
unsigned int getDateStart();
unsigned int getDateEnd();
int getPeriod();
QStringList getMorphereList();
bool getDateAll();
private:
// Data
QListWidget *m_plwData;
// Date
QCalendarWidget *m_pcw;
QDateEdit *m_pdeStart;
QDateEdit *m_pdeEnd;
//QPushButton *m_ppbInsertCalc;
//QComboBox *m_pcbDateCatalog;
int m_nColumn;
// Keyword
QComboBox *m_pcbCatalog;
QComboBox *m_pcbKeyword;
QComboBox *m_pcbMethod;
QLineEdit *m_pleString;
//QPushButton *m_ppbInsertSearch;
// Length
QComboBox *m_pcbLengthCatalog;
QComboBox *m_pcbLengthComp;
QComboBox *m_pcbLengthInsDel;
QLineEdit *m_pleLength;
// Filter
QListWidget *m_plwFilterGroup;
QListWidget *m_plwFilter;
QLineEdit *m_pleFilterGroup;
QGroupBox *m_pgbFilter;
// Replace
QComboBox *m_pcbReplaceCatalog;
QComboBox *m_pcbReplaceFind;
QLineEdit *m_pleReplaceFind;
QLineEdit *m_pleReplace;
// Count
QComboBox *m_pcbCountCatalog;
// Tab widget (returned by getQTabWidget() — TODO confirm in sanaly1.cpp)
QTabWidget *m_ptwData;
// Progress bar
QProgressBar *m_pProgress;
// Column
QVector <QStringList> m_vecColumn;
// MorphereList
QListWidget *m_plwMorphereList;
QListWidget *m_plwMorphereAdd;
QComboBox *m_pcbDate;
QComboBox *m_pcbPeriod;
QLineEdit *m_pleTop;
QLineEdit *m_pleKeyword;
QComboBox *m_pcbThread;
QComboBox *m_pcbSort;
QListWidgetItem **m_plwiMorphere;
QStringList m_strlistMorphere;
QStringList m_strlistMorphereko;
QRadioButton *m_rbDateMorphere;
QRadioButton *m_rbMorphereDate;
QButtonGroup *m_bgRadioGroup;
public:
// The three parts of a result key. AnalyzerThread::run() joins them into
// one QString, separated by "~!@", to index m_HashResult.
struct m_mapKey
{
QString strDate;
QString strMorphere;
QString strKeyword;
};
//QHash<m_mapKey, int> m_HashResult[16];
// One hash per worker thread (indexed by the thread's number); each
// AnalyzerThread increments only its own entry.
QHash<QString, int> m_HashResult[16];
QMap<QString, int> m_TotalResult;
QMap<QString, QMap<QString, QMap<int, QString> > > m_mapViewResult;
private:
// Builders for the individual sections of the widget.
QMenuBar *setMenuWidget();
QGroupBox *setDataWidgets();
QGroupBox *setDateWidgets();
QGroupBox *setCountWidgets();
QGroupBox *setSearchWidgets();
QGroupBox *setLengthWidgets();
QGroupBox *setFilterWidgets();
QGroupBox *setReplaceWidgets();
QGroupBox *setMorphereList();
QGroupBox *setDate();
QGroupBox *setFilter();
QGroupBox *setOther();
QGroupBox *setSort();
QTableWidget *AddTable(QString _str);
void SetTableHead();
void InsertCopyRow(int _nRow,QTableWidget *_pCurrent,QTableWidget *_pNew);
void DataReload(QString _strTableName,int _nSelect);
void InsertFilter(int _nType,QString _strJson,int _nGroup);
void InsertTimeFilter(int _nTimeCategory ,QDate _dateStart ,QDate _dateEnd ,int _nGroup);
void InsertSearchFilter(int _nArticle,int _nCategory,int _nMethod,int _nKeyword, QString _str,int _nGroup);
void InsertLengthFilter(int _nArticle,int _nCategory,int _nComp,int _nInsDel,QString _str,int _nGroup);
void InsertReplaceFilter(int _nArticle,int _nCategory,int _nFind,QString _strFind,QString _strReplace,int _nGroup);
void RefreshFilter(int _nGroup);
bool ReloadColumn();
QString GetArticleType(int _nSelect);
public slots:
void CloseTab(int index);
void DoubleClickTab(int index);
void SearchDate();
void CountSave();
void SearchKeyword();
void DataGroupRefresh();
void DataGroupItemChanged ( QListWidgetItem * item );
void FilterGroupInsert();
void FilterGroupDelete();
void FilterGroupModify();
void FilterGroupRefresh();
void currentGroupItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
void FilterDelete();
void currentFilterItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
void SearchLengthInsert();
void SearchReplaceInsert();
void FileNew();
void FileImport();
void FileExport();
void FileExit();
void MemClear();
void MorphereListAdd();
void MorphereListDel();
// NOTE(review): the three members below return values but are declared in
// the slots section; they look like plain accessors — confirm whether they
// are ever used as slots.
QTabWidget* getQTabWidget();
bool isSortMorphereDateChecked();
bool isSortDateMorphereChecked();
};
#endif // SANALY1

View File

@@ -0,0 +1,393 @@
#include "sanalyzer.h"
// Creates an unconfigured worker thread owned by parent.
// Every member is given a defined value (listed in declaration order) so
// that getters such as getWidget() never return an indeterminate value.
AnalyzerThread::AnalyzerThread(QObject *parent)
    : QThread(parent),
      num(0), m_pQWidget(NULL), m_nThread(1), m_nSource(0), m_bPeriod(false),
      m_bDateAll(false), m_nPeriod(0), m_nDateStart(0), m_nDateEnd(0),
      m_nTitleBody(0), m_pWidget(NULL), m_nPosDate(0), m_nPosBody(0),
      m_nPosTitle(0), mutex(NULL), pWidget(NULL), m_pTabWidget(NULL),
      nCount(NULL)
{
}
// Creates an unconfigured worker thread whose index is `number`.
// The argument used to be ignored and all members were left
// uninitialised; it is now stored in `num` and the rest get the same
// defaults as the QObject* constructor.
AnalyzerThread::AnalyzerThread(int number)
    : num(number), m_pQWidget(NULL), m_nThread(1), m_nSource(0),
      m_bPeriod(false), m_bDateAll(false), m_nPeriod(0), m_nDateStart(0),
      m_nDateEnd(0), m_nTitleBody(0), m_pWidget(NULL), m_nPosDate(0),
      m_nPosBody(0), m_nPosTitle(0), mutex(NULL), pWidget(NULL),
      m_pTabWidget(NULL), nCount(NULL)
{
}
// Creates a worker thread with index _number that shares _mutex and the row
// counter _nCount with its sibling threads.
// The former body statement `AnalyzerThread();` only constructed and
// destroyed a temporary object; it did not initialise *this. All members
// are therefore initialised here explicitly, in declaration order.
AnalyzerThread::AnalyzerThread(QMutex &_mutex, int _number, int &_nCount)
    : num(_number), m_pQWidget(NULL), m_nThread(1), m_nSource(0),
      m_bPeriod(false), m_bDateAll(false), m_nPeriod(0), m_nDateStart(0),
      m_nDateEnd(0), m_nTitleBody(0), m_pWidget(NULL), m_nPosDate(0),
      m_nPosBody(0), m_nPosTitle(0), mutex(&_mutex), pWidget(NULL),
      m_pTabWidget(NULL), nCount(&_nCount)
{
}
void AnalyzerThread::setThreadNumber(int _nThread)
{
m_nThread = _nThread;
}
int AnalyzerThread::getThreadNumber()
{
return m_nThread;
}
// Remembers the source widget and caches its tab widget, whose current page
// is the table processed by run().
// The pointer is also stored in m_pQWidget so that getWidget() returns what
// was set here, and a null argument no longer dereferences a null pointer.
void AnalyzerThread::setWidget(QWidget *_pWidget)
{
    m_pQWidget = _pWidget;
    m_pWidget = (Widget*)_pWidget;
    m_pTabWidget = (m_pWidget != NULL) ? m_pWidget->GetTabWidget() : NULL;
}
// Returns the m_pQWidget member.
QWidget* AnalyzerThread::getWidget()
{
    QWidget *pResult = m_pQWidget;
    return pResult;
}
// Appends every entry of morpherelist to the list of morphemes to count.
// Existing entries are kept; call resetMorphereList() first to replace them.
void AnalyzerThread::setMorphereList(const QStringList morpherelist)
{
    m_slMorphereList += morpherelist;
}
// Appends a single morpheme to the list of morphemes to count.
void AnalyzerThread::setMorphereList(const QString morphere)
{
    m_slMorphereList << morphere;
}
void AnalyzerThread::resetMorphereList()
{
m_slMorphereList.clear();
}
// Clears the morpheme list and, when _nSource is 0, treats _pWidget as an
// SAnaly1 and copies its thread count, period, date range and morpheme list
// into this thread. Any other _nSource only clears the list.
void AnalyzerThread::setParametersfromWidget(QWidget *_pWidget, int _nSource)
{
    resetMorphereList();
    if (_nSource != 0)
        return;

    pWidget = static_cast<SAnaly1 *>(_pWidget);
    SAnaly1 *pSettings = pWidget;
    setThreadNumber(pSettings->getThread());
    setPeriod(pSettings->getPeriod());
    m_bDateAll = pSettings->getDateAll();
    m_nDateStart = pSettings->getDateStart();
    m_nDateEnd = pSettings->getDateEnd();
    setMorphereList(pSettings->getMorphereList());
}
// Returns a copy of the list of morphemes to count.
QStringList AnalyzerThread::getMorphereList()
{
    QStringList slCopy(m_slMorphereList);
    return slCopy;
}
void AnalyzerThread::setPeriod(const int _nPeriod)
{
m_nPeriod = _nPeriod;
}
int AnalyzerThread::getPeriod()
{
return m_nPeriod;
}
void AnalyzerThread::setDate(const QDateTime _dtStart, const QDateTime _dtEnd)
{
m_nDateStart = _dtStart.toString("yyyy-MM-dd").replace("-","").toInt();
m_nDateEnd = _dtEnd.toString("yyyy-MM-dd").replace("-","").toInt();
}
// Sets the inclusive date range from two strings that are parsed as
// integers with QString::toInt().
void AnalyzerThread::setDate(const QString _strStart, const QString _strEnd)
{
    const int nStart = _strStart.toInt();
    const int nEnd = _strEnd.toInt();
    m_nDateStart = nStart;
    m_nDateEnd = nEnd;
}
// Returns the lower bound of the date range.
unsigned int AnalyzerThread::getDateStart()
{
    const unsigned int nStart = m_nDateStart;
    return nStart;
}
// Returns the upper bound of the date range.
unsigned int AnalyzerThread::getDateEnd()
{
    const unsigned int nEnd = m_nDateEnd;
    return nEnd;
}
void AnalyzerThread::setTitleBody(const int _n)
{
m_nTitleBody = _n;
}
void AnalyzerThread::setPosDate(const int _nPos)
{
m_nPosDate = _nPos;
}
void AnalyzerThread::setPosBody(const int _nPos)
{
m_nPosBody = _nPos;
}
void AnalyzerThread::setPosTitle(const int _nPos)
{
m_nPosTitle = _nPos;
}
void AnalyzerThread::run()
{
typedef mecab_t* (*mecab_new_fun)(int,char**);
typedef const char* (*mecab_sparse_tostr_fun)(mecab_t *mecab, const char *str);
typedef void (*mecab_destroy_fun)(mecab_t *mecab);
char *t[] = {"RRR","-d","dic"};
int int_t = 3;
// Create tagger object
mecab_t *mecab;
mecab_new_fun mecab_new = (mecab_new_fun)QLibrary::resolve("libmecab.dll","mecab_new");
mecab_sparse_tostr_fun mecab_sparse_tostr = (mecab_sparse_tostr_fun)QLibrary::resolve("libmecab.dll","mecab_sparse_tostr");
mecab = mecab_new(int_t, t);
mecab_destroy_fun mecab_destroy = (mecab_destroy_fun)QLibrary::resolve("libmecab.dll","mecab_destroy");
//qDebug() << num;
STable *pCurrent = (STable *)m_pTabWidget->currentWidget();
int nDate = 0;
while(true)
{
QString strTitle;
QString strBody;
QString strDate;
QString strData;
{
mutex->lock();
if(*nCount >= pCurrent->rowCount())
{
mutex->unlock();
break;
}
strTitle = pCurrent->item((*nCount),m_nPosTitle)->text();
strDate = pCurrent->item((*nCount),m_nPosDate)->text();
strBody = pCurrent->item((*nCount)++,m_nPosBody)->text();
mutex->unlock();
}
{
switch(m_nTitleBody)
{
case 0:
{
strData = strTitle;
break;
}
case 1:
{
strData = strBody;
break;
}
case 2:
{
strData = strTitle + "\n" + strBody;
break;
}
}
strTitle.clear();
strBody.clear();
}
if(strDate.length() < 11)
continue;
strDate = strDate.left(11);
if(strDate.trimmed().length()<10)
{
nDate = 0;
}
else
{
strDate = strDate.replace("-","").trimmed();
if(strDate.length() < 8)
{
nDate = 0;
}
else
nDate = strDate.toInt();
}
if(!m_bDateAll)
{
if(nDate < m_nDateStart || m_nDateEnd < nDate)
continue;
}
SAnaly1::m_mapKey mapkey;
switch(m_nPeriod)
{
case 0:
{
mapkey.strDate = "ALL";
break;
}
case 1:
{
mapkey.strDate = "D" + QString::number(nDate);
break;
}
case 2:
{
/*
QDate tempdate = QDate(nDate/10000, (nDate%10000)/100, nDate%100);
mapkey.strDate = "W" + QString::number(nDate/10000);
if(tempdate.weekNumber() < 10)
mapkey.strDate += "0";
mapkey.strDate += QString::number(tempdate.weekNumber());
*/
mapkey.strDate = getWeeksInMonth(nDate);
break;
}
case 3:
{
mapkey.strDate = "M";
if((nDate/100) < 10)
mapkey.strDate += "0";
mapkey.strDate += QString::number(nDate/100);
break;
}
}
QString strAnalyzedLine = QString::fromStdString(mecab_sparse_tostr(mecab, strData.toStdString().c_str())) + "\n";
QStringList strListAll = strAnalyzedLine.split("\n",QString::SkipEmptyParts);
foreach(QString strLine, strListAll)
{
QStringList strListLine = strLine.split("\t");
if(strListLine.size() < 2)
continue;
QStringList strpumsa = strListLine.at(1).trimmed().split(",");
foreach(QString strMorphere, m_slMorphereList)
{
if(strpumsa.at(0).trimmed().contains(strMorphere,Qt::CaseInsensitive))
{
mapkey.strKeyword = strListLine.at(0);
mapkey.strMorphere = strMorphere;
QString strkey = mapkey.strDate + "~!@" + mapkey.strMorphere + "~!@" + mapkey.strKeyword;
if(pWidget->m_HashResult[num].contains(strkey))
{
(pWidget->m_HashResult[num])[strkey]++;
}
else
{
pWidget->m_HashResult[num].insert(strkey,1);
}
}
}
}
//if(m_bDateAll)
//qDebug() << num << " : " << strData;
}
mecab_destroy(mecab);
/*
for (int nCount = 0 ; nCount < pCurrent->rowCount(); nCount++ )
{
QString strData = pCurrent->item(nCount,m_nPosBody)->text();
qDebug() << strData;
}
for(QHash<QString, int>::iterator iterPos = pWidget->m_HashResult[num].begin(); iterPos != pWidget->m_HashResult[num].end(); iterPos++)
{
qDebug() << num << " : " << iterPos.key() << " ===== " << iterPos.value();
}
foreach(QString str, m_slMorphereList)
{
qDebug() << num << " : " << str;
}
qDebug() << num << " : " << m_bDateAll;
qDebug() << num << " : " << m_bPeriod;
qDebug() << num << " : " << m_nDateStart;
qDebug() << num << " : " << m_nDateEnd;
qDebug() << num << " : " << pWidget->m_HashResult[num].isEmpty();
*/
}
// Builds the weekly period key "W" + yyyyMM + <week number> for a date given
// as the integer yyyymmdd. Weeks run Monday..Sunday (QDate::dayOfWeek()).
// Days in a partial first or last week of the month may be attributed to the
// previous or next month, in which case yyyyMM is that month.
// Returns the literal "inVaildDate" when _nDate is not a valid date.
QString AnalyzerThread::getWeeksInMonth(unsigned int _nDate)
{
QDate qToday(_nDate/10000, (_nDate/100)%100, _nDate%100);
if(!qToday.isValid())
return "inVaildDate";
QDate qTodayFirstDay = QDate(qToday.year(), qToday.month(), 1);
QDate qTodayLastDay = QDate(qToday.year(), qToday.month(), qToday.daysInMonth());
int thisFirstDayofWeek = qTodayFirstDay.dayOfWeek();
int thisLastDayofWeek = qTodayLastDay.dayOfWeek();
int thisLastDay = qTodayLastDay.daysInMonth();
int week = 0;
// Number of days of this month that fall into its first calendar week.
int firstWeekDays = (WEEK - thisFirstDayofWeek) + 1;
QString strWeek = "W";
// The first (partial) week counts as week 1 of this month only when the
// month starts on Monday..Thursday.
if(thisFirstDayofWeek < FRIDAY)
{
week = 1;
}
else
{
week = 0;
}
// Case 1: the day lies after the first week and before the last partial
// week: add the number of weeks elapsed since the first week (rounded up).
if((firstWeekDays < qToday.day()) && (qToday.day() <= (thisLastDay - thisLastDayofWeek)))
{
week = week + ((qToday.day() - firstWeekDays + WEEK - 1)/WEEK);
}
// Case 2: the day lies in the first week of the month.
else if((firstWeekDays >= qToday.day()))
{
// When the month starts on Friday..Sunday the day is attributed to the
// previous month; the week number becomes the count of calendar weeks
// that the previous month spans.
if(thisFirstDayofWeek >= FRIDAY)
{
const int DAYS_IN_WEEK = 7;
qToday = qToday.addMonths(-1);
int DaysInMonth = qToday.daysInMonth();
QDate FirstDayOfMonth = qToday;
FirstDayOfMonth.setDate(qToday.year(), qToday.month(), 1);
int WeekCount = DaysInMonth / DAYS_IN_WEEK;
int DaysLeft = DaysInMonth % DAYS_IN_WEEK;
if (DaysLeft > 0) {
WeekCount++;
// Check if the remaining days are split on two weeks
if (FirstDayOfMonth.dayOfWeek() + DaysLeft - 1 > DAYS_IN_WEEK)
WeekCount++;
}
week = WeekCount;
}
}
// Case 3: the day lies in the last partial week of the month.
else
{
// When the month ends on Monday..Wednesday the day is attributed to
// week 1 of the next month; otherwise it is counted like case 1.
if(thisLastDayofWeek < THURSDAY)
{
week = 1;
qToday = qToday.addMonths(1);
}
else
{
week = week + ((qToday.day() - firstWeekDays + WEEK - 1)/WEEK);
}
}
// qToday may have been moved to the previous/next month above.
strWeek += qToday.toString("yyyyMM");
strWeek += QString::number(week);
return strWeek;
}
// Assigns this thread's index and attaches the mutex and the row counter it
// shares with the other worker threads (both are stored by address).
void AnalyzerThread::setAnalyzerThread(QMutex &_mutex, int number, int &_nCount)
{
    this->mutex = &_mutex;
    this->nCount = &_nCount;
    this->num = number;
}

View File

@@ -0,0 +1,93 @@
// The include guard must enclose the whole header. It used to be closed
// right after the #define, so including this file twice redefined
// AnalyzerThread.
#ifndef SANALYZER
#define SANALYZER

#include <QThread>
#include <QWidget>
#include <QDebug>
#include <QMutex>
#include <QLibrary>
#include <QStringList>
#include <QDateTime>
#include "widget.h"
#include "sanaly1.h"
#include "mecab.h"

// Worker thread that analyses the rows of the current table with MeCab and
// counts morphemes into SAnaly1::m_HashResult[num]. Several instances share
// one mutex and one row counter (see setAnalyzerThread()).
class AnalyzerThread : public QThread
{
    Q_OBJECT
public:
    AnalyzerThread(QObject *parent = NULL);
    AnalyzerThread(int number);
    AnalyzerThread(QMutex &_mutex, int number, int &_nCount);

private:
    // Day-of-week numbers as returned by QDate::dayOfWeek().
    enum {
        MONDAY = 1,
        TUESDAY,
        WEDNESDAY,
        THURSDAY,
        FRIDAY,
        SATURDAY,
        SUNDAY
    };
    enum
    {
        WEEK = 7    // days per week
    };

    int num;                        // index of this thread / result slot
    QWidget *m_pQWidget;            // returned by getWidget()
    int m_nThread;
    QStringList m_slMorphereList;   // morphemes to count
    int m_nSource;
    bool m_bPeriod;
    bool m_bDateAll;                // true: do not filter rows by date
    int m_nPeriod; // 0 none, 1 1day, 2 1week, 3 1month;
    unsigned int m_nDateStart;      // inclusive range bounds as yyyymmdd
    unsigned int m_nDateEnd;
    int m_nTitleBody;               // 0 title, 1 body, 2 both
    Widget *m_pWidget;
    int m_nPosDate;                 // table columns read by run()
    int m_nPosBody;
    int m_nPosTitle;
    QMutex *mutex;                  // protects *nCount
    SAnaly1 *pWidget;               // settings source and result sink
    QTabWidget *m_pTabWidget;
    int *nCount;                    // next row to process, shared by threads

protected:
    void run();

public:
    void setThreadNumber(int _nThread = 1);
    int getThreadNumber();
    void setWidget(QWidget *_pWidget);
    QWidget* getWidget();
    void setMorphereList(const QStringList morpherelist);
    void setMorphereList(const QString morphere);
    QStringList getMorphereList();
    void resetMorphereList();
    void setParametersfromWidget(QWidget *_pWidget, int _nSource);
    void setPeriod(const int _nPeriod);
    int getPeriod();
    void setDate(const QString _strStart, const QString _strEnd);
    void setDate(const QDateTime _dtStart, const QDateTime _dtEnd);
    // NOTE(review): no definition of setDateStart(), setDateEnd() or
    // setSource() was found next to the other members in sanalyzer.cpp;
    // confirm they are defined somewhere before calling them.
    void setDateStart(const QString _strStart);
    void setDateStart(const QDateTime _dtStart);
    void setDateEnd(const QString _strEnd);
    void setDateEnd(const QDateTime _dtStart);
    unsigned int getDateStart();
    unsigned int getDateEnd();
    void setTitleBody(const int _n);
    void setSource(QWidget* _qwidget);
    void setPosDate(const int _nPos);
    void setPosBody(const int _nPos);
    void setPosTitle(const int _nPos);
    QString getWeeksInMonth(unsigned int _nDate);
    void setAnalyzerThread(QMutex &_mutex, int number, int &_nCount);
};

#endif // SANALYZER

View File

@@ -0,0 +1,95 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_SCOPED_PTR_H
#define MECAB_SCOPED_PTR_H
#include <cstring>
#include <string>
namespace MeCab {

// Non-copyable owner of one heap object; the object is deleted when the
// owner is destroyed or reset() to another pointer.
template<class T> class scoped_ptr {
 public:
  typedef T element_type;

  explicit scoped_ptr(T *raw = 0) : ptr_(raw) {}
  virtual ~scoped_ptr() { delete ptr_; }

  // Deletes the current object and takes ownership of raw.
  void reset(T *raw = 0) {
    delete ptr_;
    ptr_ = raw;
  }

  T *get() const { return ptr_; }
  T &operator*() const { return *ptr_; }
  T *operator->() const { return ptr_; }

 private:
  typedef scoped_ptr<T> this_type;
  T *ptr_;
  // Copying is disabled (declared, never defined).
  scoped_ptr(scoped_ptr const &);
  scoped_ptr &operator=(scoped_ptr const &);
};

// Same as scoped_ptr, but for arrays allocated with new[].
template<class T> class scoped_array {
 public:
  typedef T element_type;

  explicit scoped_array(T *raw = 0) : ptr_(raw) {}
  virtual ~scoped_array() { delete [] ptr_; }

  // Deletes the current array and takes ownership of raw.
  void reset(T *raw = 0) {
    delete [] ptr_;
    ptr_ = raw;
  }

  T *get() const { return ptr_; }
  T &operator*() const { return *ptr_; }
  T *operator->() const { return ptr_; }
  T &operator[](size_t index) const { return ptr_[index]; }

 private:
  typedef scoped_array<T> this_type;
  T *ptr_;
  // Copying is disabled (declared, never defined).
  scoped_array(scoped_array const &);
  scoped_array &operator=(scoped_array const &);
};

// Heap-allocated array of exactly N elements, released on destruction.
template<class T, int N> class scoped_fixed_array {
 public:
  typedef T element_type;

  explicit scoped_fixed_array() : ptr_(new T[N]), size_(N) {}
  virtual ~scoped_fixed_array() { delete [] ptr_; }

  size_t size() const { return size_; }
  T *get() const { return ptr_; }
  T &operator*() const { return *ptr_; }
  T *operator->() const { return ptr_; }
  T &operator[](size_t index) const { return ptr_[index]; }

 private:
  typedef scoped_fixed_array<T, N> this_type;
  T *ptr_;
  size_t size_;
  // Copying is disabled (declared, never defined).
  scoped_fixed_array(scoped_fixed_array const &);
  scoped_fixed_array &operator=(scoped_fixed_array const &);
};

// Owned, NUL-terminated copy of a string.
class scoped_string: public scoped_array<char> {
 public:
  explicit scoped_string() { reset_string(""); }
  explicit scoped_string(const std::string &str) {
    reset_string(str);
  }

  // Replaces the held text with a copy of str (up to its first NUL).
  void reset_string(const std::string &str) {
    char *copy = new char[str.size() + 1];
    std::strcpy(copy, str.c_str());
    reset(copy);
  }

  // Replaces the held text with a copy of the C string str.
  void reset_string(const char *str) {
    const size_t bytes = std::strlen(str) + 1;  // include the terminator
    char *copy = new char[bytes];
    std::memcpy(copy, str, bytes);
    reset(copy);
  }
};
}
#endif

177
MorphereAnalyzer/stable.cpp Normal file
View File

@@ -0,0 +1,177 @@
#include "stable.h"
#include <QKeyEvent>
#include <QApplication>
#include <qclipboard>
#include <QMenu>
#include <QDebug>
#include <QHeaderView>
#include <QMessageBox>
// Creates a sortable table with per-pixel vertical scrolling. A click on a
// cell goes to CellClick() and a context-menu request on the vertical
// header goes to HeaderContextMenuShow(). No article type is selected yet.
STable::STable(QWidget *parent)
    : QTableWidget(parent), m_nArticle(E_ARTICLE_NONE)
{
    setSortingEnabled(true);

    QHeaderView *pRowHeader = verticalHeader();
    pRowHeader->setContextMenuPolicy(Qt::CustomContextMenu);
    connect(pRowHeader, SIGNAL(customContextMenuRequested(QPoint)),
            this, SLOT(HeaderContextMenuShow(QPoint)));
    connect(this, SIGNAL(cellClicked(int,int)),
            this, SLOT(CellClick(int,int)));

    setVerticalScrollMode(QAbstractItemView::ScrollPerPixel);
}
// Creates one column per entry of *_pvecHead and labels column i with the
// _nColumn-th string of entry i.
void STable::SetHeaderList(QVector <QStringList>* _pvecHead,int _nColumn)
{
    const int nColumns = _pvecHead->size();
    setColumnCount(nColumns);
    for (int nCol = 0; nCol < nColumns; ++nCol)
    {
        const QStringList strList = _pvecHead->at(nCol);
        setHorizontalHeaderItem(nCol, new QTableWidgetItem(strList.at(_nColumn)));
    }
}
// Handles Ctrl+C (copy), Ctrl+V (paste) and Delete (clear selected cells).
// Every other key is passed on to QTableWidget; previously it was silently
// dropped, which disabled the table's built-in keyboard handling.
void STable::keyPressEvent(QKeyEvent* event)
{
    const bool bCtrl = (event->modifiers() & Qt::ControlModifier);

    if (event->key() == Qt::Key_C && bCtrl)
        Copy();
    else if (event->key() == Qt::Key_V && bCtrl)
        Paste();
    else if (event->key() == Qt::Key_Delete)
        Delete();
    else
        QTableWidget::keyPressEvent(event);
}
// Inserts a row above the current row and fills each of its cells with a
// single space. Does nothing when there is no current row: with row -1 no
// row is inserted and the items created for it would be leaked.
void STable::menuInsert()
{
    const int nRow = currentRow();
    if (nRow < 0)
        return;

    insertRow(nRow);
    for (int i = 0; i < columnCount() ; i++)
        setItem(nRow,i,new QTableWidgetItem(" "));
}
void STable::menuDelete()
{
int nCount = 0;
foreach(QModelIndex current,verticalHeader()->selectionModel()->selectedRows())
{
removeRow(current.row()-nCount);
nCount++;
}
}
void STable::HeaderContextMenuShow(const QPoint& pos) // this is a slot
{
QMenu myMenu;
myMenu.addAction("Insert", this, SLOT(menuInsert()));
myMenu.addAction("Delete", this, SLOT(menuDelete()));
myMenu.exec(mapToGlobal(pos));
}
// Toggles the clicked row between a fixed height of 30 and a height that
// fits its contents, then repaints. The second argument is unused.
void STable::CellClick(int nRow,int nPrev)
{
    Q_UNUSED(nPrev);

    const bool bCollapsed = (rowHeight(nRow) == 30);
    if (!bCollapsed)
        setRowHeight(nRow,30);
    else
        resizeRowToContents(nRow);

    repaint();
}
// Copies the selected cells to the clipboard as text: cells of the same row
// are separated by a tab, a change of row by a newline.
// The first cell is now tracked with a flag. The old test "text is still
// empty" also matched after leading empty cells, so their separators were
// lost and the columns shifted.
void STable::Copy()
{
    QModelIndexList cells = selectedIndexes();
    //qSort(cells);
    QString text;
    int currentRow = 0;
    bool bFirstCell = true;
    foreach (const QModelIndex& cell, cells)
    {
        if (!bFirstCell)
        {
            if (cell.row() != currentRow)
                text += '\n';
            else
                text += '\t';
        }
        bFirstCell = false;
        currentRow = cell.row();
        text += cell.data().toString();
    }
    QApplication::clipboard()->setText(text);
}
// Pastes the clipboard text into the table, starting at the first selected
// cell (in sorted order). Lines become rows and tab-separated fields become
// columns; rows are appended when the text does not fit, fields beyond the
// last column are dropped. Each pasted value is trimmed and stored with one
// space on either side.
// The former test `nRows >= nRows` was always true, so a new item was always
// created; the unreachable branch and the empty placeholder blocks are gone.
void STable::Paste()
{
    // TODO: parse HTML clipboard data
    QModelIndexList cells = selectedIndexes();
    qSort(cells);
    if (cells.size() == 0) return;

    QString str = QApplication::clipboard()->text();
    QStringList strRows = str.split("\n");
    if (strRows.size() == 0) return;

    int nStartRow = cells.at(0).row();
    int nStartCol = cells.at(0).column();
    int nRows = strRows.size();

    // Grow the table when the pasted rows run past its end.
    int nPlusRow = nStartRow + nRows - rowCount();
    if (nPlusRow > 0)
        setRowCount(rowCount()+nPlusRow);

    int nRow = nStartRow;
    foreach(QString strRow,strRows)
    {
        QStringList strCols = strRow.split("\t");
        int nCol = nStartCol;
        foreach(QString strCol,strCols)
        {
            if (nCol >= columnCount()) break;   // no column left for the rest
            // setItem() replaces (and deletes) any item already in the cell.
            QTableWidgetItem *pItem = new QTableWidgetItem;
            pItem->setText(" "+strCol.trimmed()+" ");
            setItem(nRow,nCol,pItem);
            nCol++;
        }
        nRow++;
    }
}
// Clears the text of every selected cell that has an item.
// The item stays in place: calling setItem() with an item the table already
// owns is rejected by QTableWidget with a warning, so that call was removed.
void STable::Delete()
{
    QModelIndexList cells = selectedIndexes();
    foreach (const QModelIndex& cell, cells)
    {
        QTableWidgetItem *pItem = item(cell.row(),cell.column());
        if (pItem != NULL)
            pItem->setText("");
    }
}
void STable::setArticleSelect(int _nArticle)
{
m_nArticle = _nArticle;
}
// Returns the display label for an E_ARTICLE value; any other value yields
// "{Other} ".
// NOTE(review): "{Relpy} " looks like a misspelling of "Reply"; it is kept
// unchanged because other code may compare against this exact text.
QString STable::GetArticleType(int _nSelect)
{
    if (_nSelect == E_ARTICLE_NONE)
        return QString("{None} ");
    if (_nSelect == E_ARTICLE_BODY)
        return QString("{Body} ");
    if (_nSelect == E_ARTICLE_REPLY)
        return QString("{Relpy} ");
    if (_nSelect == E_ARTICLE_ALL)
        return QString("{All} ");
    return QString("{Other} ");
}

39
MorphereAnalyzer/stable.h Normal file
View File

@@ -0,0 +1,39 @@
#ifndef STABLE_H
#define STABLE_H
#include <QTableWidget>
#include <QStringList>
// QTableWidget with clipboard copy/paste, a Delete-key handler, a row
// insert/delete context menu on the vertical header and click-to-toggle row
// height. Each table also carries an article type (see E_ARTICLE).
class STable : public QTableWidget
{
Q_OBJECT
public:
// Article type associated with a table.
enum E_ARTICLE
{
E_ARTICLE_NONE = -1,
E_ARTICLE_ALL = 0,
E_ARTICLE_BODY = 1,
E_ARTICLE_REPLY = 2,
};
explicit STable(QWidget *parent = 0);
// Handles Ctrl+C, Ctrl+V and Delete.
void keyPressEvent(QKeyEvent* event);
// Sets the column count to _vecColumn->size() and uses string _nColumn of
// each entry as that column's header text.
void SetHeaderList(QVector <QStringList> *_vecColumn,int _nColumn);
void setArticleSelect(int _nArticle);
int getArticleSelect(){return m_nArticle;}
// Copies the selected cells to the clipboard (tab/newline separated).
void Copy();
// Pastes clipboard text starting at the first selected cell.
void Paste();
// Clears the text of the selected cells.
void Delete();
// Display label for an E_ARTICLE value.
static QString GetArticleType(int _nSelect);
private:
int m_nArticle;   // current article type, E_ARTICLE_NONE after construction
public:
QStringList m_strListHeader;
signals:
public slots:
// Shows the Insert/Delete menu for the vertical header.
void HeaderContextMenuShow(const QPoint& pos);
// Inserts a row above the current row.
void menuInsert();
// Removes the selected rows.
void menuDelete();
// Toggles the height of the clicked row; the second argument is unused.
void CellClick(int nRow,int nPrev);
};
#endif // STABLE_H

View File

@@ -0,0 +1,55 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_STREAM_WRAPPER_H_
#define MECAB_STREAM_WRAPPER_H_
#include <cstring>
#include <fstream>
#include <iostream>
#include "utils.h"
namespace MeCab {
// Input stream selected by file name: "-" means std::cin, anything else is
// opened as a std::ifstream owned by this object and deleted with it.
// Copying is disabled because a copy would delete the same stream twice.
class istream_wrapper {
 private:
  std::istream* is_;
  // Not copyable (declared, never defined).
  istream_wrapper(const istream_wrapper &);
  istream_wrapper &operator=(const istream_wrapper &);

 public:
  std::istream &operator*() const  { return *is_; }
  std::istream *operator->() const { return is_;  }

  explicit istream_wrapper(const char* filename): is_(0) {
    if (std::strcmp(filename, "-") == 0) {
      is_ = &std::cin;
    } else {
      is_ = new std::ifstream(WPATH(filename));
    }
  }

  virtual ~istream_wrapper() {
    // std::cin is not owned and must not be deleted.
    if (is_ != &std::cin) delete is_;
  }
};
// Output stream selected by file name: "-" means std::cout, anything else is
// opened as a std::ofstream owned by this object and deleted with it.
// Copying is disabled because a copy would delete the same stream twice.
class ostream_wrapper {
 private:
  std::ostream* os_;
  // Not copyable (declared, never defined).
  ostream_wrapper(const ostream_wrapper &);
  ostream_wrapper &operator=(const ostream_wrapper &);

 public:
  std::ostream &operator*() const  { return *os_; }
  std::ostream *operator->() const { return os_;  }

  explicit ostream_wrapper(const char* filename): os_(0) {
    if (std::strcmp(filename, "-") == 0) {
      os_ = &std::cout;
    } else {
      os_ = new std::ofstream(WPATH(filename));
    }
  }

  virtual ~ostream_wrapper() {
    // std::cout is not owned and must not be deleted.
    if (os_ != &std::cout) delete os_;
  }
};
}
#endif // MECAB_STREAM_WRAPPER_H_

View File

@@ -0,0 +1,74 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_STRINGBUFFER_H
#define MECAB_STRINGBUFFER_H
#include <string>
#include "common.h"
#include "utils.h"
namespace MeCab {
// Bodies of StringBuffer's numeric operator<< overloads: format n into a
// local 64-byte buffer with itoa()/uitoa()/dtoa() and return the result of
// this->write() on that buffer. Each macro contains a return statement, so
// it can only be used as the whole body of a function returning
// StringBuffer&.
// NOTE(review): itoa/uitoa/dtoa are presumably the helpers from utils.h.
#define _ITOA(n) do { char fbuf[64]; itoa(n, fbuf); return this->write(fbuf); } while (0)
#define _UITOA(n) do { char fbuf[64]; uitoa(n, fbuf); return this->write(fbuf);} while (0)
#define _DTOA(n) do { char fbuf[64]; dtoa(n, fbuf); return this->write(fbuf); } while (0)
// Character buffer written to with write() or operator<<.
// The default constructor starts with no storage (is_delete_ = true); the
// (char*, size_t) constructor writes into a caller-supplied buffer of the
// given size (is_delete_ = false).
// NOTE(review): is_delete_ presumably tells the destructor whether ptr_ is
// owned; the destructor is defined elsewhere — confirm there.
class StringBuffer {
private:
size_t size_;        // set to 0 by the constructors and by clear()
size_t alloc_size_;  // 0, or the size passed with a caller-supplied buffer
char *ptr_;          // buffer start; 0 until storage exists
bool is_delete_;
bool error_;         // when true, str() returns 0
bool reserve(size_t);
public:
explicit StringBuffer(): size_(0), alloc_size_(0),
ptr_(0), is_delete_(true), error_(false) {}
explicit StringBuffer(char *_s, size_t _l):
size_(0), alloc_size_(_l), ptr_(_s),
is_delete_(false), error_(false) {}
virtual ~StringBuffer();
StringBuffer& write(char);
StringBuffer& write(const char*, size_t);
StringBuffer& write(const char*);
// Numeric overloads: the _DTOA/_ITOA/_UITOA macros format the number and
// return this->write(...) of the formatted text.
StringBuffer& operator<<(double n) { _DTOA(n); }
StringBuffer& operator<<(short int n) { _ITOA(n); }
StringBuffer& operator<<(int n) { _ITOA(n); }
StringBuffer& operator<<(long int n) { _ITOA(n); }
StringBuffer& operator<<(unsigned short int n) { _UITOA(n); }
StringBuffer& operator<<(unsigned int n) { _UITOA(n); }
StringBuffer& operator<<(unsigned long int n) { _UITOA(n); }
#ifdef HAVE_UNSIGNED_LONG_LONG_INT
StringBuffer& operator<<(unsigned long long int n) { _UITOA(n); }
#endif
// Character and string overloads forward to write().
StringBuffer& operator<< (char n) {
return this->write(n);
}
StringBuffer& operator<< (unsigned char n) {
return this->write(n);
}
StringBuffer& operator<< (const char* n) {
return this->write(n);
}
StringBuffer& operator<< (const std::string& n) {
return this->write(n.c_str());
}
// Resets the length to 0; the storage itself is left untouched.
void clear() { size_ = 0; }
// Returns the buffer, or 0 when the error flag is set.
const char *str() const {
return error_ ? 0 : const_cast<const char*>(ptr_);
}
};
}
#endif

189
MorphereAnalyzer/thread.h Normal file
View File

@@ -0,0 +1,189 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_THREAD_H
#define MECAB_THREAD_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#else
#ifdef _WIN32
#include <windows.h>
#include <process.h>
#endif
#endif
#if defined HAVE_GCC_ATOMIC_OPS || defined HAVE_OSX_ATOMIC_OPS
#include <sched.h>
#endif
#if defined HAVE_OSX_ATOMIC_OPS
#include <libkern/OSAtomic.h>
#endif
#if defined HAVE_PTHREAD_H
#define MECAB_USE_THREAD 1
#endif
// Native Windows (not Cygwin): enable threading and wrap _beginthreadex().
// BEGINTHREAD casts each argument to the parameter type _beginthreadex()
// takes and casts the returned value to HANDLE.
// NOTE(review): func is force-cast to unsigned (_stdcall *)(void *), while
// thread::start() in this file passes thread::wrapper, declared as
// void *(void *) -- confirm the calling-convention and return-type mismatch
// is acceptable on every targeted Windows ABI.
#if (defined(_WIN32) && !defined(__CYGWIN__))
#define MECAB_USE_THREAD 1
#define BEGINTHREAD(src, stack, func, arg, flag, id) \
(HANDLE)_beginthreadex((void *)(src), (unsigned)(stack), \
(unsigned(_stdcall *)(void *))(func), (void *)(arg), \
(unsigned)(flag), (unsigned *)(id))
#endif
namespace MeCab {
// Map three primitives onto whichever atomic API the platform offers:
//   atomic_add(a, b)           add b to *a
//   compare_and_swap(a, b, c)  store c into *a if *a currently equals b
//   yield_processor()          back off inside a spin loop
// Every branch that applies also defines HAVE_ATOMIC_OPS, which enables the
// read_write_mutex code further down. Later branches #undef and redefine the
// macros, so when several apply the last one wins.
//
// NOTE(review): the return values are not uniform. InterlockedCompareExchange
// and __sync_val_compare_and_swap yield the previous value of *a, whereas
// OSAtomicCompareAndSwapInt is documented to yield a success flag; with b == 0
// the truthiness is therefore inverted on OS X. Confirm that callers testing
// the result (read_write_mutex::write_lock) behave as intended there.

// Win32 interlocked API.
#if (defined(_WIN32) && !defined(__CYGWIN__))
#undef atomic_add
#undef compare_and_swap
#undef yield_processor
#define atomic_add(a, b) ::InterlockedExchangeAdd(a, b)
#define compare_and_swap(a, b, c) ::InterlockedCompareExchange(a, c, b)
#define yield_processor() YieldProcessor()
#define HAVE_ATOMIC_OPS 1
#endif
// GCC __sync builtins.
#ifdef HAVE_GCC_ATOMIC_OPS
#undef atomic_add
#undef compare_and_swap
#undef yield_processor
#define atomic_add(a, b) __sync_add_and_fetch(a, b)
#define compare_and_swap(a, b, c) __sync_val_compare_and_swap(a, b, c)
#define yield_processor() sched_yield()
#define HAVE_ATOMIC_OPS 1
#endif
// OS X libkern OSAtomic API.
#ifdef HAVE_OSX_ATOMIC_OPS
#undef atomic_add
#undef compare_and_swap
#undef yield_processor
#define atomic_add(a, b) OSAtomicAdd32(b, a)
#define compare_and_swap(a, b, c) OSAtomicCompareAndSwapInt(b, c, a)
#define yield_processor() sched_yield()
#define HAVE_ATOMIC_OPS 1
#endif
#ifdef HAVE_ATOMIC_OPS
// A simple, non-scalable, writer-preference reader/writer spin lock.
// It is a slightly modified version of the lock described in:
// "Scalable Reader-Writer Synchronization for Shared-Memory Multiprocessors",
// John M. Mellor-Crummey and Michael L. Scott, PPoPP '91.
//
// State is kept in two counters:
//   l_              bit 0 (kWaFlag) is set while a writer holds the lock;
//                   each reader adds kRcIncr, so the remaining bits count readers.
//   write_pending_  number of writers that are waiting for or holding the lock.
class read_write_mutex {
public:
// Announces a pending writer, then spins until l_ can be switched from 0
// (no readers, no writer) to kWaFlag.
inline void write_lock() {
atomic_add(&write_pending_, 1);
while (compare_and_swap(&l_, 0, kWaFlag)) {
yield_processor();
}
}
// Waits until no writer is pending, registers as a reader, then waits for
// the writer flag to be clear.
// NOTE(review): both spin conditions read the counters with plain loads, and
// outside the OS X branch the fields are not volatile -- confirm the compiler
// cannot hoist these reads out of the loops on the targeted toolchains.
inline void read_lock() {
while (write_pending_ > 0) {
yield_processor();
}
atomic_add(&l_, kRcIncr);
while ((l_ & kWaFlag) != 0) {
yield_processor();
}
}
// Clears the writer flag and withdraws the pending-writer announcement.
inline void write_unlock() {
atomic_add(&l_, -kWaFlag);
atomic_add(&write_pending_, -1);
}
// Removes this reader's contribution from l_.
inline void read_unlock() {
atomic_add(&l_, -kRcIncr);
}
read_write_mutex(): l_(0), write_pending_(0) {}
private:
// Writer-active flag (bit 0 of l_).
static const int kWaFlag = 0x1;
// Amount added to l_ per active reader.
static const int kRcIncr = 0x2;
// The counter type follows what the selected atomic API operates on.
#ifdef HAVE_OSX_ATOMIC_OPS
volatile int l_;
volatile int write_pending_;
#else
long l_;
long write_pending_;
#endif
};
class scoped_writer_lock {
public:
scoped_writer_lock(read_write_mutex *mutex) : mutex_(mutex) {
mutex_->write_lock();
}
~scoped_writer_lock() {
mutex_->write_unlock();
}
private:
read_write_mutex *mutex_;
};
class scoped_reader_lock {
public:
scoped_reader_lock(read_write_mutex *mutex) : mutex_(mutex) {
mutex_->read_lock();
}
~scoped_reader_lock() {
mutex_->read_unlock();
}
private:
read_write_mutex *mutex_;
};
#endif // HAVE_ATOMIC_OPS
// Minimal thread base class: derive from it, override run(), then call
// start() and later join().
// The handle is a pthread_t when HAVE_PTHREAD_H is defined, otherwise a Win32
// HANDLE when _WIN32 is defined. With neither defined there is no handle and
// start()/join() have empty bodies.
class thread {
 private:
#if defined(HAVE_PTHREAD_H)
  pthread_t hnd;
#elif defined(_WIN32)
  HANDLE hnd;
#endif

 public:
  // Entry point handed to the platform API; ptr is the thread object passed
  // by start(). Runs the object's run() and returns a null pointer.
  static void* wrapper(void *ptr) {
    static_cast<thread *>(ptr)->run();
    return 0;
  }

  // Work to perform on the new thread; the default does nothing.
  virtual void run() {}

  // Launches wrapper(this) on a new thread and records its handle.
  void start() {
#if defined(HAVE_PTHREAD_H)
    pthread_create(&hnd, 0, &thread::wrapper,
                   static_cast<void *>(this));
#elif defined(_WIN32)
    DWORD id;
    hnd = BEGINTHREAD(0, 0, &thread::wrapper, this, 0, &id);
#endif
  }

  // Waits for the started thread to finish; on Windows the handle is closed
  // afterwards.
  void join() {
#if defined(HAVE_PTHREAD_H)
    pthread_join(hnd, 0);
#elif defined(_WIN32)
    WaitForSingleObject(hnd, INFINITE);
    CloseHandle(hnd);
#endif
  }

  virtual ~thread() {}
};
}
#endif

View File

@@ -0,0 +1,134 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_TOKENIZER_H_
#define MECAB_TOKENIZER_H_
#include "mecab.h"
#include "freelist.h"
#include "dictionary.h"
#include "char_property.h"
#include "nbest_generator.h"
#include "scoped_ptr.h"
namespace MeCab {
class Param;
class NBestGenerator;
// Bundles the free lists and scratch buffers used while building nodes (N)
// and paths (P). The node free list and the results array are created in the
// constructor; the path free list, the character free list and the N-best
// generator are created on first use.
template <typename N, typename P>
class Allocator {
 public:
  // Returns a zero-filled node taken from the node free list and stamped
  // with the next sequential id.
  N *newNode() {
    N *node = node_freelist_->alloc();
    std::memset(node, 0, sizeof(N));
    node->id = id_++;
    return node;
  }

  // Returns a path from the path free list, creating the list on first use.
  P *newPath() {
    if (!path_freelist_.get()) {
      path_freelist_.reset(new FreeList<P>(PATH_FREELIST_SIZE));
    }
    return path_freelist_->alloc();
  }

  // Scratch array holding results_size() dictionary results.
  Dictionary::result_type *mutable_results() {
    return results_.get();
  }

  // Requests size + 1 characters from the character free list, creating the
  // list on first use.
  char *alloc(size_t size) {
    if (!char_freelist_.get()) {
      char_freelist_.reset(new ChunkFreeList<char>(BUF_SIZE));
    }
    return char_freelist_->alloc(size + 1);
  }

  // Copies at most the first size characters of str into storage obtained
  // from alloc() and always NUL-terminates the copy at index size.
  // Only size characters are read from str, so str does not have to be
  // terminated at that position; previously size + 1 characters were copied,
  // which read str[size] and left the copy unterminated when that character
  // was not NUL.
  char *strdup(const char *str, size_t size) {
    char *n = alloc(size + 1);
    std::strncpy(n, str, size);  // zero-pads when str is shorter than size
    n[size] = '\0';
    return n;
  }

  // Returns the N-best generator, creating it on first use.
  NBestGenerator *nbest_generator() {
    if (!nbest_generator_.get()) {
      nbest_generator_.reset(new NBestGenerator);
    }
    return nbest_generator_.get();
  }

  // Resizes the partial buffer to size characters and returns its start.
  // NOTE(review): with size == 0 this takes the address of element 0 of an
  // empty vector -- confirm callers never pass 0.
  char *partial_buffer(size_t size) {
    partial_buffer_.resize(size);
    return &partial_buffer_[0];
  }

  // Number of entries in the array returned by mutable_results().
  size_t results_size() const {
    return kResultsSize;
  }

  // Restarts node ids at 0 and calls free() on the node free list and on
  // whichever of the lazily created free lists exist.
  void free() {
    id_ = 0;
    node_freelist_->free();
    if (path_freelist_.get()) {
      path_freelist_->free();
    }
    if (char_freelist_.get()) {
      char_freelist_->free();
    }
  }

  Allocator()
      : id_(0),
        node_freelist_(new FreeList<N>(NODE_FREELIST_SIZE)),
        path_freelist_(0),
        char_freelist_(0),
        nbest_generator_(0),
        results_(new Dictionary::result_type[kResultsSize]) {}
  virtual ~Allocator() {}

 private:
  static const size_t kResultsSize = 512;
  size_t id_;  // id handed to the next node created by newNode()
  scoped_ptr<FreeList<N> > node_freelist_;
  scoped_ptr<FreeList<P> > path_freelist_;
  scoped_ptr<ChunkFreeList<char> > char_freelist_;
  scoped_ptr<NBestGenerator> nbest_generator_;
  std::vector<char> partial_buffer_;
  scoped_array<Dictionary::result_type> results_;
};
// Dictionary-backed tokenizer parameterised on the node type N and the path
// type P. Only the interface is declared here; apart from what() and the
// destructor, the member functions are defined elsewhere.
template <typename N, typename P>
class Tokenizer {
private:
std::vector<Dictionary *> dic_;
Dictionary unkdic_;
scoped_string bos_feature_;
scoped_string unk_feature_;
FreeList<DictionaryInfo> dictionary_info_freelist_;
std::vector<std::pair<const Token *, size_t> > unk_tokens_;
DictionaryInfo *dictionary_info_;
CharInfo space_;
CharProperty property_;
size_t max_grouping_size_;
// Message store read back through what().
whatlog what_;
public:
// Node factories; storage comes from the caller's allocator.
// NOTE(review): BOS/EOS presumably mean begin/end of sentence -- confirm.
N *getBOSNode(Allocator<N, P> *allocator) const;
N *getEOSNode(Allocator<N, P> *allocator) const;
// Looks up the text in [begin, end); IsPartial selects a compile-time variant.
template <bool IsPartial> N *lookup(const char *begin, const char *end,
Allocator<N, P> *allocator,
Lattice *lattice) const;
bool open(const Param &param);
void close();
const DictionaryInfo *dictionary_info() const;
// Text currently held by what_.
const char *what() { return what_.str(); }
explicit Tokenizer();
// Calls close() on destruction.
virtual ~Tokenizer() { this->close(); }
};
}
#endif // MECAB_TOKENIZER_H_

148
MorphereAnalyzer/ucs.h Normal file
View File

@@ -0,0 +1,148 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_UCS_H
#define MECAB_UCS_H
#ifndef MECAB_USE_UTF8_ONLY
#include "ucstable.h"
#endif
namespace MeCab {
// All internal codes are represented in UCS2.
// To support a specific local encoding (e.g., Big5 or EUC-KR), write a
// function that maps a character in that encoding to its UCS2 code.
// Decodes the UTF-8 sequence starting at begin (input ends at end).
// Stores the number of bytes consumed in *mblen and returns the UCS2 code.
// Sequences of four to six bytes lie outside UCS2: their length is reported
// and 0 is returned. A lead byte that matches no form, or a sequence cut
// short by end, is reported as one byte with code 0.
inline unsigned short utf8_to_ucs2(const char *begin, const char *end,
                                   size_t* mblen) {
  const size_t avail = end - begin;
  const unsigned char lead = static_cast<unsigned char>(begin[0]);

  // One byte: 0xxxxxxx
  if (lead < 0x80) {
    *mblen = 1;
    return lead;
  }
  // Two bytes: 110xxxxx 10xxxxxx
  if (avail >= 2 && (lead & 0xe0) == 0xc0) {
    *mblen = 2;
    return ((lead & 0x1f) << 6) | (begin[1] & 0x3f);
  }
  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (avail >= 3 && (lead & 0xf0) == 0xe0) {
    *mblen = 3;
    return ((lead & 0x0f) << 12) |
        ((begin[1] & 0x3f) << 6) | (begin[2] & 0x3f);
  }

  // Everything below is outside UCS2: only the length is meaningful.
  size_t consumed = 1;
  if (avail >= 4 && (lead & 0xf8) == 0xf0) {
    consumed = 4;
  } else if (avail >= 5 && (lead & 0xfc) == 0xf8) {
    consumed = 5;
  } else if (avail >= 6 && (lead & 0xfe) == 0xfc) {
    consumed = 6;
  }
  *mblen = consumed;
  return 0;
}
// Treats the byte at begin as a one-byte character: stores 1 in *mblen and
// returns the byte's unsigned value. end is accepted for signature parity
// with the other converters and is not consulted.
inline unsigned short ascii_to_ucs2(const char *begin, const char *end,
                                    size_t *mblen) {
  (void)end;
  *mblen = 1;
  const unsigned char code = static_cast<unsigned char>(*begin);
  return code;
}
// Decodes one big-endian UTF-16 code unit starting at begin (input ends at
// end). Stores the number of bytes consumed in *mblen and returns the code.
// With fewer than two bytes available, one byte is consumed and 0 returned.
//
// In big-endian UTF-16 the first byte is the most significant one, so the
// value is assembled with shifts and does not depend on the host byte order.
// Bytes are widened through unsigned char so that values >= 0x80 cannot
// sign-extend into the other half of the result.
inline unsigned short utf16be_to_ucs2(const char *begin, const char *end,
                                      size_t *mblen) {
  const size_t len = end - begin;
  if (len <= 1) {
    *mblen = 1;
    return 0;
  }
  *mblen = 2;
  const unsigned int high = static_cast<unsigned char>(begin[0]);
  const unsigned int low = static_cast<unsigned char>(begin[1]);
  return static_cast<unsigned short>((high << 8) | low);
}
// Decodes one little-endian UTF-16 code unit starting at begin (input ends
// at end). Stores the number of bytes consumed in *mblen and returns the code.
// With fewer than two bytes available, one byte is consumed and 0 returned.
//
// In little-endian UTF-16 the second byte is the most significant one, so the
// value is assembled with shifts and does not depend on the host byte order.
// Bytes are widened through unsigned char so that values >= 0x80 cannot
// sign-extend into the other half of the result.
inline unsigned short utf16le_to_ucs2(const char *begin, const char *end,
                                      size_t *mblen) {
  const size_t len = end - begin;
  if (len <= 1) {
    *mblen = 1;
    return 0;
  }
  *mblen = 2;
  const unsigned int low = static_cast<unsigned char>(begin[0]);
  const unsigned int high = static_cast<unsigned char>(begin[1]);
  return static_cast<unsigned short>((high << 8) | low);
}
// Decodes one UTF-16 code unit without an explicit byte order by forwarding
// to the converter selected at compile time: utf16be_to_ucs2 when
// WORDS_BIGENDIAN is defined, utf16le_to_ucs2 otherwise.
inline unsigned short utf16_to_ucs2(const char *begin, const char *end,
                                    size_t *mblen) {
#if !defined WORDS_BIGENDIAN
  return utf16le_to_ucs2(begin, end, mblen);
#else
  return utf16be_to_ucs2(begin, end, mblen);
#endif
}
#ifndef MECAB_USE_UTF8_ONLY
// Decodes one EUC character starting at begin (input ends at end), stores
// the number of bytes consumed in *mblen and returns the mapped code.
//   - lead byte 0x8f with at least three bytes available: the next two bytes
//     form a key looked up in euc_hojo_tbl (offset by 0xA0A0); a key below
//     0xA0A0 is rejected and the lead byte is returned as a one-byte character.
//   - any other lead byte with the high bit set and at least two bytes
//     available: the two bytes index euc_tbl.
//   - otherwise the byte is returned unchanged as a one-byte character.
inline unsigned short euc_to_ucs2(const char *begin, const char *end,
                                  size_t *mblen) {
  const size_t avail = end - begin;
  const unsigned char lead = static_cast<unsigned char>(begin[0]);

  // JISX 0212, 0213
  if (lead == 0x8f && avail >= 3) {
    const unsigned short key = (static_cast<unsigned char>(begin[1]) << 8) +
        static_cast<unsigned char>(begin[2]);
    if (key >= 0xA0A0) {
      *mblen = 3;
      return euc_hojo_tbl[ key - 0xA0A0 ];
    }
    *mblen = 1;  // offset violation
    return lead;
  }

  // JISX 0208 + 0201
  if ((lead & 0x80) && avail >= 2) {
    *mblen = 2;
    return euc_tbl[(lead << 8) + static_cast<unsigned char>(begin[1]) ];
  }

  *mblen = 1;
  return lead;
}
// Decodes one CP932 character starting at begin (input ends at end), stores
// the number of bytes consumed in *mblen and returns the mapped code.
//   - a byte in 0xA1..0xDF is a one-byte character looked up in cp932_tbl.
//   - any other byte with the high bit set, when at least two bytes are
//     available, forms a two-byte index into cp932_tbl.
//   - otherwise the byte is returned unchanged as a one-byte character.
inline unsigned short cp932_to_ucs2(const char *begin, const char *end,
                                    size_t *mblen) {
  const size_t avail = end - begin;
  const unsigned char lead = static_cast<unsigned char>(begin[0]);

  if (lead >= 0xA1 && lead <= 0xDF) {
    *mblen = 1;
    return cp932_tbl[lead];
  }
  if ((lead & 0x80) && avail >= 2) {
    *mblen = 2;
    return cp932_tbl[(lead << 8)
                     + static_cast<unsigned char>(begin[1]) ];
  }
  *mblen = 1;
  return lead;
}
#endif
}
#endif

155511
MorphereAnalyzer/ucstable.h Normal file

File diff suppressed because it is too large Load Diff

258
MorphereAnalyzer/utils.h Normal file
View File

@@ -0,0 +1,258 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_UTILS_H
#define MECAB_UTILS_H
#include <algorithm>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include "common.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
#else // HAVE_STDINT_H
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned long long uint64_t;
#else // _MSC_VER
#include <stdint.h>
#endif // _MSC_VER
#else // _WIN32
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;
#endif // _WIN32
#endif // HAVE_STDINT_H
namespace MeCab {
class Param;
enum { EUC_JP, CP932, UTF8, UTF16, UTF16LE, UTF16BE, ASCII };
int decode_charset(const char *charset);
// Writes val into s in fixed notation with six decimals ("%f"), without
// padding. s must be large enough for the formatted text; this function
// receives no buffer size and cannot check it.
//
// The value is printed left-justified in a field of at least 16 characters
// and then cut at the first padding space. Output of 16 characters or more
// contains no padding, so the scan must also stop at the terminator;
// otherwise it would run past the end of the string.
// NOTE(review): very large magnitudes format to hundreds of characters --
// confirm callers' buffers (64 bytes in StringBuffer) are never given such
// values.
void inline dtoa(double val, char *s) {
  std::sprintf(s, "%-16f", val);
  char *p = s;
  while (*p != ' ' && *p != '\0') {
    ++p;
  }
  *p = '\0';
}
// Writes the decimal text of the signed integer val into s, with a leading
// '-' for negative values, and NUL-terminates it. s must have room for the
// sign, every digit of T and the terminator.
//
// Digits are taken from val without negating it first: negating the most
// negative value of T overflows, so the sign is applied per digit instead.
template <class T>
inline void itoa(T val, char *s) {
  if (val < 0) {
    *s++ = '-';
  }
  char *t = s;
  // Emit digits least-significant first; a do-while also covers val == 0.
  do {
    T digit = val % 10;
    if (digit < 0) digit = -digit;  // remainder carries the sign of val
    *t++ = static_cast<char>('0' + digit);
    val /= 10;
  } while (val);
  *t = '\0';
  std::reverse(s, t);  // the sign, if any, sits before s and stays in place
}
// Writes the decimal text of val into s and NUL-terminates it; no sign is
// ever emitted. s must have room for every digit of T plus the terminator.
template <class T>
inline void uitoa(T val, char *s) {
  char *cursor = s;
  // Digits come out least-significant first and are reversed afterwards.
  for (; val; val /= 10) {
    *cursor++ = static_cast<char>(val % 10) + '0';
  }
  if (cursor == s) {
    *cursor++ = '0';  // val was zero: no digit produced by the loop
  }
  *cursor = '\0';
  std::reverse(s, cursor);
}
// Advances the cursor *ptr by size bytes and returns the position it had
// before the advance.
inline const char *read_ptr(const char **ptr, size_t size) {
  const char *const before = *ptr;
  *ptr = before + size;
  return before;
}
// Copies sizeof(T) bytes from the cursor *ptr into value and advances the
// cursor past them. The bytes are copied as-is, with no conversion.
template <class T>
inline void read_static(const char **ptr, T& value) {
  const size_t width = sizeof(T);
  memcpy(&value, read_ptr(ptr, width), width);
}
bool file_exists(const char *filename);
int load_request_type(const Param &param);
bool load_dictionary_resource(Param *);
bool escape_csv_element(std::string *w);
void enum_csv_dictionaries(const char *path,
std::vector<std::string> *dics);
int progress_bar(const char* message, size_t current, size_t total);
bool toLower(std::string *);
std::string create_filename(const std::string &path,
const std::string &file);
void remove_filename(std::string *s);
void remove_pathname(std::string *s);
void replace_string(std::string *s,
const std::string &src,
const std::string &dst);
// Splits the comma-separated line in str in place and writes a pointer to the
// start of each field through out. Returns the number of fields written.
//
//   str  NUL-terminated, writable line; it is modified (terminators are
//        written over separators and quoted fields are compacted).
//   out  output iterator receiving one char* per field.
//   max  maximum number of fields to produce.
//
// Details visible in the code:
//   - spaces and tabs before a field are skipped.
//   - a field starting with '"' runs to the closing quote; inside it, two
//     consecutive quotes stand for one literal quote.
//   - the last permitted field (when max has counted down to 1) is not
//     terminated, so an unquoted last field keeps the remainder of the line,
//     commas included.
//   - a trailing empty field after a final comma is not reported, because the
//     loop ends once str reaches the end of the line.
// NOTE(review): max == 0 makes max-- wrap around, so the limit is effectively
// disabled -- confirm callers always pass max >= 1.
template <class Iterator>
inline size_t tokenizeCSV(char *str,
Iterator out, size_t max) {
char *eos = str + std::strlen(str);
char *start = 0;
char *end = 0;
size_t n = 0;
for (; str < eos; ++str) {
// skip white spaces
while (*str == ' ' || *str == '\t') ++str;
if (*str == '"') {
// Quoted field: copy characters down over the quotes; end tracks the
// compacted end of the field.
start = ++str;
end = start;
for (; str < eos; ++str) {
if (*str == '"') {
str++;
// A quote not followed by another quote closes the field.
if (*str != '"')
break;
}
*end++ = *str;
}
// Skip anything between the closing quote and the next separator.
str = std::find(str, eos, ',');
} else {
// Unquoted field: runs up to the next comma or the end of the line.
start = str;
str = std::find(str, eos, ',');
end = str;
}
// Terminate the field unless it is the last one allowed.
if (max-- > 1) *end = '\0';
*out++ = start;
++n;
if (max == 0) break;
}
return n;
}
// Splits str in place at every character found in del and writes a pointer
// to the start of each piece through out. Empty pieces are reported too.
// At most max pieces are produced; each reported piece is NUL-terminated at
// its delimiter. Returns the number of pieces written.
template <class Iterator>
inline size_t tokenize(char *str, const char *del,
                       Iterator out, size_t max) {
  char *const str_end = str + std::strlen(str);
  const char *const del_end = del + std::strlen(del);
  size_t produced = 0;
  for (char *piece = str; produced < max; ) {
    char *const cut = std::find_first_of(piece, str_end, del, del_end);
    *cut = '\0';
    *out++ = piece;
    ++produced;
    if (cut == str_end) break;  // no delimiter left: that was the last piece
    piece = cut + 1;
  }
  return produced;
}
// Like tokenize(), but a continuous run of delimiters is regarded as a single
// delimiter: empty pieces are skipped and do not count towards max.
// Splits str in place, writes a pointer to the start of each non-empty piece
// through out, and returns the number of pieces written.
template <class Iterator>
inline size_t tokenize2(char *str, const char *del,
                        Iterator out, size_t max) {
  char *const str_end = str + std::strlen(str);
  const char *const del_end = del + std::strlen(del);
  size_t produced = 0;
  for (char *piece = str; produced < max; ) {
    char *const cut = std::find_first_of(piece, str_end, del, del_end);
    *cut = '\0';
    if (*piece != '\0') {
      *out++ = piece;
      ++produced;
    }
    if (cut == str_end) break;  // reached the end of the input
    piece = cut + 1;
  }
  return produced;
}
// Returns log(exp(x) + exp(y)), computed relative to the larger argument.
// When flg is set ("init mode") y is returned unchanged. When the two values
// differ by more than MINUS_LOG_EPSILON the smaller one is ignored and the
// larger one is returned as-is.
inline double logsumexp(double x, double y, bool flg) {
#define MINUS_LOG_EPSILON 50
  if (flg) return y;  // init mode
  const double lower = (y < x) ? y : x;
  const double upper = (x < y) ? y : x;
  if (upper > lower + MINUS_LOG_EPSILON) {
    return upper;
  }
  return upper + std::log(std::exp(lower - upper) + 1.0);
}
// Converts the score d into a short cost: the value -n * d is limited to the
// range [-32767, +32767] and then truncated to a short.
inline short int tocost(double d, int n) {
  const double upper = static_cast<double>(static_cast<short>(+32767));
  const double lower = static_cast<double>(static_cast<short>(-32767));
  double cost = -n * d;
  if (upper < cost) cost = upper;
  if (cost < lower) cost = lower;
  return static_cast<short>(cost);
}
// Maps the letter that follows a backslash in an escape sequence to the
// character it denotes: 0 a b t n v f r keep their usual C meaning, 's'
// yields a space and a backslash yields itself. Any other character yields
// '\0'.
inline char getEscapedChar(const char p) {
  static const char kEscapes[][2] = {
    {'0', '\0'}, {'a', '\a'}, {'b', '\b'}, {'t', '\t'}, {'n', '\n'},
    {'v', '\v'}, {'f', '\f'}, {'r', '\r'}, {'s', ' '}, {'\\', '\\'}
  };
  const size_t count = sizeof(kEscapes) / sizeof(kEscapes[0]);
  for (size_t i = 0; i < count; ++i) {
    if (kEscapes[i][0] == p) return kEscapes[i][1];
  }
  return '\0';  // not a recognised escape letter
}
// return 64 bit hash
uint64_t fingerprint(const char *str, size_t size);
uint64_t fingerprint(const std::string &str);
#if defined(_WIN32) && !defined(__CYGWIN__)
std::wstring Utf8ToWide(const std::string &input);
std::string WideToUtf8(const std::wstring &input);
#endif
}
#endif

View File

@@ -0,0 +1,53 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_VITERBI_H_
#define MECAB_VITERBI_H_
#include <vector>
#include "mecab.h"
#include "thread.h"
namespace MeCab {
class Lattice;
class Param;
class Connector;
template <typename N, typename P> class Tokenizer;
// Interface of the lattice analyser. It holds a Tokenizer and a Connector
// and exposes them read-only; apart from what(), every member function is
// defined elsewhere.
class Viterbi {
public:
bool open(const Param &param);
// Runs the analysis on the given lattice.
bool analyze(Lattice *lattice) const;
const Tokenizer<Node, Path> *tokenizer() const;
const Connector *connector() const;
// Text currently held by what_.
const char *what() { return what_.str(); }
static bool buildResultForNBest(Lattice *lattice);
Viterbi();
virtual ~Viterbi();
private:
// Core routine, specialised at compile time on two boolean switches.
template <bool IsAllPath, bool IsPartial> bool viterbi(Lattice *lattice) const;
// Static helpers that operate on a lattice.
static bool forwardbackward(Lattice *lattice);
static bool initPartial(Lattice *lattice);
static bool initNBest(Lattice *lattice);
static bool buildBestLattice(Lattice *lattice);
static bool buildAllLattice(Lattice *lattice);
static bool buildAlternative(Lattice *lattice);
scoped_ptr<Tokenizer<Node, Path> > tokenizer_;
scoped_ptr<Connector> connector_;
int cost_factor_;
// Message store read back through what().
whatlog what_;
};
}
#endif // MECAB_VITERBI_H_

1368
MorphereAnalyzer/widget.cpp Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

150
MorphereAnalyzer/widget.h Normal file
View File

@@ -0,0 +1,150 @@
#ifndef WIDGET_H
#define WIDGET_H
#include <QWidget>
#include <QCalendarWidget>
#include <QDateEdit>
#include <QPushButton>
#include <QHBoxLayout>
#include <QComboBox>
#include <QLineEdit>
#include <QGroupBox>
#include <QListWidget>
#include <QTabWidget>
#include <QRadioButton>
#include <QButtonGroup>
#include <QTableWidget>
#include <QProgressBar>
#include <QMenuBar>
#include <QSqlDatabase>
#include "stable.h"
// Main widget of the application. It declares the controls, grouped by
// feature (data list, date range, keyword search, length filter, filter
// groups, replace, count), the builders that create each group box, the
// filter helpers and the slots connected to the controls. All non-trivial
// member functions are defined elsewhere.
class Widget : public QWidget
{
// Column indices; E_COLUMN_COUNT is the number of columns.
enum E_COLUMN
{
E_COLUMN_DATABASE=0,
E_COLUMN_NAME,
E_COLUMN_DATE,
E_COLUMN_COUNT,
};
// Comparison modes for the length filter.
enum E_LENGTH_COMP
{
E_LENGTH_COMP_GREATER = 0,
E_LENGTH_COMP_LESS,
E_LENGTH_COMP_EQUAL,
};
Q_OBJECT
public:
Widget(QWidget *parent = 0);
~Widget();
// Tab widget stored in m_ptwData.
QTabWidget *GetTabWidget()
{
return m_ptwData;
}
// Date position; note that it is stored in m_nColumn.
int getDatePosition()
{
return m_nColumn;
}
// Title position (m_nTitle).
int getTitlePosition()
{
return m_nTitle;
}
// Body position (m_nBody).
int getBodyPosition()
{
return m_nBody;
}
private:
// Data
QListWidget *m_plwData;
// Date
QCalendarWidget *m_pcw;
QDateEdit *m_pdeStart;
QDateEdit *m_pdeEnd;
//QPushButton *m_ppbInsertCalc;
//QComboBox *m_pcbDateCatalog;
// Positions returned by getDatePosition(), getTitlePosition() and
// getBodyPosition() respectively.
int m_nColumn;
int m_nTitle;
int m_nBody;
// Keyword
QComboBox *m_pcbCatalog;
QComboBox *m_pcbKeyword;
QComboBox *m_pcbMethod;
QLineEdit *m_pleString;
//QPushButton *m_ppbInsertSearch;
// Length
QComboBox *m_pcbLengthCatalog;
QComboBox *m_pcbLengthComp;
QComboBox *m_pcbLengthInsDel;
QLineEdit *m_pleLength;
// Filter
QListWidget *m_plwFilterGroup;
QListWidget *m_plwFilter;
QLineEdit *m_pleFilterGroup;
QGroupBox *m_pgbFilter;
// Replace
QComboBox *m_pcbReplaceCatalog;
QComboBox *m_pcbReplaceFind;
QLineEdit *m_pleReplaceFind;
QLineEdit *m_pleReplace;
QComboBox *m_pcbReplace;
// Count
QComboBox *m_pcbCountCatalog;
// Tab container (exposed through GetTabWidget())
QTabWidget *m_ptwData;
// Progress bar
QProgressBar *m_pProgress;
// Column
QVector <QStringList> m_vecColumn;
private:
// Builders: each returns the menu bar or group box for one feature.
QMenuBar *setMenuWidget();
QGroupBox *setDataWidgets();
QGroupBox *setDateWidgets();
QGroupBox *setCountWidgets();
QGroupBox *setSearchWidgets();
QGroupBox *setLengthWidgets();
QGroupBox *setFilterWidgets();
QGroupBox *setReplaceWidgets();
// Table helpers.
QTableWidget *AddTable(QString _str);
void SetTableHead();
void InsertCopyRow(int _nRow,QTableWidget *_pCurrent,QTableWidget *_pNew);
void DataReload(QString _strTableName,int _nSelect);
// Filter helpers; every variant takes the target group as its last argument.
void InsertFilter(int _nType,QString _strJson,int _nGroup);
void InsertTimeFilter(int _nTimeCategory ,QDate _dateStart ,QDate _dateEnd ,int _nGroup);
void InsertSearchFilter(int _nArticle,int _nCategory,int _nMethod,int _nKeyword, QString _str,int _nGroup);
void InsertLengthFilter(int _nArticle,int _nCategory,int _nComp,int _nInsDel,QString _str,int _nGroup);
void InsertReplaceFilter(int _nArticle,int _nCategory,int _nFind,QString _strFind,QString _strReplace,int _nGroup);
void RefreshFilter(int _nGroup);
bool ReloadColumn();
QString GetArticleType(int _nSelect);
public slots:
// Tab handling
void CloseTab(int index);
void DoubleClickTab(int index);
// Search and count actions
void SearchDate();
void CountSave();
void SearchKeyword();
// Data group list
void DataGroupRefresh();
void DataGroupItemChanged ( QListWidgetItem * item );
// Filter group list
void FilterGroupInsert();
void FilterGroupDelete();
void FilterGroupModify();
void FilterGroupRefresh();
void FilterGroupCopyPaste();
void currentGroupItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
// Filter list
void FilterDelete();
void currentFilterItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
void SearchLengthInsert();
void SearchReplaceInsert();
// File menu actions
void FileNew();
void FileImport();
void FileExport();
void FileExit();
};
#endif // WIDGET_H

View File

@@ -0,0 +1,148 @@
#ifndef WIDGET_H
#define WIDGET_H
#include <QWidget>
#include <QCalendarWidget>
#include <QDateEdit>
#include <QPushButton>
#include <QHBoxLayout>
#include <QComboBox>
#include <QLineEdit>
#include <QGroupBox>
#include <QListWidget>
#include <QTabWidget>
#include <QRadioButton>
#include <QButtonGroup>
#include <QTableWidget>
#include <QProgressBar>
#include <QMenuBar>
#include <QSqlDatabase>
#include "stable.h"
// Main widget of the application. It declares the controls, grouped by
// feature (data list, date range, keyword search, length filter, filter
// groups, replace, count), the builders that create each group box, the
// filter helpers and the slots connected to the controls. All non-trivial
// member functions are defined elsewhere.
class Widget : public QWidget
{
// Column indices; E_COLUMN_COUNT is the number of columns.
enum E_COLUMN
{
E_COLUMN_DATABASE=0,
E_COLUMN_NAME,
E_COLUMN_DATE,
E_COLUMN_COUNT,
};
// Comparison modes for the length filter.
enum E_LENGTH_COMP
{
E_LENGTH_COMP_GREATER = 0,
E_LENGTH_COMP_LESS,
E_LENGTH_COMP_EQUAL,
};
Q_OBJECT
public:
Widget(QWidget *parent = 0);
~Widget();
// Tab widget stored in m_ptwData.
QTabWidget *GetTabWidget()
{
return m_ptwData;
}
// Date position; note that it is stored in m_nColumn.
int getDatePosition()
{
return m_nColumn;
}
// Title position (m_nTitle).
int getTitlePosition()
{
return m_nTitle;
}
// Body position (m_nBody).
int getBodyPosition()
{
return m_nBody;
}
private:
// Data
QListWidget *m_plwData;
// Date
QCalendarWidget *m_pcw;
QDateEdit *m_pdeStart;
QDateEdit *m_pdeEnd;
//QPushButton *m_ppbInsertCalc;
//QComboBox *m_pcbDateCatalog;
// Positions returned by getDatePosition(), getTitlePosition() and
// getBodyPosition() respectively.
int m_nColumn;
int m_nTitle;
int m_nBody;
// Keyword
QComboBox *m_pcbCatalog;
QComboBox *m_pcbKeyword;
QComboBox *m_pcbMethod;
QLineEdit *m_pleString;
//QPushButton *m_ppbInsertSearch;
// Length
QComboBox *m_pcbLengthCatalog;
QComboBox *m_pcbLengthComp;
QComboBox *m_pcbLengthInsDel;
QLineEdit *m_pleLength;
// Filter
QListWidget *m_plwFilterGroup;
QListWidget *m_plwFilter;
QLineEdit *m_pleFilterGroup;
QGroupBox *m_pgbFilter;
// Replace
QComboBox *m_pcbReplaceCatalog;
QComboBox *m_pcbReplaceFind;
QLineEdit *m_pleReplaceFind;
QLineEdit *m_pleReplace;
// Count
QComboBox *m_pcbCountCatalog;
// Tab container (exposed through GetTabWidget())
QTabWidget *m_ptwData;
// Progress bar
QProgressBar *m_pProgress;
// Column
QVector <QStringList> m_vecColumn;
private:
// Builders: each returns the menu bar or group box for one feature.
QMenuBar *setMenuWidget();
QGroupBox *setDataWidgets();
QGroupBox *setDateWidgets();
QGroupBox *setCountWidgets();
QGroupBox *setSearchWidgets();
QGroupBox *setLengthWidgets();
QGroupBox *setFilterWidgets();
QGroupBox *setReplaceWidgets();
// Table helpers.
QTableWidget *AddTable(QString _str);
void SetTableHead();
void InsertCopyRow(int _nRow,QTableWidget *_pCurrent,QTableWidget *_pNew);
void DataReload(QString _strTableName,int _nSelect);
// Filter helpers; every variant takes the target group as its last argument.
void InsertFilter(int _nType,QString _strJson,int _nGroup);
void InsertTimeFilter(int _nTimeCategory ,QDate _dateStart ,QDate _dateEnd ,int _nGroup);
void InsertSearchFilter(int _nArticle,int _nCategory,int _nMethod,int _nKeyword, QString _str,int _nGroup);
void InsertLengthFilter(int _nArticle,int _nCategory,int _nComp,int _nInsDel,QString _str,int _nGroup);
void InsertReplaceFilter(int _nArticle,int _nCategory,int _nFind,QString _strFind,QString _strReplace,int _nGroup);
void RefreshFilter(int _nGroup);
bool ReloadColumn();
QString GetArticleType(int _nSelect);
public slots:
// Tab handling
void CloseTab(int index);
void DoubleClickTab(int index);
// Search and count actions
void SearchDate();
void CountSave();
void SearchKeyword();
// Data group list
void DataGroupRefresh();
void DataGroupItemChanged ( QListWidgetItem * item );
// Filter group list
void FilterGroupInsert();
void FilterGroupDelete();
void FilterGroupModify();
void FilterGroupRefresh();
void currentGroupItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
// Filter list
void FilterDelete();
void currentFilterItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
void SearchLengthInsert();
void SearchReplaceInsert();
// File menu actions
void FileNew();
void FileImport();
void FileExport();
void FileExit();
};
#endif // WIDGET_H

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,148 @@
#ifndef WIDGET_H
#define WIDGET_H
#include <QWidget>
#include <QCalendarWidget>
#include <QDateEdit>
#include <QPushButton>
#include <QHBoxLayout>
#include <QComboBox>
#include <QLineEdit>
#include <QGroupBox>
#include <QListWidget>
#include <QTabWidget>
#include <QRadioButton>
#include <QButtonGroup>
#include <QTableWidget>
#include <QProgressBar>
#include <QMenuBar>
#include <QSqlDatabase>
#include "stable.h"
// Main widget of the application. It declares the controls, grouped by
// feature (data list, date range, keyword search, length filter, filter
// groups, replace, count), the builders that create each group box, the
// filter helpers and the slots connected to the controls. All non-trivial
// member functions are defined elsewhere.
class Widget : public QWidget
{
// Column indices; E_COLUMN_COUNT is the number of columns.
enum E_COLUMN
{
E_COLUMN_DATABASE=0,
E_COLUMN_NAME,
E_COLUMN_DATE,
E_COLUMN_COUNT,
};
// Comparison modes for the length filter.
enum E_LENGTH_COMP
{
E_LENGTH_COMP_GREATER = 0,
E_LENGTH_COMP_LESS,
E_LENGTH_COMP_EQUAL,
};
Q_OBJECT
public:
Widget(QWidget *parent = 0);
~Widget();
// Tab widget stored in m_ptwData.
QTabWidget *GetTabWidget()
{
return m_ptwData;
}
// Date position; note that it is stored in m_nColumn.
int getDatePosition()
{
return m_nColumn;
}
// Title position (m_nTitle).
int getTitlePosition()
{
return m_nTitle;
}
// Body position (m_nBody).
int getBodyPosition()
{
return m_nBody;
}
private:
// Data
QListWidget *m_plwData;
// Date
QCalendarWidget *m_pcw;
QDateEdit *m_pdeStart;
QDateEdit *m_pdeEnd;
//QPushButton *m_ppbInsertCalc;
//QComboBox *m_pcbDateCatalog;
// Positions returned by getDatePosition(), getTitlePosition() and
// getBodyPosition() respectively.
int m_nColumn;
int m_nTitle;
int m_nBody;
// Keyword
QComboBox *m_pcbCatalog;
QComboBox *m_pcbKeyword;
QComboBox *m_pcbMethod;
QLineEdit *m_pleString;
//QPushButton *m_ppbInsertSearch;
// Length
QComboBox *m_pcbLengthCatalog;
QComboBox *m_pcbLengthComp;
QComboBox *m_pcbLengthInsDel;
QLineEdit *m_pleLength;
// Filter
QListWidget *m_plwFilterGroup;
QListWidget *m_plwFilter;
QLineEdit *m_pleFilterGroup;
QGroupBox *m_pgbFilter;
// Replace
QComboBox *m_pcbReplaceCatalog;
QComboBox *m_pcbReplaceFind;
QLineEdit *m_pleReplaceFind;
QLineEdit *m_pleReplace;
// Count
QComboBox *m_pcbCountCatalog;
// Tab container (exposed through GetTabWidget())
QTabWidget *m_ptwData;
// Progress bar
QProgressBar *m_pProgress;
// Column
QVector <QStringList> m_vecColumn;
private:
// Builders: each returns the menu bar or group box for one feature.
QMenuBar *setMenuWidget();
QGroupBox *setDataWidgets();
QGroupBox *setDateWidgets();
QGroupBox *setCountWidgets();
QGroupBox *setSearchWidgets();
QGroupBox *setLengthWidgets();
QGroupBox *setFilterWidgets();
QGroupBox *setReplaceWidgets();
// Table helpers.
QTableWidget *AddTable(QString _str);
void SetTableHead();
void InsertCopyRow(int _nRow,QTableWidget *_pCurrent,QTableWidget *_pNew);
void DataReload(QString _strTableName,int _nSelect);
// Filter helpers; every variant takes the target group as its last argument.
void InsertFilter(int _nType,QString _strJson,int _nGroup);
void InsertTimeFilter(int _nTimeCategory ,QDate _dateStart ,QDate _dateEnd ,int _nGroup);
void InsertSearchFilter(int _nArticle,int _nCategory,int _nMethod,int _nKeyword, QString _str,int _nGroup);
void InsertLengthFilter(int _nArticle,int _nCategory,int _nComp,int _nInsDel,QString _str,int _nGroup);
void InsertReplaceFilter(int _nArticle,int _nCategory,int _nFind,QString _strFind,QString _strReplace,int _nGroup);
void RefreshFilter(int _nGroup);
bool ReloadColumn();
QString GetArticleType(int _nSelect);
public slots:
// Tab handling
void CloseTab(int index);
void DoubleClickTab(int index);
// Search and count actions
void SearchDate();
void CountSave();
void SearchKeyword();
// Data group list
void DataGroupRefresh();
void DataGroupItemChanged ( QListWidgetItem * item );
// Filter group list
void FilterGroupInsert();
void FilterGroupDelete();
void FilterGroupModify();
void FilterGroupRefresh();
void currentGroupItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
// Filter list
void FilterDelete();
void currentFilterItemChanged(QListWidgetItem *_pCurrent, QListWidgetItem *_pPrev=0);
void SearchLengthInsert();
void SearchReplaceInsert();
// File menu actions
void FileNew();
void FileImport();
void FileExport();
void FileExit();
};
#endif // WIDGET_H

View File

@@ -0,0 +1,69 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
// Copyright(C) 2001-2011 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#include <string>
namespace {
// Owns a UTF-8 copy of a wide-character argument vector.
// Every argv[i] is converted with WideToUtf8() and stored in a separately
// allocated, NUL-terminated char array; the destructor releases all of them.
class CommandLine {
 public:
  CommandLine(int argc, wchar_t **argv)
      : argc_(argc), argv_(new char * [argc]) {
    for (int idx = 0; idx < argc_; ++idx) {
      const std::string utf8 = WideToUtf8(argv[idx]);
      const size_t bytes = utf8.size();
      char *copy = new char[bytes + 1];
      ::memcpy(copy, utf8.data(), bytes);
      copy[bytes] = '\0';
      argv_[idx] = copy;
    }
  }

  ~CommandLine() {
    for (int idx = 0; idx < argc_; ++idx) {
      delete [] argv_[idx];
    }
    delete [] argv_;
  }

  int argc() const { return argc_; }
  char **argv() const { return argv_; }

 private:
  // Converts a wide string to UTF-8 with WideCharToMultiByte(). Returns an
  // empty string when the length query reports 0 or the conversion fails.
  static std::string WideToUtf8(const std::wstring &input) {
    // First call: ask how many bytes the converted text needs.
    const int needed = ::WideCharToMultiByte(CP_UTF8, 0,
                                             input.c_str(), -1, NULL, 0,
                                             NULL, NULL);
    if (needed == 0) {
      return "";
    }
    // Second call: convert into a scratch buffer one byte larger than asked.
    std::string scratch(needed + 1, '\0');
    const int written = ::WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1,
                                              &scratch[0],
                                              needed + 1, NULL, NULL);
    if (written <= 0) {
      return std::string();
    }
    // Keep the text up to its first terminator.
    return std::string(scratch.c_str());
  }

  int argc_;
  char **argv_;
};
} // namespace
// Rename the program's own main(argc, argv) definition to
// wmain_to_main_wrapper, so that wmain() can call it with UTF-8 arguments.
#define main(argc, argv) wmain_to_main_wrapper(argc, argv)
int wmain_to_main_wrapper(int argc, char **argv);
#if defined(__MINGW32__)
extern "C"
#endif
int wmain(int argc, wchar_t **argv) {
CommandLine cmd(argc, argv);
return wmain_to_main_wrapper(cmd.argc(), cmd.argv());
}
#endif

57
MorphereAnalyzer/writer.h Normal file
View File

@@ -0,0 +1,57 @@
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_WRITER_H_
#define MECAB_WRITER_H_
#include <string>
#include "common.h"
#include "mecab.h"
#include "utils.h"
#include "scoped_ptr.h"
#include "string_buffer.h"
namespace MeCab {
class Param;
// Interface of the output writer: renders a lattice, or a single node, into
// a StringBuffer. Apart from what(), every member function is defined
// elsewhere.
class Writer {
public:
Writer();
virtual ~Writer();
bool open(const Param &param);
void close();
// Writes one node using an explicit format string.
bool writeNode(Lattice *lattice,
const char *format,
const Node *node, StringBuffer *s) const;
// Writes one node without an explicit format string.
bool writeNode(Lattice *lattice,
const Node *node,
StringBuffer *s) const;
// Writes a whole lattice into the given buffer.
bool write(Lattice *lattice, StringBuffer *node) const;
// Text currently held by what_.
const char *what() { return what_.str(); }
private:
// Format strings, one per kind of output element.
// NOTE(review): presumably node / begin-of-sentence / end-of-sentence /
// unknown word / end-of-N-best -- confirm against the implementation.
scoped_string node_format_;
scoped_string bos_format_;
scoped_string eos_format_;
scoped_string unk_format_;
scoped_string eon_format_;
// Message store read back through what().
whatlog what_;
// Output routines sharing one signature, so that any of them can be stored
// in write_.
bool writeLattice(Lattice *lattice, StringBuffer *s) const;
bool writeWakati(Lattice *lattice, StringBuffer *s) const;
bool writeNone(Lattice *lattice, StringBuffer *s) const;
bool writeUser(Lattice *lattice, StringBuffer *s) const;
bool writeDump(Lattice *lattice, StringBuffer *s) const;
bool writeEM(Lattice *lattice, StringBuffer *s) const;
// Pointer to a const member function with the signature of the routines above.
bool (Writer::*write_)(Lattice *lattice, StringBuffer *s) const;
};
}
#endif // MECAB_WRITER_H_