Files
clients/MorphereAnalyzer/utils.h
admin 2e7d343f4a MorphereAnalyzer
git-svn-id: svn://192.168.0.12/source@76 8346c931-da38-4b9b-9d4c-e48b93cbd075
2015-04-17 02:44:11 +00:00

259 lines
5.7 KiB
C++

// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
//
//
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
#ifndef MECAB_UTILS_H
#define MECAB_UTILS_H
#include <algorithm>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>
#include "common.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
// Fixed-width integer types (uint8_t, uint32_t, uint64_t).
// <stdint.h> is used when the build configuration defines HAVE_STDINT_H;
// otherwise the types are either taken from <stdint.h> anyway (newer
// Windows compilers) or declared by hand below.
#ifdef HAVE_STDINT_H
#include <stdint.h>
#else // HAVE_STDINT_H
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
// Hand-written typedefs for MSVC with _MSC_VER <= 1500 -- presumably the
// releases that ship without <stdint.h>; TODO confirm.
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned long long uint64_t;
#else // _MSC_VER
#include <stdint.h>
#endif // _MSC_VER
#else // _WIN32
// NOTE(review): this is the non-Windows branch, yet uint64_t is spelled with
// __int64, which looks like a Microsoft-specific type name, while the MSVC
// branch above uses unsigned long long -- the two appear to be swapped.
// Also, unsigned long is wider than 32 bits on LP64 targets, so uint32_t
// may have the wrong width here. Confirm and fix before relying on this
// branch (it is only reached when HAVE_STDINT_H is not defined).
typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;
#endif // _WIN32
#endif // HAVE_STDINT_H
namespace MeCab {
// Forward declaration only: this header refers to Param just through a
// reference and a pointer in the function declarations further down.
class Param;
// Character-set identifiers. The enum is unnamed, so the values are used as
// plain ints, numbered from EUC_JP == 0 in the order listed.
enum { EUC_JP, CP932, UTF8, UTF16, UTF16LE, UTF16BE, ASCII };
// Declared here, defined elsewhere. Presumably maps a charset name to one of
// the identifiers above -- TODO confirm against the implementation,
// including what is returned for an unknown name.
int decode_charset(const char *charset);
// Formats val in fixed notation with six fractional digits ("%f") and stores
// it in s as a NUL-terminated string.
//
// The value is printed left-justified in a 16-column field and the padding
// is then cut off at the first space. A value whose text is 16 characters or
// longer gets no padding at all, so the scan also has to stop at the
// terminating NUL; otherwise it would walk past the end of the formatted
// text and write a stray '\0' wherever the next space happens to be.
//
// s must be large enough for the complete formatted text plus terminator
// (at least 17 bytes, more for values of large magnitude). The buffer size
// is not known here, so it cannot be checked.
void inline dtoa(double val, char *s) {
  std::sprintf(s, "%-16f", val);
  char *p = s;
  while (*p != ' ' && *p != '\0') ++p;
  *p = '\0';
}
// Writes the decimal representation of the integer val into s as a
// NUL-terminated string, with a leading '-' for negative values.
//
// The digits are taken from val with its original sign and made positive one
// at a time, rather than negating val up front: -val overflows for the most
// negative value of a signed type, which used to produce garbage digits.
//
// s must have room for every digit, the optional sign and the terminator;
// no bounds checking is done.
template <class T>
inline void itoa(T val, char *s) {
  if (val < 0) *s++ = '-';
  char *t = s;
  // do/while so that zero still produces the single digit "0".
  do {
    // Integer division truncates toward zero, so the remainder of a negative
    // value lies in [-9, 0].
    int digit = static_cast<int>(val % 10);
    if (digit < 0) digit = -digit;
    *t++ = static_cast<char>('0' + digit);
    val /= 10;
  } while (val != 0);
  *t = '\0';
  // Digits were generated least-significant first.
  std::reverse(s, t);
}
// Writes val in decimal into s as a NUL-terminated string. No sign handling
// is done, so this is meant for non-negative values; zero yields "0".
// s must be large enough for all digits plus the terminator.
template <class T>
inline void uitoa(T val, char *s) {
  char *cursor = s;
  // Peel off the digits, least-significant first.
  for (; val; val /= 10) {
    const T digit = val % 10;
    *cursor++ = static_cast<char>(digit) + '0';
  }
  // Nothing emitted means val was zero.
  if (cursor == s) *cursor++ = '0';
  *cursor = '\0';
  // Put the digits into reading order.
  std::reverse(s, cursor);
}
// Consumes size bytes from the read cursor *ptr: returns the cursor's
// position before the call and moves *ptr forward by size. No bounds are
// checked; the caller must know that size bytes are available.
inline const char *read_ptr(const char **ptr, size_t size) {
  const char *const begin = *ptr;
  *ptr = begin + size;
  return begin;
}
// Reads one T from the byte stream at *ptr into value and advances *ptr by
// sizeof(T). The bytes are copied with memcpy, so the source does not have
// to be aligned for T. No bounds are checked.
template <class T>
inline void read_static(const char **ptr, T& value) {
  std::memcpy(&value, read_ptr(ptr, sizeof(T)), sizeof(T));
}
// Non-inline helpers. They are only declared in this header; the definitions
// are not visible here, so the notes below are inferred from names and
// signatures and should be confirmed against the implementation.

// Presumably reports whether filename can be found/opened -- TODO confirm.
bool file_exists(const char *filename);
// Presumably derives a request-type value from the settings in param --
// TODO confirm the meaning of the returned int.
int load_request_type(const Param &param);
// Takes a mutable Param; presumably fills it from a dictionary resource and
// returns success -- TODO confirm.
bool load_dictionary_resource(Param *);
// Modifies *w in place; presumably quotes/escapes it for use as a CSV
// field -- TODO confirm what the bool result signals.
bool escape_csv_element(std::string *w);
// Presumably collects the CSV dictionary files found under path into *dics
// -- TODO confirm.
void enum_csv_dictionaries(const char *path,
std::vector<std::string> *dics);
// Presumably renders a progress indicator for current out of total, labelled
// with message -- TODO confirm the meaning of the returned int.
int progress_bar(const char* message, size_t current, size_t total);
// Modifies the string in place; presumably lower-cases it -- TODO confirm
// what the bool result signals.
bool toLower(std::string *);
// Presumably joins a directory path and a file name into one path --
// TODO confirm separator handling.
std::string create_filename(const std::string &path,
const std::string &file);
// In-place path edits on *s; presumably strip the file-name part and the
// directory part respectively -- TODO confirm.
void remove_filename(std::string *s);
void remove_pathname(std::string *s);
// In-place edit of *s; presumably replaces src with dst -- TODO confirm
// whether every occurrence or only the first one is replaced.
void replace_string(std::string *s,
const std::string &src,
const std::string &dst);
// Splits the CSV record in str into fields, in place, and writes a pointer
// to the start of each field through out. Returns the number of fields
// written.
//
//  - Blanks and tabs in front of a field are skipped.
//  - A field that starts with '"' runs up to the closing quote; a doubled
//    quote ("") inside it stands for one literal quote. The field is
//    unescaped in place, and anything between the closing quote and the next
//    comma is dropped.
//  - At most max fields are produced. Every field except the max-th one is
//    NUL-terminated; the max-th one is not cut at its delimiter, so it runs
//    on to the end of the buffer.
//  - A comma that is the very last character of the record does not produce
//    a final empty field.
//  - max == 0 produces no fields and leaves str and out untouched, the same
//    as tokenize() and tokenize2(). Without the guard the countdown below
//    would wrap around and every field would be written through out.
//
// out must accept at least max assignments of char*.
template <class Iterator>
inline size_t tokenizeCSV(char *str,
                          Iterator out, size_t max) {
  if (max == 0) return 0;
  char *eos = str + std::strlen(str);
  char *start = 0;
  char *end = 0;
  size_t n = 0;
  for (; str < eos; ++str) {
    // skip white spaces
    while (*str == ' ' || *str == '\t') ++str;
    if (*str == '"') {
      // Quoted field: copy it down over itself while collapsing "" into ".
      start = ++str;
      end = start;
      for (; str < eos; ++str) {
        if (*str == '"') {
          str++;
          if (*str != '"')
            break;  // a lone quote closes the field
        }
        *end++ = *str;
      }
      // Ignore whatever follows the closing quote up to the delimiter.
      str = std::find(str, eos, ',');
    } else {
      // Plain field: everything up to the next comma (or end of record).
      start = str;
      str = std::find(str, eos, ',');
      end = str;
    }
    // The last permitted field deliberately stays unterminated.
    if (max-- > 1) *end = '\0';
    *out++ = start;
    ++n;
    if (max == 0) break;
  }
  return n;
}
// Splits str in place at every character that occurs in del and writes a
// pointer to each piece through out. Each delimiter found is overwritten
// with NUL. Adjacent delimiters produce empty pieces, and an empty str
// produces one empty piece. At most max pieces are written; the number
// written is returned.
template <class Iterator>
inline size_t tokenize(char *str, const char *del,
                       Iterator out, size_t max) {
  char *const last = str + std::strlen(str);
  const char *const del_last = del + std::strlen(del);
  size_t count = 0;
  for (char *head = str; count < max; ) {
    char *const cut = std::find_first_of(head, last, del, del_last);
    *cut = '\0';
    *out++ = head;
    ++count;
    if (cut == last) break;  // that was the final piece
    head = cut + 1;
  }
  return count;
}
// Like tokenize(), except that a continuous run of delimiters is regarded as
// a single delimiter: empty pieces are neither written to out nor counted
// against max. Delimiters found are still overwritten with NUL.
template <class Iterator>
inline size_t tokenize2(char *str, const char *del,
                        Iterator out, size_t max) {
  char *const last = str + std::strlen(str);
  const char *const del_last = del + std::strlen(del);
  size_t count = 0;
  for (char *head = str; count < max; ) {
    char *const cut = std::find_first_of(head, last, del, del_last);
    *cut = '\0';
    const bool has_text = (*head != '\0');
    if (has_text) {
      *out++ = head;
      ++count;
    }
    if (cut == last) break;  // reached the end of the input
    head = cut + 1;
  }
  return count;
}
// Returns log(exp(x) + exp(y)), computed relative to the larger argument so
// that the exponential cannot overflow. When flg is set ("init mode") the
// result is simply y. When the two arguments are more than
// MINUS_LOG_EPSILON apart, the larger one is returned as is.
//
// MINUS_LOG_EPSILON is a macro defined inside the function body; being a
// macro it stays visible to everything that follows this header.
inline double logsumexp(double x, double y, bool flg) {
#define MINUS_LOG_EPSILON 50
  if (flg) return y;  // init mode
  const double lo = std::min<double>(x, y);
  const double hi = std::max<double>(x, y);
  // The smaller term is dropped once the gap exceeds the threshold.
  if (hi > lo + MINUS_LOG_EPSILON) return hi;
  return hi + std::log(std::exp(lo - hi) + 1.0);
}
// Converts d into a short cost value: computes -n * d, clamps it to
// [-32767, +32767] and truncates the result to short.
inline short int tocost(double d, int n) {
  static const short kUpper = +32767;
  static const short kLower = -32767;
  double scaled = -n * d;
  if (scaled > static_cast<double>(kUpper)) scaled = static_cast<double>(kUpper);
  if (scaled < static_cast<double>(kLower)) scaled = static_cast<double>(kLower);
  return static_cast<short>(scaled);
}
// Maps the letter that follows a backslash in an escape sequence to the
// character it denotes: 0 a b t n v f r as in C, 's' for a blank, and '\\'
// for a backslash. Any other letter yields '\0'.
inline char getEscapedChar(const char p) {
  static const char kLetters[] = {
    '0', 'a', 'b', 't', 'n', 'v', 'f', 'r', 's', '\\'
  };
  static const char kMeanings[] = {
    '\0', '\a', '\b', '\t', '\n', '\v', '\f', '\r', ' ', '\\'
  };
  for (size_t i = 0; i < sizeof(kLetters); ++i) {
    if (kLetters[i] == p) return kMeanings[i];
  }
  return '\0';  // unrecognized escape letter
}
// Return a 64-bit hash of the size bytes at str, or of the contents of a
// std::string. Only declared here: the hash algorithm is determined by the
// definition, which is not part of this header.
uint64_t fingerprint(const char *str, size_t size);
uint64_t fingerprint(const std::string &str);
// Declared only for native Windows builds (Cygwin excluded). Presumably
// convert between UTF-8 encoded std::string and std::wstring, as the names
// suggest -- TODO confirm against the implementation.
#if defined(_WIN32) && !defined(__CYGWIN__)
std::wstring Utf8ToWide(const std::string &input);
std::string WideToUtf8(const std::wstring &input);
#endif
}
#endif