275 lines
6.1 KiB
C#
275 lines
6.1 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace NewsCrawler
|
|
{
|
|
/// <summary>
/// Classifies text against lists of regular expressions loaded from keyword
/// files located under the directory returned by <c>Util.GetConfigPath()</c>.
/// </summary>
/// <remarks>
/// Each keyword file holds one regular expression per line. Blank lines and
/// lines whose first non-whitespace character is '#' are skipped.
/// </remarks>
public class TextCondition
{
    /// <summary>Classification produced by <see cref="Match"/>.</summary>
    public enum TYPE
    {
        /// <summary>No keyword list contained a matching pattern.</summary>
        NOT_MATCHED,
        /// <summary>A pattern from keyword-negative.txt matched.</summary>
        NEGATIVE,
        /// <summary>A pattern from keyword-positive.txt matched.</summary>
        POSITIVE,
        /// <summary>A pattern from keyword-positive-force.txt matched.</summary>
        POSITIVE_FORCE,
        /// <summary>A pattern from keyword-manual.txt matched.</summary>
        MANUAL
    }

    // The lists are cleared and refilled on reload, never reassigned.
    private readonly List<Regex> m_Positive = new List<Regex>();
    private readonly List<Regex> m_PositiveForce = new List<Regex>();
    private readonly List<Regex> m_Negative = new List<Regex>();
    private readonly List<Regex> m_Manual = new List<Regex>();
    private readonly List<Regex> m_Duplicated = new List<Regex>();

    /// <summary>
    /// Loads every keyword list, then runs the debug-only self test.
    /// </summary>
    public TextCondition()
    {
        LoadAll();
        Test();
    }

    /// <summary>
    /// Prints the classification of a fixed set of sample strings to the
    /// console. Does nothing when <c>Util.IsDebugging()</c> returns false.
    /// </summary>
    void Test()
    {
        if (Util.IsDebugging() == false)
        {
            return;
        }

        // Samples are printed in this exact order.
        string[] aSamples =
        {
            "다음 주가 상승 기대",
            "네이버 파산 기대",
            "김장철",

            "15억",
            "33억",

            "846조",
            "39조",
            "48437조",

            "1만",
            "10만",
            "100만",
            "200만",
            "500만",
            "1000만",
            "10000만",
            "100000만",
            "1000000만"
        };

        foreach (string strSample in aSamples)
        {
            Console.WriteLine(Match(strSample));
        }

        Console.WriteLine("test end");
    }

    /// <summary>
    /// Replaces the contents of <paramref name="target"/> with the patterns
    /// read from "&lt;config path&gt;/&lt;strName&gt;.txt".
    /// </summary>
    /// <param name="target">List to clear and refill.</param>
    /// <param name="strName">
    /// File name without extension (e.g. "keyword-positive"); also used as
    /// the tag in error log messages.
    /// </param>
    /// <remarks>
    /// A missing file leaves the list empty. A line that is not a valid
    /// regular expression is logged through <c>Util.Log</c> and skipped.
    /// Exceptions thrown by <see cref="File.ReadAllLines(string)"/> are not
    /// caught here.
    /// </remarks>
    private static void LoadKeywordFile(List<Regex> target, string strName)
    {
        target.Clear();

        string strPath = Util.GetConfigPath() + "/" + strName + ".txt";
        if (File.Exists(strPath) == false)
        {
            return;
        }

        foreach (string line in File.ReadAllLines(strPath))
        {
            // Decide blank/comment on the trimmed text so that an indented
            // "# comment" is not compiled as a pattern by mistake.
            string strTrimmed = line.Trim();
            if (strTrimmed.Length == 0 || strTrimmed[0] == '#')
            {
                continue;
            }

            try
            {
                // The pattern itself is compiled from the untrimmed line so
                // that existing keyword files keep their exact meaning.
                target.Add(new Regex(line));
            }
            catch (ArgumentException ex)
            {
                Util.Log(Util.LOG_TYPE.ERROR, string.Format("[{0}] 잘못된 키워드 ({1})", strName, ex.Message));
            }
        }
    }

    /// <summary>Reloads the positive list from keyword-positive.txt.</summary>
    public void LoadPositive()
    {
        LoadKeywordFile(m_Positive, "keyword-positive");
    }

    /// <summary>Reloads the positive-force list from keyword-positive-force.txt.</summary>
    public void LoadPositiveForce()
    {
        LoadKeywordFile(m_PositiveForce, "keyword-positive-force");
    }

    /// <summary>Reloads the manual list from keyword-manual.txt.</summary>
    public void LoadManual()
    {
        LoadKeywordFile(m_Manual, "keyword-manual");
    }

    /// <summary>Reloads the negative list from keyword-negative.txt.</summary>
    public void LoadNegative()
    {
        LoadKeywordFile(m_Negative, "keyword-negative");
    }

    /// <summary>Reloads the duplicated list from keyword-duplicated.txt.</summary>
    public void LoadDuplicatedKeyword()
    {
        LoadKeywordFile(m_Duplicated, "keyword-duplicated");
    }

    /// <summary>Reloads all five keyword lists.</summary>
    void LoadAll()
    {
        LoadPositive();
        LoadPositiveForce();
        LoadNegative();
        LoadManual();
        LoadDuplicatedKeyword();
    }

    /// <summary>
    /// Builds a multi-line summary of how many patterns are loaded.
    /// </summary>
    /// <returns>
    /// Counts of the positive, positive-force, negative and manual lists.
    /// The duplicated list is not included in the summary.
    /// </returns>
    public string GetKeywordsCnt()
    {
        return string.Format("Positive: {0}\nPositiveForce: {1}\nNegative: {2}\nManual: {3}",
            m_Positive.Count,
            m_PositiveForce.Count,
            m_Negative.Count,
            m_Manual.Count);
    }

    /// <summary>
    /// Outcome of <see cref="Match"/>: the classification and the pattern
    /// that produced it.
    /// </summary>
    public class RESULT
    {
        /// <summary>Classification of the text.</summary>
        public TYPE m_enType;

        /// <summary>
        /// Text of the pattern that matched, or an empty string when nothing
        /// matched.
        /// </summary>
        public string m_strKeyword;

        /// <summary>Creates a result with the given type and keyword.</summary>
        public RESULT(TYPE enType, string strKeyword)
        {
            m_enType = enType;
            m_strKeyword = strKeyword;
        }

        /// <summary>Formats the result as "[TYPE] keyword".</summary>
        public override string ToString()
        {
            return string.Format("[{0}] {1}", m_enType, m_strKeyword);
        }
    }

    /// <summary>
    /// Classifies <paramref name="strText"/> using the loaded keyword lists.
    /// </summary>
    /// <param name="strText">Text to classify.</param>
    /// <returns>
    /// The first hit in the order negative, positive-force, manual, positive;
    /// otherwise a <see cref="TYPE.NOT_MATCHED"/> result with an empty
    /// keyword. A null <paramref name="strText"/> yields
    /// <see cref="TYPE.NOT_MATCHED"/>.
    /// </returns>
    public RESULT Match(string strText)
    {
        // Regex.IsMatch rejects null, which previously surfaced as an
        // exception only when some list was non-empty; treat null uniformly.
        if (strText == null)
        {
            return new RESULT(TYPE.NOT_MATCHED, "");
        }

        // The order of these checks defines precedence between the lists.
        Regex hit = m_Negative.Find(s => s.IsMatch(strText));
        if (hit != null)
        {
            return new RESULT(TYPE.NEGATIVE, hit.ToString());
        }

        hit = m_PositiveForce.Find(s => s.IsMatch(strText));
        if (hit != null)
        {
            return new RESULT(TYPE.POSITIVE_FORCE, hit.ToString());
        }

        hit = m_Manual.Find(s => s.IsMatch(strText));
        if (hit != null)
        {
            return new RESULT(TYPE.MANUAL, hit.ToString());
        }

        hit = m_Positive.Find(s => s.IsMatch(strText));
        if (hit != null)
        {
            return new RESULT(TYPE.POSITIVE, hit.ToString());
        }

        return new RESULT(TYPE.NOT_MATCHED, "");
    }

    /// <summary>
    /// Tells whether <paramref name="strText"/> matches any pattern in the
    /// duplicated list.
    /// </summary>
    /// <param name="strText">Text to test; null never matches.</param>
    /// <returns>True when at least one pattern matches.</returns>
    public bool IsDuplicatedKeyword(string strText)
    {
        if (strText == null)
        {
            return false;
        }

        return m_Duplicated.Exists(s => s.IsMatch(strText));
    }

    /// <summary>Returns the pattern text of every positive entry.</summary>
    public List<string> GetAllPositive()
    {
        return m_Positive.Select(a => a.ToString()).ToList();
    }

    /// <summary>Returns the pattern text of every positive-force entry.</summary>
    public List<string> GetAllPositiveForce()
    {
        return m_PositiveForce.Select(a => a.ToString()).ToList();
    }

    /// <summary>Returns the pattern text of every negative entry.</summary>
    public List<string> GetAllNegative()
    {
        return m_Negative.Select(a => a.ToString()).ToList();
    }

    /// <summary>Returns the pattern text of every manual entry.</summary>
    public List<string> GetAllManual()
    {
        return m_Manual.Select(a => a.ToString()).ToList();
    }

    /// <summary>Returns the pattern text of every duplicated entry.</summary>
    public List<string> GetAllDuplicated()
    {
        return m_Duplicated.Select(a => a.ToString()).ToList();
    }
}
|
|
}
|