using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace NewsCrawler
{
    /// <summary>
    /// Classifies a piece of text against several lists of regular expressions
    /// that are loaded from keyword files under <c>Util.GetConfigPath()</c>.
    /// </summary>
    public class TextCondition
    {
        /// <summary>Classification produced by <see cref="Match"/>.</summary>
        public enum TYPE
        {
            NOT_MATCHED,
            NEGATIVE,
            POSITIVE,
            POSITIVE_FORCE,
            MANUAL
        }

        /// <summary>
        /// Outcome of a <see cref="Match"/> call: the classification and the
        /// pattern text of the regular expression that produced it.
        /// </summary>
        public class RESULT
        {
            public TYPE m_enType;
            public string m_strKeyword;

            public RESULT(TYPE enType, string strKeyword)
            {
                m_enType = enType;
                m_strKeyword = strKeyword;
            }

            /// <summary>Formats the result as "[TYPE] keyword".</summary>
            public override string ToString()
            {
                return string.Format("[{0}] {1}", m_enType, m_strKeyword);
            }
        }

        // One compiled pattern list per keyword file. The element type must be
        // Regex: the lists are filled with "new Regex(line)" and queried with
        // Find(s => s.IsMatch(...)).
        readonly List<Regex> m_Positive = new List<Regex>();
        readonly List<Regex> m_PositiveForce = new List<Regex>();
        readonly List<Regex> m_Negative = new List<Regex>();
        readonly List<Regex> m_Manual = new List<Regex>();
        readonly List<Regex> m_Duplicated = new List<Regex>();

        /// <summary>
        /// Loads every keyword file and, when <c>Util.IsDebugging()</c> is true,
        /// prints the classification of a few sample strings.
        /// </summary>
        public TextCondition()
        {
            LoadAll();
            Test();
        }

        /// <summary>
        /// Debug-only self check: writes the <see cref="Match"/> result of a fixed
        /// set of sample strings to the console. Does nothing unless
        /// <c>Util.IsDebugging()</c> returns true.
        /// </summary>
        void Test()
        {
            if (Util.IsDebugging() == false)
                return;

            string[] aSamples =
            {
                "다음 주가 상승 기대",
                "네이버 파산 기대",
                "김장철",
                "15억",
                "33억",
                "846조",
                "39조",
                "48437조",
                "1만",
                "10만",
                "100만",
                "200만",
                "500만",
                "1000만",
                "10000만",
                "100000만",
                "1000000만"
            };

            foreach (string strSample in aSamples)
                Console.WriteLine(Match(strSample));

            Console.WriteLine("test end");
        }

        /// <summary>
        /// Replaces the contents of <paramref name="target"/> with the patterns read
        /// from "&lt;config path&gt;/&lt;strName&gt;.txt".
        /// </summary>
        /// <param name="target">List to clear and refill.</param>
        /// <param name="strName">
        /// File name without extension; also used as the tag in error log entries.
        /// </param>
        /// <remarks>
        /// The list is cleared even when the file does not exist. Lines that are
        /// empty or whitespace-only, or whose first character is '#', are skipped.
        /// A line that is not a valid regular expression is logged and skipped so
        /// that one bad entry does not discard the rest of the file.
        /// </remarks>
        static void LoadKeywords(List<Regex> target, string strName)
        {
            target.Clear();

            string strPath = Util.GetConfigPath() + "/" + strName + ".txt";
            if (File.Exists(strPath) == false)
                return;

            foreach (string line in File.ReadAllLines(strPath))
            {
                // The blank check must come first: it guarantees line[0] exists.
                if (line.Trim().Length == 0 || line[0] == '#')
                    continue;

                try
                {
                    // The untrimmed line is used as the pattern, exactly as written in the file.
                    target.Add(new Regex(line));
                }
                catch (ArgumentException ex)
                {
                    // Log text is Korean for "invalid keyword"; kept verbatim.
                    Util.Log(Util.LOG_TYPE.ERROR, string.Format("[{0}] 잘못된 키워드 ({1})", strName, ex.Message));
                }
            }
        }

        /// <summary>Reloads the positive patterns from keyword-positive.txt.</summary>
        public void LoadPositive()
        {
            LoadKeywords(m_Positive, "keyword-positive");
        }

        /// <summary>Reloads the forced-positive patterns from keyword-positive-force.txt.</summary>
        public void LoadPositiveForce()
        {
            LoadKeywords(m_PositiveForce, "keyword-positive-force");
        }

        /// <summary>Reloads the manual patterns from keyword-manual.txt.</summary>
        public void LoadManual()
        {
            LoadKeywords(m_Manual, "keyword-manual");
        }

        /// <summary>Reloads the negative patterns from keyword-negative.txt.</summary>
        public void LoadNegative()
        {
            LoadKeywords(m_Negative, "keyword-negative");
        }

        /// <summary>Reloads the duplicate-detection patterns from keyword-duplicated.txt.</summary>
        public void LoadDuplicatedKeyword()
        {
            LoadKeywords(m_Duplicated, "keyword-duplicated");
        }

        /// <summary>Reloads all five keyword files.</summary>
        void LoadAll()
        {
            LoadPositive();
            LoadPositiveForce();
            LoadNegative();
            LoadManual();
            LoadDuplicatedKeyword();
        }

        /// <summary>
        /// Returns a multi-line summary with the number of loaded positive,
        /// forced-positive, negative and manual patterns. The duplicated-keyword
        /// list is not part of this summary.
        /// </summary>
        public string GetKeywordsCnt()
        {
            return string.Format("Positive: {0}\nPositiveForce: {1}\nNegative: {2}\nManual: {3}",
                m_Positive.Count, m_PositiveForce.Count, m_Negative.Count, m_Manual.Count);
        }

        /// <summary>
        /// Classifies <paramref name="strText"/>. The lists are checked in the order
        /// negative, forced-positive, manual, positive; the first list containing a
        /// matching pattern decides the result.
        /// </summary>
        /// <param name="strText">Text to classify. Null is treated as matching nothing.</param>
        /// <returns>
        /// A <see cref="RESULT"/> holding the classification and the matching pattern,
        /// or <see cref="TYPE.NOT_MATCHED"/> with an empty keyword when nothing matches.
        /// </returns>
        public RESULT Match(string strText)
        {
            // Regex.IsMatch throws on null input; report "no match" instead.
            if (strText == null)
                return new RESULT(TYPE.NOT_MATCHED, "");

            Regex result = m_Negative.Find(s => s.IsMatch(strText));
            if (result != null)
                return new RESULT(TYPE.NEGATIVE, result.ToString());

            result = m_PositiveForce.Find(s => s.IsMatch(strText));
            if (result != null)
                return new RESULT(TYPE.POSITIVE_FORCE, result.ToString());

            result = m_Manual.Find(s => s.IsMatch(strText));
            if (result != null)
                return new RESULT(TYPE.MANUAL, result.ToString());

            result = m_Positive.Find(s => s.IsMatch(strText));
            if (result != null)
                return new RESULT(TYPE.POSITIVE, result.ToString());

            return new RESULT(TYPE.NOT_MATCHED, "");
        }

        /// <summary>
        /// Tells whether <paramref name="strText"/> matches any pattern loaded from
        /// keyword-duplicated.txt.
        /// </summary>
        /// <param name="strText">Text to test. Null is treated as matching nothing.</param>
        /// <returns>True when at least one duplicated-keyword pattern matches.</returns>
        public bool IsDuplicatedKeyword(string strText)
        {
            // Regex.IsMatch throws on null input; report "no match" instead.
            if (strText == null)
                return false;

            return m_Duplicated.Exists(s => s.IsMatch(strText));
        }
    }
}