-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathInitialization.cpp
More file actions
84 lines (79 loc) · 3.23 KB
/
Initialization.cpp
File metadata and controls
84 lines (79 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include"Initialization.h"
/*=================================================================================================*/
// Joins the words sentenceVec[start_word .. end_word] (both ends inclusive)
// into one string, separated by single spaces and without a trailing space.
//
// start_word : index of the first word to include.
// end_word   : index of the last word to include.
// Returns the joined words. Returns an empty string when start_word is not a
// valid index or when end_word lies before start_word. An end_word past the
// last word is reported on stderr and clamped to the last word.
std::string DataItem::GetStrWithId(unsigned start_word,unsigned end_word)
{
    const size_t wordCount = sentenceVec.size();

    // A negative value passed by a caller arrives here as a huge unsigned
    // number; the int cast keeps the original diagnostic readable. Bail out
    // instead of forming an out-of-range iterator.
    if(static_cast<int>(start_word) < 0 || start_word >= wordCount)
    {
        std::cerr<<"wrong start-word : "<<static_cast<int>(start_word)<<std::endl;
        return std::string();
    }

    // Empty (or inverted) range: nothing to join.
    if(end_word < start_word)
    {
        return std::string();
    }

    size_t lastIndex = end_word;
    if(lastIndex >= wordCount)
    {
        std::cerr<<"wrong end-word : "<<end_word<<std::endl;
        lastIndex = wordCount - 1;
    }

    std::string joined;
    for(size_t idx = start_word; idx <= lastIndex; ++idx)
    {
        if(idx != start_word)
        {
            joined += ' ';
        }
        joined += sentenceVec[idx];
    }
    return joined;
}
namespace {
// Concatenates every word in [first,last), each followed by one space, and
// appends the resulting string (trailing space included) to `out`.
template <typename WordIter>
void AppendSegment(WordIter first, WordIter last, std::vector<std::string>& out)
{
    std::ostringstream buffer;
    for(; first != last; ++first)
    {
        const std::string& word = *first;
        buffer << word << " ";
    }
    out.push_back(buffer.str());
}
}

// Splits sentenceVec into segments at the word indices stored in sepVec and
// appends each segment to `result`. Every segment is the words joined by
// spaces, with a trailing space, and ends with its separator word.
//
// - No separators, or a single separator sitting on the first or last word:
//   the whole sentence is appended as one segment.
// - Otherwise: words [0 .. sepVec.front()] (only if the first separator is not
//   word 0), then (sepVec[i-1] .. sepVec[i]] for each consecutive pair, then
//   the words after the last separator (only if it is not the last word).
//
// NOTE(review): separator indices are used without bounds checks, and when
// the first of several separators is 0, word 0 is not part of any segment.
void DataItem::GetSubStrVec(vector<string>&result)
{
    const bool wholeSentence =
        sepVec.empty() ||
        (sepVec.size()==1 && (sepVec[0]==0 || sepVec[0]==sentenceVec.size()-1));
    if(wholeSentence)
    {
        AppendSegment(sentenceVec.begin(), sentenceVec.end(), result);
        return;
    }

    // Leading segment, up to and including the first separator.
    if(sepVec.front()!=0)
    {
        AppendSegment(sentenceVec.begin(), sentenceVec.begin()+sepVec.front()+1, result);
    }

    // One segment per consecutive pair of separators.
    for(size_t next=1; next<sepVec.size(); ++next)
    {
        AppendSegment(sentenceVec.begin()+sepVec[next-1]+1,
                      sentenceVec.begin()+sepVec[next]+1,
                      result);
    }

    // Trailing segment after the last separator.
    if(sepVec.back()!=sentenceVec.size()-1)
    {
        AppendSegment(sentenceVec.begin()+sepVec.back()+1, sentenceVec.end(), result);
    }
}
/*==============================================================================================*/
void Resource::LoadKeyWordSet(const char*file)
{
ifstream in(file);
if(!in.is_open()){cerr<<"wrong to open file: "<<file<<'\n';exit(1);}
string line;
string rp1 = "\\[X\\]";
string rp2="\\s";
regex rg1(rp1),rg2(rp2);
string rgx_result("");
boost::match_results<std::string::const_iterator> mr;
bool CHECK1,CHECK2;
while(getline(in,line))
{
rgx_result="";CHECK1 = regex_search(line,mr,rg1);CHECK2=regex_search(line,mr,rg2);
if(!CHECK1 && !CHECK2)keyWordSet.insert(line);
else
{
oriKeyPhrase.push_back(line);
if(CHECK1)
{
boost::regex_replace(back_inserter(rgx_result),line.begin(),line.end(),rg1," (.*) ");
line=rgx_result;rgx_result="";
}
if(CHECK2)
{
boost::regex_replace(back_inserter(rgx_result),line.begin(),line.end(),rg2," ");
line=rgx_result;rgx_result="";
}
keyPhraseVec.push_back("(^| )("+line+")( |$)");
}
}
cerr<<"keyWordSet size is: "<<keyWordSet.size()<<endl;
cerr<<"keyPhraseSet size is: "<<keyPhraseVec.size()<<endl;
}
// Placeholder: performs no work and always returns an empty string.
string Resource::RegexCompile()
{
    return std::string();
}
void Resource::TestKeyPhraseSet(const string& s)
{
vector<string>::iterator it = keyPhraseVec.begin();
boost::match_results<string::const_iterator> mr;
while(it != keyPhraseVec.end()){
if(regex_search(s,mr,regex(*it++)))
cerr<< mr[0] <<endl;
else cerr<<"no"<<endl;
}
}
// Returns a copy of `s` in which every half-width (ASCII) comma ',' is
// replaced by the full-width comma U+FF0C. All other bytes are copied
// unchanged.
//
// The previous body replaced "," with "," and therefore returned its input
// unmodified.
// NOTE(review): the full-width comma is emitted as its UTF-8 byte sequence;
// this assumes the surrounding text is UTF-8 -- confirm against the corpus.
std::string half2full(const std::string& s)
{
    static const char kFullWidthComma[] = "\xEF\xBC\x8C";

    std::string converted;
    converted.reserve(s.size());
    for(std::string::size_type pos = 0; pos < s.size(); ++pos)
    {
        if(s[pos] == ',')
        {
            converted += kFullWidthComma;
        }
        else
        {
            converted += s[pos];
        }
    }
    return converted;
}