Skip to content

Commit af49d93

Browse files
committed
Bayes implemented, compile using: 'make compilar_bayes'
1 parent 34924d4 commit af49d93

File tree

11 files changed

+25244
-25280
lines changed

11 files changed

+25244
-25280
lines changed

Codigo/Makefile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CFLAGS=-g -Wall -std=c++11 -pedantic
1+
CFLAGS=-g -Wall -std=c++11 -pedantic
22
CC=g++
33
VG=valgrind
44
VFLAGS=--leak-check=full --show-reachable=yes
@@ -9,6 +9,8 @@ BOSTFLAGS = -lboost_system -lboost_regex
99
main.o: src/SA.cpp
1010
$(CC) "src/SA.cpp" -c $(CFLAGS)
1111

12+
main_bayes.o: src/bayes_main.cpp
13+
$(CC) "src/bayes_main.cpp" -c $(CFLAGS)
1214

1315
Perceptron.o: src/Perceptron.cpp src/Perceptron.h
1416
$(CC) "src/Perceptron.cpp" -c $(CFLAGS)
@@ -17,5 +19,16 @@ Bayes.o: src/Bayes.cpp src/Bayes.h
1719
$(CC) "src/Bayes.cpp" -c $(CFLAGS)
1820

1921

20-
compilar: Perceptron.o main.o Bayes.o
22+
compilar: Perceptron.o main.o
2123
$(CC) *.o $(CFLAGS) -lm -o ejecutable $(BOSTFLAGS)
24+
25+
26+
compilar_bayes: main_bayes.o Bayes.o
27+
$(CC) *.o $(CFLAGS) -lm -o ejecutable $(BOSTFLAGS)
28+
29+
30+
# Remove build artifacts: object files, editor backups and produced binaries.
# The link rules in this Makefile write their output to "ejecutable", so it is
# deleted here as well. Declared phony so a file named "clean" cannot mask it.
.PHONY: clean
clean:
	find . -name "*.o" -type f -delete
	find . -name "*~" -type f -delete
	find . -name "a.out" -type f -delete
	find . -name "main" -type f -delete
	find . -name "ejecutable" -type f -delete

Codigo/Makefile~

Lines changed: 0 additions & 21 deletions
This file was deleted.

Codigo/src/Bayes.cpp

Lines changed: 137 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11

22
#include "Bayes.h"
3-
3+
#include <string>
4+
#include <iostream>
5+
#include <sstream>
6+
#include <fstream>
7+
#include <boost/regex.hpp>
8+
#include <boost/algorithm/string.hpp>
9+
#include <tr1/functional>
10+
#include <locale> // std::locale, std::tolower
11+
#include <math.h>
12+
#include <cmath>
413
/*
514
bool check_positive_negative(review, data){
615
total = 0
@@ -10,22 +19,70 @@ bool check_positive_negative(review, data){
1019
return 1 if total/float(len(review.split())) >= 0.5 else 0
1120
}*/
1221

22+
typedef std::vector<std::vector<std::string> > Rows;
23+
1324

1425
/**
 * Builds the model by training on "labeledTrainData.tsv".
 *
 * The first line of the file is discarded as a header. Every following line
 * is split on tabs; field 1 is parsed as the integer tag and field 2 is the
 * review text. Each `\w+` token of the text is lower-cased and passed to
 * entrenar(tag, word).
 *
 * Lines with fewer than three fields are skipped. If the file cannot be
 * opened, a message is printed and the model is left untrained.
 * std::stoi may still throw on a non-numeric tag field.
 */
Bayes::Bayes(){
    std::ifstream input("labeledTrainData.tsv");
    if (!input) {
        std::cout << "unable to load file" << std::endl;
        return; // nothing to train on
    }

    const char row_delim = '\n';
    const char field_delim = '\t';

    // Compiled once and reused for every row.
    const boost::regex word_regex("\\w+");
    const boost::sregex_token_iterator end;

    std::string row;
    std::getline(input, row, row_delim); // discard the header line

    // Rows are processed as they are read; there is no need to buffer the file.
    while (std::getline(input, row, row_delim)) {
        std::vector<std::string> fields;
        std::istringstream ss(row);
        for (std::string field; std::getline(ss, field, field_delim); ) {
            fields.push_back(field);
        }

        // Indexing fields 1 and 2 of a short or blank line would be out of bounds.
        if (fields.size() < 3) {
            continue;
        }

        const int tag = std::stoi(fields[1]);
        const std::string& text = fields[2];

        // The iterator is only dereferenced inside the loop, after the
        // `!= end` check, so a text without any word token is safe.
        for (boost::sregex_token_iterator iter(text.begin(), text.end(), word_regex, 0);
             iter != end; ++iter) {
            std::string word(*iter);
            // tolower must receive a value representable as unsigned char.
            std::transform(word.begin(), word.end(), word.begin(),
                           [](unsigned char c) { return static_cast<char>(::tolower(c)); });
            entrenar(tag, word);
        }
    }
}
1775

1876
Bayes::~Bayes(){
1977

2078
}
2179

22-
void Bayes::entrenar(int tag, std::vector<std::string*>* oracion){
80+
// void Bayes::entrenar(int tag, std::vector<std::string*>* oracion){
81+
void Bayes::entrenar(int tag, std::string word ){
2382
std::string* palabra;
24-
25-
for (unsigned int i = 0; i < oracion->size() ; i++){
2683

2784

28-
palabra = (*oracion)[i];
85+
palabra = &word;
2986
//std::cout << palabra << "\n";
3087

3188

@@ -36,13 +93,84 @@ void Bayes::entrenar(int tag, std::vector<std::string*>* oracion){
3693

3794
}
3895

39-
if(tag > 0){
96+
if(tag == 0){
4097
(*data[*palabra])[0]++;
4198
}else{
4299
(*data[*palabra])[1]++;
43100
}
44-
45-
}
101+
46102
}
47103

48-
double evaluar (std::vector<std::vector<std::string*>*>* oraciones);
104+
double evaluar (std::vector<std::vector<std::string*>*>* oraciones);
105+
106+
std::vector<long double>* Bayes::Predicciones(){
107+
Rows rows;
108+
std::ifstream input("testData.tsv");
109+
if (!input) {
110+
std::cout << "unable to load file" << std::endl;
111+
}
112+
char const row_delim = '\n';
113+
char const field_delim = '\t';
114+
std::string firstrow;
115+
getline(input, firstrow, row_delim);
116+
for (std::string row; getline(input, row, row_delim); ) {
117+
rows.push_back(Rows::value_type());
118+
std::istringstream ss(row);
119+
for (std::string field; getline(ss, field, field_delim); ) {
120+
rows.back().push_back(field);
121+
}
122+
}
123+
std::vector<long double>* pred = new std::vector<long double>();
124+
for (std::vector<std::vector<std::string> >::iterator it = rows.begin(); it !=rows.end(); ++it){
125+
std::string id = (*it)[0];
126+
ids.push_back(id);
127+
128+
std::string text = (*it)[1];
129+
130+
boost::regex regex("\\w+");
131+
boost::sregex_token_iterator iter(text.begin(), text.end(), regex, 0);
132+
boost::sregex_token_iterator end;
133+
134+
// Tengo que hacer la iteracion una vez antes del for:
135+
//std::string pal_ant(*iter);
136+
//std::transform(pal_ant.begin(), pal_ant.end(), pal_ant.begin(), ::tolower);
137+
// palabra está en: pal_ant
138+
139+
long double suma_probas = 0;
140+
float cantidad_palabras = 0; // es una cantidad, pero no quiero castear después
141+
for( ; iter != end; ++iter ) {
142+
//std::tr1::hash<std::string> hash_fn;
143+
/*std::transform((*iter).begin(), (*iter).end(), (*iter).begin(), ::tolower);*/
144+
std::string temp(*iter);
145+
std::transform(temp.begin(), temp.end(), temp.begin(), ::tolower);
146+
147+
//long double proba = ((long double) *data[temp] )[1]/( (long double) *data[temp])[0] + (long double) *data[temp])[1]);
148+
auto it = data.find(temp);
149+
// la clave existe
150+
if (it != data.end()){
151+
long double proba = ((long double)(*data[temp])[1])/(((long double)(*data[temp])[0]) + ((long double)(*data[temp])[1]));
152+
suma_probas += proba;
153+
cantidad_palabras++;
154+
}
155+
156+
//hash_palabras.push_back(str_hash % dimensiones);
157+
/*if (bigramas){
158+
std::string gram(pal_ant + " " + temp);
159+
// printf("Bigrama %s", gram.c_str());
160+
std::transform(gram.begin(), gram.end(), gram.begin(), ::tolower);
161+
std::size_t gram_hash = hash_fn(gram);
162+
hash_palabras.push_back(gram_hash % dimensiones);
163+
pal_ant = temp;
164+
}*/
165+
}
166+
// pred->push_back(suma_probas/cantidad_palabras);
167+
if (cantidad_palabras != 0){
168+
pred->push_back(suma_probas/cantidad_palabras);
169+
} else {
170+
puts("Un error");
171+
pred->push_back(0.5); // no se que está pasando acá
172+
}
173+
}
174+
input.close();
175+
return pred;
176+
}

Codigo/src/Bayes.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,18 @@ class Bayes {
1818

1919
virtual ~Bayes();
2020

21+
std::vector<std::string> ids;
22+
2123
std::unordered_map< std::string, std::vector<int>* > data;
2224

2325
// entrenar, recibimos oraciones. Oraciones entiendo como un string que representa una oracion
2426
// y si es positivo o negativo
2527

26-
void entrenar(int tag, std::vector<std::string*>* oracion);
28+
//void entrenar(int tag, std::vector<std::string*>* oracion);
29+
void entrenar(int tag, std::string word);
2730

2831
double evaluar (std::vector<std::vector<std::string*>*>* oraciones);
32+
std::vector<long double>* Predicciones();
2933

3034
};
3135

0 commit comments

Comments
 (0)