piyushghai
diff --git a/‎Report/1_star_wordcloud_500k.png‎
276 KB b/‎Report/1_star_wordcloud_500k.png‎
276 KB
diff --git a/‎Report/2_star_wordcloud_500k.png‎
285 KB b/‎Report/2_star_wordcloud_500k.png‎
285 KB
diff --git a/‎Report/3_star_wordcloud_500k.png‎
217 KB b/‎Report/3_star_wordcloud_500k.png‎
217 KB
diff --git a/‎Report/4_star_wordcloud_500k.png‎
279 KB b/‎Report/4_star_wordcloud_500k.png‎
279 KB
diff --git a/‎Report/5_star_wordcloud_500k.png‎
364 KB b/‎Report/5_star_wordcloud_500k.png‎
364 KB
diff --git a/‎Report/report.aux‎
Lines changed: 43 additions & 32 deletions b/‎Report/report.aux‎
Lines changed: 43 additions & 32 deletions
@@ -12,6 +12,7 @@
 \@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Our Mission}{2}}
 \@writefile{toc}{\contentsline {subsection}{\numberline {1.3}About the dataset}{2}}
 \@writefile{toc}{\contentsline {section}{\numberline {2}Exploratory Analysis of Yelp Dataset}{3}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Word Cloud}{3}}
 \@writefile{lot}{\contentsline {table}{\numberline {2.1}{\ignorespaces Categories and sub categories a review belongs to in the Yelp business dataset}}{4}}
 \newlabel{review_cat_table}{{2.1}{4}}
 \@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces Review count v/s category they belong to}}{4}}
@@ -24,39 +25,49 @@
 \newlabel{length_count}{{2.3}{5}}
 \@writefile{lof}{\contentsline {figure}{\numberline {2.4}{\ignorespaces Length of a review v/s count of reviews}}{5}}
 \newlabel{star_distribution}{{2.4}{5}}
-\@writefile{toc}{\contentsline {section}{\numberline {3}Program Description}{6}}
-\@writefile{toc}{\contentsline {section}{\numberline {4}Model Description}{6}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Flow chart showing the models in the workflow}}{7}}
-\newlabel{models_diag}{{4.1}{7}}
-\@writefile{toc}{\contentsline {section}{\numberline {5}Model Evaluation}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces Baseline model performance metrics}}{8}}
-\newlabel{baseline_performance}{{5.1}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.2}{\ignorespaces TFIDF model performance metrics}}{8}}
-\newlabel{tfidf_performance}{{5.2}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.3}{\ignorespaces Bag of words model performance metrics}}{8}}
-\newlabel{bow_performance}{{5.3}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.4}{\ignorespaces Bigram model performance metrics}}{8}}
-\newlabel{bigram_performance}{{5.4}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.5}{\ignorespaces Trigram model performance metrics}}{8}}
-\newlabel{trigram_performance}{{5.5}{8}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.6}{\ignorespaces Bi and Tri-gram model performance metrics}}{9}}
-\newlabel{bow_performance}{{5.6}{9}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.7}{\ignorespaces LDA model performance metrics}}{9}}
-\newlabel{bow_performance}{{5.7}{9}}
-\@writefile{lot}{\contentsline {table}{\numberline {5.8}{\ignorespaces LDA + Sentiment model performance metrics}}{9}}
-\newlabel{lda_sentiment_performance}{{5.8}{9}}
-\@writefile{toc}{\contentsline {section}{\numberline {6}Coding Contribution}{9}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Data Transformation \ Pre-Processing}{9}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2.5}{\ignorespaces Word Cloud for 1 star rating reviews}}{6}}
+\newlabel{wc1}{{2.5}{6}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2.6}{\ignorespaces Word Cloud for 2 star rating reviews}}{6}}
+\newlabel{wc2}{{2.6}{6}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2.7}{\ignorespaces Word Cloud for 3 star rating reviews}}{6}}
+\newlabel{wc3}{{2.7}{6}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2.8}{\ignorespaces Word Cloud for 4 star rating reviews}}{7}}
+\newlabel{wc4}{{2.8}{7}}
+\@writefile{lof}{\contentsline {figure}{\numberline {2.9}{\ignorespaces Word Cloud for 5 star rating reviews}}{7}}
+\newlabel{wc5}{{2.9}{7}}
+\@writefile{toc}{\contentsline {section}{\numberline {3}Program Description}{8}}
+\@writefile{toc}{\contentsline {section}{\numberline {4}Model Description}{8}}
+\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Flow chart showing the models in the workflow}}{9}}
+\newlabel{models_diag}{{4.1}{9}}
+\@writefile{toc}{\contentsline {section}{\numberline {5}Model Evaluation}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces Baseline model performance metrics}}{10}}
+\newlabel{baseline_performance}{{5.1}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.2}{\ignorespaces TFIDF model performance metrics}}{10}}
+\newlabel{tfidf_performance}{{5.2}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.3}{\ignorespaces Bag of words model performance metrics}}{10}}
+\newlabel{bow_performance}{{5.3}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.4}{\ignorespaces Bigram model performance metrics}}{10}}
+\newlabel{bigram_performance}{{5.4}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.5}{\ignorespaces Trigram model performance metrics}}{10}}
+\newlabel{trigram_performance}{{5.5}{10}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.6}{\ignorespaces Bi and Tri-gram model performance metrics}}{11}}
+\newlabel{bow_performance}{{5.6}{11}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.7}{\ignorespaces LDA model performance metrics}}{11}}
+\newlabel{bow_performance}{{5.7}{11}}
+\@writefile{lot}{\contentsline {table}{\numberline {5.8}{\ignorespaces LDA + Sentiment model performance metrics}}{11}}
+\newlabel{lda_sentiment_performance}{{5.8}{11}}
+\@writefile{toc}{\contentsline {section}{\numberline {6}Coding Contribution}{11}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Data Transformation \ Pre-Processing}{11}}
+\@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Data Cleanup Flow}}{12}}
+\newlabel{cleanup_diag}{{6.1}{12}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Cleaning}{12}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Creating Training and Testing Corpus}{12}}
+\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces Star rating distribution in the training corpora}}{12}}
+\newlabel{corpus_size}{{6.1}{12}}
 \citation{lda}
-\@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Data Cleanup Flow}}{10}}
-\newlabel{cleanup_diag}{{6.1}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Cleaning}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Creating Training and Testing Corpus}{10}}
-\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces Star rating distribution in the training corpora}}{10}}
-\newlabel{corpus_size}{{6.1}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}LDA Model Development}{11}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Contributions from team members}{11}}
-\@writefile{toc}{\contentsline {section}{\numberline {7}Tools and Technology Stacks}{11}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}LDA Model Development}{13}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Contributions from team members}{13}}
+\@writefile{toc}{\contentsline {section}{\numberline {7}Tools and Technology Stacks}{13}}
 \bibcite{yelp}{1}
 \bibcite{yelp_dataset_challenge}{2}
 \bibcite{nltk}{3}