|
12 | 12 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Our Mission}{2}} |
13 | 13 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.3}About the dataset}{2}} |
14 | 14 | \@writefile{toc}{\contentsline {section}{\numberline {2}Exploratory Analysis of Yelp Dataset}{3}} |
| 15 | +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Word Cloud}{3}} |
15 | 16 | \@writefile{lot}{\contentsline {table}{\numberline {2.1}{\ignorespaces Categories and sub categories a review belongs to in the Yelp business dataset}}{4}} |
16 | 17 | \newlabel{review_cat_table}{{2.1}{4}} |
17 | 18 | \@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces Review count v/s category they belong to}}{4}} |
|
24 | 25 | \newlabel{length_count}{{2.3}{5}} |
25 | 26 | \@writefile{lof}{\contentsline {figure}{\numberline {2.4}{\ignorespaces Length of a review v/s count of reviews}}{5}} |
26 | 27 | \newlabel{star_distribution}{{2.4}{5}} |
27 | | -\@writefile{toc}{\contentsline {section}{\numberline {3}Program Description}{6}} |
28 | | -\@writefile{toc}{\contentsline {section}{\numberline {4}Model Description}{6}} |
29 | | -\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Flow chart showing the models in the workflow}}{7}} |
30 | | -\newlabel{models_diag}{{4.1}{7}} |
31 | | -\@writefile{toc}{\contentsline {section}{\numberline {5}Model Evaluation}{8}} |
32 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces Baseline model performance metrics}}{8}} |
33 | | -\newlabel{baseline_performance}{{5.1}{8}} |
34 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.2}{\ignorespaces TFIDF model performance metrics}}{8}} |
35 | | -\newlabel{tfidf_performance}{{5.2}{8}} |
36 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.3}{\ignorespaces Bag of words model performance metrics}}{8}} |
37 | | -\newlabel{bow_performance}{{5.3}{8}} |
38 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.4}{\ignorespaces Bigram model performance metrics}}{8}} |
39 | | -\newlabel{bigram_performance}{{5.4}{8}} |
40 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.5}{\ignorespaces Trigram model performance metrics}}{8}} |
41 | | -\newlabel{trigram_performance}{{5.5}{8}} |
42 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.6}{\ignorespaces Bi and Tri-gram model performance metrics}}{9}} |
43 | | -\newlabel{bow_performance}{{5.6}{9}} |
44 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.7}{\ignorespaces LDA model performance metrics}}{9}} |
45 | | -\newlabel{bow_performance}{{5.7}{9}} |
46 | | -\@writefile{lot}{\contentsline {table}{\numberline {5.8}{\ignorespaces LDA + Sentiment model performance metrics}}{9}} |
47 | | -\newlabel{lda_sentiment_performance}{{5.8}{9}} |
48 | | -\@writefile{toc}{\contentsline {section}{\numberline {6}Coding Contribution}{9}} |
49 | | -\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Data Transformation \ Pre-Processing}{9}} |
| 28 | +\@writefile{lof}{\contentsline {figure}{\numberline {2.5}{\ignorespaces Word Cloud for 1 star rating reviews}}{6}} |
| 29 | +\newlabel{wc1}{{2.5}{6}} |
| 30 | +\@writefile{lof}{\contentsline {figure}{\numberline {2.6}{\ignorespaces Word Cloud for 2 star rating reviews}}{6}} |
| 31 | +\newlabel{wc2}{{2.6}{6}} |
| 32 | +\@writefile{lof}{\contentsline {figure}{\numberline {2.7}{\ignorespaces Word Cloud for 3 star rating reviews}}{6}} |
| 33 | +\newlabel{wc3}{{2.7}{6}} |
| 34 | +\@writefile{lof}{\contentsline {figure}{\numberline {2.8}{\ignorespaces Word Cloud for 4 star rating reviews}}{7}} |
| 35 | +\newlabel{wc4}{{2.8}{7}} |
| 36 | +\@writefile{lof}{\contentsline {figure}{\numberline {2.9}{\ignorespaces Word Cloud for 5 star rating reviews}}{7}} |
| 37 | +\newlabel{wc5}{{2.9}{7}} |
| 38 | +\@writefile{toc}{\contentsline {section}{\numberline {3}Program Description}{8}} |
| 39 | +\@writefile{toc}{\contentsline {section}{\numberline {4}Model Description}{8}} |
| 40 | +\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Flow chart showing the models in the workflow}}{9}} |
| 41 | +\newlabel{models_diag}{{4.1}{9}} |
| 42 | +\@writefile{toc}{\contentsline {section}{\numberline {5}Model Evaluation}{10}} |
| 43 | +\@writefile{lot}{\contentsline {table}{\numberline {5.1}{\ignorespaces Baseline model performance metrics}}{10}} |
| 44 | +\newlabel{baseline_performance}{{5.1}{10}} |
| 45 | +\@writefile{lot}{\contentsline {table}{\numberline {5.2}{\ignorespaces TFIDF model performance metrics}}{10}} |
| 46 | +\newlabel{tfidf_performance}{{5.2}{10}} |
| 47 | +\@writefile{lot}{\contentsline {table}{\numberline {5.3}{\ignorespaces Bag of words model performance metrics}}{10}} |
| 48 | +\newlabel{bow_performance}{{5.3}{10}} |
| 49 | +\@writefile{lot}{\contentsline {table}{\numberline {5.4}{\ignorespaces Bigram model performance metrics}}{10}} |
| 50 | +\newlabel{bigram_performance}{{5.4}{10}} |
| 51 | +\@writefile{lot}{\contentsline {table}{\numberline {5.5}{\ignorespaces Trigram model performance metrics}}{10}} |
| 52 | +\newlabel{trigram_performance}{{5.5}{10}} |
| 53 | +\@writefile{lot}{\contentsline {table}{\numberline {5.6}{\ignorespaces Bi and Tri-gram model performance metrics}}{11}} |
| 54 | +\newlabel{bow_performance}{{5.6}{11}} |
| 55 | +\@writefile{lot}{\contentsline {table}{\numberline {5.7}{\ignorespaces LDA model performance metrics}}{11}} |
| 56 | +\newlabel{bow_performance}{{5.7}{11}} |
| 57 | +\@writefile{lot}{\contentsline {table}{\numberline {5.8}{\ignorespaces LDA + Sentiment model performance metrics}}{11}} |
| 58 | +\newlabel{lda_sentiment_performance}{{5.8}{11}} |
| 59 | +\@writefile{toc}{\contentsline {section}{\numberline {6}Coding Contribution}{11}} |
| 60 | +\@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Data Transformation \ Pre-Processing}{11}} |
| 61 | +\@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Data Cleanup Flow}}{12}} |
| 62 | +\newlabel{cleanup_diag}{{6.1}{12}} |
| 63 | +\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Cleaning}{12}} |
| 64 | +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Creating Training and Testing Corpus}{12}} |
| 65 | +\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces Star rating distribution in the training corpora}}{12}} |
| 66 | +\newlabel{corpus_size}{{6.1}{12}} |
50 | 67 | \citation{lda} |
51 | | -\@writefile{lof}{\contentsline {figure}{\numberline {6.1}{\ignorespaces Data Cleanup Flow}}{10}} |
52 | | -\newlabel{cleanup_diag}{{6.1}{10}} |
53 | | -\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Cleaning}{10}} |
54 | | -\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Creating Training and Testing Corpus}{10}} |
55 | | -\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces Star rating distribution in the training corpora}}{10}} |
56 | | -\newlabel{corpus_size}{{6.1}{10}} |
57 | | -\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}LDA Model Development}{11}} |
58 | | -\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Contributions from team members}{11}} |
59 | | -\@writefile{toc}{\contentsline {section}{\numberline {7}Tools and Technology Stacks}{11}} |
| 68 | +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}LDA Model Development}{13}} |
| 69 | +\@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Contributions from team members}{13}} |
| 70 | +\@writefile{toc}{\contentsline {section}{\numberline {7}Tools and Technology Stacks}{13}} |
60 | 71 | \bibcite{yelp}{1} |
61 | 72 | \bibcite{yelp_dataset_challenge}{2} |
62 | 73 | \bibcite{nltk}{3} |
|
0 commit comments