-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathload2.py
More file actions
117 lines (82 loc) · 3.13 KB
/
load2.py
File metadata and controls
117 lines (82 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# coding: utf-8
from pandas import *
import pandas as pd
import numpy as np
from urllib import urlopen
from bokeh.plotting import *
import scipy.special
from matplotlib import pyplot as plt
# Module-level setup: load the births CSV once and expose it as DF_baby,
# then create the module-level frames that the usr_* functions fill in.
print('Pls wait till the data loads and prints columns')

# urllib's urlopen (Python 2) opens a local file when the argument has no URL
# scheme, which is how this plain filesystem path is read.
babyCSV = urlopen("/home/alakshminara/Downloads/2008_births.csv")
try:
    DF_baby = read_csv(babyCSV)
finally:
    # The handle was previously left open for the life of the process.
    babyCSV.close()

columns = DF_baby.columns
print(list(DF_baby.columns.values))

# Start from empty DataFrame *instances*. Binding the bare pd.DataFrame class
# (as before) made e.g. len(DF_train) raise TypeError until a loader had run.
DF_sammi = pd.DataFrame()
DF_train = pd.DataFrame()
DF_test = pd.DataFrame()
DF_eval = pd.DataFrame()
def usr_load_sammi():
    """Filter DF_baby into the module-level DF_sammi and print summary stats.

    A row is kept when RACEMOM == 1, RACEDAD == 1, 25 < MAGE < 50,
    BPOUND < 20 and SEX == 2. The mean and median of the BPOUND column of
    the resulting frame are then printed. Returns None.
    """
    global DF_sammi

    # Build the row selection from named boolean masks.
    race_mask = (DF_baby['RACEMOM'] == 1) & (DF_baby['RACEDAD'] == 1)
    age_mask = (DF_baby['MAGE'] > 25) & (DF_baby['MAGE'] < 50)
    weight_mask = DF_baby['BPOUND'] < 20
    sex_mask = DF_baby['SEX'] == 2
    selected = DF_baby[race_mask & age_mask & weight_mask & sex_mask]
    DF_sammi = pd.DataFrame(selected)

    # Joining label and value with a single space reproduces the separator
    # the comma form of the print statement inserted between its operands.
    birth_weights = DF_sammi['BPOUND']
    print(' '.join(['Mean Birth Weight for SammiDF ', str(birth_weights.mean())]))
    print(' '.join(['Median Birth Weight for SammiDF ', str(birth_weights.median())]))
def usr_scatter_plot(var1,var2):
    """Scatter-plot two columns of the module-level DF_sammi frame.

    var1 -- column name used for the x values and as the x-axis label.
    var2 -- column name used for the y values and as the y-axis label.

    DF_sammi is only a placeholder until usr_load_sammi() has been called,
    so that function has to run first. Returns None.

    figure/output_file/hold/scatter/show are presumably the module-level
    plotting functions pulled in by `from bokeh.plotting import *`; they
    operate on shared plotting state, so the call order below matters.
    """
    # Start a new figure titled for the filtered dataset, labelled with the
    # two column names.
    figure(title="Dataset of Babies similar to Sammi's Baby",
           x_axis_label = var1,
           y_axis_label = var2)
    # Select 'plot1.html' as the output file.
    output_file('plot1.html')
    # Called with no arguments here (usr_histogram_plot calls hold(False)
    # first); presumably turns plot hold on -- confirm against the bokeh
    # version in use.
    hold()
    # One black square marker per row of DF_sammi.
    scatter(DF_sammi[var1],DF_sammi[var2], marker="square", color="black")
    show()
def usr_histogram_plot(var1):
    """Draw a histogram of normally distributed samples and show it.

    var1 -- column name looked up in the module-level DF_sammi frame.

    NOTE(review): the bars are built from 1000 synthetic draws from
    N(mu, sigma), not from DF_sammi[var1]; the column is only used to
    compute `pdf`, which is never plotted. Confirm whether the real data
    (and/or the pdf curve) was meant to be drawn.

    DF_sammi is only a placeholder until usr_load_sammi() has been called,
    so that function has to run first. Returns None.

    The plotting calls are presumably the module-level functions from
    `from bokeh.plotting import *`; they act on shared plotting state, so
    the call order below matters.
    """
    hold(False)
    # The x-axis label is fixed and does not depend on var1.
    figure(title="Dataset of Babies similar to Sammi's Baby",
           x_axis_label = 'Birth Weight (lbs.)')
    # Parameters of the sampled normal distribution.
    # NOTE(review): 6.834 is presumably the mean birth weight reported by
    # usr_load_sammi -- confirm.
    mu, sigma = 6.834,1 # NOTE: you can tinker with these values if you like
    # Draw 1000 samples and bin them into a 200-bin density histogram.
    measured = np.random.normal(mu, sigma, 1000)
    hist, edges = np.histogram(measured, density=True, bins=200)
    # Normal pdf with the same mu/sigma, evaluated at each value of the
    # requested column.
    x = DF_sammi[var1]
    # NOTE(review): `pdf` is not referenced again in this function.
    pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))
    # Select 'plot.html' as the output file.
    output_file('plot.html')
    # Called with no arguments after the hold(False) above; presumably
    # turns plot hold back on -- confirm.
    hold()
    # One bar per bin: consecutive edges give the left/right sides, the bin
    # density gives the height.
    quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
         fill_color="#036565", line_color="#033649",
         # Title and tools are passed on this (the only) renderer call.
         title="Dataset of Babies similar to Sammi's Baby",
         tools=""
    )
    # Place the legend in the top-left corner.
    # NOTE(review): no renderer above is given a legend label -- confirm
    # this call has any visible effect.
    legend().orientation = "top_left"
    show()
def usr_random_splitDF():
    """Randomly split DF_baby into module-level train/test/eval frames.

    Each row of DF_baby goes to DF_train with probability 0.7. Each of the
    remaining rows goes to DF_test with probability 0.6 and to DF_eval
    otherwise, i.e. roughly a 70% / 18% / 12% split. The split uses numpy's
    global random state, so it differs between runs unless that state is
    seeded by the caller.

    The three row counts are printed. Returns None.
    """
    # Without this declaration the assignments below only created locals and
    # the module-level DF_train / DF_test / DF_eval were never updated.
    global DF_train, DF_test, DF_eval

    in_train = np.random.rand(len(DF_baby)) < 0.7
    DF_train = DF_baby[in_train]
    holdout = DF_baby[~in_train]

    # Second, independent draw over the held-out rows only.
    in_test = np.random.rand(len(holdout)) < 0.6
    DF_test = holdout[in_test]
    DF_eval = holdout[~in_test]

    print('Train(len) : {0}'.format(len(DF_train)))
    print('Test(len) : {0}'.format(len(DF_test)))
    print('Eval(len) : {0}'.format(len(DF_eval)))
# Script driver. Uncomment the calls below to run the other steps;
# usr_load_sammi() has to run before either plotting function because both
# read the DF_sammi frame that it fills in.
#usr_load_sammi()
#usr_scatter_plot('GAINED','BPOUND')
#usr_histogram_plot('BPOUND')
# NOTE(review): no print_mean function is defined in the visible part of
# this file.
#print_mean()
# Only the random train/test/eval split is currently run.
usr_random_splitDF()