-
Notifications
You must be signed in to change notification settings - Fork 1
/
Gaussian Code.py
202 lines (140 loc) · 6.12 KB
/
Gaussian Code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import math
import matplotlib.pyplot as plt
class Gaussian:
    """Gaussian distribution class for calculating and visualizing a
    Gaussian distribution.

    Attributes:
        mean (float): mean value of the distribution
        stdev (float): standard deviation of the distribution
        data (list of floats): values loaded by read_data_file (or assigned
            directly); empty until data is provided
    """

    def __init__(self, mu=0, sigma=1):
        """Create a distribution with the given mean and standard deviation.

        Args:
            mu (float): mean of the distribution
            sigma (float): standard deviation of the distribution
        """
        self.mean = mu
        self.stdev = sigma
        self.data = []

    def calculate_mean(self):
        """Calculate the mean of the data set and store it in self.mean.

        Args:
            None

        Returns:
            float: mean of the data set

        Raises:
            ValueError: if self.data is empty
        """
        if not self.data:
            raise ValueError("cannot calculate the mean of an empty data set")

        self.mean = sum(self.data) / len(self.data)
        return self.mean

    def calculate_stdev(self, sample=True):
        """Calculate the standard deviation of the data set and store it in
        self.stdev. The mean is recomputed from self.data first, so self.mean
        is updated as well.

        Args:
            sample (bool): whether the data represents a sample or population

        Returns:
            float: standard deviation of the data set

        Raises:
            ValueError: if there are too few data points (fewer than two for
                a sample, fewer than one for a population)
        """
        # A sample uses Bessel's correction (n - 1); a population divides by n.
        divisor = len(self.data) - 1 if sample else len(self.data)
        if divisor < 1:
            raise ValueError(
                "not enough data points to calculate the standard deviation"
            )

        # Recompute the mean so a stale self.mean can never skew the result.
        mean = self.calculate_mean()
        variance = sum((value - mean) ** 2 for value in self.data) / divisor

        self.stdev = math.sqrt(variance)
        return self.stdev

    def read_data_file(self, file_name, sample=True):
        """Read in data from a txt file. The txt file should have one number
        (float) per line; blank lines are skipped. The numbers are stored in
        the data attribute. After reading in the file, the mean and standard
        deviation are calculated.

        Args:
            file_name (string): name of a file to read from
            sample (bool): whether the data represents a sample or population

        Returns:
            None

        Raises:
            ValueError: if a non-blank line is not a number, or if the file
                holds too few values to compute the statistics
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_name) as file:
            data_list = [float(line) for line in file if line.strip()]

        self.data = data_list
        self.calculate_mean()
        self.calculate_stdev(sample)

    def plot_histogram(self):
        """Output a histogram of the instance variable data using the
        matplotlib pyplot library.

        Args:
            None

        Returns:
            None
        """
        plt.hist(self.data)
        plt.title('Histogram of Data')
        plt.xlabel('data')
        plt.ylabel('count')
        plt.show()

    def pdf(self, x):
        """Probability density function calculator for the gaussian
        distribution.

        Args:
            x (float): point for calculating the probability density function

        Returns:
            float: probability density function output

        Raises:
            ValueError: if self.stdev is not positive
        """
        if self.stdev <= 0:
            raise ValueError("standard deviation must be positive")

        coefficient = 1.0 / (self.stdev * math.sqrt(2 * math.pi))
        exponent = -0.5 * ((x - self.mean) / self.stdev) ** 2
        return coefficient * math.exp(exponent)

    def plot_histogram_pdf(self, n_spaces=50):
        """Plot the normalized histogram of the data and a plot of the
        probability density function along the same range.

        Args:
            n_spaces (int): number of data points

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot
        """
        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = (max_range - min_range) / n_spaces

        # calculate the x values to visualize and the density at each one
        x = [min_range + interval * i for i in range(n_spaces)]
        y = [self.pdf(value) for value in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
        # Label the second subplot (the original labelled axes[0] twice).
        axes[1].set_ylabel('Density')
        plt.show()

        return x, y
# Unit tests to check your solution
import unittest
class TestGaussianClass(unittest.TestCase):
    """Unit tests covering construction, pdf, mean and stdev of Gaussian."""

    def setUp(self):
        """Build a fresh Gaussian(25, 2) fixture before each test."""
        self.gaussian = Gaussian(25, 2)

    def test_initialization(self):
        """The constructor arguments are stored as mean and stdev."""
        dist = self.gaussian
        self.assertEqual(dist.mean, 25, 'incorrect mean')
        self.assertEqual(dist.stdev, 2, 'incorrect standard deviation')

    def test_pdf(self):
        """The density at x = 25, rounded to 5 places, is 0.19947."""
        density = round(self.gaussian.pdf(25), 5)
        self.assertEqual(density, 0.19947,
                         'pdf function does not give expected result')

    def test_meancalculation(self):
        """calculate_mean() matches sum/len of the data from 'numbers.txt'."""
        dist = self.gaussian
        dist.read_data_file('numbers.txt', True)
        actual = dist.calculate_mean()
        expected = sum(dist.data) / float(len(dist.data))
        self.assertEqual(actual, expected, 'calculated mean not as expected')

    def test_stdevcalculation(self):
        """stdev after loading 'numbers.txt' as a sample, then a population."""
        cases = (
            (True, 92.87, 'sample standard deviation incorrect'),
            (False, 88.55, 'population standard deviation incorrect'),
        )
        for is_sample, expected, message in cases:
            self.gaussian.read_data_file('numbers.txt', is_sample)
            self.assertEqual(round(self.gaussian.stdev, 2), expected, message)
# Run the unit tests only when this file is executed directly (script or
# notebook cell), not as a side effect of importing it.
if __name__ == "__main__":
    # loadTestsFromTestCase is the documented way to build a suite from a
    # TestCase class; handing a TestCase *instance* to loadTestsFromModule
    # only worked because dir() happens to expose the instance's __class__.
    tests_loaded = unittest.TestLoader().loadTestsFromTestCase(TestGaussianClass)
    unittest.TextTestRunner().run(tests_loaded)