-
Notifications
You must be signed in to change notification settings - Fork 18
/
statistic.py
61 lines (52 loc) · 2 KB
/
statistic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import csv
import io
import os
import urllib.request
from bisect import bisect_left
from datetime import datetime
from datetime import timedelta

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.dates import date2num
def findNearestDate(alist, date, delta):
    """Return the element of ``alist`` closest to ``date`` within ``delta``.

    ``alist`` must be sorted in ascending order. Only the two elements that
    bracket ``date`` can be the closest ones, so they are located with a
    binary search and compared directly.

    Args:
        alist: Ascending sequence of values that can be compared with and
            subtracted from ``date``.
        date: Value to look for.
        delta: Tolerance; a candidate qualifies only when
            ``abs(candidate - date) < delta`` (strict comparison).

    Returns:
        The qualifying element closest to ``date`` (the earlier one on a
        tie), or ``None`` when no element lies within the tolerance.
    """
    pos = bisect_left(alist, date)
    nearest = None
    nearest_gap = None
    # alist[pos - 1] is the last element below `date` and alist[pos] is the
    # first one at or above it; the slice drops whichever does not exist.
    for candidate in alist[max(pos - 1, 0):pos + 1]:
        gap = abs(candidate - date)
        if gap < delta and (nearest is None or gap < nearest_gap):
            nearest, nearest_gap = candidate, gap
    return nearest
def mergeData(raw_date, CPM, date):
    """Merge radiation data onto the weather-data timestamps.

    Each entry of ``date`` is matched, via ``findNearestDate``, to a
    timestamp of ``raw_date`` lying within five minutes of it, and the CPM
    value stored at that timestamp's position is taken.

    Args:
        raw_date: Timestamps of the radiation data. They must be sorted in
            ascending order because ``findNearestDate`` uses a binary search.
        CPM: Radiation readings, indexed in parallel with ``raw_date``.
        date: Timestamps of the weather data.

    Returns:
        A list with one CPM value per entry of ``date``, in the same order.

    Raises:
        ValueError: If an entry of ``date`` has no radiation timestamp
            within the five-minute tolerance.
    """
    delta = timedelta(minutes=5)

    # Record the first position of every timestamp once, instead of scanning
    # raw_date with list.index() for each weather sample.
    first_index = {}
    for position, stamp in enumerate(raw_date):
        first_index.setdefault(stamp, position)

    merge = []
    for target in date:
        new_date = findNearestDate(raw_date, target, delta)
        if new_date is None:
            # Same exception type list.index() used to raise, with a message
            # that names the timestamp that could not be matched.
            raise ValueError(
                "no radiation sample within %s of %s" % (delta, target)
            )
        merge.append(CPM[first_index[new_date]])
    return merge
def calculateCorrelationCoefficient(data_x, data_y):
    """Compute the correlation coefficient of two series plus their spread and mean.

    r = sum((x_i - x_avg) * (y_i - y_avg))
        / sqrt(sum((x_i - x_avg)^2) * sum((y_i - y_avg)^2))

    Returns:
        A tuple ``(r, x_std, y_std, x_avg, y_avg)``. The second and third
        items are standard deviations: the square root of the sum of squared
        deviations divided by ``len - 1``.
    """
    count_x = len(data_x)
    count_y = len(data_y)
    x_avg = sum(data_x) / count_x
    y_avg = sum(data_y) / count_y

    # Deviations from the mean; both series are indexed over data_x's length.
    x_dev = [data_x[k] - x_avg for k in range(count_x)]
    y_dev = [data_y[k] - y_avg for k in range(count_x)]

    cross_sum = 0
    x_sq_sum = 0
    y_sq_sum = 0
    for dx, dy in zip(x_dev, y_dev):
        cross_sum += dx * dy
        x_sq_sum += dx * dx
        y_sq_sum += dy * dy

    x_spread = np.sqrt(x_sq_sum / (count_x - 1))
    y_spread = np.sqrt(y_sq_sum / (count_y - 1))
    pearson_r = cross_sum / np.sqrt(x_sq_sum * y_sq_sum)
    return pearson_r, x_spread, y_spread, x_avg, y_avg