Skip to content
This repository has been archived by the owner on Jul 15, 2022. It is now read-only.

参赛作品 #33

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 5 环境污染的预测/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
近年来,随着人工智能、大数据、云计算等技术的成熟,环保领域正向智能化转变,因为 AI 的加入环境保护充满了无限的想象,而其中环境污染预测就是重要一环——利用环境传感器、智能摄像头、环保设备 IoT 化,我们可以实现对空气、水源、噪声等污染情况进行实时监测及预警,做到“未病先知,未病先治”,进一步改善环境污染。

请以“环境污染的预测”为主题,利用人工智能技术完成一款产品(软件或硬件)的开发。

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pandas as pd
import matplotlib as mpl
import seaborn as sns
import csv

from sklearn import preprocessing
from matplotlib import pyplot as plt

def Normalization():
    """Scale selected dataset columns and write them to ``eth2.csv``.

    Reads ``simpleDataSet.csv``, passes each of the columns PM10, SO2, NO2,
    CO, PRES and DEWP through ``sklearn.preprocessing.scale`` independently,
    prints how many columns were scaled, and writes the scaled values to
    ``eth2.csv`` (relative to the current working directory). The output has
    no header row; each CSV row holds the six scaled values of one input row,
    in the column order listed above.

    Returns:
        None. The result is the file written to disk.
    """
    # Raw string: the value is unchanged, but it no longer depends on "\P" and
    # "\s" being *invalid* escape sequences, which newer Python versions warn
    # about and may reject in the future.
    data_file = r"..\PRSA2017_Data_20130301-20170228\PRSA_Data_20130301-20170228\simpleDataSet.csv"
    pd_data = pd.read_csv(data_file)

    columns = ["PM10", "SO2", "NO2", "CO", "PRES", "DEWP"]
    scaled_columns = [
        list(preprocessing.scale(pd_data.loc[:, name])) for name in columns
    ]

    print(len(scaled_columns))

    # newline='' is required by the csv module; without it every row is
    # followed by an extra blank line on platforms that translate "\n".
    with open('eth2.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        # Transpose column-wise lists into per-sample rows.
        writer.writerows(zip(*scaled_columns))
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# 基于线性回归的PM2.5的预测模型

## 作品介绍

### 背景

随着经济社会发展,空气污染情况成为我们不得不关注的问题,其中PM2.5为重要的空气质量评价指标,本项目使用`sklearn`进行线性回归建模,并进行预测。

### 截图

![](./Figure_2.png)

经过绘图发现,PM2.5和空气中$SO_2$、$NO_2$、$CO$以及气压、露点温度近似成线性关系,因此使用sklearn中的线性回归模型进行训练、拟合并预测。

![](./Figure_1.png)

取数据中的$20\%$作为测试数据验证模型的可靠性,如上图所示。

**数据来源**:

[UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets.php?format=mat&task=&att=&area=phys&numAtt=10to100&numIns=&type=ts&sort=attUp&view=list)

## 团队:今天睡醒了吗

**成员**:iLern

**联系方式**:

> email: [email protected]
>
> qq: 416138794

## 使用到的AWS技术

`Amazon SageMaker`

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pandas as pd
import matplotlib as mpl
import seaborn as sns

from matplotlib import pyplot as plt

# Alternative input file, kept for reference:
# data_file = r"..\PRSA2017_Data_20130301-20170228\PRSA_Data_20130301-20170228\PRSA_Data_Aotizhongxin_20130301-20170228.csv"
# Raw string so the Windows-style backslashes are taken literally instead of
# relying on "\P" and "\s" being invalid escape sequences (which Python warns about).
data_file = r"..\PRSA2017_Data_20130301-20170228\PRSA_Data_20130301-20170228\simpleDataSet.csv"

def display_data():
    """Preview the dataset and plot each feature against PM2.5.

    Loads the CSV named by the module-level ``data_file``, prints its first
    ten rows, then draws a seaborn pair plot (``kind="reg"``) with PM2.5 on
    the y-axis and PM10, SO2, NO2, CO, PRES and DEWP on the x-axes, and
    finally shows the figure.
    """
    frame = pd.read_csv(data_file)
    preview = frame.head(10)
    print(f'pd_data.head(10) = \n{preview}')

    feature_columns = ["PM10", "SO2", "NO2", "CO", "PRES", "DEWP"]
    target_columns = ["PM2.5"]

    # Render minus signs with the ASCII hyphen rather than the Unicode minus glyph.
    mpl.rcParams['axes.unicode_minus'] = False

    sns.pairplot(
        frame,
        x_vars=feature_columns,
        y_vars=target_columns,
        kind="reg",
        height=5,
        aspect=0.7,
    )
    plt.show()

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import display_lr
import Normalization
import predict

if __name__ == "__main__":
    # Script entry point. Only the regression step runs by default; the two
    # calls below are optional steps that are currently switched off:
    #   display_lr.display_data()
    #   Normalization.Normalization()
    predict.build_lr()
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-09-13T13:56:40.324016Z",
"start_time": "2020-09-13T13:56:36.982915Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib as mpl\n",
"import seaborn as sns\n",
"import numpy as np\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_file = \"..\\PRSA2017_Data_20130301-20170228\\PRSA_Data_20130301-20170228\\simpleDataSet.csv\"\n",
"def build_lr():\n",
" pd_data = pd.read_csv(data_file)\n",
"\n",
" X = pd_data.loc[:, (\"PM10\", \"SO2\",\"NO2\",\"CO\",\"PRES\",\"DEWP\")]\n",
" y = pd_data.loc[:, \"PM2.5\"]\n",
" \n",
" #选择20%为测试集\n",
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=532) \n",
"\n",
" print('训练集测试及参数:')\n",
" print(f'X_train.shape={X_train.shape}\\ny_train.shape ={y_train.shape}\\nX_test.shape={X_test.shape}\\ny_test.shape={y_test.shape}')\n",
"\n",
" linear_reg = LinearRegression()\n",
" model = linear_reg.fit(X_train, y_train)\n",
"\n",
" print('模型参数')\n",
" print(model)\n",
"\n",
" print('模型截距')\n",
" print(linear_reg.intercept_)\n",
"\n",
" print('参数权重')\n",
" print(linear_reg.coef_)\n",
"\n",
" y_pred = linear_reg.predict(X_test)\n",
" sum_mean = 0\n",
"\n",
" for i in range(len(y_pred)):\n",
" sum_mean += (y_pred[i] - y_test.values[i]) ** 2\n",
"\n",
" sum_erro = np.sqrt(sum_mean / len(y_pred))\n",
"\n",
" print(sum_erro)\n",
"\n",
" plt.figure()\n",
" plt.figure()\n",
" plt.plot(range(len(y_pred)), y_pred, 'b', label=\"predict\")\n",
" plt.plot(range(len(y_pred)), y_test, 'r', label=\"test\")\n",
" plt.legend(loc=\"upper right\")\n",
" plt.xlabel(\"the number of sales\")\n",
" plt.ylabel('value of sales')\n",
" plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pandas as pd
import matplotlib as mpl
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt


# Raw string so the Windows-style backslashes are taken literally instead of
# relying on "\P" and "\s" being invalid escape sequences (which Python warns about).
data_file = r"..\PRSA2017_Data_20130301-20170228\PRSA_Data_20130301-20170228\simpleDataSet.csv"
def build_lr():
    """Fit a linear regression for PM2.5 and evaluate it on a held-out split.

    Loads the CSV named by the module-level ``data_file``, uses the columns
    PM10, SO2, NO2, CO, PRES and DEWP as features and PM2.5 as the target,
    and splits the rows 80/20 into training and test sets with a fixed
    random seed. It then prints the split shapes, the fitted estimator, its
    intercept and coefficients, and the root-mean-square error on the test
    set, and finally plots predicted against actual test values.

    Returns:
        None. All results are printed or shown in a matplotlib figure.
    """
    pd_data = pd.read_csv(data_file)

    feature_names = ["PM10", "SO2", "NO2", "CO", "PRES", "DEWP"]
    X = pd_data.loc[:, feature_names]
    y = pd_data.loc[:, "PM2.5"]

    # Hold out 20% of the rows as the test set; the fixed seed keeps the
    # split identical between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=532)

    print('训练集测试及参数:')
    print(f'X_train.shape={X_train.shape}\ny_train.shape ={y_train.shape}\nX_test.shape={X_test.shape}\ny_test.shape={y_test.shape}')

    linear_reg = LinearRegression()
    model = linear_reg.fit(X_train, y_train)

    print('模型参数')
    print(model)

    print('模型截距')
    print(linear_reg.intercept_)

    print('参数权重')
    print(linear_reg.coef_)

    y_pred = linear_reg.predict(X_test)

    # Root-mean-square error over the test set, computed in one vectorized
    # expression instead of an element-by-element Python loop.
    rmse = np.sqrt(np.mean((y_pred - y_test.values) ** 2))
    print(rmse)

    # A single figure: the original opened two, leaving one empty window.
    sample_index = range(len(y_pred))
    plt.figure()
    plt.plot(sample_index, y_pred, 'b', label="predict")
    plt.plot(sample_index, y_test, 'r', label="test")
    plt.legend(loc="upper right")
    # Axis labels describe what is actually plotted (previously "sales").
    plt.xlabel("test sample index")
    plt.ylabel("PM2.5")
    plt.show()