取り敢えず・・・

in_file = "data.csv"
out_file = "kion10y.csv"

# Read the raw CSV line by line; the context manager closes the handle
# as soon as all lines are in memory.
with open(in_file, encoding="utf8") as df1:
    raw_lines = df1.readlines()

# Drop the first five lines of the raw file and prepend a clean header.
# The header must not contain spaces after the commas: pandas keeps them as
# part of the column names (" 月" instead of "月"), and later lookups such
# as row['月'] would then raise KeyError.
df = ["年,月,日,気温,品質,均質\n"] + raw_lines[5:]
# Replace the slashes of the date field with commas so that year, month and
# day become separate columns.
df = [line.replace('/', ',') for line in df]
result = "".join(df).strip()
print(result)

年月日気温品質均質
2008,1,1,6,8,1
2008,1,2,6.2,8,1
・・・・・・・・
2018,12,30,4.8,8,2
2018,12,31,4.9,8,2

# 結果をファイルへ出力する
# Write the cleaned CSV text to the output file. The context manager flushes
# and closes the handle deterministically, so the data is on disk before the
# file is read back with pandas (the original relied on the interpreter
# closing the abandoned handle).
with open(out_file, "wt", encoding="utf8") as new_df:
    new_df.write(result)

import pandas as pd
# Load the cleaned temperature CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="kion10y.csv", encoding="utf-8")
# Preview the first five rows; the value is only rendered when this runs in
# an interactive session or notebook.
df.head(n=5)

　　　　　年　　月　　日　　気温　　品質　　均質

0 2008 1 1 6 8 1

1 2008 1 2 6.2 8 1

2 2008 1 3 5.9 8 1

3 2008 1 4 7 8 1

4 2008 1 5 6 8 1

import pandas as pd

# Load the CSV holding 11 years of Tokyo temperature data with pandas.
df = pd.read_csv("kion10y.csv", encoding="utf-8")

# Collect the temperatures of each calendar day into one list per day.
# Everything goes through a dict first so that leap days are handled too:
# a day that does not occur every year simply ends up with a shorter list.
md = {}
for m, d, c in zip(df['月'], df['日'], df['気温']):
    # Drop the year and use "month/day" as the key so all years are merged.
    key = str(int(m)) + "/" + str(int(d))
    md.setdefault(key, []).append(float(c))  # temperature in Celsius

# Average per day. Dividing by the length of the day's own list (not a fixed
# number of years) keeps the mean correct for days with fewer samples.
avs = {}
for key, temps in md.items():
    c = avs[key] = sum(temps) / len(temps)
    print("{0} : {1}".format(key, c))

1/1 : 6.381818181818182
1/2 : 6.636363636363637
1/3 : 6.2
・・・・・・
12/29 : 6.336363636363635
12/30 : 6.609090909090909
12/31 : 6.318181818181818

月ごとの平均も出してみる

# Group the rows by month and take the mean temperature of each group.
# GroupBy.mean() replaces the hand-written sum() / count().
g = df.groupby(['月'])["気温"]
group_g = g.mean()
# Print the result
print(group_g)

月
1      5.795308
2      6.326688
3      9.951906
4     14.870606
5     19.705279
6     22.422424
7     26.931085
8     27.702346
9     24.082424
10    18.918768
11    13.326061
12     8.329032
Name: 気温, dtype: float64

from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the 11 years of temperature records.
df = pd.read_csv("kion10y.csv", encoding="utf-8")

# Number of preceding days used as input features for one prediction.
interval = 3

# Boolean row masks: 2017 and earlier for training, 2018 onward for testing.
train_year = df["年"].le(2017)
test_year = df["年"].ge(2018)

def make_data(data, window=None):
    """Build sliding-window samples from the "気温" (temperature) column.

    Each sample uses the ``window`` values immediately preceding a day as
    features and that day's value as the label.

    Args:
        data: Mapping-like object (e.g. a DataFrame) whose "気温" entry is
            an iterable of temperatures in chronological order.
        window: Number of preceding values per sample. Defaults to the
            module-level ``interval``.

    Returns:
        Tuple ``(x, y)``: ``x`` is a list of feature lists of length
        ``window``; ``y`` is the list of matching labels. Both are empty
        when there are not more than ``window`` values.
    """
    if window is None:
        window = interval
    temps = list(data["気温"])
    # Start at index `window`, the first position with a full history.
    # The original `i <= interval` check skipped that first valid sample.
    x = [temps[i - window:i] for i in range(window, len(temps))]
    y = temps[window:]
    return (x, y)

# Build the sliding-window samples for the training and the test years.
train_x, train_y = make_data(df[train_year])
test_x, test_y = make_data(df[test_year])

# Fit a linear regression model.
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in
# 1.2, so passing it raises TypeError on current releases. For an ordinary
# least-squares fit, dropping it should leave the predictions unchanged.
lr = LinearRegression()
lr.fit(train_x, train_y)    # train
pre_y = lr.predict(test_x)  # predict

# Plot the observed temperatures (cyan) against the predictions (magenta).
plt.figure(figsize=(10, 6), dpi=100)
plt.plot(test_y, c='c')
plt.plot(pre_y, c='m')
plt.savefig('predict-kion-lr.png')
plt.show()

python3Xのブログ

ここでは40代、50代の方が日々の生活で役に立つ情報や私の趣味であるプログラム、Excelや科学に関する内容で投稿する予定です。

取り敢えず・・・