/
get-japan-vaccine.py
32 lines (32 loc) · 1.69 KB
/
get-japan-vaccine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import datetime as dt
from itertools import product
import re
# Workbook with vaccination counts by age group, downloaded from kantei.go.jp.
url = "https://www.kantei.go.jp/jp/content/nenreikaikyubetsu-vaccination_data.xlsx"
# CSV history files maintained by this script (one row per report date).
data_fname = "data/CoVid19-Japan-vaccine_by_age.csv"
age_data_fname = "data/CoVid19-Japan-vaccine_pref_by_age.csv"


def parse_report_date(text: str, today: "dt.datetime | None" = None) -> dt.datetime:
    """Extract the report date from a cell such as ``"...6月10日..."``.

    The cell carries only month and day, so the year has to be inferred.
    The year (previous, current or next relative to *today*) that puts the
    date closest to *today* is chosen.  This keeps a late-December report
    fetched in early January in the previous year, and tolerates a small
    clock/timezone offset between the publisher and the machine running
    the script.

    Args:
        text: Cell content containing ``<month>月<day>日``.
        today: Reference "now" (naive datetime); defaults to
            ``dt.datetime.today()``.  Exposed mainly for testing.

    Returns:
        The report date as a naive ``datetime`` at midnight.

    Raises:
        ValueError: If *text* contains no ``<month>月<day>日`` pattern or
            the month/day do not form a valid calendar date.
    """
    match = re.search(r"(\d+)月(\d+)日", text) if isinstance(text, str) else None
    if match is None:
        raise ValueError(f"no '<month>月<day>日' date found in {text!r}")
    month, day = (int(group) for group in match.groups())

    if today is None:
        today = dt.datetime.today()

    candidates = []
    for year in (today.year - 1, today.year, today.year + 1):
        try:
            candidates.append(dt.datetime(year, month, day))
        except ValueError:
            # Not a valid date in this year (e.g. Feb 29 in a non-leap year).
            continue
    if not candidates:
        raise ValueError(f"invalid month/day in {text!r}")
    return min(candidates, key=lambda candidate: abs(candidate - today))


def build_national_row(df_num: pd.DataFrame, date_label: str) -> pd.DataFrame:
    """Flatten a count table into a single wide row indexed by *date_label*.

    The resulting columns are a two-level MultiIndex of
    ``(column label, row label)`` pairs of *df_num*, ordered column by
    column, and all values are cast to ``int``.
    """
    columns = pd.MultiIndex.from_tuples(list(product(df_num.columns, df_num.index)))
    # Transposing first makes the flattened values follow the same
    # column-major order as the (column, row) pairs generated above.
    values = df_num.values.T.reshape(1, df_num.values.size)
    return pd.DataFrame(values, columns=columns, index=[date_label]).astype(int)


def build_prefecture_row(age_row_df: pd.DataFrame, date_label: str) -> pd.DataFrame:
    """Stack a table into one wide row indexed by *date_label*.

    The resulting columns are ``(row label, column label)`` pairs of
    *age_row_df*, ordered row by row.
    """
    return age_row_df.stack().to_frame(name=date_label).T


def update_csv(fname: str, new_row: pd.DataFrame, *, prepend: bool = False) -> bool:
    """Add *new_row* to the CSV history at *fname* unless its date is present.

    Args:
        fname: Path of an existing CSV with a two-row column header and the
            date label in the first column.
        new_row: Single-row frame whose index label is the report date.
        prepend: Put the new row before the existing rows instead of after.

    Returns:
        ``True`` if the file was rewritten, ``False`` if the date was
        already recorded and the file was left untouched.
    """
    existing = pd.read_csv(fname, index_col=0, header=[0, 1])
    if new_row.index[0] in existing.index:
        return False
    frames = [new_row, existing] if prepend else [existing, new_row]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    pd.concat(frames).to_csv(fname)
    return True


def main() -> None:
    """Fetch the latest workbook and record its figures in both CSV files."""
    # Open the workbook once and reuse it for every sheet instead of
    # downloading the same file for each read_excel call.
    with pd.ExcelFile(url) as book:
        # Fetch the new data: the report date is read from the cell at
        # row index 2 / column index 11 of the nationwide sheet, and the
        # counts from the first three rows below the header at row index 8.
        latest_date = pd.read_excel(
            book, sheet_name="全国", header=None, index_col=None
        ).iat[2, 11]
        df_num = (
            pd.read_excel(book, sheet_name="全国", header=8, index_col=0)
            .iloc[:3, :]
            .astype(int)
        )
        latest_dt_str = parse_report_date(latest_date).strftime("%Y/%m/%d")

        # Add the nationwide figures (newest row first) if the date is new.
        update_csv(data_fname, build_national_row(df_num, latest_dt_str), prepend=True)

        # Also record the per-prefecture data (first 47 rows of the sheet),
        # appended after the existing rows.
        age_row_df = (
            pd.read_excel(book, sheet_name="都道府県別(人口)", header=3, index_col=0)
            .iloc[0:47]
            .astype(int)
        )
        update_csv(age_data_fname, build_prefecture_row(age_row_df, latest_dt_str))


if __name__ == "__main__":
    main()