1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
"""Collect one year of per-site air-quality CSV files into an HDF5 store.

For every feature listed in ``feature`` the script walks all ``.csv`` files
in ``./site_2017/``, picks that feature's row out of every hourly group of
rows, and writes the resulting table (one row per hour, one column per
site) into ``site_2017_whole_year.h5`` under the feature's name.
"""
import os
import string
import sys

import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import HDFStore
from pandas import read_hdf

# Number of consecutive CSV rows consumed per hourly record; the original
# loop stepped through each day file with this stride.
# NOTE(review): presumably one row per measurement type per hour -- confirm
# against the CSV format.
ROWS_PER_HOUR = 15

# Features exported to the store, in processing order.
feature = ['pm25', 'pm10', 'O3', 'O3_8h', 'CO', "NO2", 'SO2', "aqi"]

# Row offset of each feature inside one ROWS_PER_HOUR-row group.
fe_dict = {"pm25": 1, "aqi": 0, 'pm10': 3, 'SO2': 5, 'NO2': 7,
           'O3': 9, "O3_8h": 11, "CO": 13}


def extract_feature_rows(f_day, site_columns, offset,
                         rows_per_hour=ROWS_PER_HOUR):
    """Pull one feature's values out of a single day's frame.

    The frame is read as consecutive groups of ``rows_per_hour`` rows. For
    each complete group, the ``date`` and ``hour`` are taken from the
    group's first row and the site values from the row at ``offset`` within
    the group. A trailing incomplete group is ignored so that every
    returned list has the same length (the original code raised
    ``IndexError`` halfway through appending in that situation).

    Args:
        f_day: DataFrame with ``date`` and ``hour`` columns plus one column
            per entry of ``site_columns``.
        site_columns: names of the site columns to extract.
        offset: row index of the wanted feature inside each group; must be
            smaller than ``rows_per_hour``.
        rows_per_hour: number of rows that make up one group.

    Returns:
        A ``(dates, hours, values)`` tuple: ``dates`` is a list of
        ``str(date)``, ``hours`` a list of zero-padded two-digit strings,
        and ``values`` a dict mapping each site column to its list of
        values, all with one entry per complete group.

    Raises:
        KeyError: if ``f_day`` lacks ``date``, ``hour`` or a site column.
    """
    complete_rows = (len(f_day) // rows_per_hour) * rows_per_hour
    group_heads = f_day.iloc[0:complete_rows:rows_per_hour]
    feature_rows = f_day.iloc[offset:complete_rows:rows_per_hour]

    dates = [str(day) for day in group_heads["date"]]
    hours = ["%02d" % hour for hour in group_heads["hour"]]
    values = {site: feature_rows[site].tolist() for site in site_columns}
    return dates, hours, values


def build_feature_table(files, site_columns, offset):
    """Assemble the whole-period table for one feature.

    Every name in ``files`` is printed; names whose extension is ``.csv``
    are read (relative to the current working directory) and their rows
    for the feature at ``offset`` are appended in order.

    Args:
        files: file names to scan, already in the desired order.
        site_columns: names of the site columns to collect.
        offset: row offset of the feature inside each hourly group.

    Returns:
        A DataFrame with ``date``, ``hour`` and one column per site.
    """
    columns = {"date": [], 'hour': []}
    for site in site_columns:
        columns[site] = []

    for name in files:
        print(name)
        if os.path.splitext(name)[1] != ".csv":
            continue
        f_day = pd.read_csv(name, encoding='utf_8')

        leftover = len(f_day) % ROWS_PER_HOUR
        if leftover:
            # Make the dropped rows visible instead of failing mid-append.
            print("warning: %s has %d trailing rows that do not form a "
                  "complete %d-row group; skipping them"
                  % (name, leftover, ROWS_PER_HOUR))

        dates, hours, values = extract_feature_rows(f_day, site_columns,
                                                    offset)
        columns["date"].extend(dates)
        columns["hour"].extend(hours)
        for site in site_columns:
            columns[site].extend(values[site])

    return pd.DataFrame(columns)


def main():
    """Run the export: one HDF5 table per feature."""
    # Open the store before changing directory so it is created in the
    # directory the script was launched from; the ``with`` block guarantees
    # the file is flushed and closed even if a later step fails.
    with HDFStore("site_2017_whole_year.h5") as hdf:
        os.chdir("./site_2017/")
        files = os.listdir("./")
        files.sort()
        if files:
            print(files[0])

        # The site list is taken from one reference file; everything after
        # the first three columns is treated as a site column.
        test_file = "./../china_sites_20170101.csv"
        test_f = pd.read_csv(test_file, encoding='utf_8')
        site_columns = list(test_f.columns[3:])
        if len(site_columns) > 1:
            print(site_columns[1])

        # Files are re-read once per feature so that only a single
        # feature's table is held in memory at a time.
        for name in feature:
            data_2017 = build_feature_table(files, site_columns,
                                            fe_dict[name])
            print(name)
            hdf.put(name, data_2017, format='table', encoding="utf-8")


if __name__ == "__main__":
    main()
|
Kommentare