diff --git a/dataacquisition/DwdAcquisition.py b/dataacquisition/DwdAcquisition.py index 48d1d7618c31349b3f6185795edab0979bc603ae..eaa4ed130df4409d04b3262461f47dde640fedb9 100644 --- a/dataacquisition/DwdAcquisition.py +++ b/dataacquisition/DwdAcquisition.py @@ -8,6 +8,8 @@ import pickle import numpy as np import ExportToWorldShape as exportToWorldShape import ExportToDatabase as exportToDatabase +from IPython.display import display +from pip._internal.utils.misc import tabulate """ example files: @@ -190,11 +192,15 @@ def start(): loadDWDGauges() stationList = stationList.loc[stationList['country'] == "Germany"] - # loadAndJoinDWDClimateFilenames() - # loadTemperatureFromDWDGauges() - with open("./pickle/stationList_germany.pickle", "wb") as pickleFile: - pickle.dump(stationList, pickleFile) - + loadAndJoinDWDClimateFilenames() + loadTemperatureFromDWDGauges() + + # with open("./pickle/stationList_germany.pickle", "wb") as pickleFile: + # pickle.dump(stationList, pickleFile) + # stationList = pickle.load(pickleFile) + # stationList = pd.read_pickle('./pickle/stationList_germany.pickle') + + # export station list to different outputs #exportToWorldShape.export(stationList) diff --git a/dataacquisition/ExportToDatabase.py b/dataacquisition/ExportToDatabase.py index 8b118fc621ab1ffd6b9e54280a4ac776c68f703b..9f8ea8a8955bc38a640e3953d158bee77fd030f7 100644 --- a/dataacquisition/ExportToDatabase.py +++ b/dataacquisition/ExportToDatabase.py @@ -16,7 +16,7 @@ stationGPD = None # Connect to DB "postgres" to check for database "temperatures_berteld_morstein" -def check_for_db_existence(): +def check_for_db_existence(stationList): print("Checking for database existence") with psycopg2.connect(database='postgres', user='postgres', password='postgres', host='localhost', port=5432) as connection: with connection.cursor() as cursor: @@ -30,7 +30,7 @@ def check_for_db_existence(): print('DB not existing') try: create_db(connection, cursor) - create_table() + create_table(stationList) print('Successfully created database and table') except (Exception, psycopg2.DatabaseError) as error: print(error) @@ -45,10 +45,17 @@ def create_db(connection, cursor): # Connect to DB "temperatures_berteld_morstein" to create table "temperatures" -def create_table(): +def create_table(stationList): + df_columns = list(stationList) + columns = ['id INTEGER', 'lon NUMERIC', 'lat NUMERIC', 'country TEXT', 'file TEXT'] + for column in df_columns: + if str(column).startswith('19') or str(column).startswith('20'): + columns.append('"{}" NUMERIC'.format(column)) + columns_clean = str(columns).strip('[]').replace("'", "") + with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection: with connection.cursor() as cursor: - query = sql.SQL("CREATE TABLE temperatures (id INTEGER, lon NUMERIC, lat NUMERIC, country TEXT);") + query = sql.SQL("""CREATE TABLE stations ({});""".format(columns_clean)) print(query) cursor.execute(query) @@ -60,19 +67,34 @@ def insert_data(stationList): if len(stationList) > 0: df_columns = list(stationList) # create (col1,col2,...) - columns = ",".join(df_columns) + + # As integers like 2018, 2017, etc. are not possible as column names, double quotes have to be added. This requires some tricks and cleanups + columns = [] + for column in df_columns: + columns.append('"' + column + '"') + columns = str(columns).replace('[', '').replace(']', '').replace("'", "").replace('\n', '').replace(' ', '') + + values = [] + for value in df_columns: + if isinstance(value, str): + value = value.replace('NaN', 'NULL') + elif isinstance(value, float): + value = round(value, 3) + values.append(value) + + values = "VALUES({})".format(",".join(values)) + # create VALUES('%s', '%s",...) one '%s' per column - values = "VALUES({})".format(",".join(["%s" for _ in df_columns])) + # values = "VALUES({})".format(",".join(["%s" for _ in df_columns])) # create INSERT INTO table (columns) VALUES('%s',...) - insert_stmt = "INSERT INTO {} ({}) {}".format('temperatures', columns, values) - + insert_stmt = """INSERT INTO {} ({}) {}""".format('stations', columns, values) psycopg2.extras.execute_batch(cursor, insert_stmt, stationList.values) def export(stationList): - check_for_db_existence() + check_for_db_existence(stationList) insert_data(stationList) # return stationList diff --git a/dataacquisition/pickle/stationList_germany.pickle b/dataacquisition/pickle/stationList_germany.pickle index cfe9b01de45901830483a9b5abc320c9e59529f1..4b638258aadebfe2cf7584c508c03244eea8f084 100644 Binary files a/dataacquisition/pickle/stationList_germany.pickle and b/dataacquisition/pickle/stationList_germany.pickle differ diff --git a/dataacquisition/testing.py b/dataacquisition/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..d8150d8dbea9adc4f138e28e9937a6c429450503 --- /dev/null +++ b/dataacquisition/testing.py @@ -0,0 +1,2 @@ +for i in range(1949, 2019): + print(('"{}" NUMERIC,').format(str(i)))