Skip to content
Snippets Groups Projects
Commit 3686de5e authored by Clemens Berteld's avatar Clemens Berteld
Browse files

Optimized DB creation

Rounding values doesn't work yet; the script will fail.
parent 0c1ae937
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,8 @@ import pickle ...@@ -8,6 +8,8 @@ import pickle
import numpy as np import numpy as np
import ExportToWorldShape as exportToWorldShape import ExportToWorldShape as exportToWorldShape
import ExportToDatabase as exportToDatabase import ExportToDatabase as exportToDatabase
from IPython.display import display
from pip._internal.utils.misc import tabulate
""" """
example files: example files:
...@@ -190,11 +192,15 @@ def start(): ...@@ -190,11 +192,15 @@ def start():
loadDWDGauges() loadDWDGauges()
stationList = stationList.loc[stationList['country'] == "Germany"] stationList = stationList.loc[stationList['country'] == "Germany"]
# loadAndJoinDWDClimateFilenames() loadAndJoinDWDClimateFilenames()
# loadTemperatureFromDWDGauges() loadTemperatureFromDWDGauges()
with open("./pickle/stationList_germany.pickle", "wb") as pickleFile:
pickle.dump(stationList, pickleFile) # with open("./pickle/stationList_germany.pickle", "wb") as pickleFile:
# pickle.dump(stationList, pickleFile)
# stationList = pickle.load(pickleFile)
# stationList = pd.read_pickle('./pickle/stationList_germany.pickle')
# export station list to different outputs # export station list to different outputs
#exportToWorldShape.export(stationList) #exportToWorldShape.export(stationList)
......
...@@ -16,7 +16,7 @@ stationGPD = None ...@@ -16,7 +16,7 @@ stationGPD = None
# Connect to DB "postgres" to check for database "temperatures_berteld_morstein" # Connect to DB "postgres" to check for database "temperatures_berteld_morstein"
def check_for_db_existence(): def check_for_db_existence(stationList):
print("Checking for database existence") print("Checking for database existence")
with psycopg2.connect(database='postgres', user='postgres', password='postgres', host='localhost', port=5432) as connection: with psycopg2.connect(database='postgres', user='postgres', password='postgres', host='localhost', port=5432) as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
...@@ -30,7 +30,7 @@ def check_for_db_existence(): ...@@ -30,7 +30,7 @@ def check_for_db_existence():
print('DB not existing') print('DB not existing')
try: try:
create_db(connection, cursor) create_db(connection, cursor)
create_table() create_table(stationList)
print('Successfully created database and table') print('Successfully created database and table')
except (Exception, psycopg2.DatabaseError) as error: except (Exception, psycopg2.DatabaseError) as error:
print(error) print(error)
...@@ -45,10 +45,17 @@ def create_db(connection, cursor): ...@@ -45,10 +45,17 @@ def create_db(connection, cursor):
# Connect to DB "temperatures_berteld_morstein" to create table "temperatures" # Connect to DB "temperatures_berteld_morstein" to create table "temperatures"
def create_table(): def create_table(stationList):
df_columns = list(stationList)
columns = ['id INTEGER', 'lon NUMERIC', 'lat NUMERIC', 'country TEXT', 'file TEXT']
for column in df_columns:
if str(column).startswith('19') or str(column).startswith('20'):
columns.append('"{}" NUMERIC'.format(column))
columns_clean = str(columns).strip('[]').replace("'", "")
with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection: with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection:
with connection.cursor() as cursor: with connection.cursor() as cursor:
query = sql.SQL("CREATE TABLE temperatures (id INTEGER, lon NUMERIC, lat NUMERIC, country TEXT);") query = sql.SQL("""CREATE TABLE stations ({});""".format(columns_clean))
print(query) print(query)
cursor.execute(query) cursor.execute(query)
...@@ -60,19 +67,34 @@ def insert_data(stationList): ...@@ -60,19 +67,34 @@ def insert_data(stationList):
if len(stationList) > 0: if len(stationList) > 0:
df_columns = list(stationList) df_columns = list(stationList)
# create (col1,col2,...) # create (col1,col2,...)
columns = ",".join(df_columns)
# Column names that start with a digit (2018, 2017, etc.) are not valid unquoted SQL identifiers, so they have to be wrapped in double quotes. This requires some tricks and cleanups
columns = []
for column in df_columns:
columns.append('"' + column + '"')
columns = str(columns).replace('[', '').replace(']', '').replace("'", "").replace('\n', '').replace(' ', '')
values = []
for value in df_columns:
if isinstance(value, str):
value = value.replace('NaN', 'NULL')
elif isinstance(value, float):
value = round(value, 3)
values.append(value)
values = "VALUES({})".format(",".join(values))
# create VALUES('%s', '%s',...) one '%s' per column # create VALUES('%s', '%s',...) one '%s' per column
values = "VALUES({})".format(",".join(["%s" for _ in df_columns])) # values = "VALUES({})".format(",".join(["%s" for _ in df_columns]))
# create INSERT INTO table (columns) VALUES('%s',...) # create INSERT INTO table (columns) VALUES('%s',...)
insert_stmt = "INSERT INTO {} ({}) {}".format('temperatures', columns, values) insert_stmt = """INSERT INTO {} ({}) {}""".format('stations', columns, values)
psycopg2.extras.execute_batch(cursor, insert_stmt, stationList.values) psycopg2.extras.execute_batch(cursor, insert_stmt, stationList.values)
def export(stationList): def export(stationList):
check_for_db_existence() check_for_db_existence(stationList)
insert_data(stationList) insert_data(stationList)
# return stationList # return stationList
......
No preview for this file type
# Print one double-quoted NUMERIC column definition per year, from 1949
# through 2018, one per line, each ending in a comma.
# NOTE(review): presumably meant for pasting into a CREATE TABLE statement;
# confirm, since the last line also ends in a comma.
column_template = '"{}" NUMERIC,'
for year in range(1949, 2019):
    print(column_template.format(year))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment