diff --git a/dataacquisition/DwdAcquisition.py b/dataacquisition/DwdAcquisition.py index c1177122ba468d4950f26ba00c5e53a7d013c3b5..48d1d7618c31349b3f6185795edab0979bc603ae 100644 --- a/dataacquisition/DwdAcquisition.py +++ b/dataacquisition/DwdAcquisition.py @@ -97,6 +97,7 @@ def loadAndJoinDWDClimateFilenames(): # with open("stationList.pickle","wb") as pf: # pickle.dump(stationList, pf) + # here we have to try some interpolations for missing values def fillMissingData(annualData): months = ["Jan", "Feb", "Mrz","Apr","Mai","Jun","Jul","Aug","Sep","Okt","Nov","Dez"] @@ -145,6 +146,7 @@ def fillMissingData(annualData): return annualData + # load Temperatures from DWD gauges def loadTemperatureFromDWDGauges(): global climateCountry @@ -180,15 +182,16 @@ def loadTemperatureFromDWDGauges(): stationList.columns = stationList.columns.astype(str) stationList = stationList.sort_index(axis=1, ascending=False) - + + def start(): global stationList print("___ DWD Acquisition start___") loadDWDGauges() - stationList = stationList.loc[stationList['country']=="Germany"] - loadAndJoinDWDClimateFilenames() - loadTemperatureFromDWDGauges() + stationList = stationList.loc[stationList['country'] == "Germany"] + # loadAndJoinDWDClimateFilenames() + # loadTemperatureFromDWDGauges() with open("./pickle/stationList_germany.pickle", "wb") as pickleFile: pickle.dump(stationList, pickleFile) @@ -198,6 +201,7 @@ def start(): exportToDatabase.export(stationList) print("___DWD Acquisition finished___") - + + if __name__ == '__main__': - start() \ No newline at end of file + start() diff --git a/dataacquisition/ExportToDatabase.py b/dataacquisition/ExportToDatabase.py index 3bd6e1b699040889854f66f1af80db71d32025c8..8b118fc621ab1ffd6b9e54280a4ac776c68f703b 100644 --- a/dataacquisition/ExportToDatabase.py +++ b/dataacquisition/ExportToDatabase.py @@ -7,10 +7,73 @@ export the stationlist to database @author: geopeter """ +import psycopg2 +import psycopg2.extras +from psycopg2 import sql +from 
psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT stationGPD = None + +# Connect to DB "postgres" to check for database "temperatures_berteld_morstein" +def check_for_db_existence(): + print("Checking for database existence") + with psycopg2.connect(database='postgres', user='postgres', password='postgres', host='localhost', port=5432) as connection: + with connection.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database WHERE datname LIKE 'temperatures_berteld_morstein';") + + try: + db_exists = cursor.fetchall()[0] + print('DB existing') + + except IndexError: # DB temperatures_berteld_morstein doesn't exist + print('DB not existing') + try: + create_db(connection, cursor) + create_table() + print('Successfully created database and table') + except (Exception, psycopg2.DatabaseError) as error: + print(error) + + +# Use existing connection to DB "postgres" to create DB "temperatures_berteld_morstein" +def create_db(connection, cursor): + connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) # Needs to be in AUTOCOMMIT mode for creating database + create_db_query = sql.SQL("CREATE DATABASE temperatures_berteld_morstein;") + print(create_db_query) + cursor.execute(create_db_query) + + +# Connect to DB "temperatures_berteld_morstein" to create table "temperatures" +def create_table(): + with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection: + with connection.cursor() as cursor: + query = sql.SQL("CREATE TABLE temperatures (id INTEGER, lon NUMERIC, lat NUMERIC, country TEXT);") + print(query) + cursor.execute(query) + + +def insert_data(stationList): + with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection: + with connection.cursor() as cursor: + + if len(stationList) > 0: + df_columns = list(stationList) + # create (col1,col2,...) 
+ columns = ",".join(df_columns) + + # create VALUES('%s', '%s",...) one '%s' per column + values = "VALUES({})".format(",".join(["%s" for _ in df_columns])) + + # create INSERT INTO table (columns) VALUES('%s',...) + insert_stmt = "INSERT INTO {} ({}) {}".format('temperatures', columns, values) + + psycopg2.extras.execute_batch(cursor, insert_stmt, stationList.values) + + def export(stationList): - print("export the stationList to database") - - return stationList \ No newline at end of file + check_for_db_existence() + insert_data(stationList) + + # return stationList + diff --git a/dataacquisition/ExportToWorldShape.py b/dataacquisition/ExportToWorldShape.py index a3248a3030d68bda3676de26e5bdb0981c6a8768..8d65bf4e44a489be6041c572bf1c644d71df3410 100644 --- a/dataacquisition/ExportToWorldShape.py +++ b/dataacquisition/ExportToWorldShape.py @@ -10,6 +10,7 @@ Export the stationlist to the geopandas world shape file import numpy as np import geopandas as gpd + def buildAverageTimeseries(stationList, fromYear, toYear, name): meanAverage = [] for stationID, station in stationList.iterrows(): @@ -24,7 +25,8 @@ def buildAverageTimeseries(stationList, fromYear, toYear, name): meanAverage.append(np.NaN) stationList[name] = np.round(meanAverage,1) - + + def cleanAverageTimeseries(stationList): # determine gauges that includes both timeseries. If not delete them. 
@@ -35,6 +37,7 @@ def cleanAverageTimeseries(stationList): stationList.at[stationID, "m1961T1990"] = None stationList.at[stationID, "m1991T2018"] = None + def convertStationListToGPD(stationList): print("convert stationlist to GeoPandas") diff --git a/dataacquisition/pickle/stationList_germany.pickle b/dataacquisition/pickle/stationList_germany.pickle index e7c18a76be25b4cb9c2a0a9b9e4ece4695cc6e4b..cfe9b01de45901830483a9b5abc320c9e59529f1 100644 Binary files a/dataacquisition/pickle/stationList_germany.pickle and b/dataacquisition/pickle/stationList_germany.pickle differ diff --git a/dataacquisition/test_ExportToDatabase.py b/dataacquisition/test_ExportToDatabase.py index c31e11636006da85656266b53308d4fc76f7d204..3d1eec3d11adc886d57212c1c25106d0504b1192 100644 --- a/dataacquisition/test_ExportToDatabase.py +++ b/dataacquisition/test_ExportToDatabase.py @@ -10,8 +10,10 @@ import unittest import pickle import ExportToDatabase as cut + class TestExportToDatabase(unittest.TestCase): stationList = None + def testExport(self): global stationList with open("./pickle/stationList_germany.pickle", "rb") as pickleFile: diff --git a/dataacquisition/test_feed_db.py b/dataacquisition/test_feed_db.py new file mode 100644 index 0000000000000000000000000000000000000000..664b48c371925dec40fe838906cc835150ddc45b --- /dev/null +++ b/dataacquisition/test_feed_db.py @@ -0,0 +1,66 @@ +import urllib.request +import psycopg2 +from selenium import webdriver +from selenium.webdriver.firefox.firefox_profile import FirefoxProfile + + +def write_stations_to_db(): + url = "https://opendata.dwd.de/climate_environment/CDC/help/stations_list_CLIMAT_data.txt" + + for line in urllib.request.urlopen(url): + values = str(line.decode('latin1')).replace(' ', '').replace('\r', '').replace('\n', '').split(';') + if len(values) == 6: + station_id, name, lat, long, height, country = values + if country == 'Germany': + print(station_id, name, lat, long, height, country) + with 
psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: + with connection.cursor() as cursor: + sql = "INSERT INTO stations (station_id, name, lat, long, height, country) VALUES({}, '{}', {}, {}, {}, '{}')".format(station_id, name, lat, long, height, country) + print(sql) + cursor.execute(sql) + + +def get_stations_from_db(): + with psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: + with connection.cursor() as cursor: + sql = "SELECT station_id FROM stations_germany ORDER BY station_id ASC" + cursor.execute(sql) + results = cursor.fetchall() + for result in results: + station_id = result[0] + yield station_id + + +def create_driver(): + # Sets preference for direct download instead of download-menu + profile = FirefoxProfile() + profile.accept_untrusted_certs = True + profile.acceptSslCerts = True + profile.set_preference("browser.download.folderList", 2) + profile.set_preference("browser.download.manager.showWhenStarting", False) + profile.set_preference("browser.helperApps.neverAsk.saveToDisk", + "text/plain,text/x-csv,text/csv,application/vnd.ms-excel,application/csv,application/x-csv,text/csv,text/comma-separated-values,text/x-comma-separated-values,text/tab-separated-values,application/pdf") + + browser = webdriver.Firefox(firefox_profile=profile) + return browser + + +def get_weather_data_from_web(): + driver = create_driver() + driver.get('https://opendata.dwd.de/climate_environment/CDC/observations_global/CLIMAT/monthly/qc/air_temperature_mean/historical/') + links = driver.find_elements_by_tag_name('a') + for link in links: + linktext = link.get_attribute('innerHTML') + if '../' not in linktext: + pass  # TODO: download the file behind linktext (empty suite was a SyntaxError) + + +def write_weather_data_to_db(): + with psycopg2.connect(database=credentials['db_name'], user=credentials['user'],
password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: + with connection.cursor() as cursor: + station_id = next(get_stations_from_db()) + sql = "SELECT station_id FROM stations_germany ORDER BY station_id ASC" + cursor.execute(sql) + + +get_weather_data_from_web()