Skip to content
Snippets Groups Projects
Commit 0c1ae937 authored by Clemens Berteld's avatar Clemens Berteld
Browse files

DB gets created if not existing

DB gets filled with data
parent 2c909099
No related branches found
No related tags found
No related merge requests found
...@@ -97,6 +97,7 @@ def loadAndJoinDWDClimateFilenames(): ...@@ -97,6 +97,7 @@ def loadAndJoinDWDClimateFilenames():
# with open("stationList.pickle","wb") as pf: # with open("stationList.pickle","wb") as pf:
# pickle.dump(stationList, pf) # pickle.dump(stationList, pf)
# here we have to try some interpolations for missing values # here we have to try some interpolations for missing values
def fillMissingData(annualData): def fillMissingData(annualData):
months = ["Jan", "Feb", "Mrz","Apr","Mai","Jun","Jul","Aug","Sep","Okt","Nov","Dez"] months = ["Jan", "Feb", "Mrz","Apr","Mai","Jun","Jul","Aug","Sep","Okt","Nov","Dez"]
...@@ -145,6 +146,7 @@ def fillMissingData(annualData): ...@@ -145,6 +146,7 @@ def fillMissingData(annualData):
return annualData return annualData
# load Temperatures from DWD gauges # load Temperatures from DWD gauges
def loadTemperatureFromDWDGauges(): def loadTemperatureFromDWDGauges():
global climateCountry global climateCountry
...@@ -180,15 +182,16 @@ def loadTemperatureFromDWDGauges(): ...@@ -180,15 +182,16 @@ def loadTemperatureFromDWDGauges():
stationList.columns = stationList.columns.astype(str) stationList.columns = stationList.columns.astype(str)
stationList = stationList.sort_index(axis=1, ascending=False) stationList = stationList.sort_index(axis=1, ascending=False)
def start(): def start():
global stationList global stationList
print("___ DWD Acquisition start___") print("___ DWD Acquisition start___")
loadDWDGauges() loadDWDGauges()
stationList = stationList.loc[stationList['country']=="Germany"] stationList = stationList.loc[stationList['country'] == "Germany"]
loadAndJoinDWDClimateFilenames() # loadAndJoinDWDClimateFilenames()
loadTemperatureFromDWDGauges() # loadTemperatureFromDWDGauges()
with open("./pickle/stationList_germany.pickle", "wb") as pickleFile: with open("./pickle/stationList_germany.pickle", "wb") as pickleFile:
pickle.dump(stationList, pickleFile) pickle.dump(stationList, pickleFile)
...@@ -198,6 +201,7 @@ def start(): ...@@ -198,6 +201,7 @@ def start():
exportToDatabase.export(stationList) exportToDatabase.export(stationList)
print("___DWD Acquisition finished___") print("___DWD Acquisition finished___")
if __name__ == '__main__': if __name__ == '__main__':
start() start()
\ No newline at end of file
...@@ -7,10 +7,73 @@ export the stationlist to database ...@@ -7,10 +7,73 @@ export the stationlist to database
@author: geopeter @author: geopeter
""" """
import psycopg2
import psycopg2.extras
from psycopg2 import sql
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
stationGPD = None stationGPD = None
# Connect to DB "postgres" to check for database "temperatures_berteld_morstein"
def check_for_db_existence():
    """Check whether database 'temperatures_berteld_morstein' exists; create it if not.

    Connects to the maintenance database 'postgres', looks the target database
    up in pg_database and, when it is missing, creates it and its
    'temperatures' table via create_db() / create_table().
    """
    print("Checking for database existence")
    with psycopg2.connect(database='postgres', user='postgres', password='postgres', host='localhost', port=5432) as connection:
        with connection.cursor() as cursor:
            # LIKE without wildcards behaves like equality here; exact-name lookup.
            cursor.execute("SELECT datname FROM pg_database WHERE datname LIKE 'temperatures_berteld_morstein';")
            # fetchone() is None when no row matched — clearer than indexing
            # fetchall() and catching IndexError.
            if cursor.fetchone() is not None:
                print('DB existing')
            else:  # DB temperatures_berteld_morstein doesn't exist
                print('DB not existing')
                try:
                    create_db(connection, cursor)
                    create_table()
                    print('Successfully created database and table')
                except (Exception, psycopg2.DatabaseError) as error:
                    # Best-effort: report the failure and continue without a DB.
                    print(error)
# Use existing connection to DB "postgres" to create DB "temperatures_berteld_morstein"
def create_db(connection, cursor):
    """Create database 'temperatures_berteld_morstein' over the given connection/cursor."""
    # CREATE DATABASE cannot run inside a transaction block, so the
    # connection must be switched to autocommit first.
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    statement = sql.SQL("CREATE DATABASE temperatures_berteld_morstein;")
    print(statement)
    cursor.execute(statement)
# Connect to DB "temperatures_berteld_morstein" to create table "temperatures"
def create_table():
    """Create the 'temperatures' table inside the freshly created database."""
    connection_args = dict(database='temperatures_berteld_morstein', user='postgres',
                           password='postgres', host='localhost', port=5432)
    with psycopg2.connect(**connection_args) as connection:
        with connection.cursor() as cursor:
            ddl = sql.SQL("CREATE TABLE temperatures (id INTEGER, lon NUMERIC, lat NUMERIC, country TEXT);")
            print(ddl)
            cursor.execute(ddl)
def insert_data(stationList):
    """Bulk-insert the station list into the 'temperatures' table.

    stationList: pandas DataFrame whose column names match the table columns.
    Does nothing when the list is empty — avoids opening a pointless
    connection (the original connected first and checked afterwards).
    """
    if len(stationList) == 0:
        return  # nothing to insert; skip connecting entirely
    with psycopg2.connect(database='temperatures_berteld_morstein', user='postgres', password='postgres', host='localhost', port=5432) as connection:
        with connection.cursor() as cursor:
            df_columns = list(stationList)
            # create (col1,col2,...)
            columns = ",".join(df_columns)
            # create VALUES(%s, %s, ...) — one placeholder per column
            values = "VALUES({})".format(",".join(["%s" for _ in df_columns]))
            # create INSERT INTO table (columns) VALUES(%s,...)
            insert_stmt = "INSERT INTO {} ({}) {}".format('temperatures', columns, values)
            # execute_batch groups rows into far fewer round trips than executemany
            psycopg2.extras.execute_batch(cursor, insert_stmt, stationList.values)
def export(stationList): def export(stationList):
print("export the stationList to database") check_for_db_existence()
insert_data(stationList)
return stationList
\ No newline at end of file # return stationList
...@@ -10,6 +10,7 @@ Export the stationlist to the geopandas world shape file ...@@ -10,6 +10,7 @@ Export the stationlist to the geopandas world shape file
import numpy as np import numpy as np
import geopandas as gpd import geopandas as gpd
def buildAverageTimeseries(stationList, fromYear, toYear, name): def buildAverageTimeseries(stationList, fromYear, toYear, name):
meanAverage = [] meanAverage = []
for stationID, station in stationList.iterrows(): for stationID, station in stationList.iterrows():
...@@ -24,7 +25,8 @@ def buildAverageTimeseries(stationList, fromYear, toYear, name): ...@@ -24,7 +25,8 @@ def buildAverageTimeseries(stationList, fromYear, toYear, name):
meanAverage.append(np.NaN) meanAverage.append(np.NaN)
stationList[name] = np.round(meanAverage,1) stationList[name] = np.round(meanAverage,1)
def cleanAverageTimeseries(stationList): def cleanAverageTimeseries(stationList):
# determine gauges that includes both timeseries. If not delete them. # determine gauges that includes both timeseries. If not delete them.
...@@ -35,6 +37,7 @@ def cleanAverageTimeseries(stationList): ...@@ -35,6 +37,7 @@ def cleanAverageTimeseries(stationList):
stationList.at[stationID, "m1961T1990"] = None stationList.at[stationID, "m1961T1990"] = None
stationList.at[stationID, "m1991T2018"] = None stationList.at[stationID, "m1991T2018"] = None
def convertStationListToGPD(stationList): def convertStationListToGPD(stationList):
print("convert stationlist to GeoPandas") print("convert stationlist to GeoPandas")
......
No preview for this file type
...@@ -10,8 +10,10 @@ import unittest ...@@ -10,8 +10,10 @@ import unittest
import pickle import pickle
import ExportToDatabase as cut import ExportToDatabase as cut
class TestExportToDatabase(unittest.TestCase): class TestExportToDatabase(unittest.TestCase):
stationList = None stationList = None
def testExport(self): def testExport(self):
global stationList global stationList
with open("./pickle/stationList_germany.pickle", "rb") as pickleFile: with open("./pickle/stationList_germany.pickle", "rb") as pickleFile:
......
import urllib.request
import psycopg2
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
def write_stations_to_db():
    """Download the DWD CLIMAT station list and insert German stations into 'stations'.

    NOTE(review): relies on a module-level 'credentials' dict that is not
    visible in this file section — confirm it is defined before calling.
    """
    url = "https://opendata.dwd.de/climate_environment/CDC/help/stations_list_CLIMAT_data.txt"
    # One connection/cursor for the whole import — the original opened a new
    # connection for every single station row.
    with psycopg2.connect(database=credentials['db_name'], user=credentials['user'],
                          password=credentials['pw'], host=credentials['host'],
                          port=credentials['port']) as connection:
        with connection.cursor() as cursor:
            # Parameterized statement instead of str.format: station names
            # containing quotes can no longer break (or inject into) the SQL.
            insert_stmt = ("INSERT INTO stations (station_id, name, lat, long, height, country) "
                           "VALUES (%s, %s, %s, %s, %s, %s)")
            for line in urllib.request.urlopen(url):
                # Source file is latin1-encoded and ';'-separated; strip spaces and line breaks.
                values = str(line.decode('latin1')).replace(' ', '').replace('\r', '').replace('\n', '').split(';')
                if len(values) == 6:
                    station_id, name, lat, long, height, country = values
                    if country == 'Germany':
                        print(station_id, name, lat, long, height, country)
                        cursor.execute(insert_stmt, values)
def get_stations_from_db():
    """Yield every station id from stations_germany in ascending order."""
    query = "SELECT station_id FROM stations_germany ORDER BY station_id ASC"
    with psycopg2.connect(database=credentials['db_name'], user=credentials['user'],
                          password=credentials['pw'], host=credentials['host'],
                          port=credentials['port']) as connection:
        with connection.cursor() as cursor:
            cursor.execute(query)
            # Yield inside the context managers so the connection lives
            # exactly as long as the generator is being consumed.
            for row in cursor.fetchall():
                yield row[0]
def create_driver():
    """Return a Firefox webdriver that saves known file types straight to disk
    instead of opening the download dialog."""
    profile = FirefoxProfile()
    profile.accept_untrusted_certs = True
    profile.acceptSslCerts = True
    # folderList=2 selects the custom download directory; the second pref
    # suppresses the download-manager window.
    preferences = {
        "browser.download.folderList": 2,
        "browser.download.manager.showWhenStarting": False,
        "browser.helperApps.neverAsk.saveToDisk":
            "text/plain,text/x-csv,text/csv,application/vnd.ms-excel,application/csv,application/x-csv,text/csv,text/comma-separated-values,text/x-comma-separated-values,text/tab-separated-values,application/pdf",
    }
    for key, value in preferences.items():
        profile.set_preference(key, value)
    return webdriver.Firefox(firefox_profile=profile)
def get_weather_data_from_web():
driver = create_driver()
driver.get('https://opendata.dwd.de/climate_environment/CDC/observations_global/CLIMAT/monthly/qc/air_temperature_mean/historical/')
links = driver.find_elements_by_tag_name('a')
for link in links:
linktext = link.get_attribute('innerHTML')
if '../' not in linktext:
# NOTE(review): visibly unfinished work-in-progress — it fetches a single
# station id and re-runs the station query, but never reads the result and
# never writes any weather data.
def write_weather_data_to_db():
    with psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection:
        with connection.cursor() as cursor:
            # Only the first station id is consumed; the generator is then discarded.
            station_id = next(get_stations_from_db())
            # Query result is never fetched or used — presumably placeholder code; confirm intent.
            sql = "SELECT station_id FROM stations_germany ORDER BY station_id ASC"
            cursor.execute(sql)
get_weather_data_from_web()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment