import urllib.request import psycopg2 from selenium import webdriver from selenium.webdriver.firefox.firefox_profile import FirefoxProfile def write_stations_to_db(): url = "https://opendata.dwd.de/climate_environment/CDC/help/stations_list_CLIMAT_data.txt" for line in urllib.request.urlopen(url): values = str(line.decode('latin1')).replace(' ', '').replace('\r', '').replace('\n', '').split(';') if len(values) == 6: station_id, name, lat, long, height, country = values if country == 'Germany': print(station_id, name, lat, long, height, country) with psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: with connection.cursor() as cursor: sql = "INSERT INTO stations (station_id, name, lat, long, height, country) VALUES({}, '{}', {}, {}, {}, '{}')".format(station_id, name, lat, long, height, country) print(sql) cursor.execute(sql) def get_stations_from_db(): with psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: with connection.cursor() as cursor: sql = "SELECT station_id FROM stations_germany ORDER BY station_id ASC" cursor.execute(sql) results = cursor.fetchall() for result in results: station_id = result[0] yield station_id def create_driver(): # Sets preference for direct download instead of download-menu profile = FirefoxProfile() profile.accept_untrusted_certs = True profile.acceptSslCerts = True profile.set_preference("browser.download.folderList", 2) profile.set_preference("browser.download.manager.showWhenStarting", False) profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain,text/x-csv,text/csv,application/vnd.ms-excel,application/csv,application/x-csv,text/csv,text/comma-separated-values,text/x-comma-separated-values,text/tab-separated-values,application/pdf") browser = webdriver.Firefox(firefox_profile=profile) return browser def get_weather_data_from_web(): driver = create_driver() driver.get('https://opendata.dwd.de/climate_environment/CDC/observations_global/CLIMAT/monthly/qc/air_temperature_mean/historical/') links = driver.find_elements_by_tag_name('a') for link in links: linktext = link.get_attribute('innerHTML') if '../' not in linktext: def write_weather_data_to_db(): with psycopg2.connect(database=credentials['db_name'], user=credentials['user'], password=credentials['pw'], host=credentials['host'], port=credentials['port']) as connection: with connection.cursor() as cursor: station_id = next(get_stations_from_db()) sql = "SELECT station_id FROM stations_germany ORDER BY station_id ASC" cursor.execute(sql) get_weather_data_from_web()