import configparser

import numpy as np
from psycopg2 import sql

cfg = configparser.ConfigParser()
cfg.read('../config.ini')

assert "POSTGRES" in cfg, "missing POSTGRES in config.ini"
assert "INTERPOLATION" in cfg, "missing INTERPOLATION in config.ini"

param_postgres = cfg["POSTGRES"]
param_interpol = cfg["INTERPOLATION"]


# Average each station's values over a range of years directly in SQL.
def get_average_of_multiple_years(cursor, years):
    avg_strings = " "
    n = int(years[1]) - int(years[0]) + 1  # number of years in the inclusive range
    for year in range(int(years[0]), int(years[1]) + 1):
        avg_strings += ' AVG ("{}") + '.format(year)
    avg_strings = avg_strings[:-2]  # strip the trailing "+ "
    query = """SELECT station_id, ROUND(({}) / {}, 1), transparent
               FROM stations
               WHERE file IS NULL
               GROUP BY station_id, transparent
               ORDER BY station_id ASC;""".format(avg_strings, n)
    print(query)
    cursor.execute(query)
    return cursor.fetchall()


# Getting all available year columns from the database
def get_year_columns(cursor):
    columns = []
    query = sql.SQL("SELECT column_name FROM information_schema.columns "
                    "WHERE table_schema = 'public' AND table_name = 'stations';")
    cursor.execute(query)
    for result in cursor.fetchall():
        try:
            columns.append(int(result[0]))  # keep only columns whose name is a year
        except ValueError:
            pass
    return columns


# Find n (defined in config) neighbours and return them ordered by distance
def get_neighbours(cursor, lat, lon, columns):
    values = (lat, lon)  # passed as second parameter of cursor.execute() (avoids SQL injection)
    query = sql.SQL("""
        SELECT array_to_json(array_agg(row_to_json(t))) FROM (
            SELECT {columns}, ST_Distance(ST_MakePoint(lat, lon), ST_MakePoint({lon}, {lat})) AS distance
            FROM stations
            WHERE file IS NOT NULL
            ORDER BY distance
            LIMIT {amount_neighbours}
        ) t;
        """).format(columns=columns, lon=sql.Placeholder(), lat=sql.Placeholder(),
                    amount_neighbours=sql.SQL(param_interpol["amount_neighbours"]))
    cursor.execute(query, values)
    neighbours = cursor.fetchall()[0][0]
    return neighbours


# Deprecated and unused. Calculates interpolation data by a plain average,
# which is an insufficient statistical method for this use case.
def calc_averages(neighbours, years):
    averages = {}
    for year in years:
        values = []
        for neighbour in neighbours:
            if not neighbour[str(year)] == 'NaN':
                values.append(neighbour[str(year)])
        averages[year] = round(sum(values) / len(values), 3)
    return averages


# Calculating interpolation data by the Inverse Distance Weighting (IDW) method.
# Values become less important with increasing distance.
def calc_idw(neighbours, years):
    weighted_values = {}
    for year in years:
        weighted_sum = 0.0
        weight_sum = 0.0
        for neighbour in neighbours:
            if neighbour[str(year)] == 'NaN':
                continue
            # Inverse distance: closer stations get larger weights.
            # Guard against a zero distance (query point on top of a station).
            weight = 1.0 / max(neighbour['distance'], 1e-9)
            weighted_sum += neighbour[str(year)] * weight
            weight_sum += weight
        try:
            weighted_values[year] = round(weighted_sum / weight_sum, 3)
        except ZeroDivisionError:
            # No data for this year (all neighbours reported NaN)
            pass
    return weighted_values


# Collect preparation data and execute the interpolation
def get_interpolation_data_for_point(lat, lon, columns, cursor):
    if '*' in str(columns):
        year_columns = get_year_columns(cursor)
    else:
        # Recover plain column names from the composed sql.SQL object
        year_columns = [c.strip() for c in
                        str(columns).replace("SQL('", "").replace('"', '').replace("')", "").split(',')]
    neighbours = get_neighbours(cursor, lat, lon, columns)
    avg_data = calc_idw(neighbours, year_columns)
    return avg_data

# get_average_data_for_point(52.5, 13.4)


# Average each station's yearly values over [fromYear, toYear] into an "anomalie"
# column and drop stations without any data in that range
# (stationList is expected to be a pandas DataFrame with one column per year).
def calcAverageYear(stationList, fromYear, toYear):
    dateRange = np.arange(fromYear, toYear + 1)
    dateRangeRegex = "|".join(np.char.mod('%d', dateRange))  # e.g. "2000|2001|2002"
    stationListDate = stationList.filter(regex=dateRangeRegex)
    stationList["anomalie"] = stationListDate.mean(axis=1)
    stationList = stationList.dropna(axis=0, subset=['anomalie'])
    return stationList
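

# --- Usage sketch ------------------------------------------------------------
# A minimal example of how the functions above could be wired together with a
# psycopg2 connection. The connection keys read from config.ini (host, port,
# database, user, password) are assumptions and may differ in the real setup;
# the coordinates reuse the example point (52.5, 13.4) noted above.
if __name__ == '__main__':
    import psycopg2

    conn = psycopg2.connect(host=param_postgres["host"],        # assumed key names
                            port=param_postgres["port"],
                            dbname=param_postgres["database"],
                            user=param_postgres["user"],
                            password=param_postgres["password"])
    with conn.cursor() as cursor:
        # Interpolate all year columns ('*') for the example point
        data = get_interpolation_data_for_point(52.5, 13.4, sql.SQL('*'), cursor)
        print(data)
    conn.close()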