import configparser

import numpy as np
from psycopg2 import sql

cfg = configparser.ConfigParser()
cfg.read('../config.ini')

assert "POSTGRES" in cfg, "missing POSTGRES in config.ini"
assert "INTERPOLATION" in cfg, "missing INTERPOLATION in config.ini"

param_postgres = cfg["POSTGRES"]
param_interpol = cfg["INTERPOLATION"]
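
# A minimal sketch of the expected ../config.ini. Only "amount_neighbours" from [INTERPOLATION]
# is read in this module; the [POSTGRES] keys shown here are assumptions about a typical
# psycopg2 connection setup, not taken from this file:
#
# [POSTGRES]
# host = localhost
# port = 5432
# dbname = temperatures
# user = postgres
# password = secret
#
# [INTERPOLATION]
# amount_neighbours = 10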

def get_average_of_multiple_years(cursor, years):
    avg_strings = " "
    where_sql = " WHERE lat IS NOT NULL "  # built but currently not used in the query below
    and_strings = ""                       # built but currently not used in the query below
    n = int(years[1]) - int(years[0]) + 1  # number of years in the inclusive range (divisor for the average)
    for year in range(int(years[0]), int(years[1]) + 1):
        avg_string = ' AVG ("{}") + '.format(str(year))
        and_string = """ AND "{}" != 'NaN' """.format(str(year))
        avg_strings += avg_string
        and_strings += and_string
    avg_strings = avg_strings[:-2]  # strip the trailing "+ "
    query = """SELECT station_id, ROUND(({}) / {}, 1), transparent FROM stations WHERE file IS NULL GROUP BY station_id, transparent ORDER BY station_id ASC;""".format(avg_strings, n)
    print(query)
    cursor.execute(query)
    return cursor.fetchall()
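
# For illustration, get_average_of_multiple_years(cursor, ('2018', '2020')) builds a query
# along these lines (the years are made up; table and column names come from this module):
#   SELECT station_id, ROUND(( AVG ("2018") +  AVG ("2019") +  AVG ("2020") ) / 3, 1), transparent
#   FROM stations WHERE file IS NULL
#   GROUP BY station_id, transparent ORDER BY station_id ASC;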

def get_year_columns(cursor):
    # Collect the names of all columns of the stations table that are plain years
    columns = []
    query = sql.SQL("SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'stations';")
    cursor.execute(query)
    results = cursor.fetchall()
    for result in results:
        try:
            columns.append(int(result[0]))
        except ValueError:  # non-numeric column names (e.g. station_id, lat, lon, file) are skipped
            pass
    return columns
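
# For illustration, get_year_columns(cursor) returns the year columns as integers, e.g. [1995, 1996, ..., 2019];
# the concrete range depends on the data loaded into the stations table.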
# Find n (defined in config) neighbours and return them ordered by distance
def get_neighbours(cursor, lat, lon, columns):
    values = ()  # Used as second parameter of cursor.execute() (avoids SQL injection)
    for n in [lat, lon]:
        values = (*values, n)  # adding n to the existing tuple
    query = sql.SQL("""
        SELECT array_to_json(array_agg(row_to_json(t))) FROM (
            SELECT {columns}, ST_Distance(ST_MakePoint(lat, lon), ST_MakePoint({lon}, {lat})) AS distance
            FROM stations
            WHERE file IS NOT NULL
            ORDER BY distance
            LIMIT {amount_neighbours}
        ) t;
    """).format(columns=columns, lon=sql.Placeholder(), lat=sql.Placeholder(), amount_neighbours=sql.SQL(param_interpol["amount_neighbours"]))

    cursor.execute(query, values)
    neighbours = cursor.fetchall()[0][0]
    return neighbours
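
# For illustration: the query returns a single JSON document, which psycopg2 hands back as a
# Python list of dicts, one per neighbour, roughly like this (all values made up):
#   [{'2018': 9.8, '2019': 10.1, 'distance': 0.42},
#    {'2018': 9.5, '2019': 9.9,  'distance': 0.61}, ...]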
# Deprecated and unused. Calculating interpolation data just by average. Insufficient statistical method for this use case
def calc_averages(neighbours, years):
    averages = {}
    for year in years:
        values = []
        for neighbour in neighbours:
            # print(neighbour[str(year)])
            if neighbour[str(year)] != 'NaN':
                values.append(neighbour[str(year)])
        avg = round(sum(values) / len(values), 3)
        averages[year] = avg
    return averages

# Calculating interpolation data by Inverse Distance Weighted method. Values are decreasingly important with increasing distance
def calc_idw(neighbours, years):
    weighted_values = {}
    for year in years:
        values = []
        distances = []
        for neighbour in neighbours:
            distances.append(neighbour['distance'])
        for neighbour in neighbours:
            normalizer = float(param_interpol["amount_neighbours"]) / sum(distances)
            weight = neighbour['distance'] * normalizer
            if neighbour[str(year)] != 'NaN':
                values.append(neighbour[str(year)] * weight)
        try:
            avg = round(sum(values) / len(values), 3)
            weighted_values[year] = avg
        except ZeroDivisionError:
            # print('No Data (NaN in DB)')
            pass
    return weighted_values
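
# Note that the weight above grows with distance (weight = distance * normalizer), so more distant
# stations contribute more. A textbook inverse-distance weighting, where close stations dominate as
# the comment describes, would look roughly like the sketch below. calc_idw_classic is illustrative
# only (its name and the power parameter are not part of the original module) and is not called anywhere.
def calc_idw_classic(neighbours, years, power=2):
    weighted_values = {}
    for year in years:
        numerator, denominator = 0.0, 0.0
        for neighbour in neighbours:
            if neighbour[str(year)] == 'NaN':
                continue
            # Closer neighbours get a larger weight; the small epsilon avoids division by zero
            weight = 1 / max(neighbour['distance'], 1e-9) ** power
            numerator += weight * neighbour[str(year)]
            denominator += weight
        if denominator:
            weighted_values[year] = round(numerator / denominator, 3)
    return weighted_values
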
# Collect preparation data and execute the interpolation
def get_interpolation_data_for_point(lat, lon, columns, cursor):
    if '*' in str(columns):
        year_columns = get_year_columns(cursor)
    else:
        # Strip the sql.SQL() wrapper from the composed column list to get the plain year names
        year_columns = (str(columns).replace("""SQL('""", "").replace('"', '').replace("')", "")).split(',')
    neighbours = get_neighbours(cursor, lat, lon, columns)
    avg_data = calc_idw(neighbours, year_columns)
    # print(avg_data)
    return avg_data

# get_average_data_for_point(52.5, 13.4)
# Average all year columns between fromYear and toYear into a new "anomalie" column
def calcAverageYear(stationList, fromYear, toYear):
    dateRange = np.arange(fromYear, toYear + 1)
    dateRangeRegex = "|".join(np.char.mod('%d', dateRange))     # e.g. "2018|2019|2020"
    stationListDate = stationList.filter(regex=dateRangeRegex)  # keep only the year columns in range
    stationList["anomalie"] = stationListDate.mean(axis=1)      # row-wise mean over those years
    stationList = stationList.dropna(axis=0, subset=['anomalie'])
    return stationList
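
# Example usage sketch (the DataFrame below is made up and only illustrates the expected shape,
# one string-named column per year; pandas is assumed to be available):
# import pandas as pd
# stations = pd.DataFrame({'station_id': [1, 2],
#                          '2018': [9.8, None], '2019': [10.1, 9.7], '2020': [9.9, 10.0]})
# print(calcAverageYear(stations, 2018, 2020))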