Skip to content
Snippets Groups Projects
GetAverageData.py 4.57 KiB
Newer Older
Clemens Berteld's avatar
Clemens Berteld committed
import numpy as np
Peter Morstein's avatar
Peter Morstein committed
import numpy as np
cfg.read('../config.ini')
assert "POSTGRES" in cfg, "missing POSTGRES in config.ini"
assert "INTERPOLATION" in cfg, "missing INTERPOLATION in config.ini"
param_interpol = cfg["INTERPOLATION"]
Clemens Berteld's avatar
Clemens Berteld committed
def get_average_of_multiple_years(cursor, years):
    avg_strings = " "
    where_sql = " WHERE lat IS NOT NULL "
    and_strings = ""
    n = int(years[1]) - int(years[0])
    for year in range(int(years[0]), int(years[1])+1):
        avg_string = ' AVG ("{}") + '.format(str(year))
        and_string = """ AND "{}" != 'NaN' """.format(str(year))

        avg_strings += avg_string
        and_strings += and_string
    avg_strings = avg_strings[:-2]

    query = """SELECT station_id, ROUND(({}) / {}, 1), transparent FROM stations WHERE file IS NULL GROUP BY station_id, transparent ORDER BY station_id ASC;""".format(avg_strings, n)
    print(query)
    cursor.execute(query)
    return cursor.fetchall()


Clemens Berteld's avatar
Clemens Berteld committed
# Getting all available year columns from database
def get_year_columns(cursor):
    columns = []
    query = sql.SQL("SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'stations';")
    cursor.execute(query)
    results = cursor.fetchall()
    for result in results:
        try:
            columns.append(int(result[0]))
        except ValueError:
            pass
    return columns


Clemens Berteld's avatar
Clemens Berteld committed
# Find n (defined in config) neighbours and return them ordered by distance
def get_neighbours(cursor, lat, lon, columns):
    values = ''  # Used in second parameter of cursor.execute() (Avoids SQL injection)
    for n in [lat, lon]:
        values = (*values, n)  # adding n to existing tuple

    query = sql.SQL("""
                SELECT array_to_json(array_agg(row_to_json(t))) from (
                    SELECT {columns}, ST_Distance(ST_MakePoint(lat, lon), ST_MakePoint({lon}, {lat})) AS distance 
                    ORDER BY distance 
                    LIMIT {amount_neighbours}
                    """).format(columns=columns, lon=sql.Placeholder(), lat=sql.Placeholder(), amount_neighbours=sql.SQL(param_interpol["amount_neighbours"]))
    cursor.execute(query, values)
    neighbours = cursor.fetchall()[0][0]
    return neighbours


Clemens Berteld's avatar
Clemens Berteld committed
# Deprecated and unused. Calculating interpolation data just by average. Insufficient statistical method for this use case
def calc_averages(neighbours, years):
    averages = {}
    for year in years:
        values = []
        for neighbour in neighbours:
            # print(neighbour[str(year)])
            if not neighbour[str(year)] == 'NaN': values.append(neighbour[str(year)])
        avg = round(sum(values) / len(values), 3)
        averages[year] = avg
    return averages


Clemens Berteld's avatar
Clemens Berteld committed
# Calculating interpolation data by Inverse Distance Weighted method. Values are decreasingly important with increasing distance
def calc_idw(neighbours, years):
    weighted_values = {}
    for year in years:
        values = []
        distances = []
        for neighbour in neighbours:
            distances.append(neighbour['distance'])
        for neighbour in neighbours:
            normalizer = float(param_interpol["amount_neighbours"]) / sum(distances)
            weight = neighbour['distance'] * normalizer
            if not neighbour[str(year)] == 'NaN': values.append(neighbour[str(year)] * weight)
        try:
            avg = round(sum(values) / len(values), 3)
            weighted_values[year] = avg
        except ZeroDivisionError:
            # print('No Data (NaN in DB)')
            pass
Clemens Berteld's avatar
Clemens Berteld committed
# Collecting preparation data and execute interpolation
def get_interpolation_data_for_point(lat, lon, columns, cursor):
    if '*' in str(columns):
        year_columns = get_year_columns(cursor)
    else:
        year_columns = (str(columns).replace("""SQL('""", "").replace('"', '').replace("')", "")).split(',')
    neighbours = get_neighbours(cursor, lat, lon, columns)
    avg_data = calc_idw(neighbours, year_columns)
    # print(avg_data)
    return avg_data
Peter Morstein's avatar
Peter Morstein committed

def calcAverageYear(stationList, fromYear, toYear):
    
    dateRange = np.arange(fromYear, toYear+1)
    dateRangeRegex = "|".join(np.char.mod('%d', dateRange))
    
    stationListDate = stationList.filter(regex=dateRangeRegex)
    
    stationList["anomalie"] = stationListDate.mean(axis=1)
    stationList = stationList.dropna(axis=0, subset=['anomalie'])
    
    return stationList