diff --git a/README b/README index 837ee1e..f56cfbd 100644 --- a/README +++ b/README @@ -26,6 +26,11 @@ sensori_aria_2017.zip - 13M sensori_aria_2018.zip - 13M sensori_aria_2019.zip - 13M + +questi dati si trovano qui': +https://www.dati.lombardia.it/browse?q=dati%20sensori%20aria + + per visualizzarne i dati e' necessario scaricare l'intero file in una cartella, che di default e' csv/ ma puo' essere modificata nel file openlamb.py lo script se non trova i files necessari nella cartella specificata li scarica in formato .zip diff --git a/openlamb.py b/openlamb.py index 6f7f322..7f13b85 100755 --- a/openlamb.py +++ b/openlamb.py @@ -4,20 +4,30 @@ import argparse import traceback import sys import pandas as pd +import requests import numpy as np -from sodapy import Socrata import matplotlib.pyplot as plt import re +import json import glob import os from os import getcwd, chdir path_to_csv_files = "csv/" -datasets_ambiente = {"2020": "nicp-bhqi", - "2019": "kujm-kavy", - "2018": "bgqm-yq56", - "2017": "j8j8-qsb2"} +datasets_ambiente = {"3000": "nicp-bhqi", + "2018": "g2hp-ar79", + "2017": "j8j8-qsb2", + "2010": "nr8w-tj77", + "2000": "cthp-zqrr", + "1999": "evzn-32bs", + } +# 1999 fino al 1999 +# 2000 dal 2000 al 2009 +# 2010 dal 2010 al 2017 +# 2018 sono i dati dal 2018 ad oggi +# 3000 sono i dati dell'anno corrente + csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh", "sensori_aria_1996-2000.zip": "wabv-jucw", @@ -35,14 +45,54 @@ csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh", "sensori_aria_2019.zip": "j2mz-aium"} -def _connect(): - client = Socrata("www.dati.lombardia.it", None) - return client +class SocrataClient: + def __init__(self, domain, app_token=None): + self.base_url = f"https://{domain}/resource" + self.app_token = app_token + + def get(self, resource_id, IdSensore, limit, filters=None): + """ + Retrieve data from a Socrata dataset. + :param resource_id: The ID of the Socrata dataset. 
def create_folder_if_not_exists(folder_path):
    """Create *folder_path* (with any missing parents) unless it exists.

    The outcome is reported on stdout. A failure to create the folder is
    printed rather than raised, so the caller keeps running either way.

    :param folder_path: path of the directory to create.
    """
    try:
        # Attempt the creation directly instead of checking first, so
        # there is no gap between the existence test and the mkdir call.
        os.makedirs(folder_path)
    except FileExistsError:
        print(f"Folder '{folder_path}' already exists.")
    except OSError as e:
        print(f"Error creating folder '{folder_path}': {e}")
    else:
        print(f"Folder '{folder_path}' created successfully.")


def is_graphical_environment_active():
    """Tell whether the DISPLAY environment variable is set and non-empty.

    :return: True when DISPLAY holds a non-empty value, False otherwise.
    """
    return bool(os.environ.get("DISPLAY"))


def is_remote_tty():
    """Check if the script is executed inside a remote (SSH) session.

    The decision is based on the session variables exported by the SSH
    server (SSH_CONNECTION, SSH_CLIENT, SSH_TTY). TERM is deliberately
    not consulted: it only names the terminal type, and local terminal
    emulators commonly report an ``xterm*`` value as well, so a TERM
    based test classifies ordinary local sessions as remote.

    :return: True if any SSH session variable is set to a non-empty
        value, False otherwise.
    """
    ssh_session_vars = ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")
    return any(os.environ.get(name) for name in ssh_session_vars)
def get_data(self, resource_id, query=None, limit=None, timeout=30):
    """Fetch the records of one dataset from the JSON resource endpoint.

    :param resource_id: identifier of the dataset; the request goes to
        ``https://<domain>/resource/<resource_id>.json``.
    :param query: optional value sent as the ``$query`` parameter.
    :param limit: optional value sent as the ``$limit`` parameter; a
        falsy value (None or 0) leaves the parameter out.
    :param timeout: seconds to wait for the server. Without a timeout
        the call could block forever on an unresponsive host.
    :return: the decoded JSON body on HTTP 200. For any other status
        the code and response text are printed and an empty list is
        returned.
    """
    base_url = f"https://{self.domain}/resource/{resource_id}.json"
    params = {'$limit': limit} if limit else {}
    if query:
        params['$query'] = query

    # The application token is optional; send the header only when set.
    headers = {}
    if self.app_token:
        headers['X-App-Token'] = self.app_token

    response = requests.get(base_url, params=params, headers=headers,
                            timeout=timeout)

    if response.status_code == 200:
        return response.json()

    print(f"Error {response.status_code}: {response.text}")
    return []