merged replace_sodipy

This commit is contained in:
putro 2023-09-20 13:27:34 +02:00
parent 8b0c518be1
commit 6b50d67c0b
3 changed files with 141 additions and 20 deletions

5
README
View File

@ -26,6 +26,11 @@ sensori_aria_2017.zip - 13M
sensori_aria_2018.zip - 13M sensori_aria_2018.zip - 13M
sensori_aria_2019.zip - 13M sensori_aria_2019.zip - 13M
questi dati si trovano qui:
https://www.dati.lombardia.it/browse?q=dati%20sensori%20aria
per visualizzarne i dati e' necessario scaricare l'intero file in una cartella, che di default e' csv/ per visualizzarne i dati e' necessario scaricare l'intero file in una cartella, che di default e' csv/
ma puo' essere modificata nel file openlamb.py ma puo' essere modificata nel file openlamb.py
lo script se non trova i files necessari nella cartella specificata li scarica in formato .zip lo script se non trova i files necessari nella cartella specificata li scarica in formato .zip

View File

@ -4,20 +4,30 @@ import argparse
import traceback import traceback
import sys import sys
import pandas as pd import pandas as pd
import requests
import numpy as np import numpy as np
from sodapy import Socrata
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import re import re
import json
import glob import glob
import os import os
from os import getcwd, chdir from os import getcwd, chdir
path_to_csv_files = "csv/" path_to_csv_files = "csv/"
datasets_ambiente = {"2020": "nicp-bhqi", datasets_ambiente = {"3000": "nicp-bhqi",
"2019": "kujm-kavy", "2018": "g2hp-ar79",
"2018": "bgqm-yq56", "2017": "j8j8-qsb2",
"2017": "j8j8-qsb2"} "2010": "nr8w-tj77",
"2000": "cthp-zqrr",
"1999": "evzn-32bs",
}
# 1999 fino al 1999
# 2000 dal 2000 al 2009
# 2010 dal 2010 al 2017
# 2018 sono i dati dal 2018 ad oggi
# 3000 sono i dati dell'anno corrente
csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh", csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh",
"sensori_aria_1996-2000.zip": "wabv-jucw", "sensori_aria_1996-2000.zip": "wabv-jucw",
@ -35,14 +45,54 @@ csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh",
"sensori_aria_2019.zip": "j2mz-aium"} "sensori_aria_2019.zip": "j2mz-aium"}
class SocrataClient:
    """Minimal client for the JSON resource endpoint of a Socrata portal.

    Requests are sent to ``https://<domain>/resource/<resource_id>.json``.
    """

    def __init__(self, domain, app_token=None, timeout=60):
        """
        :param domain: Host name of the Socrata portal, without scheme.
        :param app_token: Optional token, sent as the ``X-App-Token`` header.
        :param timeout: Seconds handed to ``requests.get`` as its timeout;
            ``None`` waits indefinitely.
        """
        self.base_url = f"https://{domain}/resource"
        self.app_token = app_token
        self.timeout = timeout

    def get(self, resource_id, IdSensore, limit, filters=None):
        """
        Retrieve data from a Socrata dataset.

        :param resource_id: The ID of the Socrata dataset.
        :param IdSensore: Sensor id, sent as the ``idsensore`` query parameter.
        :param limit: Maximum number of records, sent as ``$limit``.
        :param filters: Optional dict of extra query parameters; on a key
            collision its entries replace ``$limit`` / ``idsensore``.
        :return: The decoded JSON body of the response.
        :raises RuntimeError: If the response status code is not 200.
        """
        url = f"{self.base_url}/{resource_id}.json"
        params = {
            "$limit": limit,
            "idsensore": IdSensore,
        }
        if filters:
            params.update(filters)

        headers = {}
        if self.app_token:
            headers["X-App-Token"] = self.app_token

        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, params=params, headers=headers,
                                timeout=self.timeout)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to retrieve data. Status code: {response.status_code}")
        return response.json()
def read_data_online(dataset, sensore, limit=1000000):
    """Download the records of one sensor from www.dati.lombardia.it.

    On a failed request or an empty result a message is printed and the
    process is terminated with ``sys.exit(-1)``.

    :param dataset: Socrata resource id of the dataset to query.
    :param sensore: Sensor id to filter on.
    :param limit: Maximum number of records to request.
    :return: The non-empty data returned by ``SocrataClient.get``.
    """
    client = SocrataClient('www.dati.lombardia.it', None)
    try:
        # Only the network call is guarded; the exits below are not
        # affected by this handler.
        data = client.get(dataset, sensore, limit)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(-1)
    if not data:
        print("No data found for the specified sensor.")
        sys.exit(-1)
    return data
def read_data_from_csv(datafile): def read_data_from_csv(datafile):
@ -195,6 +245,39 @@ def check_year_range(arg):
return True return True
def create_folder_if_not_exists(folder_path):
    """Make sure ``folder_path`` exists as a directory, creating it if needed.

    The outcome is reported on stdout. Failures are printed, not raised.

    :param folder_path: Path of the directory to create (parents included).
    """
    if os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' already exists.")
        return
    try:
        # exist_ok tolerates the directory appearing between the check above
        # and this call; a non-directory at that path still raises OSError.
        os.makedirs(folder_path, exist_ok=True)
    except OSError as e:
        print(f"Error creating folder '{folder_path}': {e}")
    else:
        print(f"Folder '{folder_path}' created successfully.")
def is_graphical_environment_active():
    """Tell whether the environment advertises a display server.

    :return: True when ``DISPLAY`` (X11) or ``WAYLAND_DISPLAY`` (Wayland) is
        set to a non-empty value, False otherwise.
    """
    # Checking DISPLAY alone would report a Wayland-only session as headless.
    return any(os.environ.get(name) for name in ("DISPLAY", "WAYLAND_DISPLAY"))
def is_remote_tty():
    """
    Check if the script is executed on a remote TTY.

    A session counts as remote when one of SSH_CONNECTION, SSH_CLIENT or
    SSH_TTY is set to a non-empty value, or when TERM is exactly 'ssh'.
    A TERM value starting with 'xterm' is deliberately not used as a signal:
    local terminal emulators set it too, which made every local run look
    remote.

    Returns:
        bool: True if running on a remote TTY, False if running locally.
    """
    ssh_variables = ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")
    if any(os.environ.get(name) for name in ssh_variables):
        return True
    return os.environ.get('TERM') == 'ssh'
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--dataset", nargs='+', required=False, parser.add_argument("--dataset", nargs='+', required=False,
@ -206,6 +289,7 @@ def main():
args = parser.parse_args() args = parser.parse_args()
try: try:
create_folder_if_not_exists(path_to_csv_files)
csv_dict = get_csv_dict(csv_ambiente) csv_dict = get_csv_dict(csv_ambiente)
csv_files = list_of_csv_files(path_to_csv_files) csv_files = list_of_csv_files(path_to_csv_files)
dati_csv = [] dati_csv = []
@ -229,21 +313,26 @@ def main():
s = stazioni.get_stazioni() s = stazioni.get_stazioni()
for sensore in datamerged.columns[1:]: for sensore in datamerged.columns[1:]:
location = s.loc[s['idsensore'] == sensore.split("-")[0], 'nomestazione'].iloc[0] location = s.loc[s['idsensore'] == sensore.split("-")[0], 'nomestazione'].iloc[0]
print('Valore medio per il sensore %s %s: %s' % (sensore, location, datamerged[sensore].mean().round(1))) sensor_type = s.loc[s['idsensore'] == sensore.split("-")[0], 'nometiposensore'].iloc[0]
print('Valore medio per il sensore %s %s %s: %s' % (sensore, sensor_type, location, datamerged[sensore].mean().round(1)))
if is_graphical_environment_active() and not is_remote_tty():
plot_dataframe(datamerged) plot_dataframe(datamerged)
except KeyError: except KeyError:
print("\nKeyError: forse hai specificato un dataset che non esiste ?\n" print("\nKeyError: forse hai specificato un dataset che non esiste ?\n"
"i dataset sono disponibili per gli anni %s\n " % list(datasets_ambiente.keys())) "i dataset sono disponibili per gli anni %s\n " % list(datasets_ambiente.keys()))
traceback.print_exc() #traceback.print_exc()
sys.exit(-1)
except KeyboardInterrupt: except KeyboardInterrupt:
print("program terminated by user") print("program terminated by user")
sys.exit(-1)
except SystemExit: except SystemExit:
print("program terminated, bye") print("program terminated, bye")
sys.exit(-1)
except: except:
print("\nAn unhandled exception occured, here's the traceback!\n") print("\nAn unhandled exception occured, here's the traceback!\n")
traceback.print_exc() traceback.print_exc()
print("\nReport this to putro@autistici.org") print("\nReport this to putro@autistici.org")
sys.exit() sys.exit(-1)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -3,12 +3,39 @@
import os.path import os.path
import tabulate import tabulate
import pandas as pd import pandas as pd
from sodapy import Socrata import requests
class SocrataClient:
    """Small ``requests``-based client for the resource endpoint of a Socrata portal."""

    def __init__(self, domain, app_token=None, timeout=60):
        """
        :param domain: Host name of the Socrata portal, without scheme.
        :param app_token: Optional token, sent as the ``X-App-Token`` header.
        :param timeout: Seconds handed to ``requests.get`` as its timeout;
            ``None`` waits indefinitely.
        """
        self.domain = domain
        self.app_token = app_token
        self.timeout = timeout

    def _build_request(self, resource_id, query=None, limit=None):
        """Return the ``(url, params, headers)`` triple for a resource query."""
        url = f"https://{self.domain}/resource/{resource_id}.json"
        # A falsy limit (None or 0) sends no $limit parameter at all.
        params = {'$limit': limit} if limit else {}
        if query:
            params['$query'] = query
        headers = {'X-App-Token': self.app_token} if self.app_token else {}
        return url, params, headers

    def get_data(self, resource_id, query=None, limit=None):
        """Fetch a resource and return its decoded JSON body.

        :param resource_id: The ID of the Socrata dataset.
        :param query: Optional value for the ``$query`` parameter.
        :param limit: Optional value for the ``$limit`` parameter.
        :return: The decoded JSON body on status 200; otherwise the error is
            printed and an empty list is returned.
        """
        url, params, headers = self._build_request(resource_id, query, limit)
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, params=params, headers=headers,
                                timeout=self.timeout)
        if response.status_code == 200:
            return response.json()
        print(f"Error {response.status_code}: {response.text}")
        return []

    def get_dataframe(self, resource_id, query=None, limit=None):
        """Same as :meth:`get_data`, with the result wrapped in a ``pandas.DataFrame``."""
        data = self.get_data(resource_id, query=query, limit=limit)
        return pd.DataFrame(data)
def _connect():
client = Socrata("www.dati.lombardia.it", None)
return client
def get_stazioni(offline=False): def get_stazioni(offline=False):
@ -20,8 +47,8 @@ def get_stazioni(offline=False):
"Limiti amministrativi 2015 delle province di Regione Lombardia"], inplace=True) "Limiti amministrativi 2015 delle province di Regione Lombardia"], inplace=True)
stazioni_df.columns = [x.lower() for x in stazioni_df.columns] stazioni_df.columns = [x.lower() for x in stazioni_df.columns]
else: else:
client = _connect() client = SocrataClient("www.dati.lombardia.it", None)
stazioni = client.get("ib47-atvt") stazioni = client.get_data("ib47-atvt")
stazioni_df = pd.DataFrame.from_records(stazioni) stazioni_df = pd.DataFrame.from_records(stazioni)
stazioni_df.drop(columns=[":@computed_region_6hky_swhk", ":@computed_region_ttgh_9sm5", "utm_est", stazioni_df.drop(columns=[":@computed_region_6hky_swhk", ":@computed_region_ttgh_9sm5", "utm_est",
"utm_nord", "storico", "idstazione", "lat", "lng", "location"], inplace=True) "utm_nord", "storico", "idstazione", "lat", "lng", "location"], inplace=True)