# Mapping of downloadable CSV archive names (as published on
# dati.lombardia.it) to their Socrata dataset ids.  The year span covered
# by each archive is encoded in its file name and parsed by get_csv_dict().
csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh",
                "sensori_aria_1996-2000.zip": "wabv-jucw",
                "sensori_aria_2001-2004.zip": "5jdj-7x8y",
                "sensori_aria_2005-2007.zip": "h3i4-wm93",
                "sensori_aria_2008-2010.zip": "wp2f-5nw6",
                "sensori_aria_2011.zip": "5mut-i45n",
                "sensori_aria_2012.zip": "wr4y-c9ti",
                "sensori_aria_2013.zip": "hsdm-3yhd",
                "sensori_aria_2014.zip": "69yc-isbh",
                "sensori_aria_2015.zip": "bpin-c7k8",
                "sensori_aria_2016.zip": "7v3n-37f3",
                "sensori_aria_2017.zip": "fdv6-2rbs",
                "sensori_aria_2018.zip": "4t9j-fd8z",
                "sensori_aria_2019.zip": "j2mz-aium"}


def parse_range(x):
    """Yield every year contained in *x*.

    *x* is either a single year ("2015") or an inclusive range
    ("2013-2016").  Years are yielded as ints in both cases.  (The
    original yielded a str for the single-year form and ints for the
    range form; every caller wraps each value in str(), so unifying on
    int is behavior-compatible.)

    Raises:
        ValueError: for any other syntax.
    """
    x = x.strip()
    if x.isdigit():
        yield int(x)
    elif '-' in x:
        lo, _, hi = x.partition('-')
        yield from range(int(lo.strip()), int(hi.strip()) + 1)
    else:
        raise ValueError(f"Unknown range specified: {x}")


def get_csv_dict(archives):
    """Build a {year (str): [filename, dataset_id]} lookup table.

    The year(s) covered by each archive are taken from the 4-digit year
    or year-range embedded in the file name.  File names with no
    recognizable year are reported and skipped.

    Fix: the original printed "no match" but then fell through to
    ``for year in years`` with ``years`` unbound (or stale), crashing
    with an UnboundLocalError on the first non-matching entry.  The
    parameter also shadowed the builtin ``dict``.
    """
    table = {}
    for filename, dataset_id in archives.items():
        multi = re.search(r"\d{4}-\d{4}", filename)
        single = re.search(r"\d{4}", filename)
        if multi:
            years = [str(y) for y in parse_range(multi.group())]
        elif single:
            years = [single.group()]
        else:
            print("no match")
            continue  # skip entries whose name encodes no year
        for year in years:
            table[year] = [filename, dataset_id]
    return table


def check_csv(args, filelist, csv_dict):
    """Return the archive file names covering the years requested in *args*.

    *args* is a year or year-range string; *filelist* the archives already
    present locally; *csv_dict* the mapping built by get_csv_dict().
    Archives missing locally are downloaded first.  Exits the process if a
    requested year has no published archive.

    NOTE(review): relies on the module-level ``path_to_csv_files`` global
    defined elsewhere in the file.
    """
    years = [str(y) for y in parse_range(args)]
    found = []
    for year in years:
        if year not in csv_dict:
            print("Errore: i dati per l'anno %s non sono disponibili come csv" % year)
            sys.exit(-1)
        filename, dataset_id = csv_dict[year]
        if filename not in filelist:
            print("file %s for year %s is not available in folder %s"
                  % (filename, year, path_to_csv_files))
            download_csv(filename, dataset_id, path_to_csv_files)
        found.append(filename)
    return found


def download_csv(filename, id, path):
    """Download one zip archive from dati.lombardia.it.

    Exits the process with -1 on any network or HTTP error.
    """
    print("downloading %s....... please wait" % filename)
    import requests  # local import: third-party dep only needed here
    url = "https://www.dati.lombardia.it/download/" + id + "/application%2Fzip"
    try:
        # Fix: the GET itself must be inside the try block.
        # ConnectionError / Timeout / TooManyRedirects are raised by
        # requests.get(), not by raise_for_status() (which only raises
        # HTTPError), so the original except clause could never catch
        # them.  RequestException is the common base of all of these.
        # A timeout is added so a dead server cannot hang us forever.
        req = requests.get(url, allow_redirects=True, timeout=60)
        req.raise_for_status()
    except requests.RequestException as e:
        print("Download error: \n\t %s" % str(e))
        sys.exit(-1)
    else:
        # NOTE(review): os.path.dirname() strips the last path component
        # of *path*; kept for compatibility with the original, but
        # os.path.join(path, filename) is probably what was intended --
        # confirm against how path_to_csv_files is defined.
        with open(os.path.dirname(path) + "/" + filename, "wb") as f:
            f.write(req.content)
merge_df(dataframes, dataframes.keys()) + datamerged = merge_df(dataframes, list(dataframes.keys())) + datamerged.to_csv("export.csv") import stazioni s = stazioni.get_stazioni() for sensore in datamerged.columns[1:]: