added csv download

This commit is contained in:
putro 2020-04-16 13:17:16 +02:00
parent 53eb9ff835
commit 783a8a9057

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/python3
import argparse import argparse
import traceback import traceback
@ -7,6 +7,7 @@ import pandas as pd
import numpy as np import numpy as np
from sodapy import Socrata from sodapy import Socrata
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import re
import glob import glob
import os import os
from os import getcwd, chdir from os import getcwd, chdir
@ -18,6 +19,21 @@ datasets_ambiente = {"2020": "nicp-bhqi",
"2018": "bgqm-yq56", "2018": "bgqm-yq56",
"2017": "j8j8-qsb2"} "2017": "j8j8-qsb2"}
# Zip archives of historical air-sensor readings, keyed by archive filename.
# Each value is the identifier placed in the www.dati.lombardia.it download
# URL for that archive; the year (or year range) is encoded in the filename.
csv_ambiente = {
    "sensori_aria_1968-1995.zip": "puwt-3xxh",
    "sensori_aria_1996-2000.zip": "wabv-jucw",
    "sensori_aria_2001-2004.zip": "5jdj-7x8y",
    "sensori_aria_2005-2007.zip": "h3i4-wm93",
    "sensori_aria_2008-2010.zip": "wp2f-5nw6",
    "sensori_aria_2011.zip": "5mut-i45n",
    "sensori_aria_2012.zip": "wr4y-c9ti",
    "sensori_aria_2013.zip": "hsdm-3yhd",
    "sensori_aria_2014.zip": "69yc-isbh",
    "sensori_aria_2015.zip": "bpin-c7k8",
    "sensori_aria_2016.zip": "7v3n-37f3",
    "sensori_aria_2017.zip": "fdv6-2rbs",
    "sensori_aria_2018.zip": "4t9j-fd8z",
    "sensori_aria_2019.zip": "j2mz-aium",
}
def _connect(): def _connect():
client = Socrata("www.dati.lombardia.it", None) client = Socrata("www.dati.lombardia.it", None)
@ -99,6 +115,69 @@ def list_of_csv_files(dir_name):
return filelist return filelist
def parse_range(x):
    """Expand a year specification into the individual values it covers.

    Accepts either a single number (``"2015"``) or an inclusive range
    written as ``"start-end"`` (``"2008-2010"``). Whitespace around the
    whole specification and around each bound is ignored.

    Yields:
        For a single number, the stripped input string itself; for a
        range, every ``int`` from start to end inclusive. The two forms
        yield different types, so callers wanting a uniform type should
        wrap each item in ``str()`` or ``int()``.

    Raises:
        ValueError: if the specification is neither form, has more than
            two ``-``-separated parts, has a non-numeric bound, or is a
            descending range. As this is a generator, the error surfaces
            when it is iterated, not when it is called.
    """
    x = x.strip()
    if x.isdigit():
        yield x
        return
    if '-' not in x:
        raise ValueError(f"Unknown range specified: {x}")

    bounds = [part.strip() for part in x.split('-')]
    # Reject "2010-2012-2014", "-2010", "2010-" and "a-b" explicitly
    # instead of silently truncating or failing inside int().
    if len(bounds) != 2 or not all(part.isdigit() for part in bounds):
        raise ValueError(f"Unknown range specified: {x}")

    start, end = int(bounds[0]), int(bounds[1])
    # A descending range would otherwise expand to nothing, which callers
    # cannot tell apart from a valid request.
    if start > end:
        raise ValueError(f"Unknown range specified: {x}")
    yield from range(start, end + 1)
def get_csv_dict(dict):
    """Index csv archive entries by the year(s) named in each filename.

    Args:
        dict: mapping of archive filename to its download identifier.
            A filename containing ``YYYY-YYYY`` covers that inclusive
            range of years; otherwise the first four-digit run in the
            filename is taken as its single year.

    Returns:
        A new mapping from year (as a string) to ``[filename, identifier]``.
        If several filenames cover the same year, the last one seen wins.
        Filenames with no four-digit year are reported with "no match"
        and left out of the result.
    """
    by_year = {}
    for filename, dataset_id in dict.items():
        span = re.search(r"(\d{4})-(\d{4})", filename)
        single = re.search(r"\d{4}", filename)
        if span:
            first, last = int(span.group(1)), int(span.group(2))
            years = [str(year) for year in range(first, last + 1)]
        elif single:
            years = [single.group()]
        else:
            print("no match")
            # Skip this entry: falling through would reuse the previous
            # file's years (or hit an unbound name on the first entry).
            continue
        for year in years:
            by_year[year] = [filename, dataset_id]
    return by_year
def check_csv(args, filelist, csv_dict):
    """Resolve a year specification to the csv archives that cover it.

    Any required archive that is not listed in ``filelist`` is downloaded
    via ``download_csv`` into ``path_to_csv_files``.

    Args:
        args: year specification understood by ``parse_range`` (a single
            year or ``year1-year2``).
        filelist: names of the archives already present locally.
        csv_dict: mapping of year string to ``[filename, identifier]``.

    Returns:
        The archive filenames needed for the requested years, in year
        order, each listed once even when it covers several of the
        requested years.

    Exits the process with status -1 if a requested year has no archive.
    """
    years = [str(x) for x in parse_range(args)]
    needed = []
    for y in years:
        if y not in csv_dict:
            print("Errore: i dati per l'anno %s non sono disponibili come csv" % y)
            sys.exit(-1)
        filename, dataset_id = csv_dict[y][0], csv_dict[y][1]
        # A multi-year archive shows up once per year of the range; handle
        # it only the first time so it is neither downloaded nor returned
        # more than once.
        if filename in needed:
            continue
        if filename not in filelist:
            print("file %s for year %s is not available in folder %s" % (filename, y, path_to_csv_files))
            download_csv(filename, dataset_id, path_to_csv_files)
        needed.append(filename)
    return needed
def download_csv(filename, id, path):
    """Download one zip archive from www.dati.lombardia.it and save it.

    Args:
        filename: name to give the saved file.
        id: identifier of the archive, inserted into the download URL.
        path: location of the csv folder; the file is written into
            ``os.path.dirname(path)``.
            NOTE(review): ``dirname`` drops the last path component when
            ``path`` has no trailing separator - confirm that callers
            pass a path ending in "/".

    Any request failure (connection error, timeout, too many redirects,
    HTTP error status) is printed and the process exits with status -1.
    """
    print("downloading %s....... please wait" % filename)
    # Imported here so the rest of the script works without requests
    # installed when no download is needed.
    import requests
    url = "https://www.dati.lombardia.it/download/" + id + "/application%2Fzip"
    try:
        # The request itself must be inside the try: connection and
        # timeout errors are raised by get(), not by raise_for_status().
        req = requests.get(url, allow_redirects=True)
        req.raise_for_status()
    except requests.RequestException as e:
        # RequestException is the base class of ConnectionError, HTTPError,
        # Timeout and TooManyRedirects.
        print("Download error: \n\t %s" % str(e))
        sys.exit(-1)

    # os.path.join avoids producing "/<filename>" (filesystem root) when
    # dirname(path) is empty.
    target = os.path.join(os.path.dirname(path), filename)
    with open(target, "wb") as f:
        f.write(req.content)
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--dataset", nargs='+', required=False, parser.add_argument("--dataset", nargs='+', required=False,
@ -110,19 +189,16 @@ def main():
args = parser.parse_args() args = parser.parse_args()
try: try:
dati_csv = [] csv_dict = get_csv_dict(csv_ambiente)
csv_files = list_of_csv_files(path_to_csv_files) csv_files = list_of_csv_files(path_to_csv_files)
dati_csv = []
if args.csv: if args.csv:
if "all" in args.csv: if not re.search("\\d{4}-\\d{4}", args.csv[0]):
dati_csv = csv_files if not re.search("\\d{4}", args.csv[0]):
print("Error: syntax for --csv parameter: year for single year or year1-year2 for years range")
else: else:
for d in args.csv: dati_csv = check_csv(args.csv[0], csv_files, csv_dict)
if d in csv_files: print("daty csv = %s" % dati_csv)
dati_csv.append(d)
else:
print("spiacente, ma il file csv %s non e' disponibile nel "
"percorso indicato: %s" % (d, path_to_csv_files))
sys.exit(-1)
dati = [] dati = []
if args.dataset: if args.dataset:
if "all" in args.dataset: if "all" in args.dataset:
@ -132,7 +208,8 @@ def main():
for d in args.dataset: for d in args.dataset:
dati.append(datasets_ambiente[d]) dati.append(datasets_ambiente[d])
dataframes = get_dataframes(dati_csv, dati, args.sensori) dataframes = get_dataframes(dati_csv, dati, args.sensori)
datamerged = merge_df(dataframes, dataframes.keys()) datamerged = merge_df(dataframes, list(dataframes.keys()))
datamerged.to_csv("export.csv")
import stazioni import stazioni
s = stazioni.get_stazioni() s = stazioni.get_stazioni()
for sensore in datamerged.columns[1:]: for sensore in datamerged.columns[1:]: