added csv download

master
putro 2020-04-16 13:17:16 +02:00
parent 53eb9ff835
commit 783a8a9057
1 changed files with 89 additions and 12 deletions

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/python3
import argparse
import traceback
@ -7,6 +7,7 @@ import pandas as pd
import numpy as np
from sodapy import Socrata
import matplotlib.pyplot as plt
import re
import glob
import os
from os import getcwd, chdir
@ -18,6 +19,21 @@ datasets_ambiente = {"2020": "nicp-bhqi",
"2018": "bgqm-yq56",
"2017": "j8j8-qsb2"}
# Archive file name -> dataset id.
# get_csv_dict() reads the year (or "first-last" year span) out of each file
# name, and download_csv() inserts the id into the download URL.
csv_ambiente = {
    "sensori_aria_1968-1995.zip": "puwt-3xxh",
    "sensori_aria_1996-2000.zip": "wabv-jucw",
    "sensori_aria_2001-2004.zip": "5jdj-7x8y",
    "sensori_aria_2005-2007.zip": "h3i4-wm93",
    "sensori_aria_2008-2010.zip": "wp2f-5nw6",
    "sensori_aria_2011.zip": "5mut-i45n",
    "sensori_aria_2012.zip": "wr4y-c9ti",
    "sensori_aria_2013.zip": "hsdm-3yhd",
    "sensori_aria_2014.zip": "69yc-isbh",
    "sensori_aria_2015.zip": "bpin-c7k8",
    "sensori_aria_2016.zip": "7v3n-37f3",
    "sensori_aria_2017.zip": "fdv6-2rbs",
    "sensori_aria_2018.zip": "4t9j-fd8z",
    "sensori_aria_2019.zip": "j2mz-aium",
}
def _connect():
client = Socrata("www.dati.lombardia.it", None)
@ -99,6 +115,69 @@ def list_of_csv_files(dir_name):
return filelist
def parse_range(x):
    """Expand a year specification into the individual values it covers.

    Accepts either a single number (``"2017"``) or an inclusive range written
    as ``"first-last"`` (``"2015-2017"``). Whitespace around the whole text
    and around each bound is ignored.

    Yields:
        For a single number, the stripped input string itself; for a range,
        every ``int`` from ``first`` to ``last`` inclusive. The mixed element
        type is kept for backward compatibility; callers in this file
        normalise each value with ``str()``.

    Raises:
        ValueError: if the text is neither a number nor a well-formed
            ``first-last`` range (non-numeric bound, more than two parts, or
            ``first`` greater than ``last``). Because this is a generator,
            the error surfaces when iteration starts.
    """
    x = x.strip()
    if x.isdigit():
        yield str(x)
        return
    if '-' not in x:
        raise ValueError(f"Unknown range specified: {x}")

    bounds = [part.strip() for part in x.split('-')]
    # Exactly two bounds are required; previously any extra parts were
    # silently dropped ("2015-2017-2019" behaved like "2015-2017").
    if len(bounds) != 2:
        raise ValueError(f"Unknown range specified: {x}")

    first, last = int(bounds[0]), int(bounds[1])
    # A reversed range used to expand to nothing without any diagnostic.
    if first > last:
        raise ValueError(f"Unknown range specified: {x}")
    yield from range(first, last + 1)
def get_csv_dict(dict):
    """Index archive files by the year(s) found in their file names.

    Args:
        dict: mapping of archive file name to dataset id. The parameter name
            shadows the builtin; it is kept so the signature is unchanged.

    Returns:
        A mapping from year (as a ``str``) to ``[filename, id]``. A file name
        containing ``YYYY-YYYY`` contributes one entry per year of that
        inclusive span; a file name containing a single ``YYYY`` contributes
        one entry. If two files cover the same year, the later one in
        iteration order wins.

    File names without a four-digit year are reported with ``"no match"`` and
    skipped.
    """
    by_year = {}
    for filename, dataset_id in dict.items():
        span = re.search(r"(\d{4})-(\d{4})", filename)
        if span:
            first, last = int(span.group(1)), int(span.group(2))
            years = [str(year) for year in range(first, last + 1)]
        else:
            single = re.search(r"\d{4}", filename)
            if single is None:
                print("no match")
                # Without this, the loop below would run on the previous
                # entry's years (or fail on the very first entry).
                continue
            years = [single.group()]
        for year in years:
            by_year[year] = [filename, dataset_id]
    return by_year
def check_csv(args, filelist, csv_dict):
    """Resolve a year specification to archive files, downloading missing ones.

    Args:
        args: year specification understood by ``parse_range`` (a single year
            or ``first-last``).
        filelist: file names already present locally; an archive whose name
            is not in this collection is downloaded via ``download_csv``.
        csv_dict: mapping from year (``str``) to ``[filename, id]``, as
            produced by ``get_csv_dict``.

    Returns:
        The archive file names covering the requested years, in request
        order and without duplicates (several years can share one archive).

    Exits the process with status -1 if a requested year has no entry in
    ``csv_dict``.
    """
    years = [str(year) for year in parse_range(args)]
    # Track what is on disk, including files fetched during this call, so a
    # multi-year archive is downloaded at most once.
    available = set(filelist)
    selected = []
    for year in years:
        if year not in csv_dict:
            print("Errore: i dati per l'anno %s non sono disponibili come csv" % year)
            sys.exit(-1)
        entry = csv_dict[year]
        filename, dataset_id = entry[0], entry[1]
        if filename not in available:
            print("file %s for year %s is not available in folder %s" % (filename, year, path_to_csv_files))
            download_csv(filename, dataset_id, path_to_csv_files)
            available.add(filename)
        if filename not in selected:
            selected.append(filename)
    return selected
def download_csv(filename, id, path):
    """Download one zip archive and store it next to the local data files.

    Args:
        filename: name under which the archive is saved.
        id: dataset id inserted into the download URL. The parameter name
            shadows the builtin; it is kept so the signature is unchanged.
        path: location of the local data files; the archive is written into
            ``os.path.dirname(path)``.

    On any request failure (connection error, timeout, too many redirects,
    HTTP error status) the error is printed and the process exits with
    status -1.
    """
    print("downloading %s....... please wait" % filename)
    # Function-local import: requests is only needed when a download happens.
    import requests

    url = "https://www.dati.lombardia.it/download/" + id + "/application%2Fzip"
    try:
        # The request itself must be inside the try: connection, timeout and
        # redirect errors are raised by get(), not by raise_for_status().
        # The timeout keeps a stalled connection from blocking forever.
        req = requests.get(url, allow_redirects=True, timeout=60)
        req.raise_for_status()
    except requests.RequestException as e:
        # RequestException is the base class of ConnectionError, HTTPError,
        # Timeout and TooManyRedirects.
        print("Download error: \n\t %s" % str(e))
        sys.exit(-1)

    # If path has no directory part, join() saves into the current directory
    # instead of building "/<filename>" at the filesystem root.
    target = os.path.join(os.path.dirname(path), filename)
    with open(target, "wb") as f:
        f.write(req.content)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", nargs='+', required=False,
@ -110,19 +189,16 @@ def main():
args = parser.parse_args()
try:
dati_csv = []
csv_dict = get_csv_dict(csv_ambiente)
csv_files = list_of_csv_files(path_to_csv_files)
dati_csv = []
if args.csv:
if "all" in args.csv:
dati_csv = csv_files
if not re.search("\\d{4}-\\d{4}", args.csv[0]):
if not re.search("\\d{4}", args.csv[0]):
print("Error: syntax for --csv parameter: year for single year or year1-year2 for years range")
else:
for d in args.csv:
if d in csv_files:
dati_csv.append(d)
else:
print("spiacente, ma il file csv %s non e' disponibile nel "
"percorso indicato: %s" % (d, path_to_csv_files))
sys.exit(-1)
dati_csv = check_csv(args.csv[0], csv_files, csv_dict)
print("daty csv = %s" % dati_csv)
dati = []
if args.dataset:
if "all" in args.dataset:
@ -132,7 +208,8 @@ def main():
for d in args.dataset:
dati.append(datasets_ambiente[d])
dataframes = get_dataframes(dati_csv, dati, args.sensori)
datamerged = merge_df(dataframes, dataframes.keys())
datamerged = merge_df(dataframes, list(dataframes.keys()))
datamerged.to_csv("export.csv")
import stazioni
s = stazioni.get_stazioni()
for sensore in datamerged.columns[1:]: