added csv download
This commit is contained in:
parent
53eb9ff835
commit
783a8a9057
101
openlamb.py
101
openlamb.py
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env python
|
||||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import traceback
|
||||
|
@ -7,6 +7,7 @@ import pandas as pd
|
|||
import numpy as np
|
||||
from sodapy import Socrata
|
||||
import matplotlib.pyplot as plt
|
||||
import re
|
||||
import glob
|
||||
import os
|
||||
from os import getcwd, chdir
|
||||
|
@ -18,6 +19,21 @@ datasets_ambiente = {"2020": "nicp-bhqi",
|
|||
"2018": "bgqm-yq56",
|
||||
"2017": "j8j8-qsb2"}
|
||||
|
||||
# Open Data Lombardia zipped csv archives of historical air-quality sensor
# readings: archive filename -> Socrata dataset id (the id is interpolated
# into the download URL by download_csv; the year(s) embedded in the
# filename are parsed out by get_csv_dict).
csv_ambiente = {"sensori_aria_1968-1995.zip": "puwt-3xxh",
                "sensori_aria_1996-2000.zip": "wabv-jucw",
                "sensori_aria_2001-2004.zip": "5jdj-7x8y",
                "sensori_aria_2005-2007.zip": "h3i4-wm93",
                "sensori_aria_2008-2010.zip": "wp2f-5nw6",
                "sensori_aria_2011.zip": "5mut-i45n",
                "sensori_aria_2012.zip": "wr4y-c9ti",
                "sensori_aria_2013.zip": "hsdm-3yhd",
                "sensori_aria_2014.zip": "69yc-isbh",
                "sensori_aria_2015.zip": "bpin-c7k8",
                "sensori_aria_2016.zip": "7v3n-37f3",
                "sensori_aria_2017.zip": "fdv6-2rbs",
                "sensori_aria_2018.zip": "4t9j-fd8z",
                "sensori_aria_2019.zip": "j2mz-aium"}
|
||||
|
||||
|
||||
def _connect():
|
||||
client = Socrata("www.dati.lombardia.it", None)
|
||||
|
@ -99,6 +115,69 @@ def list_of_csv_files(dir_name):
|
|||
return filelist
|
||||
|
||||
|
||||
def parse_range(x):
    """Yield the years selected by a spec string as ints.

    Accepts either a single year ("2015") or an inclusive range
    ("2011-2013"); surrounding whitespace is ignored.

    Raises:
        ValueError: if *x* is neither a bare year nor a ``a-b`` range.

    Note: the original yielded a str for a single year but ints for a
    range; both callers wrap the result in str(), so yielding ints
    uniformly is safe and removes the type inconsistency.
    """
    x = x.strip()
    if x.isdigit():
        yield int(x)
    elif '-' in x:
        lo, hi = x.split('-', 1)
        yield from range(int(lo.strip()), int(hi.strip()) + 1)
    else:
        raise ValueError(f"Unknown range specified: {x}")
|
||||
|
||||
|
||||
def get_csv_dict(dict):
    """Build a year -> [filename, dataset_id] lookup from a csv catalogue.

    *dict* maps archive filenames (containing either "YYYY" or
    "YYYY-YYYY") to Socrata dataset ids, like the module-level
    ``csv_ambiente``. Keys of the returned mapping are year strings;
    an archive covering a range contributes one entry per year.

    NOTE: the parameter keeps its original (builtin-shadowing) name so
    keyword callers stay compatible; it is aliased immediately below.
    """
    csv_map = dict
    by_year = {}
    for filename, dataset_id in csv_map.items():
        match_multi = re.search(r"\d{4}-\d{4}", filename)
        match_single = re.search(r"\d{4}", filename)
        if match_multi:
            years = [str(y) for y in parse_range(match_multi.group())]
        elif match_single:
            years = [match_single.group()]
        else:
            # Skip entries whose filename carries no year. The original
            # fell through here with `years` undefined, crashing with a
            # NameError (or silently reusing the previous entry's years).
            print("no match")
            continue
        for year in years:
            by_year[year] = [filename, dataset_id]
    return by_year
|
||||
|
||||
|
||||
def check_csv(args, filelist, csv_dict):
    """Resolve a year spec into the matching csv archive filenames.

    *args* is a single-year or year-range spec (see parse_range),
    *filelist* the archives already present locally, and *csv_dict* a
    year -> [filename, dataset_id] mapping (see get_csv_dict). Any
    archive missing from *filelist* is fetched with download_csv.
    Exits the process when a requested year has no csv dataset.
    """
    selected = []
    for year in (str(y) for y in parse_range(args)):
        entry = csv_dict.get(year)
        if entry is None:
            print("Errore: i dati per l'anno %s non sono disponibili come csv" % year)
            sys.exit(-1)
        filename, dataset_id = entry
        if filename not in filelist:
            print("file %s for year %s is not available in folder %s" % (filename, year, path_to_csv_files))
            download_csv(filename, dataset_id, path_to_csv_files)
        selected.append(filename)
    return selected
|
||||
|
||||
|
||||
def download_csv(filename, id, path):
    """Download one zipped csv archive from the Open Data Lombardia portal.

    filename -- name the archive is saved under
    id       -- Socrata dataset id, interpolated into the download URL
    path     -- target file is written to os.path.dirname(path)/filename
                (NOTE(review): dirname() drops the last path component
                when `path` has no trailing slash — confirm against how
                path_to_csv_files is defined)

    Exits the process with -1 on any network or HTTP failure.
    """
    print("downloading %s....... please wait" % filename)
    # Local import: requests is only needed when a download actually happens.
    import requests
    url = "https://www.dati.lombardia.it/download/" + id + "/application%2Fzip"
    try:
        # The get() call itself must be inside the try: it is what raises
        # ConnectionError/Timeout/TooManyRedirects. The original only
        # wrapped raise_for_status() (which raises HTTPError), so the
        # network-error clauses were unreachable and a connection failure
        # crashed with an unhandled traceback.
        req = requests.get(url, allow_redirects=True)
        req.raise_for_status()
    except requests.RequestException as e:
        # RequestException is the base class of ConnectionError, HTTPError,
        # Timeout and TooManyRedirects — one clause covers them all.
        print("Download error: \n\t %s" % str(e))
        sys.exit(-1)
    else:
        # Context manager guarantees the handle is closed even if the
        # write itself fails.
        with open(os.path.dirname(path) + "/" + filename, "wb") as f:
            f.write(req.content)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dataset", nargs='+', required=False,
|
||||
|
@ -110,19 +189,16 @@ def main():
|
|||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
dati_csv = []
|
||||
csv_dict = get_csv_dict(csv_ambiente)
|
||||
csv_files = list_of_csv_files(path_to_csv_files)
|
||||
dati_csv = []
|
||||
if args.csv:
|
||||
if "all" in args.csv:
|
||||
dati_csv = csv_files
|
||||
if not re.search("\\d{4}-\\d{4}", args.csv[0]):
|
||||
if not re.search("\\d{4}", args.csv[0]):
|
||||
print("Error: syntax for --csv parameter: year for single year or year1-year2 for years range")
|
||||
else:
|
||||
for d in args.csv:
|
||||
if d in csv_files:
|
||||
dati_csv.append(d)
|
||||
else:
|
||||
print("spiacente, ma il file csv %s non e' disponibile nel "
|
||||
"percorso indicato: %s" % (d, path_to_csv_files))
|
||||
sys.exit(-1)
|
||||
dati_csv = check_csv(args.csv[0], csv_files, csv_dict)
|
||||
print("daty csv = %s" % dati_csv)
|
||||
dati = []
|
||||
if args.dataset:
|
||||
if "all" in args.dataset:
|
||||
|
@ -132,7 +208,8 @@ def main():
|
|||
for d in args.dataset:
|
||||
dati.append(datasets_ambiente[d])
|
||||
dataframes = get_dataframes(dati_csv, dati, args.sensori)
|
||||
datamerged = merge_df(dataframes, dataframes.keys())
|
||||
datamerged = merge_df(dataframes, list(dataframes.keys()))
|
||||
datamerged.to_csv("export.csv")
|
||||
import stazioni
|
||||
s = stazioni.get_stazioni()
|
||||
for sensore in datamerged.columns[1:]:
|
||||
|
|
Loading…
Reference in New Issue
Block a user