From 432396c83664b180064ea43beeb752ce06b17fb7 Mon Sep 17 00:00:00 2001 From: Blallo Date: Wed, 7 Sep 2022 23:44:53 +0200 Subject: [PATCH] Integrate FR24 data --- devloop/parse_retrieve.py | 11 ++++--- devloop/retrieve.py | 5 +-- latecomers/main.py | 10 +++--- latecomers/parse.py | 65 +++++++++++++++++++++++++++++++++++++-- latecomers/retrieve.py | 24 ++++++++++++--- latecomers/serializer.py | 1 + 6 files changed, 99 insertions(+), 17 deletions(-) diff --git a/devloop/parse_retrieve.py b/devloop/parse_retrieve.py index d4630f0..ddf30ae 100644 --- a/devloop/parse_retrieve.py +++ b/devloop/parse_retrieve.py @@ -1,12 +1,15 @@ # -*- encoding: utf-8 -*- import os -from latecomers.retrieve import retrieve -from latecomers.parse import find_table, get_details +from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24 +from latecomers.parse import find_table, get_details, parse_fr24 -body = retrieve() +body = retrieve_from_inst() flights = find_table(body) +aux_data = parse_fr24(retrieve_from_fr24()) + +breakpoint() for f in flights: - print(get_details(f, os.environ.get("DEBUG") is not None)) + print(get_details(f, aux_data=aux_data, debug=os.environ.get("DEBUG") is not None)) diff --git a/devloop/retrieve.py b/devloop/retrieve.py index e85886a..d67d543 100644 --- a/devloop/retrieve.py +++ b/devloop/retrieve.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- -from latecomers.retrieve import retrieve +from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24 -body = retrieve() +body = retrieve_from_inst() +fr24_data = retrieve_from_fr24() print(body) diff --git a/latecomers/main.py b/latecomers/main.py index a1919ad..17c07bd 100644 --- a/latecomers/main.py +++ b/latecomers/main.py @@ -3,8 +3,8 @@ import logging import sys import typing as T -from latecomers.retrieve import retrieve -from latecomers.parse import find_table, get_details, Details +from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24 +from latecomers.parse import 
find_table, get_details, Details, parse_fr24 from latecomers.serializer import to_excel from latecomers.notifier import Notifier from latecomers.config import Config @@ -23,11 +23,13 @@ def main(config: Config): The main cli entrypoint. """ out = Notifier(**config.smtp) - body = retrieve() + body = retrieve_from_inst() table = find_table(body) + fr24_data = retrieve_from_fr24() + aux_data = parse_fr24(fr24_data) data: T.List[Details] = [] for row in table: - data.append(get_details(row)) + data.append(get_details(row, aux_data)) if not data: out.send_no_data(config.to) diff --git a/latecomers/parse.py b/latecomers/parse.py index fcf1821..ab0ef84 100644 --- a/latecomers/parse.py +++ b/latecomers/parse.py @@ -1,5 +1,6 @@ # -*- encoding: utf-8 -*- from dataclasses import dataclass +from datetime import datetime from enum import Enum import logging import re @@ -71,6 +72,7 @@ class Details(object): code: T.Optional[T.Text] = None origin: T.Optional[T.Text] = None status: Status = Status.UNKNOWN + fr24_landing_time: T.Optional[T.Text] = None def maybe_parse_hour_th(self, h5: et._ElementTree) -> None: """ @@ -101,7 +103,7 @@ class Details(object): return child = h5.xpath(".//strong") if len(child) == 1: - self.code = child[0].text.strip("\t\n ") + self.code = child[0].text.strip("\t\n ").replace(" ", "") def maybe_parse_airport(self, h5: et._ElementTree) -> None: """ @@ -125,20 +127,37 @@ class Details(object): if len(parsed) == 1: self.status = Status.from_str(parsed[0]) + def maybe_add_aux_data(self, aux_data: T.Dict[T.Text, T.Text]) -> None: + """ + This function extends the current data with auxiliary sources (currently + only FlightRadar24 data). 
+ """ + if not self.code: + return + + self.fr24_landing_time = aux_data.get(self.code) + def __str__(self) -> T.Text: res: T.Dict[T.Text, T.Optional[T.Text]] = {} if self.th_arrival: res["theoric"] = self.th_arrival - res["real"] = self.real_arrival + if self.real_arrival: + res["real"] = self.real_arrival if self.code: res["code"] = self.code res["origin"] = self.origin res["status"] = self.status.value + if self.fr24_landing_time: + res["fr24_landing_time"] = self.fr24_landing_time desc = ",".join([f"{k}={v}" for k, v in res.items()]) return f"Detail<{desc}>" -def get_details(table_entry: et._ElementTree, debug: bool = False) -> Details: +def get_details( + table_entry: et._ElementTree, + aux_data: T.Optional[T.Dict[T.Text, T.Text]] = None, + debug: bool = False, +) -> Details: """ Find the dates in a table row. If a strikenthrough time is found, it is returned as second element in the tuple. @@ -166,4 +185,44 @@ def get_details(table_entry: et._ElementTree, debug: bool = False) -> Details: d.maybe_parse_airport(res[3]) d.maybe_parse_status(res[4]) + if aux_data: + d.maybe_add_aux_data(aux_data) + return d + + +def parse_fr24( + data: T.Optional[T.Dict[T.Text, T.Any]] +) -> T.Optional[T.Dict[T.Text, T.Text]]: + """ + This function maps FlightRadar24 flight codes to arrival times (HH:MM).
+ """ + if not data: + return None + + try: + results = {} + for flight in data["result"]["response"]["airport"]["pluginData"]["schedule"][ + "arrivals" + ][ + "data" + ]: # noqa: E501 + try: + id_num = flight["flight"]["identification"]["number"] + if (_code := id_num.get("default")): + code = _code + elif (_code := id_num.get("alternative")): + code = _code + else: + # skip if no flight code found + continue + ts = flight["flight"]["time"]["real"]["arrival"] + real_arrival = datetime.fromtimestamp(ts).strftime("%H:%M") + results[code] = real_arrival + except: # noqa: E722 + continue + + return results + + except: # noqa: E722 + return None diff --git a/latecomers/retrieve.py b/latecomers/retrieve.py index 6b00aec..957cad0 100644 --- a/latecomers/retrieve.py +++ b/latecomers/retrieve.py @@ -14,17 +14,33 @@ HOW_MANY = 200 logger = logging.getLogger(__name__) -def remote() -> T.Text: +def remote_inst() -> T.Text: """ - Returns the url to use to retrieve yesterday's data. + Returns the url to retrieve yesterday's data from institutional site. """ yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") url = f"https://www.adr.it/pax-cia-voli-in-tempo-reale?p_p_id=3_WAR_realtimeflightsportlet&p_p_lifecycle=0&p_p_state=normal&p_p_mode=view&_3_WAR_realtimeflightsportlet_tab=arrival&_3_WAR_realtimeflightsportlet_codScaOpe=CIA&_3_WAR_realtimeflightsportlet_rouIata=&_3_WAR_realtimeflightsportlet_searchType=standard&_3_WAR_realtimeflightsportlet_airport=&_3_WAR_realtimeflightsportlet_date={yesterday}&_3_WAR_realtimeflightsportlet_orario=00:00-24:00&_3_WAR_realtimeflightsportlet_codVet=&_3_WAR_realtimeflightsportlet_carrier=&_3_WAR_realtimeflightsportlet_rtFlightsSearchContainerPrimaryKeys=&_3_WAR_realtimeflightsportlet_delta={HOW_MANY}" # noqa: E501 return url +def remote_fr24() -> T.Text: + """ + Returns the url to retrieve data from flightradar24. 
+ """ + yesterday_ts = int((datetime.now() - timedelta(days=1)).timestamp()) + url = f"https://api.flightradar24.com/common/v1/airport.json?code=cia&plugin[]=&plugin-setting[schedule][mode]=arrivals&plugin-setting[schedule][timestamp]={yesterday_ts}&page=-1&limit=100&fleet=&token=" # noqa: E501 + return url + + @logit(logger) -def retrieve() -> T.Text: +def retrieve_from_inst() -> T.Text: """This function retrieves the body from the website page""" - r = requests.get(remote()) + r = requests.get(remote_inst()) return r.text + + +@logit(logger) +def retrieve_from_fr24() -> T.Dict[T.Text, T.Any]: + """This function retrieves data from flightradar24""" + r = requests.get(remote_fr24(), headers={"User-Agent": "curl/7.85.0"}) + return r.json() diff --git a/latecomers/serializer.py b/latecomers/serializer.py index a96cbf7..3062e95 100644 --- a/latecomers/serializer.py +++ b/latecomers/serializer.py @@ -25,6 +25,7 @@ def to_excel(data: T.List[Details]) -> bytes: "code": "Codice volo", "origin": "Aeroporto di partenza", "status": "Stato", + "fr24_landing_time": "Ora atterraggio (FlightRadar24)", } df = pd.DataFrame(data, columns=mapping) df["status"] = df["status"].map(lambda x: x.value)