From 5182cdeb844b0617c07909371f37a00bed91239d Mon Sep 17 00:00:00 2001 From: Blallo Date: Thu, 25 Aug 2022 21:35:25 +0200 Subject: [PATCH] Improve parsing hours --- latecomers/parse.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/latecomers/parse.py b/latecomers/parse.py index e1a3724..514fef2 100644 --- a/latecomers/parse.py +++ b/latecomers/parse.py @@ -72,17 +72,25 @@ class Details(object): origin: T.Optional[T.Text] = None status: Status = Status.UNKNOWN - def maybe_parse_hour(self, h5: et._ElementTree) -> None: + def maybe_parse_hour_th(self, h5: et._ElementTree) -> None: """ - This function fills the fileds related to the arrival hour, + This function fills the fields related to the theoretical arrival hour, if the input matches some heuristics. """ hour = TIME_RE.findall(h5.text) if len(hour) == 1: + self.th_arrival = hour[0] if "text-decoration: line-through" in h5.attrib.get("style", ""): - self.th_arrival = hour[0] - else: - self.real_arrival = hour[0] + self.real_arrival = None + + def maybe_parse_hour_real(self, h5: et._ElementTree) -> None: + """ + This function fills the fields related to the real arrival hour, + if the input matches some heuristics. + """ + hour = TIME_RE.findall(h5.text) + if len(hour) == 1: + self.real_arrival = hour[0] def maybe_parse_code(self, h5: et._ElementTree) -> None: """ @@ -145,13 +153,13 @@ def get_details(table_entry: et._ElementTree, debug: bool = False) -> Details: d = Details() if len(res) == 5: - d.maybe_parse_hour(res[0]) + d.maybe_parse_hour_th(res[0]) d.maybe_parse_code(res[1]) d.maybe_parse_airport(res[2]) d.maybe_parse_status(res[3]) elif len(res) == 6: - d.maybe_parse_hour(res[0]) - d.maybe_parse_hour(res[1]) + d.maybe_parse_hour_th(res[0]) + d.maybe_parse_hour_real(res[1]) d.maybe_parse_code(res[2]) d.maybe_parse_airport(res[3]) d.maybe_parse_status(res[4])