# -*- encoding: utf-8 -*-
from io import BytesIO
import logging
import typing as T

from latecomers.parse import Details
from latecomers.helpers import logit

import pandas as pd


logger = logging.getLogger(__name__)


@logit(logger)
def to_excel(data: T.List[Details]) -> bytes:
    """
    Serialize the parsed rows into an Excel workbook.

    One worksheet row is written per item of ``data``. Only the attributes
    listed in ``mapping`` are exported, in that order, and every column is
    titled with its human-readable label instead of the attribute name.

    :param data: the list of parsed rows to export.
    :return: the bytes of the Excel file derived from those rows.
    """

    # attribute name on ``Details`` -> column header shown in the sheet
    mapping = {
        "th_arrival": "Arrivo teorico",
        "real_arrival": "Arrivo reale",
        "code": "Codice volo",
        "origin": "Aeroporto di partenza",
        "status": "Stato",
    }

    # Select the columns by attribute name first, then apply the labels:
    # passing the dict as ``columns=`` alone only uses its keys, so the
    # headers would otherwise stay as the raw attribute names.
    df = pd.DataFrame(data, columns=list(mapping)).rename(columns=mapping)

    # NOTE(review): cell values are handed to the Excel writer as-is; if
    # ``Details.status`` holds an Enum member it may need converting to a
    # plain string first -- confirm against the parser.

    # Build the workbook in memory; no temporary file on disk is needed
    # just to read the bytes back. The default row index is still written
    # as the first column, as before.
    buffer = BytesIO()
    df.to_excel(buffer)

    return buffer.getvalue()