Add serializer to write Excel output

master
blallo 2022-08-24 12:43:32 +02:00
parent b15fc9cdca
commit 89abbff74d
Signed by: blallo
GPG Key ID: 0CBE577C9B72DC3F
5 changed files with 56 additions and 1 deletions

View File

@ -3,7 +3,7 @@ import os
from latecomers.parse import find_table, get_details
with open("./sample.html") as f:
with open("./devloop/sample.html") as f:
content = f.read()
flights = find_table(content)

View File

@ -0,0 +1,16 @@
# -*- encoding: utf-8 -*-
import os
from latecomers.parse import find_table, get_details
from latecomers.serializer import to_excel
# Read the sample HTML page shipped next to this script.
with open("./devloop/sample.html") as sample:
    content = sample.read()

# DEBUG only has to be set in the environment; its value is not inspected.
debug = os.environ.get("DEBUG") is not None

# Parse every row found in the page, then print the serialized workbook.
data = [get_details(flight, debug) for flight in find_table(content)]
print(to_excel(data))

View File

@ -1,4 +1,5 @@
# -*- encoding: utf-8 -*-
from dataclasses import dataclass
from enum import Enum
import re
import typing as T
@ -57,6 +58,7 @@ class Status(Enum):
return cls.UNKNOWN
@dataclass
class Details(object):
th_arrival: T.Optional[T.Text] = None
real_arrival: T.Optional[T.Text] = None

View File

@ -0,0 +1,36 @@
# -*- encoding: utf-8 -*-
import logging
from tempfile import NamedTemporaryFile
import typing as T
from latecomers.parse import Details
from latecomers.helpers import logit
import pandas as pd
logger = logging.getLogger(__name__)
@logit(logger)
def to_excel(data: T.List[Details]) -> bytes:
    """
    Serialize the parsed rows into an Excel file.

    Only the columns listed in ``mapping`` are written, each one under
    its human-readable header.

    :param data: the list of parsed ``Details`` rows.
    :returns: the bytes of the Excel file derived from those rows.
    """
    # Source column name -> header shown in the spreadsheet.
    # NOTE(review): the keys are presumably ``Details`` field names; only
    # ``th_arrival`` and ``real_arrival`` can be confirmed from here.
    mapping = {
        "th_arrival": "Arrivo teorico",
        "real_arrival": "Arrivo reale",
        "code": "Codice volo",
        "origin": "Aeroporto di partenza",
        "status": "Stato",
    }

    # Select the columns by key first, then apply the labels. Passing the
    # dict as ``columns`` alone only uses its keys, so the labels would
    # never reach the output.
    df = pd.DataFrame(data, columns=list(mapping)).rename(columns=mapping)

    with NamedTemporaryFile() as tmp:
        df.to_excel(tmp)
        # Rewind: the write left the file position at the end.
        tmp.seek(0)
        content = tmp.read()

    return content

View File

@ -10,6 +10,7 @@ dependencies = [
"requests",
"lxml",
"pandas",
"openpyxl",
]
[tool.setuptools.packages.find]