Iterate over pages

master
blallo 2024-05-06 01:10:43 +02:00
parent 6e56f90b6b
commit 4f67b1c97a
Signed by: blallo
GPG Key ID: C530464EEDCF489A
4 changed files with 49 additions and 17 deletions

View File

@ -2,14 +2,17 @@
import os
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
from latecomers.parse import find_table, get_details, parse_fr24
body = retrieve_from_inst()
flights = find_table(body)
aux_data = parse_fr24(retrieve_from_fr24())
from latecomers.parse import count_pages, find_table, get_details, parse_fr24
breakpoint()
aux_data = parse_fr24(retrieve_from_fr24())
body = retrieve_from_inst()
pages = count_pages(body)
flights = find_table(body)
for page in range(2, pages + 1):
body = retrieve_from_inst(page)
flights.extend(find_table(body))
for f in flights:
print(get_details(f, aux_data=aux_data, debug=os.environ.get("DEBUG") is not None))
print(get_details(f, aux_data=aux_data))

View File

@ -4,7 +4,7 @@ import sys
import typing as T
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
from latecomers.parse import find_table, get_details, Details, parse_fr24
from latecomers.parse import count_pages, find_table, get_details, Details, parse_fr24
from latecomers.serializer import to_excel
from latecomers.notifier import Notifier
from latecomers.config import Config
@ -23,11 +23,19 @@ def main(config: Config):
The main cli entrypoint.
"""
out = Notifier(**config.smtp)
body = retrieve_from_inst()
pages = count_pages(body)
logger.info(f"found all data in {pages} page(s)")
table = find_table(body)
for page in range(2, pages + 1):
body = retrieve_from_inst(page)
table.extend(find_table(body))
fr24_data = retrieve_from_fr24()
aux_data = parse_fr24(fr24_data)
data: T.List[Details] = []
for row in table:
data.append(get_details(row, aux_data))

View File

@ -28,6 +28,21 @@ def not_empty(obj: et._Element) -> bool:
raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}")
@logit(logger)
def count_pages(html_content: T.Text) -> int:
    """
    Count how many pages there are to be accessed.

    The markup in ``html_content`` is parsed with ``PARSER`` and the ``<li>``
    children of the pagination list inside the paginator ``<div>`` are
    counted. The returned value is never lower than 1, so callers can
    safely use it as the upper bound of a page loop.
    """
    root = et.fromstring(html_content, parser=PARSER)
    li_items = root.xpath(
        "//div[contains(@data-qa-id, 'paginator')]/ul[contains(@class, 'pagination')]/li"
    )
    # Two of the <li> entries are not counted as pages (presumably the
    # previous/next controls -- TODO confirm against the live markup).
    # Clamping to 1 covers both the "no paginator at all" case and a
    # paginator with two or fewer entries, which would otherwise yield
    # zero or a negative page count.
    return max(1, len(li_items) - 2)
@logit(logger)
def find_table(html_content: T.Text) -> T.List[et._ElementTree]:
"""
@ -110,7 +125,9 @@ class Details(object):
This function fills the fields related to the flight code,
if present, and the input matches some heuristics.
"""
code = self.row.xpath(".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong")
code = self.row.xpath(
".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong"
)
if len(code) != 1:
logger.debug("Cannot parse code")
return
@ -121,7 +138,9 @@ class Details(object):
This function fills the field for the airport, if the input matches some
heuristics.
"""
airport = self.row.xpath(".//td[contains(@class, 'lfr-flight-departure-column')]/h5")
airport = self.row.xpath(
".//td[contains(@class, 'lfr-flight-departure-column')]/h5"
)
if len(airport) != 1:
logger.debug("Cannot parse airport")
return
@ -132,7 +151,9 @@ class Details(object):
This function fills the field for the status, if the input matches some
heuristics.
"""
status = self.row.xpath(".//td[contains(@class, 'lfr-flight-status-column')]/h5")
status = self.row.xpath(
".//td[contains(@class, 'lfr-flight-status-column')]/h5"
)
if len(status) != 1:
logger.debug("Cannot parse status")
return

File diff suppressed because one or more lines are too long