Iterate over pages

master
sfigato 2024-05-06 01:10:43 +02:00
parent 6e56f90b6b
commit 4f67b1c97a
Signed by: blallo
GPG Key ID: C530464EEDCF489A
4 changed files with 49 additions and 17 deletions

View File

@ -2,14 +2,17 @@
import os import os
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24 from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
from latecomers.parse import find_table, get_details, parse_fr24 from latecomers.parse import count_pages, find_table, get_details, parse_fr24
body = retrieve_from_inst()
flights = find_table(body)
aux_data = parse_fr24(retrieve_from_fr24())
breakpoint() breakpoint()
aux_data = parse_fr24(retrieve_from_fr24())
body = retrieve_from_inst()
pages = count_pages(body)
flights = find_table(body)
for page in range(2, pages + 1):
body = retrieve_from_inst(page)
flights.extend(find_table(body))
for f in flights: for f in flights:
print(get_details(f, aux_data=aux_data, debug=os.environ.get("DEBUG") is not None)) print(get_details(f, aux_data=aux_data))

View File

@ -4,7 +4,7 @@ import sys
import typing as T import typing as T
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24 from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
from latecomers.parse import find_table, get_details, Details, parse_fr24 from latecomers.parse import count_pages, find_table, get_details, Details, parse_fr24
from latecomers.serializer import to_excel from latecomers.serializer import to_excel
from latecomers.notifier import Notifier from latecomers.notifier import Notifier
from latecomers.config import Config from latecomers.config import Config
@ -23,11 +23,19 @@ def main(config: Config):
The main cli entrypoint. The main cli entrypoint.
""" """
out = Notifier(**config.smtp) out = Notifier(**config.smtp)
body = retrieve_from_inst() body = retrieve_from_inst()
pages = count_pages(body)
logger.info(f"found all data in {pages} page(s)")
table = find_table(body) table = find_table(body)
for page in range(2, pages + 1):
body = retrieve_from_inst(page)
table.extend(find_table(body))
fr24_data = retrieve_from_fr24() fr24_data = retrieve_from_fr24()
aux_data = parse_fr24(fr24_data) aux_data = parse_fr24(fr24_data)
data: T.List[Details] = [] data: T.List[Details] = []
for row in table: for row in table:
data.append(get_details(row, aux_data)) data.append(get_details(row, aux_data))

View File

@ -28,6 +28,21 @@ def not_empty(obj: et._Element) -> bool:
raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}") raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}")
@logit(logger)
def count_pages(html_content: T.Text) -> int:
    """
    Count how many result pages the paginator advertises.

    The document is parsed with the module-level PARSER and the `<li>`
    children of the pagination list inside the paginator `<div>` are
    collected.

    :param html_content: the HTML of a listing page.
    :return: the number of pages, never less than 1. When no paginator
        entries are found a single page is assumed.
    """
    root = et.fromstring(html_content, parser=PARSER)
    li_items = root.xpath(
        "//div[contains(@data-qa-id, 'paginator')]/ul[contains(@class, 'pagination')]/li"
    )
    if not li_items:
        return 1
    # Two of the <li> entries are not counted as pages (presumably the
    # previous/next controls -- TODO confirm against the live markup).
    # Clamp the result so that a paginator with fewer than three entries
    # still yields one page instead of 0 or a negative count.
    return max(1, len(li_items) - 2)
@logit(logger) @logit(logger)
def find_table(html_content: T.Text) -> T.List[et._ElementTree]: def find_table(html_content: T.Text) -> T.List[et._ElementTree]:
""" """
@ -110,7 +125,9 @@ class Details(object):
This function fills the fields related to the flight code, This function fills the fields related to the flight code,
if present and the input matches some heuristics. if present and the input matches some heuristics.
""" """
code = self.row.xpath(".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong") code = self.row.xpath(
".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong"
)
if len(code) != 1: if len(code) != 1:
logger.debug("Cannot parse code") logger.debug("Cannot parse code")
return return
@ -121,7 +138,9 @@ class Details(object):
This function fills the field for the airport, if the input matches some This function fills the field for the airport, if the input matches some
heuristics. heuristics.
""" """
airport = self.row.xpath(".//td[contains(@class, 'lfr-flight-departure-column')]/h5") airport = self.row.xpath(
".//td[contains(@class, 'lfr-flight-departure-column')]/h5"
)
if len(airport) != 1: if len(airport) != 1:
logger.debug("Cannot parse airport") logger.debug("Cannot parse airport")
return return
@ -132,7 +151,9 @@ class Details(object):
This function fills the field for the status, if the input matches some This function fills the field for the status, if the input matches some
heuristics. heuristics.
""" """
status = self.row.xpath(".//td[contains(@class, 'lfr-flight-status-column')]/h5") status = self.row.xpath(
".//td[contains(@class, 'lfr-flight-status-column')]/h5"
)
if len(status) != 1: if len(status) != 1:
logger.debug("Cannot parse status") logger.debug("Cannot parse status")
return return

File diff suppressed because one or more lines are too long