Iterate over pages
This commit is contained in:
parent
6e56f90b6b
commit
4f67b1c97a
|
@ -2,14 +2,17 @@
|
|||
import os
|
||||
|
||||
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
||||
from latecomers.parse import find_table, get_details, parse_fr24
|
||||
|
||||
body = retrieve_from_inst()
|
||||
|
||||
flights = find_table(body)
|
||||
|
||||
aux_data = parse_fr24(retrieve_from_fr24())
|
||||
from latecomers.parse import count_pages, find_table, get_details, parse_fr24
|
||||
|
||||
# Parse the auxiliary FR24 data once; it is passed to get_details() below.
aux_data = parse_fr24(retrieve_from_fr24())

# The first page tells us how many pages exist and provides the first rows.
body = retrieve_from_inst()
pages = count_pages(body)
flights = find_table(body)

# Pages are requested starting from 2 because the first one is already parsed.
for page in range(2, pages + 1):
    body = retrieve_from_inst(page)
    flights.extend(find_table(body))
|
||||
|
||||
for f in flights:
|
||||
print(get_details(f, aux_data=aux_data, debug=os.environ.get("DEBUG") is not None))
|
||||
print(get_details(f, aux_data=aux_data))
|
||||
|
|
|
@ -4,7 +4,7 @@ import sys
|
|||
import typing as T
|
||||
|
||||
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
||||
from latecomers.parse import find_table, get_details, Details, parse_fr24
|
||||
from latecomers.parse import count_pages, find_table, get_details, Details, parse_fr24
|
||||
from latecomers.serializer import to_excel
|
||||
from latecomers.notifier import Notifier
|
||||
from latecomers.config import Config
|
||||
|
@ -23,11 +23,19 @@ def main(config: Config):
|
|||
The main cli entrypoint.
|
||||
"""
|
||||
out = Notifier(**config.smtp)
|
||||
|
||||
body = retrieve_from_inst()
|
||||
pages = count_pages(body)
|
||||
logger.info(f"found all data in {pages} page(s)")
|
||||
table = find_table(body)
|
||||
for page in range(2, pages + 1):
|
||||
body = retrieve_from_inst(page)
|
||||
table.extend(find_table(body))
|
||||
|
||||
fr24_data = retrieve_from_fr24()
|
||||
aux_data = parse_fr24(fr24_data)
|
||||
data: T.List[Details] = []
|
||||
|
||||
for row in table:
|
||||
data.append(get_details(row, aux_data))
|
||||
|
||||
|
|
|
@ -28,6 +28,21 @@ def not_empty(obj: et._Element) -> bool:
|
|||
raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}")
|
||||
|
||||
|
||||
@logit(logger)
def count_pages(html_content: T.Text) -> int:
    """
    Count how many pages there are to be accessed.

    The markup is parsed with the module-level PARSER and the <li> entries
    of the pagination list are counted. The returned value is never lower
    than 1, so callers can always rely on at least the first page.
    """
    root = et.fromstring(html_content, parser=PARSER)
    li_items = root.xpath(
        "//div[contains(@data-qa-id, 'paginator')]/ul[contains(@class, 'pagination')]/li"
    )
    if not li_items:
        # No paginator in the markup: everything is on a single page.
        return 1

    # Two entries are not counted as pages (presumably the previous/next
    # controls -- TODO confirm against the live markup). Clamp to 1 so that a
    # paginator with two or fewer entries never yields zero or negative pages.
    return max(1, len(li_items) - 2)
|
||||
|
||||
|
||||
@logit(logger)
|
||||
def find_table(html_content: T.Text) -> T.List[et._ElementTree]:
|
||||
"""
|
||||
|
@ -110,7 +125,9 @@ class Details(object):
|
|||
This function fills the fields related to the flight code,
|
||||
if present and the input matches some heuristics.
|
||||
"""
|
||||
code = self.row.xpath(".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong")
|
||||
code = self.row.xpath(
|
||||
".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong"
|
||||
)
|
||||
if len(code) != 1:
|
||||
logger.debug("Cannot parse code")
|
||||
return
|
||||
|
@ -121,7 +138,9 @@ class Details(object):
|
|||
This function fills the field for the airport, if the input matches some
|
||||
heuristics.
|
||||
"""
|
||||
airport = self.row.xpath(".//td[contains(@class, 'lfr-flight-departure-column')]/h5")
|
||||
airport = self.row.xpath(
|
||||
".//td[contains(@class, 'lfr-flight-departure-column')]/h5"
|
||||
)
|
||||
if len(airport) != 1:
|
||||
logger.debug("Cannot parse airport")
|
||||
return
|
||||
|
@ -132,7 +151,9 @@ class Details(object):
|
|||
This function fills the field for the status, if the input matches some
|
||||
heuristics.
|
||||
"""
|
||||
status = self.row.xpath(".//td[contains(@class, 'lfr-flight-status-column')]/h5")
|
||||
status = self.row.xpath(
|
||||
".//td[contains(@class, 'lfr-flight-status-column')]/h5"
|
||||
)
|
||||
if len(status) != 1:
|
||||
logger.debug("Cannot parse status")
|
||||
return
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user