Iterate over pages
This commit is contained in:
parent
6e56f90b6b
commit
4f67b1c97a
|
@ -2,14 +2,17 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
||||||
from latecomers.parse import find_table, get_details, parse_fr24
|
from latecomers.parse import count_pages, find_table, get_details, parse_fr24
|
||||||
|
|
||||||
body = retrieve_from_inst()
|
|
||||||
|
|
||||||
flights = find_table(body)
|
|
||||||
|
|
||||||
aux_data = parse_fr24(retrieve_from_fr24())
|
|
||||||
|
|
||||||
breakpoint()
|
breakpoint()
|
||||||
|
aux_data = parse_fr24(retrieve_from_fr24())
|
||||||
|
|
||||||
|
body = retrieve_from_inst()
|
||||||
|
pages = count_pages(body)
|
||||||
|
flights = find_table(body)
|
||||||
|
for page in range(2, pages + 1):
|
||||||
|
body = retrieve_from_inst(page)
|
||||||
|
flights.extend(find_table(body))
|
||||||
|
|
||||||
for f in flights:
|
for f in flights:
|
||||||
print(get_details(f, aux_data=aux_data, debug=os.environ.get("DEBUG") is not None))
|
print(get_details(f, aux_data=aux_data))
|
||||||
|
|
|
@ -4,7 +4,7 @@ import sys
|
||||||
import typing as T
|
import typing as T
|
||||||
|
|
||||||
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
from latecomers.retrieve import retrieve_from_inst, retrieve_from_fr24
|
||||||
from latecomers.parse import find_table, get_details, Details, parse_fr24
|
from latecomers.parse import count_pages, find_table, get_details, Details, parse_fr24
|
||||||
from latecomers.serializer import to_excel
|
from latecomers.serializer import to_excel
|
||||||
from latecomers.notifier import Notifier
|
from latecomers.notifier import Notifier
|
||||||
from latecomers.config import Config
|
from latecomers.config import Config
|
||||||
|
@ -23,11 +23,19 @@ def main(config: Config):
|
||||||
The main cli entrypoint.
|
The main cli entrypoint.
|
||||||
"""
|
"""
|
||||||
out = Notifier(**config.smtp)
|
out = Notifier(**config.smtp)
|
||||||
|
|
||||||
body = retrieve_from_inst()
|
body = retrieve_from_inst()
|
||||||
|
pages = count_pages(body)
|
||||||
|
logger.info(f"found all data in {pages} page(s)")
|
||||||
table = find_table(body)
|
table = find_table(body)
|
||||||
|
for page in range(2, pages + 1):
|
||||||
|
body = retrieve_from_inst(page)
|
||||||
|
table.extend(find_table(body))
|
||||||
|
|
||||||
fr24_data = retrieve_from_fr24()
|
fr24_data = retrieve_from_fr24()
|
||||||
aux_data = parse_fr24(fr24_data)
|
aux_data = parse_fr24(fr24_data)
|
||||||
data: T.List[Details] = []
|
data: T.List[Details] = []
|
||||||
|
|
||||||
for row in table:
|
for row in table:
|
||||||
data.append(get_details(row, aux_data))
|
data.append(get_details(row, aux_data))
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,21 @@ def not_empty(obj: et._Element) -> bool:
|
||||||
raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}")
|
raise RuntimeError(f"provided argument is of unsupported type: {type(obj)}")
|
||||||
|
|
||||||
|
|
||||||
|
@logit(logger)
def count_pages(html_content: T.Text) -> int:
    """
    Count how many pages there are to be accessed.

    The paginator is looked up as the ``li`` children of a ``ul`` whose class
    contains ``pagination``, placed inside a ``div`` whose ``data-qa-id``
    contains ``paginator``. Two of those items are not counted as pages
    (presumably the previous/next controls -- confirm against the markup).

    :param html_content: the HTML document of a listing page
    :return: the number of pages, never less than 1
    """
    root = et.fromstring(html_content, parser=PARSER)
    li_items = root.xpath(
        "//div[contains(@data-qa-id, 'paginator')]/ul[contains(@class, 'pagination')]/li"
    )
    # The page that was just parsed always exists, so a missing paginator (or
    # one holding only the two non-page items) must still yield 1 rather than
    # a zero or negative count.
    return max(1, len(li_items) - 2)
|
||||||
|
|
||||||
|
|
||||||
@logit(logger)
|
@logit(logger)
|
||||||
def find_table(html_content: T.Text) -> T.List[et._ElementTree]:
|
def find_table(html_content: T.Text) -> T.List[et._ElementTree]:
|
||||||
"""
|
"""
|
||||||
|
@ -110,7 +125,9 @@ class Details(object):
|
||||||
This function fills the fields related to the flight code,
|
This function fills the fields related to the flight code,
|
||||||
if present and the input matches some heuristics.
|
if present and the input matches some heuristics.
|
||||||
"""
|
"""
|
||||||
code = self.row.xpath(".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong")
|
code = self.row.xpath(
|
||||||
|
".//td[contains(@class, 'lfr-departure-column-column')]//div/a/strong"
|
||||||
|
)
|
||||||
if len(code) != 1:
|
if len(code) != 1:
|
||||||
logger.debug("Cannot parse code")
|
logger.debug("Cannot parse code")
|
||||||
return
|
return
|
||||||
|
@ -121,7 +138,9 @@ class Details(object):
|
||||||
This function fills the field for the airport, if the input matches some
|
This function fills the field for the airport, if the input matches some
|
||||||
heuristics.
|
heuristics.
|
||||||
"""
|
"""
|
||||||
airport = self.row.xpath(".//td[contains(@class, 'lfr-flight-departure-column')]/h5")
|
airport = self.row.xpath(
|
||||||
|
".//td[contains(@class, 'lfr-flight-departure-column')]/h5"
|
||||||
|
)
|
||||||
if len(airport) != 1:
|
if len(airport) != 1:
|
||||||
logger.debug("Cannot parse airport")
|
logger.debug("Cannot parse airport")
|
||||||
return
|
return
|
||||||
|
@ -132,7 +151,9 @@ class Details(object):
|
||||||
This function fills the field for the status, if the input matches some
|
This function fills the field for the status, if the input matches some
|
||||||
heuristics.
|
heuristics.
|
||||||
"""
|
"""
|
||||||
status = self.row.xpath(".//td[contains(@class, 'lfr-flight-status-column')]/h5")
|
status = self.row.xpath(
|
||||||
|
".//td[contains(@class, 'lfr-flight-status-column')]/h5"
|
||||||
|
)
|
||||||
if len(status) != 1:
|
if len(status) != 1:
|
||||||
logger.debug("Cannot parse status")
|
logger.debug("Cannot parse status")
|
||||||
return
|
return
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user