Skip to content

vendor_helpers

sc_crawler.vendor_helpers #

fetch_servers #

fetch_servers(fn, where, vendor)

Fetch servers of a region/zone.

Parameters:

Name Type Description Default
fn Callable

A function that takes the region or zone id as its first and only argument. The returned list must either conform to the Server object or be in a format that preprocess_servers's fn can handle.

required
where str

A Region or Zone api_reference or similar that is passed to fn.

required
vendor Optional[Vendor]

Optional Vendor instance used for logging and progress bar updates.

required
Source code in sc_crawler/vendor_helpers.py
def fetch_servers(fn: Callable, where: str, vendor: Optional[Vendor]) -> List[dict]:
    """Look up the servers of a single region/zone via `fn`.

    Args:
        fn: Callable invoked as `fn(where)`, i.e. with the region or zone id
            as its sole argument. The list it returns must either conform
            with the Server object, or be in a format that the `fn` of
            [preprocess_servers][sc_crawler.vendor_helpers.preprocess_servers]
            can manage.
        where: A [Region][sc_crawler.tables.Region] or [Zone][sc_crawler.tables.Zone]
            `api_reference` or similar, handed over to `fn` as-is.
        vendor: Optional [Vendor][sc_crawler.tables.Vendor] instance. When
            truthy, the number of servers found is logged through it and
            its progress tracker is advanced by one step.

    Returns:
        The value returned by `fn(where)`, unchanged.
    """
    found = fn(where)
    # Without a vendor there is nothing to log or to advance.
    if not vendor:
        return found
    vendor.log(f"{len(found)} server(s) found in {where}.")
    vendor.progress_tracker.advance_task()
    return found

parallel_fetch_servers #

parallel_fetch_servers(vendor, fn, id_col, by)

Fetch servers of all regions/zones in parallel on 8 threads.

Parameters:

Name Type Description Default
vendor Vendor

Required Vendor instance used for the regions lookup, logging and progress bar updates.

required
fn Callable

A function to be passed to fetch_servers.

required
id_col str

Field name to be used to deduplicate the list of server dicts.

required
by Literal['regions', 'zones']

What objects of the vendor to iterate on.

required
Source code in sc_crawler/vendor_helpers.py
def parallel_fetch_servers(
    vendor: Vendor, fn: Callable, id_col: str, by: Literal["regions", "zones"]
) -> List[dict]:
    """Fetch servers of all active regions/zones in parallel on 8 threads.

    Args:
        vendor: Required [Vendor][sc_crawler.tables.Vendor] instance used for
            the regions/zones lookup, logging and progress bar updates.
        fn: A function to be passed to [fetch_servers][sc_crawler.vendor_helpers.fetch_servers].
        id_col: Field name to be used to deduplicate the list of server dicts.
        by: What objects of the `vendor` to iterate on.

    Returns:
        The deduplicated list of server dicts. When several locations report
        a server with the same `id_col` value, the dict from the location
        that comes last in iteration order is kept.
    """

    # Only locations flagged as active are scanned.
    locations = [
        i.api_reference for i in getattr(vendor, by) if i.status == Status.ACTIVE
    ]
    vendor.progress_tracker.start_task(
        name=f"Scanning {by} for server(s)", total=len(locations)
    )

    with ThreadPoolExecutor(max_workers=8) as executor:
        # Materialize the lazy map iterator while the pool is still open;
        # results come back in the order of `locations`.
        per_location = list(
            executor.map(fetch_servers, repeat(fn), locations, repeat(vendor))
        )
    servers = list(chain.from_iterable(per_location))

    vendor.log(f"{len(servers)} server(s) found in {len(locations)} {by}.")
    # Keying a dict by `id_col` drops duplicates (later entries overwrite
    # earlier ones with the same id).
    servers = list({s[id_col]: s for s in servers}.values())
    vendor.log(f"{len(servers)} unique server(s) found.")
    vendor.progress_tracker.hide_task()
    return servers

preprocess_servers #

preprocess_servers(servers, vendor, fn)

Preprocess servers before inserting into the database.

Takes a list of dicts and transforms it into a list of dicts that follows the Server schema.

Parameters:

Name Type Description Default
servers List[dict]

To be passed to fn.

required
vendor Vendor

The related Vendor instance used for database connection, logging and progress bar updates.

required
fn Callable

A function that takes a server from servers (one-by-one) and the vendor.

required
Source code in sc_crawler/vendor_helpers.py
def preprocess_servers(servers: List[dict], vendor: Vendor, fn: Callable) -> List[dict]:
    """Run `fn` over every server before inserting into the database.

    Takes a list of dicts and transforms it into a list of dicts that
    follows the [Server][sc_crawler.tables.Server] schema.

    Args:
        servers: Items handed to `fn` one at a time, in order.
        vendor: The related [Vendor][sc_crawler.tables.Vendor] instance used
            for database connection, logging and progress bar updates.
        fn: A function called as `fn(server, vendor)` for each item
            of `servers`.

    Returns:
        The list of values returned by `fn`, in the order of `servers`.
    """

    def _convert(raw: dict) -> dict:
        # Transform one item, then tick the progress bar.
        converted = fn(raw, vendor)
        vendor.progress_tracker.advance_task()
        return converted

    vendor.progress_tracker.start_task(
        name="Preprocessing server(s)", total=len(servers)
    )
    results = [_convert(item) for item in servers]
    vendor.progress_tracker.hide_task()
    return results

add_vendor_id #

add_vendor_id(obj, vendor)

Adds vendor_id field to a dict.

Source code in sc_crawler/vendor_helpers.py
def add_vendor_id(obj: dict, vendor: Vendor) -> dict:
    """Set the `vendor_id` key of `obj` to the vendor's id.

    Args:
        obj: Dict to be updated in place.
        vendor: Object whose `vendor_id` attribute is copied into `obj`.

    Returns:
        The very same `obj`, now carrying the `vendor_id` key.
    """
    vendor_id = vendor.vendor_id
    obj["vendor_id"] = vendor_id
    return obj