Skip to content

description_ingestor

sc_crawler.description_ingestor #

Functions:

Name Description
descriptions_data_path

Download current server description data into a temp folder.

ingest_server_description

Load and validate generated description data for a Server.

ingest_server_descriptions

Load generated description data for all Servers with available output.

descriptions_data_path cached #

descriptions_data_path()

Download current server description data into a temp folder.

Setting the SC_CRAWLER_DESCRIPTIONS_DATA_PATH environment variable will override the default path for persistent/cached description data access.

Source code in sc_crawler/description_ingestor.py
@cache
def descriptions_data_path() -> str | PathLike:
    """Download current server description data into a temp folder.

    Setting the `SC_CRAWLER_DESCRIPTIONS_DATA_PATH` environment variable will
    override the default path for persistent/cached description data access.
    """
    if getenv("SC_CRAWLER_DESCRIPTIONS_DATA_PATH"):
        temp_dir = getenv("SC_CRAWLER_DESCRIPTIONS_DATA_PATH")
        makedirs(temp_dir, exist_ok=True)
    else:
        temp_dir = mkdtemp()
        register(rmtree, temp_dir)
    zip_path = path.join(temp_dir, "downloaded.zip")
    if not path.exists(zip_path):
        response = get(DESCRIPTIONS_ZIP_URL, timeout=60)
        response.raise_for_status()
        with open(zip_path, "wb") as f:
            f.write(response.content)
        with ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(temp_dir)
    return path.join(temp_dir, "sc-navigator-descriptions-main", "data")

ingest_server_description #

ingest_server_description(server)

Load and validate generated description data for a Server.

Source code in sc_crawler/description_ingestor.py
def ingest_server_description(server: "Server") -> dict | None:
    """Build the validated description record for a single Server.

    The raw output from `_load_server_description_output` is validated with
    `ServerDescriptionFields`, dumped in JSON mode, and merged on top of the
    identifiers returned by `_server_ids`.

    Returns:
        The merged dict, or `None` when any step raises; in that case the
        error is handed to `_log_cannot_load_description` instead of being
        propagated.
    """
    try:
        raw_output = _load_server_description_output(server)
        validated = ServerDescriptionFields.model_validate(raw_output)
        identifiers = _server_ids(server)
        dumped_fields = validated.model_dump(mode="json")
        # Description fields win over identifier keys on a name clash.
        record = {**identifiers, **dumped_fields}
    except Exception as err:
        # Best-effort: one bad description must not abort the caller.
        _log_cannot_load_description(server, err)
        return None
    return record

ingest_server_descriptions #

ingest_server_descriptions(servers)

Load generated description data for all Servers with available output.

Source code in sc_crawler/description_ingestor.py
def ingest_server_descriptions(servers: List["Server"]) -> List[dict]:
    """Collect description records for every Server with usable output.

    Each Server is passed to `ingest_server_description`; Servers for which
    that call returns `None` are left out. Input order is preserved.
    """
    return [
        record
        for server in servers
        if (record := ingest_server_description(server)) is not None
    ]