Skip to content


sc_crawler.logger #

log_start_end #


Log the start and end of the decorated function.

Source code in sc_crawler/
def log_start_end(func):
    """Log the start and end of the decorated function.

    Besides the debug log records, the wrapper reports the running step to
    the progress tracker of the first positional argument (when it has one).

    Args:
        func: The function to decorate. When its first positional argument
            has a `vendor_id` attribute, that is used as a prefix in the
            log messages.

    Returns:
        The wrapped function, returning whatever `func` returns.
    """
    from functools import wraps

    @wraps(func)
    def wrap(*args, **kwargs):
        # log start of the step
        owner = args[0] if args else None
        try:
            fname = f"{owner.vendor_id}/{func.__name__}"
        except Exception:
            fname = func.__name__
        logger.debug("Starting %s", fname)

        # update Vendor's progress bar with the step name
        # NOTE(review): the `progress_tracker` attribute name was reconstructed,
        # confirm against the Vendor class.
        tracker = None
        try:
            tracker = owner.progress_tracker
            # drop `inventory_` prefix and prettify
            step = func.__name__
            if step.startswith("inventory_"):
                step = step[10:]
            tracker.update_vendor(step=step.replace("_", " "))
        except Exception:
            logger.error("Cannot update step name in the Vendor's progress bar.")

        # actually run step
        result = func(*args, **kwargs)

        # increment Vendor's progress bar; a progress reporting problem must
        # not discard the result of a step that has already finished
        if tracker is not None:
            try:
                tracker.advance_vendor()
            except Exception:
                logger.error("Cannot advance the Vendor's progress bar.")

        # log end of the step and return
        logger.debug("Finished %s", fname)
        return result

    return wrap

ScRichHandler #

Bases: RichHandler

Extend RichHandler with function name logged in the right column.

Source code in sc_crawler/
class ScRichHandler(RichHandler):
    """Extend RichHandler with function name logged in the right column."""

    def render(
        self,
        *,
        record: logging.LogRecord,
        traceback: Optional[Traceback],
        message_renderable: "ConsoleRenderable",
    ) -> "ConsoleRenderable":
        """Render the log record, showing `file:function` as the path.

        Args:
            record: The log record being rendered.
            traceback: Optional traceback to render after the message.
            message_renderable: The already rendered log message.

        Returns:
            The renderable produced by the handler's log renderer.
        """
        # file name (without directories) and the name of the logging function
        path = Path(record.pathname).name + ":" + record.funcName
        level = self.get_level_text(record)
        time_format = None if self.formatter is None else self.formatter.datefmt
        log_time = datetime.fromtimestamp(record.created)

        log_renderable = self._log_render(
            self.console,
            [message_renderable] if not traceback else [message_renderable, traceback],
            log_time=log_time,
            time_format=time_format,
            level=level,
            path=path,
            line_no=record.lineno,
            link_path=record.pathname if self.enable_link_path else None,
        )
        return log_renderable

VendorProgressTracker #

Tracking the progress of the vendor's inventory updates.

Source code in sc_crawler/
class VendorProgressTracker:
    """Tracking the progress of the vendor's inventory updates."""

    vendor: Vendor
    """A [sc_crawler.tables.Vendor][] instance for which tracking progress."""
    progress_panel: ProgressPanel
    """
    A `rich` panel including progress bars.
    Should not be used directly, see the `vendors`, `tasks` and `metadata` attributes.
    """
    # reexport Progress attributes of the ProgressPanel
    vendors: Progress
    """[rich.progress.Progress][] for tracking the inventory steps of the vendor."""
    tasks: Progress
    """[rich.progress.Progress][] for tracking the lower-level tasks within each step."""
    metadata: Text
    """[rich.text.Text][] metadata, e.g. data sources and records to be updated."""
    # class-level default kept for backward compatibility only;
    # `__init__` gives every tracker its own list
    task_ids: List[TaskID] = []
    """List of active task ids for the current `vendor`."""

    def __init__(self, vendor: Vendor, progress_panel: ProgressPanel):
        """Bind the tracker to a vendor and to the progress bars of a panel.

        Args:
            vendor: The vendor for which the progress is tracked.
            progress_panel: Panel providing the `vendors`, `tasks` and
                `metadata` attributes reexported by the tracker.
        """
        self.vendor = vendor
        self.progress_panel = progress_panel
        self.vendors = progress_panel.vendors
        self.tasks = progress_panel.tasks
        self.metadata = progress_panel.metadata
        # a fresh list per instance, so trackers do not share task ids
        self.task_ids = []

    def start_vendor(self, total: int) -> TaskID:
        """Starts a progress bar for the Vendor's steps.

        Args:
            total: Overall number of steps to show in the progress bar.

        Returns:
            TaskId: The progress bar's identifier to be referenced in future updates.
        """
        return self.vendors.add_task(self.vendor.name, total=total, step="")

    def advance_vendor(self, advance: int = 1) -> None:
        """Increment the number of finished steps.

        Args:
            advance: Number of steps to advance.
        """
        # targets the last task registered in the `vendors` progress
        self.vendors.update(self.vendors.task_ids[-1], advance=advance)

    def update_vendor(self, **kwargs) -> None:
        """Update the vendor's progress bar.

        Useful fields:

        - `step`: Name of the currently running step to be shown on the progress bar.
        """
        # targets the last task registered in the `vendors` progress
        self.vendors.update(self.vendors.task_ids[-1], **kwargs)

    def start_task(self, name: str, total: int) -> TaskID:
        """Starts a progress bar in the list of current jobs.

        Besides returning the `TaskID`, it will also register it in `self.task_ids`
        as the last task, which will be the default value for future `advance_task`,
        `hide_task` etc calls. The latter will remove the `TaskID` from the `task_ids`.

        Args:
            name: Name to show in front of the progress bar. Will be prefixed by Vendor's name.
            total: Overall number of steps to show in the progress bar.

        Returns:
            TaskId: The progress bar's identifier to be referenced in future updates.
        """
        self.task_ids.append(
            self.tasks.add_task(self.vendor.name + ": " + name, total=total)
        )
        return self.last_task()

    def last_task(self) -> TaskID:
        """Return the last registered TaskID."""
        return self.task_ids[-1]

    def advance_task(self, task_id: Optional[TaskID] = None, advance: int = 1):
        """Increment the number of finished steps.

        Args:
            task_id: The progress bar's identifier returned by `start_task`.
                Defaults to the most recently created task.
            advance: Number of steps to advance.
        """
        # compare with None: a task id of 0 is valid, but falsy
        target = self.last_task() if task_id is None else task_id
        self.tasks.update(target, advance=advance)

    def update_task(self, task_id: Optional[TaskID] = None, **kwargs) -> None:
        """Update the task's progress bar.

        Args:
            task_id: The progress bar's identifier returned by `start_task`.
                Defaults to the most recently created task.

        Keyword Args:
            step (str): Name of the currently running step to be shown on the progress bar.

        See [`rich.progress.Progress.update`][] for further keyword arguments.
        """
        # compare with None: a task id of 0 is valid, but falsy
        target = self.last_task() if task_id is None else task_id
        self.tasks.update(target, **kwargs)

    def hide_task(self, task_id: Optional[TaskID] = None):
        """Hide a task from the list of progress bars.

        The task is also removed from `task_ids`, so it is no longer the
        default target of the `advance_task`, `update_task` and `hide_task` calls.

        Args:
            task_id: The progress bar's identifier returned by `start_task`.
                Defaults to the most recently created task.
        """
        # compare with None: a task id of 0 is valid, but falsy
        target = self.last_task() if task_id is None else task_id
        self.tasks.update(target, visible=False)
        # task ids not registered by this tracker are hidden, but not tracked
        if target in self.task_ids:
            self.task_ids.remove(target)

task_ids class-attribute instance-attribute #

task_ids = []

List of active task ids for the current vendor.

vendor instance-attribute #

vendor = vendor

A sc_crawler.tables.Vendor instance for which tracking progress.

progress_panel instance-attribute #

progress_panel = progress_panel

A rich panel including progress bars. Should not be used directly, see the vendors, tasks and metadata attributes.

vendors instance-attribute #

vendors = vendors

rich.progress.Progress for tracking the inventory steps of the vendor.

tasks instance-attribute #

tasks = tasks

rich.progress.Progress for tracking the lower-level tasks within each step.

metadata instance-attribute #

metadata = metadata

rich.text.Text metadata, e.g. data sources and records to be updated.

start_vendor #


Starts a progress bar for the Vendor's steps.


Name Type Description Default
total int

Overall number of steps to show in the progress bar.



Name Type Description
TaskId TaskID

The progress bar's identifier to be referenced in future updates.

Source code in sc_crawler/
def start_vendor(self, total: int) -> TaskID:
    """Starts a progress bar for the Vendor's steps.

    Args:
        total: Overall number of steps to show in the progress bar.

    Returns:
        TaskId: The progress bar's identifier to be referenced in future updates.
    """
    return self.vendors.add_task(self.vendor.name, total=total, step="")

advance_vendor #


Increment the number of finished steps.


Name Type Description Default
advance int

Number of steps to advance.

Source code in sc_crawler/
def advance_vendor(self, advance: int = 1) -> None:
    """Increment the number of finished steps.

    Args:
        advance: Number of steps to advance.
    """
    # targets the last task registered in the `vendors` progress
    self.vendors.update(self.vendors.task_ids[-1], advance=advance)

update_vendor #


Update the vendor's progress bar.

Useful fields: - step: Name of the currently running step to be shown on the progress bar.

Source code in sc_crawler/
def update_vendor(self, **kwargs) -> None:
    """Update the vendor's progress bar.

    Useful fields:

    - `step`: Name of the currently running step to be shown on the progress bar.
    """
    # targets the last task registered in the `vendors` progress
    self.vendors.update(self.vendors.task_ids[-1], **kwargs)

start_task #

start_task(name, total)

Starts a progress bar in the list of current jobs.

Besides returning the TaskID, it also registers it in the tracker's task_ids as the last task, which becomes the default target of future advance_task, hide_task etc. calls. The latter removes the TaskID from task_ids.


Name Type Description Default
name str

Name to show in front of the progress bar. Will be prefixed by Vendor's name.

total int

Overall number of steps to show in the progress bar.



Name Type Description
TaskId TaskID

The progress bar's identifier to be referenced in future updates.

Source code in sc_crawler/
def start_task(self, name: str, total: int) -> TaskID:
    """Starts a progress bar in the list of current jobs.

    Besides returning the `TaskID`, it will also register it in `self.task_ids`
    as the last task, which will be the default value for future `advance_task`,
    `hide_task` etc calls. The latter will remove the `TaskID` from the `task_ids`.

    Args:
        name: Name to show in front of the progress bar. Will be prefixed by Vendor's name.
        total: Overall number of steps to show in the progress bar.

    Returns:
        TaskId: The progress bar's identifier to be referenced in future updates.
    """
    self.task_ids.append(
        self.tasks.add_task(self.vendor.name + ": " + name, total=total)
    )
    return self.last_task()

last_task #


Return the last registered TaskID.

Source code in sc_crawler/
def last_task(self) -> TaskID:
    """Return the most recently registered TaskID."""
    registered = self.task_ids
    return registered[-1]

advance_task #

advance_task(task_id=None, advance=1)

Increment the number of finished steps.


Name Type Description Default
task_id Optional[TaskID]

The progress bar's identifier returned by start_task. Defaults to the most recently created task.

advance int

Number of steps to advance.

Source code in sc_crawler/
def advance_task(self, task_id: Optional[TaskID] = None, advance: int = 1):
    """Increment the number of finished steps.

    Args:
        task_id: The progress bar's identifier returned by `start_task`.
            Defaults to the most recently created task.
        advance: Number of steps to advance.
    """
    # compare with None: a task id of 0 is valid, but falsy
    target = self.last_task() if task_id is None else task_id
    self.tasks.update(target, advance=advance)

update_task #

update_task(task_id=None, **kwargs)

Update the task's progress bar.


Name Type Description Default
task_id Optional[TaskID]

The progress bar's identifier returned by start_task. Defaults to the most recently created task.


Other Parameters:

Name Type Description
step str

Name of the currently running step to be shown on the progress bar.

See rich.progress.Progress.update for further keyword arguments.

Source code in sc_crawler/
def update_task(self, task_id: Optional[TaskID] = None, **kwargs) -> None:
    """Update the task's progress bar.

    Args:
        task_id: The progress bar's identifier returned by `start_task`.
            Defaults to the most recently created task.

    Keyword Args:
        step (str): Name of the currently running step to be shown on the progress bar.

    See [`rich.progress.Progress.update`][] for further keyword arguments.
    """
    # compare with None: a task id of 0 is valid, but falsy
    target = self.last_task() if task_id is None else task_id
    self.tasks.update(target, **kwargs)

hide_task #


Hide a task from the list of progress bars.


Name Type Description Default
task_id Optional[TaskID]

The progress bar's identifier returned by start_task. Defaults to the most recently created task.

Source code in sc_crawler/
def hide_task(self, task_id: Optional[TaskID] = None):
    """Hide a task from the list of progress bars.

    The task is also removed from `task_ids`, so it is no longer the
    default target of the `advance_task`, `update_task` and `hide_task` calls.

    Args:
        task_id: The progress bar's identifier returned by `start_task`.
            Defaults to the most recently created task.
    """
    # compare with None: a task id of 0 is valid, but falsy
    target = self.last_task() if task_id is None else task_id
    self.tasks.update(target, visible=False)
    # task ids not registered by this tracker are hidden, but not tracked
    if target in self.task_ids:
        self.task_ids.remove(target)

VoidProgressTracker #

Bases: VendorProgressTracker

Progress tracker reference not doing anything.

Source code in sc_crawler/
class VoidProgressTracker(VendorProgressTracker):
    """Progress tracker reference not doing anything.

    Every method accepts arbitrary arguments, does nothing and returns `None`.
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments."""
        # the parent initializer is skipped on purpose: nothing is tracked
        pass

    def start_vendor(self, *args, **kwargs):
        """Do nothing."""
        pass

    def advance_vendor(self, *args, **kwargs):
        """Do nothing."""
        pass

    def update_vendor(self, *args, **kwargs):
        """Do nothing."""
        pass

    def start_task(self, *args, **kwargs):
        """Do nothing."""
        pass

    def last_task(self, *args, **kwargs):
        """Do nothing."""
        pass

    def advance_task(self, *args, **kwargs):
        """Do nothing."""
        pass

    def update_task(self, *args, **kwargs):
        """Do nothing."""
        pass

    def hide_task(self, *args, **kwargs):
        """Do nothing."""
        pass