Skip to content

DorsalFileCollection

dorsal.file.collection.DorsalFileCollection

DorsalFileCollection(
    collection_id,
    *,
    client=None,
    _metadata=None,
    _files=None,
    _pagination=None
)

Bases: _BaseFileCollection

Represents and interacts with a remote File Collection on DorsalHub.

Initializes a remote file collection.

Parameters:

Name Type Description Default
collection_id str

The unique ID of the collection to fetch.

required
client DorsalClient | None

An optional DorsalClient instance.

None
Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def __init__(
    self,
    collection_id: str,
    *,
    client: DorsalClient | None = None,
    _metadata: FileCollection | None = None,
    _files: list[DorsalFile] | None = None,
    _pagination: Pagination | None = None,
):
    """
    Sets up a handle to a remote file collection.

    When `_metadata`, `_files` and `_pagination` are all supplied, they are
    adopted as the initial state and no request is made. Otherwise the
    collection is requested from the server through the client.

    Args:
        collection_id: The unique ID of the collection to fetch.
        client: An optional DorsalClient instance. The shared client is used
            when none is given.
        _metadata: Internal. Already-fetched collection metadata.
        _files: Internal. Already-built list of files (an empty list counts
            as supplied).
        _pagination: Internal. Already-fetched pagination state.
    """
    self._client: DorsalClient = client or get_shared_dorsal_client()
    self.collection_id: str = collection_id

    preloaded = _metadata and _files is not None and _pagination
    if not preloaded:
        # No complete pre-fetched state: request the collection with hydrate=True.
        self._update_from_response(self._client.get_collection(self.collection_id, hydrate=True))
    else:
        self.metadata, self.files, self.pagination = _metadata, _files, _pagination

    origin = {"type": "remote", "collection_id": self.collection_id}
    super().__init__(files=self.files, source_info=origin)

    # A fresh instance is never considered fully populated; populate() flips this.
    self._is_populated = False
    logger.info(
        f"Initialized collection '{self.metadata.name}' ({self.collection_id}) with {len(self.files)} files."
    )

add_files

add_files(hashes)

Adds a list of files to this remote collection by their hash.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def add_files(self, hashes: list[str]) -> dict:
    """
    Adds the files identified by `hashes` to this remote collection.

    After the client call returns, the local view is refreshed and the
    client's response is returned via its `model_dump()`.
    """
    result = self._client.add_files_to_collection(self.collection_id, hashes)
    self.refresh()
    summary = result.model_dump()
    return summary

delete

delete()

Deletes the entire remote collection.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def delete(self) -> None:
    """Deletes this collection on the remote server and logs the deletion."""
    doomed_ids = [self.collection_id]
    self._client.delete_collections(collection_ids=doomed_ids)
    logger.info(f"Collection '{self.metadata.name}' ({self.collection_id}) has been deleted.")

fetch_page

fetch_page(page)

Fetches a specific page of file records, updating the collection in-place. ...

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def fetch_page(self, page: int) -> Self:
    """
    Loads one page of file records from the server into this collection, in place.

    The page number is validated against the pagination state currently held
    by the instance, and that state's `per_page` is reused for the request.

    Args:
        page: 1-based page number to load.

    Returns:
        This collection, updated from the server response.

    Raises:
        ValueError: If there is no pagination state or `page` lies outside
            `1..page_count`.
    """
    pagination = self.pagination
    if not pagination or page < 1 or page > pagination.page_count:
        raise ValueError(f"Page number must be between 1 and {getattr(self.pagination, 'page_count', 1)}")

    request_options = {
        "page": page,
        "per_page": pagination.per_page,
        "hydrate": True,
    }
    self._update_from_response(self._client.get_collection(self.collection_id, **request_options))

    # self.pagination is read again here: the log reports the state held after the update.
    logger.info(
        f"Fetched page {page}. Displaying items {self.pagination.start_index} to {self.pagination.end_index}."
    )
    return self

from_id classmethod

from_id(collection_id, client=None)

Explicitly create a DorsalFileCollection from a collection ID.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
@classmethod
def from_id(cls, collection_id: str, client: DorsalClient | None = None) -> Self:
    """
    Named constructor: builds a DorsalFileCollection for the given collection ID.

    Equivalent to calling the class with `collection_id` and `client`.
    """
    collection = cls(collection_id, client=client)
    return collection

from_id_metadata_only classmethod

from_id_metadata_only(collection_id, client=None)

Create a collection with no files loaded. Useful for management operations via the CLI.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
@classmethod
def from_id_metadata_only(cls, collection_id: str, client: DorsalClient | None = None) -> Self:
    """
    Builds a collection instance that carries metadata but an empty file list.

    Useful for management operations via the CLI. The collection is requested
    with `hydrate=False` and `per_page=0`, and the response's metadata and
    pagination are handed to the constructor so it does not fetch again.
    """
    api = client or get_shared_dorsal_client()
    summary = api.get_collection(collection_id, hydrate=False, per_page=0)

    return cls(
        collection_id=collection_id,
        client=api,
        _metadata=summary.collection,
        _files=[],
        _pagination=summary.pagination,
    )

from_remote classmethod

from_remote(
    collection_id,
    client=None,
    use_export=False,
    poll_interval=5,
    timeout=3600,
    console=None,
    palette=None,
)

Creates and returns a new, fully populated DorsalFileCollection.

This is a convenience method that initializes the collection and immediately calls .populate().

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
@classmethod
def from_remote(
    cls,
    collection_id: str,
    client: DorsalClient | None = None,
    use_export: bool = False,
    poll_interval: int = 5,
    timeout: int | None = 3600,
    console: "Console | None" = None,
    palette: dict | None = None,
) -> Self:
    """
    Builds a DorsalFileCollection and populates it in one step.

    Convenience wrapper: the instance is constructed from `collection_id` and
    `client`, then `.populate()` is called on it with the remaining arguments
    forwarded unchanged.
    """
    populate_options = {
        "use_export": use_export,
        "poll_interval": poll_interval,
        "timeout": timeout,
        "console": console,
        "palette": palette,
    }
    collection = cls(collection_id, client=client)
    collection.populate(**populate_options)
    return collection

list_collections classmethod

list_collections(client=None, page=1, per_page=50)

List available remote collections for the user and return them as (unpopulated) DorsalFileCollection instances.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
@classmethod
def list_collections(cls, client: DorsalClient | None = None, page: int = 1, per_page: int = 50) -> list[Self]:
    """
    Lists the user's remote collections as (unpopulated) DorsalFileCollection instances.

    Each instance is built from the listing record alone: it gets an empty
    file list and a placeholder pagination object, so the constructor does
    not fetch the collection.
    """
    api = client or get_shared_dorsal_client()
    listing = api.list_collections(page=page, per_page=per_page)

    def _as_instance(record):
        # Placeholder state: no page loaded yet, only the total file count is known.
        placeholder = Pagination(
            current_page=0,
            record_count=record.file_count,
            page_count=0,
            per_page=0,
            has_next=record.file_count > 0,
            has_prev=False,
            start_index=0,
            end_index=0,
        )
        return cls(
            collection_id=record.collection_id,
            client=api,
            _metadata=record,
            _files=[],
            _pagination=placeholder,
        )

    return [_as_instance(record) for record in listing.records]

make_private

make_private()

Makes the remote collection private.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def make_private(self) -> str:
    """
    Makes the remote collection private.

    The local view is refreshed afterwards and the `location_url` of the
    client's response is returned.
    """
    result = self._client.make_collection_private(self.collection_id)
    self.refresh()
    location = result.location_url
    return location

make_public

make_public()

Makes the remote collection public.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def make_public(self) -> str:
    """
    Makes the remote collection public.

    The local view is refreshed afterwards and the `location_url` of the
    client's response is returned.
    """
    result = self._client.make_collection_public(self.collection_id)
    self.refresh()
    location = result.location_url
    return location

populate

populate(
    use_export=False,
    poll_interval=5,
    timeout=3600,
    console=None,
    palette=None,
)

Populates the collection with all of its file records from the remote server.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def populate(
    self,
    use_export: bool = False,
    poll_interval: int = 5,
    timeout: int | None = 3600,
    console: "Console | None" = None,
    palette: dict | None = None,
) -> Self:
    """
    Populates the collection with all of its file records from the remote server.

    Collections whose `file_count` exceeds `PAGINATION_RECORD_LIMIT` are
    downloaded through the server-side export and require `use_export=True`.
    All other collections are fetched page by page (500 records per request),
    whatever the value of `use_export`.

    Args:
        use_export: Permit the server-side export for collections above the
            pagination limit.
        poll_interval: Forwarded to the client's `export_collection`.
        timeout: Forwarded to the client's `export_collection`.
        console: Optional rich console. Forwarded to the export; for paginated
            downloads outside Jupyter it also enables a rich progress bar.
        palette: Optional style mapping. Forwarded to the export; for the
            progress bar `DEFAULT_PALETTE` is used when this is None.

    Returns:
        This collection, with `files` replaced by the downloaded records and
        `pagination` reset to a single page.

    Raises:
        DorsalClientError: If the collection exceeds the pagination limit and
            `use_export` is False.
    """
    from contextlib import nullcontext

    logger.info(f"Requesting to fully populate collection '{self.collection_id}'...")

    collection_metadata = self.metadata
    total_files = collection_metadata.file_count
    dorsal_files: list[DorsalFile] = []

    if total_files > PAGINATION_RECORD_LIMIT and not use_export:
        raise DorsalClientError(
            f"Collection has {total_files} files, which exceeds the pagination limit of {PAGINATION_RECORD_LIMIT}. "
            f"To populate a large collection, you must use the server-side export feature. "
            f"Please re-run with `.populate(use_export=True)`."
        )

    if total_files > PAGINATION_RECORD_LIMIT and use_export:
        logger.info(f"Collection is large ({total_files} files). Using efficient server-side export...")
        # Export into a private temporary directory rather than into an already-open
        # NamedTemporaryFile: reopening such a file by name is not portable (it fails
        # on Windows), and an open handle would miss the data if the exporter replaced
        # the file instead of writing into it.
        with tempfile.TemporaryDirectory() as export_dir:
            export_path = os.path.join(export_dir, "export.json")
            self._client.export_collection(
                collection_id=self.collection_id,
                output_path=export_path,
                poll_interval=poll_interval,
                timeout=timeout,
                console=console,
                palette=palette,
            )
            # Binary mode lets json detect the UTF-8/16/32 encoding itself instead of
            # depending on the platform's default text encoding.
            with open(export_path, "rb") as export_file:
                export_data = json.load(export_file)
        files_data = export_data.get("results", [])
        dorsal_files = [DorsalFile.from_record(FileRecordDateTime(**data), self._client) for data in files_data]
    else:
        logger.info(f"Collection has {total_files} files. Using paginated download...")
        per_page = 500
        total_pages = (total_files + per_page - 1) // per_page  # ceiling division
        page_iterator = range(1, total_pages + 1)
        rich_progress = None

        # Progress reporting: tqdm inside Jupyter, a rich bar when a console was
        # supplied, otherwise none.
        iterator: Iterable[int]
        if is_jupyter_environment():
            iterator = tqdm(page_iterator, desc="Fetching pages", total=total_pages)
        elif console:
            from rich.progress import (
                Progress,
                BarColumn,
                TaskProgressColumn,
                MofNCompleteColumn,
                TextColumn,
                TimeElapsedColumn,
                TimeRemainingColumn,
            )
            from dorsal.cli.themes.palettes import DEFAULT_PALETTE

            active_palette = palette if palette is not None else DEFAULT_PALETTE
            progress_columns = (
                TextColumn(
                    "[progress.description]{task.description}",
                    style=active_palette.get("progress_description", "default"),
                ),
                BarColumn(
                    bar_width=None,
                    style=active_palette.get("progress_bar", "default"),
                ),
                TaskProgressColumn(style=active_palette.get("progress_percentage", "default")),
                MofNCompleteColumn(),
                TextColumn("•", style="dim"),
                TimeElapsedColumn(),
                TextColumn("•", style="dim"),
                TimeRemainingColumn(),
            )
            rich_progress = Progress(
                *progress_columns,
                console=console,
                redirect_stdout=True,
                redirect_stderr=True,
            )
            task_id = rich_progress.add_task("Fetching pages...", total=total_pages)
            iterator = page_iterator
        else:
            iterator = page_iterator

        # nullcontext() is the no-op stand-in when there is no rich progress bar to
        # enter; no throwaway file handle needs to be opened for that purpose.
        with rich_progress if rich_progress is not None else nullcontext():
            for page_num in iterator:
                response = self._client.get_collection(
                    collection_id=self.collection_id,
                    page=page_num,
                    per_page=per_page,
                    hydrate=True,
                )
                if not response.files:
                    # An empty page ends the download early.
                    break
                dorsal_files.extend([DorsalFile.from_record(rec, self._client) for rec in response.files])
                if rich_progress is not None:
                    rich_progress.update(task_id, advance=1)

    self.files = dorsal_files
    # Everything now lives in memory, so present it as one single page.
    self.pagination = Pagination(
        current_page=1,
        record_count=total_files,
        page_count=1,
        per_page=total_files,
        has_next=False,
        has_prev=False,
        start_index=1 if total_files > 0 else 0,
        end_index=total_files,
    )

    self._is_populated = True

    logger.info(f"Successfully populated collection with {len(self.files)} file records.")
    return self

refresh

refresh()

Refreshes the collection's metadata and re-fetches the current page.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def refresh(self) -> Self:
    """
    Refreshes the collection's metadata and re-fetches the current page.

    When the stored pagination points at a fetchable page
    (`1 <= current_page <= page_count`), that page is reloaded through
    `fetch_page`. Otherwise the collection is requested the same way the
    constructor requests it. That covers instances whose pagination is a
    placeholder with `current_page=0` and `page_count=0` (as built by
    `list_collections`) or is missing: `fetch_page` always rejects those with
    a `ValueError`, which used to surface after a remote change had already
    been applied.

    Returns:
        This collection, updated from the server response.
    """
    pagination = self.pagination
    if pagination and 1 <= pagination.current_page <= pagination.page_count:
        return self.fetch_page(pagination.current_page)

    # No fetchable page recorded locally: fall back to the constructor's request.
    response = self._client.get_collection(self.collection_id, hydrate=True)
    self._update_from_response(response)
    return self

remove_files

remove_files(hashes)

Removes a list of files from this remote collection by their hash. This does not delete the file records themselves from DorsalHub.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def remove_files(self, hashes: list[str]) -> dict:
    """
    Detaches the files identified by `hashes` from this remote collection.

    Only the membership is removed; the file records themselves are not
    deleted from DorsalHub. The local view is refreshed afterwards and the
    client's response is returned via its `model_dump()`.
    """
    result = self._client.remove_files_from_collection(self.collection_id, hashes)
    self.refresh()
    summary = result.model_dump()
    return summary

to_csv

to_csv(file_path, force=False)

Exports the collection's metadata to a CSV file.

Raises an error if the collection is not fully populated, unless force=True.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def to_csv(self, file_path: str, force: bool = False) -> None:
    """
    Writes the collection's metadata to a CSV file at `file_path`.

    The populated-state check runs first; it raises an error if the
    collection is not fully populated, unless `force=True`. The export itself
    is done by the parent class.
    """
    self._check_if_populated(force)
    parent = super()
    parent.to_csv(file_path=file_path)

to_dataframe

to_dataframe(force=False)

Exports the collection's metadata to a pandas DataFrame.

Raises an error if the collection is not fully populated, unless force=True.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def to_dataframe(self, force: bool = False):  # pragma: no cover
    """
    Returns the collection's metadata as a pandas DataFrame.

    The populated-state check runs first; it raises an error if the
    collection is not fully populated, unless `force=True`. The conversion
    itself is done by the parent class.
    """
    self._check_if_populated(force)
    frame = super().to_dataframe()
    return frame

to_dict

to_dict(
    by_alias=True,
    exclude_none=True,
    exclude=None,
    force=False,
)

Serializes the collection to a dictionary.

Raises an error if the collection is not fully populated, unless force=True.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def to_dict(
    self,
    by_alias: bool = True,
    exclude_none: bool = True,
    exclude: dict | set | None = None,
    force: bool = False,
) -> dict:
    """
    Returns the collection serialized as a dictionary.

    The populated-state check runs first; it raises an error if the
    collection is not fully populated, unless `force=True`. The serialization
    options are forwarded unchanged to the parent class.
    """
    self._check_if_populated(force)
    dump_options = {
        "by_alias": by_alias,
        "exclude_none": exclude_none,
        "exclude": exclude,
    }
    return super().to_dict(**dump_options)

to_json

to_json(
    filepath=None,
    indent=2,
    by_alias=True,
    exclude_none=True,
    exclude=None,
    force=False,
)

Saves the collection data to a JSON file or returns it as a string.

Raises an error if the collection is not fully populated, unless force=True.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def to_json(
    self,
    filepath: str | None = None,
    indent: int | None = 2,
    by_alias: bool = True,
    exclude_none: bool = True,
    exclude: dict | set | None = None,
    force: bool = False,
) -> str | None:
    """
    Writes the collection data to a JSON file or returns it as a string.

    The populated-state check runs first; it raises an error if the
    collection is not fully populated, unless `force=True`. All output
    options are forwarded unchanged to the parent class.
    """
    self._check_if_populated(force)
    json_options = {
        "filepath": filepath,
        "indent": indent,
        "by_alias": by_alias,
        "exclude_none": exclude_none,
        "exclude": exclude,
    }
    return super().to_json(**json_options)

to_sqlite

to_sqlite(db_path, table_name='files', force=False)

Exports the collection's data to a table in an SQLite database.

Raises an error if the collection is not fully populated, unless force=True.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def to_sqlite(self, db_path: str, table_name: str = "files", force: bool = False) -> None:
    """
    Writes the collection's data to a table in an SQLite database.

    The populated-state check runs first; it raises an error if the
    collection is not fully populated, unless `force=True`. The export itself
    is done by the parent class.
    """
    self._check_if_populated(force)
    parent = super()
    parent.to_sqlite(db_path=db_path, table_name=table_name)

update

update(name=None, description=None)

Updates the metadata of the remote collection.

Source code in venv/lib/python3.13/site-packages/dorsal/file/collection/remote.py
def update(
    self,
    name: str | None = None,
    description: str | None = None,
) -> Self:
    """
    Updates the metadata of the remote collection.

    `name` and `description` are sent to the client as given, and the
    metadata returned by the client replaces `self.metadata`.

    Returns:
        This collection, carrying the updated metadata.
    """
    changes = {"name": name, "description": description}
    self.metadata = self._client.update_collection(collection_id=self.collection_id, **changes)
    return self