LocalFile

dorsal.file.dorsal_file.LocalFile

LocalFile(
    file_path,
    client=None,
    model_runner_pipeline="default",
    use_cache=True,
    overwrite_cache=False,
    offline=False,
    _file_record=None,
)

Bases: _DorsalFile

Parameters:

file_path (str, required): Absolute or relative path to the local file.
client (DorsalClient | None, default None): An optional DorsalClient instance to use for push() operations. If None, a globally shared DorsalClient instance will be used by push(). Ignored if offline is True.
model_runner_pipeline (str | list[dict[str, Any]] | None, default 'default'): Optional configuration for the ModelRunner instance.
use_cache (bool, default True): Whether to use the local cache to speed up processing.
overwrite_cache (bool, default False): Whether to run the full pipeline and overwrite the cached result.
offline (bool, default False): If True, puts the instance in Offline Mode and blocks network calls from LocalFile.

Raises:

FileNotFoundError: If the file_path does not exist or is not a file.
IOError: If there are issues reading the file.
DorsalClientError: If the model runner encounters an issue that it wraps.
TypeError: If file_path is not a string.
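
Example (paths are illustrative; shows the cache-related options described above):

from dorsal import LocalFile

# Reuse cached pipeline results when available (default behaviour).
local_file = LocalFile("path/to/report.pdf", use_cache=True)

# Re-run the full pipeline and overwrite any previously cached result.
refreshed_file = LocalFile("path/to/report.pdf", overwrite_cache=True)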

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def __init__(
    self,
    file_path: str,
    client: DorsalClient | None = None,
    model_runner_pipeline: str | list[dict[str, Any]] | None = "default",
    use_cache: bool = True,
    overwrite_cache: bool = False,
    offline: bool = False,
    _file_record: FileRecordStrict | None = None,
):
    """
    Args:
        file_path: Absolute or relative path to the local file.
        client: An optional DorsalClient instance to use for `push()` operations.
                If None, a globally shared DorsalClient instance will be used by `push()`.
                Ignored if `offline` is True.
        model_runner_pipeline: Optional configuration for the ModelRunner instance.
        use_cache: Whether to use the local cache to speed up processing. Defaults to True.
        overwrite_cache: Whether to run the full pipeline *and* overwrite the cache result. Defaults to False
        offline: If True, puts the instance in Offline Mode. Blocks network calls from `LocalFile`.

    Raises:
        FileNotFoundError: If the file_path does not exist or is not a file.
        IOError: If there are issues reading the file.
        DorsalClientError: If model runner encounters an issue that it wraps.
        TypeError: If file_path is not a string.
    """
    from dorsal.file.metadata_reader import MetadataReader

    if not isinstance(file_path, str):
        raise TypeError(f"file_path must be a string, got {type(file_path).__name__}")

    self.offline = offline or is_offline_mode()

    if self.offline:
        self._client = None
        if client is not None:
            logger.warning("LocalFile initialized in OFFLINE mode. The provided 'client' will be ignored.")
    else:
        self._client = client

    self._file_path: str = file_path
    self._use_cache = use_cache
    self._overwrite_cache = overwrite_cache

    if _file_record is None:
        self._metadata_reader = MetadataReader(client=self._client, model_config=model_runner_pipeline)
        logger.debug("LocalFile init: Generating record for local file at '%s'.", file_path)
        file_record_model = self._generate_record()
    else:
        self._metadata_reader = None
        file_record_model = _file_record
        logger.debug("LocalFile init: Loaded from injected record for '%s'.", file_path)

    self._source = file_record_model.source

    stat = pathlib.Path(file_path).stat()
    self.date_modified = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()

    if hasattr(stat, "st_birthtime"):  # type: ignore[attr-defined]
        self.date_created = datetime.datetime.fromtimestamp(
            stat.st_birthtime  # type: ignore[attr-defined]
        ).astimezone()
    else:
        self.date_created = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()

    super().__init__(file_record=file_record_model)
    logger.debug(
        "LocalFile for path '%s' (hash: %s) initialized successfully.",
        file_path,
        self.hash,
    )

model instance-attribute

model

Represents a file on the local filesystem.

Triggers an offline metadata extraction pipeline that generates/infers metadata for this file. Includes methods for updating, managing and indexing (to DorsalHub) the file metadata.

Attributes:

hash (str): The primary SHA-256 hash of the file content.
name (str): The base name of the file.
size (int): The file size in bytes.
media_type (str): The detected media type of the file.
tags (list[FileTag]): A list of tags associated with the file.
annotations (object): A container for detailed metadata records. Specific annotations like pdf or mediainfo can be accessed as attributes on this object (e.g., local_file.pdf.page_count).

Example
from dorsal import LocalFile

# This line processes the file and populates its metadata.
local_file = LocalFile("path/to/my/document.pdf")

# Strict Offline usage (Blocks all network calls e.g. for validation or indexing)
offline_file = LocalFile("path/to/doc.pdf", offline=True)

print(f"Hashed {local_file.name} ({local_file.size} bytes)")

if local_file.pdf:
    print(f"It has {local_file.pdf.page_count} pages.")

add_classification

add_classification(
    labels,
    *,
    vocabulary=None,
    source=None,
    score_explanation=None,
    vocabulary_url=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds an 'open/classification' annotation to the file.

Parameters:

labels (list[str | ClassificationLabel], required): Labels can be simple strings (e.g., ["cat"]) or dictionaries.

Examples:

# Labels only
lf.add_classification(labels=["EXPIRED", "COMPLETED"])

# Labels with a vocabulary
lf.add_classification(labels=["eng"], vocabulary=["eng", "fra", "deu"])

# Labels, vocabulary, attributes and source
lf.add_classification(
    labels=[
        {
            "label": "SENSITIVE",
            "score": 0.95,
            "attributes": {
                "page_number": 22,
                "context": "This document contains sensitive information"
            }
        }
    ],
    vocabulary=["SENSITIVE", "INTERNAL", "PUBLIC"],
    source="MySensitiveDocumentScannerV1.0"
)

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_classification(
    self,
    labels: list[str | ClassificationLabel],
    *,
    vocabulary: list[str] | None = None,
    source: str | None = None,
    score_explanation: str | None = None,
    vocabulary_url: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> LocalFile:
    """
    Adds an 'open/classification' annotation to the file.

    Args:
        - labels: can be simple strings (e.g., ["cat"]) or dictionaries

    example:
        >>> # Only labels
        >>> lf.add_classification(labels=["EXPIRED", "COMPLETED"])
        >>> # Labels with vocabulary
        >>> lf.add_classification(labels=["eng"], vocabulary=["eng", "fra", "deu"])
        >>> # Labels, vocabulary, attributes and source
        >>> lf.add_classification(
                labels=[
                    {
                        "label": "SENSITIVE",
                        "score": 0.95,
                        "attributes": {
                            "page_number": 22,
                            "context": "This document contains sensitive information"
                        }
                    }
                ],
                vocabulary=["SENSITIVE", "INTERNAL", "PUBLIC"],
                source="MySensitiveDocumentScannerV1.0"
            )
    """
    from dorsal.file.helpers import build_classification_record

    record_data = build_classification_record(
        labels=labels,
        score_explanation=score_explanation,
        vocabulary=vocabulary,
        vocabulary_url=vocabulary_url,
    )

    return self._add_annotation(
        schema_id="open/classification",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_embedding

add_embedding(
    vector,
    *,
    model=None,
    target=None,
    source=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds an 'open/embedding' annotation to the file.

This helper provides a convenience wrapper for adding a simple embedding (feature vector) to the file record.

Parameters:

vector (list[float], required): The embedding vector.
model (str, optional): Name of the algorithm or model that generated the embedding (e.g., 'CLIP', 'text-embedding-ada-002').
target (str, optional): Name of the target feature/variable.
source (str, optional): An optional string describing the source of the annotation (e.g., 'Local CLIP Model v1.2'). This will be passed to the 'detail' field.
private (bool, default True): Whether the annotation should be private.
overwrite (bool, default False): Whether to overwrite an existing annotation.
api_key (str, optional): API key for validation.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the file is missing a 'validation_hash'.
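
Example (the vector values and model name are placeholders):

local_file = LocalFile("path/to/image.jpg")
local_file.add_embedding(
    vector=[0.12, 0.08, 0.91],
    model="CLIP",
    source="Local CLIP Model v1.2",
)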

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_embedding(
    self,
    vector: list[float],
    *,
    model: str | None = None,
    target: str | None = None,
    source: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """
    Adds an 'open/embedding' annotation to the file.

    This helper provides a convenience wrapper for adding a simple
    embedding (feature vector) to the file record.

    Args:
        vector (list[float]): The embedding vector.
        model (str, optional): Name of the algorithm or model
            that generated the embedding (e.g., 'CLIP', 'text-embedding-ada-002').
        target (str, optional): Name of target feature/variable
        source (str, optional): An optional string describing the source
            of the annotation (e.g., 'Local CLIP Model v1.2').
            This will be passed to the 'detail' field.
        private (bool): Whether the annotation should be private.
        overwrite (bool): Whether to overwrite an existing annotation.
        api_key (str, optional): API key for validation.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the file is missing a 'validation_hash'.
    """
    from dorsal.file.helpers import build_embedding_record

    logger.debug(
        "Adding embedding (Model: %s, Dimensions: %d) to file '%s'.",
        model,
        len(vector),
        self._file_path,
    )

    record_data = build_embedding_record(vector=vector, model=model, target=target)

    return self._add_annotation(
        schema_id="open/embedding",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_label

add_label(value, auto_validate=False, api_key=None)

Adds a private 'label' tag to the local file model.

This method modifies self.model.tags locally. To synchronize these tags with DorsalHub, call push on the instance.

Note: This is strictly a private tag because "label" is not a whitelisted public tag namespace on DorsalHub.

Parameters:

value (str | bool | int | float | datetime, required): Value of the label (str, bool, datetime, int, or float).
api_key (str | None, default None): Optional API key to use for validation.
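
Example (the path and label value are illustrative):

lf = LocalFile("path/to/invoice.pdf")
lf.add_label("needs_review")
lf.push()  # synchronize the new tag with DorsalHub
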
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_label(
    self,
    value: str | bool | int | float | datetime.datetime,
    auto_validate: bool = False,
    api_key: str | None = None,
):
    """
    Adds a private 'label' tag to the local file model.

    This method modifies `self.model.tags` locally.
    To synchronize these tags with DorsalHub, call `push` on the instance.

    Note: This is strictly a private tag because "label" is not a whitelisted
    public tag namespace on DorsalHub.

    Args:
        value: Value of the label (str, bool, datetime, int, or float).
        api_key: Optional API key to use for validation.
    """
    return self._add_local_tag(
        name="label",
        value=value,
        private=True,
        auto_validate=auto_validate,
        api_key=api_key,
    )

add_llm_output

add_llm_output(
    model,
    response_data,
    *,
    prompt=None,
    language=None,
    score=None,
    score_explanation=None,
    generation_params=None,
    generation_metadata=None,
    source=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds an 'open/llm-output' annotation to the file.

This helper provides a convenience wrapper for storing the output of a Large Language Model (LLM) task related to this file.

Parameters:

model (str, required): The ID or name of the generative model used (e.g., 'gpt-4o').
response_data (str | dict, required): The generative output from the model. Can be a string or a simple key-value dictionary.
prompt (str, optional): The text-based task or prompt provided to the model.
language (str, optional): The 3-letter ISO-639-3 language code of the response (e.g., 'eng').
score (float, optional): An optional confidence or evaluation score for the generated output, from -1 to 1.
generation_params (dict, optional): Optional parameters sent in the API request (e.g., {"temperature": 0.5, "max_tokens": 1000}).
generation_metadata (dict, optional): Optional metadata returned by the API response (e.g., {"usage": {...}, "finish_reason": "stop"}).
source (str, optional): An optional string describing the source of the annotation (e.g., 'OpenAI Summarizer v3'). This will be passed to the 'detail' field.
private (bool, default True): Whether the annotation should be private.
overwrite (bool, default False): Whether to overwrite an existing annotation.
api_key (str, optional): API key for validation.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the file is missing a 'validation_hash'.
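
Example (the model name, prompt, and response are placeholders):

lf = LocalFile("path/to/contract.pdf")
lf.add_llm_output(
    model="gpt-4o",
    response_data="The contract covers a 12-month engagement...",
    prompt="Summarize this document.",
    language="eng",
)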

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_llm_output(
    self,
    model: str,
    response_data: str | dict[str, Any],
    *,
    prompt: str | None = None,
    language: str | None = None,
    score: float | None = None,
    score_explanation: str | None = None,
    generation_params: dict[str, Any] | None = None,
    generation_metadata: dict[str, Any] | None = None,
    source: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """
    Adds an 'open/llm-output' annotation to the file.

    This helper provides a convenience wrapper for storing the output
    of a Large Language Model (LLM) task related to this file.

    Args:
        model (str): The ID or name of the generative model used
            (e.g., 'gpt-4o').
        response_data (str | dict): The generative output from the model.
            Can be a string or a simple key-value dictionary.
        prompt (str, optional): The text-based task or prompt
            provided to the model.
        language (str, optional): The 3-letter ISO-639-3 language
            code of the response (e.g., 'eng').
        score (float, optional): An optional confidence or evaluation score
            for the generated output, from -1 to 1.
        generation_params (dict, optional): Optional parameters sent in the
            API request (e.g., {"temperature": 0.5, "max_tokens": 1000}).
        generation_metadata (dict, optional): Optional metadata returned
            by the API response (e.g., {"usage": {...}, "finish_reason": "stop"}).
        source (str, optional): An optional string describing the source
            of the annotation (e.g., 'OpenAI Summarizer v3').
            This will be passed to the 'detail' field.
        private (bool): Whether the annotation should be private.
        overwrite (bool): Whether to overwrite an existing annotation.
        api_key (str, optional): API key for validation.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the file is missing a 'validation_hash'.
    """
    from dorsal.file.helpers import build_llm_output_record

    logger.debug(
        "Adding 'open/llm-output' (Model: %s) to file '%s'.",
        model,
        self._file_path,
    )

    record_data = build_llm_output_record(
        model=model,
        response_data=response_data,
        prompt=prompt,
        language=language,
        score=score,
        score_explanation=score_explanation,
        generation_params=generation_params,
        generation_metadata=generation_metadata,
    )

    return self._add_annotation(
        schema_id="open/llm-output",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_location

add_location(
    longitude,
    latitude,
    *,
    timestamp=None,
    camera_make=None,
    camera_model=None,
    bbox=None,
    source=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds an 'open/geolocation' annotation for a simple Point.

This helper provides a convenience wrapper for the common use case of tagging a file with a single GPS coordinate (longitude, latitude) and optional EXIF-like data.

It automatically builds the required GeoJSON Feature object.

Parameters:

longitude (float, required): The longitude coordinate (e.g., -0.5895).
latitude (float, required): The latitude coordinate (e.g., 51.3814).
timestamp (str, optional): An ISO 8601 timestamp for when the geospatial data was captured (e.g., "2025-09-17T11:45:00Z").
camera_make (str, optional): The make of the camera or sensor.
camera_model (str, optional): The model of the camera or sensor.
source (str, optional): An optional string describing the source of the annotation (e.g., 'EXIF Data Parser'). This will be passed to the 'detail' field.
private (bool, default True): Whether the annotation should be private.
overwrite (bool, default False): Whether to overwrite an existing annotation.
api_key (str, optional): API key for validation.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the file is missing a 'validation_hash'.
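
Example (coordinates and camera details are illustrative):

lf = LocalFile("path/to/photo.jpg")
lf.add_location(
    longitude=-0.5895,
    latitude=51.3814,
    timestamp="2025-09-17T11:45:00Z",
    camera_make="Canon",
    source="EXIF Data Parser",
)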

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_location(
    self,
    longitude: float,
    latitude: float,
    *,
    timestamp: str | None = None,
    camera_make: str | None = None,
    camera_model: str | None = None,
    bbox: list[float] | None = None,
    source: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """
    Adds an 'open/geolocation' annotation for a simple Point.

    This helper provides a convenience wrapper for the common use case of
    tagging a file with a single GPS coordinate (longitude, latitude)
    and optional EXIF-like data.

    It automatically builds the required GeoJSON Feature object.

    Args:
        longitude (float): The longitude coordinate (e.g., -0.5895).
        latitude (float): The latitude coordinate (e.g., 51.3814).
        timestamp (str, optional): An ISO 8601 timestamp for when the
            geospatial data was captured (e.g., "2025-09-17T11:45:00Z").
        camera_make (str, optional): The make of the camera or sensor.
        camera_model (str, optional): The model of the camera or sensor.
        source (str, optional): An optional string describing the source
            of the annotation (e.g., 'EXIF Data Parser').
            This will be passed to the 'detail' field.
        private (bool): Whether the annotation should be private.
        overwrite (bool): Whether to overwrite an existing annotation.
        api_key (str, optional): API key for validation.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the file is missing a 'validation_hash'.
    """
    from dorsal.file.helpers import build_location_record

    logger.debug(
        "Adding 'open/geolocation' Point(%s, %s) to file '%s'.",
        longitude,
        latitude,
        self._file_path,
    )

    record_data = build_location_record(
        longitude=longitude,
        latitude=latitude,
        timestamp=timestamp,
        camera_make=camera_make,
        camera_model=camera_model,
        bbox=bbox,  # Passed through
    )

    return self._add_annotation(
        schema_id="open/geolocation",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_private_annotation

add_private_annotation(
    *,
    schema_id,
    annotation_record,
    validator=None,
    source=None,
    api_key=None,
    overwrite=False,
    ignore_linter_errors=False,
    force=False
)

Adds a private annotation to the local file model.

This is a wrapper for the _add_annotation method, pre-setting private=True.

The annotation is added locally and will be synchronized with DorsalHub upon calling push().

Parameters:

schema_id (str, required): The schema used for validation (e.g., 'open/generic').
annotation_record (BaseModel | dict[str, Any], required): The annotation data (a Pydantic model or dict).
validator (BaseModel | None, default None): An optional Pydantic model class or JsonSchemaValidator instance.
source (str | None, default None): An optional string describing the source of the manual data.
api_key (str | None, default None): An optional API key for fetching the schema.
overwrite (bool, default False): If True, overwrite an existing annotation for the same dataset.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the schema_id is invalid or validation fails.
FileAnnotatorError: If the annotation record cannot be processed.

Example
my_file = LocalFile("path/to/file.txt")
private_data = {"internal_id": 12345, "status": "pending_review"}
my_file.add_private_annotation(
    schema_id="dorsal/my-internal-schema",
    annotation_record=private_data
)
my_file.push(private=True)
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_private_annotation(
    self,
    *,
    schema_id: str,
    annotation_record: BaseModel | dict[str, Any],
    validator: BaseModel | None = None,
    source: str | None = None,
    api_key: str | None = None,
    overwrite: bool = False,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """Adds a private annotation to the local file model.

    This is a wrapper for the `_add_annotation` method,
    pre-setting `private=True`.

    The annotation is added locally and will be synchronized with DorsalHub upon calling `push()`.

    Args:
        schema_id: The schema used for validation (e.g., 'open/generic').
        annotation_record: The annotation data (a Pydantic model or dict).
        validator: An optional Pydantic model class or `JsonSchemaValidator` instance.
        source: An optional string describing the source of the manual data.
        api_key: An optional API key for fetching the schema.
        overwrite: If True, overwrite an existing annotation for the same dataset.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the `schema_id` is invalid or validation fails.
        FileAnnotatorError: If the annotation record cannot be processed.

    Example:
        ```python
        my_file = LocalFile("path/to/file.txt")
        private_data = {"internal_id": 12345, "status": "pending_review"}
        my_file.add_private_annotation(
            schema_id="dorsal/my-internal-schema",
            annotation_record=private_data
        )
        my_file.push(private=True)
        ```
    """
    logger.debug(
        "Adding private annotation for schema '%s' to file '%s'.",
        schema_id,
        self._file_path,
    )
    return self._add_annotation(
        schema_id=schema_id,
        private=True,
        annotation_record=annotation_record,
        validator=validator,
        source_id=source,
        api_key=api_key,
        overwrite=overwrite,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_private_tag

add_private_tag(
    name, value, auto_validate=False, api_key=None
)

Adds a private file tag to the local file model.

This method modifies self.model.tags locally.

To synchronize these tags with DorsalHub, call push on the instance.

Parameters:

name (str, required): Name of the tag (typically 3-64 alphanumeric characters and underscores).
value (str | bool | int | float | datetime, required): Value of the tag (str, bool, datetime, int, or float).
api_key (str | None, default None): Optional API key to use for validation.
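
Example (the tag name and value are illustrative):

lf = LocalFile("path/to/dataset.csv")
lf.add_private_tag("project_code", "alpha_42")
lf.push()  # synchronize tags with DorsalHub
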
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_private_tag(
    self,
    name: str,
    value: str | bool | int | float | datetime.datetime,
    auto_validate: bool = False,
    api_key: str | None = None,
):
    """
    Adds a *private* file tag to the local file model.

    This method modifies `self.model.tags` locally.

    To synchronize these tags with DorsalHub, call `push` on the instance.

    Args:
        name: Name of the tag (typically 3-64 alphanumeric characters and
              underscores
        value: Value of the tag (str, bool, datetime, int, or float).
        api_key: Optional API key to use for validation
    """
    return self._add_local_tag(
        name=name,
        value=value,
        private=True,
        auto_validate=auto_validate,
        api_key=api_key,
    )

add_public_annotation

add_public_annotation(
    *,
    schema_id,
    annotation_record,
    validator=None,
    source=None,
    api_key=None,
    overwrite=False,
    ignore_linter_errors=False,
    force=False
)

Adds a public annotation to the local file model.

This is a wrapper for the _add_annotation method, pre-setting private=False.

The annotation is added locally and will be synchronized with DorsalHub upon calling push().

Parameters:

schema_id (str, required): The schema used for validation (e.g., 'open/generic').
annotation_record (BaseModel | dict[str, Any], required): The annotation data (a Pydantic model or dict).
validator (BaseModel | None, default None): An optional Pydantic model class or JsonSchemaValidator instance.
source (str | None, default None): An optional string describing the source of the manual data.
api_key (str | None, default None): An optional API key for fetching the schema.
overwrite (bool, default False): If True, overwrite an existing annotation for the same dataset.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the schema_id is invalid or validation fails.
FileAnnotatorError: If the annotation record cannot be processed.

Example
my_file = LocalFile("path/to/image.jpg")
public_data = {"label": "cat", "confidence": 0.98}
my_file.add_public_annotation(
    schema_id="open/classification",
    annotation_record=public_data
)
my_file.push(private=False)
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_public_annotation(
    self,
    *,
    schema_id: str,
    annotation_record: BaseModel | dict[str, Any],
    validator: BaseModel | None = None,
    source: str | None = None,
    api_key: str | None = None,
    overwrite: bool = False,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """Adds a public annotation to the local file model.

    This is a wrapper for the `_add_annotation` method,
    pre-setting `private=False`.

    The annotation is added locally and will be synchronized with DorsalHub upon calling `push()`.

    Args:
        schema_id: The schema used for validation (e.g., 'open/generic').
        annotation_record: The annotation data (a Pydantic model or dict).
        validator: An optional Pydantic model class or `JsonSchemaValidator` instance.
        source: An optional string describing the source of the manual data.
        api_key: An optional API key for fetching the schema.
        overwrite: If True, overwrite an existing annotation for the same dataset.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the `schema_id` is invalid or validation fails.
        FileAnnotatorError: If the annotation record cannot be processed.

    Example:
        ```python
        my_file = LocalFile("path/to/image.jpg")
        public_data = {"label": "cat", "confidence": 0.98}
        my_file.add_public_annotation(
            schema_id="open/classification",
            annotation_record=public_data
        )
        my_file.push(private=False)
        ```
    """
    logger.debug(
        "Adding public annotation for schema '%s' to file '%s'.",
        schema_id,
        self._file_path,
    )
    return self._add_annotation(
        schema_id=schema_id,
        private=False,
        annotation_record=annotation_record,
        validator=validator,
        source_id=source,
        api_key=api_key,
        overwrite=overwrite,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_public_tag

add_public_tag(
    name, value, auto_validate=False, api_key=None
)

Adds a public file tag to the local file model.

This method modifies self.model.tags locally.

To synchronize these tags with DorsalHub, call push on the instance.

When auto_validate is True, the tag is validated against the API.

Parameters:

name (str, required): Name of the tag (typically 3-64 alphanumeric characters and underscores, subject to server-side validation if dorsal is online).
value (str | bool | int | float | datetime, required): Value of the tag (str, bool, datetime, int, or float).
api_key (str | None, default None): Optional API key to use for validation.
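
Example (the tag name and value are illustrative; public tags are subject to server-side validation):

lf = LocalFile("path/to/paper.pdf")
lf.add_public_tag("year", 2024, auto_validate=True)
lf.push(private=False)
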
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_public_tag(
    self,
    name: str,
    value: str | bool | int | float | datetime.datetime,
    auto_validate: bool = False,
    api_key: str | None = None,
):
    """
    Adds a *public* file tag to the local file model.

    This method modifies `self.model.tags` locally.

    To synchronize these tags with DorsalHub, call `push` on the instance.

    When `auto_validate` is True, validates the tag against the API

    Args:
        name: Name of the tag (typically 3-64 alphanumeric characters and
              underscores, subject to server-side validation if dorsal is online).
        value: Value of the tag (str, bool, datetime, int, or float).
        api_key: Optional API key to use for validation
    """
    return self._add_local_tag(
        name=name,
        value=value,
        private=False,
        auto_validate=auto_validate,
        api_key=api_key,
    )

add_regression

add_regression(
    value,
    *,
    target=None,
    unit=None,
    producer=None,
    score_explanation=None,
    statistic=None,
    quantile_level=None,
    interval_lower=None,
    interval_upper=None,
    score=None,
    timestamp=None,
    attributes=None,
    source=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds an 'open/regression' annotation for a single point estimate.

This helper creates a record containing a single data point.

Use for scalar predictions (e.g. a price).

For multi-point data (e.g. time-series, distributions), use add_private_annotation / add_public_annotation and construct the record manually with a list of points using dorsal.file.helpers.build_regression_point

Parameters:

value (float | None, required): The predicted or sampled value.
target (str, optional): The name of the variable being predicted (e.g., 'house_price', 'credit_score').
unit (str, optional): The unit of measurement (e.g., 'USD', 'kg').
statistic (str, optional): The statistical nature of this value (e.g., 'mean', 'median', 'max', 'quantile').
quantile_level (float, optional): If statistic='quantile', this defines the level (e.g., 0.95).
interval_lower (float, optional): The lower bound of the confidence interval.
interval_upper (float, optional): The upper bound of the confidence interval.
timestamp (str | datetime, optional): The specific time this prediction applies to.
source (str, optional): An optional string describing the source of the annotation (e.g., 'PricePredictor v1.0').
private (bool, default True): Whether the annotation should be private.
overwrite (bool, default False): Whether to overwrite an existing annotation.
api_key (str, optional): API key for validation.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Examples:

Simple Point Estimate:

>>> lf.add_regression(target="sentiment", value=0.85, statistic="mean")

Prediction with Confidence Interval:

>>> lf.add_regression(
...     target="temperature",
...     value=22.5,
...     unit="celsius",
...     interval_lower=21.0,
...     interval_upper=24.0
... )

Quantile Prediction:

>>> lf.add_regression(
...     target="latency",
...     value=150,
...     unit="ms",
...     statistic="quantile",
...     quantile_level=0.99
... )
Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_regression(
    self,
    value: float | None,
    *,
    target: str | None = None,
    unit: str | None = None,
    producer: str | None = None,
    score_explanation: str | None = None,
    statistic: str | None = None,
    quantile_level: float | None = None,
    interval_lower: float | None = None,
    interval_upper: float | None = None,
    score: float | None = None,  #
    timestamp: str | datetime.datetime | None = None,
    attributes: dict[str, Any] | None = None,
    source: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """
    Adds an 'open/regression' annotation for a single point estimate.

    This helper creates a record containing a single data point.

    Use for scalar predictions (e.g. a price).

    For multi-point data (e.g. time-series, distributions), use `add_private_annotation` / `add_public_annotation` and
    construct the record manually with a list of points using `dorsal.file.helpers.build_regression_point`

    Args:
        value (float | None): The predicted or sampled value.
        target (str, optional): The name of the variable being predicted
            (e.g., 'house_price', 'credit_score').
        unit (str, optional): The unit of measurement (e.g., 'USD', 'kg').
        statistic (str, optional): The statistical nature of this value
            (e.g., 'mean', 'median', 'max', 'quantile').
        quantile_level (float, optional): If statistic='quantile', this defines
            the level (e.g., 0.95).
        interval_lower (float, optional): The lower bound of the confidence interval.
        interval_upper (float, optional): The upper bound of the confidence interval.
        timestamp (str | datetime, optional): The specific time this prediction applies to.
        source (str, optional): An optional string describing the source
            of the annotation (e.g., 'PricePredictor v1.0').
        private (bool): Whether the annotation should be private.
        overwrite (bool): Whether to overwrite an existing annotation.
        api_key (str, optional): API key for validation.

    Returns:
        The LocalFile instance, for method chaining.

    Examples:
        Simple Point Estimate:
        >>> lf.add_regression(target="sentiment", value=0.85, statistic="mean")

        Prediction with Confidence Interval:
        >>> lf.add_regression(
        ...     target="temperature",
        ...     value=22.5,
        ...     unit="celsius",
        ...     interval_lower=21.0,
        ...     interval_upper=24.0
        ... )

        Quantile Prediction:
        >>> lf.add_regression(
        ...     target="latency",
        ...     value=150,
        ...     unit="ms",
        ...     statistic="quantile",
        ...     quantile_level=0.99
        ... )
    """
    from dorsal.file.helpers import build_single_point_regression_record

    logger.debug(
        "Adding 'open/regression' (Target: %s, Value: %s) to file '%s'.",
        target,
        value,
        self._file_path,
    )

    record_data = build_single_point_regression_record(
        value=value,
        target=target,
        unit=unit,
        producer=producer,
        score_explanation=score_explanation,
        statistic=statistic,
        quantile_level=quantile_level,
        interval_lower=interval_lower,
        interval_upper=interval_upper,
        score=score,
        timestamp=timestamp,
        attributes=attributes,
    )

    return self._add_annotation(
        schema_id="open/regression",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

add_transcription

add_transcription(
    text,
    language,
    *,
    track_id=None,
    source=None,
    private=True,
    overwrite=False,
    api_key=None,
    ignore_linter_errors=False,
    force=False
)

Adds a simple 'open/audio-transcription' annotation to the file.

This helper provides a convenience wrapper for the common use case of storing the full, flat text transcription of an audio file.

NOTE: This helper populates the top-level 'text' field. It does NOT handle 'segments'. For timed transcriptions, build the dictionary and use the generic 'add_private_annotation' method.

Parameters:

text (str, required): The full, concatenated transcribed text.
language (str, required): The 3-letter ISO-639-3 language code of the transcription (e.g., 'eng').
track_id (str | int, optional): Identifier for the specific audio track or channel in the source file.
source (str, optional): An optional string describing the source of the annotation (e.g., 'Whisper v3 (simple)'). This will be passed to the 'detail' field.
private (bool, default True): Whether the annotation should be private.
overwrite (bool, default False): Whether to overwrite an existing annotation.
api_key (str, optional): API key for validation.

Returns:

LocalFile: The LocalFile instance, for method chaining.

Raises:

ValueError: If the file is missing a 'validation_hash'.
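
Example (the text and source are placeholders):

lf = LocalFile("path/to/interview.mp3")
lf.add_transcription(
    text="Welcome to the show. Today we discuss...",
    language="eng",
    source="Whisper v3 (simple)",
)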

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def add_transcription(
    self,
    text: str,
    language: str,
    *,
    track_id: str | int | None = None,
    source: str | None = None,
    private: bool = True,
    overwrite: bool = False,
    api_key: str | None = None,
    ignore_linter_errors: bool = False,
    force: bool = False,
) -> "LocalFile":
    """
    Adds a simple 'open/audio-transcription' annotation to the file.

    This helper provides a convenience wrapper for the common use case of
    storing the *full, flat text* transcription of an audio file.

    NOTE: This helper populates the top-level 'text' field. It does
    NOT handle 'segments'. For timed transcriptions, build the
    dictionary and use the generic 'add_private_annotation' method.

    Args:
        text (str): The full, concatenated transcribed text.
        language (str): The 3-letter ISO-639-3 language
            code of the transcription (e.g., 'eng').
        track_id (str | int, optional): Identifier for the specific
            audio track or channel in the source file.
        source (str, optional): An optional string describing the source
            of the annotation (e.g., 'Whisper v3 (simple)').
            This will be passed to the 'detail' field.
        private (bool): Whether the annotation should be private.
        overwrite (bool): Whether to overwrite an existing annotation.
        api_key (str, optional): API key for validation.

    Returns:
        The LocalFile instance, for method chaining.

    Raises:
        ValueError: If the file is missing a 'validation_hash'.
    """
    from dorsal.file.helpers import build_transcription_record

    logger.debug(
        "Adding 'open/audio-transcription' (Language: %s, Length: %d) to file '%s'.",
        language,
        len(text),
        self._file_path,
    )

    record_data = build_transcription_record(
        language=language,
        text=text,
        track_id=track_id,
    )

    return self._add_annotation(
        schema_id="open/audio-transcription",
        private=private,
        annotation_record=record_data,
        source_id=source,
        overwrite=overwrite,
        api_key=api_key,
        ignore_linter_errors=ignore_linter_errors,
        force=force,
    )

from_json classmethod

from_json(path, check_file_exists=False)

Factory method: Instantiates a LocalFile from a JSON File Record.
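
Example (assumes a record previously exported with save(); the path is illustrative):

lf = LocalFile.from_json("exported_record.json")

# Optionally require that the original file still exists on this system.
lf_checked = LocalFile.from_json("exported_record.json", check_file_exists=True)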

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
@classmethod
def from_json(cls, path: str | pathlib.Path, check_file_exists: bool = False) -> "LocalFile":
    """Factory method: Instantiates a LocalFile from a JSON File Record."""
    from dorsal.file.validators.file_record import FileRecordStrict

    input_path = pathlib.Path(path)
    if not input_path.exists():
        raise FileNotFoundError(f"JSON record not found: {input_path}")

    try:
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {input_path}: {e}") from e

    local_attrs = data.get("local_attributes", {})
    original_file_path = local_attrs.get("file_path", str(input_path))

    if check_file_exists:
        target = pathlib.Path(original_file_path)
        if not target.exists():
            raise FileNotFoundError(
                f"Serialized record points to '{original_file_path}', which does not exist on this system."
            )

    try:
        record_model = FileRecordStrict.model_validate(data)
    except PydanticValidationError as e:
        raise ValueError(f"JSON data is not a valid FileRecordStrict: {e}") from e

    return cls(file_path=original_file_path, _file_record=record_model)

get_annotations

get_annotations(schema_id, source_id=None)

Retrieves a list of annotations from the local model by schema_id.

Parameters:

schema_id (str, required): The unique identifier of the dataset/schema.
source_id (str | None, default None): Optional. Filter custom annotations by their source ID.

Returns:

Sequence[Annotation | AnnotationXL | PDFValidationModel | MediaInfoValidationModel | EbookValidationModel | OfficeDocumentValidationModel]: A list of annotation objects (Core models or generic Annotations).
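
Example (the path and schema ID are illustrative):

lf = LocalFile("path/to/document.pdf")
for ann in lf.get_annotations("open/classification"):
    print(ann)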

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def get_annotations(
    self, schema_id: str, source_id: str | None = None
) -> Sequence[
    Annotation
    | AnnotationXL
    | PDFValidationModel
    | MediaInfoValidationModel
    | EbookValidationModel
    | OfficeDocumentValidationModel
]:
    """
    Retrieves a list of annotations from the local model by schema_id.

    Args:
        schema_id: The unique identifier of the dataset/schema.
        source_id: Optional. Filter custom annotations by their source ID.

    Returns:
        A list of annotation objects (Core models or generic Annotations).
    """
    from dorsal.file.validators.file_record import AnnotationXL, AnnotationGroup, GenericFileAnnotation

    if schema_id == "file/pdf":
        return [self.pdf] if self.pdf else []
    elif schema_id == "file/mediainfo":
        return [self.mediainfo] if self.mediainfo else []
    elif schema_id == "file/ebook":
        return [self.ebook] if self.ebook else []
    elif schema_id == "file/office":
        return [self.office] if self.office else []

    value = getattr(self.model.annotations, schema_id, None)

    if value is None:
        return []

    raw_list = value if isinstance(value, list) else [value]
    processed_list = []

    for item in raw_list:
        if isinstance(item, AnnotationGroup):
            from dorsal.file.sharding import reassemble_record

            _, record_content = reassemble_record(item)
            head_chunk = item.annotations[0]
            try:
                reassembled_ann = AnnotationXL(
                    record=GenericFileAnnotation(**record_content),
                    private=head_chunk.private,
                    source=head_chunk.source,
                    schema_version=head_chunk.schema_version,
                    group=None,
                )
                processed_list.append(reassembled_ann)
            except Exception as err:
                raise RuntimeError(f"Failed to reassemble group for {schema_id}: {err}") from err
        else:
            processed_list.append(item)

    if source_id is not None:
        return [ann for ann in processed_list if ann.source.id == source_id]

    return processed_list

push

push(private=True, api_key=None)

Indexes the file's metadata (annotations and tags) to DorsalHub.

If no record exists for this hash, a new record is created, either privately or publicly.

When private is True, any annotations or tags you previously indexed for this file are overwritten. When private is False, the record is updated in an aggregate manner.
Parameters:

private (bool, default True): If True, the file record will be created as private and will only be accessible to the authenticated user.
api_key (str, optional): An API key to use for this specific request, overriding the client's default key. Defaults to None.

Returns:

FileIndexResponse: A response object from the API detailing the result of the indexing operation.

Raises:

DorsalClientError: If the push operation fails due to an API error, network issue, or authentication failure.
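
Example (the path and tag are illustrative):

lf = LocalFile("path/to/report.pdf")
lf.add_private_tag("status", "reviewed")
response = lf.push(private=True)
print(response.success, response.error)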

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def push(self, private: bool = True, api_key: str | None = None) -> FileIndexResponse:
    """Indexes file's metadata (annotations and tags) to DorsalHub.

    If no record exists for this hash, a new record is created either privately or publicly.

    When `private` is `True`, if you indexed any annotations or tags for this file before, they are overwritten.
    When `private` is `False`, the record is updated in an aggregate manner.

    Note:

    Args:
        private (bool, optional): If True, the file record will be created
            as private and will only be accessible to the authenticated user.
            Defaults to True.
        api_key (str, optional): An API key to use for this specific request,
            overriding the client's default key. Defaults to None.

    Returns:
        FileIndexResponse: A response object from the API detailing the
            result of the indexing operation.

    Raises:
        DorsalClientError: If the push operation fails due to an API error,
            network issue, or authentication failure.
    """
    from dorsal.file.validators.file_record import FileRecordStrict

    if self.offline:
        raise DorsalError("Cannot push file record: LocalFile is in OFFLINE mode. ")

    if not isinstance(self.model, FileRecordStrict):
        logger.error("Cannot push LocalFile: internal model is not FileRecordStrict.")  # type: ignore[unreachable]
        raise DorsalClientError(
            message="Internal error: LocalFile model is not suitable for upload. Expected FileRecordStrict.",
        )

    if self._client is None:
        self._client = get_shared_dorsal_client(api_key=api_key)

    client = self._client

    logger.debug(
        "Pushing %s file record for local file '%s' (hash: %s) to DorsalHub.",
        "private" if private else "public",
        self._file_path,
        self.hash,
    )

    try:
        if private:
            response = client.index_private_file_records(file_records=[self.model], api_key=api_key)
        else:
            response = client.index_public_file_records(file_records=[self.model], api_key=api_key)

        logger.info(
            "Successfully pushed file record for '%s' to DorsalHub. Total: %s, Success: %s, Error: %s",
            self._file_path,
            response.total,
            response.success,
            response.error,
        )
        return response
    except DorsalClientError as err:
        logger.error(
            "Failed to push file record for '%s' to DorsalHub. Error: %s",
            self._file_path,
            err,
        )
        raise

remove_annotation

remove_annotation(schema_id, source_id=None)

Removes an annotation from the local file model. For custom schemas, providing source_id removes only that specific entry.
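
Example (the schema and source IDs are illustrative):

lf = LocalFile("path/to/image.jpg")

# Remove every local 'open/classification' annotation.
lf.remove_annotation("open/classification")

# Remove only the entry produced by a specific source.
lf.remove_annotation("open/embedding", source_id="MyEmbedderV1")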

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def remove_annotation(self, schema_id: str, source_id: str | None = None) -> "LocalFile":
    """
    Removes an annotation from the local file model.
    For custom schemas, providing `source_id` removes only that specific entry.
    """
    from dorsal.file.validators.file_record import CORE_MODEL_ANNOTATION_WRAPPERS

    annotation_id = schema_id
    is_core = schema_id in CORE_MODEL_ANNOTATION_WRAPPERS

    if is_core:
        annotation_id = schema_id.replace("/", "_").replace("-", "_")

    if source_id is None or is_core:
        if hasattr(self.model.annotations, annotation_id):
            try:
                delattr(self.model.annotations, annotation_id)
                if (
                    self.model.annotations.__pydantic_extra__
                    and annotation_id in self.model.annotations.__pydantic_extra__
                ):
                    del self.model.annotations.__pydantic_extra__[annotation_id]
                if annotation_id in self.model.annotations.model_fields_set:
                    self.model.annotations.model_fields_set.remove(annotation_id)

                logger.info("Removed all local annotations for '%s' (key: '%s').", schema_id, annotation_id)
                self._populate()
            except Exception as e:
                logger.warning("Failed to remove annotation '%s': %s", schema_id, e)
        return self

    current_val = getattr(self.model.annotations, annotation_id, None)

    if isinstance(current_val, list):
        original_len = len(current_val)

        new_list = [ann for ann in current_val if ann.source.id != source_id]

        if len(new_list) < original_len:
            if not new_list:
                return self.remove_annotation(schema_id)

            setattr(self.model.annotations, annotation_id, new_list)
            self._populate()
            logger.debug(
                "Removed %d annotation(s) with source_id '%s' from '%s'.",
                original_len - len(new_list),
                source_id,
                schema_id,
            )

    return self

save

save(path, indent=2, by_alias=True, exclude_none=True)

Exports the File Record to a JSON file on disk.

Parameters:

path (str | Path, required): The file path to write to.
indent (int | None, default 2): JSON indentation level.
by_alias (bool, default True): Whether to use field aliases (required for correct schema loading).
exclude_none (bool, default True): Whether to exclude fields with None values.

Raises:

IOError: If the file cannot be written.
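
Example (the paths are illustrative):

lf = LocalFile("path/to/document.pdf")
lf.save("document.record.json")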

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def save(
    self,
    path: str | pathlib.Path,
    indent: int | None = 2,
    by_alias: bool = True,
    exclude_none: bool = True,
) -> None:
    """
    Exports the File Record to a JSON file on disk.

    Args:
        path: The file path to write to.
        indent: JSON indentation level.
        by_alias: Whether to use field aliases (required for correct schema loading).
        exclude_none: Whether to exclude fields with None values.

    Raises:
        IOError: If the file cannot be written.
    """
    output_path = pathlib.Path(path)

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.error(f"Failed to create directory for export: {e}")
        raise IOError(f"Could not create directory for '{output_path}'") from e

    json_content = self.to_json(indent=indent, by_alias=by_alias, exclude_none=exclude_none)

    try:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(json_content)
        logger.debug(f"Successfully saved LocalFile state to {output_path}")
    except IOError as e:
        logger.error(f"Failed to write JSON to {output_path}: {e}")
        raise

to_dict

to_dict(
    by_alias=True,
    exclude_none=True,
    mode="python",
    exclude=None,
)

Overrides the parent method to include local file information.
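
Example (the path is illustrative):

lf = LocalFile("path/to/document.pdf")
record = lf.to_dict()
print(record["local_attributes"])  # local file information added by this override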

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def to_dict(
    self,
    by_alias=True,
    exclude_none=True,
    mode: Literal["python", "json"] = "python",
    exclude: dict | set | None = None,
) -> dict:
    """
    Overrides the parent method to include local file information.
    """
    base_dict = super().to_dict(
        by_alias=by_alias,
        exclude_none=exclude_none,
        mode=mode,
        exclude=exclude,
    )
    local_info = self._get_local_info_dict()
    base_dict["local_attributes"] = local_info
    return base_dict

to_json

to_json(
    indent=2, by_alias=True, exclude_none=True, exclude=None
)

Export the File Record to a JSON string.
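
Example (the path is illustrative):

lf = LocalFile("path/to/document.pdf")
print(lf.to_json(indent=2))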

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def to_json(
    self,
    indent: int | None = 2,
    by_alias: bool = True,
    exclude_none: bool = True,
    exclude: dict | set | None = None,
) -> str:
    """Export the File Record to a JSON string."""
    output_dict = self.to_dict(
        by_alias=by_alias,
        exclude_none=exclude_none,
        mode="json",
        exclude=exclude,
    )

    return json.dumps(output_dict, indent=indent, default=str)

validate_tags

validate_tags(*, api_key=None)

Validates all tags against DorsalHub's API.

Parameters:

api_key (str | None, default None): Optional API key.

Returns:

The validation response object from the client.

Raises:

DorsalError: If the instance is in offline mode.
InvalidTagError: If the tags are rejected by the API.
DorsalClientError: If the API call fails.
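
Example (the path, tag name, and value are illustrative):

lf = LocalFile("path/to/report.pdf")
lf.add_private_tag("status", "reviewed")
result = lf.validate_tags()  # raises InvalidTagError if any tag is rejected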

Source code in venv/lib/python3.13/site-packages/dorsal/file/dorsal_file.py
def validate_tags(self, *, api_key: str | None = None):
    """
    Validates all tags against DorsalHub's API.

    Args:
        api_key: Optional API key.

    Returns:
        The validation response object from the client.

    Raises:
        DorsalError: If the instance is in offline mode.
        InvalidTagError: If the tags are rejected by the API.
        DorsalClientError: If the API call fails.
    """
    if self.offline:
        raise DorsalError("Cannot validate tags: LocalFile is in OFFLINE mode.")

    if not self.tags:
        logger.debug("No tags to validate on file '%s'.", self._file_path)
        return None

    client = self._client or get_shared_dorsal_client(api_key=api_key)

    logger.debug(
        "Validating %d tags for file '%s' (hash: %s)",
        len(self.tags),
        self._file_path,
        self.hash,
    )

    validation_result = client.validate_tag(file_tags=self.tags, api_key=api_key)

    if not validation_result.valid:
        error_msg = validation_result.message or "Tag validation failed."
        logger.warning("Tag validation failed for file '%s': %s", self._file_path, error_msg)
        raise InvalidTagError(error_msg)

    logger.info(
        "Successfully validated %d tags for file '%s'.",
        len(self.tags),
        self._file_path,
    )
    return validation_result