`open/llm-output`

Title: LLM Output

Use Case: A schema for storing a text or simple JSON output from an LLM, the parameters used to create it, and the resulting provenance metadata.

URL: https://dorsalhub.com/schemas/open/llm-output

This schema stores the output from a large language model, along with the data needed to reproduce and verify the generation.

Core Data: Requires model (e.g., 'gpt-4o') and the response_data itself (string or serialized JSON).
Evaluation: Supports top-level score (-1 to 1), score_explanation, and language fields.
Generation Parameters: Can store the generation_params used for the API request, such as system_prompt, temperature, top_p, and stop sequences.
Provenance: Can store the generation_metadata returned by the API, including response_id, token usage statistics, and custom attributes.

Example RecordJSON Schema

{
  "model": "gpt-4o",
  "prompt": "Extract the invoice number, total amount, and due date from this document.",
  "response_data": "{\"invoice_id\": \"INV-9528\", \"total_due\": 1450.75, \"due_date\": \"2025-10-31\"}",
  "language": "eng",
  "score": 0.95,
  "generation_params": {
    "system_prompt": "You are an expert financial document parser. You must only output a valid JSON object.",
    "temperature": 0.0,
    "max_tokens": 1000,
    "response_format": {
      "type": "json_object"
    }
  },
  "generation_metadata": {
    "response_id": "chatcmpl-9qA8ZypD4YcW1bF5c6e7g8H9iJkLmN",
    "created": "2025-09-17T11:45:00Z",
    "finish_reason": "stop",
    "system_fingerprint": "fp_a24b9d3f8e",
    "usage": {
      "prompt_tokens": 820,
      "completion_tokens": 45,
      "total_tokens": 865
    }
  }
}

{
    "x-license": {
        "id": "Apache-2.0",
        "notice": "Copyright 2025 Dorsal Hub LTD",
        "url": "https://github.com/dorsalhub/open-validation-schemas/blob/main/LICENSE"
    },
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "$id": "https://dorsalhub.com/schemas/open/llm-output",
    "title": "LLM Output",
    "version": "0.2.0",
    "description": "Store a single interaction with an LLM (a Prompt/Response pair). Captures the input prompt, the resulting output, and the configuration parameters used at that moment.",
    "type": "object",
    "properties": {
        "model": {
            "type": "string",
            "description": "The ID or name of the model used (e.g., 'gpt-4o', 'claude-3-sonnet').",
            "maxLength": 1024
        },
        "prompt": {
            "type": "string",
            "description": "The text input provided to the model. For contexts exceeding the character limit, this field should be truncated or a reference to the full storage location.",
            "maxLength": 262144
        },
        "response_data": {
            "description": "The output from the model. Non-plaintext output (e.g. JSON) should be serialized. For contexts exceeding the character limit, this field should be truncated or a reference to the full storage location.",
            "type": "string",
            "maxLength": 524288
        },
        "language": {
            "type": "string",
            "description": "The 3-letter ISO-639-3 language code of the response (e.g., 'eng', 'fra').",
            "pattern": "^[a-z]{3}$",
            "maxLength": 3,
            "dorsal_type": "dorsal/language"
        },
        "score": {
            "type": "number",
            "description": "A normalized metric evaluating the response (-1.0 to 1.0). Positive values indicate high quality, acceptance, or positive sentiment. Negative values indicate rejection, low quality, or negative sentiment.",
            "minimum": -1,
            "maximum": 1
        },
        "score_explanation": {
            "type": "string",
            "description": "Defines the meaning and range of the 'score' field (e.g. 'Reward Model Score' or 'Confidence').",
            "maxLength": 256
        },
        "generation_params": {
            "type": "object",
            "description": "Optional parameters sent in the API request to control the generation.",
            "properties": {
                "system_prompt": {
                    "type": "string",
                    "description": "An optional system-level instruction that defines the model's persona, style, or constraints.",
                    "maxLength": 4096
                },
                "temperature": {
                    "type": "number",
                    "description": "Controls randomness (e.g., 0.0 to 2.0).",
                    "minimum": 0,
                    "maximum": 2
                },
                "top_p": {
                    "type": "number",
                    "description": "Controls nucleus sampling (e.g., 0.0 to 1.0).",
                    "minimum": 0,
                    "maximum": 1
                },
                "max_tokens": {
                    "type": "integer",
                    "description": "The maximum number of tokens that were allowed to be generated.",
                    "minimum": 1
                },
                "seed": {
                    "type": "integer",
                    "description": "The seed used for sampling for reproducible outputs."
                },
                "stop": {
                    "type": [
                        "string",
                        "array"
                    ],
                    "description": "One or more sequences where the API will stop generating further tokens.",
                    "maxLength": 128,
                    "items": {
                        "type": "string",
                        "maxLength": 128
                    },
                    "maxItems": 16
                },
                "presence_penalty": {
                    "type": "number",
                    "description": "Penalty for new tokens based on whether they appear in the text so far (e.g., -2.0 to 2.0).",
                    "minimum": -2,
                    "maximum": 2
                },
                "frequency_penalty": {
                    "type": "number",
                    "description": "Penalty for new tokens based on their existing frequency in the text so far (e.g., -2.0 to 2.0).",
                    "minimum": -2,
                    "maximum": 2
                },
                "response_format": {
                    "type": "object",
                    "description": "Specifies the output format (e.g., 'text' or 'json_object').",
                    "properties": {
                        "type": {
                            "type": "string",
                            "description": "The type of output format.",
                            "enum": [
                                "text",
                                "json_object"
                            ]
                        }
                    },
                    "required": [
                        "type"
                    ],
                    "additionalProperties": false
                }
            },
            "additionalProperties": false
        },
        "generation_metadata": {
            "type": "object",
            "description": "Optional metadata returned by the API response, used for provenance and logging.",
            "properties": {
                "response_id": {
                    "type": "string",
                    "description": "The unique identifier for the generation response from the API provider.",
                    "maxLength": 128
                },
                "created": {
                    "type": "string",
                    "format": "date-time",
                    "maxLength": 128,
                    "description": "The ISO-8601 timestamp of when the response was created."
                },
                "finish_reason": {
                    "type": "string",
                    "description": "The reason the model stopped generating tokens (e.g., 'stop', 'length', 'tool_calls').",
                    "maxLength": 128
                },
                "system_fingerprint": {
                    "type": "string",
                    "description": "An identifier for the backend configuration that served the request, for reproducibility.",
                    "maxLength": 128
                },
                "usage": {
                    "type": "object",
                    "description": "Token usage statistics for the generation request.",
                    "properties": {
                        "prompt_tokens": {
                            "type": "integer",
                            "minimum": 0
                        },
                        "completion_tokens": {
                            "type": "integer",
                            "minimum": 0
                        },
                        "total_tokens": {
                            "type": "integer",
                            "minimum": 0
                        }
                    },
                    "required": [
                        "prompt_tokens",
                        "completion_tokens",
                        "total_tokens"
                    ],
                    "additionalProperties": false
                }
            },
            "additionalProperties": false
        },
        "attributes": {
            "type": "object",
            "description": "Arbitrary metadata relevant to this item.",
            "maxProperties": 16,
            "additionalProperties": {
                "anyOf": [
                    {
                        "type": "string",
                        "maxLength": 1024
                    },
                    {
                        "type": "number"
                    },
                    {
                        "type": "boolean"
                    },
                    {
                        "type": "null"
                    }
                ]
            }
        }
    },
    "required": [
        "model",
        "response_data"
    ],
    "additionalProperties": false
}