Title: LLM Output
Use Case: A schema for storing text or simple JSON output from an LLM, the parameters used to create it, and the resulting provenance metadata.
URL: https://dorsalhub.com/schemas/open/llm-output
This schema stores the output from a large language model, along with the data needed to reproduce and verify the generation.
- Core Data: Requires `model` (e.g., 'gpt-4o') and the `response_data` itself (a string, or serialized JSON).
- Evaluation: Supports top-level `score` (-1 to 1), `score_explanation`, and `language` fields.
- Generation Parameters: Can store the `generation_params` used for the API request, such as `system_prompt`, `temperature`, `top_p`, and `stop` sequences.
- Provenance: Can store the `generation_metadata` returned by the API, including the `response_id` and token `usage` statistics, plus arbitrary top-level `attributes`.

An example record capturing a structured-extraction response:
```json
{
  "model": "gpt-4o",
  "prompt": "Extract the invoice number, total amount, and due date from this document.",
  "response_data": "{\"invoice_id\": \"INV-9528\", \"total_due\": 1450.75, \"due_date\": \"2025-10-31\"}",
  "language": "eng",
  "score": 0.95,
  "generation_params": {
    "system_prompt": "You are an expert financial document parser. You must only output a valid JSON object.",
    "temperature": 0.0,
    "max_tokens": 1000,
    "response_format": {
      "type": "json_object"
    }
  },
  "generation_metadata": {
    "response_id": "chatcmpl-9qA8ZypD4YcW1bF5c6e7g8H9iJkLmN",
    "created": "2025-09-17T11:45:00Z",
    "finish_reason": "stop",
    "system_fingerprint": "fp_a24b9d3f8e",
    "usage": {
      "prompt_tokens": 820,
      "completion_tokens": 45,
      "total_tokens": 865
    }
  }
}
```
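A record like the one above is typically assembled from a provider's API response. The following is a minimal sketch assuming the OpenAI Python SDK (v1.x); attribute names on the response object vary by provider, so treat the mapping as illustrative rather than part of the schema:

```python
from datetime import datetime, timezone

from openai import OpenAI

client = OpenAI()

prompt = "Extract the invoice number, total amount, and due date from this document."
params = {
    "system_prompt": "You are an expert financial document parser. You must only output a valid JSON object.",
    "temperature": 0.0,
    "max_tokens": 1000,
    "response_format": {"type": "json_object"},
}

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": params["system_prompt"]},
        {"role": "user", "content": prompt},
    ],
    temperature=params["temperature"],
    max_tokens=params["max_tokens"],
    response_format=params["response_format"],
)

choice = response.choices[0]
record = {
    "model": response.model,
    "prompt": prompt,
    "response_data": choice.message.content,
    "generation_params": params,
    "generation_metadata": {
        "response_id": response.id,
        # The SDK reports creation time as a Unix epoch; the schema wants ISO-8601.
        "created": datetime.fromtimestamp(response.created, tz=timezone.utc)
        .isoformat()
        .replace("+00:00", "Z"),
        "finish_reason": choice.finish_reason,
        "system_fingerprint": response.system_fingerprint,
        "usage": {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens,
        },
    },
}
```

The full JSON Schema definition: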
```json
{
  "_license": {
    "id": "Apache-2.0",
    "notice": "Copyright 2025 Dorsal Hub LTD",
    "url": "https://github.com/dorsalhub/open-validation-schemas/blob/main/LICENSE"
  },
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://dorsalhub.com/schemas/open/llm-output",
  "title": "LLM Output",
  "version": "0.1.0",
  "description": "Store a single interaction with an LLM (a Prompt/Response pair). Captures the input prompt, the resulting output, and the configuration parameters used at that moment.",
  "type": "object",
  "properties": {
    "model": {
      "type": "string",
      "description": "The ID or name of the model used (e.g., 'gpt-4o', 'claude-3-sonnet').",
      "maxLength": 1024
    },
"prompt": {
"type": "string",
"description": "The text input provided to the model. For contexts exceeding the character limit, this field should be truncated or a reference to the full storage location.",
"maxLength": 262144
},
"response_data": {
"description": "The output from the model. Non-plaintext output (e.g. JSON) should be serialized. For contexts exceeding the character limit, this field should be truncated or a reference to the full storage location.",
"type": "string",
"maxLength": 524288
},
"language": {
"type": "string",
"description": "The 3-letter ISO-639-3 language code of the response (e.g., 'eng', 'fra').",
"pattern": "^[a-z]{3}$",
"maxLength": 3
},
"score": {
"type": "number",
"description": "A normalized metric evaluating the response (-1.0 to 1.0). Positive values indicate high quality, acceptance, or positive sentiment. Negative values indicate rejection, low quality, or negative sentiment.",
"minimum": -1,
"maximum": 1
},
"score_explanation": {
"type": "string",
"description": "Defines the meaning and range of the 'score' field (e.g. 'Reward Model Score' or 'Confidence').",
"maxLength": 256
},
"generation_params": {
"type": "object",
"description": "Optional parameters sent in the API request to control the generation.",
"properties": {
"system_prompt": {
"type": "string",
"description": "An optional system-level instruction that defines the model's persona, style, or constraints.",
"maxLength": 4096
},
"temperature": {
"type": "number",
"description": "Controls randomness (e.g., 0.0 to 2.0).",
"minimum": 0,
"maximum": 2
},
"top_p": {
"type": "number",
"description": "Controls nucleus sampling (e.g., 0.0 to 1.0).",
"minimum": 0,
"maximum": 1
},
"max_tokens": {
"type": "integer",
"description": "The maximum number of tokens that were allowed to be generated.",
"minimum": 1
},
"seed": {
"type": "integer",
"description": "The seed used for sampling for reproducible outputs."
},
"stop": {
"type": [
"string",
"array"
],
"description": "One or more sequences where the API will stop generating further tokens.",
"maxLength": 128,
"items": {
"type": "string",
"maxLength": 128
},
"maxItems": 16
},
"presence_penalty": {
"type": "number",
"description": "Penalty for new tokens based on whether they appear in the text so far (e.g., -2.0 to 2.0).",
"minimum": -2,
"maximum": 2
},
"frequency_penalty": {
"type": "number",
"description": "Penalty for new tokens based on their existing frequency in the text so far (e.g., -2.0 to 2.0).",
"minimum": -2,
"maximum": 2
},
"response_format": {
"type": "object",
"description": "Specifies the output format (e.g., 'text' or 'json_object').",
"properties": {
"type": {
"type": "string",
"description": "The type of output format.",
"enum": [
"text",
"json_object"
]
}
},
"required": [
"type"
],
"additionalProperties": false
}
},
"additionalProperties": false
},
"generation_metadata": {
"type": "object",
"description": "Optional metadata returned by the API response, used for provenance and logging.",
"properties": {
"response_id": {
"type": "string",
"description": "The unique identifier for the generation response from the API provider.",
"maxLength": 128
},
"created": {
"type": "string",
"format": "date-time",
"description": "The ISO-8601 timestamp of when the response was created."
},
"finish_reason": {
"type": "string",
"description": "The reason the model stopped generating tokens (e.g., 'stop', 'length', 'tool_calls').",
"maxLength": 128
},
"system_fingerprint": {
"type": "string",
"description": "An identifier for the backend configuration that served the request, for reproducibility.",
"maxLength": 128
},
"usage": {
"type": "object",
"description": "Token usage statistics for the generation request.",
"properties": {
"prompt_tokens": {
"type": "integer",
"minimum": 0
},
"completion_tokens": {
"type": "integer",
"minimum": 0
},
"total_tokens": {
"type": "integer",
"minimum": 0
}
},
"required": [
"prompt_tokens",
"completion_tokens",
"total_tokens"
],
"additionalProperties": false
}
},
"additionalProperties": false
},
"attributes": {
"type": "object",
"description": "Arbitrary metadata relevant to this item.",
"maxProperties": 16,
"additionalProperties": {
"anyOf": [
{
"type": "string",
"maxLength": 1024
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
}
},
"required": [
"model",
"response_data"
],
"additionalProperties": false
}
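To check a record against this schema before storage, any Draft 2020-12 validator works. A minimal sketch using the Python `jsonschema` package (the schema file name here is illustrative):

```python
import json

from jsonschema import Draft202012Validator

# Load the schema document shown above (file name is hypothetical).
with open("llm-output.schema.json") as f:
    schema = json.load(f)

# Only "model" and "response_data" are required; everything else is optional.
record = {
    "model": "gpt-4o",
    "response_data": "The answer is 42.",
    "score": 1.5,  # out of range: the schema caps "score" at 1
}

validator = Draft202012Validator(schema)
for error in validator.iter_errors(record):
    path = "/".join(str(p) for p in error.path) or "<root>"
    print(f"{path}: {error.message}")
# e.g. prints: score: 1.5 is greater than the maximum of 1
```

Note that `additionalProperties` is false at the top level, so unrecognized keys are rejected rather than silently stored; arbitrary extra metadata belongs in the `attributes` map instead.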