Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/config.html
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,12 @@ <h2 id="llamastackconfiguration">LlamaStackConfiguration</h2>
<td>Path to configuration file used when Llama Stack is run in library
mode</td>
</tr>
<tr class="odd">
<td>timeout</td>
<td>integer</td>
<td>Timeout in seconds for requests to Llama Stack service. Default is
180 seconds (3 minutes) to accommodate long-running RAG queries.</td>
</tr>
</tbody>
</table>
<h2 id="modelcontextprotocolserver">ModelContextProtocolServer</h2>
Expand Down
220 changes: 219 additions & 1 deletion docs/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,40 @@
},
"components": {
"schemas": {
"A2AStateConfiguration": {
"additionalProperties": false,
"description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage.",
"properties": {
"sqlite": {
"anyOf": [
{
"$ref": "#/components/schemas/SQLiteDatabaseConfiguration"
},
{
"type": "null"
}
],
"default": null,
"description": "SQLite database configuration for A2A state storage.",
"title": "SQLite configuration"
},
"postgres": {
"anyOf": [
{
"$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration"
},
{
"type": "null"
}
],
"default": null,
"description": "PostgreSQL database configuration for A2A state storage.",
"title": "PostgreSQL configuration"
}
},
"title": "A2AStateConfiguration",
"type": "object"
},
"APIKeyTokenConfiguration": {
"additionalProperties": false,
"description": "API Key Token configuration.",
Expand Down Expand Up @@ -78,7 +112,11 @@
"get_config",
"info",
"model_override",
"rlsapi_v1_infer"
"rlsapi_v1_infer",
"a2a_agent_card",
"a2a_task_execution",
"a2a_message",
"a2a_jsonrpc"
],
"title": "Action",
"type": "string"
Expand All @@ -97,6 +135,12 @@
"title": "Skip Tls Verification",
"type": "boolean"
},
"skip_for_health_probes": {
"default": false,
"description": "Skip authorization for readiness and liveness probes",
"title": "Skip authorization for probes",
"type": "boolean"
},
"k8s_cluster_api": {
"type": "string",
"nullable": true,
Expand Down Expand Up @@ -162,6 +206,43 @@
"title": "AuthorizationConfiguration",
"type": "object"
},
"AzureEntraIdConfiguration": {
"additionalProperties": false,
"description": "Microsoft Entra ID authentication attributes for Azure.",
"properties": {
"tenant_id": {
"format": "password",
"title": "Tenant Id",
"type": "string",
"writeOnly": true
},
"client_id": {
"format": "password",
"title": "Client Id",
"type": "string",
"writeOnly": true
},
"client_secret": {
"format": "password",
"title": "Client Secret",
"type": "string",
"writeOnly": true
},
"scope": {
"default": "https://cognitiveservices.azure.com/.default",
"description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.",
"title": "Token scope",
"type": "string"
}
},
"required": [
"tenant_id",
"client_id",
"client_secret"
],
"title": "AzureEntraIdConfiguration",
"type": "object"
},
"ByokRag": {
"additionalProperties": false,
"description": "BYOK (Bring Your Own Knowledge) RAG configuration.",
Expand Down Expand Up @@ -346,10 +427,45 @@
"title": "BYOK RAG configuration",
"type": "array"
},
"a2a_state": {
"$ref": "#/components/schemas/A2AStateConfiguration",
"description": "Configuration for A2A protocol persistent state storage.",
"title": "A2A state configuration"
},
"quota_handlers": {
"$ref": "#/components/schemas/QuotaHandlersConfiguration",
"description": "Quota handlers configuration",
"title": "Quota handlers"
},
"azure_entra_id": {
"anyOf": [
{
"$ref": "#/components/schemas/AzureEntraIdConfiguration"
},
{
"type": "null"
}
],
"default": null
},
"splunk": {
"anyOf": [
{
"$ref": "#/components/schemas/SplunkConfiguration"
},
{
"type": "null"
}
],
"default": null,
"description": "Splunk HEC configuration for sending telemetry events.",
"title": "Splunk configuration"
},
"deployment_environment": {
"default": "development",
"description": "Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events.",
"title": "Deployment environment",
"type": "string"
}
},
"required": [
Expand Down Expand Up @@ -466,6 +582,18 @@
"default": null,
"title": "System Prompt"
},
"agent_card_path": {
"type": "string",
"nullable": true,
"default": null,
"title": "Agent Card Path"
},
"agent_card_config": {
"type": "object",
"nullable": true,
"default": null,
"title": "Agent Card Config"
},
"custom_profile": {
"anyOf": [
{
Expand Down Expand Up @@ -713,6 +841,21 @@
"description": "URL of the MCP server",
"title": "MCP server URL",
"type": "string"
},
"authorization_headers": {
"additionalProperties": {
"type": "string"
},
"description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are two special cases: 1. To use the Kubernetes token in the header, specify the string 'kubernetes' instead of the file path. 2. To use the client-provided token in the header, specify the string 'client' instead of the file path.",
"title": "Authorization headers",
"type": "object"
},
"timeout": {
"type": "integer",
"nullable": true,
"default": null,
"description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support.",
"title": "Request timeout"
}
},
"required": [
Expand Down Expand Up @@ -900,6 +1043,20 @@
"minimum": 0,
"title": "Period",
"type": "integer"
},
"database_reconnection_count": {
"default": 10,
"description": "Database reconnection count on startup. When the quota database is not available on startup, the service tries to reconnect N times with the specified delay.",
"minimum": 0,
"title": "Database reconnection count on startup",
"type": "integer"
},
"database_reconnection_delay": {
"default": 1,
"description": "Database reconnection delay in seconds. When the quota database is not available on startup, the service tries to reconnect N times with the specified delay.",
"minimum": 0,
"title": "Database reconnection delay",
"type": "integer"
}
},
"title": "QuotaSchedulerConfiguration",
Expand Down Expand Up @@ -953,6 +1110,13 @@
"title": "Port",
"type": "integer"
},
"base_url": {
"type": "string",
"nullable": true,
"default": null,
"description": "Externally reachable base URL for the service; needed for A2A support.",
"title": "Base URL"
},
"auth_enabled": {
"default": false,
"description": "Enables the authentication subsystem",
Expand Down Expand Up @@ -992,6 +1156,60 @@
"title": "ServiceConfiguration",
"type": "object"
},
"SplunkConfiguration": {
"additionalProperties": false,
"description": "Splunk HEC (HTTP Event Collector) configuration.\n\nSplunk HEC allows sending events directly to Splunk over HTTP/HTTPS.\nThis configuration is used to send telemetry events for inference\nrequests to the corporate Splunk deployment.\n\nUseful resources:\n\n - [Splunk HEC Docs](https://docs.splunk.com/Documentation/SplunkCloud)\n - [About HEC](https://docs.splunk.com/Documentation/Splunk/latest/Data)",
"properties": {
"enabled": {
"default": false,
"description": "Enable or disable Splunk HEC integration.",
"title": "Enabled",
"type": "boolean"
},
"url": {
"type": "string",
"nullable": true,
"default": null,
"description": "Splunk HEC endpoint URL.",
"title": "HEC URL"
},
"token_path": {
"type": "string",
"nullable": true,
"default": null,
"description": "Path to file containing the Splunk HEC authentication token.",
"title": "Token path"
},
"index": {
"type": "string",
"nullable": true,
"default": null,
"description": "Target Splunk index for events.",
"title": "Index"
},
"source": {
"default": "lightspeed-stack",
"description": "Event source identifier.",
"title": "Source",
"type": "string"
},
"timeout": {
"default": 5,
"description": "HTTP timeout in seconds for HEC requests.",
"minimum": 0,
"title": "Timeout",
"type": "integer"
},
"verify_ssl": {
"default": true,
"description": "Whether to verify SSL certificates for HEC endpoint.",
"title": "Verify SSL",
"type": "boolean"
}
},
"title": "SplunkConfiguration",
"type": "object"
},
"TLSConfiguration": {
"additionalProperties": false,
"description": "TLS configuration.\n\nTransport Layer Security (TLS) is a cryptographic protocol designed to\nprovide communications security over a computer network, such as the\nInternet. The protocol is widely used in applications such as email,\ninstant messaging, and voice over IP, but its use in securing HTTPS remains\nthe most publicly visible.\n\nUseful resources:\n\n - [FastAPI HTTPS Deployment](https://fastapi.tiangolo.com/deployment/https/)\n - [Transport Layer Security Overview](https://en.wikipedia.org/wiki/Transport_Layer_Security)\n - [What is TLS](https://www.ssltrust.eu/learning/ssl/transport-layer-security-tls)",
Expand Down
1 change: 1 addition & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ Useful resources:
| api_key | string | API key to access Llama Stack service |
| use_as_library_client | boolean | When set to true Llama Stack will be used in library mode, not in server mode (default) |
| library_client_config_path | string | Path to configuration file used when Llama Stack is run in library mode |
| timeout | integer | Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries. |


## ModelContextProtocolServer
Expand Down
Loading
Loading