diff --git a/docs/byok_guide.md b/docs/byok_guide.md index ca9ab2ea3..29ac81151 100644 --- a/docs/byok_guide.md +++ b/docs/byok_guide.md @@ -167,29 +167,11 @@ apis: - tool_runtime - safety -models: - # Your LLM model - - model_id: your-llm-model - provider_id: openai # or your preferred provider - model_type: llm - provider_model_id: gpt-4o-mini - - # Embedding model for BYOK - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /path/to/embedding_models/all-mpnet-base-v2 - providers: inference: - # Embedding model provider - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - - # LLM provider (example: OpenAI) - provider_id: openai provider_type: remote::openai config: @@ -199,12 +181,13 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default safety: - provider_id: llama-guard @@ -212,35 +195,54 @@ providers: config: excluded_categories: [] - # Vector database configuration vector_io: - provider_id: your-knowledge-base - provider_type: inline::faiss # or remote::pgvector + provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: /path/to/vector_db/faiss_store.db - namespace: null + persistence: + namespace: vector_io::faiss + backend: byok_backend # References storage.backends tool_runtime: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} -# Enable RAG tools -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - -# Vector database configuration -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: your-knowledge-base - vector_db_id: your-index-id # ID used during index generation +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ~/.llama/storage/kv_store.db + sql_default: + type: sql_sqlite + db_path: ~/.llama/storage/sql_store.db + byok_backend: + type: kv_sqlite + db_path: /path/to/vector_db/faiss_store.db + +registered_resources: + models: + - model_id: your-llm-model + provider_id: openai + model_type: llm + provider_model_id: gpt-4o-mini + - model_id: sentence-transformers/all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /path/to/embedding_models/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + vector_stores: + - vector_store_id: your-index-id # ID used during index generation + provider_id: your-knowledge-base + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime ``` -**⚠️ Important**: The `vector_db_id` value must exactly match the ID you provided when creating the vector database using the rag-content tool. This identifier links your Llama Stack configuration to the specific vector database index you created. +**⚠️ Important**: The `vector_store_id` value must exactly match the ID you provided when creating the vector database using the rag-content tool. This identifier links your Llama Stack configuration to the specific vector database index you created. 
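+
+To sanity-check the link before starting the stack, you can inspect the FAISS SQLite file directly. The snippet below is a minimal sketch, not part of the official tooling: it assumes the SQLite KV backend's default `kvstore` table layout, which may vary between Llama Stack versions.
+
+```python
+import sqlite3
+
+
+def list_store_keys(db_path: str) -> list[str]:
+    """List the keys stored in a FAISS kvstore file (assumed table name: kvstore)."""
+    conn = sqlite3.connect(db_path)
+    try:
+        return [row[0] for row in conn.execute("SELECT key FROM kvstore")]
+    finally:
+        conn.close()
+
+
+# Your vector_store_id should appear among the stored keys.
+print(list_store_keys("/path/to/vector_db/faiss_store.db"))
+```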
### Step 5: Enable RAG Tools @@ -260,14 +262,20 @@ The configuration above automatically enables the RAG tools. The system will: - **Storage**: SQLite database file ```yaml -vector_io: -- provider_id: faiss-knowledge - provider_type: inline::faiss - config: - kvstore: - type: sqlite +providers: + vector_io: + - provider_id: faiss-knowledge + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: faiss_backend + +storage: + backends: + faiss_backend: + type: kv_sqlite db_path: /path/to/faiss_store.db - namespace: null ``` ### 2. pgvector (PostgreSQL) @@ -314,19 +322,6 @@ apis: - tool_runtime - safety -models: -- model_id: gpt-4o-mini - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini - -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/user/embedding_models/all-mpnet-base-v2 - providers: inference: - provider_id: sentence-transformers @@ -341,12 +336,13 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default safety: - provider_id: llama-guard @@ -358,25 +354,47 @@ providers: - provider_id: company-docs provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: /home/user/vector_dbs/company_docs/faiss_store.db - namespace: null + persistence: + namespace: vector_io::faiss + backend: company_docs_backend tool_runtime: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: company-docs - vector_db_id: company-knowledge-index +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ~/.llama/storage/kv_store.db + sql_default: + type: sql_sqlite + db_path: ~/.llama/storage/sql_store.db + company_docs_backend: + type: kv_sqlite + db_path: /home/user/vector_dbs/company_docs/faiss_store.db + +registered_resources: + models: + - model_id: gpt-4o-mini + provider_id: openai + model_type: llm + provider_model_id: gpt-4o-mini + - model_id: sentence-transformers/all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /home/user/embedding_models/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + vector_stores: + - vector_store_id: company-knowledge-index + provider_id: company-docs + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime ``` ### Example 2: vLLM + pgvector @@ -421,12 +439,13 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default safety: - provider_id: llama-guard @@ -458,11 +477,11 @@ tool_groups: args: null 
mcp_endpoint: null -vector_dbs: +vector_stores: - embedding_dimension: 768 embedding_model: sentence-transformers/all-mpnet-base-v2 provider_id: enterprise-knowledge - vector_db_id: enterprise-docs + vector_store_id: enterprise-docs ``` --- diff --git a/docs/openapi.json b/docs/openapi.json index 693ff1b9e..8587e86ac 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -3664,7 +3664,7 @@ "rlsapi-v1" ], "summary": "Infer Endpoint", - "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", + "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.", "operationId": "infer_endpoint_v1_infer_post", "requestBody": { "content": { @@ -4290,7 +4290,7 @@ ], "summary": "Handle A2A Jsonrpc", "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - "operationId": "handle_a2a_jsonrpc_a2a_get", + "operationId": "handle_a2a_jsonrpc_a2a_post", "responses": { "200": { "description": "Successful Response", @@ -4308,7 +4308,7 @@ ], "summary": "Handle A2A Jsonrpc", "description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response", - 
"operationId": "handle_a2a_jsonrpc_a2a_get", + "operationId": "handle_a2a_jsonrpc_a2a_post", "responses": { "200": { "description": "Successful Response", @@ -5339,7 +5339,7 @@ "type": "string", "minLength": 1, "title": "Vector DB ID", - "description": "Vector DB identification." + "description": "Vector database identification." }, "db_path": { "type": "string", diff --git a/docs/openapi.md b/docs/openapi.md index f576011e5..20f5fbc7a 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -3200,6 +3200,8 @@ system info) and returns an LLM-generated response. Args: infer_request: The inference request containing question and context. + request: The FastAPI request object for accessing headers and state. + background_tasks: FastAPI background tasks for async Splunk event sending. auth: Authentication tuple from the configured auth provider. Returns: @@ -4184,7 +4186,7 @@ BYOK (Bring Your Own Knowledge) RAG configuration. | rag_type | string | Type of RAG database. | | embedding_model | string | Embedding model identification | | embedding_dimension | integer | Dimensionality of embedding vectors. | -| vector_db_id | string | Vector DB identification. | +| vector_db_id | string | Vector database identification. | | db_path | string | Path to RAG database. | diff --git a/docs/rag_guide.md b/docs/rag_guide.md index f3b30a023..f84f3ca27 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -65,14 +65,6 @@ Update the `run.yaml` file used by Llama Stack to point to: ### FAISS example ```yaml -models: -- model_id: # e.g. sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: # e.g. 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: # e.g. /home/USER/embedding_model - providers: inference: - provider_id: sentence-transformers @@ -80,28 +72,42 @@ providers: config: {} # FAISS vector store - vector_io: + vector_io: - provider_id: custom-index provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: # e.g. /home/USER/vector_db/faiss_store.db - namespace: null - -vector_dbs: -- embedding_dimension: # e.g. 768 - embedding_model: # e.g. sentence-transformers/all-mpnet-base-v2 - provider_id: custom-index - vector_db_id: + persistence: + namespace: vector_io::faiss + backend: rag_backend # References storage.backends.rag_backend + +storage: + backends: + rag_backend: + type: kv_sqlite + db_path: # e.g. /home/USER/vector_db/faiss_store.db + +registered_resources: + models: + - model_id: # e.g. sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: # e.g. 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: # e.g. /home/USER/embedding_model + + vector_stores: + - embedding_dimension: # e.g. 768 + embedding_model: # e.g. sentence-transformers/all-mpnet-base-v2 + provider_id: custom-index + vector_store_id: ``` Where: - `provider_model_id` is the path to the folder of the embedding model (or alternatively, the supported embedding model to download) - `db_path` is the path to the vector index (.db file in this case) -- `vector_db_id` is the index ID used to generate the db +- `vector_store_id` is the index ID used to generate the db -See the full working [config example](examples/openai-faiss-run.yaml) for more details. +See the full working [config example](examples/run.yaml) for more details. 
### pgvector example @@ -121,7 +127,7 @@ Each pgvector-backed table follows this schema: - `embedding` (`vector(n)`): the embedding vector, where `n` is the embedding dimension and will match the model's output size (e.g. 768 for `all-mpnet-base-v2`) > [!NOTE] -> The `vector_db_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. +> The `vector_store_id` (e.g. `rhdocs`) is used to point to the table named `vector_store_rhdocs` in the specified database, which stores the vector embeddings. ```yaml @@ -141,14 +147,14 @@ providers: type: sqlite db_path: .llama/distributions/pgvector/pgvector_registry.db -vector_dbs: +vector_stores: - embedding_dimension: 768 embedding_model: sentence-transformers/all-mpnet-base-v2 provider_id: pgvector-example # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. # If the table was already created, this value must match the ID used at creation. - vector_db_id: rhdocs + vector_store_id: rhdocs ``` See the full working [config example](examples/openai-pgvector-run.yaml) for more details. @@ -282,26 +288,12 @@ apis: - tool_runtime - safety -models: -- model_id: gpt-test - provider_id: openai # This ID is a reference to 'providers.inference' - model_type: llm - provider_model_id: gpt-4o-mini - -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers # This ID is a reference to 'providers.inference' - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - providers: inference: - - provider_id: sentence-transformers + - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - - - provider_id: openai + - provider_id: openai provider_type: remote::openai config: api_key: ${env.OPENAI_API_KEY} @@ -310,12 +302,13 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default safety: - provider_id: llama-guard @@ -324,31 +317,50 @@ providers: excluded_categories: [] vector_io: - - provider_id: ocp-docs + - provider_id: ocp-docs provider_type: inline::faiss config: - kvstore: - type: sqlite - db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db - namespace: null + persistence: + namespace: vector_io::faiss + backend: ocp_docs_backend # References storage.backends tool_runtime: - - provider_id: rag-runtime + - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} -# Enable the RAG tool -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: ocp-docs # This ID is a reference to 'providers.vector_io' - vector_db_id: openshift-index # This ID was defined during index generation +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ~/.llama/storage/kv_store.db + sql_default: + type: sql_sqlite + db_path: ~/.llama/storage/sql_store.db + ocp_docs_backend: + type: kv_sqlite + db_path: 
/home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db + +registered_resources: + models: + - model_id: gpt-test + provider_id: openai + model_type: llm + provider_model_id: gpt-4o-mini + - model_id: sentence-transformers/all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + vector_stores: + - vector_store_id: openshift-index # This ID was defined during index generation + provider_id: ocp-docs # References providers.vector_io + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime ``` --- diff --git a/examples/run.yaml b/examples/run.yaml index ea560fcdb..14d8ede43 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -20,18 +20,9 @@ apis: - vector_io benchmarks: [] -conversations_store: - db_path: ~/.llama/storage/conversations.db - type: sqlite datasets: [] image_name: starter # external_providers_dir: /opt/app-root/src/.llama/providers.d -inference_store: - db_path: ~/.llama/storage/inference-store.db - type: sqlite -metadata_store: - db_path: ~/.llama/storage/registry.db - type: sqlite providers: inference: @@ -55,7 +46,8 @@ providers: - config: excluded_categories: [] provider_id: llama-guard - provider_type: inline::llama-guard scoring: + provider_type: inline::llama-guard + scoring: - provider_id: basic provider_type: inline::basic config: {} @@ -155,6 +147,11 @@ registered_resources: provider_model_id: sentence-transformers/all-mpnet-base-v2 metadata: embedding_dimension: 768 + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 + provider_id: faiss + vector_store_id: vs_503a2261-c256-45ff-90aa-580a80de64b8 shields: - shield_id: llama-guard provider_id: llama-guard diff --git a/run.yaml b/run.yaml index f85e628a0..3680f2b32 100644 --- a/run.yaml +++ b/run.yaml @@ -13,18 +13,9 @@ apis: - vector_io benchmarks: [] -conversations_store: - db_path: ~/.llama/storage/conversations.db - type: sqlite datasets: [] image_name: starter # external_providers_dir: /opt/app-root/src/.llama/providers.d -inference_store: - db_path: ~/.llama/storage/inference-store.db - type: sqlite -metadata_store: - db_path: ~/.llama/storage/registry.db - type: sqlite providers: inference: @@ -141,7 +132,7 @@ registered_resources: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini - vector_dbs: [] + vector_stores: [] datasets: [] scoring_fns: [] benchmarks: [] diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py index ce0c87bed..eddf30f86 100644 --- a/src/app/endpoints/query.py +++ b/src/app/endpoints/query.py @@ -1,8 +1,6 @@ """Handler for REST API call to provide answer to query.""" -import ast import logging -import re from datetime import UTC, datetime from typing import Annotated, Any, Optional @@ -14,7 +12,6 @@ RateLimitError, # type: ignore ) from llama_stack_client.types.model_list_response import ModelListResponse -from llama_stack_client.types.shared.interleaved_content_item import TextContentItem from sqlalchemy.exc import SQLAlchemyError import constants @@ -36,7 +33,6 @@ PromptTooLongResponse, QueryResponse, QuotaExceededResponse, - ReferencedDocument, ServiceUnavailableResponse, UnauthorizedResponse, UnprocessableEntityResponse, @@ -553,39 +549,6 @@ def is_input_shield(shield: Shield) -> bool: return _is_inout_shield(shield) 
or not is_output_shield(shield) -def parse_metadata_from_text_item( - text_item: TextContentItem, -) -> Optional[ReferencedDocument]: - """ - Parse a single TextContentItem to extract referenced documents. - - Args: - text_item (TextContentItem): The TextContentItem containing metadata. - - Returns: - ReferencedDocument: A ReferencedDocument object containing 'doc_url' and 'doc_title' - representing the referenced documents found in the metadata. - """ - docs: list[ReferencedDocument] = [] - if not isinstance(text_item, TextContentItem): - return docs - - metadata_blocks = re.findall( - r"Metadata:\s*({.*?})(?:\n|$)", text_item.text, re.DOTALL - ) - for block in metadata_blocks: - try: - data = ast.literal_eval(block) - url = data.get("docs_url") - title = data.get("title") - if url and title: - return ReferencedDocument(doc_url=url, doc_title=title) - logger.debug("Invalid metadata block (missing url or title): %s", block) - except (ValueError, SyntaxError) as e: - logger.debug("Failed to parse metadata block: %s | Error: %s", block, e) - return None - - def validate_attachments_metadata(attachments: list[Attachment]) -> None: """Validate the attachments metadata provided in the request. diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py index 64f4c5341..ecc39b071 100644 --- a/src/app/endpoints/query_v2.py +++ b/src/app/endpoints/query_v2.py @@ -560,74 +560,28 @@ def parse_referenced_documents_from_responses_api( for result in results: # Handle both object and dict access if isinstance(result, dict): - filename = result.get("filename") attributes = result.get("attributes", {}) else: - filename = getattr(result, "filename", None) attributes = getattr(result, "attributes", {}) # Try to get URL from attributes # Look for common URL fields in attributes doc_url = ( - attributes.get("link") + attributes.get("doc_url") + or attributes.get("docs_url") or attributes.get("url") - or attributes.get("doc_url") + or attributes.get("link") ) + doc_title = attributes.get("title") - # If we have at least a filename or url - if filename or doc_url: + if doc_title or doc_url: # Treat empty string as None for URL to satisfy Optional[AnyUrl] final_url = doc_url if doc_url else None - if (final_url, filename) not in seen_docs: + if (final_url, doc_title) not in seen_docs: documents.append( - ReferencedDocument(doc_url=final_url, doc_title=filename) + ReferencedDocument(doc_url=final_url, doc_title=doc_title) ) - seen_docs.add((final_url, filename)) - - # 2. 
Parse from message content annotations - elif item_type == "message": - content = getattr(output_item, "content", None) - if isinstance(content, list): - for part in content: - # Skip if part is a string or doesn't have annotations - if isinstance(part, str): - continue - - annotations = getattr(part, "annotations", []) or [] - for annotation in annotations: - # Handle both object and dict access for annotations - if isinstance(annotation, dict): - anno_type = annotation.get("type") - anno_url = annotation.get("url") - anno_title = annotation.get("title") or annotation.get( - "filename" - ) - else: - anno_type = getattr(annotation, "type", None) - anno_url = getattr(annotation, "url", None) - anno_title = getattr(annotation, "title", None) or getattr( - annotation, "filename", None - ) - - if anno_type == "url_citation": - # Treat empty string as None - final_url = anno_url if anno_url else None - if (final_url, anno_title) not in seen_docs: - documents.append( - ReferencedDocument( - doc_url=final_url, doc_title=anno_title - ) - ) - seen_docs.add((final_url, anno_title)) - - elif anno_type == "file_citation": - if (None, anno_title) not in seen_docs: - documents.append( - ReferencedDocument( - doc_url=None, doc_title=anno_title - ) - ) - seen_docs.add((None, anno_title)) + seen_docs.add((final_url, doc_title)) return documents diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index 0e45e0614..cf9c9269b 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -11,10 +11,9 @@ from pathlib import Path from typing import Any +import yaml from azure.core.exceptions import ClientAuthenticationError from azure.identity import ClientSecretCredential, CredentialUnavailableError - -import yaml from llama_stack.core.stack import replace_env_vars logger = logging.getLogger(__name__) @@ -115,51 +114,145 @@ def setup_azure_entra_id_token( # ============================================================================= -def construct_vector_dbs_section( +def construct_storage_backends_section( + ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] +) -> dict[str, Any]: + """Construct storage.backends section in Llama Stack configuration file. + + Builds the storage.backends section for a Llama Stack configuration by + preserving existing backends and adding new ones for each BYOK RAG. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration mapping. + byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions. + + Returns: + dict[str, Any]: The storage.backends dict with new backends added. + """ + output: dict[str, Any] = {} + + # preserve existing backends + if "storage" in ls_config and "backends" in ls_config["storage"]: + output = ls_config["storage"]["backends"].copy() + + # add new backends for each BYOK RAG + for brag in byok_rag: + vector_db_id = brag.get("vector_db_id", "") + backend_name = f"byok_{vector_db_id}_storage" + output[backend_name] = { + "type": "kv_sqlite", + "db_path": brag.get("db_path", f".llama/{vector_db_id}.db"), + } + logger.info( + "Added %s backends into storage.backends section, total backends %s", + len(byok_rag), + len(output), + ) + return output + + +def construct_vector_stores_section( ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] ) -> list[dict[str, Any]]: - """Construct vector_dbs section in Llama Stack configuration file. + """Construct registered_resources.vector_stores section in Llama Stack config. 
- Builds the vector_dbs section for a Llama Stack configuration. + Builds the vector_stores section for a Llama Stack configuration. Parameters: ls_config (dict[str, Any]): Existing Llama Stack configuration mapping - used as the base; existing `vector_dbs` entries are preserved if - present. + used as the base; existing `registered_resources.vector_stores` entries + are preserved if present. byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions to be added to - the `vector_dbs` section. + the `vector_stores` section. Returns: - list[dict[str, Any]]: The `vector_dbs` list where each entry is a mapping with keys: - - `vector_db_id`: identifier of the vector database + list[dict[str, Any]]: The `vector_stores` list where each entry is a mapping with keys: + - `vector_store_id`: identifier of the vector store (for Llama Stack config) - `provider_id`: provider identifier prefixed with `"byok_"` - `embedding_model`: name of the embedding model - `embedding_dimension`: embedding vector dimensionality """ output = [] - # fill-in existing vector_dbs entries - if "vector_dbs" in ls_config: - output = ls_config["vector_dbs"] + # fill-in existing vector_stores entries from registered_resources + if "registered_resources" in ls_config: + if "vector_stores" in ls_config["registered_resources"]: + output = ls_config["registered_resources"]["vector_stores"].copy() - # append new vector_dbs entries + # append new vector_stores entries for brag in byok_rag: + vector_db_id = brag.get("vector_db_id", "") output.append( { - "vector_db_id": brag.get("vector_db_id", ""), - "provider_id": "byok_" + brag.get("vector_db_id", ""), + "vector_store_id": vector_db_id, + "provider_id": f"byok_{vector_db_id}", "embedding_model": brag.get("embedding_model", ""), "embedding_dimension": brag.get("embedding_dimension"), } ) logger.info( - "Added %s items into vector_dbs section, total items %s", + "Added %s items into registered_resources.vector_stores, total items %s", len(byok_rag), len(output), ) return output +def construct_models_section( + ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """Construct registered_resources.models section with embedding models. + + Adds embedding model entries for each BYOK RAG configuration. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration mapping. + byok_rag (list[dict[str, Any]]): List of BYOK RAG definitions. + + Returns: + list[dict[str, Any]]: The models list with embedding models added. 
+ """ + output: list[dict[str, Any]] = [] + + # preserve existing models + if "registered_resources" in ls_config: + if "models" in ls_config["registered_resources"]: + output = ls_config["registered_resources"]["models"].copy() + + # add embedding models for each BYOK RAG + for brag in byok_rag: + embedding_model = brag.get("embedding_model", "") + vector_db_id = brag.get("vector_db_id", "") + embedding_dimension = brag.get("embedding_dimension") + + # Strip sentence-transformers/ prefix if present + provider_model_id = embedding_model + if provider_model_id.startswith("sentence-transformers/"): + provider_model_id = provider_model_id[len("sentence-transformers/") :] + + # Skip if embedding model already registered + existing_model_ids = [m.get("provider_model_id") for m in output] + if provider_model_id in existing_model_ids: + continue + + output.append( + { + "model_id": f"byok_{vector_db_id}_embedding", + "model_type": "embedding", + "provider_id": "sentence-transformers", + "provider_model_id": provider_model_id, + "metadata": { + "embedding_dimension": embedding_dimension, + }, + } + ) + logger.info( + "Added embedding models into registered_resources.models, total models %s", + len(output), + ) + return output + + def construct_vector_io_providers_section( ls_config: dict[str, Any], byok_rag: list[dict[str, Any]] ) -> list[dict[str, Any]]: @@ -180,27 +273,28 @@ def construct_vector_io_providers_section( list[dict[str, Any]]: The resulting providers/vector_io list containing the original entries (if any) plus one entry per item in `byok_rag`. Each appended entry has `provider_id` set to "byok_", - `provider_type` set from the RAG item, and a `config` with a `kvstore` - pointing to ".llama/.db", `namespace` as None, and `type` - "sqlite". + `provider_type` set from the RAG item, and a `config` with `persistence` + referencing the corresponding backend. 
""" output = [] # fill-in existing vector_io entries if "providers" in ls_config and "vector_io" in ls_config["providers"]: - output = ls_config["providers"]["vector_io"] + output = ls_config["providers"]["vector_io"].copy() # append new vector_io entries for brag in byok_rag: + vector_db_id = brag.get("vector_db_id", "") + backend_name = f"byok_{vector_db_id}_storage" + provider_id = f"byok_{vector_db_id}" output.append( { - "provider_id": "byok_" + brag.get("vector_db_id", ""), + "provider_id": provider_id, "provider_type": brag.get("rag_type", "inline::faiss"), "config": { - "kvstore": { - "db_path": ".llama/" + brag.get("vector_db_id", "") + ".db", - "namespace": None, - "type": "sqlite", + "persistence": { + "namespace": "vector_io::faiss", + "backend": backend_name, } }, } @@ -225,14 +319,33 @@ def enrich_byok_rag(ls_config: dict[str, Any], byok_rag: list[dict[str, Any]]) - return logger.info("Enriching Llama Stack config with BYOK RAG") - ls_config["vector_dbs"] = construct_vector_dbs_section(ls_config, byok_rag) + # Add storage backends + if "storage" not in ls_config: + ls_config["storage"] = {} + ls_config["storage"]["backends"] = construct_storage_backends_section( + ls_config, byok_rag + ) + + # Add vector_io providers if "providers" not in ls_config: ls_config["providers"] = {} ls_config["providers"]["vector_io"] = construct_vector_io_providers_section( ls_config, byok_rag ) + # Add registered vector stores + if "registered_resources" not in ls_config: + ls_config["registered_resources"] = {} + ls_config["registered_resources"]["vector_stores"] = ( + construct_vector_stores_section(ls_config, byok_rag) + ) + + # Add embedding models + ls_config["registered_resources"]["models"] = construct_models_section( + ls_config, byok_rag + ) + # ============================================================================= # Main Generation Function (service/container mode only) diff --git a/src/models/config.py b/src/models/config.py index 771ed4fdf..a537d20f5 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -1484,7 +1484,7 @@ class ByokRag(ConfigurationBase): ..., min_length=1, title="Vector DB ID", - description="Vector DB identification.", + description="Vector database identification.", ) db_path: FilePath = Field( diff --git a/src/models/responses.py b/src/models/responses.py index 9749f95f9..214bb47dc 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -8,8 +8,8 @@ from pydantic import AnyUrl, BaseModel, Field from pydantic_core import SchemaError -from quota.quota_exceed_error import QuotaExceedError from models.config import Action, Configuration +from quota.quota_exceed_error import QuotaExceedError from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary SUCCESSFUL_RESPONSE_DESCRIPTION = "Successful response" diff --git a/tests/configuration/run.yaml b/tests/configuration/run.yaml index 6bb776005..08ca11ca9 100644 --- a/tests/configuration/run.yaml +++ b/tests/configuration/run.yaml @@ -9,61 +9,54 @@ apis: - post_training - safety - scoring - - telemetry - tool_runtime - vector_io benchmarks: [] container_image: null datasets: [] external_providers_dir: null -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite logging: null -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null - type: sqlite providers: agents: - - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: 
.llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference + - provider_id: meta-reference provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default datasetio: - - config: - kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - provider_id: huggingface + - provider_id: huggingface provider_type: remote::huggingface - - config: + config: kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite - provider_id: localfs + namespace: huggingface_datasetio + backend: kv_default + - provider_id: localfs provider_type: inline::localfs - eval: - - config: + config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - provider_id: meta-reference + namespace: localfs_datasetio + backend: kv_default + eval: + - provider_id: meta-reference provider_type: inline::meta-reference + config: + kvstore: + namespace: eval_store + backend: kv_default inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["gpt-4-turbo"] + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} post_training: - config: checkpoint_format: huggingface @@ -95,15 +88,17 @@ providers: provider_id: meta-reference provider_type: inline::meta-reference tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} vector_io: - config: - kvstore: - db_path: .llama/distributions/ollama/faiss_store.db - namespace: null - type: sqlite + persistence: + namespace: vector_io::faiss + backend: kv_default provider_id: faiss provider_type: inline::faiss scoring_fns: [] @@ -115,11 +110,41 @@ server: tls_cafile: null tls_certfile: null tls_keyfile: null -shields: [] -vector_dbs: [] - -models: - - model_id: gpt-4-turbo - provider_id: openai - model_type: llm - provider_model_id: gpt-4-turbo +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] + vector_stores: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +telemetry: + enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 diff --git a/tests/e2e/configs/run-ci.yaml b/tests/e2e/configs/run-ci.yaml index ad3ac29a8..c5ef1ad87 100644 --- a/tests/e2e/configs/run-ci.yaml +++ b/tests/e2e/configs/run-ci.yaml @@ -14,17 +14,8 @@ apis: - vector_io benchmarks: [] -conversations_store: - db_path: 
~/.llama/storage/conversations.db - type: sqlite datasets: [] # external_providers_dir: /opt/app-root/src/.llama/providers.d -inference_store: - db_path: ~/.llama/storage/inference-store.db - type: sqlite -metadata_store: - db_path: ~/.llama/storage/registry.db - type: sqlite providers: inference: @@ -146,6 +137,12 @@ registered_resources: provider_model_id: sentence-transformers/all-mpnet-base-v2 metadata: embedding_dimension: 768 + # Commented out because the vector_store is already registered in the kv_store + # vector_stores: + # - embedding_dimension: 768 + # embedding_model: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 + # provider_id: faiss + # vector_store_id: vs_503a2261-c256-45ff-90aa-580a80de64b8 shields: - shield_id: llama-guard provider_id: llama-guard diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py index b900af9b5..459c991aa 100644 --- a/tests/unit/app/endpoints/test_query.py +++ b/tests/unit/app/endpoints/test_query.py @@ -8,14 +8,11 @@ import pytest from fastapi import HTTPException, Request, status -from llama_stack_client.types.shared.interleaved_content_item import TextContentItem -from pydantic import AnyUrl from pytest_mock import MockerFixture from app.endpoints.query import ( evaluate_model_hints, is_transcripts_enabled, - parse_metadata_from_text_item, select_model_and_provider_id, validate_attachments_metadata, ) @@ -23,7 +20,6 @@ from models.config import Action from models.database.conversations import UserConversation from models.requests import Attachment, QueryRequest -from models.responses import ReferencedDocument from utils.token_counter import TokenCounter # User ID must be proper UUID @@ -392,48 +388,6 @@ def test_validate_attachments_metadata_invalid_content_type() -> None: ) -def test_parse_metadata_from_text_item_valid(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem.""" - text = """ - Some text... 
- Metadata: {"docs_url": "https://redhat.com", "title": "Example Doc"} - """ - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = text - - doc = parse_metadata_from_text_item(mock_item) - - assert isinstance(doc, ReferencedDocument) - assert doc.doc_url == AnyUrl("https://redhat.com") - assert doc.doc_title == "Example Doc" - - -def test_parse_metadata_from_text_item_missing_title(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with missing title.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = """Metadata: {"docs_url": "https://redhat.com"}""" - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - -def test_parse_metadata_from_text_item_missing_url(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with missing url.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = """Metadata: {"title": "Example Doc"}""" - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - -def test_parse_metadata_from_text_item_malformed_url(mocker: MockerFixture) -> None: - """Test parsing metadata from a TextContentItem with malformed url.""" - mock_item = mocker.Mock(spec=TextContentItem) - mock_item.text = ( - """Metadata: {"docs_url": "not a valid url", "title": "Example Doc"}""" - ) - doc = parse_metadata_from_text_item(mock_item) - assert doc is None - - def test_no_tools_parameter_backward_compatibility() -> None: """Test that default behavior is unchanged when no_tools parameter is not specified.""" # This test ensures that existing code that doesn't specify no_tools continues to work diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index b4b4ec5ee..37468ad91 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -909,14 +909,22 @@ def _create_file_search_output(mocker: MockerFixture) -> Any: # Create mock result objects with proper attributes matching real llama-stack response result_1 = mocker.Mock() result_1.filename = "file2.pdf" - result_1.attributes = {"url": "http://example.com/doc2"} + result_1.attributes = { + "docs_url": "http://example.com/doc2", + "title": "Title 1", + "document_id": "doc-123", + } result_1.text = "Sample text from file2.pdf" result_1.score = 0.95 result_1.file_id = "file-123" result_1.model_dump = mocker.Mock( return_value={ "filename": "file2.pdf", - "attributes": {"url": "http://example.com/doc2"}, + "attributes": { + "docs_url": "http://example.com/doc2", + "title": "Title 1", + "document_id": "doc-123", + }, "text": "Sample text from file2.pdf", "score": 0.95, "file_id": "file-123", @@ -925,14 +933,22 @@ def _create_file_search_output(mocker: MockerFixture) -> Any: result_2 = mocker.Mock() result_2.filename = "file3.docx" - result_2.attributes = {} + result_2.attributes = { + "docs_url": "http://example.com/doc3", + "title": "Title 2", + "document_id": "doc-456", + } result_2.text = "Sample text from file3.docx" result_2.score = 0.85 result_2.file_id = "file-456" result_2.model_dump = mocker.Mock( return_value={ "filename": "file3.docx", - "attributes": {}, + "attributes": { + "docs_url": "http://example.com/doc3", + "title": "Title 2", + "document_id": "doc-456", + }, "text": "Sample text from file3.docx", "score": 0.85, "file_id": "file-456", @@ -975,27 +991,20 @@ async def test_retrieve_response_parses_referenced_documents( mock_client, "model-docs", qr, token="tkn", provider_id="test-provider" ) - assert 
len(referenced_docs) == 4 + # Referenced documents are now extracted only from file_search_call attributes + assert len(referenced_docs) == 2 - # Verify Doc 1 (URL citation) - doc1 = next((d for d in referenced_docs if d.doc_title == "Doc 1"), None) + # Verify Title 1 (File search result with URL) + doc1 = next((d for d in referenced_docs if d.doc_title == "Title 1"), None) assert doc1 - assert str(doc1.doc_url) == "http://example.com/doc1" + assert doc1.doc_title == "Title 1" + assert str(doc1.doc_url) == "http://example.com/doc2" - # Verify file1.txt (File citation) - doc2 = next((d for d in referenced_docs if d.doc_title == "file1.txt"), None) + # Verify Title 2 (File search result with URL) + doc2 = next((d for d in referenced_docs if d.doc_title == "Title 2"), None) assert doc2 - assert doc2.doc_url is None - - # Verify file2.pdf (File search result with URL) - doc3 = next((d for d in referenced_docs if d.doc_title == "file2.pdf"), None) - assert doc3 - assert str(doc3.doc_url) == "http://example.com/doc2" - - # Verify file3.docx (File search result without URL) - doc4 = next((d for d in referenced_docs if d.doc_title == "file3.docx"), None) - assert doc4 - assert doc4.doc_url is None + assert doc2.doc_title == "Title 2" + assert str(doc2.doc_url) == "http://example.com/doc3" # Verify RAG chunks were extracted from file_search_call results assert len(_summary.rag_chunks) == 2 diff --git a/tests/unit/cache/test_postgres_cache.py b/tests/unit/cache/test_postgres_cache.py index 2855c9daa..7b77170c2 100644 --- a/tests/unit/cache/test_postgres_cache.py +++ b/tests/unit/cache/test_postgres_cache.py @@ -1,22 +1,20 @@ """Unit tests for PostgreSQL cache implementation.""" import json - from typing import Any +import psycopg2 import pytest +from pydantic import AnyUrl, SecretStr from pytest_mock import MockerFixture -from pydantic import SecretStr, AnyUrl -import psycopg2 - -from models.config import PostgreSQLDatabaseConfiguration +from cache.cache_error import CacheError +from cache.postgres_cache import PostgresCache from models.cache_entry import CacheEntry +from models.config import PostgreSQLDatabaseConfiguration from models.responses import ConversationData, ReferencedDocument from utils import suid from utils.types import ToolCallSummary, ToolResultSummary -from cache.cache_error import CacheError -from cache.postgres_cache import PostgresCache USER_ID_1 = suid.get_suid() USER_ID_2 = suid.get_suid() diff --git a/tests/unit/test_llama_stack_configuration.py b/tests/unit/test_llama_stack_configuration.py index f6cef3a98..a86344dc9 100644 --- a/tests/unit/test_llama_stack_configuration.py +++ b/tests/unit/test_llama_stack_configuration.py @@ -8,8 +8,10 @@ from llama_stack_configuration import ( generate_configuration, - construct_vector_dbs_section, + construct_vector_stores_section, construct_vector_io_providers_section, + construct_storage_backends_section, + construct_models_section, ) from models.config import ( Configuration, @@ -20,55 +22,59 @@ ) # ============================================================================= -# Test construct_vector_dbs_section +# Test construct_vector_stores_section # ============================================================================= -def test_construct_vector_dbs_section_empty() -> None: +def test_construct_vector_stores_section_empty() -> None: """Test with no BYOK RAG config.""" ls_config: dict[str, Any] = {} byok_rag: list[dict[str, Any]] = [] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = 
construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 0 -def test_construct_vector_dbs_section_preserves_existing() -> None: - """Test preserves existing vector_dbs entries.""" +def test_construct_vector_stores_section_preserves_existing() -> None: + """Test preserves existing vector_stores entries.""" ls_config = { - "vector_dbs": [ - {"vector_db_id": "existing", "provider_id": "existing_provider"}, - ] + "registered_resources": { + "vector_stores": [ + {"vector_store_id": "existing", "provider_id": "existing_provider"}, + ] + } } byok_rag: list[dict[str, Any]] = [] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["vector_db_id"] == "existing" + assert output[0]["vector_store_id"] == "existing" -def test_construct_vector_dbs_section_adds_new() -> None: +def test_construct_vector_stores_section_adds_new() -> None: """Test adds new BYOK RAG entries.""" ls_config: dict[str, Any] = {} byok_rag = [ { "rag_id": "rag1", - "vector_db_id": "db1", + "vector_db_id": "store1", "embedding_model": "test-model", "embedding_dimension": 512, }, ] - output = construct_vector_dbs_section(ls_config, byok_rag) + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["vector_db_id"] == "db1" - assert output[0]["provider_id"] == "byok_db1" + assert output[0]["vector_store_id"] == "store1" + assert output[0]["provider_id"] == "byok_store1" assert output[0]["embedding_model"] == "test-model" assert output[0]["embedding_dimension"] == 512 -def test_construct_vector_dbs_section_merge() -> None: +def test_construct_vector_stores_section_merge() -> None: """Test merges existing and new entries.""" - ls_config = {"vector_dbs": [{"vector_db_id": "existing"}]} - byok_rag = [{"vector_db_id": "new_db"}] - output = construct_vector_dbs_section(ls_config, byok_rag) + ls_config = { + "registered_resources": {"vector_stores": [{"vector_store_id": "existing"}]} + } + byok_rag = [{"vector_db_id": "new_store"}] + output = construct_vector_stores_section(ls_config, byok_rag) assert len(output) == 2 @@ -99,14 +105,120 @@ def test_construct_vector_io_providers_section_adds_new() -> None: ls_config: dict[str, Any] = {"providers": {}} byok_rag = [ { - "vector_db_id": "db1", + "vector_db_id": "store1", "rag_type": "inline::faiss", }, ] output = construct_vector_io_providers_section(ls_config, byok_rag) assert len(output) == 1 - assert output[0]["provider_id"] == "byok_db1" + assert output[0]["provider_id"] == "byok_store1" assert output[0]["provider_type"] == "inline::faiss" + assert output[0]["config"]["persistence"]["backend"] == "byok_store1_storage" + assert output[0]["config"]["persistence"]["namespace"] == "vector_io::faiss" + + +# ============================================================================= +# Test construct_storage_backends_section +# ============================================================================= + + +def test_construct_storage_backends_section_empty() -> None: + """Test with no BYOK RAG config.""" + ls_config: dict[str, Any] = {} + byok_rag: list[dict[str, Any]] = [] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_storage_backends_section_preserves_existing() -> None: + """Test preserves existing backends.""" + ls_config = { + "storage": { + "backends": { + "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kv.db"} + } + } + } + 
byok_rag: list[dict[str, Any]] = [] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 1 + assert "kv_default" in output + + +def test_construct_storage_backends_section_adds_new() -> None: + """Test adds new BYOK RAG backend entries.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_db_id": "store1", + "db_path": "/path/to/store1.db", + }, + ] + output = construct_storage_backends_section(ls_config, byok_rag) + assert len(output) == 1 + assert "byok_store1_storage" in output + assert output["byok_store1_storage"]["type"] == "kv_sqlite" + assert output["byok_store1_storage"]["db_path"] == "/path/to/store1.db" + + +# ============================================================================= +# Test construct_models_section +# ============================================================================= + + +def test_construct_models_section_empty() -> None: + """Test with no BYOK RAG config.""" + ls_config: dict[str, Any] = {} + byok_rag: list[dict[str, Any]] = [] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 0 + + +def test_construct_models_section_preserves_existing() -> None: + """Test preserves existing models.""" + ls_config = { + "registered_resources": { + "models": [{"model_id": "existing", "model_type": "llm"}] + } + } + byok_rag: list[dict[str, Any]] = [] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 1 + assert output[0]["model_id"] == "existing" + + +def test_construct_models_section_adds_embedding_model() -> None: + """Test adds embedding model from BYOK RAG.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_db_id": "store1", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + }, + ] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 1 + assert output[0]["model_id"] == "byok_store1_embedding" + assert output[0]["model_type"] == "embedding" + assert output[0]["provider_id"] == "sentence-transformers" + assert output[0]["provider_model_id"] == "all-mpnet-base-v2" + assert output[0]["metadata"]["embedding_dimension"] == 768 + + +def test_construct_models_section_strips_prefix() -> None: + """Test strips sentence-transformers/ prefix from embedding model.""" + ls_config: dict[str, Any] = {} + byok_rag = [ + { + "vector_db_id": "store1", + "embedding_model": "sentence-transformers//usr/path/model", + "embedding_dimension": 768, + }, + ] + output = construct_models_section(ls_config, byok_rag) + assert len(output) == 1 + assert output[0]["provider_model_id"] == "/usr/path/model" # ============================================================================= @@ -164,10 +276,11 @@ def test_generate_configuration_with_byok(tmp_path: Path) -> None: "byok_rag": [ { "rag_id": "rag1", - "vector_db_id": "db1", + "vector_db_id": "store1", "embedding_model": "test-model", "embedding_dimension": 256, "rag_type": "inline::faiss", + "db_path": "/tmp/store1.db", }, ], } @@ -178,5 +291,19 @@ def test_generate_configuration_with_byok(tmp_path: Path) -> None: with open(outfile, encoding="utf-8") as f: result = yaml.safe_load(f) - db_ids = [db["vector_db_id"] for db in result["vector_dbs"]] - assert "db1" in db_ids + # Check registered_resources.vector_stores + store_ids = [ + s["vector_store_id"] for s in result["registered_resources"]["vector_stores"] + ] + assert "store1" in store_ids + + # Check storage.backends + assert "byok_store1_storage" in result["storage"]["backends"] + + # 
Check providers.vector_io + provider_ids = [p["provider_id"] for p in result["providers"]["vector_io"]] + assert "byok_store1" in provider_ids + + # Check registered_resources.models for embedding model + model_ids = [m["model_id"] for m in result["registered_resources"]["models"]] + assert "byok_store1_embedding" in model_ids
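+
+
+def test_construct_models_section_skips_duplicate_embedding_model() -> None:
+    """Test that a shared embedding model is only registered once.
+
+    Illustrative extra check: relies solely on the duplicate guard in
+    construct_models_section, which matches on provider_model_id.
+    """
+    ls_config: dict[str, Any] = {}
+    byok_rag = [
+        {
+            "vector_db_id": "store1",
+            "embedding_model": "sentence-transformers/all-mpnet-base-v2",
+            "embedding_dimension": 768,
+        },
+        {
+            "vector_db_id": "store2",
+            "embedding_model": "sentence-transformers/all-mpnet-base-v2",
+            "embedding_dimension": 768,
+        },
+    ]
+    output = construct_models_section(ls_config, byok_rag)
+    # Only the first store registers the shared embedding model.
+    assert len(output) == 1
+    assert output[0]["model_id"] == "byok_store1_embedding"
+    assert output[0]["provider_model_id"] == "all-mpnet-base-v2"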