diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml
index ba29f85f..356394c7 100644
--- a/lightspeed-stack.yaml
+++ b/lightspeed-stack.yaml
@@ -10,12 +10,12 @@ service:
 llama_stack:
   # Uses a remote llama-stack service
   # The instance would have already been started with a llama-stack-run.yaml file
-  use_as_library_client: false
+  # use_as_library_client: false
   # Alternative for "as library use"
-  # use_as_library_client: true
-  # library_client_config_path:
-  url: http://llama-stack:8321
-  api_key: xyzzy
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+  # url: http://llama-stack:8321
+  # api_key: xyzzy
 user_data_collection:
   feedback_enabled: true
   feedback_storage: "/tmp/data/feedback"
diff --git a/run.yaml b/run.yaml
index f85e628a..d37947ee 100644
--- a/run.yaml
+++ b/run.yaml
@@ -1,157 +1,170 @@
 version: 2
+image_name: starter
+external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR}
 apis:
-- agents
-- batches
-- datasetio
-- eval
-- files
-- inference
-- safety
-- scoring
-- tool_runtime
-- vector_io
-
-benchmarks: []
-conversations_store:
-  db_path: ~/.llama/storage/conversations.db
-  type: sqlite
-datasets: []
-image_name: starter
-# external_providers_dir: /opt/app-root/src/.llama/providers.d
-inference_store:
-  db_path: ~/.llama/storage/inference-store.db
-  type: sqlite
-metadata_store:
-  db_path: ~/.llama/storage/registry.db
-  type: sqlite
+  - batches
+  - datasetio
+  - eval
+  - files
+  - inference
+  - safety
+  - scoring
+  - tool_runtime
+  - vector_io
 
-providers:
-  inference:
-  - provider_id: openai # This ID is a reference to 'providers.inference'
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-      allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"]
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage/files
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: '********'
-  tool_runtime:
-  - config: {} # Enable the RAG tool
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config: # Define the storage backend for RAG
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
-  agents:
-  - config:
-      persistence:
-        agent_state:
-          namespace: agents_state
-          backend: kv_default
-        responses:
-          table_name: agents_responses
-          backend: sql_default
-    provider_id: meta-reference
-    provider_type: inline::meta-reference
-  batches:
-  - config:
-      kvstore:
-        namespace: batches_store
-        backend: kv_default
-    provider_id: reference
-    provider_type: inline::reference
-  datasetio:
-  - config:
-      kvstore:
-        namespace: huggingface_datasetio
-        backend: kv_default
-    provider_id: huggingface
-    provider_type: remote::huggingface
-  - config:
-      kvstore:
-        namespace: localfs_datasetio
-        backend: kv_default
-    provider_id: localfs
-    provider_type: inline::localfs
-  eval:
-  - config:
-      kvstore:
-        namespace: eval_store
-        backend: kv_default
-    provider_id: meta-reference
-    provider_type: inline::meta-reference
-scoring_fns: []
-server:
-  port: 8321
 storage:
   backends:
-    kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. models, vector_stores) are saved together with the RAG chunks
+    kv_default:
       type: kv_sqlite
       db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
+
   stores:
     metadata:
       namespace: registry
       backend: kv_default
+
     inference:
       table_name: inference_store
       backend: sql_default
       max_write_queue_size: 10000
       num_writers: 4
+
    conversations:
       table_name: openai_conversations
       backend: sql_default
+
     prompts:
       namespace: prompts
       backend: kv_default
+
+metadata_store:
+  type: sqlite
+  db_path: ~/.llama/storage/registry.db
+
+inference_store:
+  type: sqlite
+  db_path: ~/.llama/storage/inference-store.db
+
+conversations_store:
+  type: sqlite
+  db_path: ~/.llama/storage/conversations.db
+
+providers:
+
+  inference:
+    - provider_id: openai
+      provider_type: remote::openai
+      config:
+        api_key: ${env.OPENAI_API_KEY}
+        allowed_models:
+          - gpt-4o-mini
+
+    - provider_id: sentence-transformers
+      provider_type: inline::sentence-transformers
+      config:
+        allowed_models:
+          - ${env.EMBEDDING_MODEL_DIR}
+
+  files:
+    - provider_id: meta-reference-files
+      provider_type: inline::localfs
+      config:
+        storage_dir: ~/.llama/storage/files
+        metadata_store:
+          table_name: files_metadata
+          backend: sql_default
+
+  safety:
+    - provider_id: llama-guard
+      provider_type: inline::llama-guard
+      config:
+        excluded_categories: []
+
+  scoring:
+    - provider_id: basic
+      provider_type: inline::basic
+      config: {}
+
+    - provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+      config: {}
+
+  tool_runtime:
+    - provider_id: rag-runtime
+      provider_type: inline::rag-runtime
+      config: {}
+
+  vector_io:
+    - provider_id: solr-vector
+      provider_type: remote::solr_vector_io
+      config:
+        solr_url: http://localhost:8983/solr
+        collection_name: portal-rag
+        vector_field: chunk_vector
+        content_field: chunk
+        embedding_dimension: 384
+        embedding_model: ${env.EMBEDDING_MODEL_DIR}
+        persistence:
+          namespace: portal-rag
+          backend: kv_default
+
+  agents:
+    - provider_id: meta-reference
+      provider_type: inline::meta-reference
+      config:
+        persistence:
+          agent_state:
+            namespace: agents_state
+            backend: kv_default
+          responses:
+            table_name: agents_responses
+            backend: sql_default
+
+  batches:
+    - provider_id: reference
+      provider_type: inline::reference
+      config:
+        kvstore:
+          namespace: batches_store
+          backend: kv_default
+
+  datasetio:
+    - provider_id: huggingface
+      provider_type: remote::huggingface
+      config:
+        kvstore:
+          namespace: huggingface_datasetio
+          backend: kv_default
+
+    - provider_id: localfs
+      provider_type: inline::localfs
+      config:
+        kvstore:
+          namespace: localfs_datasetio
+          backend: kv_default
+
 registered_resources:
-  models: []
+  models:
+    - model_id: granite-embedding-30m
+      model_type: embedding
+      provider_id: sentence-transformers
+      provider_model_id: /Users/acoba/.llama/models/granite-embedding-30m-english
+      metadata:
+        embedding_dimension: 384
+
   shields:
-  - shield_id: llama-guard
-    provider_id: llama-guard
-    provider_shield_id: openai/gpt-4o-mini
-  vector_dbs: []
-  datasets: []
-  scoring_fns: []
-  benchmarks: []
-  tool_groups:
-  - toolgroup_id: builtin::rag # Register the RAG tool
-    provider_id: rag-runtime
-vector_stores:
-  default_provider_id: faiss
-  default_embedding_model: # Define the default embedding model for RAG
-    provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
+  - shield_id: llama-guard
+    provider_id: llama-guard
+    provider_shield_id: openai/gpt-4o-mini
+
+  vector_stores:
+    - vector_store_id: portal-rag
+      provider_id: solr-vector
+      embedding_model: sentence-transformers//Users/acoba/.llama/models/granite-embedding-30m-english
+      embedding_dimension: 384
 safety:
   default_shield_id: llama-guard
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
index ce0c87be..9b33b2b2 100644
--- a/src/app/endpoints/query.py
+++ b/src/app/endpoints/query.py
@@ -59,6 +59,11 @@
 router = APIRouter(tags=["query"])
 
+# When OFFLINE is False, use reference_url for chunk source
+# When OFFLINE is True, use parent_id for chunk source
+# TODO: move this setting to a higher level configuration
+OFFLINE = True
+
 query_response: dict[int | str, dict[str, Any]] = {
     200: QueryResponse.openapi_response(),
     401: UnauthorizedResponse.openapi_response(
@@ -390,9 +395,9 @@ async def query_endpoint_handler_base(  # pylint: disable=R0914
         response = QueryResponse(
             conversation_id=conversation_id,
             response=summary.llm_response,
-            tool_calls=summary.tool_calls,
-            tool_results=summary.tool_results,
-            rag_chunks=summary.rag_chunks,
+            rag_chunks=rag_chunks_dict,
+            tool_calls=summary.tool_calls if summary.tool_calls else [],
+            tool_results=summary.tool_results if summary.tool_results else [],
             referenced_documents=referenced_documents,
             truncated=False,  # TODO: implement truncation detection
             input_tokens=token_usage.input_tokens,
@@ -586,6 +591,311 @@ def parse_metadata_from_text_item(
     return None
 
 
+# def parse_referenced_documents(response: Turn) -> list[ReferencedDocument]:
+#     """
+#     Parse referenced documents from Turn.
+
+#     Iterate through the steps of a response and collect all referenced
+#     documents from rag tool responses.
+
+#     Args:
+#         response(Turn): The response object from the agent turn.
+
+#     Returns:
+#         list[ReferencedDocument]: A list of ReferencedDocument, each with 'doc_url' and 'doc_title'
+#         representing all referenced documents found in the response.
+#     """
+#     docs = []
+#     for step in response.steps:
+#         if not isinstance(step, ToolExecutionStep):
+#             continue
+#         for tool_response in step.tool_responses:
+#             if tool_response.tool_name != constants.DEFAULT_RAG_TOOL:
+#                 continue
+#             for text_item in tool_response.content:
+#                 if not isinstance(text_item, TextContentItem):
+#                     continue
+#                 doc = parse_metadata_from_text_item(text_item)
+#                 if doc:
+#                     docs.append(doc)
+#     return docs
+
+
+# async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branches,too-many-arguments
+#     client: AsyncLlamaStackClient,
+#     model_id: str,
+#     query_request: QueryRequest,
+#     token: str,
+#     mcp_headers: Optional[dict[str, dict[str, str]]] = None,
+#     *,
+#     provider_id: str = "",
+# ) -> tuple[TurnSummary, str, list[ReferencedDocument], TokenCounter]:
+#     """
+#     Retrieve response from LLMs and agents.
+
+#     Retrieves a response from the Llama Stack LLM or agent for a
+#     given query, handling shield configuration, tool usage, and
+#     attachment validation.
+
+#     This function configures input/output shields, system prompts,
+#     and toolgroups (including RAG and MCP integration) as needed
+#     based on the query request and system configuration. It
+#     validates attachments, manages conversation and session
+#     context, and processes MCP headers for multi-component
+#     processing. Shield violations in the response are detected and
+#     corresponding metrics are updated.
+
+#     Parameters:
+#         model_id (str): The identifier of the LLM model to use.
+# provider_id (str): The identifier of the LLM provider to use. +# query_request (QueryRequest): The user's query and associated metadata. +# token (str): The authentication token for authorization. +# mcp_headers (dict[str, dict[str, str]], optional): Headers for multi-component processing. + +# Returns: +# tuple[TurnSummary, str, list[ReferencedDocument], TokenCounter]: A tuple containing +# a summary of the LLM or agent's response +# content, the conversation ID, the list of parsed referenced documents, and token usage information. +# """ +# available_input_shields = [ +# shield.identifier +# for shield in filter(is_input_shield, await client.shields.list()) +# ] +# available_output_shields = [ +# shield.identifier +# for shield in filter(is_output_shield, await client.shields.list()) +# ] +# if not available_input_shields and not available_output_shields: +# logger.info("No available shields. Disabling safety") +# else: +# logger.info( +# "Available input shields: %s, output shields: %s", +# available_input_shields, +# available_output_shields, +# ) +# # use system prompt from request or default one +# system_prompt = get_system_prompt(query_request, configuration) +# logger.debug("Using system prompt: %s", system_prompt) + +# # TODO(lucasagomes): redact attachments content before sending to LLM +# # if attachments are provided, validate them +# if query_request.attachments: +# validate_attachments_metadata(query_request.attachments) + +# agent, conversation_id, session_id = await get_agent( +# client, +# model_id, +# system_prompt, +# available_input_shields, +# available_output_shields, +# query_request.conversation_id, +# query_request.no_tools or False, +# ) + +# logger.debug("Conversation ID: %s, session ID: %s", conversation_id, session_id) +# # bypass tools and MCP servers if no_tools is True +# if query_request.no_tools: +# mcp_headers = {} +# agent.extra_headers = {} +# toolgroups = None +# else: +# # preserve compatibility when mcp_headers is not provided +# logger.info("HELLOOOOOO") +# if mcp_headers is None: +# mcp_headers = {} +# mcp_headers = handle_mcp_headers_with_toolgroups(mcp_headers, configuration) +# if not mcp_headers and token: +# for mcp_server in configuration.mcp_servers: +# mcp_headers[mcp_server.url] = { +# "Authorization": f"Bearer {token}", +# } + +# agent.extra_headers = { +# "X-LlamaStack-Provider-Data": json.dumps( +# { +# "mcp_headers": mcp_headers, +# } +# ), +# } + +# logger.info("STARTING HERE") +# # Use specified vector stores or fetch all available ones +# if client.vector_dbs.list(): +# logger.info("Vector db list: ", client.vector_dbs.list()) +# if query_request.vector_store_ids: +# vector_db_ids = query_request.vector_store_ids +# logger.info("VECTOR_DB_IDS ",vector_db_ids) +# else: +# vector_db_ids = [ +# vector_store.id +# for vector_store in (await client.vector_stores.list()).data +# ] +# toolgroups = (get_rag_toolgroups(vector_db_ids) or []) + [ +# mcp_server.name for mcp_server in configuration.mcp_servers +# ] +# # Convert empty list to None for consistency with existing behavior +# if not toolgroups: +# toolgroups = None + +# # TODO: LCORE-881 - Remove if Llama Stack starts to support these mime types +# # documents: list[Document] = [ +# # ( +# # {"content": doc["content"], "mime_type": "text/plain"} +# # if doc["mime_type"].lower() in ("application/json", "application/xml") +# # else doc +# # ) +# # for doc in query_request.get_documents() +# # ] + +# # Extract RAG chunks from vector DB query response BEFORE calling agent +# 
rag_chunks = [] +# doc_ids_from_chunks = [] +# retrieved_chunks = [] +# retrieved_scores = [] + +# try: +# if vector_db_ids: +# vector_db_id = vector_db_ids[0] # Use first available vector DB + +# params = {"k": 5, "score_threshold": 0.0} +# logger.info("Initial params: %s", params) +# logger.info("query_request.solr: %s", query_request.solr) +# if query_request.solr: +# # Pass the entire solr dict under the 'solr' key +# params["solr"] = query_request.solr +# logger.info("Final params with solr filters: %s", params) +# else: +# logger.info("No solr filters provided") +# logger.info("Final params being sent to vector_io.query: %s", params) + +# query_response = await client.vector_io.query( +# vector_db_id=vector_db_id, query=query_request.query, params=params +# ) + +# logger.info("The query response total payload: %s", query_response) + +# if query_response.chunks: +# from models.responses import RAGChunk, ReferencedDocument + +# retrieved_chunks = query_response.chunks +# retrieved_scores = ( +# query_response.scores if hasattr(query_response, "scores") else [] +# ) + +# # Extract doc_ids from chunks for referenced_documents +# metadata_doc_ids = set() +# for chunk in query_response.chunks: +# logger.info("Hitting the extract the doc ids in the chunks", chunk) +# metadata = getattr(chunk, "metadata", None) +# if metadata and "doc_id" in metadata: +# reference_doc = metadata["doc_id"] +# logger.info(reference_doc) +# if reference_doc and reference_doc not in metadata_doc_ids: +# metadata_doc_ids.add(reference_doc) +# doc_ids_from_chunks.append( +# ReferencedDocument( +# doc_title=metadata.get("title", None), +# doc_url="https://mimir.corp.redhat.com" +# + reference_doc, +# ) +# ) + +# logger.info( +# "Extracted %d unique document IDs from chunks", +# len(doc_ids_from_chunks), +# ) + +# except Exception as e: +# logger.warning("Failed to query vector database for chunks: %s", e) +# logger.debug("Vector DB query error details: %s", traceback.format_exc()) +# # Continue without RAG chunks + +# # Convert retrieved chunks to RAGChunk format +# for i, chunk in enumerate(retrieved_chunks): +# # Extract source from chunk metadata based on OFFLINE flag +# source = None +# if chunk.metadata: +# if OFFLINE: +# parent_id = chunk.metadata.get("parent_id") +# if parent_id: +# source = urljoin("https://mimir.corp.redhat.com", parent_id) +# else: +# source = chunk.metadata.get("reference_url") + +# # Get score from retrieved_scores list if available +# score = retrieved_scores[i] if i < len(retrieved_scores) else None + +# rag_chunks.append( +# RAGChunk( +# content=chunk.content, +# source=source, +# score=score, +# ) +# ) + +# logger.info("Retrieved %d chunks from vector DB", len(rag_chunks)) + +# # Format RAG context for injection into user message +# rag_context = "" +# if rag_chunks: +# context_chunks = [] +# for chunk in rag_chunks[:5]: # Limit to top 5 chunks +# chunk_text = f"Source: {chunk.source or 'Unknown'}\n{chunk.content}" +# context_chunks.append(chunk_text) +# rag_context = "\n\nRelevant documentation:\n" + "\n\n".join(context_chunks) +# logger.info("Injecting %d RAG chunks into user message", len(context_chunks)) + +# # Inject RAG context into user message +# user_content = query_request.query + rag_context + +# response = await agent.create_turn( +# messages=[UserMessage(role="user", content=user_content).model_dump()], +# session_id=session_id, +# # documents=documents, +# stream=False, +# toolgroups=toolgroups, +# ) +# response = cast(Turn, response) + +# summary = TurnSummary( +# 
llm_response=( +# content_to_str(response.output_message.content) +# if ( +# getattr(response, "output_message", None) is not None +# and getattr(response.output_message, "content", None) is not None +# ) +# else "" +# ), +# tool_calls=[], +# tool_results=[], +# rag_chunks=rag_chunks, +# ) + +# referenced_documents = parse_referenced_documents(response) + +# # Update token count metrics and extract token usage in one call +# model_label = model_id.split("/", 1)[1] if "/" in model_id else model_id +# token_usage = extract_and_update_token_metrics( +# response, model_label, provider_id, system_prompt +# ) + +# # Check for validation errors in the response +# steps = response.steps or [] +# for step in steps: +# if step.step_type == "shield_call" and step.violation: +# # Metric for LLM validation errors +# metrics.llm_calls_validation_errors_total.inc() +# if step.step_type == "tool_execution": +# summary.append_tool_calls_from_llama(step) + +# if not summary.llm_response: +# logger.warning( +# "Response lacks output_message.content (conversation_id=%s)", +# conversation_id, +# ) +# return (summary, conversation_id, referenced_documents, token_usage) + + def validate_attachments_metadata(attachments: list[Attachment]) -> None: """Validate the attachments metadata provided in the request. @@ -614,3 +924,33 @@ def validate_attachments_metadata(attachments: list[Attachment]) -> None: response="Invalid attribute value", cause=message ) raise HTTPException(**response.model_dump()) + + +# def get_rag_toolgroups( +# vector_db_ids: list[str], +# ) -> Optional[list[Toolgroup]]: +# """ +# Return a list of RAG Tool groups if the given vector DB list is not empty. + +# Generate a list containing a RAG knowledge search toolgroup if +# vector database IDs are provided. + +# Parameters: +# vector_db_ids (list[str]): List of vector database identifiers to include in the toolgroup. + +# Returns: +# Optional[list[Toolgroup]]: A list with a single RAG toolgroup if +# vector_db_ids is non-empty; otherwise, None. 
+# """ +# return ( +# [ +# ToolgroupAgentToolGroupWithArgs( +# name="builtin::rag/file_search", +# args={ +# "vector_db_ids": vector_db_ids, +# }, +# ) +# ] +# if vector_db_ids +# else None +# ) diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py index 64f4c534..61b159fd 100644 --- a/src/app/endpoints/query_v2.py +++ b/src/app/endpoints/query_v2.py @@ -4,9 +4,12 @@ import json import logging +import traceback from typing import Annotated, Any, Optional, cast +from urllib.parse import urljoin from fastapi import APIRouter, Depends, Request +from llama_stack_client import APIConnectionError, APIStatusError from llama_stack_api.openai_responses import ( OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse, @@ -364,9 +367,14 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche if query_request.attachments: validate_attachments_metadata(query_request.attachments) - # Prepare tools for responses API + # Prepare tools for responses API - skip RAG tools since we're doing direct vector query toolgroups = await prepare_tools_for_responses_api( - client, query_request, token, configuration, mcp_headers + client, + query_request, + token, + configuration, + mcp_headers=mcp_headers, + skip_rag_tools=True, ) # Prepare input for Responses API @@ -420,6 +428,174 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche TokenCounter(), ) + # Extract RAG chunks from vector DB query response BEFORE calling responses API + rag_chunks = [] + doc_ids_from_chunks = [] + retrieved_chunks = [] + retrieved_scores = [] + + # When offline is False, use reference_url for chunk source + # When offline is True, use parent_id for chunk source + # TODO: move this setting to a higher level configuration + offline = True + + try: + # Get vector stores for direct querying + if query_request.vector_store_ids: + vector_store_ids = query_request.vector_store_ids + logger.info( + "Using specified vector_store_ids for direct query: %s", + vector_store_ids, + ) + else: + vector_store_ids = [ + vector_store.id + for vector_store in (await client.vector_stores.list()).data + ] + logger.info( + "Using all available vector_store_ids for direct query: %s", + vector_store_ids, + ) + + if vector_store_ids: + vector_store_id = vector_store_ids[0] # Use first available vector store + + params = {"k": 5, "score_threshold": 0.0, "mode": "hybrid"} + logger.info("Initial params: %s", params) + logger.info("query_request.solr: %s", query_request.solr) + if query_request.solr: + # Pass the entire solr dict under the 'solr' key + params["solr"] = query_request.solr + logger.info("Final params with solr filters: %s", params) + else: + logger.info("No solr filters provided") + logger.info("Final params being sent to vector_io.query: %s", params) + + query_response = await client.vector_io.query( + vector_store_id=vector_store_id, + query=query_request.query, + params=params, + ) + + logger.info("The query response total payload: %s", query_response) + + if query_response.chunks: + retrieved_chunks = query_response.chunks + retrieved_scores = ( + query_response.scores if hasattr(query_response, "scores") else [] + ) + + # Extract doc_ids from chunks for referenced_documents + metadata_doc_ids = set() + + for chunk in query_response.chunks: + logger.info("Extract doc ids from chunk: %s", chunk) + + # 1) dict metadata (what your code expects today) + md = getattr(chunk, "metadata", None) or {} + doc_id = md.get("doc_id") or md.get("document_id") + title = 
md.get("title") + + # 2) typed chunk_metadata (what your provider/logs are actually populating) + if not doc_id: + cm = getattr(chunk, "chunk_metadata", None) + if cm is not None: + # cm might be a pydantic model or a dict depending on caller + if isinstance(cm, dict): + doc_id = cm.get("doc_id") or cm.get("document_id") + title = title or cm.get("title") + reference_url = cm.get("reference_url") + else: + doc_id = getattr(cm, "doc_id", None) or getattr( + cm, "document_id", None + ) + title = title or getattr(cm, "title", None) + reference_url = getattr(cm, "reference_url", None) + else: + reference_url = None + else: + reference_url = md.get("reference_url") + + if not doc_id and not reference_url: + continue + + # Build URL based on offline flag + if offline: + # Use parent/doc path + reference_doc = doc_id + doc_url = "https://mimir.corp.redhat.com" + reference_doc + else: + # Use reference_url if online + reference_doc = reference_url or doc_id + doc_url = ( + reference_doc + if reference_doc.startswith("http") + else ("https://mimir.corp.redhat.com" + reference_doc) + ) + + if reference_doc and reference_doc not in metadata_doc_ids: + metadata_doc_ids.add(reference_doc) + doc_ids_from_chunks.append( + ReferencedDocument( + doc_title=title, + doc_url=doc_url, + ) + ) + + logger.info( + "Extracted %d unique document IDs from chunks", len(doc_ids_from_chunks) + ) + + except ( + APIConnectionError, + APIStatusError, + AttributeError, + KeyError, + ValueError, + ) as e: + logger.warning("Failed to query vector database for chunks: %s", e) + logger.debug("Vector DB query error details: %s", traceback.format_exc()) + # Continue without RAG chunks + + # Convert retrieved chunks to RAGChunk format + for i, chunk in enumerate(retrieved_chunks): + # Extract source from chunk metadata based on offline flag + source = None + if chunk.metadata: + if offline: + parent_id = chunk.metadata.get("parent_id") + if parent_id: + source = urljoin("https://mimir.corp.redhat.com", parent_id) + else: + source = chunk.metadata.get("reference_url") + + # Get score from retrieved_scores list if available + score = retrieved_scores[i] if i < len(retrieved_scores) else None + + rag_chunks.append( + RAGChunk( + content=chunk.content, + source=source, + score=score, + ) + ) + + logger.info("Retrieved %d chunks from vector DB", len(rag_chunks)) + + # Format RAG context for injection into user message + rag_context = "" + if rag_chunks: + context_chunks = [] + for chunk in rag_chunks[:5]: # Limit to top 5 chunks + chunk_text = f"Source: {chunk.source or 'Unknown'}\n{chunk.content}" + context_chunks.append(chunk_text) + rag_context = "\n\nRelevant documentation:\n" + "\n\n".join(context_chunks) + logger.info("Injecting %d RAG chunks into user message", len(context_chunks)) + + # Inject RAG context into input text + if rag_context: + input_text = input_text + rag_context + # Create OpenAI response using responses API create_kwargs: dict[str, Any] = { "input": input_text, @@ -444,18 +620,29 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche llm_response = "" tool_calls: list[ToolCallSummary] = [] tool_results: list[ToolResultSummary] = [] - rag_chunks: list[RAGChunk] = [] + response_api_rag_chunks: list[RAGChunk] = [] for output_item in response.output: message_text = extract_text_from_response_output_item(output_item) if message_text: llm_response += message_text - tool_call, tool_result = _build_tool_call_summary(output_item, rag_chunks) + tool_call, tool_result = 
_build_tool_call_summary( + output_item, response_api_rag_chunks + ) if tool_call: tool_calls.append(tool_call) if tool_result: tool_results.append(tool_result) + # Merge RAG chunks from direct vector query with those from responses API + all_rag_chunks = rag_chunks + response_api_rag_chunks + logger.info( + "Combined RAG chunks: %d from direct query + %d from responses API = %d total", + len(rag_chunks), + len(response_api_rag_chunks), + len(all_rag_chunks), + ) + logger.info( "Response processing complete - Tool calls: %d, Response length: %d chars", len(tool_calls), @@ -466,11 +653,21 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche llm_response=llm_response, tool_calls=tool_calls, tool_results=tool_results, - rag_chunks=rag_chunks, + rag_chunks=all_rag_chunks, ) # Extract referenced documents and token usage from Responses API response - referenced_documents = parse_referenced_documents_from_responses_api(response) + # Merge with documents from direct vector query + response_referenced_documents = parse_referenced_documents_from_responses_api( + response + ) + all_referenced_documents = doc_ids_from_chunks + response_referenced_documents + logger.info( + "Combined referenced documents: %d from direct query + %d from responses API = %d total", + len(doc_ids_from_chunks), + len(response_referenced_documents), + len(all_referenced_documents), + ) model_label = model_id.split("/", 1)[1] if "/" in model_id else model_id token_usage = extract_token_usage_from_responses_api( response, model_label, provider_id, system_prompt @@ -485,7 +682,7 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche return ( summary, normalize_conversation_id(conversation_id), - referenced_documents, + all_referenced_documents, token_usage, ) @@ -733,12 +930,15 @@ def _increment_llm_call_metric(provider: str, model: str) -> None: logger.warning("Failed to update LLM call metric: %s", e) -def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[dict[str, Any]]]: +def get_rag_tools( + vector_store_ids: list[str], solr_params: Optional[dict[str, Any]] = None +) -> Optional[list[dict[str, Any]]]: """ Convert vector store IDs to tools format for Responses API. Args: vector_store_ids: List of vector store identifiers + solr_params: Optional Solr filtering parameters Returns: Optional[list[dict[str, Any]]]: List containing file_search tool configuration, @@ -747,13 +947,16 @@ def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[dict[str, Any]]] if not vector_store_ids: return None - return [ - { - "type": "file_search", - "vector_store_ids": vector_store_ids, - "max_num_results": 10, - } - ] + tool_config = { + "type": "file_search", + "vector_store_ids": vector_store_ids, + "max_num_results": 10, + } + + if solr_params: + tool_config["solr"] = solr_params + + return [tool_config] def get_mcp_tools( @@ -854,7 +1057,9 @@ async def prepare_tools_for_responses_api( query_request: QueryRequest, token: str, config: AppConfig, + *, mcp_headers: Optional[dict[str, dict[str, str]]] = None, + skip_rag_tools: bool = False, ) -> Optional[list[dict[str, Any]]]: """ Prepare tools for Responses API including RAG and MCP tools. 
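A minimal usage sketch of the extended get_rag_tools() above (not part of the patch; the vector store ID and filter values are illustrative only):

    tools = get_rag_tools(
        vector_store_ids=["portal-rag"],
        solr_params={"fq": ["product:*openshift*", "product_version:*4.16*"]},
    )
    # Based on the implementation above, this should evaluate to:
    # [{"type": "file_search",
    #   "vector_store_ids": ["portal-rag"],
    #   "max_num_results": 10,
    #   "solr": {"fq": ["product:*openshift*", "product_version:*4.16*"]}}]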
@@ -868,6 +1073,7 @@ async def prepare_tools_for_responses_api( token: Authentication token for MCP tools config: Configuration object containing MCP server settings mcp_headers: Per-request headers for MCP servers + skip_rag_tools: If True, skip adding RAG tools (used when doing direct vector querying) Returns: Optional[list[dict[str, Any]]]: List of tool configurations for the @@ -877,18 +1083,39 @@ async def prepare_tools_for_responses_api( return None toolgroups = [] - # Get vector stores for RAG tools - use specified ones or fetch all - if query_request.vector_store_ids: - vector_store_ids = query_request.vector_store_ids - else: - vector_store_ids = [ - vector_store.id for vector_store in (await client.vector_stores.list()).data - ] - # Add RAG tools if vector stores are available - rag_tools = get_rag_tools(vector_store_ids) - if rag_tools: - toolgroups.extend(rag_tools) + # Add RAG tools if not skipped + if not skip_rag_tools: + # Get vector stores for RAG tools - use specified ones or fetch all + if query_request.vector_store_ids: + vector_store_ids = query_request.vector_store_ids + logger.info("Using specified vector_store_ids: %s", vector_store_ids) + else: + vector_store_ids = [ + vector_store.id + for vector_store in (await client.vector_stores.list()).data + ] + logger.info("Using all available vector_store_ids: %s", vector_store_ids) + + # Add RAG tools if vector stores are available + if vector_store_ids: + # logger.info("query_request.solr: %s", query_request.solr) + rag_tools = get_rag_tools(vector_store_ids) + if rag_tools: + logger.info("rag_tool are: %s", rag_tools) + toolgroups.extend(rag_tools) + # if query_request.solr: + # logger.info( + # "RAG tools configured with Solr filters: %s", query_request.solr + # ) + # else: + # logger.info("RAG tools configured without Solr filters") + else: + logger.info("No RAG tools configured") + else: + logger.info("No vector stores available for RAG tools") + else: + logger.info("Skipping RAG tools - using direct vector querying instead") # Add MCP server tools mcp_tools = get_mcp_tools(config.mcp_servers, token, mcp_headers) diff --git a/src/app/endpoints/shields.py b/src/app/endpoints/shields.py index 5dd8b8b6..790c2d0b 100644 --- a/src/app/endpoints/shields.py +++ b/src/app/endpoints/shields.py @@ -70,6 +70,8 @@ async def shields_endpoint_handler( try: # try to get Llama Stack client client = AsyncLlamaStackClientHolder().get_client() + # await client.shields.delete(identifier="llama-guard-shielf") + # exit(1) # retrieve shields shields = await client.shields.list() s = [dict(s) for s in shields] diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py index afd7293a..cbe7a566 100644 --- a/src/app/endpoints/streaming_query.py +++ b/src/app/endpoints/streaming_query.py @@ -4,6 +4,8 @@ import json import logging import re + +# import traceback import uuid from collections.abc import Callable from datetime import UTC, datetime @@ -13,6 +15,9 @@ Optional, ) +# from urllib.parse import urljoin + + from fastapi import APIRouter, Request from fastapi.responses import StreamingResponse from llama_stack_client import ( @@ -43,6 +48,7 @@ NotFoundResponse, PromptTooLongResponse, QuotaExceededResponse, + # RAGChunk, ServiceUnavailableResponse, StreamingQueryResponse, UnauthorizedResponse, @@ -59,6 +65,10 @@ logger = logging.getLogger("app.endpoints.handlers") router = APIRouter(tags=["streaming_query"]) +# # When OFFLINE is False, use reference_url for chunk source +# # When OFFLINE is True, use parent_id 
for chunk source +# # TODO: move this setting to a higher level configuration +# OFFLINE = True streaming_query_responses: dict[int | str, dict[str, Any]] = { 200: StreamingQueryResponse.openapi_response(), @@ -132,6 +142,7 @@ def stream_end_event( available_quotas: dict[str, int], referenced_documents: list[ReferencedDocument], media_type: str = MEDIA_TYPE_JSON, + # vector_io_referenced_docs: list[ReferencedDocument] | None = None, ) -> str: """ Yield the end of the data stream. @@ -164,10 +175,35 @@ def stream_end_event( # Use mode="json" to ensure AnyUrl is serialized to string (not just model_dump()) referenced_docs_dict = [doc.model_dump(mode="json") for doc in referenced_documents] + # referenced_docs_dict = [ + # { + # "doc_url": v.get("docs_url"), + # "doc_title": v.get("title"), + # } + # for v in metadata_map.values() + # if "docs_url" in v and "title" in v + # ] + + # # Add vector_io referenced documents + # if vector_io_referenced_docs: + # for doc in vector_io_referenced_docs: + # referenced_docs_dict.append( + # { + # "doc_url": doc.doc_url, + # "doc_title": doc.doc_title, + # } + # ) + + # # Convert RAG chunks to dict format + # rag_chunks_dict = [] + # if summary.rag_chunks: + # rag_chunks_dict = [chunk.model_dump() for chunk in summary.rag_chunks] + return format_stream_data( { "event": "end", "data": { + # "rag_chunks": rag_chunks_dict, "referenced_documents": referenced_docs_dict, "truncated": None, # TODO(jboos): implement truncated "input_tokens": token_usage.input_tokens, @@ -559,6 +595,130 @@ def _handle_heartbeat_event( ) +# def create_agent_response_generator( # pylint: disable=too-many-locals +# context: ResponseGeneratorContext, +# ) -> Any: +# """ +# Create a response generator function for Agent API streaming. + +# This factory function returns an async generator that processes streaming +# responses from the Agent API and yields Server-Sent Events (SSE). + +# Args: +# context: Context object containing all necessary parameters for response generation + +# Returns: +# An async generator function that yields SSE-formatted strings +# """ + +# async def response_generator( +# turn_response: AsyncIterator[AgentTurnResponseStreamChunk], +# ) -> AsyncIterator[str]: +# """ +# Generate SSE formatted streaming response. + +# Asynchronously generates a stream of Server-Sent Events +# (SSE) representing incremental responses from a +# language model turn. + +# Yields start, token, tool call, turn completion, and +# end events as SSE-formatted strings. Collects the +# complete response for transcript storage if enabled. 
+# """ +# chunk_id = 0 +# summary = TurnSummary( +# llm_response="No response from the model", +# tool_calls=[], +# tool_results=[], +# rag_chunks=[], +# ) + +# # Determine media type for response formatting +# media_type = context.query_request.media_type or MEDIA_TYPE_JSON + +# # Send start event at the beginning of the stream +# yield stream_start_event(context.conversation_id) + +# latest_turn: Optional[Any] = None + +# async for chunk in turn_response: +# if chunk.event is None: +# continue +# p = chunk.event.payload +# if p.event_type == "turn_complete": +# summary.llm_response = content_to_str(p.turn.output_message.content) +# latest_turn = p.turn +# system_prompt = get_system_prompt(context.query_request, configuration) +# try: +# update_llm_token_count_from_turn( +# p.turn, context.model_id, context.provider_id, system_prompt +# ) +# except Exception: # pylint: disable=broad-except +# logger.exception("Failed to update token usage metrics") +# elif p.event_type == "step_complete": +# if p.step_details.step_type == "tool_execution": +# summary.append_tool_calls_from_llama(p.step_details) + +# for event in stream_build_event( +# chunk, +# chunk_id, +# context.metadata_map, +# media_type, +# context.conversation_id, +# ): +# chunk_id += 1 +# yield event + +# # Extract token usage from the turn +# token_usage = ( +# extract_token_usage_from_turn(latest_turn) +# if latest_turn is not None +# else TokenCounter() +# ) +# referenced_documents = ( +# parse_referenced_documents(latest_turn) if latest_turn is not None else [] +# ) + +# # Add RAG chunks to summary if available from vector_io query +# if hasattr(context, "vector_io_rag_chunks") and context.vector_io_rag_chunks: +# summary.rag_chunks = context.vector_io_rag_chunks + +# available_quotas = get_available_quotas( +# configuration.quota_limiters, context.user_id +# ) +# yield stream_end_event( +# context.metadata_map, +# summary, +# token_usage, +# available_quotas, +# referenced_documents, +# media_type, +# ) + +# # Perform cleanup tasks (database and cache operations) +# await cleanup_after_streaming( +# user_id=context.user_id, +# conversation_id=context.conversation_id, +# model_id=context.model_id, +# provider_id=context.provider_id, +# llama_stack_model_id=context.llama_stack_model_id, +# query_request=context.query_request, +# summary=summary, +# metadata_map=context.metadata_map, +# started_at=context.started_at, +# client=context.client, +# config=configuration, +# skip_userid_check=context.skip_userid_check, +# get_topic_summary_func=get_topic_summary, +# is_transcripts_enabled_func=is_transcripts_enabled, +# store_transcript_func=store_transcript, +# persist_user_conversation_details_func=persist_user_conversation_details, +# rag_chunks=create_rag_chunks_dict(summary), +# ) + +# return response_generator + + async def streaming_query_endpoint_handler_base( # pylint: disable=too-many-locals,too-many-statements,too-many-arguments,too-many-positional-arguments request: Request, query_request: QueryRequest, @@ -662,6 +822,12 @@ async def streaming_query_endpoint_handler_base( # pylint: disable=too-many-loc token, mcp_headers=mcp_headers, ) + + # # Query vector_io for RAG chunks and referenced documents + # vector_io_rag_chunks, vector_io_referenced_docs = ( + # await query_vector_io_for_chunks(client, query_request) + # ) + metadata_map: dict[str, dict[str, Any]] = {} # Create context object for response generator @@ -678,6 +844,12 @@ async def streaming_query_endpoint_handler_base( # pylint: disable=too-many-loc 
metadata_map=metadata_map, ) + # Add vector_io data to context if available + # if hasattr(context, "vector_io_rag_chunks"): + # context.vector_io_rag_chunks = vector_io_rag_chunks + # if hasattr(context, "vector_io_referenced_docs"): + # context.vector_io_referenced_docs = vector_io_referenced_docs + # Create the response generator using the provided factory function response_generator = create_response_generator_func(context) @@ -724,3 +896,365 @@ async def streaming_query_endpoint_handler_base( # pylint: disable=too-many-loc status_code=error_response.status_code, media_type=query_request.media_type or MEDIA_TYPE_JSON, ) + + +# async def query_vector_io_for_chunks( +# client: AsyncLlamaStackClientHolder, +# query_request: QueryRequest, +# ) -> tuple[list[RAGChunk], list[ReferencedDocument]]: +# """ +# Query vector_io database for RAG chunks and referenced documents. + +# Args: +# client: AsyncLlamaStackClient for vector database access +# query_request: The user's query request containing query text and Solr filters + +# Returns: +# tuple: A tuple containing RAG chunks and referenced documents +# """ +# rag_chunks = [] +# doc_ids_from_chunks = [] + +# try: +# # Use the first available vector database if any exist +# try: +# # Try vector_stores first (new API) +# vector_stores = await client.vector_stores.list() +# vector_db_ids = [vs.id for vs in vector_stores.data] +# except AttributeError: +# # Fallback to vector_dbs (old API) +# vector_dbs = await client.vector_dbs.list() +# vector_db_ids = [vdb.identifier for vdb in vector_dbs] + +# if vector_db_ids: +# vector_db_id = vector_db_ids[0] # Use first available vector DB + +# params = {"k": 5, "score_threshold": 0.0} +# logger.info("Initial params: %s", params) +# logger.info("query_request.solr: %s", query_request.solr) +# if query_request.solr: +# # Pass the entire solr dict under the 'solr' key +# params["solr"] = query_request.solr +# logger.info("Final params with solr filters: %s", params) +# else: +# logger.info("No solr filters provided") +# logger.info("Final params being sent to vector_io.query: %s", params) + +# query_response = await client.vector_io.query( +# vector_db_id=vector_db_id, query=query_request.query, params=params +# ) + +# logger.info("The query response total payload: %s", query_response) + +# if query_response.chunks: +# rag_chunks = [ +# RAGChunk( +# content=str(chunk.content), # Convert to string if needed +# source=getattr(chunk, "doc_id", None) +# or getattr(chunk, "source", None), +# score=getattr(chunk, "score", None), +# ) +# for chunk in query_response.chunks[:5] # Limit to top 5 chunks +# ] +# logger.info("Retrieved %d chunks from vector DB", len(rag_chunks)) + +# # Extract doc_ids from chunks for referenced_documents +# metadata_doc_ids = set() +# for chunk in query_response.chunks: +# metadata = getattr(chunk, "metadata", None) +# if metadata and "doc_id" in metadata: +# reference_doc = metadata["doc_id"] +# logger.info(reference_doc) +# if reference_doc and reference_doc not in metadata_doc_ids: +# metadata_doc_ids.add(reference_doc) +# doc_ids_from_chunks.append( +# ReferencedDocument( +# doc_title=metadata.get("title", None), +# doc_url="https://mimir.corp.redhat.com" +# + reference_doc, +# ) +# ) + +# logger.info( +# "Extracted %d unique document IDs from chunks", +# len(doc_ids_from_chunks), +# ) + +# # Convert retrieved chunks to RAGChunk format with proper source handling +# final_rag_chunks = [] +# for chunk in query_response.chunks[:5]: +# # Extract source from chunk metadata based on 
OFFLINE flag +# source = None +# if chunk.metadata: +# if OFFLINE: +# parent_id = chunk.metadata.get("parent_id") +# if parent_id: +# source = urljoin( +# "https://mimir.corp.redhat.com", parent_id +# ) +# else: +# source = chunk.metadata.get("reference_url") + +# # Get score from chunk if available +# score = getattr(chunk, "score", None) + +# final_rag_chunks.append( +# RAGChunk( +# content=chunk.content, +# source=source, +# score=score, +# ) +# ) + +# return final_rag_chunks, doc_ids_from_chunks + +# except Exception as e: # pylint: disable=broad-except +# logger.warning("Failed to query vector database for chunks: %s", e) +# logger.debug("Vector DB query error details: %s", traceback.format_exc()) +# # Continue without RAG chunks + +# return rag_chunks, doc_ids_from_chunks + + +# @router.post( +# "/streaming_query", +# response_class=StreamingResponse, +# responses=streaming_query_responses, +# ) +# @authorize(Action.STREAMING_QUERY) +# async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals,too-many-statements +# request: Request, +# query_request: QueryRequest, +# auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +# mcp_headers: dict[str, dict[str, str]] = Depends(mcp_headers_dependency), +# ) -> StreamingResponse: +# """ +# Handle request to the /streaming_query endpoint using Agent API. + +# Returns a streaming response using Server-Sent Events (SSE) format with +# content type text/event-stream. + +# Returns: +# StreamingResponse: An HTTP streaming response yielding +# SSE-formatted events for the query lifecycle with content type +# text/event-stream. + +# Raises: +# HTTPException: +# - 401: Unauthorized - Missing or invalid credentials +# - 403: Forbidden - Insufficient permissions or model override not allowed +# - 404: Not Found - Conversation, model, or provider not found +# - 422: Unprocessable Entity - Request validation failed +# - 429: Too Many Requests - Quota limit exceeded +# - 500: Internal Server Error - Configuration not loaded or other server errors +# - 503: Service Unavailable - Unable to connect to Llama Stack backend +# """ +# return await streaming_query_endpoint_handler_base( +# request=request, +# query_request=query_request, +# auth=auth, +# mcp_headers=mcp_headers, +# retrieve_response_func=retrieve_response, +# create_response_generator_func=create_agent_response_generator, +# ) + + +# async def retrieve_response( +# client: AsyncLlamaStackClient, +# model_id: str, +# query_request: QueryRequest, +# token: str, +# mcp_headers: Optional[dict[str, dict[str, str]]] = None, +# ) -> tuple[AsyncIterator[AgentTurnResponseStreamChunk], str]: +# """ +# Retrieve response from LLMs and agents. + +# Asynchronously retrieves a streaming response and conversation +# ID from the Llama Stack agent for a given user query. + +# This function configures input/output shields, system prompt, +# and tool usage based on the request and environment. It +# prepares the agent with appropriate headers and toolgroups, +# validates attachments if present, and initiates a streaming +# turn with the user's query and any provided documents. + +# Parameters: +# model_id (str): Identifier of the model to use for the query. +# query_request (QueryRequest): The user's query and associated metadata. +# token (str): Authentication token for downstream services. +# mcp_headers (dict[str, dict[str, str]], optional): +# Multi-cluster proxy headers for tool integrations. 
+ +# Returns: +# tuple: A tuple containing the streaming response object +# and the conversation ID. +# """ +# available_input_shields = [ +# shield.identifier +# for shield in filter(is_input_shield, await client.shields.list()) +# ] +# available_output_shields = [ +# shield.identifier +# for shield in filter(is_output_shield, await client.shields.list()) +# ] +# if not available_input_shields and not available_output_shields: +# logger.info("No available shields. Disabling safety") +# else: +# logger.info( +# "Available input shields: %s, output shields: %s", +# available_input_shields, +# available_output_shields, +# ) +# # use system prompt from request or default one +# system_prompt = get_system_prompt(query_request, configuration) +# logger.debug("Using system prompt: %s", system_prompt) + +# # TODO(lucasagomes): redact attachments content before sending to LLM +# # if attachments are provided, validate them +# if query_request.attachments: +# validate_attachments_metadata(query_request.attachments) + +# agent, conversation_id, session_id = await get_agent( +# client, +# model_id, +# system_prompt, +# available_input_shields, +# available_output_shields, +# query_request.conversation_id, +# query_request.no_tools or False, +# ) + +# logger.debug("Conversation ID: %s, session ID: %s", conversation_id, session_id) +# # bypass tools and MCP servers if no_tools is True +# if query_request.no_tools: +# mcp_headers = {} +# agent.extra_headers = {} +# toolgroups = None +# else: +# # preserve compatibility when mcp_headers is not provided +# if mcp_headers is None: +# mcp_headers = {} + +# mcp_headers = handle_mcp_headers_with_toolgroups(mcp_headers, configuration) + +# if not mcp_headers and token: +# for mcp_server in configuration.mcp_servers: +# mcp_headers[mcp_server.url] = { +# "Authorization": f"Bearer {token}", +# } + +# agent.extra_headers = { +# "X-LlamaStack-Provider-Data": json.dumps( +# { +# "mcp_headers": mcp_headers, +# } +# ), +# } + +# # Use specified vector stores or fetch all available ones +# if query_request.vector_store_ids: +# vector_db_ids = query_request.vector_store_ids +# else: +# vector_db_ids = [ +# vector_store.id +# for vector_store in (await client.vector_stores.list()).data +# ] +# toolgroups = (get_rag_toolgroups(vector_db_ids) or []) + [ +# mcp_server.name for mcp_server in configuration.mcp_servers +# ] +# # Convert empty list to None for consistency with existing behavior +# if not toolgroups: +# toolgroups = None + +# # TODO: LCORE-881 - Remove if Llama Stack starts to support these mime types +# # documents: list[Document] = [ +# # ( +# # {"content": doc["content"], "mime_type": "text/plain"} +# # if doc["mime_type"].lower() in ("application/json", "application/xml") +# # else doc +# # ) +# # for doc in query_request.get_documents() +# # ] + +# # Get RAG chunks before sending to LLM (reuse logic from query_vector_io_for_chunks) +# rag_chunks = [] +# try: +# if vector_db_ids: +# vector_db_id = vector_db_ids[0] # Use first available vector DB + +# params = {"k": 5, "score_threshold": 0.0} +# logger.info("Initial params: %s", params) +# logger.info("query_request.solr: %s", query_request.solr) +# if query_request.solr: +# # Pass the entire solr dict under the 'solr' key +# params["solr"] = query_request.solr +# logger.info("Final params with solr filters: %s", params) +# else: +# logger.info("No solr filters provided") +# logger.info("Final params being sent to vector_io.query: %s", params) + +# query_response = await client.vector_io.query( +# 
vector_db_id=vector_db_id, query=query_request.query, params=params +# ) + +# logger.info("The query response total payload: %s", query_response) + +# if query_response.chunks: +# # Convert retrieved chunks to RAGChunk format with proper source handling +# for chunk in query_response.chunks[:5]: +# # Extract source from chunk metadata based on OFFLINE flag +# source = None +# if chunk.metadata: +# if OFFLINE: +# parent_id = chunk.metadata.get("parent_id") +# if parent_id: +# source = urljoin( +# "https://mimir.corp.redhat.com", parent_id +# ) +# else: +# source = chunk.metadata.get("reference_url") + +# # Get score from chunk if available +# score = getattr(chunk, "score", None) + +# rag_chunks.append( +# RAGChunk( +# content=chunk.content, +# source=source, +# score=score, +# ) +# ) + +# logger.info( +# "Retrieved %d chunks from vector DB for streaming", len(rag_chunks) +# ) + +# except Exception as e: +# logger.warning("Failed to query vector database for chunks: %s", e) +# logger.debug("Vector DB query error details: %s", traceback.format_exc()) + +# # Format RAG context for injection into user message +# rag_context = "" +# if rag_chunks: +# context_chunks = [] +# for chunk in rag_chunks[:5]: # Limit to top 5 chunks +# chunk_text = f"Source: {chunk.source or 'Unknown'}\n{chunk.content}" +# context_chunks.append(chunk_text) +# rag_context = "\n\nRelevant documentation:\n" + "\n\n".join(context_chunks) +# logger.info( +# "Injecting %d RAG chunks into streaming user message", len(context_chunks) +# ) + +# # Inject RAG context into user message +# user_content = query_request.query + rag_context + +# response = await agent.create_turn( +# messages=[UserMessage(role="user", content=user_content).model_dump()], +# session_id=session_id, +# # documents=documents, +# stream=True, +# toolgroups=toolgroups, +# ) +# response = cast(AsyncIterator[AgentTurnResponseStreamChunk], response) + +# return response, conversation_id diff --git a/src/app/endpoints/streaming_query_v2.py b/src/app/endpoints/streaming_query_v2.py index e1c02ca4..de96cdf0 100644 --- a/src/app/endpoints/streaming_query_v2.py +++ b/src/app/endpoints/streaming_query_v2.py @@ -1,7 +1,9 @@ """Streaming query handler using Responses API (v2).""" import logging +import traceback from typing import Annotated, Any, AsyncIterator, Optional, cast +from urllib.parse import urljoin from fastapi import APIRouter, Depends, Request from fastapi.responses import StreamingResponse @@ -14,7 +16,7 @@ OpenAIResponseObjectStreamResponseOutputTextDelta, OpenAIResponseObjectStreamResponseOutputTextDone, ) -from llama_stack_client import AsyncLlamaStackClient +from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient from app.endpoints.query import ( is_transcripts_enabled, @@ -51,6 +53,7 @@ InternalServerErrorResponse, NotFoundResponse, QuotaExceededResponse, + ReferencedDocument, ServiceUnavailableResponse, StreamingQueryResponse, UnauthorizedResponse, @@ -97,6 +100,7 @@ def create_responses_response_generator( # pylint: disable=too-many-locals,too-many-statements context: ResponseGeneratorContext, + doc_ids_from_chunks: Optional[list[ReferencedDocument]] = None, ) -> Any: """ Create a response generator function for Responses API streaming. 
@@ -106,6 +110,7 @@ def create_responses_response_generator( # pylint: disable=too-many-locals,too- Args: context: Context object containing all necessary parameters for response generation + doc_ids_from_chunks: Referenced documents extracted from vector DB chunks Returns: An async generator function that yields SSE-formatted strings @@ -294,9 +299,13 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat model_id=context.model_id, provider_id=context.provider_id, ) - referenced_documents = parse_referenced_documents_from_responses_api( + response_referenced_documents = parse_referenced_documents_from_responses_api( cast(OpenAIResponseObject, latest_response_object) ) + # Combine doc_ids_from_chunks with response_referenced_documents + all_referenced_documents = ( + doc_ids_from_chunks or [] + ) + response_referenced_documents available_quotas = get_available_quotas( configuration.quota_limiters, context.user_id ) @@ -304,7 +313,7 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat context.metadata_map, token_usage, available_quotas, - referenced_documents, + all_referenced_documents, media_type, ) @@ -382,7 +391,7 @@ async def retrieve_response( # pylint: disable=too-many-locals query_request: QueryRequest, token: str, mcp_headers: Optional[dict[str, dict[str, str]]] = None, -) -> tuple[AsyncIterator[OpenAIResponseObjectStream], str]: +) -> tuple[AsyncIterator[OpenAIResponseObjectStream], str, list[ReferencedDocument]]: """ Retrieve response from LLMs and agents. @@ -403,8 +412,8 @@ async def retrieve_response( # pylint: disable=too-many-locals Multi-cluster proxy headers for tool integrations. Returns: - tuple: A tuple containing the streaming response object - and the conversation ID. + tuple: A tuple containing the streaming response object, + the conversation ID, and the list of referenced documents from vector DB chunks. 
""" # use system prompt from request or default one system_prompt = get_system_prompt(query_request, configuration) @@ -415,11 +424,180 @@ async def retrieve_response( # pylint: disable=too-many-locals if query_request.attachments: validate_attachments_metadata(query_request.attachments) - # Prepare tools for responses API + # Prepare tools for responses API - skip RAG tools since we're doing direct vector query toolgroups = await prepare_tools_for_responses_api( - client, query_request, token, configuration, mcp_headers + client, + query_request, + token, + configuration, + mcp_headers=mcp_headers, + skip_rag_tools=True, ) + # Extract RAG chunks from vector DB query response BEFORE calling responses API + rag_chunks = [] + doc_ids_from_chunks = [] + retrieved_chunks = [] + retrieved_scores = [] + + # When offline is False, use reference_url for chunk source + # When offline is True, use parent_id for chunk source + # TODO: move this setting to a higher level configuration + offline = True + + try: + # Get vector stores for direct querying + if query_request.vector_store_ids: + vector_store_ids = query_request.vector_store_ids + logger.info( + "Using specified vector_store_ids for direct query: %s", + vector_store_ids, + ) + else: + vector_store_ids = [ + vector_store.id + for vector_store in (await client.vector_stores.list()).data + ] + logger.info( + "Using all available vector_store_ids for direct query: %s", + vector_store_ids, + ) + + if vector_store_ids: + vector_store_id = vector_store_ids[0] # Use first available vector store + + params = {"k": 5, "score_threshold": 0.0, "mode": "hybrid"} + logger.info("Initial params: %s", params) + logger.info("query_request.solr: %s", query_request.solr) + if query_request.solr: + # Pass the entire solr dict under the 'solr' key + params["solr"] = query_request.solr + logger.info("Final params with solr filters: %s", params) + else: + logger.info("No solr filters provided") + logger.info("Final params being sent to vector_io.query: %s", params) + + query_response = await client.vector_io.query( + vector_store_id=vector_store_id, + query=query_request.query, + params=params, + ) + + logger.info("The query response total payload: %s", query_response) + + if query_response.chunks: + retrieved_chunks = query_response.chunks + retrieved_scores = ( + query_response.scores if hasattr(query_response, "scores") else [] + ) + + # Extract doc_ids from chunks for referenced_documents + metadata_doc_ids = set() + + for chunk in query_response.chunks: + logger.info("Extract doc ids from chunk: %s", chunk) + + # 1) dict metadata + md = getattr(chunk, "metadata", None) or {} + doc_id = md.get("doc_id") or md.get("document_id") + title = md.get("title") + + # 2) typed chunk_metadata + if not doc_id: + cm = getattr(chunk, "chunk_metadata", None) + if cm is not None: + # cm might be a pydantic model or a dict depending on caller + if isinstance(cm, dict): + doc_id = cm.get("doc_id") or cm.get("document_id") + title = title or cm.get("title") + reference_url = cm.get("reference_url") + else: + doc_id = getattr(cm, "doc_id", None) or getattr( + cm, "document_id", None + ) + title = title or getattr(cm, "title", None) + reference_url = getattr(cm, "reference_url", None) + else: + reference_url = None + else: + reference_url = md.get("reference_url") + + if not doc_id and not reference_url: + continue + + # Build URL based on offline flag + if offline: + # Use parent/doc path + reference_doc = doc_id + doc_url = "https://mimir.corp.redhat.com" + reference_doc + else: 
+ # Use reference_url if online + reference_doc = reference_url or doc_id + doc_url = ( + reference_doc + if reference_doc.startswith("http") + else ("https://mimir.corp.redhat.com" + reference_doc) + ) + + if reference_doc and reference_doc not in metadata_doc_ids: + metadata_doc_ids.add(reference_doc) + doc_ids_from_chunks.append( + ReferencedDocument( + doc_title=title, + doc_url=doc_url, + ) + ) + + logger.info( + "Extracted %d unique document IDs from chunks", len(doc_ids_from_chunks) + ) + + except ( + APIConnectionError, + APIStatusError, + AttributeError, + KeyError, + ValueError, + ) as e: + logger.warning("Failed to query vector database for chunks: %s", e) + logger.debug("Vector DB query error details: %s", traceback.format_exc()) + # Continue without RAG chunks + + # Convert retrieved chunks to RAGChunk format + for i, chunk in enumerate(retrieved_chunks): + # Extract source from chunk metadata based on offline flag + source = None + if chunk.metadata: + if offline: + parent_id = chunk.metadata.get("parent_id") + if parent_id: + source = urljoin("https://mimir.corp.redhat.com", parent_id) + else: + source = chunk.metadata.get("reference_url") + + # Get score from retrieved_scores list if available + score = retrieved_scores[i] if i < len(retrieved_scores) else None + + rag_chunks.append( + RAGChunk( + content=chunk.content, + source=source, + score=score, + ) + ) + + logger.info("Retrieved %d chunks from vector DB", len(rag_chunks)) + + # Format RAG context for injection into user message + rag_context = "" + if rag_chunks: + context_chunks = [] + for chunk in rag_chunks[:5]: # Limit to top 5 chunks + chunk_text = f"Source: {chunk.source or 'Unknown'}\n{chunk.content}" + context_chunks.append(chunk_text) + rag_context = "\n\nRelevant documentation:\n" + "\n\n".join(context_chunks) + logger.info("Injecting %d RAG chunks into user message", len(context_chunks)) + # Prepare input for Responses API # Convert attachments to text and concatenate with query input_text = query_request.query @@ -430,6 +608,9 @@ async def retrieve_response( # pylint: disable=too-many-locals f"{attachment.content}" ) + # Add RAG context to input text + input_text += rag_context + # Handle conversation ID for Responses API # Create conversation upfront if not provided conversation_id = query_request.conversation_id @@ -475,4 +656,8 @@ async def retrieve_response( # pylint: disable=too-many-locals response = await client.responses.create(**create_params) response_stream = cast(AsyncIterator[OpenAIResponseObjectStream], response) - return response_stream, normalize_conversation_id(conversation_id) + return ( + response_stream, + normalize_conversation_id(conversation_id), + doc_ids_from_chunks, + ) diff --git a/src/app/main.py b/src/app/main.py index 74a6b86a..7dd242f9 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -22,6 +22,10 @@ from utils.common import register_mcp_servers_async from utils.llama_stack_version import check_llama_stack_version +import faulthandler +import signal +faulthandler.register(signal.SIGUSR1) + logger = get_logger(__name__) logger.info("Initializing app") @@ -55,6 +59,12 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]: # check if the Llama Stack version is supported by the service await check_llama_stack_version(client) + # try: + # await client.vector_stores.delete(vector_store_id="portal-rag") + # logger.info("Successfully deregistered vector store: portal-rag") + # except Exception as e: + # logger.warning("Failed to deregister vector store 'portal-rag': %s", 
e) + logger.info("Registering MCP servers") await register_mcp_servers_async(logger, configuration.configuration) get_logger("app.endpoints.handlers") diff --git a/src/constants.py b/src/constants.py index 681759cd..e9f4e211 100644 --- a/src/constants.py +++ b/src/constants.py @@ -127,7 +127,7 @@ MCP_AUTH_CLIENT = "client" # default RAG tool value -DEFAULT_RAG_TOOL = "knowledge_search" +DEFAULT_RAG_TOOL = "file_search" # Media type constants for streaming responses MEDIA_TYPE_JSON = "application/json" diff --git a/src/models/requests.py b/src/models/requests.py index 18e5b4b6..3ac4ede6 100644 --- a/src/models/requests.py +++ b/src/models/requests.py @@ -1,7 +1,7 @@ """Models for REST API requests.""" +from typing import Optional, Self, Any from enum import Enum -from typing import Optional, Self from pydantic import BaseModel, Field, field_validator, model_validator @@ -166,6 +166,13 @@ class QueryRequest(BaseModel): examples=["ocp_docs", "knowledge_base", "vector_db_1"], ) + solr: Optional[dict[str, Any]] = Field( + None, + description="Solr-specific query parameters including filter queries", + examples=[ + {"fq": ["product:*openshift*", "product_version:*4.16*"]}, + ], + ) # provides examples for /docs endpoint model_config = { "extra": "forbid", diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index b4b4ec5e..0a1390be 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -53,6 +53,16 @@ def test_get_rag_tools() -> None: assert tools[0]["type"] == "file_search" assert tools[0]["vector_store_ids"] == ["db1", "db2"] assert tools[0]["max_num_results"] == 10 + assert "solr" not in tools[0] + + # Test with Solr parameters + solr_params = {"fq": ["product:*openshift*", "product_version:*4.16*"]} + tools_with_solr = get_rag_tools(["db1", "db2"], solr_params) + assert isinstance(tools_with_solr, list) + assert tools_with_solr[0]["type"] == "file_search" + assert tools_with_solr[0]["vector_store_ids"] == ["db1", "db2"] + assert tools_with_solr[0]["max_num_results"] == 10 + assert tools_with_solr[0]["solr"] == solr_params def test_get_mcp_tools_with_and_without_token() -> None: @@ -279,8 +289,20 @@ async def test_retrieve_response_builds_rag_and_mcp_tools( # pylint: disable=to # Mock shields.list and models.list for run_shield_moderation mock_client.shields.list = mocker.AsyncMock(return_value=[]) mock_client.models.list = mocker.AsyncMock(return_value=[]) + + # Mock vector_io.query for direct vector querying + mock_query_response = mocker.Mock() + mock_query_response.chunks = [] + mock_query_response.scores = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_query_response) mocker.patch("app.endpoints.query_v2.get_system_prompt", return_value="PROMPT") + + # Mock shield moderation + mock_moderation_result = mocker.Mock() + mock_moderation_result.blocked = False + mocker.patch("app.endpoints.query_v2.run_shield_moderation", return_value=mock_moderation_result) + mock_cfg = mocker.Mock() mock_cfg.mcp_servers = [ ModelContextProtocolServer( @@ -304,11 +326,9 @@ async def test_retrieve_response_builds_rag_and_mcp_tools( # pylint: disable=to kwargs = mock_client.responses.create.call_args.kwargs tools = kwargs["tools"] assert isinstance(tools, list) - # Expect one file_search and one mcp tool + # Expect only MCP tools since RAG tools are skipped when doing direct vector querying tool_types = {t.get("type") for t in tools} - assert tool_types == {"file_search", "mcp"} - file_search 
= next(t for t in tools if t["type"] == "file_search") - assert file_search["vector_store_ids"] == ["dbA"] + assert tool_types == {"mcp"} mcp_tool = next(t for t in tools if t["type"] == "mcp") assert mcp_tool["server_label"] == "fs" assert mcp_tool["headers"] == {"Authorization": "Bearer mytoken"} diff --git a/tests/unit/app/endpoints/test_streaming_query.py b/tests/unit/app/endpoints/test_streaming_query.py index a892aff5..b75f33f8 100644 --- a/tests/unit/app/endpoints/test_streaming_query.py +++ b/tests/unit/app/endpoints/test_streaming_query.py @@ -22,6 +22,7 @@ from models.requests import QueryRequest from models.responses import ReferencedDocument from utils.token_counter import TokenCounter +from utils.types import TurnSummary # Note: content_delta module doesn't exist in llama-stack-client 0.3.x # These are mock classes for backward compatibility with Agent API tests @@ -433,6 +434,9 @@ def test_stream_end_event_json(self) -> None: } # Create mock objects for the test mock_token_usage = TokenCounter(input_tokens=100, output_tokens=50) + mock_summary = TurnSummary( + llm_response="", tool_calls=[], tool_results=[], rag_chunks=[] + ) available_quotas: dict[str, int] = {} referenced_documents = [ ReferencedDocument( @@ -473,6 +477,9 @@ def test_stream_end_event_text(self) -> None: } # Create mock objects for the test mock_token_usage = TokenCounter(input_tokens=100, output_tokens=50) + mock_summary = TurnSummary( + llm_response="", tool_calls=[], tool_results=[], rag_chunks=[] + ) available_quotas: dict[str, int] = {} referenced_documents = [ ReferencedDocument( @@ -502,6 +509,9 @@ def test_stream_end_event_text_no_docs(self) -> None: metadata_map: dict = {} # Create mock objects for the test mock_token_usage = TokenCounter(input_tokens=100, output_tokens=50) + mock_summary = TurnSummary( + llm_response="", tool_calls=[], tool_results=[], rag_chunks=[] + ) available_quotas: dict[str, int] = {} referenced_documents: list[ReferencedDocument] = [] result = stream_end_event( @@ -630,6 +640,9 @@ def test_ols_end_event_structure(self) -> None: } # Create mock objects for the test mock_token_usage = TokenCounter(input_tokens=100, output_tokens=50) + mock_summary = TurnSummary( + llm_response="", tool_calls=[], tool_results=[], rag_chunks=[] + ) available_quotas: dict[str, int] = {} referenced_documents = [ ReferencedDocument( diff --git a/tests/unit/app/endpoints/test_streaming_query_v2.py b/tests/unit/app/endpoints/test_streaming_query_v2.py index d4740786..69cde6e9 100644 --- a/tests/unit/app/endpoints/test_streaming_query_v2.py +++ b/tests/unit/app/endpoints/test_streaming_query_v2.py @@ -53,6 +53,10 @@ async def test_retrieve_response_builds_rag_and_mcp_tools( # Mock shields.list and models.list for run_shield_moderation mock_client.shields.list = mocker.AsyncMock(return_value=[]) mock_client.models.list = mocker.AsyncMock(return_value=[]) + # Mock vector_io.query for direct vector querying + mock_query_response = mocker.Mock() + mock_query_response.chunks = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_query_response) mocker.patch( "app.endpoints.streaming_query_v2.get_system_prompt", return_value="PROMPT" @@ -77,7 +81,9 @@ async def test_retrieve_response_builds_rag_and_mcp_tools( tools = kwargs["tools"] assert isinstance(tools, list) types = {t.get("type") for t in tools} - assert types == {"file_search", "mcp"} + # Since we're now skipping RAG tools and doing direct vector querying, + # we should only see MCP tools, not file_search tools + assert types == 
{"mcp"} @pytest.mark.asyncio @@ -95,6 +101,10 @@ async def test_retrieve_response_no_tools_passes_none(mocker: MockerFixture) -> # Mock shields.list and models.list for run_shield_moderation mock_client.shields.list = mocker.AsyncMock(return_value=[]) mock_client.models.list = mocker.AsyncMock(return_value=[]) + # Mock vector_io.query for direct vector querying + mock_query_response = mocker.Mock() + mock_query_response.chunks = [] + mock_client.vector_io.query = mocker.AsyncMock(return_value=mock_query_response) mocker.patch( "app.endpoints.streaming_query_v2.get_system_prompt", return_value="PROMPT" diff --git a/uv.lock b/uv.lock index b60c50a4..76d4aca8 100644 --- a/uv.lock +++ b/uv.lock @@ -2463,24 +2463,24 @@ wheels = [ [[package]] name = "psutil" -version = "7.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" }, - { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" }, - { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, upload-time = "2025-12-29T08:26:08.276Z" }, - { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" }, - { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload-time = "2025-12-29T08:26:12.483Z" }, - { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload-time = "2025-12-29T08:26:14.528Z" }, - { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" }, - { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" }, - { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" }, - { url = "https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" }, - { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" }, - { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" }, - { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload-time = "2025-12-29T08:26:41.036Z" }, - { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" }, +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, ] [[package]]