diff --git a/RAGChatbot/.gitignore b/RAGChatbot/.gitignore new file mode 100644 index 0000000000..31703cdcff --- /dev/null +++ b/RAGChatbot/.gitignore @@ -0,0 +1,42 @@ +# Environment files +**/.env + +# Test files +**/test.txt + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ + +# Virtual environments +venv/ +env/ +ENV/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Application specific +dmv_index/ +*.log + +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +package-lock.json \ No newline at end of file diff --git a/RAGChatbot/README.md b/RAGChatbot/README.md new file mode 100644 index 0000000000..5d264b6d80 --- /dev/null +++ b/RAGChatbot/README.md @@ -0,0 +1,283 @@ +## RAG Chatbot + +A full-stack Retrieval-Augmented Generation (RAG) application that enables intelligent, document-based question answering. +The system integrates a FastAPI backend powered by LangChain, FAISS, and AI models, alongside a modern React + Vite + Tailwind CSS frontend for an intuitive chat experience. + +## Table of Contents + +- [Project Overview](#project-overview) +- [Features](#features) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Quick Start Deployment](#quick-start-deployment) +- [User Interface](#user-interface) +- [Troubleshooting](#troubleshooting) +- [Additional Info](#additional-info) + +--- + +## Project Overview + +The **RAG Chatbot** demonstrates how retrieval-augmented generation can be used to build intelligent, document-grounded conversational systems. It retrieves relevant information from a knowledge base, passes it to a large language model, and generates a concise and reliable answer to the user’s query. This project integrates seamlessly with cloud-hosted APIs or local model endpoints, offering flexibility for research, enterprise, or educational use. + +--- + +## Features + +**Backend** + +- Clean PDF upload with validation +- LangChain-powered document processing +- FAISS-CPU vector store for efficient similarity search +- Enterprise inference endpoints for embeddings and LLM +- Token-based authentication for inference API +- Comprehensive error handling and logging +- File validation and size limits +- CORS enabled for web integration +- Health check endpoints +- Modular architecture (routes + services) + +**Frontend** + +- PDF file upload with drag-and-drop support +- Real-time chat interface +- Modern, responsive design with Tailwind CSS +- Built with Vite for fast development +- Live status updates +- Mobile-friendly + +--- + +## Architecture + +Below is the architecture as it consists of a server that waits for documents to embed and index into a vector database. Once documents have been uploaded, the server will wait for user queries which initiates a similarity search in the vector database before calling the LLM service to summarize the findings. + +![Architecture Diagram](./images/RAG%20Model%20System%20Design.png) + +**Service Components:** + +1. **React Web UI (Port 3000)** - Provides intuitive chat interface with drag-and-drop PDF upload, real-time messaging, and document-grounded Q&A interaction + +2. **FastAPI Backend (Port 5001)** - Handles document processing, FAISS vector storage, LangChain integration, and orchestrates retrieval-augmented generation for accurate responses + +**Typical Flow:** + +1. User uploads a document through the web UI. +2. 
The backend processes the document by splitting it and transforming it into embeddings before storing it in the vector database. +3. User sends a question through the web UI. +4. The backend retrieves relevant content from stored documents. +5. The model generates a response based on retrieved context. +6. The answer is displayed to the user via the UI. + +--- + +## Prerequisites + +### System Requirements + +Before you begin, ensure you have the following installed: + +- **Docker and Docker Compose** +- **Enterprise inference endpoint access** (token-based authentication) + +### Required API Configuration + +**For Inference Service (RAG Chatbot):** + +This application supports multiple inference deployment patterns: + +- **GenAI Gateway**: Provide your GenAI Gateway URL and API key +- **APISIX Gateway**: Provide your APISIX Gateway URL and authentication token + +Configuration requirements: +- INFERENCE_API_ENDPOINT: URL to your inference service (GenAI Gateway, APISIX Gateway, etc.) +- INFERENCE_API_TOKEN: Authentication token/API key for your chosen service + +### Local Development Configuration + +**For Local Testing Only (Optional)** + +If you're testing with a local inference endpoint using a custom domain (e.g., `inference.example.com` mapped to localhost in your hosts file): + +1. Edit `api/.env` and set: + ```bash + LOCAL_URL_ENDPOINT=inference.example.com + ``` + (Use the domain name from your INFERENCE_API_ENDPOINT without `https://`) + +2. This allows Docker containers to resolve your local domain correctly. + +**Note:** For public domains or cloud-hosted endpoints, leave the default value `not-needed`. + +### Verify Docker Installation + +```bash +# Check Docker version +docker --version + +# Check Docker Compose version +docker compose version + +# Verify Docker is running +docker ps +``` +--- + +## Quick Start Deployment + +### Clone the Repository + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/RAGChatbot +``` + +### Set up the Environment + +This application requires **two `.env` files** for proper configuration: + +1. **Root `.env` file** (for Docker Compose variables) +2. **`api/.env` file** (for backend application configuration) + +#### Step 1: Create Root `.env` File + +```bash +# From the RAGChatbot directory +cat > .env << EOF +# Docker Compose Configuration +LOCAL_URL_ENDPOINT=not-needed +EOF +``` + +**Note:** If using a local domain (e.g., `inference.example.com` mapped to localhost), replace `not-needed` with your domain name (without `https://`). 
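+
+For reference, here is a minimal sketch of what this local-domain setup can look like, assuming the example domain `inference.example.com` used above (on Windows, edit `C:\Windows\System32\drivers\etc\hosts` instead of `/etc/hosts`):
+
+```bash
+# Map the custom inference domain to localhost (Linux/macOS)
+echo "127.0.0.1 inference.example.com" | sudo tee -a /etc/hosts
+
+# The root .env then carries the bare domain, without the https:// prefix:
+# LOCAL_URL_ENDPOINT=inference.example.com
+```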
+ +#### Step 2: Create `api/.env` File + +You can either copy from the example file: + +```bash +cp api/.env.example api/.env +``` + +Then edit `api/.env` with your actual credentials, **OR** create it directly: + +```bash +mkdir -p api +cat > api/.env << EOF +# Inference API Configuration +# INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix) +# - For GenAI Gateway: https://genai-gateway.example.com +# - For APISIX Gateway: https://apisix-gateway.example.com/inference +INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com +INFERENCE_API_TOKEN=your-actual-token-here + +# Model Configuration +# IMPORTANT: Use the full model names as they appear in your inference service +# Check available models: curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token" +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Local URL Endpoint (for Docker) +LOCAL_URL_ENDPOINT=not-needed +EOF +``` + +**Important Configuration Notes:** + +- **INFERENCE_API_ENDPOINT**: Your actual inference service URL (replace `https://your-actual-api-endpoint.com`) +- **INFERENCE_API_TOKEN**: Your actual pre-generated authentication token +- **EMBEDDING_MODEL_NAME** and **INFERENCE_MODEL_NAME**: Use the exact model names from your inference service + - To check available models: `curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token"` +- **LOCAL_URL_ENDPOINT**: Only needed if using local domain mapping (see [Local Development Configuration](#local-development-configuration)) + +**Note**: The docker-compose.yml file automatically loads environment variables from both `.env` (root) and `./api/.env` (backend) files. + +### Running the Application + +Start both API and UI services together with Docker Compose: + +```bash +# From the RAGChatbot directory +docker compose up --build + +# Or run in detached mode (background) +docker compose up -d --build +``` + +The API will be available at: `http://localhost:5001` +The UI will be available at: `http://localhost:3000` + +**View logs**: + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f backend + +# Frontend only +docker compose logs -f frontend +``` + +**Verify the services are running**: + +```bash +# Check API health +curl http://localhost:5001/health + +# Check if containers are running +docker compose ps +``` + +## User Interface + +**Using the Application** + +Make sure you are at the `http://localhost:3000` URL + +You will be directed to the main page which has each feature + +![User Interface](images/ui.png) + +Upload a PDF: + +- Drag and drop a PDF file, or +- Click "Browse Files" to select a file +- Wait for processing to complete + +Start chatting: + +- Type your question in the input field +- Press Enter or click Send +- Get AI-powered answers based on your document + +**UI Configuration** + +When running with Docker Compose, the UI automatically connects to the backend API. The frontend is available at `http://localhost:3000` and the API at `http://localhost:5001`. + +For production deployments, you may want to configure a reverse proxy or update the API URL in the frontend configuration. 
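+
+Whichever layout you choose, the backend can also be smoke-tested directly from the command line using the documented endpoints, mirroring the upload-and-ask flow of the UI. A minimal check (replace `sample.pdf` with any local PDF):
+
+```bash
+# Upload and index a PDF
+curl -X POST "http://localhost:5001/upload-pdf" \
+  -F "file=@sample.pdf"
+
+# Ask a question about the indexed document
+curl -X POST "http://localhost:5001/query" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is this document about?"}'
+```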
+ +### Stopping the Application + +```bash +docker compose down +``` + +## Troubleshooting + +For comprehensive troubleshooting guidance, common issues, and solutions, refer to: + +[Troubleshooting Guide - TROUBLESHOOTING.md](./TROUBLESHOOTING.md) + +--- + +## Additional Info + +The following models have been validated with RAGChatbot: + +| Model | Hardware | +|-------|----------| +| **meta-llama/Llama-3.1-8B-Instruct** | Gaudi | +| **BAAI/bge-base-en-v1.5** (embeddings) | Gaudi | +| **Qwen/Qwen3-4B-Instruct** | Xeon | diff --git a/RAGChatbot/TROUBLESHOOTING.md b/RAGChatbot/TROUBLESHOOTING.md new file mode 100644 index 0000000000..a4d2142b0c --- /dev/null +++ b/RAGChatbot/TROUBLESHOOTING.md @@ -0,0 +1,191 @@ +# Troubleshooting Guide + +This document contains all common issues encountered during development and their solutions. + +## Table of Contents + +- [Docker Compose Issues](#docker-compose-issues) +- [API Common Issues](#api-common-issues) +- [UI Common Issues](#ui-common-issues) + +## Docker Compose Issues + +### Error: "LOCAL_URL_ENDPOINT variable is not set" + +**Problem**: +``` +level=warning msg="The \"LOCAL_URL_ENDPOINT\" variable is not set. Defaulting to a blank string." +decoding failed due to the following error(s): +'services[backend].extra_hosts' bad host name '' +``` + +**Solution**: + +1. Create a `.env` file in the **root** `rag-chatbot` directory (not in `api/`): + ```bash + echo "LOCAL_URL_ENDPOINT=not-needed" > .env + ``` +2. If using a local domain (e.g., `inference.example.com`), replace `not-needed` with your domain name (without `https://`) +3. Restart Docker Compose: `docker compose down && docker compose up` + +### Error: "404 Not Found" when uploading PDF + +**Problem**: +``` +HTTP Request: POST https://api.example.com/BAAI/bge-base-en-v1.5/v1/embeddings "HTTP/1.1 404 Not Found" +openai.NotFoundError: Error code: 404 - {'detail': 'Not Found'} +``` + +**Solution**: + +1. Verify your `api/.env` file has the **correct** API endpoint (not the placeholder): + ```bash + INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com + INFERENCE_API_TOKEN=your-actual-token-here + ``` + +2. Check available models on your inference service: + ```bash + curl https://your-api-endpoint.com/v1/models \ + -H "Authorization: Bearer your-token" + ``` + +3. Update model names to match the exact names from your API: + ```bash + EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 + INFERENCE_MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507 + ``` + +4. Restart containers: `docker compose down && docker compose up --build` + +### Containers fail to start + +**Problem**: Docker containers won't start or crash immediately + +**Solution**: + +1. Check logs for specific errors: + ```bash + docker compose logs backend + docker compose logs frontend + ``` + +2. Ensure ports 5001 and 3000 are available: + ```bash + # Windows + netstat -ano | findstr :5001 + netstat -ano | findstr :3000 + + # Unix/Mac + lsof -i :5001 + lsof -i :3000 + ``` + +3. Clean up and rebuild: + ```bash + docker compose down -v + docker compose up --build + ``` + +4. Restart Docker Desktop if issues persist + +## API Common Issues + +#### "INFERENCE_API_ENDPOINT and INFERENCE_API_TOKEN must be set" + +**Solution**: + +1. Create a `.env` file in the `api` directory +2. Add your inference configuration: + ```bash + INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com + INFERENCE_API_TOKEN=your-actual-token-here + EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 + INFERENCE_MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507 + ``` +3. 
Restart the server + +#### "No documents uploaded" + +**Solution**: + +- Upload a PDF first using the `/upload-pdf` endpoint +- Check server logs for any upload errors +- Verify the PDF is not corrupted or empty + +#### "Could not load vector store" + +**Solution**: + +- The vector store is created when you upload your first PDF +- Check that the application has write permissions in the directory +- Verify `dmv_index/` directory exists and is accessible + +#### Import errors + +**Solution**: + +1. Ensure all dependencies are installed: `pip install -r requirements.txt` +2. Verify you're using Python 3.10 or higher: `python --version` +3. Activate your virtual environment if using one + +#### Server won't start + +**Solution**: + +1. Check if port 5000 is already in use: `lsof -i :5000` (Unix) or `netstat -ano | findstr :5000` (Windows) +2. Use a different port: `uvicorn server:app --port 5001` +3. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 50MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. Check server logs for detailed error messages + +#### Query returns no answer + +**Solution**: + +1. Verify a document has been uploaded successfully +2. Try rephrasing your question +3. Check if the document contains relevant information +4. Increase `TOP_K_DOCUMENTS` in `config.py` for broader search + +## UI Common Issues + +### API Connection Issues + +**Problem**: "Failed to upload PDF" or "Failed to get response" + +**Solution**: + +1. Ensure the API server is running on `http://localhost:5000` +2. Check browser console for detailed errors +3. Verify CORS is enabled in the API + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` diff --git a/RAGChatbot/api/.env.example b/RAGChatbot/api/.env.example new file mode 100644 index 0000000000..b632f68128 --- /dev/null +++ b/RAGChatbot/api/.env.example @@ -0,0 +1,22 @@ +# Inference API Configuration +# INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix) +# - For GenAI Gateway: https://genai-gateway.example.com +# - For APISIX Gateway: https://apisix-gateway.example.com/inference +# +# INFERENCE_API_TOKEN: Authentication token/API key for the inference service +# - For GenAI Gateway: Your GenAI Gateway API key +# - For APISIX Gateway: Your APISIX authentication token +INFERENCE_API_ENDPOINT=https://api.example.com +INFERENCE_API_TOKEN=your-pre-generated-token-here + +# Model Configuration +# IMPORTANT: Use the full model names as they appear in your inference service +# Check available models: curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token" +EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Local URL Endpoint (only needed for non-public domains) +# If using a local domain like inference.example.com mapped to localhost: +# Set this to: inference.example.com (domain without https://) +# If using a public domain, set any placeholder value like: not-needed +LOCAL_URL_ENDPOINT=not-needed diff --git a/RAGChatbot/api/Dockerfile b/RAGChatbot/api/Dockerfile new file mode 100644 index 0000000000..4424ff4c43 --- /dev/null +++ b/RAGChatbot/api/Dockerfile 
@@ -0,0 +1,19 @@ +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +COPY requirements.txt . + + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application files into the container +COPY server.py . + +# Expose the port the service runs on +EXPOSE 5001 + +# Command to run the application +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5001", "--reload"] \ No newline at end of file diff --git a/RAGChatbot/api/README.md b/RAGChatbot/api/README.md new file mode 100644 index 0000000000..7434f91397 --- /dev/null +++ b/RAGChatbot/api/README.md @@ -0,0 +1,693 @@ +# RAG Chatbot API + +A production-ready RAG (Retrieval-Augmented Generation) chatbot API built with FastAPI, LangChain, and FAISS for document-based question answering. + +## Table of Contents + +- [Features](#features) +- [Quick Start](#quick-start) +- [Installation](#installation) +- [Configuration](#configuration) +- [Running the Server](#running-the-server) +- [API Endpoints](#api-endpoints) +- [Project Structure](#project-structure) +- [Testing](#testing) +- [Development](#development) +- [Troubleshooting](#troubleshooting) + +## Features + +- Clean PDF upload with validation +- LangChain-powered document processing +- FAISS-CPU vector store for efficient similarity search +- Enterprise inference endpoints for embeddings and LLM +- Keycloak authentication for secure API access +- Comprehensive error handling and logging +- File validation and size limits +- CORS enabled for web integration +- Health check endpoints +- Modular architecture (routes + services) + +## Quick Start + +Get up and running in 3 minutes using Docker Compose: + +```bash +# 1. Navigate to the rag-chatbot directory +cd /path/to/rag-chatbot + +# 2. Create .env file in the api directory with enterprise configuration +mkdir -p api +cat > api/.env << EOF +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +EOF + +# 3. Start both API and UI services with Docker Compose +docker compose up --build + +# 4. Access the application +# API: http://localhost:5001/docs +# UI: http://localhost:3000 +``` + +The application will automatically start both the backend API and frontend UI. Visit http://localhost:5001/docs for interactive API documentation. + +## Installation + +### Prerequisites + +- Docker and Docker Compose installed +- Enterprise inference endpoint access (Keycloak authentication) + +### Docker Compose Setup + +Docker Compose will start both the API and UI services together. + +1. **Set up environment variables**: + +Create a `.env` file in the `api` directory (relative to `rag-chatbot/`): + +```bash +cd rag-chatbot +mkdir -p api +cat > api/.env << EOF +# Backend API URL (accessible from frontend) +VITE_API_URL=https://backend:5000 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +EOF +``` + +2. 
**Start the services**: + +```bash +# From the rag-chatbot directory +docker compose up --build +``` + +This will: +- Build the backend API container +- Build the frontend UI container +- Start both services automatically +- Make API available at http://localhost:5001 +- Make UI available at http://localhost:3000 + +### Dependencies + +The main dependencies include: + +- `fastapi==0.109.0` - Web framework +- `uvicorn[standard]==0.27.0` - ASGI server +- `langchain==0.1.0` - LLM framework +- `faiss-cpu==1.7.4` - Vector similarity search +- `pypdf==4.0.1` - PDF processing + +See `requirements.txt` for complete list. + +## Configuration + +All configuration is centralized in `config.py`. You can modify settings by editing this file or using environment variables. + +### Environment Variables + +For Docker Compose, create a `.env` file in the `api/` directory (relative to `rag-chatbot/`): + +```bash +# Backend API URL (accessible from frontend) +VITE_API_URL=https://backend:5000 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Optional (with defaults shown) +# VECTOR_STORE_PATH=./dmv_index +# MAX_FILE_SIZE_MB=50 +``` + +**Note**: The docker-compose.yml file automatically loads environment variables from `./api/.env` for the backend service. + +### Configuration Settings + +Edit `config.py` to customize: + +#### File Upload Settings + +```python +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +ALLOWED_EXTENSIONS = {".pdf"} +``` + +#### Text Processing Settings + +```python +CHUNK_SIZE = 1000 # Characters per chunk +CHUNK_OVERLAP = 200 # Overlap between chunks +SEPARATORS = ["\n\n", "\n", " ", ""] # Text splitting separators +``` + +#### Vector Store Settings + +```python +VECTOR_STORE_PATH = "./dmv_index" # Where to store FAISS index +``` + +#### LLM Settings + +```python +LLM_TEMPERATURE = 0 # Response randomness (0-1) +TOP_K_DOCUMENTS = 4 # Documents to retrieve +# Model endpoints and names are configured via environment variables: +# EMBEDDING_MODEL_ENDPOINT, INFERENCE_MODEL_ENDPOINT +# EMBEDDING_MODEL_NAME, INFERENCE_MODEL_NAME +``` + +#### CORS Settings + +```python +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] +``` + +## Running the Server + +**Start both API and UI together**: + +```bash +# From the rag-chatbot directory +docker compose up --build + +# Or run in detached mode (background) +docker compose up -d --build +``` + +**Stop the services**: + +```bash +docker compose down +``` + +The API will be available at: `http://localhost:5001` +The UI will be available at: `http://localhost:3000` + +**View logs**: + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f backend + +# Frontend only +docker compose logs -f frontend +``` + +### Verifying the Server + +```bash +# Check if API server is running +curl http://localhost:5001/ + +# Check health status +curl http://localhost:5001/health + +# Check if containers are running +docker compose ps +``` + +## API Endpoints + +### Health Check + +**GET /** - Basic health check + +```bash +curl http://localhost:5001/ +``` + +Response: + +```json +{ + 
"message": "RAG Chatbot API is running", + "version": "2.0.0", + "status": "healthy", + "vectorstore_loaded": true +} +``` + +**GET /health** - Detailed health status + +```bash +curl http://localhost:5001/health +``` + +Response: + +```json +{ + "status": "healthy", + "vectorstore_available": true, + "enterprise_inference_configured": true +} +``` + +### Upload PDF + +**POST /upload-pdf** - Upload and process a PDF document + +```bash +curl -X POST "http://localhost:5001/upload-pdf" \ + -H "Content-Type: multipart/form-data" \ + -F "file=@/path/to/document.pdf" +``` + +Response: + +```json +{ + "message": "Successfully uploaded and processed 'document.pdf'", + "num_chunks": 45, + "status": "success" +} +``` + +**Validation Rules**: + +- File must be PDF format +- Maximum size: 50MB (configurable) +- File must not be empty +- Content must be extractable + +### Query Documents + +**POST /query** - Ask questions about uploaded documents + +```bash +curl -X POST "http://localhost:5001/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What are the main topics in the document?"}' +``` + +Response: + +```json +{ + "answer": "The main topics covered in the document are...", + "query": "What are the main topics in the document?" +} +``` + +### Delete Vector Store + +**DELETE /vectorstore** - Delete the current vector store + +```bash +curl -X DELETE "http://localhost:5001/vectorstore" +``` + +Response: + +```json +{ + "message": "Vector store deleted successfully", + "status": "success" +} +``` + +### Interactive API Documentation + +FastAPI provides automatic interactive documentation: + +- **Swagger UI**: http://localhost:5001/docs +- **ReDoc**: http://localhost:5001/redoc + +## Project Structure + +The application follows a modular architecture with clear separation of concerns: + +``` +api/ +├── server.py # FastAPI app with routes (main entry point) +├── config.py # Configuration settings +├── models.py # Pydantic models for request/response validation +├── services/ # Business logic layer +│ ├── __init__.py +│ ├── pdf_service.py # PDF processing and validation +│ ├── vector_service.py # Vector store operations (FAISS) +│ └── retrieval_service.py # Query processing and LLM integration +├── requirements.txt # Python dependencies +├── test_api.py # Automated test suite +├── .env # Environment variables (create this) +└── dmv_index/ # FAISS vector store (auto-generated) +``` + +### Architecture Overview + +``` +Client Request + ↓ +server.py (Routes) + ↓ +models.py (Validation) + ↓ +services/ (Business Logic) + ├── pdf_service.py + ├── vector_service.py + └── retrieval_service.py + ↓ +External Services (Enterprise Inference Endpoints, FAISS) +``` + +**Layered Architecture**: + +- **Routes Layer** (`server.py`): HTTP handling, routing, error responses +- **Validation Layer** (`models.py`): Request/response validation +- **Business Logic Layer** (`services/`): Core functionality +- **Configuration Layer** (`config.py`): Settings management + +## Testing + +### Automated Test Suite + +Run the included test suite: + +```bash +# Basic tests (no PDF required) +python test_api.py + +# Full tests with PDF upload +python test_api.py /path/to/your/document.pdf +``` + +The test suite includes: + +- Health check tests +- Upload validation tests +- Query functionality tests +- Error handling tests +- Colored output for easy reading + +### Manual Testing + +1. **Start the services**: + +```bash +docker compose up +``` + +2. 
**Upload a PDF**: + +```bash +curl -X POST "http://localhost:5001/upload-pdf" \ + -F "file=@sample.pdf" +``` + +3. **Query the document**: + +```bash +curl -X POST "http://localhost:5001/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What is this document about?"}' +``` + +4. **Check health**: + +```bash +curl http://localhost:5001/health +``` + +## Development + +### Project Setup for Development + +1. Fork/clone the repository +2. Set up your `.env` file in the `api` directory +3. Run with Docker Compose for development: `docker compose up --build` +4. Make changes to code (changes are reflected with volume mounts in docker-compose.yml) + +### Adding New Features + +#### Add a New Service + +1. Create new file in `services/` directory: + +```python +# services/new_service.py +def new_function(param): + """Your business logic""" + return result +``` + +2. Export from `services/__init__.py`: + +```python +from .new_service import new_function +``` + +3. Use in routes: + +```python +# server.py +from services import new_function + +@app.post("/new-endpoint") +def new_endpoint(): + result = new_function(data) + return result +``` + +#### Add a New Endpoint + +1. Define model in `models.py`: + +```python +class NewRequest(BaseModel): + field: str +``` + +2. Add route in `server.py`: + +```python +@app.post("/new-endpoint") +def new_endpoint(request: NewRequest): + # Your logic here + return {"result": "success"} +``` + +### Modifying Configuration + +Edit `config.py` to change default settings: + +```python +# Example: Increase file size limit +MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB + +# Example: Change chunk size +CHUNK_SIZE = 1500 + +# Example: Use different model +LLM_MODEL = "gpt-4" +``` + +### Code Style + +- Use type hints for all functions +- Add docstrings to all public functions +- Follow PEP 8 style guide +- Keep functions focused (single responsibility) +- Log important operations + +## Troubleshooting + +### Common Issues + +#### "Keycloak authentication or model endpoints not configured" + +**Solution**: + +1. Create a `.env` file in the `api` directory (relative to `rag-chatbot/`) +2. Add required configuration: + ```bash + BASE_URL=https://api.example.com + KEYCLOAK_REALM=master + KEYCLOAK_CLIENT_ID=api + KEYCLOAK_CLIENT_SECRET=your_client_secret + EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 + INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct + EMBEDDING_MODEL_NAME=bge-base-en-v1.5 + INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + ``` +3. Restart the services with `docker compose restart backend` or `docker compose down && docker compose up` + +#### "No documents uploaded" + +**Solution**: + +- Upload a PDF first using the `/upload-pdf` endpoint +- Check server logs for any upload errors +- Verify the PDF is not corrupted or empty + +#### "Could not load vector store" + +**Solution**: + +- The vector store is created when you upload your first PDF +- Check that the application has write permissions in the directory +- Verify `dmv_index/` directory exists and is accessible + +#### Import errors + +**Solution**: + +1. Rebuild the Docker containers: `docker compose down && docker compose build --no-cache && docker compose up` +2. Check container logs: `docker compose logs backend` + +#### Server won't start + +**Solution**: + +1. Check if ports 5001 or 3000 are already in use: `lsof -i :5001` or `lsof -i :3000` (Unix) or `netstat -ano | findstr :5001` (Windows) +2. Check container logs: `docker compose logs backend` +3. 
Try rebuilding containers: `docker compose down && docker compose build --no-cache && docker compose up` +4. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 50MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. Check server logs for detailed error messages + +#### Query returns no answer + +**Solution**: + +1. Verify a document has been uploaded successfully +2. Try rephrasing your question +3. Check if the document contains relevant information +4. Increase `TOP_K_DOCUMENTS` in `config.py` for broader search + +### Logging + +The application logs important events to the console: + +- **INFO**: Normal operations (PDF processing, queries) +- **WARNING**: Non-critical issues +- **ERROR**: Critical errors with stack traces + +To view logs: + +```bash +# View all logs +docker compose logs -f + +# View backend logs only +docker compose logs -f backend +``` + +### Getting Help + +1. View logs with `docker compose logs -f` +2. Visit the health endpoint: `http://localhost:5001/health` +3. Review the error messages in API responses +4. Check the interactive documentation: `http://localhost:5001/docs` + +## Production Deployment + +### Checklist + +Before deploying to production: + +- [ ] Configure secure `KEYCLOAK_CLIENT_SECRET` +- [ ] Set up proper `BASE_URL` for enterprise endpoints +- [ ] Configure specific CORS origins (not `["*"]`) +- [ ] Enable HTTPS +- [ ] Set up monitoring and alerting +- [ ] Configure logging to files +- [ ] Implement rate limiting +- [ ] Verify Keycloak authentication is working +- [ ] Set up backup for vector stores +- [ ] Configure firewall rules +- [ ] Use environment-specific configuration + +### Docker Compose Production Deployment + +The provided `docker-compose.yml` already includes both API and UI services. For production: + +1. **Set up environment variables** in `api/.env`: + +```bash +# Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_production_client_secret + +# Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +``` + +2. **Run in detached mode**: + +```bash +docker compose up -d --build +``` + +3. **Monitor logs**: + +```bash +docker compose logs -f +``` + +## License + +MIT + +## Support + +For issues, questions, or contributions: + +1. Check this README for solutions +2. Review the troubleshooting section +3. Check container logs: `docker compose logs -f` +4. 
Visit the interactive docs at `http://localhost:5001/docs` + +--- + +**Version**: 2.0.0 +**Last Updated**: 2025 +**API Documentation**: http://localhost:5001/docs diff --git a/RAGChatbot/api/config.py b/RAGChatbot/api/config.py new file mode 100644 index 0000000000..3ca2f2addc --- /dev/null +++ b/RAGChatbot/api/config.py @@ -0,0 +1,54 @@ +""" +Configuration settings for RAG Chatbot API +""" + +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Inference API Configuration +# Supports multiple inference deployment patterns: +# - GenAI Gateway: Provide your GenAI Gateway URL and API key +# - APISIX Gateway: Provide your APISIX Gateway URL and authentication token +INFERENCE_API_ENDPOINT = os.getenv("INFERENCE_API_ENDPOINT", "https://api.example.com") +INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN") + +# Model Configuration +EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "bge-base-en-v1.5") +INFERENCE_MODEL_NAME = os.getenv("INFERENCE_MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct") + +# Validate required configuration +if not INFERENCE_API_ENDPOINT or not INFERENCE_API_TOKEN: + raise ValueError("INFERENCE_API_ENDPOINT and INFERENCE_API_TOKEN must be set in environment variables") + +# Application Settings +APP_TITLE = "RAG QnA Chatbot" +APP_DESCRIPTION = "A RAG-based chatbot API using LangChain and FAISS" +APP_VERSION = "2.0.0" + +# File Upload Settings +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +ALLOWED_EXTENSIONS = {".pdf"} + +# Vector Store Settings +VECTOR_STORE_PATH = "./dmv_index" + +# Text Splitting Settings +CHUNK_SIZE = 1000 +CHUNK_OVERLAP = 200 +SEPARATORS = ["\n\n", "\n", " ", ""] + +# Retrieval Settings +TOP_K_DOCUMENTS = 4 +LLM_MODEL = "gpt-3.5-turbo" +LLM_TEMPERATURE = 0 +EMBEDDING_MODEL = "text-embedding-ada-002" + +# CORS Settings +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] + diff --git a/RAGChatbot/api/models.py b/RAGChatbot/api/models.py new file mode 100644 index 0000000000..ae9452a81c --- /dev/null +++ b/RAGChatbot/api/models.py @@ -0,0 +1,61 @@ +""" +Pydantic models for request/response validation +""" + +from pydantic import BaseModel, Field + + +class QueryRequest(BaseModel): + """Request model for querying documents""" + query: str = Field(..., min_length=1, description="Natural language question") + + class Config: + json_schema_extra = { + "example": { + "query": "What are the main topics covered in the document?" + } + } + + +class UploadResponse(BaseModel): + """Response model for PDF upload""" + message: str = Field(..., description="Success message") + num_chunks: int = Field(..., description="Number of chunks created") + status: str = Field(..., description="Operation status") + + class Config: + json_schema_extra = { + "example": { + "message": "Successfully uploaded and processed 'document.pdf'", + "num_chunks": 45, + "status": "success" + } + } + + +class QueryResponse(BaseModel): + """Response model for document queries""" + answer: str = Field(..., description="Answer to the query") + query: str = Field(..., description="Original query") + + class Config: + json_schema_extra = { + "example": { + "answer": "The main topics covered in the document are...", + "query": "What are the main topics covered in the document?" 
+ } + } + + +class HealthResponse(BaseModel): + """Response model for health check""" + status: str = Field(..., description="Health status") + vectorstore_available: bool = Field(..., description="Whether vectorstore is loaded") + openai_key_configured: bool = Field(..., description="Whether inference API token is configured") + + +class DeleteResponse(BaseModel): + """Response model for delete operations""" + message: str = Field(..., description="Result message") + status: str = Field(..., description="Operation status") + diff --git a/RAGChatbot/api/requirements.txt b/RAGChatbot/api/requirements.txt new file mode 100644 index 0000000000..8a77040e54 --- /dev/null +++ b/RAGChatbot/api/requirements.txt @@ -0,0 +1,15 @@ +fastapi>=0.109.0 +uvicorn[standard]>=0.27.0 +python-dotenv>=1.0.0 +langchain>=0.1.0 +langchain-community>=0.0.10 +langchain-openai>=0.0.5 +faiss-cpu>=1.7.4 +pypdf>=4.0.0 +openai>=1.10.0 +python-multipart>=0.0.6 +pydantic>=2.5.0 +pydantic-settings>=2.1.0 +cryptography>=3.1.0 +httpx>=0.24.0 +requests>=2.31.0 \ No newline at end of file diff --git a/RAGChatbot/api/server.py b/RAGChatbot/api/server.py new file mode 100644 index 0000000000..3186c48bc3 --- /dev/null +++ b/RAGChatbot/api/server.py @@ -0,0 +1,229 @@ +""" +FastAPI server with routes for RAG Chatbot API +""" + +import os +import tempfile +import logging +from contextlib import asynccontextmanager +from fastapi import FastAPI, File, UploadFile, HTTPException, status +from fastapi.middleware.cors import CORSMiddleware + +import config +from models import ( + QueryRequest, UploadResponse, QueryResponse, + HealthResponse, DeleteResponse +) +from services import ( + validate_pdf_file, load_and_split_pdf, + store_in_vector_storage, load_vector_store, delete_vector_store, + query_documents +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Lifespan context manager for FastAPI app""" + # Startup + app.state.vectorstore = load_vector_store(config.INFERENCE_API_TOKEN) + if app.state.vectorstore: + logger.info("✓ FAISS vector store loaded successfully") + else: + logger.info("! No existing vector store found. 
Please upload a PDF document.") + + yield + + # Shutdown + logger.info("Shutting down RAG Chatbot API") + + +# Initialize FastAPI app +app = FastAPI( + title=config.APP_TITLE, + description=config.APP_DESCRIPTION, + version=config.APP_VERSION, + lifespan=lifespan +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=config.CORS_ALLOW_ORIGINS, + allow_credentials=config.CORS_ALLOW_CREDENTIALS, + allow_methods=config.CORS_ALLOW_METHODS, + allow_headers=config.CORS_ALLOW_HEADERS, +) + + +# ==================== Routes ==================== + +@app.get("/") +def root(): + """Health check endpoint""" + return { + "message": "RAG Chatbot API is running", + "version": config.APP_VERSION, + "status": "healthy", + "vectorstore_loaded": app.state.vectorstore is not None + } + + +@app.get("/health", response_model=HealthResponse) +def health_check(): + """Detailed health check""" + return HealthResponse( + status="healthy", + vectorstore_available=app.state.vectorstore is not None, + openai_key_configured=bool(config.INFERENCE_API_TOKEN) + ) + + +@app.post("/upload-pdf", response_model=UploadResponse) +async def upload_pdf(file: UploadFile = File(...)): + """ + Upload a PDF file, process it, create embeddings, and store in FAISS + + - **file**: PDF file to upload (max 50MB) + """ + # Validate file + validate_pdf_file(file) + + tmp_path = None + try: + # Read file content + content = await file.read() + file_size = len(content) + + # Check file size + if file_size > config.MAX_FILE_SIZE: + raise HTTPException( + status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, + detail=f"File too large. Maximum size is {config.MAX_FILE_SIZE / (1024*1024)}MB" + ) + + if file_size == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Empty file uploaded" + ) + + logger.info(f"Processing PDF: {file.filename} ({file_size / 1024:.2f} KB)") + + # Save to temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: + tmp.write(content) + tmp_path = tmp.name + logger.info(f"Saved to temporary path: {tmp_path}") + + # Load and split PDF + chunks = load_and_split_pdf(tmp_path) + + if not chunks: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No text content could be extracted from the PDF" + ) + + # Create embeddings and store in FAISS + vectorstore = store_in_vector_storage(chunks, config.INFERENCE_API_TOKEN) + + # Update app state + app.state.vectorstore = vectorstore + + logger.info(f"✓ Successfully processed PDF: {file.filename}") + + return UploadResponse( + message=f"Successfully uploaded and processed '{file.filename}'", + num_chunks=len(chunks), + status="success" + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error processing PDF: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing PDF: {str(e)}" + ) + finally: + # Clean up temporary file + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + logger.info(f"Cleaned up temporary file: {tmp_path}") + except Exception as e: + logger.warning(f"Could not remove temporary file: {str(e)}") + + +@app.post("/query", response_model=QueryResponse) +def query_endpoint(request: QueryRequest): + """ + Query the uploaded documents using RAG + + - **query**: Natural language question about the documents + """ + if not app.state.vectorstore: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No documents uploaded. 
Please upload a PDF first using /upload-pdf endpoint." + ) + + if not request.query or not request.query.strip(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Query cannot be empty" + ) + + try: + result = query_documents( + request.query, + app.state.vectorstore, + config.INFERENCE_API_TOKEN + ) + return QueryResponse(**result) + except Exception as e: + logger.error(f"Error processing query: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing query: {str(e)}" + ) + + +@app.delete("/vectorstore", response_model=DeleteResponse) +def delete_vectorstore_endpoint(): + """Delete the current vector store""" + try: + deleted = delete_vector_store() + app.state.vectorstore = None + + if deleted: + return DeleteResponse( + message="Vector store deleted successfully", + status="success" + ) + else: + return DeleteResponse( + message="No vector store found to delete", + status="success" + ) + except Exception as e: + logger.error(f"Error deleting vector store: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error deleting vector store: {str(e)}" + ) + + +# Entry point for running with uvicorn +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=5001) + diff --git a/RAGChatbot/api/services/__init__.py b/RAGChatbot/api/services/__init__.py new file mode 100644 index 0000000000..2802bf9895 --- /dev/null +++ b/RAGChatbot/api/services/__init__.py @@ -0,0 +1,21 @@ +""" +Services package for RAG Chatbot API +""" + +from .pdf_service import load_and_split_pdf, validate_pdf_file +from .vector_service import store_in_vector_storage, load_vector_store, delete_vector_store +from .retrieval_service import build_retrieval_chain, query_documents +from .api_client import APIClient, get_api_client + +__all__ = [ + 'load_and_split_pdf', + 'validate_pdf_file', + 'store_in_vector_storage', + 'load_vector_store', + 'delete_vector_store', + 'build_retrieval_chain', + 'query_documents', + 'APIClient', + 'get_api_client' +] + diff --git a/RAGChatbot/api/services/api_client.py b/RAGChatbot/api/services/api_client.py new file mode 100644 index 0000000000..8b00942b40 --- /dev/null +++ b/RAGChatbot/api/services/api_client.py @@ -0,0 +1,223 @@ +""" +API Client for authentication and API calls +Similar to simple-client/main.py implementation +""" + +import logging +import requests +import httpx +from typing import Optional +import config + +logger = logging.getLogger(__name__) + + +class APIClient: + """ + Client for handling API calls with token-based authentication + """ + + def __init__(self): + self.base_url = config.INFERENCE_API_ENDPOINT + self.token = config.INFERENCE_API_TOKEN + self.http_client = httpx.Client(verify=False) + logger.info(f"✓ API Client initialized with endpoint: {self.base_url}") + + def get_embedding_client(self): + """ + Get OpenAI-style client for embeddings + Uses bge-base-en-v1.5 model + """ + from openai import OpenAI + + return OpenAI( + api_key=self.token, + base_url=f"{self.base_url}/v1", + http_client=self.http_client + ) + + def get_inference_client(self): + """ + Get OpenAI-style client for inference/completions + Uses Llama-3.1-8B-Instruct model + """ + from openai import OpenAI + + return OpenAI( + api_key=self.token, + base_url=f"{self.base_url}/v1", + http_client=self.http_client + ) + + def embed_text(self, text: str) -> list: + """ + Get embedding for text + Uses the bge-base-en-v1.5 embedding model 
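+        (the model name is taken from config.EMBEDDING_MODEL_NAME and the request is sent to the service's OpenAI-compatible /v1/embeddings endpoint)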
+ + Args: + text: Text to embed + + Returns: + List of embedding values + """ + try: + client = self.get_embedding_client() + # Call the embeddings endpoint + response = client.embeddings.create( + model=config.EMBEDDING_MODEL_NAME, + input=text + ) + return response.data[0].embedding + except Exception as e: + logger.error(f"Error generating embedding: {str(e)}") + raise + + def embed_texts(self, texts: list) -> list: + """ + Get embeddings for multiple texts + Batches requests to avoid exceeding API limits (max batch size: 32) + + Args: + texts: List of texts to embed + + Returns: + List of embedding vectors + """ + try: + BATCH_SIZE = 32 # Maximum allowed batch size + all_embeddings = [] + client = self.get_embedding_client() + + # Process in batches of 32 + for i in range(0, len(texts), BATCH_SIZE): + batch = texts[i:i + BATCH_SIZE] + logger.info(f"Processing embedding batch {i//BATCH_SIZE + 1}/{(len(texts) + BATCH_SIZE - 1)//BATCH_SIZE} ({len(batch)} texts)") + + response = client.embeddings.create( + model=config.EMBEDDING_MODEL_NAME, + input=batch + ) + batch_embeddings = [data.embedding for data in response.data] + all_embeddings.extend(batch_embeddings) + + return all_embeddings + except Exception as e: + logger.error(f"Error generating embeddings: {str(e)}") + raise + + def complete(self, prompt: str, max_tokens: int = 50, temperature: float = 0) -> str: + """ + Get completion from the inference model + Uses Llama-3.1-8B-Instruct for inference + + Args: + prompt: Input prompt + max_tokens: Maximum tokens to generate + temperature: Temperature for generation + + Returns: + Generated text + """ + try: + client = self.get_inference_client() + logger.info(f"Calling inference client with model: {config.INFERENCE_MODEL_NAME}") + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=temperature + ) + + # Handle response structure + if hasattr(response, 'choices') and len(response.choices) > 0: + choice = response.choices[0] + if hasattr(choice, 'text'): + return choice.text + else: + logger.error(f"Unexpected choice structure: {type(choice)}, {choice}") + return str(choice) + else: + logger.error(f"Unexpected response: {type(response)}, {response}") + return "" + except Exception as e: + logger.error(f"Error generating completion: {str(e)}", exc_info=True) + raise + + def chat_complete(self, messages: list, max_tokens: int = 150, temperature: float = 0) -> str: + """ + Get chat completion from the inference model + + Args: + messages: List of message dicts with 'role' and 'content' + max_tokens: Maximum tokens to generate + temperature: Temperature for generation + + Returns: + Generated text + """ + try: + client = self.get_inference_client() + # Convert messages to a prompt for the completions endpoint + # (since Llama models use completions, not chat.completions) + prompt = "" + for msg in messages: + role = msg.get('role', 'user') + content = msg.get('content', '') + if role == 'system': + prompt += f"System: {content}\n\n" + elif role == 'user': + prompt += f"User: {content}\n\n" + elif role == 'assistant': + prompt += f"Assistant: {content}\n\n" + prompt += "Assistant:" + + logger.info(f"Calling inference with prompt length: {len(prompt)}") + + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=temperature + ) + + # Handle response structure + if hasattr(response, 'choices') and len(response.choices) > 0: + choice = 
response.choices[0] + if hasattr(choice, 'text'): + return choice.text + elif hasattr(choice, 'message') and hasattr(choice.message, 'content'): + return choice.message.content + else: + logger.error(f"Unexpected response structure: {type(choice)}, {choice}") + return str(choice) + else: + logger.error(f"Unexpected response: {type(response)}, {response}") + return "" + except Exception as e: + logger.error(f"Error generating chat completion: {str(e)}", exc_info=True) + raise + + def __del__(self): + """ + Cleanup: close httpx client + """ + if self.http_client: + self.http_client.close() + + +# Global API client instance +_api_client: Optional[APIClient] = None + + +def get_api_client() -> APIClient: + """ + Get or create the global API client instance + + Returns: + APIClient instance + """ + global _api_client + if _api_client is None: + _api_client = APIClient() + return _api_client + diff --git a/RAGChatbot/api/services/pdf_service.py b/RAGChatbot/api/services/pdf_service.py new file mode 100644 index 0000000000..9fbc2032af --- /dev/null +++ b/RAGChatbot/api/services/pdf_service.py @@ -0,0 +1,86 @@ +""" +PDF processing service +Handles PDF validation, loading, and text splitting +""" + +import logging +from pathlib import Path +from fastapi import UploadFile, HTTPException, status +from langchain_community.document_loaders import PyPDFLoader +from langchain_text_splitters import RecursiveCharacterTextSplitter + +logger = logging.getLogger(__name__) + +# Constants +ALLOWED_EXTENSIONS = {".pdf"} +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB + + +def validate_pdf_file(file: UploadFile) -> None: + """ + Validate uploaded PDF file + + Args: + file: UploadFile object from FastAPI + + Raises: + HTTPException: If file validation fails + """ + if not file.filename: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No filename provided" + ) + + file_ext = Path(file.filename).suffix.lower() + if file_ext not in ALLOWED_EXTENSIONS: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid file type. Only PDF files are allowed. Got: {file_ext}" + ) + + if not file.content_type or "pdf" not in file.content_type.lower(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid content type. 
Expected PDF, got: {file.content_type}" + ) + + +def load_and_split_pdf(path: str) -> list: + """ + Load PDF and split into chunks using RecursiveCharacterTextSplitter + + Args: + path: Path to the PDF file + + Returns: + List of document chunks + + Raises: + ValueError: If no content can be extracted + Exception: For other processing errors + """ + try: + # Load PDF documents + loader = PyPDFLoader(file_path=path) + documents = loader.load() + logger.info(f"Loaded {len(documents)} pages from PDF") + + if not documents: + raise ValueError("No content extracted from PDF") + + # Split text into chunks with better strategy + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=200, + length_function=len, + separators=["\n\n", "\n", " ", ""] + ) + chunks = text_splitter.split_documents(documents) + logger.info(f"Split into {len(chunks)} chunks") + + return chunks + except Exception as e: + logger.error(f"Error loading and splitting PDF: {str(e)}") + raise + diff --git a/RAGChatbot/api/services/retrieval_service.py b/RAGChatbot/api/services/retrieval_service.py new file mode 100644 index 0000000000..15e4862abd --- /dev/null +++ b/RAGChatbot/api/services/retrieval_service.py @@ -0,0 +1,231 @@ +""" +Retrieval service +Handles query processing and retrieval chain operations +""" + +import logging +from langchain_openai import ChatOpenAI +from langchain_community.vectorstores import FAISS +from langchain.chains.retrieval import create_retrieval_chain +from langchain.chains.combine_documents import create_stuff_documents_chain +from langchain import hub +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.language_models.llms import LLM +from langchain_core.outputs import LLMResult, Generation +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage +from typing import List, Optional, Any +import config + +logger = logging.getLogger(__name__) + + +class CustomLLM(LLM): + """ + Custom LLM class that uses the Llama-3.1-8B-Instruct endpoint + """ + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "custom_llm" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[Any] = None, + **kwargs: Any, + ) -> str: + """Call the LLM on the given prompt.""" + from .api_client import get_api_client + api_client = get_api_client() + return api_client.complete(prompt, max_tokens=kwargs.get('max_tokens', 150), temperature=kwargs.get('temperature', 0)) + + +class CustomChatModel(BaseChatModel): + """ + Custom Chat Model that uses the Llama-3.1-8B-Instruct endpoint + """ + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "custom_chat" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[Any] = None, + **kwargs: Any, + ) -> LLMResult: + """Generate response from messages.""" + from .api_client import get_api_client + api_client = get_api_client() + + # Convert messages to a prompt string + # Build the prompt from all messages + prompt_parts = [] + + for msg in messages: + if isinstance(msg, SystemMessage): + prompt_parts.append(f"System: {msg.content}") + elif isinstance(msg, HumanMessage): + prompt_parts.append(f"User: {msg.content}") + elif isinstance(msg, AIMessage): + prompt_parts.append(f"Assistant: {msg.content}") + + # Join all parts and add assistant prompt suffix + full_prompt = "\n\n".join(prompt_parts) + if not full_prompt.endswith("Assistant:"): 
+ full_prompt += "\n\nAssistant:" + + logger.info(f"Sending prompt to LLM (length: {len(full_prompt)} chars)") + + # Use the complete method which directly sends the prompt + # This calls: Llama-3.1-8B-Instruct/v1/completions with prompt + response_text = api_client.complete( + full_prompt, + max_tokens=kwargs.get('max_tokens', 150), + temperature=kwargs.get('temperature', 0) + ) + + generations = [Generation(text=response_text)] + return LLMResult(generations=[generations]) + + +def get_llm(api_key: str) -> BaseChatModel: + """ + Get LLM instance (ChatOpenAI or CustomChatModel based on config) + + Args: + api_key: API key + + Returns: + LLM instance + """ + # Check if using custom inference endpoint + if hasattr(config, 'INFERENCE_API_TOKEN') and config.INFERENCE_API_TOKEN: + return CustomChatModel() + else: + # Fallback to OpenAI ChatOpenAI + return ChatOpenAI( + model="gpt-3.5-turbo", + temperature=0, + openai_api_key=api_key + ) + + +def build_retrieval_chain(vectorstore: FAISS, api_key: str): + """ + Build retrieval chain with LLM (ChatOpenAI or CustomChatModel) + + Args: + vectorstore: FAISS vectorstore instance + api_key: API key + + Returns: + Configured retrieval chain + + Raises: + Exception: If chain building fails + """ + try: + retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat") + llm = get_llm(api_key) + combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt) + retrieval_chain = create_retrieval_chain( + vectorstore.as_retriever(search_kwargs={"k": 4}), + combine_docs_chain + ) + return retrieval_chain + except Exception as e: + logger.error(f"Error building retrieval chain: {str(e)}") + raise + + +def query_documents(query: str, vectorstore: FAISS, api_key: str) -> dict: + """ + Query the documents using RAG with custom embedding and inference + + Simple workflow: + 1. Create embedding for the query + 2. Search for similar documents in the vectorstore + 3. Format the retrieved context + 4. Summarize using Llama inference endpoint + + Args: + query: User's question + vectorstore: FAISS vectorstore instance + api_key: API key + + Returns: + Dictionary with answer and query + + Raises: + Exception: If query processing fails + """ + try: + logger.info(f"Processing query: {query}") + + # Step 1: Create embedding for the query + logger.info("Creating query embedding...") + from .api_client import get_api_client + api_client = get_api_client() + + query_embedding = api_client.embed_text(query) + logger.info(f"Query embedding created (dimension: {len(query_embedding)})") + + # Step 2: Search for similar documents (similarity search) + logger.info("Searching for similar documents...") + similar_docs = vectorstore.similarity_search_by_vector(query_embedding, k=4) + logger.info(f"Found {len(similar_docs)} similar documents") + + if not similar_docs: + return { + "answer": "I couldn't find any relevant documents to answer your question.", + "query": query + } + + # Step 3: Format the retrieved context + context_parts = [] + for i, doc in enumerate(similar_docs): + context_parts.append(f"Document {i+1}:\n{doc.page_content}") + + context = "\n\n".join(context_parts) + logger.info(f"Context length: {len(context)} characters") + + # Step 4: Create prompt for summarization using Llama + prompt = f"""Based on the following documents, provide a comprehensive summary that addresses the question. 
+ +Documents: +{context} + +Question: {query} + +Summary:""" + + logger.info(f"Calling Llama inference with prompt length: {len(prompt)}") + + # Call Llama inference endpoint for summarization + answer = api_client.complete( + prompt=prompt, + max_tokens=200, + temperature=0 + ) + + answer = answer.strip() + + if not answer: + answer = "I couldn't find a relevant answer in the documents." + + logger.info("✓ Query completed successfully") + + return { + "answer": answer, + "query": query + } + except Exception as e: + logger.error(f"Error processing query: {str(e)}", exc_info=True) + raise + diff --git a/RAGChatbot/api/services/vector_service.py b/RAGChatbot/api/services/vector_service.py new file mode 100644 index 0000000000..0884a9c0db --- /dev/null +++ b/RAGChatbot/api/services/vector_service.py @@ -0,0 +1,152 @@ +""" +Vector store service +Handles FAISS vector store operations +""" + +import os +import logging +import shutil +from typing import Optional +from langchain_openai import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain_core.embeddings import Embeddings +import config + +logger = logging.getLogger(__name__) + +# Constants +VECTOR_STORE_PATH = "./dmv_index" + + +class CustomEmbeddings(Embeddings): + """ + Custom embeddings class that uses the bge-base-en-v1.5 endpoint + """ + + def __init__(self): + from .api_client import get_api_client + self.api_client = get_api_client() + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """ + Embed multiple documents + Note: Batches are handled automatically by api_client (max batch size: 32) + + Args: + texts: List of texts to embed + + Returns: + List of embedding vectors + """ + return self.api_client.embed_texts(texts) + + def embed_query(self, text: str) -> list[float]: + """ + Embed a single query + + Args: + text: Text to embed + + Returns: + Embedding vector + """ + return self.api_client.embed_text(text) + + +def get_embeddings(api_key: str) -> Embeddings: + """ + Create embeddings instance + + Args: + api_key: API key (for compatibility, not used with custom endpoint) + + Returns: + Embeddings instance (CustomEmbeddings if using custom API, OpenAIEmbeddings otherwise) + """ + # Check if using custom inference endpoint + if hasattr(config, 'INFERENCE_API_TOKEN') and config.INFERENCE_API_TOKEN: + return CustomEmbeddings() + else: + # Fallback to OpenAI + return OpenAIEmbeddings( + model="text-embedding-ada-002", + openai_api_key=api_key + ) + + +def store_in_vector_storage(chunks: list, api_key: str) -> FAISS: + """ + Create embeddings and store in FAISS vector store + + Args: + chunks: List of document chunks + api_key: OpenAI API key + + Returns: + FAISS vectorstore instance + + Raises: + Exception: If storage operation fails + """ + try: + embeddings = get_embeddings(api_key) + vectorstore = FAISS.from_documents(chunks, embeddings) + + # Ensure directory exists + os.makedirs( + os.path.dirname(VECTOR_STORE_PATH) if os.path.dirname(VECTOR_STORE_PATH) else ".", + exist_ok=True + ) + vectorstore.save_local(VECTOR_STORE_PATH) + logger.info(f"Saved vector store to {VECTOR_STORE_PATH}") + + return vectorstore + except Exception as e: + logger.error(f"Error storing vectors: {str(e)}") + raise + + +def load_vector_store(api_key: str) -> Optional[FAISS]: + """ + Load existing FAISS vector store + + Args: + api_key: OpenAI API key + + Returns: + FAISS vectorstore instance or None if not found + """ + try: + embeddings = get_embeddings(api_key) + vectorstore = FAISS.load_local( + 
+            VECTOR_STORE_PATH,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+        logger.info("Loaded existing FAISS vector store")
+        return vectorstore
+    except Exception as e:
+        logger.warning(f"Could not load vector store: {str(e)}")
+        return None
+
+
+def delete_vector_store() -> bool:
+    """
+    Delete the vector store from disk
+
+    Returns:
+        True if deleted successfully, False otherwise
+
+    Raises:
+        Exception: If deletion fails
+    """
+    try:
+        if os.path.exists(VECTOR_STORE_PATH):
+            shutil.rmtree(VECTOR_STORE_PATH)
+            logger.info("Deleted vector store")
+            return True
+        return False
+    except Exception as e:
+        logger.error(f"Error deleting vector store: {str(e)}")
+        raise
+
diff --git a/RAGChatbot/api/test_api.py b/RAGChatbot/api/test_api.py
new file mode 100644
index 0000000000..ac4baf6be5
--- /dev/null
+++ b/RAGChatbot/api/test_api.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+Test script for RAG Chatbot API
+Tests PDF upload and query functionality
+
+Usage:
+    python test_api.py                     # Run basic tests
+    python test_api.py /path/to/file.pdf   # Run full tests with PDF upload
+"""
+
+import requests
+import sys
+import time
+from pathlib import Path
+
+# The backend listens on port 5001 (see docker-compose.yml and the README)
+BASE_URL = "http://localhost:5001"
+
+def print_status(message, status="info"):
+    """Print colored status messages"""
+    colors = {
+        "info": "\033[94m",     # Blue
+        "success": "\033[92m",  # Green
+        "error": "\033[91m",    # Red
+        "warning": "\033[93m"   # Yellow
+    }
+    reset = "\033[0m"
+    print(f"{colors.get(status, '')}{message}{reset}")
+
+
+def test_health_check():
+    """Test health check endpoint"""
+    print_status("\n1. Testing health check endpoint...", "info")
+    try:
+        response = requests.get(f"{BASE_URL}/")
+        response.raise_for_status()
+        data = response.json()
+        print_status(f"✓ Health check passed: {data['message']}", "success")
+        print(f"   Version: {data.get('version', 'N/A')}")
+        print(f"   Vectorstore loaded: {data.get('vectorstore_loaded', False)}")
+        return True
+    except Exception as e:
+        print_status(f"✗ Health check failed: {str(e)}", "error")
+        return False
+
+
+def test_detailed_health():
+    """Test detailed health endpoint"""
+    print_status("\n2. Testing detailed health endpoint...", "info")
+    try:
+        response = requests.get(f"{BASE_URL}/health")
+        response.raise_for_status()
+        data = response.json()
+        print_status("✓ Detailed health check passed", "success")
+        print(f"   Status: {data.get('status')}")
+        print(f"   Vectorstore available: {data.get('vectorstore_available')}")
+        print(f"   OpenAI key configured: {data.get('openai_key_configured')}")
+        return True
+    except Exception as e:
+        print_status(f"✗ Detailed health check failed: {str(e)}", "error")
+        return False
+
+
+def test_upload_pdf(pdf_path=None):
+    """Test PDF upload endpoint"""
+    print_status("\n3. Testing PDF upload...", "info")
+
+    if pdf_path and Path(pdf_path).exists():
+        file_path = pdf_path
+    else:
+        print_status("   No PDF file provided.
Skipping upload test.", "warning") + print_status(" To test upload, run: python test_api.py /path/to/file.pdf", "warning") + return None + + try: + print(f" Uploading: {file_path}") + with open(file_path, 'rb') as f: + files = {'file': (Path(file_path).name, f, 'application/pdf')} + response = requests.post(f"{BASE_URL}/upload-pdf", files=files) + response.raise_for_status() + data = response.json() + + print_status(f"✓ Upload successful!", "success") + print(f" Message: {data['message']}") + print(f" Number of chunks: {data['num_chunks']}") + print(f" Status: {data['status']}") + return True + except requests.exceptions.HTTPError as e: + print_status(f"✗ Upload failed: {e}", "error") + try: + error_detail = e.response.json() + print(f" Error details: {error_detail}") + except: + pass + return False + except Exception as e: + print_status(f"✗ Upload failed: {str(e)}", "error") + return False + + +def test_query(query="What is this document about?"): + """Test query endpoint""" + print_status("\n4. Testing query endpoint...", "info") + print(f" Query: '{query}'") + + try: + response = requests.post( + f"{BASE_URL}/query", + json={"query": query} + ) + response.raise_for_status() + data = response.json() + + print_status("✓ Query successful!", "success") + print(f" Answer: {data['answer'][:200]}{'...' if len(data['answer']) > 200 else ''}") + return True + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + print_status("✗ No documents uploaded yet. Upload a PDF first.", "warning") + else: + print_status(f"✗ Query failed: {e}", "error") + try: + error_detail = e.response.json() + print(f" Error details: {error_detail}") + except: + pass + return False + except Exception as e: + print_status(f"✗ Query failed: {str(e)}", "error") + return False + + +def test_invalid_upload(): + """Test upload validation with invalid file""" + print_status("\n5. 
Testing upload validation...", "info")
+
+    try:
+        # Try uploading a text file
+        files = {'file': ('test.txt', b'This is not a PDF', 'text/plain')}
+        response = requests.post(f"{BASE_URL}/upload-pdf", files=files)
+
+        if response.status_code == 400:
+            print_status("✓ Validation working: Invalid file rejected correctly", "success")
+            return True
+        else:
+            print_status("✗ Validation issue: Invalid file was accepted", "error")
+            return False
+    except Exception as e:
+        print_status(f"✗ Validation test failed: {str(e)}", "error")
+        return False
+
+
+def main():
+    """Run all tests"""
+    print_status("=" * 60)
+    print_status("RAG Chatbot API Test Suite", "info")
+    print_status("=" * 60)
+
+    # Check if server is running
+    print_status("\nChecking if server is running...", "info")
+    try:
+        requests.get(BASE_URL, timeout=2)
+    except requests.exceptions.RequestException:
+        print_status("✗ Server is not running!", "error")
+        print_status("Please start the server first:", "warning")
+        print_status("  cd RAGChatbot/api", "warning")
+        print_status("  uvicorn server:app --reload", "warning")
+        print_status("  OR: python server.py", "warning")
+        sys.exit(1)
+
+    print_status("✓ Server is running", "success")
+
+    # Get PDF path from command line if provided
+    pdf_path = sys.argv[1] if len(sys.argv) > 1 else None
+
+    # Run tests
+    results = []
+    results.append(("Health Check", test_health_check()))
+    results.append(("Detailed Health", test_detailed_health()))
+
+    upload_result = test_upload_pdf(pdf_path)
+    if upload_result is not None:
+        results.append(("PDF Upload", upload_result))
+
+    if upload_result:
+        # Wait a moment for processing
+        time.sleep(1)
+        results.append(("Query", test_query()))
+        results.append(("Query 2", test_query("Summarize the main points")))
+
+    results.append(("Validation", test_invalid_upload()))
+
+    # Print summary
+    print_status("\n" + "=" * 60)
+    print_status("Test Summary", "info")
+    print_status("=" * 60)
+
+    passed = sum(1 for _, result in results if result)
+    total = len(results)
+
+    for test_name, result in results:
+        status = "success" if result else "error"
+        symbol = "✓" if result else "✗"
+        print_status(f"{symbol} {test_name}", status)
+
+    print_status(f"\nPassed: {passed}/{total}", "success" if passed == total else "warning")
+
+    if pdf_path is None:
+        print_status("\nNote: PDF upload test was skipped", "warning")
+        print_status("To run full tests with PDF upload:", "info")
+        print_status("  python test_api.py /path/to/your/document.pdf", "info")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/RAGChatbot/docker-compose.yml b/RAGChatbot/docker-compose.yml
new file mode 100644
index 0000000000..e0f1382da5
--- /dev/null
+++ b/RAGChatbot/docker-compose.yml
@@ -0,0 +1,44 @@
+services:
+  # Backend API (Python/FastAPI)
+  backend:
+    build:
+      context: ./api
+      dockerfile: Dockerfile
+    container_name: backend
+    ports:
+      - "5001:5001"
+    env_file:
+      - ./api/.env
+    volumes:
+      - ./api:/app
+    networks:
+      - app_network
+    extra_hosts:
+      - "${LOCAL_URL_ENDPOINT}:host-gateway"
+    restart: unless-stopped
+
+  # Frontend (React)
+  frontend:
+    build:
+      context: ./ui
+      dockerfile: Dockerfile
+    container_name: frontend
+    ports:
+      - "3000:3000"
+    depends_on:
+      - backend
+    networks:
+      - app_network
+    restart: unless-stopped
+
+##################################
+# 🔗 Shared Network
+##################################
+networks:
+  app_network:
+    driver: bridge
\ No newline at end of file
diff --git
a/RAGChatbot/images/RAG Model System Design.png b/RAGChatbot/images/RAG Model System Design.png new file mode 100644 index 0000000000..7838fe5870 Binary files /dev/null and b/RAGChatbot/images/RAG Model System Design.png differ diff --git a/RAGChatbot/images/ui.png b/RAGChatbot/images/ui.png new file mode 100644 index 0000000000..484703ba15 Binary files /dev/null and b/RAGChatbot/images/ui.png differ diff --git a/RAGChatbot/ui/.gitignore b/RAGChatbot/ui/.gitignore new file mode 100644 index 0000000000..d600b6c76d --- /dev/null +++ b/RAGChatbot/ui/.gitignore @@ -0,0 +1,25 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? + diff --git a/RAGChatbot/ui/Dockerfile b/RAGChatbot/ui/Dockerfile new file mode 100644 index 0000000000..7dab0c57a7 --- /dev/null +++ b/RAGChatbot/ui/Dockerfile @@ -0,0 +1,19 @@ +FROM node:18 + +# Set the working directory +WORKDIR /app + +# Copy package.json +COPY package.json ./ + +# Install dependencies +RUN npm install + +# Copy the rest of the application files +COPY . . + +# Expose the port the app runs on +EXPOSE 3000 + +# Command to run the application +CMD ["npm", "run", "dev", "--", "--host"] \ No newline at end of file diff --git a/RAGChatbot/ui/README.md b/RAGChatbot/ui/README.md new file mode 100644 index 0000000000..532316281f --- /dev/null +++ b/RAGChatbot/ui/README.md @@ -0,0 +1,189 @@ +# RAG Chatbot UI + +A clean and elegant React-based user interface for the RAG Chatbot application. + +## Features + +- PDF file upload with drag-and-drop support +- Real-time chat interface +- Modern, responsive design with Tailwind CSS +- Built with Vite for fast development +- Live status updates +- Mobile-friendly + +## Quick Start + +The UI runs automatically when using Docker Compose. See the main project README for setup instructions. + +The UI will be available at `http://localhost:3000` + +## Development + +This UI runs as part of the Docker Compose setup. For local development without Docker, you can use the scripts below, but Docker Compose is the recommended approach. + +### Available Scripts (Local Development Only) + +```bash +# Start development server with hot reload +npm run dev + +# Build for production +npm run build + +# Preview production build +npm run preview + +# Lint code +npm run lint +``` + +### Project Structure + +``` +ui/ +├── src/ +│ ├── components/ +│ │ ├── Header.jsx # App header +│ │ ├── StatusBar.jsx # Document status display +│ │ ├── PDFUploader.jsx # PDF upload component +│ │ └── ChatInterface.jsx # Chat UI +│ ├── services/ +│ │ └── api.js # API client +│ ├── App.jsx # Main app component +│ ├── main.jsx # Entry point +│ └── index.css # Global styles +├── public/ # Static assets +├── index.html # HTML template +├── vite.config.js # Vite configuration +├── tailwind.config.js # Tailwind CSS config +└── package.json # Dependencies +``` + +## Configuration + +When running with Docker Compose, the UI automatically connects to the backend. Configuration is handled through the docker-compose.yml file. + +## Usage + +1. **Start the application** using Docker Compose (from the `rag-chatbot` directory): + + ```bash + docker compose up --build + ``` + +2. **Upload a PDF**: + + - Drag and drop a PDF file, or + - Click "Browse Files" to select a file + - Wait for processing to complete + +4. 
**Start chatting**: + - Type your question in the input field + - Press Enter or click Send + - Get AI-powered answers based on your document + +## Features in Detail + +### PDF Upload + +- Drag-and-drop support +- File validation (PDF only, max 50MB) +- Upload progress indicator +- Success/error notifications + +### Chat Interface + +- Real-time messaging +- Message history +- Typing indicators +- Timestamp display +- Error handling + +### Status Bar + +- Document upload status +- Progress tracking +- Quick reset functionality + +## Building for Production + +```bash +# Build the production bundle +npm run build + +# The built files will be in the dist/ directory +# Serve with any static file server +``` + +### Deploy with Docker Compose + +The UI is automatically deployed when using Docker Compose from the root `rag-chatbot` directory. The Dockerfile in this directory is used by the docker-compose.yml configuration. + +## Customization + +### Styling + +The UI uses Tailwind CSS. Customize colors and theme in `tailwind.config.js`: + +```javascript +theme: { + extend: { + colors: { + primary: { + // Your custom colors + } + } + } +} +``` + +### Backend Integration + +The UI communicates with the backend through `src/services/api.js`. When running with Docker Compose, the backend is automatically available. + +## Troubleshooting + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` + +## Browser Support + +- Chrome/Edge (latest) +- Firefox (latest) +- Safari (latest) +- Mobile browsers (iOS Safari, Chrome Mobile) + +## Performance + +- Optimized bundle size with Vite +- Code splitting for faster loads +- Lazy loading of components +- Efficient re-renders with React + +## License + +MIT + +--- + +**Built with**: React, Vite, Tailwind CSS, Axios, and Lucide Icons diff --git a/RAGChatbot/ui/index.html b/RAGChatbot/ui/index.html new file mode 100644 index 0000000000..c6a3e65988 --- /dev/null +++ b/RAGChatbot/ui/index.html @@ -0,0 +1,14 @@ + + + + + + + RAG Chatbot + + +
+ + + + diff --git a/RAGChatbot/ui/package.json b/RAGChatbot/ui/package.json new file mode 100644 index 0000000000..c4249ab4f3 --- /dev/null +++ b/RAGChatbot/ui/package.json @@ -0,0 +1,32 @@ +{ + "name": "rag-chatbot-ui", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "axios": "^1.6.0", + "lucide-react": "^0.294.0" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@vitejs/plugin-react": "^4.2.1", + "autoprefixer": "^10.4.16", + "eslint": "^8.55.0", + "eslint-plugin-react": "^7.33.2", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.5", + "postcss": "^8.4.32", + "tailwindcss": "^3.3.6", + "vite": "^5.0.8" + } +} + diff --git a/RAGChatbot/ui/postcss.config.js b/RAGChatbot/ui/postcss.config.js new file mode 100644 index 0000000000..b4a6220e2d --- /dev/null +++ b/RAGChatbot/ui/postcss.config.js @@ -0,0 +1,7 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} + diff --git a/RAGChatbot/ui/src/App.jsx b/RAGChatbot/ui/src/App.jsx new file mode 100644 index 0000000000..42ab09fd23 --- /dev/null +++ b/RAGChatbot/ui/src/App.jsx @@ -0,0 +1,77 @@ +import { useState } from 'react' +import ChatInterface from './components/ChatInterface' +import PDFUploader from './components/PDFUploader' +import Header from './components/Header' +import StatusBar from './components/StatusBar' + +function App() { + const [documentUploaded, setDocumentUploaded] = useState(false) + const [documentName, setDocumentName] = useState('') + const [uploadProgress, setUploadProgress] = useState(0) + const [isUploading, setIsUploading] = useState(false) + + const handleUploadSuccess = (fileName, numChunks) => { + setDocumentUploaded(true) + setDocumentName(fileName) + setUploadProgress(100) + setTimeout(() => { + setIsUploading(false) + setUploadProgress(0) + }, 1000) + } + + const handleUploadStart = () => { + setIsUploading(true) + setUploadProgress(0) + } + + const handleUploadProgress = (progress) => { + setUploadProgress(progress) + } + + const handleReset = () => { + setDocumentUploaded(false) + setDocumentName('') + setUploadProgress(0) + } + + return ( +
+
+ +
+ {/* Status Bar */} + + +
+ {/* Left Panel - PDF Upload */} +
+ +
+ + {/* Right Panel - Chat Interface */} +
+ +
+
+
+
+ ) +} + +export default App + diff --git a/RAGChatbot/ui/src/components/ChatInterface.jsx b/RAGChatbot/ui/src/components/ChatInterface.jsx new file mode 100644 index 0000000000..ee26d0a02a --- /dev/null +++ b/RAGChatbot/ui/src/components/ChatInterface.jsx @@ -0,0 +1,184 @@ +import { useState, useRef, useEffect } from 'react' +import { Send, Bot, User, AlertCircle } from 'lucide-react' +import { queryDocument } from '../services/api' + +export default function ChatInterface({ documentUploaded, documentName }) { + const [messages, setMessages] = useState([]) + const [input, setInput] = useState('') + const [isLoading, setIsLoading] = useState(false) + const messagesEndRef = useRef(null) + + const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }) + } + + useEffect(() => { + scrollToBottom() + }, [messages]) + + useEffect(() => { + // Reset messages when document changes + if (documentUploaded) { + setMessages([ + { + type: 'bot', + content: `Document "${documentName}" has been uploaded successfully! You can now ask me questions about it.`, + timestamp: new Date() + } + ]) + } else { + setMessages([]) + } + }, [documentUploaded, documentName]) + + const handleSubmit = async (e) => { + e.preventDefault() + + if (!input.trim() || !documentUploaded) return + + const userMessage = { + type: 'user', + content: input, + timestamp: new Date() + } + + setMessages(prev => [...prev, userMessage]) + setInput('') + setIsLoading(true) + + try { + const response = await queryDocument(input) + + const botMessage = { + type: 'bot', + content: response.answer, + timestamp: new Date() + } + + setMessages(prev => [...prev, botMessage]) + } catch (error) { + const errorMessage = { + type: 'error', + content: error.message || 'Failed to get response. Please try again.', + timestamp: new Date() + } + setMessages(prev => [...prev, errorMessage]) + } finally { + setIsLoading(false) + } + } + + return ( +
+ {/* Chat Header */} +
+

+ + Chat Assistant +

+

+ {documentUploaded + ? 'Ask questions about your document' + : 'Upload a document to start chatting'} +

+
+ + {/* Messages Container */} +
+ {!documentUploaded && messages.length === 0 && ( +
+
+ +

Upload a PDF document to start chatting

+
+
+ )} + + {messages.map((message, index) => ( +
+
+ {message.type === 'user' ? ( + + ) : message.type === 'error' ? ( + + ) : ( + + )} +
+ +
+
+

{message.content}

+
+

+ {message.timestamp.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })} +

+
+
+ ))} + + {isLoading && ( +
+
+ +
+
+
+
+
+
+
+
+
+ )} + +
+
+ + {/* Input Form */} +
+
+ setInput(e.target.value)} + placeholder={documentUploaded ? "Ask a question..." : "Upload a document first..."} + disabled={!documentUploaded || isLoading} + className="flex-1 px-4 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 disabled:bg-gray-100 disabled:cursor-not-allowed" + /> + +
+

+ Press Enter to send • The AI will answer based on your uploaded document +

+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/Header.jsx b/RAGChatbot/ui/src/components/Header.jsx new file mode 100644 index 0000000000..0cfd2b11ce --- /dev/null +++ b/RAGChatbot/ui/src/components/Header.jsx @@ -0,0 +1,28 @@ +import { MessageSquare, FileText } from 'lucide-react' + +export default function Header() { + return ( +
+
+
+
+
+ +
+
+

+ RAG Chatbot +

+

Ask questions about your documents

+
+
+ +
+ +
+
+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/PDFUploader.jsx b/RAGChatbot/ui/src/components/PDFUploader.jsx new file mode 100644 index 0000000000..e6a549f42e --- /dev/null +++ b/RAGChatbot/ui/src/components/PDFUploader.jsx @@ -0,0 +1,155 @@ +import { useState, useRef } from 'react' +import { Upload, FileText, CheckCircle, AlertCircle } from 'lucide-react' +import { uploadPDF } from '../services/api' + +export default function PDFUploader({ onUploadSuccess, onUploadStart, onUploadProgress, documentUploaded }) { + const [dragActive, setDragActive] = useState(false) + const [error, setError] = useState('') + const fileInputRef = useRef(null) + + const handleDrag = (e) => { + e.preventDefault() + e.stopPropagation() + if (e.type === "dragenter" || e.type === "dragover") { + setDragActive(true) + } else if (e.type === "dragleave") { + setDragActive(false) + } + } + + const handleDrop = async (e) => { + e.preventDefault() + e.stopPropagation() + setDragActive(false) + + if (e.dataTransfer.files && e.dataTransfer.files[0]) { + await handleFile(e.dataTransfer.files[0]) + } + } + + const handleChange = async (e) => { + e.preventDefault() + if (e.target.files && e.target.files[0]) { + await handleFile(e.target.files[0]) + } + } + + const handleFile = async (file) => { + setError('') + + // Validate file type + if (!file.name.endsWith('.pdf')) { + setError('Please upload a PDF file') + return + } + + // Validate file size (50MB) + if (file.size > 50 * 1024 * 1024) { + setError('File size must be less than 50MB') + return + } + + onUploadStart() + + try { + // Simulate progress + onUploadProgress(30) + + const result = await uploadPDF(file) + + onUploadProgress(90) + + onUploadSuccess(file.name, result.num_chunks) + setError('') + } catch (err) { + setError(err.message || 'Failed to upload file') + onUploadProgress(0) + } + } + + const handleButtonClick = () => { + fileInputRef.current?.click() + } + + return ( +
+
+

+ + Upload Document +

+

+ Upload a PDF to start asking questions +

+
+ +
+ + + {documentUploaded ? ( +
+ +

Document uploaded successfully!

+ +
+ ) : ( +
+ +
+

Drop your PDF here

+

or

+
+ +

PDF files only, max 50MB

+
+ )} +
+ + {error && ( +
+ +

{error}

+
+ )} + +
+

Instructions:

+
    +
+            • Upload a PDF document (max 50MB)
+            • Wait for processing to complete
+            • Start asking questions in the chat
+            • Get intelligent answers based on your document
+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/StatusBar.jsx b/RAGChatbot/ui/src/components/StatusBar.jsx new file mode 100644 index 0000000000..2957d014c5 --- /dev/null +++ b/RAGChatbot/ui/src/components/StatusBar.jsx @@ -0,0 +1,54 @@ +import { CheckCircle, AlertCircle, Loader, Trash2 } from 'lucide-react' + +export default function StatusBar({ documentUploaded, documentName, isUploading, uploadProgress, onReset }) { + return ( +
+
+
+ {isUploading && ( + <> + +
+

Uploading and processing...

+
+
+
+
+ + )} + + {!isUploading && documentUploaded && ( + <> + +
+

Document Ready

+

{documentName}

+
+ + )} + + {!isUploading && !documentUploaded && ( + <> + +

No document uploaded

+ + )} +
+ + {documentUploaded && !isUploading && ( + + )} +
+
+ ) +} + diff --git a/RAGChatbot/ui/src/index.css b/RAGChatbot/ui/src/index.css new file mode 100644 index 0000000000..a523f51e54 --- /dev/null +++ b/RAGChatbot/ui/src/index.css @@ -0,0 +1,34 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} + +@layer utilities { + .scrollbar-hide::-webkit-scrollbar { + display: none; + } + + .scrollbar-hide { + -ms-overflow-style: none; + scrollbar-width: none; + } +} + diff --git a/RAGChatbot/ui/src/main.jsx b/RAGChatbot/ui/src/main.jsx new file mode 100644 index 0000000000..299bc52310 --- /dev/null +++ b/RAGChatbot/ui/src/main.jsx @@ -0,0 +1,11 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import App from './App.jsx' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')).render( + + + , +) + diff --git a/RAGChatbot/ui/src/services/api.js b/RAGChatbot/ui/src/services/api.js new file mode 100644 index 0000000000..87ffb0667d --- /dev/null +++ b/RAGChatbot/ui/src/services/api.js @@ -0,0 +1,85 @@ +import axios from 'axios' + +// API base URL - uses Vite proxy in development (proxies to localhost:5000) +const API_BASE_URL = import.meta.env.VITE_API_URL || '/api' + +const api = axios.create({ + baseURL: API_BASE_URL, + headers: { + 'Content-Type': 'application/json', + }, +}) + +/** + * Upload a PDF file to the API + * @param {File} file - The PDF file to upload + * @returns {Promise} Response with upload status and chunk count + */ +export const uploadPDF = async (file) => { + const formData = new FormData() + formData.append('file', file) + + try { + const response = await api.post('/upload-pdf', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }) + return response.data + } catch (error) { + console.error('Upload error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to upload PDF. Please try again.' + ) + } +} + +/** + * Query the uploaded document + * @param {string} query - The question to ask + * @returns {Promise} Response with the answer + */ +export const queryDocument = async (query) => { + try { + const response = await api.post('/query', { query }) + return response.data + } catch (error) { + console.error('Query error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to get response. Please try again.' 
+ ) + } +} + +/** + * Check API health + * @returns {Promise} Health status + */ +export const checkHealth = async () => { + try { + const response = await api.get('/health') + return response.data + } catch (error) { + console.error('Health check error:', error) + throw new Error('API is not available') + } +} + +/** + * Delete the vector store + * @returns {Promise} Deletion status + */ +export const deleteVectorStore = async () => { + try { + const response = await api.delete('/vectorstore') + return response.data + } catch (error) { + console.error('Delete error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to delete vector store' + ) + } +} + +export default api + diff --git a/RAGChatbot/ui/tailwind.config.js b/RAGChatbot/ui/tailwind.config.js new file mode 100644 index 0000000000..037cbd7203 --- /dev/null +++ b/RAGChatbot/ui/tailwind.config.js @@ -0,0 +1,27 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: { + colors: { + primary: { + 50: '#f0f9ff', + 100: '#e0f2fe', + 200: '#bae6fd', + 300: '#7dd3fc', + 400: '#38bdf8', + 500: '#0ea5e9', + 600: '#0284c7', + 700: '#0369a1', + 800: '#075985', + 900: '#0c4a6e', + } + } + }, + }, + plugins: [], +} + diff --git a/RAGChatbot/ui/vite.config.js b/RAGChatbot/ui/vite.config.js new file mode 100644 index 0000000000..a20f408af9 --- /dev/null +++ b/RAGChatbot/ui/vite.config.js @@ -0,0 +1,18 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + host: true, + port: 3000, + proxy: { + '/api': { + target: 'http://backend:5001', + changeOrigin: true, + rewrite: (path) => path.replace(/^\/api/, '') + } + } + } +}) +