diff --git a/RAGChatbot/.gitignore b/RAGChatbot/.gitignore new file mode 100644 index 0000000000..31703cdcff --- /dev/null +++ b/RAGChatbot/.gitignore @@ -0,0 +1,42 @@ +# Environment files +**/.env + +# Test files +**/test.txt + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +*.egg-info/ +dist/ +build/ + +# Virtual environments +venv/ +env/ +ENV/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Application specific +dmv_index/ +*.log + +# Node.js +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +package-lock.json \ No newline at end of file diff --git a/RAGChatbot/README.md b/RAGChatbot/README.md new file mode 100644 index 0000000000..5d264b6d80 --- /dev/null +++ b/RAGChatbot/README.md @@ -0,0 +1,283 @@ +## RAG Chatbot + +A full-stack Retrieval-Augmented Generation (RAG) application that enables intelligent, document-based question answering. +The system integrates a FastAPI backend powered by LangChain, FAISS, and AI models, alongside a modern React + Vite + Tailwind CSS frontend for an intuitive chat experience. + +## Table of Contents + +- [Project Overview](#project-overview) +- [Features](#features) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Quick Start Deployment](#quick-start-deployment) +- [User Interface](#user-interface) +- [Troubleshooting](#troubleshooting) +- [Additional Info](#additional-info) + +--- + +## Project Overview + +The **RAG Chatbot** demonstrates how retrieval-augmented generation can be used to build intelligent, document-grounded conversational systems. It retrieves relevant information from a knowledge base, passes it to a large language model, and generates a concise and reliable answer to the user’s query. This project integrates seamlessly with cloud-hosted APIs or local model endpoints, offering flexibility for research, enterprise, or educational use. + +--- + +## Features + +**Backend** + +- Clean PDF upload with validation +- LangChain-powered document processing +- FAISS-CPU vector store for efficient similarity search +- Enterprise inference endpoints for embeddings and LLM +- Token-based authentication for inference API +- Comprehensive error handling and logging +- File validation and size limits +- CORS enabled for web integration +- Health check endpoints +- Modular architecture (routes + services) + +**Frontend** + +- PDF file upload with drag-and-drop support +- Real-time chat interface +- Modern, responsive design with Tailwind CSS +- Built with Vite for fast development +- Live status updates +- Mobile-friendly + +--- + +## Architecture + +Below is the architecture as it consists of a server that waits for documents to embed and index into a vector database. Once documents have been uploaded, the server will wait for user queries which initiates a similarity search in the vector database before calling the LLM service to summarize the findings. + +![Architecture Diagram](./images/RAG%20Model%20System%20Design.png) + +**Service Components:** + +1. **React Web UI (Port 3000)** - Provides intuitive chat interface with drag-and-drop PDF upload, real-time messaging, and document-grounded Q&A interaction + +2. **FastAPI Backend (Port 5001)** - Handles document processing, FAISS vector storage, LangChain integration, and orchestrates retrieval-augmented generation for accurate responses + +**Typical Flow:** + +1. User uploads a document through the web UI. +2. 
The backend processes the document by splitting it and transforming it into embeddings before storing it in the vector database. +3. User sends a question through the web UI. +4. The backend retrieves relevant content from stored documents. +5. The model generates a response based on retrieved context. +6. The answer is displayed to the user via the UI. + +--- + +## Prerequisites + +### System Requirements + +Before you begin, ensure you have the following installed: + +- **Docker and Docker Compose** +- **Enterprise inference endpoint access** (token-based authentication) + +### Required API Configuration + +**For Inference Service (RAG Chatbot):** + +This application supports multiple inference deployment patterns: + +- **GenAI Gateway**: Provide your GenAI Gateway URL and API key +- **APISIX Gateway**: Provide your APISIX Gateway URL and authentication token + +Configuration requirements: +- INFERENCE_API_ENDPOINT: URL to your inference service (GenAI Gateway, APISIX Gateway, etc.) +- INFERENCE_API_TOKEN: Authentication token/API key for your chosen service + +### Local Development Configuration + +**For Local Testing Only (Optional)** + +If you're testing with a local inference endpoint using a custom domain (e.g., `inference.example.com` mapped to localhost in your hosts file): + +1. Edit `api/.env` and set: + ```bash + LOCAL_URL_ENDPOINT=inference.example.com + ``` + (Use the domain name from your INFERENCE_API_ENDPOINT without `https://`) + +2. This allows Docker containers to resolve your local domain correctly. + +**Note:** For public domains or cloud-hosted endpoints, leave the default value `not-needed`. + +### Verify Docker Installation + +```bash +# Check Docker version +docker --version + +# Check Docker Compose version +docker compose version + +# Verify Docker is running +docker ps +``` +--- + +## Quick Start Deployment + +### Clone the Repository + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/RAGChatbot +``` + +### Set up the Environment + +This application requires **two `.env` files** for proper configuration: + +1. **Root `.env` file** (for Docker Compose variables) +2. **`api/.env` file** (for backend application configuration) + +#### Step 1: Create Root `.env` File + +```bash +# From the RAGChatbot directory +cat > .env << EOF +# Docker Compose Configuration +LOCAL_URL_ENDPOINT=not-needed +EOF +``` + +**Note:** If using a local domain (e.g., `inference.example.com` mapped to localhost), replace `not-needed` with your domain name (without `https://`). 
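+
+For reference, here is a minimal sketch of what this local-domain setup can look like, assuming the example domain `inference.example.com` used above (on Windows, edit `C:\Windows\System32\drivers\etc\hosts` instead of `/etc/hosts`):
+
+```bash
+# Map the custom inference domain to localhost (Linux/macOS)
+echo "127.0.0.1 inference.example.com" | sudo tee -a /etc/hosts
+
+# The root .env then carries the bare domain, without the https:// prefix:
+# LOCAL_URL_ENDPOINT=inference.example.com
+```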
+ +#### Step 2: Create `api/.env` File + +You can either copy from the example file: + +```bash +cp api/.env.example api/.env +``` + +Then edit `api/.env` with your actual credentials, **OR** create it directly: + +```bash +mkdir -p api +cat > api/.env << EOF +# Inference API Configuration +# INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix) +# - For GenAI Gateway: https://genai-gateway.example.com +# - For APISIX Gateway: https://apisix-gateway.example.com/inference +INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com +INFERENCE_API_TOKEN=your-actual-token-here + +# Model Configuration +# IMPORTANT: Use the full model names as they appear in your inference service +# Check available models: curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token" +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Local URL Endpoint (for Docker) +LOCAL_URL_ENDPOINT=not-needed +EOF +``` + +**Important Configuration Notes:** + +- **INFERENCE_API_ENDPOINT**: Your actual inference service URL (replace `https://your-actual-api-endpoint.com`) +- **INFERENCE_API_TOKEN**: Your actual pre-generated authentication token +- **EMBEDDING_MODEL_NAME** and **INFERENCE_MODEL_NAME**: Use the exact model names from your inference service + - To check available models: `curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token"` +- **LOCAL_URL_ENDPOINT**: Only needed if using local domain mapping (see [Local Development Configuration](#local-development-configuration)) + +**Note**: The docker-compose.yml file automatically loads environment variables from both `.env` (root) and `./api/.env` (backend) files. + +### Running the Application + +Start both API and UI services together with Docker Compose: + +```bash +# From the RAGChatbot directory +docker compose up --build + +# Or run in detached mode (background) +docker compose up -d --build +``` + +The API will be available at: `http://localhost:5001` +The UI will be available at: `http://localhost:3000` + +**View logs**: + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f backend + +# Frontend only +docker compose logs -f frontend +``` + +**Verify the services are running**: + +```bash +# Check API health +curl http://localhost:5001/health + +# Check if containers are running +docker compose ps +``` + +## User Interface + +**Using the Application** + +Make sure you are at the `http://localhost:3000` URL + +You will be directed to the main page which has each feature + +![User Interface](images/ui.png) + +Upload a PDF: + +- Drag and drop a PDF file, or +- Click "Browse Files" to select a file +- Wait for processing to complete + +Start chatting: + +- Type your question in the input field +- Press Enter or click Send +- Get AI-powered answers based on your document + +**UI Configuration** + +When running with Docker Compose, the UI automatically connects to the backend API. The frontend is available at `http://localhost:3000` and the API at `http://localhost:5001`. + +For production deployments, you may want to configure a reverse proxy or update the API URL in the frontend configuration. 
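+
+Whichever layout you choose, the backend can also be smoke-tested directly from the command line using the documented endpoints, mirroring the upload-and-ask flow of the UI. A minimal check (replace `sample.pdf` with any local PDF):
+
+```bash
+# Upload and index a PDF
+curl -X POST "http://localhost:5001/upload-pdf" \
+  -F "file=@sample.pdf"
+
+# Ask a question about the indexed document
+curl -X POST "http://localhost:5001/query" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "What is this document about?"}'
+```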
+ +### Stopping the Application + +```bash +docker compose down +``` + +## Troubleshooting + +For comprehensive troubleshooting guidance, common issues, and solutions, refer to: + +[Troubleshooting Guide - TROUBLESHOOTING.md](./TROUBLESHOOTING.md) + +--- + +## Additional Info + +The following models have been validated with RAGChatbot: + +| Model | Hardware | +|-------|----------| +| **meta-llama/Llama-3.1-8B-Instruct** | Gaudi | +| **BAAI/bge-base-en-v1.5** (embeddings) | Gaudi | +| **Qwen/Qwen3-4B-Instruct** | Xeon | diff --git a/RAGChatbot/TROUBLESHOOTING.md b/RAGChatbot/TROUBLESHOOTING.md new file mode 100644 index 0000000000..a4d2142b0c --- /dev/null +++ b/RAGChatbot/TROUBLESHOOTING.md @@ -0,0 +1,191 @@ +# Troubleshooting Guide + +This document contains all common issues encountered during development and their solutions. + +## Table of Contents + +- [Docker Compose Issues](#docker-compose-issues) +- [API Common Issues](#api-common-issues) +- [UI Common Issues](#ui-common-issues) + +## Docker Compose Issues + +### Error: "LOCAL_URL_ENDPOINT variable is not set" + +**Problem**: +``` +level=warning msg="The \"LOCAL_URL_ENDPOINT\" variable is not set. Defaulting to a blank string." +decoding failed due to the following error(s): +'services[backend].extra_hosts' bad host name '' +``` + +**Solution**: + +1. Create a `.env` file in the **root** `rag-chatbot` directory (not in `api/`): + ```bash + echo "LOCAL_URL_ENDPOINT=not-needed" > .env + ``` +2. If using a local domain (e.g., `inference.example.com`), replace `not-needed` with your domain name (without `https://`) +3. Restart Docker Compose: `docker compose down && docker compose up` + +### Error: "404 Not Found" when uploading PDF + +**Problem**: +``` +HTTP Request: POST https://api.example.com/BAAI/bge-base-en-v1.5/v1/embeddings "HTTP/1.1 404 Not Found" +openai.NotFoundError: Error code: 404 - {'detail': 'Not Found'} +``` + +**Solution**: + +1. Verify your `api/.env` file has the **correct** API endpoint (not the placeholder): + ```bash + INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com + INFERENCE_API_TOKEN=your-actual-token-here + ``` + +2. Check available models on your inference service: + ```bash + curl https://your-api-endpoint.com/v1/models \ + -H "Authorization: Bearer your-token" + ``` + +3. Update model names to match the exact names from your API: + ```bash + EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 + INFERENCE_MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507 + ``` + +4. Restart containers: `docker compose down && docker compose up --build` + +### Containers fail to start + +**Problem**: Docker containers won't start or crash immediately + +**Solution**: + +1. Check logs for specific errors: + ```bash + docker compose logs backend + docker compose logs frontend + ``` + +2. Ensure ports 5001 and 3000 are available: + ```bash + # Windows + netstat -ano | findstr :5001 + netstat -ano | findstr :3000 + + # Unix/Mac + lsof -i :5001 + lsof -i :3000 + ``` + +3. Clean up and rebuild: + ```bash + docker compose down -v + docker compose up --build + ``` + +4. Restart Docker Desktop if issues persist + +## API Common Issues + +#### "INFERENCE_API_ENDPOINT and INFERENCE_API_TOKEN must be set" + +**Solution**: + +1. Create a `.env` file in the `api` directory +2. Add your inference configuration: + ```bash + INFERENCE_API_ENDPOINT=https://your-actual-api-endpoint.com + INFERENCE_API_TOKEN=your-actual-token-here + EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 + INFERENCE_MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507 + ``` +3. 
Restart the server + +#### "No documents uploaded" + +**Solution**: + +- Upload a PDF first using the `/upload-pdf` endpoint +- Check server logs for any upload errors +- Verify the PDF is not corrupted or empty + +#### "Could not load vector store" + +**Solution**: + +- The vector store is created when you upload your first PDF +- Check that the application has write permissions in the directory +- Verify `dmv_index/` directory exists and is accessible + +#### Import errors + +**Solution**: + +1. Ensure all dependencies are installed: `pip install -r requirements.txt` +2. Verify you're using Python 3.10 or higher: `python --version` +3. Activate your virtual environment if using one + +#### Server won't start + +**Solution**: + +1. Check if port 5000 is already in use: `lsof -i :5000` (Unix) or `netstat -ano | findstr :5000` (Windows) +2. Use a different port: `uvicorn server:app --port 5001` +3. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 50MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. Check server logs for detailed error messages + +#### Query returns no answer + +**Solution**: + +1. Verify a document has been uploaded successfully +2. Try rephrasing your question +3. Check if the document contains relevant information +4. Increase `TOP_K_DOCUMENTS` in `config.py` for broader search + +## UI Common Issues + +### API Connection Issues + +**Problem**: "Failed to upload PDF" or "Failed to get response" + +**Solution**: + +1. Ensure the API server is running on `http://localhost:5000` +2. Check browser console for detailed errors +3. Verify CORS is enabled in the API + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` diff --git a/RAGChatbot/api/.env.example b/RAGChatbot/api/.env.example new file mode 100644 index 0000000000..b632f68128 --- /dev/null +++ b/RAGChatbot/api/.env.example @@ -0,0 +1,22 @@ +# Inference API Configuration +# INFERENCE_API_ENDPOINT: URL to your inference service (without /v1 suffix) +# - For GenAI Gateway: https://genai-gateway.example.com +# - For APISIX Gateway: https://apisix-gateway.example.com/inference +# +# INFERENCE_API_TOKEN: Authentication token/API key for the inference service +# - For GenAI Gateway: Your GenAI Gateway API key +# - For APISIX Gateway: Your APISIX authentication token +INFERENCE_API_ENDPOINT=https://api.example.com +INFERENCE_API_TOKEN=your-pre-generated-token-here + +# Model Configuration +# IMPORTANT: Use the full model names as they appear in your inference service +# Check available models: curl https://your-api-endpoint.com/v1/models -H "Authorization: Bearer your-token" +EMBEDDING_MODEL_NAME=BAAI/bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Local URL Endpoint (only needed for non-public domains) +# If using a local domain like inference.example.com mapped to localhost: +# Set this to: inference.example.com (domain without https://) +# If using a public domain, set any placeholder value like: not-needed +LOCAL_URL_ENDPOINT=not-needed diff --git a/RAGChatbot/api/Dockerfile b/RAGChatbot/api/Dockerfile new file mode 100644 index 0000000000..4424ff4c43 --- /dev/null +++ b/RAGChatbot/api/Dockerfile 
@@ -0,0 +1,19 @@ +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +COPY requirements.txt . + + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application files into the container +COPY server.py . + +# Expose the port the service runs on +EXPOSE 5001 + +# Command to run the application +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5001", "--reload"] \ No newline at end of file diff --git a/RAGChatbot/api/README.md b/RAGChatbot/api/README.md new file mode 100644 index 0000000000..7434f91397 --- /dev/null +++ b/RAGChatbot/api/README.md @@ -0,0 +1,693 @@ +# RAG Chatbot API + +A production-ready RAG (Retrieval-Augmented Generation) chatbot API built with FastAPI, LangChain, and FAISS for document-based question answering. + +## Table of Contents + +- [Features](#features) +- [Quick Start](#quick-start) +- [Installation](#installation) +- [Configuration](#configuration) +- [Running the Server](#running-the-server) +- [API Endpoints](#api-endpoints) +- [Project Structure](#project-structure) +- [Testing](#testing) +- [Development](#development) +- [Troubleshooting](#troubleshooting) + +## Features + +- Clean PDF upload with validation +- LangChain-powered document processing +- FAISS-CPU vector store for efficient similarity search +- Enterprise inference endpoints for embeddings and LLM +- Keycloak authentication for secure API access +- Comprehensive error handling and logging +- File validation and size limits +- CORS enabled for web integration +- Health check endpoints +- Modular architecture (routes + services) + +## Quick Start + +Get up and running in 3 minutes using Docker Compose: + +```bash +# 1. Navigate to the rag-chatbot directory +cd /path/to/rag-chatbot + +# 2. Create .env file in the api directory with enterprise configuration +mkdir -p api +cat > api/.env << EOF +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +EOF + +# 3. Start both API and UI services with Docker Compose +docker compose up --build + +# 4. Access the application +# API: http://localhost:5001/docs +# UI: http://localhost:3000 +``` + +The application will automatically start both the backend API and frontend UI. Visit http://localhost:5001/docs for interactive API documentation. + +## Installation + +### Prerequisites + +- Docker and Docker Compose installed +- Enterprise inference endpoint access (Keycloak authentication) + +### Docker Compose Setup + +Docker Compose will start both the API and UI services together. + +1. **Set up environment variables**: + +Create a `.env` file in the `api` directory (relative to `rag-chatbot/`): + +```bash +cd rag-chatbot +mkdir -p api +cat > api/.env << EOF +# Backend API URL (accessible from frontend) +VITE_API_URL=https://backend:5000 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +EOF +``` + +2. 
**Start the services**: + +```bash +# From the rag-chatbot directory +docker compose up --build +``` + +This will: +- Build the backend API container +- Build the frontend UI container +- Start both services automatically +- Make API available at http://localhost:5001 +- Make UI available at http://localhost:3000 + +### Dependencies + +The main dependencies include: + +- `fastapi==0.109.0` - Web framework +- `uvicorn[standard]==0.27.0` - ASGI server +- `langchain==0.1.0` - LLM framework +- `faiss-cpu==1.7.4` - Vector similarity search +- `pypdf==4.0.1` - PDF processing + +See `requirements.txt` for complete list. + +## Configuration + +All configuration is centralized in `config.py`. You can modify settings by editing this file or using environment variables. + +### Environment Variables + +For Docker Compose, create a `.env` file in the `api/` directory (relative to `rag-chatbot/`): + +```bash +# Backend API URL (accessible from frontend) +VITE_API_URL=https://backend:5000 + +# Required - Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_client_secret + +# Required - Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + +# Optional (with defaults shown) +# VECTOR_STORE_PATH=./dmv_index +# MAX_FILE_SIZE_MB=50 +``` + +**Note**: The docker-compose.yml file automatically loads environment variables from `./api/.env` for the backend service. + +### Configuration Settings + +Edit `config.py` to customize: + +#### File Upload Settings + +```python +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +ALLOWED_EXTENSIONS = {".pdf"} +``` + +#### Text Processing Settings + +```python +CHUNK_SIZE = 1000 # Characters per chunk +CHUNK_OVERLAP = 200 # Overlap between chunks +SEPARATORS = ["\n\n", "\n", " ", ""] # Text splitting separators +``` + +#### Vector Store Settings + +```python +VECTOR_STORE_PATH = "./dmv_index" # Where to store FAISS index +``` + +#### LLM Settings + +```python +LLM_TEMPERATURE = 0 # Response randomness (0-1) +TOP_K_DOCUMENTS = 4 # Documents to retrieve +# Model endpoints and names are configured via environment variables: +# EMBEDDING_MODEL_ENDPOINT, INFERENCE_MODEL_ENDPOINT +# EMBEDDING_MODEL_NAME, INFERENCE_MODEL_NAME +``` + +#### CORS Settings + +```python +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] +``` + +## Running the Server + +**Start both API and UI together**: + +```bash +# From the rag-chatbot directory +docker compose up --build + +# Or run in detached mode (background) +docker compose up -d --build +``` + +**Stop the services**: + +```bash +docker compose down +``` + +The API will be available at: `http://localhost:5001` +The UI will be available at: `http://localhost:3000` + +**View logs**: + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f backend + +# Frontend only +docker compose logs -f frontend +``` + +### Verifying the Server + +```bash +# Check if API server is running +curl http://localhost:5001/ + +# Check health status +curl http://localhost:5001/health + +# Check if containers are running +docker compose ps +``` + +## API Endpoints + +### Health Check + +**GET /** - Basic health check + +```bash +curl http://localhost:5001/ +``` + +Response: + +```json +{ + 
"message": "RAG Chatbot API is running", + "version": "2.0.0", + "status": "healthy", + "vectorstore_loaded": true +} +``` + +**GET /health** - Detailed health status + +```bash +curl http://localhost:5001/health +``` + +Response: + +```json +{ + "status": "healthy", + "vectorstore_available": true, + "enterprise_inference_configured": true +} +``` + +### Upload PDF + +**POST /upload-pdf** - Upload and process a PDF document + +```bash +curl -X POST "http://localhost:5001/upload-pdf" \ + -H "Content-Type: multipart/form-data" \ + -F "file=@/path/to/document.pdf" +``` + +Response: + +```json +{ + "message": "Successfully uploaded and processed 'document.pdf'", + "num_chunks": 45, + "status": "success" +} +``` + +**Validation Rules**: + +- File must be PDF format +- Maximum size: 50MB (configurable) +- File must not be empty +- Content must be extractable + +### Query Documents + +**POST /query** - Ask questions about uploaded documents + +```bash +curl -X POST "http://localhost:5001/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What are the main topics in the document?"}' +``` + +Response: + +```json +{ + "answer": "The main topics covered in the document are...", + "query": "What are the main topics in the document?" +} +``` + +### Delete Vector Store + +**DELETE /vectorstore** - Delete the current vector store + +```bash +curl -X DELETE "http://localhost:5001/vectorstore" +``` + +Response: + +```json +{ + "message": "Vector store deleted successfully", + "status": "success" +} +``` + +### Interactive API Documentation + +FastAPI provides automatic interactive documentation: + +- **Swagger UI**: http://localhost:5001/docs +- **ReDoc**: http://localhost:5001/redoc + +## Project Structure + +The application follows a modular architecture with clear separation of concerns: + +``` +api/ +├── server.py # FastAPI app with routes (main entry point) +├── config.py # Configuration settings +├── models.py # Pydantic models for request/response validation +├── services/ # Business logic layer +│ ├── __init__.py +│ ├── pdf_service.py # PDF processing and validation +│ ├── vector_service.py # Vector store operations (FAISS) +│ └── retrieval_service.py # Query processing and LLM integration +├── requirements.txt # Python dependencies +├── test_api.py # Automated test suite +├── .env # Environment variables (create this) +└── dmv_index/ # FAISS vector store (auto-generated) +``` + +### Architecture Overview + +``` +Client Request + ↓ +server.py (Routes) + ↓ +models.py (Validation) + ↓ +services/ (Business Logic) + ├── pdf_service.py + ├── vector_service.py + └── retrieval_service.py + ↓ +External Services (Enterprise Inference Endpoints, FAISS) +``` + +**Layered Architecture**: + +- **Routes Layer** (`server.py`): HTTP handling, routing, error responses +- **Validation Layer** (`models.py`): Request/response validation +- **Business Logic Layer** (`services/`): Core functionality +- **Configuration Layer** (`config.py`): Settings management + +## Testing + +### Automated Test Suite + +Run the included test suite: + +```bash +# Basic tests (no PDF required) +python test_api.py + +# Full tests with PDF upload +python test_api.py /path/to/your/document.pdf +``` + +The test suite includes: + +- Health check tests +- Upload validation tests +- Query functionality tests +- Error handling tests +- Colored output for easy reading + +### Manual Testing + +1. **Start the services**: + +```bash +docker compose up +``` + +2. 
**Upload a PDF**: + +```bash +curl -X POST "http://localhost:5001/upload-pdf" \ + -F "file=@sample.pdf" +``` + +3. **Query the document**: + +```bash +curl -X POST "http://localhost:5001/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "What is this document about?"}' +``` + +4. **Check health**: + +```bash +curl http://localhost:5001/health +``` + +## Development + +### Project Setup for Development + +1. Fork/clone the repository +2. Set up your `.env` file in the `api` directory +3. Run with Docker Compose for development: `docker compose up --build` +4. Make changes to code (changes are reflected with volume mounts in docker-compose.yml) + +### Adding New Features + +#### Add a New Service + +1. Create new file in `services/` directory: + +```python +# services/new_service.py +def new_function(param): + """Your business logic""" + return result +``` + +2. Export from `services/__init__.py`: + +```python +from .new_service import new_function +``` + +3. Use in routes: + +```python +# server.py +from services import new_function + +@app.post("/new-endpoint") +def new_endpoint(): + result = new_function(data) + return result +``` + +#### Add a New Endpoint + +1. Define model in `models.py`: + +```python +class NewRequest(BaseModel): + field: str +``` + +2. Add route in `server.py`: + +```python +@app.post("/new-endpoint") +def new_endpoint(request: NewRequest): + # Your logic here + return {"result": "success"} +``` + +### Modifying Configuration + +Edit `config.py` to change default settings: + +```python +# Example: Increase file size limit +MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB + +# Example: Change chunk size +CHUNK_SIZE = 1500 + +# Example: Use different model +LLM_MODEL = "gpt-4" +``` + +### Code Style + +- Use type hints for all functions +- Add docstrings to all public functions +- Follow PEP 8 style guide +- Keep functions focused (single responsibility) +- Log important operations + +## Troubleshooting + +### Common Issues + +#### "Keycloak authentication or model endpoints not configured" + +**Solution**: + +1. Create a `.env` file in the `api` directory (relative to `rag-chatbot/`) +2. Add required configuration: + ```bash + BASE_URL=https://api.example.com + KEYCLOAK_REALM=master + KEYCLOAK_CLIENT_ID=api + KEYCLOAK_CLIENT_SECRET=your_client_secret + EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 + INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct + EMBEDDING_MODEL_NAME=bge-base-en-v1.5 + INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct + ``` +3. Restart the services with `docker compose restart backend` or `docker compose down && docker compose up` + +#### "No documents uploaded" + +**Solution**: + +- Upload a PDF first using the `/upload-pdf` endpoint +- Check server logs for any upload errors +- Verify the PDF is not corrupted or empty + +#### "Could not load vector store" + +**Solution**: + +- The vector store is created when you upload your first PDF +- Check that the application has write permissions in the directory +- Verify `dmv_index/` directory exists and is accessible + +#### Import errors + +**Solution**: + +1. Rebuild the Docker containers: `docker compose down && docker compose build --no-cache && docker compose up` +2. Check container logs: `docker compose logs backend` + +#### Server won't start + +**Solution**: + +1. Check if ports 5001 or 3000 are already in use: `lsof -i :5001` or `lsof -i :3000` (Unix) or `netstat -ano | findstr :5001` (Windows) +2. Check container logs: `docker compose logs backend` +3. 
Try rebuilding containers: `docker compose down && docker compose build --no-cache && docker compose up` +4. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 50MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. Check server logs for detailed error messages + +#### Query returns no answer + +**Solution**: + +1. Verify a document has been uploaded successfully +2. Try rephrasing your question +3. Check if the document contains relevant information +4. Increase `TOP_K_DOCUMENTS` in `config.py` for broader search + +### Logging + +The application logs important events to the console: + +- **INFO**: Normal operations (PDF processing, queries) +- **WARNING**: Non-critical issues +- **ERROR**: Critical errors with stack traces + +To view logs: + +```bash +# View all logs +docker compose logs -f + +# View backend logs only +docker compose logs -f backend +``` + +### Getting Help + +1. View logs with `docker compose logs -f` +2. Visit the health endpoint: `http://localhost:5001/health` +3. Review the error messages in API responses +4. Check the interactive documentation: `http://localhost:5001/docs` + +## Production Deployment + +### Checklist + +Before deploying to production: + +- [ ] Configure secure `KEYCLOAK_CLIENT_SECRET` +- [ ] Set up proper `BASE_URL` for enterprise endpoints +- [ ] Configure specific CORS origins (not `["*"]`) +- [ ] Enable HTTPS +- [ ] Set up monitoring and alerting +- [ ] Configure logging to files +- [ ] Implement rate limiting +- [ ] Verify Keycloak authentication is working +- [ ] Set up backup for vector stores +- [ ] Configure firewall rules +- [ ] Use environment-specific configuration + +### Docker Compose Production Deployment + +The provided `docker-compose.yml` already includes both API and UI services. For production: + +1. **Set up environment variables** in `api/.env`: + +```bash +# Enterprise/Keycloak Configuration +BASE_URL=https://api.example.com +KEYCLOAK_REALM=master +KEYCLOAK_CLIENT_ID=api +KEYCLOAK_CLIENT_SECRET=your_production_client_secret + +# Model Configuration +EMBEDDING_MODEL_ENDPOINT=bge-base-en-v1.5 +INFERENCE_MODEL_ENDPOINT=Llama-3.1-8B-Instruct +EMBEDDING_MODEL_NAME=bge-base-en-v1.5 +INFERENCE_MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct +``` + +2. **Run in detached mode**: + +```bash +docker compose up -d --build +``` + +3. **Monitor logs**: + +```bash +docker compose logs -f +``` + +## License + +MIT + +## Support + +For issues, questions, or contributions: + +1. Check this README for solutions +2. Review the troubleshooting section +3. Check container logs: `docker compose logs -f` +4. 
Visit the interactive docs at `http://localhost:5001/docs` + +--- + +**Version**: 2.0.0 +**Last Updated**: 2025 +**API Documentation**: http://localhost:5001/docs diff --git a/RAGChatbot/api/config.py b/RAGChatbot/api/config.py new file mode 100644 index 0000000000..3ca2f2addc --- /dev/null +++ b/RAGChatbot/api/config.py @@ -0,0 +1,54 @@ +""" +Configuration settings for RAG Chatbot API +""" + +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Inference API Configuration +# Supports multiple inference deployment patterns: +# - GenAI Gateway: Provide your GenAI Gateway URL and API key +# - APISIX Gateway: Provide your APISIX Gateway URL and authentication token +INFERENCE_API_ENDPOINT = os.getenv("INFERENCE_API_ENDPOINT", "https://api.example.com") +INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN") + +# Model Configuration +EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "bge-base-en-v1.5") +INFERENCE_MODEL_NAME = os.getenv("INFERENCE_MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct") + +# Validate required configuration +if not INFERENCE_API_ENDPOINT or not INFERENCE_API_TOKEN: + raise ValueError("INFERENCE_API_ENDPOINT and INFERENCE_API_TOKEN must be set in environment variables") + +# Application Settings +APP_TITLE = "RAG QnA Chatbot" +APP_DESCRIPTION = "A RAG-based chatbot API using LangChain and FAISS" +APP_VERSION = "2.0.0" + +# File Upload Settings +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB +ALLOWED_EXTENSIONS = {".pdf"} + +# Vector Store Settings +VECTOR_STORE_PATH = "./dmv_index" + +# Text Splitting Settings +CHUNK_SIZE = 1000 +CHUNK_OVERLAP = 200 +SEPARATORS = ["\n\n", "\n", " ", ""] + +# Retrieval Settings +TOP_K_DOCUMENTS = 4 +LLM_MODEL = "gpt-3.5-turbo" +LLM_TEMPERATURE = 0 +EMBEDDING_MODEL = "text-embedding-ada-002" + +# CORS Settings +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] + diff --git a/RAGChatbot/api/models.py b/RAGChatbot/api/models.py new file mode 100644 index 0000000000..ae9452a81c --- /dev/null +++ b/RAGChatbot/api/models.py @@ -0,0 +1,61 @@ +""" +Pydantic models for request/response validation +""" + +from pydantic import BaseModel, Field + + +class QueryRequest(BaseModel): + """Request model for querying documents""" + query: str = Field(..., min_length=1, description="Natural language question") + + class Config: + json_schema_extra = { + "example": { + "query": "What are the main topics covered in the document?" + } + } + + +class UploadResponse(BaseModel): + """Response model for PDF upload""" + message: str = Field(..., description="Success message") + num_chunks: int = Field(..., description="Number of chunks created") + status: str = Field(..., description="Operation status") + + class Config: + json_schema_extra = { + "example": { + "message": "Successfully uploaded and processed 'document.pdf'", + "num_chunks": 45, + "status": "success" + } + } + + +class QueryResponse(BaseModel): + """Response model for document queries""" + answer: str = Field(..., description="Answer to the query") + query: str = Field(..., description="Original query") + + class Config: + json_schema_extra = { + "example": { + "answer": "The main topics covered in the document are...", + "query": "What are the main topics covered in the document?" 
+ } + } + + +class HealthResponse(BaseModel): + """Response model for health check""" + status: str = Field(..., description="Health status") + vectorstore_available: bool = Field(..., description="Whether vectorstore is loaded") + openai_key_configured: bool = Field(..., description="Whether inference API token is configured") + + +class DeleteResponse(BaseModel): + """Response model for delete operations""" + message: str = Field(..., description="Result message") + status: str = Field(..., description="Operation status") + diff --git a/RAGChatbot/api/requirements.txt b/RAGChatbot/api/requirements.txt new file mode 100644 index 0000000000..8a77040e54 --- /dev/null +++ b/RAGChatbot/api/requirements.txt @@ -0,0 +1,15 @@ +fastapi>=0.109.0 +uvicorn[standard]>=0.27.0 +python-dotenv>=1.0.0 +langchain>=0.1.0 +langchain-community>=0.0.10 +langchain-openai>=0.0.5 +faiss-cpu>=1.7.4 +pypdf>=4.0.0 +openai>=1.10.0 +python-multipart>=0.0.6 +pydantic>=2.5.0 +pydantic-settings>=2.1.0 +cryptography>=3.1.0 +httpx>=0.24.0 +requests>=2.31.0 \ No newline at end of file diff --git a/RAGChatbot/api/server.py b/RAGChatbot/api/server.py new file mode 100644 index 0000000000..3186c48bc3 --- /dev/null +++ b/RAGChatbot/api/server.py @@ -0,0 +1,229 @@ +""" +FastAPI server with routes for RAG Chatbot API +""" + +import os +import tempfile +import logging +from contextlib import asynccontextmanager +from fastapi import FastAPI, File, UploadFile, HTTPException, status +from fastapi.middleware.cors import CORSMiddleware + +import config +from models import ( + QueryRequest, UploadResponse, QueryResponse, + HealthResponse, DeleteResponse +) +from services import ( + validate_pdf_file, load_and_split_pdf, + store_in_vector_storage, load_vector_store, delete_vector_store, + query_documents +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Lifespan context manager for FastAPI app""" + # Startup + app.state.vectorstore = load_vector_store(config.INFERENCE_API_TOKEN) + if app.state.vectorstore: + logger.info("✓ FAISS vector store loaded successfully") + else: + logger.info("! No existing vector store found. 
Please upload a PDF document.") + + yield + + # Shutdown + logger.info("Shutting down RAG Chatbot API") + + +# Initialize FastAPI app +app = FastAPI( + title=config.APP_TITLE, + description=config.APP_DESCRIPTION, + version=config.APP_VERSION, + lifespan=lifespan +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=config.CORS_ALLOW_ORIGINS, + allow_credentials=config.CORS_ALLOW_CREDENTIALS, + allow_methods=config.CORS_ALLOW_METHODS, + allow_headers=config.CORS_ALLOW_HEADERS, +) + + +# ==================== Routes ==================== + +@app.get("/") +def root(): + """Health check endpoint""" + return { + "message": "RAG Chatbot API is running", + "version": config.APP_VERSION, + "status": "healthy", + "vectorstore_loaded": app.state.vectorstore is not None + } + + +@app.get("/health", response_model=HealthResponse) +def health_check(): + """Detailed health check""" + return HealthResponse( + status="healthy", + vectorstore_available=app.state.vectorstore is not None, + openai_key_configured=bool(config.INFERENCE_API_TOKEN) + ) + + +@app.post("/upload-pdf", response_model=UploadResponse) +async def upload_pdf(file: UploadFile = File(...)): + """ + Upload a PDF file, process it, create embeddings, and store in FAISS + + - **file**: PDF file to upload (max 50MB) + """ + # Validate file + validate_pdf_file(file) + + tmp_path = None + try: + # Read file content + content = await file.read() + file_size = len(content) + + # Check file size + if file_size > config.MAX_FILE_SIZE: + raise HTTPException( + status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, + detail=f"File too large. Maximum size is {config.MAX_FILE_SIZE / (1024*1024)}MB" + ) + + if file_size == 0: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Empty file uploaded" + ) + + logger.info(f"Processing PDF: {file.filename} ({file_size / 1024:.2f} KB)") + + # Save to temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: + tmp.write(content) + tmp_path = tmp.name + logger.info(f"Saved to temporary path: {tmp_path}") + + # Load and split PDF + chunks = load_and_split_pdf(tmp_path) + + if not chunks: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No text content could be extracted from the PDF" + ) + + # Create embeddings and store in FAISS + vectorstore = store_in_vector_storage(chunks, config.INFERENCE_API_TOKEN) + + # Update app state + app.state.vectorstore = vectorstore + + logger.info(f"✓ Successfully processed PDF: {file.filename}") + + return UploadResponse( + message=f"Successfully uploaded and processed '{file.filename}'", + num_chunks=len(chunks), + status="success" + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error processing PDF: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing PDF: {str(e)}" + ) + finally: + # Clean up temporary file + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + logger.info(f"Cleaned up temporary file: {tmp_path}") + except Exception as e: + logger.warning(f"Could not remove temporary file: {str(e)}") + + +@app.post("/query", response_model=QueryResponse) +def query_endpoint(request: QueryRequest): + """ + Query the uploaded documents using RAG + + - **query**: Natural language question about the documents + """ + if not app.state.vectorstore: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No documents uploaded. 
Please upload a PDF first using /upload-pdf endpoint." + ) + + if not request.query or not request.query.strip(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Query cannot be empty" + ) + + try: + result = query_documents( + request.query, + app.state.vectorstore, + config.INFERENCE_API_TOKEN + ) + return QueryResponse(**result) + except Exception as e: + logger.error(f"Error processing query: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing query: {str(e)}" + ) + + +@app.delete("/vectorstore", response_model=DeleteResponse) +def delete_vectorstore_endpoint(): + """Delete the current vector store""" + try: + deleted = delete_vector_store() + app.state.vectorstore = None + + if deleted: + return DeleteResponse( + message="Vector store deleted successfully", + status="success" + ) + else: + return DeleteResponse( + message="No vector store found to delete", + status="success" + ) + except Exception as e: + logger.error(f"Error deleting vector store: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error deleting vector store: {str(e)}" + ) + + +# Entry point for running with uvicorn +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=5001) + diff --git a/RAGChatbot/api/services/__init__.py b/RAGChatbot/api/services/__init__.py new file mode 100644 index 0000000000..2802bf9895 --- /dev/null +++ b/RAGChatbot/api/services/__init__.py @@ -0,0 +1,21 @@ +""" +Services package for RAG Chatbot API +""" + +from .pdf_service import load_and_split_pdf, validate_pdf_file +from .vector_service import store_in_vector_storage, load_vector_store, delete_vector_store +from .retrieval_service import build_retrieval_chain, query_documents +from .api_client import APIClient, get_api_client + +__all__ = [ + 'load_and_split_pdf', + 'validate_pdf_file', + 'store_in_vector_storage', + 'load_vector_store', + 'delete_vector_store', + 'build_retrieval_chain', + 'query_documents', + 'APIClient', + 'get_api_client' +] + diff --git a/RAGChatbot/api/services/api_client.py b/RAGChatbot/api/services/api_client.py new file mode 100644 index 0000000000..8b00942b40 --- /dev/null +++ b/RAGChatbot/api/services/api_client.py @@ -0,0 +1,223 @@ +""" +API Client for authentication and API calls +Similar to simple-client/main.py implementation +""" + +import logging +import requests +import httpx +from typing import Optional +import config + +logger = logging.getLogger(__name__) + + +class APIClient: + """ + Client for handling API calls with token-based authentication + """ + + def __init__(self): + self.base_url = config.INFERENCE_API_ENDPOINT + self.token = config.INFERENCE_API_TOKEN + self.http_client = httpx.Client(verify=False) + logger.info(f"✓ API Client initialized with endpoint: {self.base_url}") + + def get_embedding_client(self): + """ + Get OpenAI-style client for embeddings + Uses bge-base-en-v1.5 model + """ + from openai import OpenAI + + return OpenAI( + api_key=self.token, + base_url=f"{self.base_url}/v1", + http_client=self.http_client + ) + + def get_inference_client(self): + """ + Get OpenAI-style client for inference/completions + Uses Llama-3.1-8B-Instruct model + """ + from openai import OpenAI + + return OpenAI( + api_key=self.token, + base_url=f"{self.base_url}/v1", + http_client=self.http_client + ) + + def embed_text(self, text: str) -> list: + """ + Get embedding for text + Uses the bge-base-en-v1.5 embedding model 
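+        (the model name is taken from config.EMBEDDING_MODEL_NAME and the request is sent to the service's OpenAI-compatible /v1/embeddings endpoint)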
+ + Args: + text: Text to embed + + Returns: + List of embedding values + """ + try: + client = self.get_embedding_client() + # Call the embeddings endpoint + response = client.embeddings.create( + model=config.EMBEDDING_MODEL_NAME, + input=text + ) + return response.data[0].embedding + except Exception as e: + logger.error(f"Error generating embedding: {str(e)}") + raise + + def embed_texts(self, texts: list) -> list: + """ + Get embeddings for multiple texts + Batches requests to avoid exceeding API limits (max batch size: 32) + + Args: + texts: List of texts to embed + + Returns: + List of embedding vectors + """ + try: + BATCH_SIZE = 32 # Maximum allowed batch size + all_embeddings = [] + client = self.get_embedding_client() + + # Process in batches of 32 + for i in range(0, len(texts), BATCH_SIZE): + batch = texts[i:i + BATCH_SIZE] + logger.info(f"Processing embedding batch {i//BATCH_SIZE + 1}/{(len(texts) + BATCH_SIZE - 1)//BATCH_SIZE} ({len(batch)} texts)") + + response = client.embeddings.create( + model=config.EMBEDDING_MODEL_NAME, + input=batch + ) + batch_embeddings = [data.embedding for data in response.data] + all_embeddings.extend(batch_embeddings) + + return all_embeddings + except Exception as e: + logger.error(f"Error generating embeddings: {str(e)}") + raise + + def complete(self, prompt: str, max_tokens: int = 50, temperature: float = 0) -> str: + """ + Get completion from the inference model + Uses Llama-3.1-8B-Instruct for inference + + Args: + prompt: Input prompt + max_tokens: Maximum tokens to generate + temperature: Temperature for generation + + Returns: + Generated text + """ + try: + client = self.get_inference_client() + logger.info(f"Calling inference client with model: {config.INFERENCE_MODEL_NAME}") + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=temperature + ) + + # Handle response structure + if hasattr(response, 'choices') and len(response.choices) > 0: + choice = response.choices[0] + if hasattr(choice, 'text'): + return choice.text + else: + logger.error(f"Unexpected choice structure: {type(choice)}, {choice}") + return str(choice) + else: + logger.error(f"Unexpected response: {type(response)}, {response}") + return "" + except Exception as e: + logger.error(f"Error generating completion: {str(e)}", exc_info=True) + raise + + def chat_complete(self, messages: list, max_tokens: int = 150, temperature: float = 0) -> str: + """ + Get chat completion from the inference model + + Args: + messages: List of message dicts with 'role' and 'content' + max_tokens: Maximum tokens to generate + temperature: Temperature for generation + + Returns: + Generated text + """ + try: + client = self.get_inference_client() + # Convert messages to a prompt for the completions endpoint + # (since Llama models use completions, not chat.completions) + prompt = "" + for msg in messages: + role = msg.get('role', 'user') + content = msg.get('content', '') + if role == 'system': + prompt += f"System: {content}\n\n" + elif role == 'user': + prompt += f"User: {content}\n\n" + elif role == 'assistant': + prompt += f"Assistant: {content}\n\n" + prompt += "Assistant:" + + logger.info(f"Calling inference with prompt length: {len(prompt)}") + + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=temperature + ) + + # Handle response structure + if hasattr(response, 'choices') and len(response.choices) > 0: + choice = 
response.choices[0] + if hasattr(choice, 'text'): + return choice.text + elif hasattr(choice, 'message') and hasattr(choice.message, 'content'): + return choice.message.content + else: + logger.error(f"Unexpected response structure: {type(choice)}, {choice}") + return str(choice) + else: + logger.error(f"Unexpected response: {type(response)}, {response}") + return "" + except Exception as e: + logger.error(f"Error generating chat completion: {str(e)}", exc_info=True) + raise + + def __del__(self): + """ + Cleanup: close httpx client + """ + if self.http_client: + self.http_client.close() + + +# Global API client instance +_api_client: Optional[APIClient] = None + + +def get_api_client() -> APIClient: + """ + Get or create the global API client instance + + Returns: + APIClient instance + """ + global _api_client + if _api_client is None: + _api_client = APIClient() + return _api_client + diff --git a/RAGChatbot/api/services/pdf_service.py b/RAGChatbot/api/services/pdf_service.py new file mode 100644 index 0000000000..9fbc2032af --- /dev/null +++ b/RAGChatbot/api/services/pdf_service.py @@ -0,0 +1,86 @@ +""" +PDF processing service +Handles PDF validation, loading, and text splitting +""" + +import logging +from pathlib import Path +from fastapi import UploadFile, HTTPException, status +from langchain_community.document_loaders import PyPDFLoader +from langchain_text_splitters import RecursiveCharacterTextSplitter + +logger = logging.getLogger(__name__) + +# Constants +ALLOWED_EXTENSIONS = {".pdf"} +MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB + + +def validate_pdf_file(file: UploadFile) -> None: + """ + Validate uploaded PDF file + + Args: + file: UploadFile object from FastAPI + + Raises: + HTTPException: If file validation fails + """ + if not file.filename: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No filename provided" + ) + + file_ext = Path(file.filename).suffix.lower() + if file_ext not in ALLOWED_EXTENSIONS: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid file type. Only PDF files are allowed. Got: {file_ext}" + ) + + if not file.content_type or "pdf" not in file.content_type.lower(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid content type. 
Expected PDF, got: {file.content_type}" + ) + + +def load_and_split_pdf(path: str) -> list: + """ + Load PDF and split into chunks using RecursiveCharacterTextSplitter + + Args: + path: Path to the PDF file + + Returns: + List of document chunks + + Raises: + ValueError: If no content can be extracted + Exception: For other processing errors + """ + try: + # Load PDF documents + loader = PyPDFLoader(file_path=path) + documents = loader.load() + logger.info(f"Loaded {len(documents)} pages from PDF") + + if not documents: + raise ValueError("No content extracted from PDF") + + # Split text into chunks with better strategy + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=200, + length_function=len, + separators=["\n\n", "\n", " ", ""] + ) + chunks = text_splitter.split_documents(documents) + logger.info(f"Split into {len(chunks)} chunks") + + return chunks + except Exception as e: + logger.error(f"Error loading and splitting PDF: {str(e)}") + raise + diff --git a/RAGChatbot/api/services/retrieval_service.py b/RAGChatbot/api/services/retrieval_service.py new file mode 100644 index 0000000000..15e4862abd --- /dev/null +++ b/RAGChatbot/api/services/retrieval_service.py @@ -0,0 +1,231 @@ +""" +Retrieval service +Handles query processing and retrieval chain operations +""" + +import logging +from langchain_openai import ChatOpenAI +from langchain_community.vectorstores import FAISS +from langchain.chains.retrieval import create_retrieval_chain +from langchain.chains.combine_documents import create_stuff_documents_chain +from langchain import hub +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.language_models.llms import LLM +from langchain_core.outputs import LLMResult, Generation +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, BaseMessage +from typing import List, Optional, Any +import config + +logger = logging.getLogger(__name__) + + +class CustomLLM(LLM): + """ + Custom LLM class that uses the Llama-3.1-8B-Instruct endpoint + """ + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "custom_llm" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[Any] = None, + **kwargs: Any, + ) -> str: + """Call the LLM on the given prompt.""" + from .api_client import get_api_client + api_client = get_api_client() + return api_client.complete(prompt, max_tokens=kwargs.get('max_tokens', 150), temperature=kwargs.get('temperature', 0)) + + +class CustomChatModel(BaseChatModel): + """ + Custom Chat Model that uses the Llama-3.1-8B-Instruct endpoint + """ + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "custom_chat" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[Any] = None, + **kwargs: Any, + ) -> LLMResult: + """Generate response from messages.""" + from .api_client import get_api_client + api_client = get_api_client() + + # Convert messages to a prompt string + # Build the prompt from all messages + prompt_parts = [] + + for msg in messages: + if isinstance(msg, SystemMessage): + prompt_parts.append(f"System: {msg.content}") + elif isinstance(msg, HumanMessage): + prompt_parts.append(f"User: {msg.content}") + elif isinstance(msg, AIMessage): + prompt_parts.append(f"Assistant: {msg.content}") + + # Join all parts and add assistant prompt suffix + full_prompt = "\n\n".join(prompt_parts) + if not full_prompt.endswith("Assistant:"): 
+ full_prompt += "\n\nAssistant:" + + logger.info(f"Sending prompt to LLM (length: {len(full_prompt)} chars)") + + # Use the complete method which directly sends the prompt + # This calls: Llama-3.1-8B-Instruct/v1/completions with prompt + response_text = api_client.complete( + full_prompt, + max_tokens=kwargs.get('max_tokens', 150), + temperature=kwargs.get('temperature', 0) + ) + + generations = [Generation(text=response_text)] + return LLMResult(generations=[generations]) + + +def get_llm(api_key: str) -> BaseChatModel: + """ + Get LLM instance (ChatOpenAI or CustomChatModel based on config) + + Args: + api_key: API key + + Returns: + LLM instance + """ + # Check if using custom inference endpoint + if hasattr(config, 'INFERENCE_API_TOKEN') and config.INFERENCE_API_TOKEN: + return CustomChatModel() + else: + # Fallback to OpenAI ChatOpenAI + return ChatOpenAI( + model="gpt-3.5-turbo", + temperature=0, + openai_api_key=api_key + ) + + +def build_retrieval_chain(vectorstore: FAISS, api_key: str): + """ + Build retrieval chain with LLM (ChatOpenAI or CustomChatModel) + + Args: + vectorstore: FAISS vectorstore instance + api_key: API key + + Returns: + Configured retrieval chain + + Raises: + Exception: If chain building fails + """ + try: + retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat") + llm = get_llm(api_key) + combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt) + retrieval_chain = create_retrieval_chain( + vectorstore.as_retriever(search_kwargs={"k": 4}), + combine_docs_chain + ) + return retrieval_chain + except Exception as e: + logger.error(f"Error building retrieval chain: {str(e)}") + raise + + +def query_documents(query: str, vectorstore: FAISS, api_key: str) -> dict: + """ + Query the documents using RAG with custom embedding and inference + + Simple workflow: + 1. Create embedding for the query + 2. Search for similar documents in the vectorstore + 3. Format the retrieved context + 4. Summarize using Llama inference endpoint + + Args: + query: User's question + vectorstore: FAISS vectorstore instance + api_key: API key + + Returns: + Dictionary with answer and query + + Raises: + Exception: If query processing fails + """ + try: + logger.info(f"Processing query: {query}") + + # Step 1: Create embedding for the query + logger.info("Creating query embedding...") + from .api_client import get_api_client + api_client = get_api_client() + + query_embedding = api_client.embed_text(query) + logger.info(f"Query embedding created (dimension: {len(query_embedding)})") + + # Step 2: Search for similar documents (similarity search) + logger.info("Searching for similar documents...") + similar_docs = vectorstore.similarity_search_by_vector(query_embedding, k=4) + logger.info(f"Found {len(similar_docs)} similar documents") + + if not similar_docs: + return { + "answer": "I couldn't find any relevant documents to answer your question.", + "query": query + } + + # Step 3: Format the retrieved context + context_parts = [] + for i, doc in enumerate(similar_docs): + context_parts.append(f"Document {i+1}:\n{doc.page_content}") + + context = "\n\n".join(context_parts) + logger.info(f"Context length: {len(context)} characters") + + # Step 4: Create prompt for summarization using Llama + prompt = f"""Based on the following documents, provide a comprehensive summary that addresses the question. 
+ +Documents: +{context} + +Question: {query} + +Summary:""" + + logger.info(f"Calling Llama inference with prompt length: {len(prompt)}") + + # Call Llama inference endpoint for summarization + answer = api_client.complete( + prompt=prompt, + max_tokens=200, + temperature=0 + ) + + answer = answer.strip() + + if not answer: + answer = "I couldn't find a relevant answer in the documents." + + logger.info("✓ Query completed successfully") + + return { + "answer": answer, + "query": query + } + except Exception as e: + logger.error(f"Error processing query: {str(e)}", exc_info=True) + raise + diff --git a/RAGChatbot/api/services/vector_service.py b/RAGChatbot/api/services/vector_service.py new file mode 100644 index 0000000000..0884a9c0db --- /dev/null +++ b/RAGChatbot/api/services/vector_service.py @@ -0,0 +1,152 @@ +""" +Vector store service +Handles FAISS vector store operations +""" + +import os +import logging +import shutil +from typing import Optional +from langchain_openai import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain_core.embeddings import Embeddings +import config + +logger = logging.getLogger(__name__) + +# Constants +VECTOR_STORE_PATH = "./dmv_index" + + +class CustomEmbeddings(Embeddings): + """ + Custom embeddings class that uses the bge-base-en-v1.5 endpoint + """ + + def __init__(self): + from .api_client import get_api_client + self.api_client = get_api_client() + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """ + Embed multiple documents + Note: Batches are handled automatically by api_client (max batch size: 32) + + Args: + texts: List of texts to embed + + Returns: + List of embedding vectors + """ + return self.api_client.embed_texts(texts) + + def embed_query(self, text: str) -> list[float]: + """ + Embed a single query + + Args: + text: Text to embed + + Returns: + Embedding vector + """ + return self.api_client.embed_text(text) + + +def get_embeddings(api_key: str) -> Embeddings: + """ + Create embeddings instance + + Args: + api_key: API key (for compatibility, not used with custom endpoint) + + Returns: + Embeddings instance (CustomEmbeddings if using custom API, OpenAIEmbeddings otherwise) + """ + # Check if using custom inference endpoint + if hasattr(config, 'INFERENCE_API_TOKEN') and config.INFERENCE_API_TOKEN: + return CustomEmbeddings() + else: + # Fallback to OpenAI + return OpenAIEmbeddings( + model="text-embedding-ada-002", + openai_api_key=api_key + ) + + +def store_in_vector_storage(chunks: list, api_key: str) -> FAISS: + """ + Create embeddings and store in FAISS vector store + + Args: + chunks: List of document chunks + api_key: OpenAI API key + + Returns: + FAISS vectorstore instance + + Raises: + Exception: If storage operation fails + """ + try: + embeddings = get_embeddings(api_key) + vectorstore = FAISS.from_documents(chunks, embeddings) + + # Ensure directory exists + os.makedirs( + os.path.dirname(VECTOR_STORE_PATH) if os.path.dirname(VECTOR_STORE_PATH) else ".", + exist_ok=True + ) + vectorstore.save_local(VECTOR_STORE_PATH) + logger.info(f"Saved vector store to {VECTOR_STORE_PATH}") + + return vectorstore + except Exception as e: + logger.error(f"Error storing vectors: {str(e)}") + raise + + +def load_vector_store(api_key: str) -> Optional[FAISS]: + """ + Load existing FAISS vector store + + Args: + api_key: OpenAI API key + + Returns: + FAISS vectorstore instance or None if not found + """ + try: + embeddings = get_embeddings(api_key) + vectorstore = FAISS.load_local( + 
+            VECTOR_STORE_PATH,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+        logger.info("Loaded existing FAISS vector store")
+        return vectorstore
+    except Exception as e:
+        logger.warning(f"Could not load vector store: {str(e)}")
+        return None
+
+
+def delete_vector_store() -> bool:
+    """
+    Delete the vector store from disk
+
+    Returns:
+        True if deleted successfully, False otherwise
+
+    Raises:
+        Exception: If deletion fails
+    """
+    try:
+        if os.path.exists(VECTOR_STORE_PATH):
+            shutil.rmtree(VECTOR_STORE_PATH)
+            logger.info("Deleted vector store")
+            return True
+        return False
+    except Exception as e:
+        logger.error(f"Error deleting vector store: {str(e)}")
+        raise
+
diff --git a/RAGChatbot/api/test_api.py b/RAGChatbot/api/test_api.py
new file mode 100644
index 0000000000..ac4baf6be5
--- /dev/null
+++ b/RAGChatbot/api/test_api.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+Test script for RAG Chatbot API
+Tests PDF upload and query functionality
+
+Usage:
+    python test_api.py                     # Run basic tests
+    python test_api.py /path/to/file.pdf   # Run full tests with PDF upload
+"""
+
+import requests
+import sys
+import time
+from pathlib import Path
+
+# The backend listens on port 5001 (see docker-compose.yml and the README)
+BASE_URL = "http://localhost:5001"
+
+def print_status(message, status="info"):
+    """Print colored status messages"""
+    colors = {
+        "info": "\033[94m",     # Blue
+        "success": "\033[92m",  # Green
+        "error": "\033[91m",    # Red
+        "warning": "\033[93m"   # Yellow
+    }
+    reset = "\033[0m"
+    print(f"{colors.get(status, '')}{message}{reset}")
+
+
+def test_health_check():
+    """Test health check endpoint"""
+    print_status("\n1. Testing health check endpoint...", "info")
+    try:
+        response = requests.get(f"{BASE_URL}/")
+        response.raise_for_status()
+        data = response.json()
+        print_status(f"✓ Health check passed: {data['message']}", "success")
+        print(f"   Version: {data.get('version', 'N/A')}")
+        print(f"   Vectorstore loaded: {data.get('vectorstore_loaded', False)}")
+        return True
+    except Exception as e:
+        print_status(f"✗ Health check failed: {str(e)}", "error")
+        return False
+
+
+def test_detailed_health():
+    """Test detailed health endpoint"""
+    print_status("\n2. Testing detailed health endpoint...", "info")
+    try:
+        response = requests.get(f"{BASE_URL}/health")
+        response.raise_for_status()
+        data = response.json()
+        print_status("✓ Detailed health check passed", "success")
+        print(f"   Status: {data.get('status')}")
+        print(f"   Vectorstore available: {data.get('vectorstore_available')}")
+        print(f"   OpenAI key configured: {data.get('openai_key_configured')}")
+        return True
+    except Exception as e:
+        print_status(f"✗ Detailed health check failed: {str(e)}", "error")
+        return False
+
+
+def test_upload_pdf(pdf_path=None):
+    """Test PDF upload endpoint"""
+    print_status("\n3. Testing PDF upload...", "info")
+
+    if pdf_path and Path(pdf_path).exists():
+        file_path = pdf_path
+    else:
+        print_status("   No PDF file provided.
Skipping upload test.", "warning") + print_status(" To test upload, run: python test_api.py /path/to/file.pdf", "warning") + return None + + try: + print(f" Uploading: {file_path}") + with open(file_path, 'rb') as f: + files = {'file': (Path(file_path).name, f, 'application/pdf')} + response = requests.post(f"{BASE_URL}/upload-pdf", files=files) + response.raise_for_status() + data = response.json() + + print_status(f"✓ Upload successful!", "success") + print(f" Message: {data['message']}") + print(f" Number of chunks: {data['num_chunks']}") + print(f" Status: {data['status']}") + return True + except requests.exceptions.HTTPError as e: + print_status(f"✗ Upload failed: {e}", "error") + try: + error_detail = e.response.json() + print(f" Error details: {error_detail}") + except: + pass + return False + except Exception as e: + print_status(f"✗ Upload failed: {str(e)}", "error") + return False + + +def test_query(query="What is this document about?"): + """Test query endpoint""" + print_status("\n4. Testing query endpoint...", "info") + print(f" Query: '{query}'") + + try: + response = requests.post( + f"{BASE_URL}/query", + json={"query": query} + ) + response.raise_for_status() + data = response.json() + + print_status("✓ Query successful!", "success") + print(f" Answer: {data['answer'][:200]}{'...' if len(data['answer']) > 200 else ''}") + return True + except requests.exceptions.HTTPError as e: + if e.response.status_code == 400: + print_status("✗ No documents uploaded yet. Upload a PDF first.", "warning") + else: + print_status(f"✗ Query failed: {e}", "error") + try: + error_detail = e.response.json() + print(f" Error details: {error_detail}") + except: + pass + return False + except Exception as e: + print_status(f"✗ Query failed: {str(e)}", "error") + return False + + +def test_invalid_upload(): + """Test upload validation with invalid file""" + print_status("\n5. 
Testing upload validation...", "info")
+
+    try:
+        # Try uploading a text file
+        files = {'file': ('test.txt', b'This is not a PDF', 'text/plain')}
+        response = requests.post(f"{BASE_URL}/upload-pdf", files=files)
+
+        if response.status_code == 400:
+            print_status("✓ Validation working: Invalid file rejected correctly", "success")
+            return True
+        else:
+            print_status("✗ Validation issue: Invalid file was accepted", "error")
+            return False
+    except Exception as e:
+        print_status(f"✗ Validation test failed: {str(e)}", "error")
+        return False
+
+
+def main():
+    """Run all tests"""
+    print_status("=" * 60)
+    print_status("RAG Chatbot API Test Suite", "info")
+    print_status("=" * 60)
+
+    # Check if server is running
+    print_status("\nChecking if server is running...", "info")
+    try:
+        requests.get(BASE_URL, timeout=2)
+    except requests.exceptions.RequestException:
+        print_status("✗ Server is not running!", "error")
+        print_status("Please start the server first:", "warning")
+        print_status("  cd RAGChatbot/api", "warning")
+        print_status("  uvicorn server:app --reload", "warning")
+        print_status("  OR: python server.py", "warning")
+        sys.exit(1)
+
+    print_status("✓ Server is running", "success")
+
+    # Get PDF path from command line if provided
+    pdf_path = sys.argv[1] if len(sys.argv) > 1 else None
+
+    # Run tests
+    results = []
+    results.append(("Health Check", test_health_check()))
+    results.append(("Detailed Health", test_detailed_health()))
+
+    upload_result = test_upload_pdf(pdf_path)
+    if upload_result is not None:
+        results.append(("PDF Upload", upload_result))
+
+    if upload_result:
+        # Wait a moment for processing
+        time.sleep(1)
+        results.append(("Query", test_query()))
+        results.append(("Query 2", test_query("Summarize the main points")))
+
+    results.append(("Validation", test_invalid_upload()))
+
+    # Print summary
+    print_status("\n" + "=" * 60)
+    print_status("Test Summary", "info")
+    print_status("=" * 60)
+
+    passed = sum(1 for _, result in results if result)
+    total = len(results)
+
+    for test_name, result in results:
+        status = "success" if result else "error"
+        symbol = "✓" if result else "✗"
+        print_status(f"{symbol} {test_name}", status)
+
+    print_status(f"\nPassed: {passed}/{total}", "success" if passed == total else "warning")
+
+    if pdf_path is None:
+        print_status("\nNote: PDF upload test was skipped", "warning")
+        print_status("To run full tests with PDF upload:", "info")
+        print_status("  python test_api.py /path/to/your/document.pdf", "info")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/RAGChatbot/docker-compose.yml b/RAGChatbot/docker-compose.yml
new file mode 100644
index 0000000000..e0f1382da5
--- /dev/null
+++ b/RAGChatbot/docker-compose.yml
@@ -0,0 +1,44 @@
+services:
+  # Backend API (Python/FastAPI)
+  backend:
+    build:
+      context: ./api
+      dockerfile: Dockerfile
+    container_name: backend
+    ports:
+      - "5001:5001"
+    env_file:
+      - ./api/.env
+    volumes:
+      - ./api:/app
+    networks:
+      - app_network
+    extra_hosts:
+      - "${LOCAL_URL_ENDPOINT}:host-gateway"
+    restart: unless-stopped
+
+  # Frontend (React)
+  frontend:
+    build:
+      context: ./ui
+      dockerfile: Dockerfile
+    container_name: frontend
+    ports:
+      - "3000:3000"
+    depends_on:
+      - backend
+    networks:
+      - app_network
+    restart: unless-stopped
+
+##################################
+# 🔗 Shared Network
+##################################
+networks:
+  app_network:
+    driver: bridge
\ No newline at end of file
diff --git
a/RAGChatbot/images/RAG Model System Design.png b/RAGChatbot/images/RAG Model System Design.png new file mode 100644 index 0000000000..7838fe5870 Binary files /dev/null and b/RAGChatbot/images/RAG Model System Design.png differ diff --git a/RAGChatbot/images/ui.png b/RAGChatbot/images/ui.png new file mode 100644 index 0000000000..484703ba15 Binary files /dev/null and b/RAGChatbot/images/ui.png differ diff --git a/RAGChatbot/ui/.gitignore b/RAGChatbot/ui/.gitignore new file mode 100644 index 0000000000..d600b6c76d --- /dev/null +++ b/RAGChatbot/ui/.gitignore @@ -0,0 +1,25 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? + diff --git a/RAGChatbot/ui/Dockerfile b/RAGChatbot/ui/Dockerfile new file mode 100644 index 0000000000..7dab0c57a7 --- /dev/null +++ b/RAGChatbot/ui/Dockerfile @@ -0,0 +1,19 @@ +FROM node:18 + +# Set the working directory +WORKDIR /app + +# Copy package.json +COPY package.json ./ + +# Install dependencies +RUN npm install + +# Copy the rest of the application files +COPY . . + +# Expose the port the app runs on +EXPOSE 3000 + +# Command to run the application +CMD ["npm", "run", "dev", "--", "--host"] \ No newline at end of file diff --git a/RAGChatbot/ui/README.md b/RAGChatbot/ui/README.md new file mode 100644 index 0000000000..532316281f --- /dev/null +++ b/RAGChatbot/ui/README.md @@ -0,0 +1,189 @@ +# RAG Chatbot UI + +A clean and elegant React-based user interface for the RAG Chatbot application. + +## Features + +- PDF file upload with drag-and-drop support +- Real-time chat interface +- Modern, responsive design with Tailwind CSS +- Built with Vite for fast development +- Live status updates +- Mobile-friendly + +## Quick Start + +The UI runs automatically when using Docker Compose. See the main project README for setup instructions. + +The UI will be available at `http://localhost:3000` + +## Development + +This UI runs as part of the Docker Compose setup. For local development without Docker, you can use the scripts below, but Docker Compose is the recommended approach. + +### Available Scripts (Local Development Only) + +```bash +# Start development server with hot reload +npm run dev + +# Build for production +npm run build + +# Preview production build +npm run preview + +# Lint code +npm run lint +``` + +### Project Structure + +``` +ui/ +├── src/ +│ ├── components/ +│ │ ├── Header.jsx # App header +│ │ ├── StatusBar.jsx # Document status display +│ │ ├── PDFUploader.jsx # PDF upload component +│ │ └── ChatInterface.jsx # Chat UI +│ ├── services/ +│ │ └── api.js # API client +│ ├── App.jsx # Main app component +│ ├── main.jsx # Entry point +│ └── index.css # Global styles +├── public/ # Static assets +├── index.html # HTML template +├── vite.config.js # Vite configuration +├── tailwind.config.js # Tailwind CSS config +└── package.json # Dependencies +``` + +## Configuration + +When running with Docker Compose, the UI automatically connects to the backend. Configuration is handled through the docker-compose.yml file. + +## Usage + +1. **Start the application** using Docker Compose (from the `rag-chatbot` directory): + + ```bash + docker compose up --build + ``` + +2. **Upload a PDF**: + + - Drag and drop a PDF file, or + - Click "Browse Files" to select a file + - Wait for processing to complete + +4. 
**Start chatting**: + - Type your question in the input field + - Press Enter or click Send + - Get AI-powered answers based on your document + +## Features in Detail + +### PDF Upload + +- Drag-and-drop support +- File validation (PDF only, max 50MB) +- Upload progress indicator +- Success/error notifications + +### Chat Interface + +- Real-time messaging +- Message history +- Typing indicators +- Timestamp display +- Error handling + +### Status Bar + +- Document upload status +- Progress tracking +- Quick reset functionality + +## Building for Production + +```bash +# Build the production bundle +npm run build + +# The built files will be in the dist/ directory +# Serve with any static file server +``` + +### Deploy with Docker Compose + +The UI is automatically deployed when using Docker Compose from the root `rag-chatbot` directory. The Dockerfile in this directory is used by the docker-compose.yml configuration. + +## Customization + +### Styling + +The UI uses Tailwind CSS. Customize colors and theme in `tailwind.config.js`: + +```javascript +theme: { + extend: { + colors: { + primary: { + // Your custom colors + } + } + } +} +``` + +### Backend Integration + +The UI communicates with the backend through `src/services/api.js`. When running with Docker Compose, the backend is automatically available. + +## Troubleshooting + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` + +## Browser Support + +- Chrome/Edge (latest) +- Firefox (latest) +- Safari (latest) +- Mobile browsers (iOS Safari, Chrome Mobile) + +## Performance + +- Optimized bundle size with Vite +- Code splitting for faster loads +- Lazy loading of components +- Efficient re-renders with React + +## License + +MIT + +--- + +**Built with**: React, Vite, Tailwind CSS, Axios, and Lucide Icons diff --git a/RAGChatbot/ui/index.html b/RAGChatbot/ui/index.html new file mode 100644 index 0000000000..c6a3e65988 --- /dev/null +++ b/RAGChatbot/ui/index.html @@ -0,0 +1,14 @@ + + + + + + + RAG Chatbot + + +
+ + + + diff --git a/RAGChatbot/ui/package.json b/RAGChatbot/ui/package.json new file mode 100644 index 0000000000..c4249ab4f3 --- /dev/null +++ b/RAGChatbot/ui/package.json @@ -0,0 +1,32 @@ +{ + "name": "rag-chatbot-ui", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "axios": "^1.6.0", + "lucide-react": "^0.294.0" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@vitejs/plugin-react": "^4.2.1", + "autoprefixer": "^10.4.16", + "eslint": "^8.55.0", + "eslint-plugin-react": "^7.33.2", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.5", + "postcss": "^8.4.32", + "tailwindcss": "^3.3.6", + "vite": "^5.0.8" + } +} + diff --git a/RAGChatbot/ui/postcss.config.js b/RAGChatbot/ui/postcss.config.js new file mode 100644 index 0000000000..b4a6220e2d --- /dev/null +++ b/RAGChatbot/ui/postcss.config.js @@ -0,0 +1,7 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} + diff --git a/RAGChatbot/ui/src/App.jsx b/RAGChatbot/ui/src/App.jsx new file mode 100644 index 0000000000..42ab09fd23 --- /dev/null +++ b/RAGChatbot/ui/src/App.jsx @@ -0,0 +1,77 @@ +import { useState } from 'react' +import ChatInterface from './components/ChatInterface' +import PDFUploader from './components/PDFUploader' +import Header from './components/Header' +import StatusBar from './components/StatusBar' + +function App() { + const [documentUploaded, setDocumentUploaded] = useState(false) + const [documentName, setDocumentName] = useState('') + const [uploadProgress, setUploadProgress] = useState(0) + const [isUploading, setIsUploading] = useState(false) + + const handleUploadSuccess = (fileName, numChunks) => { + setDocumentUploaded(true) + setDocumentName(fileName) + setUploadProgress(100) + setTimeout(() => { + setIsUploading(false) + setUploadProgress(0) + }, 1000) + } + + const handleUploadStart = () => { + setIsUploading(true) + setUploadProgress(0) + } + + const handleUploadProgress = (progress) => { + setUploadProgress(progress) + } + + const handleReset = () => { + setDocumentUploaded(false) + setDocumentName('') + setUploadProgress(0) + } + + return ( +
+
+ +
+ {/* Status Bar */} + + +
+ {/* Left Panel - PDF Upload */} +
+ +
+ + {/* Right Panel - Chat Interface */} +
+ +
+
+
+
+ ) +} + +export default App + diff --git a/RAGChatbot/ui/src/components/ChatInterface.jsx b/RAGChatbot/ui/src/components/ChatInterface.jsx new file mode 100644 index 0000000000..ee26d0a02a --- /dev/null +++ b/RAGChatbot/ui/src/components/ChatInterface.jsx @@ -0,0 +1,184 @@ +import { useState, useRef, useEffect } from 'react' +import { Send, Bot, User, AlertCircle } from 'lucide-react' +import { queryDocument } from '../services/api' + +export default function ChatInterface({ documentUploaded, documentName }) { + const [messages, setMessages] = useState([]) + const [input, setInput] = useState('') + const [isLoading, setIsLoading] = useState(false) + const messagesEndRef = useRef(null) + + const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }) + } + + useEffect(() => { + scrollToBottom() + }, [messages]) + + useEffect(() => { + // Reset messages when document changes + if (documentUploaded) { + setMessages([ + { + type: 'bot', + content: `Document "${documentName}" has been uploaded successfully! You can now ask me questions about it.`, + timestamp: new Date() + } + ]) + } else { + setMessages([]) + } + }, [documentUploaded, documentName]) + + const handleSubmit = async (e) => { + e.preventDefault() + + if (!input.trim() || !documentUploaded) return + + const userMessage = { + type: 'user', + content: input, + timestamp: new Date() + } + + setMessages(prev => [...prev, userMessage]) + setInput('') + setIsLoading(true) + + try { + const response = await queryDocument(input) + + const botMessage = { + type: 'bot', + content: response.answer, + timestamp: new Date() + } + + setMessages(prev => [...prev, botMessage]) + } catch (error) { + const errorMessage = { + type: 'error', + content: error.message || 'Failed to get response. Please try again.', + timestamp: new Date() + } + setMessages(prev => [...prev, errorMessage]) + } finally { + setIsLoading(false) + } + } + + return ( +
+ {/* Chat Header */} +
+

+ + Chat Assistant +

+

+ {documentUploaded + ? 'Ask questions about your document' + : 'Upload a document to start chatting'} +

+
+ + {/* Messages Container */} +
+ {!documentUploaded && messages.length === 0 && ( +
+
+ +

Upload a PDF document to start chatting

+
+
+ )} + + {messages.map((message, index) => ( +
+
+ {message.type === 'user' ? ( + + ) : message.type === 'error' ? ( + + ) : ( + + )} +
+ +
+
+

{message.content}

+
+

+ {message.timestamp.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })} +

+
+
+ ))} + + {isLoading && ( +
+
+ +
+
+
+
+
+
+
+
+
+ )} + +
+
+ + {/* Input Form */} +
+
+ setInput(e.target.value)} + placeholder={documentUploaded ? "Ask a question..." : "Upload a document first..."} + disabled={!documentUploaded || isLoading} + className="flex-1 px-4 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 disabled:bg-gray-100 disabled:cursor-not-allowed" + /> + +
+

+ Press Enter to send • The AI will answer based on your uploaded document +

+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/Header.jsx b/RAGChatbot/ui/src/components/Header.jsx new file mode 100644 index 0000000000..0cfd2b11ce --- /dev/null +++ b/RAGChatbot/ui/src/components/Header.jsx @@ -0,0 +1,28 @@ +import { MessageSquare, FileText } from 'lucide-react' + +export default function Header() { + return ( +
+
+
+
+
+ +
+
+

+ RAG Chatbot +

+

Ask questions about your documents

+
+
+ +
+ +
+
+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/PDFUploader.jsx b/RAGChatbot/ui/src/components/PDFUploader.jsx new file mode 100644 index 0000000000..e6a549f42e --- /dev/null +++ b/RAGChatbot/ui/src/components/PDFUploader.jsx @@ -0,0 +1,155 @@ +import { useState, useRef } from 'react' +import { Upload, FileText, CheckCircle, AlertCircle } from 'lucide-react' +import { uploadPDF } from '../services/api' + +export default function PDFUploader({ onUploadSuccess, onUploadStart, onUploadProgress, documentUploaded }) { + const [dragActive, setDragActive] = useState(false) + const [error, setError] = useState('') + const fileInputRef = useRef(null) + + const handleDrag = (e) => { + e.preventDefault() + e.stopPropagation() + if (e.type === "dragenter" || e.type === "dragover") { + setDragActive(true) + } else if (e.type === "dragleave") { + setDragActive(false) + } + } + + const handleDrop = async (e) => { + e.preventDefault() + e.stopPropagation() + setDragActive(false) + + if (e.dataTransfer.files && e.dataTransfer.files[0]) { + await handleFile(e.dataTransfer.files[0]) + } + } + + const handleChange = async (e) => { + e.preventDefault() + if (e.target.files && e.target.files[0]) { + await handleFile(e.target.files[0]) + } + } + + const handleFile = async (file) => { + setError('') + + // Validate file type + if (!file.name.endsWith('.pdf')) { + setError('Please upload a PDF file') + return + } + + // Validate file size (50MB) + if (file.size > 50 * 1024 * 1024) { + setError('File size must be less than 50MB') + return + } + + onUploadStart() + + try { + // Simulate progress + onUploadProgress(30) + + const result = await uploadPDF(file) + + onUploadProgress(90) + + onUploadSuccess(file.name, result.num_chunks) + setError('') + } catch (err) { + setError(err.message || 'Failed to upload file') + onUploadProgress(0) + } + } + + const handleButtonClick = () => { + fileInputRef.current?.click() + } + + return ( +
+
+

+ + Upload Document +

+

+ Upload a PDF to start asking questions +

+
+ +
+ + + {documentUploaded ? ( +
+ +

Document uploaded successfully!

+ +
+ ) : ( +
+ +
+

Drop your PDF here

+

or

+
+ +

PDF files only, max 50MB

+
+ )} +
+ + {error && ( +
+ +

{error}

+
+ )} + +
+

Instructions:

+
    +
+            • Upload a PDF document (max 50MB)
+            • Wait for processing to complete
+            • Start asking questions in the chat
+            • Get intelligent answers based on your document
+
+
+ ) +} + diff --git a/RAGChatbot/ui/src/components/StatusBar.jsx b/RAGChatbot/ui/src/components/StatusBar.jsx new file mode 100644 index 0000000000..2957d014c5 --- /dev/null +++ b/RAGChatbot/ui/src/components/StatusBar.jsx @@ -0,0 +1,54 @@ +import { CheckCircle, AlertCircle, Loader, Trash2 } from 'lucide-react' + +export default function StatusBar({ documentUploaded, documentName, isUploading, uploadProgress, onReset }) { + return ( +
+
+
+ {isUploading && ( + <> + +
+

Uploading and processing...

+
+
+
+
+ + )} + + {!isUploading && documentUploaded && ( + <> + +
+

Document Ready

+

{documentName}

+
+ + )} + + {!isUploading && !documentUploaded && ( + <> + +

No document uploaded

+ + )} +
+ + {documentUploaded && !isUploading && ( + + )} +
+
+ ) +} + diff --git a/RAGChatbot/ui/src/index.css b/RAGChatbot/ui/src/index.css new file mode 100644 index 0000000000..a523f51e54 --- /dev/null +++ b/RAGChatbot/ui/src/index.css @@ -0,0 +1,34 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} + +@layer utilities { + .scrollbar-hide::-webkit-scrollbar { + display: none; + } + + .scrollbar-hide { + -ms-overflow-style: none; + scrollbar-width: none; + } +} + diff --git a/RAGChatbot/ui/src/main.jsx b/RAGChatbot/ui/src/main.jsx new file mode 100644 index 0000000000..299bc52310 --- /dev/null +++ b/RAGChatbot/ui/src/main.jsx @@ -0,0 +1,11 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import App from './App.jsx' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')).render( + + + , +) + diff --git a/RAGChatbot/ui/src/services/api.js b/RAGChatbot/ui/src/services/api.js new file mode 100644 index 0000000000..87ffb0667d --- /dev/null +++ b/RAGChatbot/ui/src/services/api.js @@ -0,0 +1,85 @@ +import axios from 'axios' + +// API base URL - uses Vite proxy in development (proxies to localhost:5000) +const API_BASE_URL = import.meta.env.VITE_API_URL || '/api' + +const api = axios.create({ + baseURL: API_BASE_URL, + headers: { + 'Content-Type': 'application/json', + }, +}) + +/** + * Upload a PDF file to the API + * @param {File} file - The PDF file to upload + * @returns {Promise} Response with upload status and chunk count + */ +export const uploadPDF = async (file) => { + const formData = new FormData() + formData.append('file', file) + + try { + const response = await api.post('/upload-pdf', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }) + return response.data + } catch (error) { + console.error('Upload error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to upload PDF. Please try again.' + ) + } +} + +/** + * Query the uploaded document + * @param {string} query - The question to ask + * @returns {Promise} Response with the answer + */ +export const queryDocument = async (query) => { + try { + const response = await api.post('/query', { query }) + return response.data + } catch (error) { + console.error('Query error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to get response. Please try again.' 
+ ) + } +} + +/** + * Check API health + * @returns {Promise} Health status + */ +export const checkHealth = async () => { + try { + const response = await api.get('/health') + return response.data + } catch (error) { + console.error('Health check error:', error) + throw new Error('API is not available') + } +} + +/** + * Delete the vector store + * @returns {Promise} Deletion status + */ +export const deleteVectorStore = async () => { + try { + const response = await api.delete('/vectorstore') + return response.data + } catch (error) { + console.error('Delete error:', error) + throw new Error( + error.response?.data?.detail || 'Failed to delete vector store' + ) + } +} + +export default api + diff --git a/RAGChatbot/ui/tailwind.config.js b/RAGChatbot/ui/tailwind.config.js new file mode 100644 index 0000000000..037cbd7203 --- /dev/null +++ b/RAGChatbot/ui/tailwind.config.js @@ -0,0 +1,27 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: { + colors: { + primary: { + 50: '#f0f9ff', + 100: '#e0f2fe', + 200: '#bae6fd', + 300: '#7dd3fc', + 400: '#38bdf8', + 500: '#0ea5e9', + 600: '#0284c7', + 700: '#0369a1', + 800: '#075985', + 900: '#0c4a6e', + } + } + }, + }, + plugins: [], +} + diff --git a/RAGChatbot/ui/vite.config.js b/RAGChatbot/ui/vite.config.js new file mode 100644 index 0000000000..a20f408af9 --- /dev/null +++ b/RAGChatbot/ui/vite.config.js @@ -0,0 +1,18 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + host: true, + port: 3000, + proxy: { + '/api': { + target: 'http://backend:5001', + changeOrigin: true, + rewrite: (path) => path.replace(/^\/api/, '') + } + } + } +}) +