Spaces:

afulara
/

PythonicRAG-FastAPI-React

Sleeping

App Files Files Community

afulara commited on Apr 15, 2025

Commit

5cd3431

0 Parent(s):

Initial files

Browse files

Files changed (44) hide show

.env +1 -0
backend/aimakerspace/__init__.py +0 -0
backend/aimakerspace/__pycache__/__init__.cpython-312.pyc +0 -0
backend/aimakerspace/__pycache__/__init__.cpython-313.pyc +0 -0
backend/aimakerspace/__pycache__/text_utils.cpython-312.pyc +0 -0
backend/aimakerspace/__pycache__/text_utils.cpython-313.pyc +0 -0
backend/aimakerspace/__pycache__/vectordatabase.cpython-312.pyc +0 -0
backend/aimakerspace/__pycache__/vectordatabase.cpython-313.pyc +0 -0
backend/aimakerspace/openai_utils/__init__.py +0 -0
backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-312.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-313.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-312.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-313.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-312.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-313.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-312.pyc +0 -0
backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-313.pyc +0 -0
backend/aimakerspace/openai_utils/chatmodel.py +45 -0
backend/aimakerspace/openai_utils/embedding.py +59 -0
backend/aimakerspace/openai_utils/prompts.py +78 -0
backend/aimakerspace/text_utils.py +136 -0
backend/aimakerspace/vectordatabase.py +81 -0
backend/main.py +211 -0
backend/requirements.txt +9 -0
frontend/.gitignore +23 -0
frontend/README.md +46 -0
frontend/package-lock.json +0 -0
frontend/package.json +44 -0
frontend/public/favicon.ico +0 -0
frontend/public/index.html +43 -0
frontend/public/logo192.png +0 -0
frontend/public/logo512.png +0 -0
frontend/public/manifest.json +25 -0
frontend/public/robots.txt +3 -0
frontend/src/App.css +132 -0
frontend/src/App.test.tsx +9 -0
frontend/src/App.tsx +177 -0
frontend/src/index.css +13 -0
frontend/src/index.tsx +19 -0
frontend/src/logo.svg +1 -0
frontend/src/react-app-env.d.ts +1 -0
frontend/src/reportWebVitals.ts +15 -0
frontend/src/setupTests.ts +5 -0
frontend/tsconfig.json +26 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY=<REDACTED: secret key was committed in plain text; revoke it and supply the value via the deployment environment instead of a tracked .env file>

backend/aimakerspace/__init__.py ADDED Viewed

File without changes

backend/aimakerspace/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (214 Bytes). View file

backend/aimakerspace/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (214 Bytes). View file

backend/aimakerspace/__pycache__/text_utils.cpython-312.pyc ADDED Viewed

Binary file (8.42 kB). View file

backend/aimakerspace/__pycache__/text_utils.cpython-313.pyc ADDED Viewed

Binary file (8.59 kB). View file

backend/aimakerspace/__pycache__/vectordatabase.cpython-312.pyc ADDED Viewed

Binary file (4.98 kB). View file

backend/aimakerspace/__pycache__/vectordatabase.cpython-313.pyc ADDED Viewed

Binary file (5.05 kB). View file

backend/aimakerspace/openai_utils/__init__.py ADDED Viewed

File without changes

backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (227 Bytes). View file

backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (227 Bytes). View file

backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-312.pyc ADDED Viewed

Binary file (2.45 kB). View file

backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-313.pyc ADDED Viewed

Binary file (2.55 kB). View file

backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-312.pyc ADDED Viewed

Binary file (3.69 kB). View file

backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-313.pyc ADDED Viewed

Binary file (3.75 kB). View file

backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-312.pyc ADDED Viewed

Binary file (4.67 kB). View file

backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-313.pyc ADDED Viewed

Binary file (4.71 kB). View file

backend/aimakerspace/openai_utils/chatmodel.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from openai import OpenAI, AsyncOpenAI
+from dotenv import load_dotenv
+import os
+load_dotenv()
+class ChatOpenAI:
+    def __init__(self, model_name: str = "gpt-4o-mini"):
+        self.model_name = model_name
+        self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        if self.openai_api_key is None:
+            raise ValueError("OPENAI_API_KEY is not set")
+    def run(self, messages, text_only: bool = True, **kwargs):
+        if not isinstance(messages, list):
+            raise ValueError("messages must be a list")
+        client = OpenAI()
+        response = client.chat.completions.create(
+            model=self.model_name, messages=messages, **kwargs
+        )
+        if text_only:
+            return response.choices[0].message.content
+        return response
+    async def astream(self, messages, **kwargs):
+        if not isinstance(messages, list):
+            raise ValueError("messages must be a list")
+        client = AsyncOpenAI()
+        stream = await client.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            stream=True,
+            **kwargs
+        )
+        async for chunk in stream:
+            content = chunk.choices[0].delta.content
+            if content is not None:
+                yield content

backend/aimakerspace/openai_utils/embedding.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from dotenv import load_dotenv
+from openai import AsyncOpenAI, OpenAI
+import openai
+from typing import List
+import os
+import asyncio
+class EmbeddingModel:
+    def __init__(self, embeddings_model_name: str = "text-embedding-3-small"):
+        load_dotenv()
+        self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        self.async_client = AsyncOpenAI()
+        self.client = OpenAI()
+        if self.openai_api_key is None:
+            raise ValueError(
+                "OPENAI_API_KEY environment variable is not set. Please set it to your OpenAI API key."
+            )
+        openai.api_key = self.openai_api_key
+        self.embeddings_model_name = embeddings_model_name
+    async def async_get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
+        embedding_response = await self.async_client.embeddings.create(
+            input=list_of_text, model=self.embeddings_model_name
+        )
+        return [embeddings.embedding for embeddings in embedding_response.data]
+    async def async_get_embedding(self, text: str) -> List[float]:
+        embedding = await self.async_client.embeddings.create(
+            input=text, model=self.embeddings_model_name
+        )
+        return embedding.data[0].embedding
+    def get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
+        embedding_response = self.client.embeddings.create(
+            input=list_of_text, model=self.embeddings_model_name
+        )
+        return [embeddings.embedding for embeddings in embedding_response.data]
+    def get_embedding(self, text: str) -> List[float]:
+        embedding = self.client.embeddings.create(
+            input=text, model=self.embeddings_model_name
+        )
+        return embedding.data[0].embedding
+if __name__ == "__main__":
+    embedding_model = EmbeddingModel()
+    print(asyncio.run(embedding_model.async_get_embedding("Hello, world!")))
+    print(
+        asyncio.run(
+            embedding_model.async_get_embeddings(["Hello, world!", "Goodbye, world!"])
+        )
+    )

backend/aimakerspace/openai_utils/prompts.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import re
+class BasePrompt:
+    def __init__(self, prompt):
+        """
+        Initializes the BasePrompt object with a prompt template.
+        :param prompt: A string that can contain placeholders within curly braces
+        """
+        self.prompt = prompt
+        self._pattern = re.compile(r"\{([^}]+)\}")
+    def format_prompt(self, **kwargs):
+        """
+        Formats the prompt string using the keyword arguments provided.
+        :param kwargs: The values to substitute into the prompt string
+        :return: The formatted prompt string
+        """
+        matches = self._pattern.findall(self.prompt)
+        return self.prompt.format(**{match: kwargs.get(match, "") for match in matches})
+    def get_input_variables(self):
+        """
+        Gets the list of input variable names from the prompt string.
+        :return: List of input variable names
+        """
+        return self._pattern.findall(self.prompt)
+class RolePrompt(BasePrompt):
+    def __init__(self, prompt, role: str):
+        """
+        Initializes the RolePrompt object with a prompt template and a role.
+        :param prompt: A string that can contain placeholders within curly braces
+        :param role: The role for the message ('system', 'user', or 'assistant')
+        """
+        super().__init__(prompt)
+        self.role = role
+    def create_message(self, format=True, **kwargs):
+        """
+        Creates a message dictionary with a role and a formatted message.
+        :param kwargs: The values to substitute into the prompt string
+        :return: Dictionary containing the role and the formatted message
+        """
+        if format:
+            return {"role": self.role, "content": self.format_prompt(**kwargs)}
+        return {"role": self.role, "content": self.prompt}
+class SystemRolePrompt(RolePrompt):
+    """RolePrompt whose role is fixed to "system"."""
+    def __init__(self, prompt: str):
+        super().__init__(prompt, "system")
+class UserRolePrompt(RolePrompt):
+    """RolePrompt whose role is fixed to "user"."""
+    def __init__(self, prompt: str):
+        super().__init__(prompt, "user")
+class AssistantRolePrompt(RolePrompt):
+    """RolePrompt whose role is fixed to "assistant"."""
+    def __init__(self, prompt: str):
+        super().__init__(prompt, "assistant")
+if __name__ == "__main__":
+    prompt = BasePrompt("Hello {name}, you are {age} years old")
+    print(prompt.format_prompt(name="John", age=30))
+    prompt = SystemRolePrompt("Hello {name}, you are {age} years old")
+    print(prompt.create_message(name="John", age=30))
+    print(prompt.get_input_variables())

backend/aimakerspace/text_utils.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import os
+from typing import List
+import PyPDF2
+class TextFileLoader:
+    def __init__(self, path: str, encoding: str = "utf-8"):
+        self.documents = []
+        self.path = path
+        self.encoding = encoding
+    def load(self):
+        if os.path.isdir(self.path):
+            self.load_directory()
+        elif os.path.isfile(self.path) and self.path.endswith(".txt"):
+            self.load_file()
+        else:
+            raise ValueError(
+                "Provided path is neither a valid directory nor a .txt file."
+            )
+    def load_file(self):
+        with open(self.path, "r", encoding=self.encoding) as f:
+            self.documents.append(f.read())
+    def load_directory(self):
+        for root, _, files in os.walk(self.path):
+            for file in files:
+                if file.endswith(".txt"):
+                    with open(
+                        os.path.join(root, file), "r", encoding=self.encoding
+                    ) as f:
+                        self.documents.append(f.read())
+    def load_documents(self):
+        self.load()
+        return self.documents
+class CharacterTextSplitter:
+    def __init__(
+        self,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 200,
+    ):
+        assert (
+            chunk_size > chunk_overlap
+        ), "Chunk size must be greater than chunk overlap"
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+    def split(self, text: str) -> List[str]:
+        chunks = []
+        for i in range(0, len(text), self.chunk_size - self.chunk_overlap):
+            chunks.append(text[i : i + self.chunk_size])
+        return chunks
+    def split_texts(self, texts: List[str]) -> List[str]:
+        chunks = []
+        for text in texts:
+            chunks.extend(self.split(text))
+        return chunks
+class PDFLoader:
+    def __init__(self, path: str):
+        self.documents = []
+        self.path = path
+        print(f"PDFLoader initialized with path: {self.path}")
+    def load(self):
+        print(f"Loading PDF from path: {self.path}")
+        print(f"Path exists: {os.path.exists(self.path)}")
+        print(f"Is file: {os.path.isfile(self.path)}")
+        print(f"Is directory: {os.path.isdir(self.path)}")
+        print(f"File permissions: {oct(os.stat(self.path).st_mode)[-3:]}")
+        try:
+            # Try to open the file first to verify access
+            with open(self.path, 'rb') as test_file:
+                pass
+            # If we can open it, proceed with loading
+            self.load_file()
+        except IOError as e:
+            raise ValueError(f"Cannot access file at '{self.path}': {str(e)}")
+        except Exception as e:
+            raise ValueError(f"Error processing file at '{self.path}': {str(e)}")
+    def load_file(self):
+        with open(self.path, 'rb') as file:
+            # Create PDF reader object
+            pdf_reader = PyPDF2.PdfReader(file)
+            # Extract text from each page
+            text = ""
+            for page in pdf_reader.pages:
+                text += page.extract_text() + "\n"
+            self.documents.append(text)
+    def load_directory(self):
+        for root, _, files in os.walk(self.path):
+            for file in files:
+                if file.lower().endswith('.pdf'):
+                    file_path = os.path.join(root, file)
+                    with open(file_path, 'rb') as f:
+                        pdf_reader = PyPDF2.PdfReader(f)
+                        # Extract text from each page
+                        text = ""
+                        for page in pdf_reader.pages:
+                            text += page.extract_text() + "\n"
+                        self.documents.append(text)
+    def load_documents(self):
+        self.load()
+        return self.documents
+if __name__ == "__main__":
+    loader = TextFileLoader("data/KingLear.txt")
+    loader.load()
+    splitter = CharacterTextSplitter()
+    chunks = splitter.split_texts(loader.documents)
+    print(len(chunks))
+    print(chunks[0])
+    print("--------")
+    print(chunks[1])
+    print("--------")
+    print(chunks[-2])
+    print("--------")
+    print(chunks[-1])

backend/aimakerspace/vectordatabase.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import numpy as np
+from collections import defaultdict
+from typing import List, Tuple, Callable
+from aimakerspace.openai_utils.embedding import EmbeddingModel
+import asyncio
+def cosine_similarity(vector_a: np.array, vector_b: np.array) -> float:
+    """Computes the cosine similarity between two vectors."""
+    dot_product = np.dot(vector_a, vector_b)
+    norm_a = np.linalg.norm(vector_a)
+    norm_b = np.linalg.norm(vector_b)
+    return dot_product / (norm_a * norm_b)
+class VectorDatabase:
+    def __init__(self, embedding_model: EmbeddingModel = None):
+        self.vectors = defaultdict(np.array)
+        self.embedding_model = embedding_model or EmbeddingModel()
+    def insert(self, key: str, vector: np.array) -> None:
+        self.vectors[key] = vector
+    def search(
+        self,
+        query_vector: np.array,
+        k: int,
+        distance_measure: Callable = cosine_similarity,
+    ) -> List[Tuple[str, float]]:
+        scores = [
+            (key, distance_measure(query_vector, vector))
+            for key, vector in self.vectors.items()
+        ]
+        return sorted(scores, key=lambda x: x[1], reverse=True)[:k]
+    def search_by_text(
+        self,
+        query_text: str,
+        k: int,
+        distance_measure: Callable = cosine_similarity,
+        return_as_text: bool = False,
+    ) -> List[Tuple[str, float]]:
+        query_vector = self.embedding_model.get_embedding(query_text)
+        results = self.search(query_vector, k, distance_measure)
+        return [result[0] for result in results] if return_as_text else results
+    def retrieve_from_key(self, key: str) -> np.array:
+        return self.vectors.get(key, None)
+    async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
+        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
+        for text, embedding in zip(list_of_text, embeddings):
+            self.insert(text, np.array(embedding))
+        return self
+if __name__ == "__main__":
+    list_of_text = [
+        "I like to eat broccoli and bananas.",
+        "I ate a banana and spinach smoothie for breakfast.",
+        "Chinchillas and kittens are cute.",
+        "My sister adopted a kitten yesterday.",
+        "Look at this cute hamster munching on a piece of broccoli.",
+    ]
+    vector_db = VectorDatabase()
+    vector_db = asyncio.run(vector_db.abuild_from_list(list_of_text))
+    k = 2
+    searched_vector = vector_db.search_by_text("I think fruit is awesome!", k=k)
+    print(f"Closest {k} vector(s):", searched_vector)
+    retrieved_vector = vector_db.retrieve_from_key(
+        "I like to eat broccoli and bananas."
+    )
+    print("Retrieved vector:", retrieved_vector)
+    relevant_texts = vector_db.search_by_text(
+        "I think fruit is awesome!", k=k, return_as_text=True
+    )
+    print(f"Closest {k} text(s):", relevant_texts)

backend/main.py ADDED Viewed

	@@ -0,0 +1,211 @@

+from fastapi import FastAPI, UploadFile, File, HTTPException, WebSocket
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from typing import List, Optional, Dict, AsyncGenerator
+import os
+from dotenv import load_dotenv
+from aimakerspace.vectordatabase import VectorDatabase
+from aimakerspace.openai_utils.embedding import EmbeddingModel
+from aimakerspace.text_utils import CharacterTextSplitter, PDFLoader
+from aimakerspace.openai_utils.prompts import (
+    UserRolePrompt,
+    SystemRolePrompt,
+    AssistantRolePrompt,
+)
+from aimakerspace.openai_utils.chatmodel import ChatOpenAI
+import asyncio
+import tempfile
+import shutil
+import json
+from uuid import uuid4
+# Load environment variables from a .env file before any component reads them
+load_dotenv()
+app = FastAPI()
+# Configure CORS: only the origin listed in allow_origins may call this API
+# from a browser; any HTTP method and header is accepted from it.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:3000"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize components shared by every request
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+chat_openai = ChatOpenAI()
+# Define prompts: the system prompt is static; the user prompt has {context}
+# and {question} placeholders that are filled in per query.
+system_template = """\
+You are a helpful assistant that provides concise, direct answers based on the provided context.
+If the answer cannot be found in the context, simply say "I don't know" or "The information is not available in the provided context."
+Keep your answers brief and to the point."""
+system_role_prompt = SystemRolePrompt(system_template)
+user_prompt_template = """\
+Context:
+{context}
+Question:
+{question}
+Answer the question concisely based on the context above."""
+user_role_prompt = UserRolePrompt(user_prompt_template)
+# Session management: process-local dict keyed by session id. Entries are
+# added by the upload endpoint and nothing in this module removes them.
+sessions: Dict[str, Dict] = {}
+# Request body for the query endpoint.
+class Query(BaseModel):
+    text: str  # the user's question
+    k: int = 4  # number of chunks to retrieve as context
+# Shape of a single response item.
+# NOTE(review): no endpoint in this module references this model — confirm it is still needed.
+class DocumentResponse(BaseModel):
+    text: str
+    type: str  # 'answer' or 'context'
+    score: Optional[float] = None
+class RetrievalAugmentedQAPipeline:
+    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:
+        self.llm = llm
+        self.vector_db_retriever = vector_db_retriever
+    async def arun_pipeline(self, user_query: str, k: int = 4) -> AsyncGenerator[str, None]:
+        # Get top k most relevant chunks
+        context_list = self.vector_db_retriever.search_by_text(user_query, k=k)
+        # Format context
+        context_prompt = ""
+        for context in context_list:
+            context_prompt += context[0] + "\n"
+        # Format prompts
+        formatted_system_prompt = system_role_prompt.create_message()
+        formatted_user_prompt = user_role_prompt.create_message(
+            question=user_query,
+            context=context_prompt
+        )
+        # Stream only the LLM response
+        async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
+            yield json.dumps({
+                "type": "token",
+                "text": chunk
+            })
+        # Send context information once at the end
+        yield json.dumps({
+            "type": "context",
+            "context": [{"text": text, "score": score} for text, score in context_list]
+        })
+def process_file(file_path: str, file_name: str):
+    if file_name.lower().endswith('.pdf'):
+        loader = PDFLoader(file_path)
+    else:
+        raise HTTPException(status_code=400, detail="Only PDF files are supported")
+    documents = loader.load_documents()
+    texts = text_splitter.split_texts(documents)
+    return texts
+@app.post("/upload")
+async def upload_document(file: UploadFile = File(...)):
+    if not file.filename.endswith('.pdf'):
+        raise HTTPException(status_code=400, detail="Only PDF files are supported")
+    try:
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
+            content = await file.read()
+            temp_file.write(content)
+            temp_path = temp_file.name
+        # Process the file
+        texts = process_file(temp_path, file.filename)
+        # Create a new session
+        session_id = str(uuid4())
+        vector_db = VectorDatabase()
+        await vector_db.abuild_from_list(texts)
+        # Store session data
+        sessions[session_id] = {
+            "vector_db": vector_db,
+            "texts": texts
+        }
+        # Clean up
+        os.unlink(temp_path)
+        return {
+            "session_id": session_id,
+            "message": f"Document processed successfully. Added {len(texts)} chunks to the database."
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/query/{session_id}")
+async def query_documents(session_id: str, query: Query):
+    if session_id not in sessions:
+        raise HTTPException(status_code=404, detail="Session not found")
+    try:
+        session = sessions[session_id]
+        vector_db = session["vector_db"]
+        # Initialize RAG pipeline
+        rag_pipeline = RetrievalAugmentedQAPipeline(
+            llm=chat_openai,
+            vector_db_retriever=vector_db
+        )
+        # Create streaming response
+        async def generate():
+            async for chunk in rag_pipeline.arun_pipeline(query.text, query.k):
+                yield f"data: {chunk}\n\n"
+        return StreamingResponse(
+            generate(),
+            media_type="text/event-stream"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.websocket("/ws/{session_id}")
+async def websocket_endpoint(websocket: WebSocket, session_id: str):
+    await websocket.accept()
+    if session_id not in sessions:
+        await websocket.close(code=1008, reason="Session not found")
+        return
+    try:
+        session = sessions[session_id]
+        vector_db = session["vector_db"]
+        while True:
+            data = await websocket.receive_text()
+            query = json.loads(data)
+            # Initialize RAG pipeline
+            rag_pipeline = RetrievalAugmentedQAPipeline(
+                llm=chat_openai,
+                vector_db_retriever=vector_db
+            )
+            # Stream response
+            async for chunk in rag_pipeline.arun_pipeline(query["text"], query.get("k", 4)):
+                await websocket.send_text(json.dumps({
+                    "type": "token" if isinstance(chunk, str) else "context",
+                    "text": chunk if isinstance(chunk, str) else chunk
+                }))
+    except Exception as e:
+        await websocket.close(code=1011, reason=str(e))
+if __name__ == "__main__":
+    # Run the API directly with uvicorn, listening on all interfaces, port 9000.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=9000)

backend/requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+fastapi==0.109.2
+uvicorn==0.27.1
+python-multipart==0.0.9
+pydantic==2.10.1
+openai==1.59.9
+numpy==2.2.2
+pypdf2==3.0.1
+python-jose==3.3.0
+python-dotenv==1.0.1

frontend/.gitignore ADDED Viewed

	@@ -0,0 +1,23 @@

+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+# dependencies
+/node_modules
+/.pnp
+.pnp.js
+# testing
+/coverage
+# production
+/build
+# misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*

frontend/README.md ADDED Viewed

	@@ -0,0 +1,46 @@

+# Getting Started with Create React App
+This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
+## Available Scripts
+In the project directory, you can run:
+### `npm start`
+Runs the app in the development mode.\
+Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
+The page will reload if you make edits.\
+You will also see any lint errors in the console.
+### `npm test`
+Launches the test runner in the interactive watch mode.\
+See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
+### `npm run build`
+Builds the app for production to the `build` folder.\
+It correctly bundles React in production mode and optimizes the build for the best performance.
+The build is minified and the filenames include the hashes.\
+Your app is ready to be deployed!
+See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
+### `npm run eject`
+**Note: this is a one-way operation. Once you `eject`, you can’t go back!**
+If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
+Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own.
+You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it.
+## Learn More
+You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
+To learn React, check out the [React documentation](https://reactjs.org/).

frontend/package-lock.json ADDED Viewed

The diff for this file is too large to render. See raw diff

frontend/package.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "name": "frontend",
+  "version": "0.1.0",
+  "private": true,
+  "dependencies": {
+    "@testing-library/dom": "^10.4.0",
+    "@testing-library/jest-dom": "^6.6.3",
+    "@testing-library/react": "^16.3.0",
+    "@testing-library/user-event": "^13.5.0",
+    "@types/jest": "^27.5.2",
+    "@types/node": "^16.18.126",
+    "@types/react": "^19.1.2",
+    "@types/react-dom": "^19.1.2",
+    "react": "^19.1.0",
+    "react-dom": "^19.1.0",
+    "react-scripts": "5.0.1",
+    "typescript": "^4.9.5",
+    "web-vitals": "^2.1.4"
+  },
+  "scripts": {
+    "start": "react-scripts start",
+    "build": "react-scripts build",
+    "test": "react-scripts test",
+    "eject": "react-scripts eject"
+  },
+  "eslintConfig": {
+    "extends": [
+      "react-app",
+      "react-app/jest"
+    ]
+  },
+  "browserslist": {
+    "production": [
+      ">0.2%",
+      "not dead",
+      "not op_mini all"
+    ],
+    "development": [
+      "last 1 chrome version",
+      "last 1 firefox version",
+      "last 1 safari version"
+    ]
+  }
+}

frontend/public/favicon.ico ADDED Viewed

frontend/public/index.html ADDED Viewed

	@@ -0,0 +1,43 @@

+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <meta name="theme-color" content="#000000" />
+    <meta
+      name="description"
+      content="Web site created using create-react-app"
+    />
+    <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
+    <!--
+      manifest.json provides metadata used when your web app is installed on a
+      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
+    -->
+    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
+    <!--
+      Notice the use of %PUBLIC_URL% in the tags above.
+      It will be replaced with the URL of the `public` folder during the build.
+      Only files inside the `public` folder can be referenced from the HTML.
+      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
+      work correctly both with client-side routing and a non-root public URL.
+      Learn how to configure a non-root public URL by running `npm run build`.
+    -->
+    <title>React App</title>
+  </head>
+  <body>
+    <noscript>You need to enable JavaScript to run this app.</noscript>
+    <div id="root"></div>
+    <!--
+      This HTML file is a template.
+      If you open it directly in the browser, you will see an empty page.
+      You can add webfonts, meta tags, or analytics to this file.
+      The build step will place the bundled scripts into the <body> tag.
+      To begin the development, run `npm start` or `yarn start`.
+      To create a production bundle, use `npm run build` or `yarn build`.
+    -->
+  </body>
+</html>

frontend/public/logo192.png ADDED Viewed

frontend/public/logo512.png ADDED Viewed

frontend/public/manifest.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "short_name": "React App",
+  "name": "Create React App Sample",
+  "icons": [
+    {
+      "src": "favicon.ico",
+      "sizes": "64x64 32x32 24x24 16x16",
+      "type": "image/x-icon"
+    },
+    {
+      "src": "logo192.png",
+      "type": "image/png",
+      "sizes": "192x192"
+    },
+    {
+      "src": "logo512.png",
+      "type": "image/png",
+      "sizes": "512x512"
+    }
+  ],
+  "start_url": ".",
+  "display": "standalone",
+  "theme_color": "#000000",
+  "background_color": "#ffffff"
+}

frontend/public/robots.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# https://www.robotstxt.org/robotstxt.html
+User-agent: *
+Disallow:

frontend/src/App.css ADDED Viewed

	@@ -0,0 +1,132 @@

+/* Page container: centered column, capped at 1200px wide. */
+.App {
+  text-align: center;
+  max-width: 1200px;
+  margin: 0 auto;
+  padding: 20px;
+}
+/* Logo sizing and spin animation.
+   NOTE(review): the App component in this commit does not appear to render
+   .App-logo or .App-link — confirm before removing these rules. */
+.App-logo {
+  height: 40vmin;
+  pointer-events: none;
+}
+/* Only animate the logo when the user has not requested reduced motion. */
+@media (prefers-reduced-motion: no-preference) {
+  .App-logo {
+    animation: App-logo-spin infinite 20s linear;
+  }
+}
+/* Dark banner at the top of the page. */
+.App-header {
+  background-color: #282c34;
+  padding: 20px;
+  color: white;
+  margin-bottom: 30px;
+}
+.App-link {
+  color: #61dafb;
+}
+/* One full clockwise rotation; referenced by the .App-logo animation above. */
+@keyframes App-logo-spin {
+  from {
+    transform: rotate(0deg);
+  }
+  to {
+    transform: rotate(360deg);
+  }
+}
+/* Main content: sections stacked vertically with a 30px gap. */
+.App-main {
+  display: flex;
+  flex-direction: column;
+  gap: 30px;
+}
+/* Shared card look for the upload, query and results sections. */
+.upload-section,
+.query-section,
+.results-section {
+  background-color: #f5f5f5;
+  padding: 20px;
+  border-radius: 8px;
+  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+/* File and text inputs: full width of their container, up to 400px. */
+input[type="file"],
+input[type="text"] {
+  padding: 10px;
+  margin: 10px 0;
+  width: 100%;
+  max-width: 400px;
+  border: 1px solid #ddd;
+  border-radius: 4px;
+}
+/* Buttons: green by default, darker on hover, grey when disabled. */
+button {
+  background-color: #4CAF50;
+  color: white;
+  padding: 10px 20px;
+  border: none;
+  border-radius: 4px;
+  cursor: pointer;
+  font-size: 16px;
+  transition: background-color 0.3s;
+}
+button:hover {
+  background-color: #45a049;
+}
+button:disabled {
+  background-color: #cccccc;
+  cursor: not-allowed;
+}
+/* Status / error message banner. */
+.message {
+  padding: 10px;
+  margin: 10px 0;
+  border-radius: 4px;
+  background-color: #f8f9fa;
+  color: #333;
+}
+/* Panel for the generated answer (green tint, left-aligned text). */
+.llm-response {
+  background-color: #e8f5e9;
+  padding: 20px;
+  border-radius: 8px;
+  margin-bottom: 20px;
+  text-align: left;
+}
+.llm-response h3 {
+  color: #2e7d32;
+  margin-bottom: 10px;
+}
+/* List of retrieved document chunks. */
+.relevant-chunks {
+  text-align: left;
+}
+.relevant-chunks h3 {
+  color: #1976d2;
+  margin-bottom: 10px;
+}
+/* A single retrieved chunk, shown as a white card. */
+.result-item {
+  background-color: white;
+  padding: 15px;
+  margin: 10px 0;
+  border-radius: 4px;
+  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+}
+/* Body text of an answer or chunk. */
+.result-text {
+  margin-bottom: 5px;
+  font-size: 16px;
+  line-height: 1.5;
+}
+/* Secondary line used for a chunk's relevance score. */
+.result-score {
+  color: #666;
+  font-size: 14px;
+  font-style: italic;
+}

frontend/src/App.test.tsx ADDED Viewed

	@@ -0,0 +1,9 @@

+import React from 'react';
+import { render, screen } from '@testing-library/react';
+import App from './App';
+// Smoke test: the app shell must render its main "RAG System" heading.
+// (The create-react-app default looked for a "learn react" link, which this
+// App never renders, so that assertion could not pass.)
+test('renders the RAG System heading', () => {
+  render(<App />);
+  const headingElement = screen.getByRole('heading', { name: /rag system/i });
+  expect(headingElement).toBeInTheDocument();
+});

frontend/src/App.tsx ADDED Viewed

	@@ -0,0 +1,177 @@

+import React, { useState } from 'react';
+import './App.css';
+/**
+ * One entry of the `context` array delivered by the query stream and
+ * rendered in the "Relevant Document Chunks" list.
+ */
+interface DocumentResponse {
+  /** Chunk text displayed in the results list. */
+  text: string;
+  /** Declared on the payload but not read anywhere in this component. */
+  type: string;
+  /** Optional relevance score; rendered with two decimals when present. */
+  score?: number;
+}
+/**
+ * Base URL of the backend API. Defaults to the local development server;
+ * set REACT_APP_API_URL at build time to point at a different backend.
+ */
+const API_BASE_URL = process.env.REACT_APP_API_URL ?? 'http://localhost:9000';
+/** Prefix that marks a payload line in the streamed query response. */
+const STREAM_DATA_PREFIX = 'data: ';
+/** Events handled from the query stream, discriminated by `type`. */
+type StreamEvent =
+  | { type: 'token'; text: string }
+  | { type: 'context'; context: DocumentResponse[] };
+/**
+ * Parses a single line of the streamed query response.
+ *
+ * @param line One newline-delimited line of the response body.
+ * @returns The decoded event, or null when the line is not a `data: ` line,
+ *          is not valid JSON, or does not have a recognised shape. Returning
+ *          null (instead of throwing) keeps one bad line from aborting the
+ *          whole stream.
+ */
+function parseStreamEvent(line: string): StreamEvent | null {
+  if (!line.startsWith(STREAM_DATA_PREFIX)) {
+    return null;
+  }
+  let payload: unknown;
+  try {
+    payload = JSON.parse(line.slice(STREAM_DATA_PREFIX.length));
+  } catch (error) {
+    console.warn('Skipping malformed stream line:', line, error);
+    return null;
+  }
+  if (typeof payload !== 'object' || payload === null) {
+    return null;
+  }
+  const candidate = payload as { type?: unknown; text?: unknown; context?: unknown };
+  if (candidate.type === 'token' && typeof candidate.text === 'string') {
+    return { type: 'token', text: candidate.text };
+  }
+  if (candidate.type === 'context' && Array.isArray(candidate.context)) {
+    return { type: 'context', context: candidate.context as DocumentResponse[] };
+  }
+  return null;
+}
+/**
+ * Root component: uploads a PDF to the backend, then streams an answer and
+ * the supporting document chunks for a user query against that upload.
+ */
+function App() {
+  const [file, setFile] = useState<File | null>(null);
+  const [query, setQuery] = useState('');
+  const [results, setResults] = useState<DocumentResponse[]>([]);
+  // Upload and search are tracked separately so each button shows its own
+  // progress label; both buttons are still disabled while either is running.
+  const [uploading, setUploading] = useState(false);
+  const [searching, setSearching] = useState(false);
+  const [message, setMessage] = useState('');
+  const [sessionId, setSessionId] = useState<string | null>(null);
+  const [currentAnswer, setCurrentAnswer] = useState<string>('');
+  const busy = uploading || searching;
+  /** Keeps `file` in sync with the input, including when the selection is cleared. */
+  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
+    setFile(e.target.files?.[0] ?? null);
+  };
+  /** Sends the selected file to the backend and stores the returned session id. */
+  const handleUpload = async () => {
+    if (!file) {
+      setMessage('Please select a file first');
+      return;
+    }
+    setUploading(true);
+    const formData = new FormData();
+    formData.append('file', file);
+    try {
+      const response = await fetch(`${API_BASE_URL}/upload`, {
+        method: 'POST',
+        body: formData,
+      });
+      if (!response.ok) {
+        throw new Error(`Upload failed with status ${response.status}`);
+      }
+      const data = await response.json();
+      setMessage(data.message);
+      setSessionId(data.session_id);
+      // A new document invalidates whatever the previous query displayed.
+      setResults([]);
+      setCurrentAnswer('');
+    } catch (error) {
+      console.error('Upload failed:', error);
+      setMessage('Error uploading file');
+    } finally {
+      setUploading(false);
+    }
+  };
+  /** Applies one streamed line to component state; unrecognised lines are ignored. */
+  const applyStreamLine = (line: string) => {
+    const event = parseStreamEvent(line);
+    if (!event) {
+      return;
+    }
+    if (event.type === 'token') {
+      const { text } = event;
+      setCurrentAnswer(prev => prev + text);
+    } else {
+      setResults(event.context);
+    }
+  };
+  /** Posts the query for the current session and consumes the streamed reply. */
+  const handleQuery = async () => {
+    if (!query.trim()) {
+      setMessage('Please enter a query');
+      return;
+    }
+    if (!sessionId) {
+      setMessage('Please upload a document first');
+      return;
+    }
+    setSearching(true);
+    // Clear output of the previous query so stale chunks or errors are not
+    // shown next to the new answer while it streams in.
+    setMessage('');
+    setResults([]);
+    setCurrentAnswer('');
+    try {
+      const response = await fetch(`${API_BASE_URL}/query/${encodeURIComponent(sessionId)}`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+        body: JSON.stringify({ text: query, k: 4 }),
+      });
+      if (!response.ok) {
+        throw new Error(`Query failed with status ${response.status}`);
+      }
+      // Handle streaming response
+      const reader = response.body?.getReader();
+      if (!reader) {
+        throw new Error('No reader available');
+      }
+      const decoder = new TextDecoder();
+      let buffer = '';
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        // The last piece may be an incomplete line; keep it for the next chunk.
+        buffer = lines.pop() ?? '';
+        for (const line of lines) {
+          applyStreamLine(line);
+        }
+      }
+      // Flush bytes still held by the decoder and process a final line that
+      // arrived without a trailing newline.
+      buffer += decoder.decode();
+      if (buffer) {
+        applyStreamLine(buffer);
+      }
+    } catch (error) {
+      console.error('Query failed:', error);
+      setMessage('Error querying documents');
+    } finally {
+      setSearching(false);
+    }
+  };
+  return (
+    <div className="App">
+      <header className="App-header">
+        <h1>RAG System</h1>
+      </header>
+      <main className="App-main">
+        <section className="upload-section">
+          <h2>Upload PDF Document</h2>
+          <input type="file" accept=".pdf" onChange={handleFileChange} />
+          <button onClick={handleUpload} disabled={busy || !file}>
+            {uploading ? 'Uploading...' : 'Upload'}
+          </button>
+        </section>
+        <section className="query-section">
+          <h2>Query Documents</h2>
+          <input
+            type="text"
+            value={query}
+            onChange={(e) => setQuery(e.target.value)}
+            placeholder="Enter your query..."
+            disabled={!sessionId}
+          />
+          <button onClick={handleQuery} disabled={busy || !query.trim() || !sessionId}>
+            {searching ? 'Searching...' : 'Search'}
+          </button>
+        </section>
+        {message && <div className="message">{message}</div>}
+        <section className="results-section">
+          <h2>Results</h2>
+          {currentAnswer && (
+            <div className="llm-response">
+              <h3>AI Response</h3>
+              <p className="result-text">{currentAnswer}</p>
+            </div>
+          )}
+          {results.length > 0 && (
+            <div className="relevant-chunks">
+              <h3>Relevant Document Chunks</h3>
+              {results.map((result, index) => (
+                <div key={index} className="result-item">
+                  <p className="result-text">{result.text}</p>
+                  {/* typeof check also rejects a null score, which would crash toFixed */}
+                  {typeof result.score === 'number' && (
+                    <p className="result-score">Relevance Score: {result.score.toFixed(2)}</p>
+                  )}
+                </div>
+              ))}
+            </div>
+          )}
+        </section>
+      </main>
+    </div>
+  );
+}
+export default App;

frontend/src/index.css ADDED Viewed

	@@ -0,0 +1,13 @@

+/* Global page defaults: no body margin, system UI font stack, smoothed text. */
+body {
+  margin: 0;
+  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
+    'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
+    sans-serif;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+/* Monospace font stack for inline code. */
+code {
+  font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
+    monospace;
+}

frontend/src/index.tsx ADDED Viewed

	@@ -0,0 +1,19 @@

+import React from 'react';
+import ReactDOM from 'react-dom/client';
+import './index.css';
+import App from './App';
+import reportWebVitals from './reportWebVitals';
+// Mount the application into the #root container inside React.StrictMode.
+const container = document.getElementById('root') as HTMLElement;
+const root = ReactDOM.createRoot(container);
+root.render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>
+);
+// Called without a handler, so no metrics are collected. To measure
+// performance, pass a function that receives each result, for example
+// reportWebVitals(console.log), or one that forwards results to an
+// analytics endpoint. Learn more: https://bit.ly/CRA-vitals
+reportWebVitals();

frontend/src/logo.svg ADDED Viewed

frontend/src/react-app-env.d.ts ADDED Viewed

	@@ -0,0 +1 @@


1	+ /// <reference types="react-scripts" />

frontend/src/reportWebVitals.ts ADDED Viewed

	@@ -0,0 +1,15 @@

+import { ReportHandler } from 'web-vitals';
+/**
+ * Lazily loads `web-vitals` and registers `onPerfEntry` with each of its
+ * metric collectors (CLS, FID, FCP, LCP, TTFB). Does nothing when no
+ * function is supplied.
+ */
+const reportWebVitals = (onPerfEntry?: ReportHandler) => {
+  if (!(onPerfEntry instanceof Function)) {
+    return;
+  }
+  const handler: ReportHandler = onPerfEntry;
+  import('web-vitals').then((vitals) => {
+    vitals.getCLS(handler);
+    vitals.getFID(handler);
+    vitals.getFCP(handler);
+    vitals.getLCP(handler);
+    vitals.getTTFB(handler);
+  });
+};
+export default reportWebVitals;

frontend/src/setupTests.ts ADDED Viewed

	@@ -0,0 +1,5 @@

+// jest-dom adds custom jest matchers for asserting on DOM nodes.
+// allows you to do things like:
+// expect(element).toHaveTextContent(/react/i)
+// learn more: https://github.com/testing-library/jest-dom
+import '@testing-library/jest-dom';

frontend/tsconfig.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "compilerOptions": {
+    "target": "es5",
+    "lib": [
+      "dom",
+      "dom.iterable",
+      "esnext"
+    ],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "strict": true,
+    "forceConsistentCasingInFileNames": true,
+    "noFallthroughCasesInSwitch": true,
+    "module": "esnext",
+    "moduleResolution": "node",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react-jsx"
+  },
+  "include": [
+    "src"
+  ]
+}