afulara commited on
Commit
5cd3431
·
0 Parent(s):

Initial files

Browse files
Files changed (44) hide show
  1. .env +1 -0
  2. backend/aimakerspace/__init__.py +0 -0
  3. backend/aimakerspace/__pycache__/__init__.cpython-312.pyc +0 -0
  4. backend/aimakerspace/__pycache__/__init__.cpython-313.pyc +0 -0
  5. backend/aimakerspace/__pycache__/text_utils.cpython-312.pyc +0 -0
  6. backend/aimakerspace/__pycache__/text_utils.cpython-313.pyc +0 -0
  7. backend/aimakerspace/__pycache__/vectordatabase.cpython-312.pyc +0 -0
  8. backend/aimakerspace/__pycache__/vectordatabase.cpython-313.pyc +0 -0
  9. backend/aimakerspace/openai_utils/__init__.py +0 -0
  10. backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-312.pyc +0 -0
  11. backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-313.pyc +0 -0
  12. backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-312.pyc +0 -0
  13. backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-313.pyc +0 -0
  14. backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-312.pyc +0 -0
  15. backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-313.pyc +0 -0
  16. backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-312.pyc +0 -0
  17. backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-313.pyc +0 -0
  18. backend/aimakerspace/openai_utils/chatmodel.py +45 -0
  19. backend/aimakerspace/openai_utils/embedding.py +59 -0
  20. backend/aimakerspace/openai_utils/prompts.py +78 -0
  21. backend/aimakerspace/text_utils.py +136 -0
  22. backend/aimakerspace/vectordatabase.py +81 -0
  23. backend/main.py +211 -0
  24. backend/requirements.txt +9 -0
  25. frontend/.gitignore +23 -0
  26. frontend/README.md +46 -0
  27. frontend/package-lock.json +0 -0
  28. frontend/package.json +44 -0
  29. frontend/public/favicon.ico +0 -0
  30. frontend/public/index.html +43 -0
  31. frontend/public/logo192.png +0 -0
  32. frontend/public/logo512.png +0 -0
  33. frontend/public/manifest.json +25 -0
  34. frontend/public/robots.txt +3 -0
  35. frontend/src/App.css +132 -0
  36. frontend/src/App.test.tsx +9 -0
  37. frontend/src/App.tsx +177 -0
  38. frontend/src/index.css +13 -0
  39. frontend/src/index.tsx +19 -0
  40. frontend/src/logo.svg +1 -0
  41. frontend/src/react-app-env.d.ts +1 -0
  42. frontend/src/reportWebVitals.ts +15 -0
  43. frontend/src/setupTests.ts +5 -0
  44. frontend/tsconfig.json +26 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=<REDACTED: a real secret was committed here; revoke and rotate this key, and keep .env out of version control>
backend/aimakerspace/__init__.py ADDED
File without changes
backend/aimakerspace/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (214 Bytes). View file
 
backend/aimakerspace/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (214 Bytes). View file
 
backend/aimakerspace/__pycache__/text_utils.cpython-312.pyc ADDED
Binary file (8.42 kB). View file
 
backend/aimakerspace/__pycache__/text_utils.cpython-313.pyc ADDED
Binary file (8.59 kB). View file
 
backend/aimakerspace/__pycache__/vectordatabase.cpython-312.pyc ADDED
Binary file (4.98 kB). View file
 
backend/aimakerspace/__pycache__/vectordatabase.cpython-313.pyc ADDED
Binary file (5.05 kB). View file
 
backend/aimakerspace/openai_utils/__init__.py ADDED
File without changes
backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (227 Bytes). View file
 
backend/aimakerspace/openai_utils/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (227 Bytes). View file
 
backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-312.pyc ADDED
Binary file (2.45 kB). View file
 
backend/aimakerspace/openai_utils/__pycache__/chatmodel.cpython-313.pyc ADDED
Binary file (2.55 kB). View file
 
backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-312.pyc ADDED
Binary file (3.69 kB). View file
 
backend/aimakerspace/openai_utils/__pycache__/embedding.cpython-313.pyc ADDED
Binary file (3.75 kB). View file
 
backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-312.pyc ADDED
Binary file (4.67 kB). View file
 
backend/aimakerspace/openai_utils/__pycache__/prompts.cpython-313.pyc ADDED
Binary file (4.71 kB). View file
 
backend/aimakerspace/openai_utils/chatmodel.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI, AsyncOpenAI
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
+ load_dotenv()
6
+
7
+
8
class ChatOpenAI:
    """Small wrapper around the OpenAI chat-completions API.

    The API key is read from the ``OPENAI_API_KEY`` environment variable at
    construction time and handed to every client this wrapper creates.
    """

    def __init__(self, model_name: str = "gpt-4o-mini"):
        """Remember the model name and make sure an API key is configured.

        :param model_name: Name of the chat model to request
        :raises ValueError: If ``OPENAI_API_KEY`` is not set
        """
        self.model_name = model_name
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if self.openai_api_key is None:
            raise ValueError("OPENAI_API_KEY is not set")

    def run(self, messages, text_only: bool = True, **kwargs):
        """Send ``messages`` to the model and return its reply.

        :param messages: List of chat message dictionaries
        :param text_only: When true, return only the first choice's text;
            otherwise return the full response object
        :param kwargs: Extra arguments forwarded to ``chat.completions.create``
        :raises ValueError: If ``messages`` is not a list
        """
        if not isinstance(messages, list):
            raise ValueError("messages must be a list")

        # Use the key validated in __init__ rather than re-reading the environment.
        client = OpenAI(api_key=self.openai_api_key)
        response = client.chat.completions.create(
            model=self.model_name, messages=messages, **kwargs
        )

        if text_only:
            return response.choices[0].message.content

        return response

    async def astream(self, messages, **kwargs):
        """Stream the model's reply, yielding each non-empty text fragment.

        :param messages: List of chat message dictionaries
        :param kwargs: Extra arguments forwarded to ``chat.completions.create``
        :raises ValueError: If ``messages`` is not a list
        """
        if not isinstance(messages, list):
            raise ValueError("messages must be a list")

        client = AsyncOpenAI(api_key=self.openai_api_key)

        stream = await client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stream=True,
            **kwargs
        )

        async for chunk in stream:
            # A streamed chunk may carry no choices (for example a usage-only
            # chunk when the caller passes stream_options); skip it instead
            # of failing on choices[0].
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                yield content
backend/aimakerspace/openai_utils/embedding.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from openai import AsyncOpenAI, OpenAI
3
+ import openai
4
+ from typing import List
5
+ import os
6
+ import asyncio
7
+
8
+
9
class EmbeddingModel:
    """Synchronous and asynchronous access to an OpenAI embeddings model."""

    def __init__(self, embeddings_model_name: str = "text-embedding-3-small"):
        """Load the API key from the environment and create both clients.

        :param embeddings_model_name: Name of the embeddings model to request
        :raises ValueError: If ``OPENAI_API_KEY`` is not set
        """
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")

        # Validate the key BEFORE building the clients so that a missing key
        # surfaces as this explicit error rather than whatever the client
        # constructors raise on their own.
        if self.openai_api_key is None:
            raise ValueError(
                "OPENAI_API_KEY environment variable is not set. Please set it to your OpenAI API key."
            )
        openai.api_key = self.openai_api_key
        self.embeddings_model_name = embeddings_model_name

        self.async_client = AsyncOpenAI(api_key=self.openai_api_key)
        self.client = OpenAI(api_key=self.openai_api_key)

    async def async_get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
        """Embed several texts in one asynchronous request.

        :param list_of_text: Texts to embed
        :return: One embedding per input text, in the order of the response data
        """
        embedding_response = await self.async_client.embeddings.create(
            input=list_of_text, model=self.embeddings_model_name
        )

        return [item.embedding for item in embedding_response.data]

    async def async_get_embedding(self, text: str) -> List[float]:
        """Embed a single text asynchronously and return its vector."""
        embedding = await self.async_client.embeddings.create(
            input=text, model=self.embeddings_model_name
        )

        return embedding.data[0].embedding

    def get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
        """Embed several texts in one blocking request.

        :param list_of_text: Texts to embed
        :return: One embedding per input text, in the order of the response data
        """
        embedding_response = self.client.embeddings.create(
            input=list_of_text, model=self.embeddings_model_name
        )

        return [item.embedding for item in embedding_response.data]

    def get_embedding(self, text: str) -> List[float]:
        """Embed a single text with a blocking request and return its vector."""
        embedding = self.client.embeddings.create(
            input=text, model=self.embeddings_model_name
        )

        return embedding.data[0].embedding
50
+
51
+
52
if __name__ == "__main__":
    # Manual smoke test: embed one string, then a batch of two.
    embedding_model = EmbeddingModel()

    single_vector = asyncio.run(embedding_model.async_get_embedding("Hello, world!"))
    print(single_vector)

    batch_vectors = asyncio.run(
        embedding_model.async_get_embeddings(["Hello, world!", "Goodbye, world!"])
    )
    print(batch_vectors)
backend/aimakerspace/openai_utils/prompts.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
class BasePrompt:
    """A prompt template whose ``{placeholder}`` fields are filled by keyword."""

    def __init__(self, prompt):
        """
        Store the template and compile the placeholder pattern.

        :param prompt: A string that can contain placeholders within curly braces
        """
        self.prompt = prompt
        self._pattern = re.compile(r"\{([^}]+)\}")

    def format_prompt(self, **kwargs):
        """
        Substitute the given values into the template.

        Placeholders with no matching keyword are replaced by an empty string;
        keywords that match no placeholder are ignored.

        :param kwargs: The values to substitute into the prompt string
        :return: The formatted prompt string
        """
        substitutions = {}
        for placeholder in self._pattern.findall(self.prompt):
            substitutions[placeholder] = kwargs.get(placeholder, "")
        return self.prompt.format(**substitutions)

    def get_input_variables(self):
        """
        List the placeholder names found in the template, in order of appearance.

        :return: List of input variable names
        """
        names = self._pattern.findall(self.prompt)
        return names
31
+
32
+
33
class RolePrompt(BasePrompt):
    """A prompt template bound to a chat role."""

    def __init__(self, prompt, role: str):
        """
        Store the template together with the role it speaks as.

        :param prompt: A string that can contain placeholders within curly braces
        :param role: The role for the message ('system', 'user', or 'assistant')
        """
        super().__init__(prompt)
        self.role = role

    def create_message(self, format=True, **kwargs):
        """
        Build a chat message dictionary for this prompt.

        :param format: When true, substitute ``kwargs`` into the template;
            otherwise use the raw template text as the content
        :param kwargs: The values to substitute into the prompt string
        :return: Dictionary containing the role and the message content
        """
        content = self.format_prompt(**kwargs) if format else self.prompt
        return {"role": self.role, "content": content}
55
+
56
+
57
class SystemRolePrompt(RolePrompt):
    """Prompt template whose messages carry the 'system' role."""

    def __init__(self, prompt: str):
        super().__init__(prompt, role="system")
60
+
61
+
62
class UserRolePrompt(RolePrompt):
    """Prompt template whose messages carry the 'user' role."""

    def __init__(self, prompt: str):
        super().__init__(prompt, role="user")
65
+
66
+
67
class AssistantRolePrompt(RolePrompt):
    """Prompt template whose messages carry the 'assistant' role."""

    def __init__(self, prompt: str):
        super().__init__(prompt, role="assistant")
70
+
71
+
72
if __name__ == "__main__":
    # Manual demo: format a plain template, then build a system message from one.
    prompt = BasePrompt("Hello {name}, you are {age} years old")
    formatted = prompt.format_prompt(name="John", age=30)
    print(formatted)

    prompt = SystemRolePrompt("Hello {name}, you are {age} years old")
    message = prompt.create_message(name="John", age=30)
    print(message)
    print(prompt.get_input_variables())
backend/aimakerspace/text_utils.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+ import PyPDF2
4
+
5
+
6
class TextFileLoader:
    """Collect the contents of a ``.txt`` file, or of every ``.txt`` file under a directory."""

    def __init__(self, path: str, encoding: str = "utf-8"):
        """Remember where to read from; nothing is read until ``load`` is called."""
        self.documents = []
        self.path = path
        self.encoding = encoding

    def load(self):
        """Read the configured path into ``self.documents``.

        :raises ValueError: If the path is neither a directory nor a ``.txt`` file
        """
        if os.path.isdir(self.path):
            self.load_directory()
            return
        if os.path.isfile(self.path) and self.path.endswith(".txt"):
            self.load_file()
            return
        raise ValueError(
            "Provided path is neither a valid directory nor a .txt file."
        )

    def load_file(self):
        """Append the full contents of the single file at ``self.path``."""
        with open(self.path, "r", encoding=self.encoding) as handle:
            self.documents.append(handle.read())

    def load_directory(self):
        """Append the contents of every ``.txt`` file found beneath ``self.path``."""
        for folder, _, names in os.walk(self.path):
            for name in names:
                if not name.endswith(".txt"):
                    continue
                full_path = os.path.join(folder, name)
                with open(full_path, "r", encoding=self.encoding) as handle:
                    self.documents.append(handle.read())

    def load_documents(self):
        """Load the configured path and return the accumulated documents."""
        self.load()
        return self.documents
38
+
39
+
40
class CharacterTextSplitter:
    """Cut text into fixed-size character windows that overlap their neighbours."""

    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """
        :param chunk_size: Maximum number of characters per chunk
        :param chunk_overlap: Number of characters shared by consecutive chunks;
            must be smaller than ``chunk_size``
        """
        assert (
            chunk_size > chunk_overlap
        ), "Chunk size must be greater than chunk overlap"

        self.chunk_overlap = chunk_overlap
        self.chunk_size = chunk_size

    def split(self, text: str) -> List[str]:
        """Return the chunks of ``text``; an empty string yields no chunks."""
        # Each window starts (chunk_size - chunk_overlap) characters after the last.
        stride = self.chunk_size - self.chunk_overlap
        starts = range(0, len(text), stride)
        return [text[start : start + self.chunk_size] for start in starts]

    def split_texts(self, texts: List[str]) -> List[str]:
        """Split every text in turn and return all chunks as one flat list."""
        return [piece for text in texts for piece in self.split(text)]
64
+
65
+
66
class PDFLoader:
    """Extract text from a PDF file, or from every PDF beneath a directory."""

    def __init__(self, path: str):
        """Remember where to read from; nothing is read until ``load`` is called."""
        self.documents = []
        self.path = path
        print(f"PDFLoader initialized with path: {self.path}")

    @staticmethod
    def _extract_text(stream) -> str:
        """Return the text of every page read from ``stream``, one page per line break."""
        pdf_reader = PyPDF2.PdfReader(stream)
        # "or ''" keeps a page with no extractable text from breaking the join.
        return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)

    def load(self):
        """Read the configured path into ``self.documents``.

        A directory is walked for ``.pdf`` files; anything else is treated as
        a single PDF file.

        :raises ValueError: If the path cannot be accessed or parsed
        """
        print(f"Loading PDF from path: {self.path}")
        print(f"Path exists: {os.path.exists(self.path)}")
        print(f"Is file: {os.path.isfile(self.path)}")
        print(f"Is directory: {os.path.isdir(self.path)}")

        try:
            # os.stat is inside the try so a missing path is reported through
            # the same ValueError as every other access problem.
            print(f"File permissions: {oct(os.stat(self.path).st_mode)[-3:]}")

            if os.path.isdir(self.path):
                self.load_directory()
            else:
                self.load_file()

        except IOError as e:
            raise ValueError(f"Cannot access file at '{self.path}': {str(e)}") from e
        except Exception as e:
            raise ValueError(f"Error processing file at '{self.path}': {str(e)}") from e

    def load_file(self):
        """Append the extracted text of the single PDF at ``self.path``."""
        with open(self.path, 'rb') as file:
            self.documents.append(self._extract_text(file))

    def load_directory(self):
        """Append the extracted text of every ``.pdf`` file found beneath ``self.path``."""
        for root, _, files in os.walk(self.path):
            for file in files:
                if file.lower().endswith('.pdf'):
                    file_path = os.path.join(root, file)
                    with open(file_path, 'rb') as f:
                        self.documents.append(self._extract_text(f))

    def load_documents(self):
        """Load the configured path and return the accumulated documents."""
        self.load()
        return self.documents
122
+
123
+
124
if __name__ == "__main__":
    # Manual demo: split a sample text and show the first two and last two chunks.
    loader = TextFileLoader("data/KingLear.txt")
    loader.load()
    splitter = CharacterTextSplitter()
    chunks = splitter.split_texts(loader.documents)
    print(len(chunks))
    for position in (0, 1, -2):
        print(chunks[position])
        print("--------")
    print(chunks[-1])
backend/aimakerspace/vectordatabase.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from collections import defaultdict
3
from typing import List, Tuple, Callable, Optional, Union
4
+ from aimakerspace.openai_utils.embedding import EmbeddingModel
5
+ import asyncio
6
+
7
+
8
def cosine_similarity(vector_a: np.ndarray, vector_b: np.ndarray) -> float:
    """Compute the cosine similarity between two vectors.

    :param vector_a: First vector
    :param vector_b: Second vector, of the same length
    :return: The cosine of the angle between the vectors, or ``0.0`` when
        either vector has zero length (the angle is undefined in that case)
    """
    norm_product = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    # Dividing by a zero norm would produce NaN, which cannot be ranked reliably.
    if norm_product == 0:
        return 0.0
    return np.dot(vector_a, vector_b) / norm_product
14
+
15
+
16
class VectorDatabase:
    """In-memory mapping from text keys to embedding vectors with similarity search."""

    def __init__(self, embedding_model: EmbeddingModel = None):
        """
        :param embedding_model: Model used to embed query text; a default
            ``EmbeddingModel`` is created when none is given
        """
        # A plain dict: the previous defaultdict(np.array) factory could never
        # build a default value, because np.array() requires an argument.
        self.vectors = {}
        self.embedding_model = embedding_model or EmbeddingModel()

    def insert(self, key: str, vector: np.ndarray) -> None:
        """Store ``vector`` under ``key``, replacing any existing entry."""
        self.vectors[key] = vector

    def search(
        self,
        query_vector: np.ndarray,
        k: int,
        distance_measure: Callable = cosine_similarity,
    ) -> List[Tuple[str, float]]:
        """Return the ``k`` stored entries scoring highest against ``query_vector``.

        :param query_vector: Vector to compare every stored vector with
        :param k: Maximum number of results
        :param distance_measure: Callable scoring ``(query_vector, stored_vector)``;
            higher scores rank first
        :return: ``(key, score)`` pairs, best first
        """
        scores = [
            (key, distance_measure(query_vector, vector))
            for key, vector in self.vectors.items()
        ]
        return sorted(scores, key=lambda x: x[1], reverse=True)[:k]

    def search_by_text(
        self,
        query_text: str,
        k: int,
        distance_measure: Callable = cosine_similarity,
        return_as_text: bool = False,
    ) -> Union[List[Tuple[str, float]], List[str]]:
        """Embed ``query_text`` and search with the resulting vector.

        The embedding is fetched with the model's blocking ``get_embedding`` call.

        :param return_as_text: When true, return only the matching keys
            instead of ``(key, score)`` pairs
        """
        query_vector = self.embedding_model.get_embedding(query_text)
        results = self.search(query_vector, k, distance_measure)
        return [result[0] for result in results] if return_as_text else results

    def retrieve_from_key(self, key: str) -> Optional[np.ndarray]:
        """Return the vector stored under ``key``, or ``None`` if there is none."""
        return self.vectors.get(key, None)

    async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
        """Embed every text and insert it keyed by the text itself.

        :return: This database, so the call can be chained
        """
        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self
55
+
56
+
57
if __name__ == "__main__":
    # Manual demo: index a few sentences, then query them by text and by key.
    list_of_text = [
        "I like to eat broccoli and bananas.",
        "I ate a banana and spinach smoothie for breakfast.",
        "Chinchillas and kittens are cute.",
        "My sister adopted a kitten yesterday.",
        "Look at this cute hamster munching on a piece of broccoli.",
    ]
    k = 2

    vector_db = asyncio.run(VectorDatabase().abuild_from_list(list_of_text))

    searched_vector = vector_db.search_by_text("I think fruit is awesome!", k=k)
    print(f"Closest {k} vector(s):", searched_vector)

    retrieved_vector = vector_db.retrieve_from_key(list_of_text[0])
    print("Retrieved vector:", retrieved_vector)

    relevant_texts = vector_db.search_by_text(
        "I think fruit is awesome!", k=k, return_as_text=True
    )
    print(f"Closest {k} text(s):", relevant_texts)
backend/main.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, WebSocket
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import StreamingResponse
4
+ from pydantic import BaseModel
5
+ from typing import List, Optional, Dict, AsyncGenerator
6
+ import os
7
+ from dotenv import load_dotenv
8
+ from aimakerspace.vectordatabase import VectorDatabase
9
+ from aimakerspace.openai_utils.embedding import EmbeddingModel
10
+ from aimakerspace.text_utils import CharacterTextSplitter, PDFLoader
11
+ from aimakerspace.openai_utils.prompts import (
12
+ UserRolePrompt,
13
+ SystemRolePrompt,
14
+ AssistantRolePrompt,
15
+ )
16
+ from aimakerspace.openai_utils.chatmodel import ChatOpenAI
17
+ import asyncio
18
+ import tempfile
19
+ import shutil
20
+ import json
21
+ from uuid import uuid4
22
+
23
# Load environment variables (for example OPENAI_API_KEY) from a local .env file
load_dotenv()

app = FastAPI()

# Configure CORS: only the origin http://localhost:3000 may call this API
# from a browser; all methods and headers are allowed for that origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize components shared by every request
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chat_openai = ChatOpenAI()

# Define prompts
# System message: answer only from the supplied context and keep it short.
system_template = """\
You are a helpful assistant that provides concise, direct answers based on the provided context.
If the answer cannot be found in the context, simply say "I don't know" or "The information is not available in the provided context."
Keep your answers brief and to the point."""
system_role_prompt = SystemRolePrompt(system_template)

# User message: {context} and {question} are filled in per query.
user_prompt_template = """\
Context:
{context}

Question:
{question}

Answer the question concisely based on the context above."""
user_role_prompt = UserRolePrompt(user_prompt_template)

# Session management: session id -> {"vector_db": ..., "texts": ...}.
# Kept in process memory only, so sessions are lost when the process exits.
sessions: Dict[str, Dict] = {}
60
+
61
class Query(BaseModel):
    """Request body for a question asked against an uploaded document."""

    text: str  # the question to answer
    k: int = 4  # how many chunks to retrieve as context
64
+
65
class DocumentResponse(BaseModel):
    """A single piece of response text, tagged as an answer or as context."""

    text: str
    type: str  # 'answer' or 'context'
    score: Optional[float] = None  # optional; defaults to None when omitted
69
+
70
class RetrievalAugmentedQAPipeline:
    """Answer a question from retrieved document chunks, streaming the result."""

    def __init__(self, llm: ChatOpenAI, vector_db_retriever: VectorDatabase) -> None:
        """
        :param llm: Chat model used to generate the answer
        :param vector_db_retriever: Database searched for relevant chunks
        """
        self.llm = llm
        self.vector_db_retriever = vector_db_retriever

    async def arun_pipeline(self, user_query: str, k: int = 4) -> AsyncGenerator[str, None]:
        """Retrieve context for ``user_query`` and stream the model's answer.

        Yields JSON strings: one ``{"type": "token", "text": ...}`` object per
        streamed fragment, followed by a single ``{"type": "context", ...}``
        object listing the retrieved chunks with their scores.

        :param user_query: The question to answer
        :param k: Number of chunks to retrieve as context
        """
        # search_by_text embeds the query through a blocking client call, so
        # run it in a worker thread to keep the event loop responsive.
        context_list = await asyncio.to_thread(
            self.vector_db_retriever.search_by_text, user_query, k=k
        )

        # One retrieved chunk per line
        context_prompt = "".join(f"{text}\n" for text, _ in context_list)

        # Format prompts
        formatted_system_prompt = system_role_prompt.create_message()
        formatted_user_prompt = user_role_prompt.create_message(
            question=user_query,
            context=context_prompt
        )

        # Stream only the LLM response
        async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
            yield json.dumps({
                "type": "token",
                "text": chunk
            })

        # Send context information once at the end. float() keeps the score
        # JSON-serializable whatever numeric type the distance measure returns.
        yield json.dumps({
            "type": "context",
            "context": [
                {"text": text, "score": float(score)} for text, score in context_list
            ]
        })
103
+
104
def process_file(file_path: str, file_name: str):
    """Load the PDF at ``file_path`` and return its text split into chunks.

    :param file_path: Location of the file on disk
    :param file_name: Original file name, used only for the extension check
    :raises HTTPException: 400 when ``file_name`` does not end in ``.pdf``
    """
    is_pdf = file_name.lower().endswith('.pdf')
    if not is_pdf:
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    documents = PDFLoader(file_path).load_documents()
    return text_splitter.split_texts(documents)
113
+
114
@app.post("/upload")
async def upload_document(file: UploadFile = File(...)):
    """Accept a PDF upload, index its text, and open a new session for it.

    :return: The new session id and a summary message
    :raises HTTPException: 400 when the upload is not a PDF or contains no
        extractable text; 500 when processing fails
    """
    # Case-insensitive, and tolerant of a missing filename, matching process_file.
    if not (file.filename or "").lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    temp_path = None
    try:
        # Create a temporary file holding the upload
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())

        # Process the file
        texts = process_file(temp_path, file.filename)
        if not texts:
            # Nothing to embed, so there is nothing to query either.
            raise HTTPException(
                status_code=400, detail="No text could be extracted from the PDF"
            )

        # Create a new session
        session_id = str(uuid4())
        vector_db = VectorDatabase()
        await vector_db.abuild_from_list(texts)

        # Store session data
        sessions[session_id] = {
            "vector_db": vector_db,
            "texts": texts
        }

        return {
            "session_id": session_id,
            "message": f"Document processed successfully. Added {len(texts)} chunks to the database."
        }
    except HTTPException:
        # Keep deliberate HTTP errors (such as the 400s above) intact
        # instead of rewrapping them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Clean up the temporary file on success and on failure alike.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
149
+
150
@app.post("/query/{session_id}")
async def query_documents(session_id: str, query: Query):
    """Answer ``query`` against the session's document as a server-sent event stream.

    :raises HTTPException: 404 for an unknown session; 500 if the response
        cannot be set up
    """
    if session_id not in sessions:
        raise HTTPException(status_code=404, detail="Session not found")

    try:
        # Build the RAG pipeline on top of this session's vector database
        rag_pipeline = RetrievalAugmentedQAPipeline(
            llm=chat_openai,
            vector_db_retriever=sessions[session_id]["vector_db"],
        )

        async def generate():
            # Each pipeline item is already a JSON string; frame it as one SSE event.
            async for payload in rag_pipeline.arun_pipeline(query.text, query.k):
                yield f"data: {payload}\n\n"

        # NOTE(review): the generator body runs after this function returns,
        # so the except below presumably cannot see errors raised while
        # streaming; confirm whether those need their own handling.
        return StreamingResponse(generate(), media_type="text/event-stream")
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
176
+
177
@app.websocket("/ws/{session_id}")
async def websocket_endpoint(websocket: WebSocket, session_id: str):
    """Serve questions for one session over a WebSocket.

    Each incoming message is a JSON object with a ``text`` field and an
    optional ``k`` field. Every item produced by the pipeline, which is
    already a JSON string typed "token" or "context", is forwarded to the
    client unchanged.
    """
    # Imported locally so this handler does not depend on the module's import list.
    from fastapi import WebSocketDisconnect

    await websocket.accept()

    if session_id not in sessions:
        await websocket.close(code=1008, reason="Session not found")
        return

    try:
        session = sessions[session_id]
        vector_db = session["vector_db"]

        # The pipeline holds no per-question state, so build it once per connection.
        rag_pipeline = RetrievalAugmentedQAPipeline(
            llm=chat_openai,
            vector_db_retriever=vector_db
        )

        while True:
            data = await websocket.receive_text()
            query = json.loads(data)

            # Forward the pipeline's JSON as-is. Wrapping it in another
            # json.dumps double-encoded every message and labelled the
            # final context message as a token.
            async for chunk in rag_pipeline.arun_pipeline(query["text"], query.get("k", 4)):
                await websocket.send_text(chunk)

    except WebSocketDisconnect:
        # The client went away; the socket is already closed.
        return
    except Exception as e:
        # A close frame's reason is limited to 123 bytes, so trim the message.
        reason = str(e).encode("utf-8")[:123].decode("utf-8", errors="ignore")
        await websocket.close(code=1011, reason=reason)
208
+
209
if __name__ == "__main__":
    # Run the API with uvicorn when this file is executed directly.
    import uvicorn
    # NOTE(review): host "0.0.0.0" listens on all network interfaces; confirm
    # that is intended outside local development.
    uvicorn.run(app, host="0.0.0.0", port=9000)
backend/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.2
2
+ uvicorn==0.27.1
3
+ python-multipart==0.0.9
4
+ pydantic==2.10.1
5
+ openai==1.59.9
6
+ numpy==2.2.2
7
+ pypdf2==3.0.1
8
+ python-jose==3.3.0
9
+ python-dotenv==1.0.1
frontend/.gitignore ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+ /.pnp
6
+ .pnp.js
7
+
8
+ # testing
9
+ /coverage
10
+
11
+ # production
12
+ /build
13
+
14
+ # misc
15
+ .DS_Store
16
+ .env.local
17
+ .env.development.local
18
+ .env.test.local
19
+ .env.production.local
20
+
21
+ npm-debug.log*
22
+ yarn-debug.log*
23
+ yarn-error.log*
frontend/README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Getting Started with Create React App
2
+
3
+ This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
4
+
5
+ ## Available Scripts
6
+
7
+ In the project directory, you can run:
8
+
9
+ ### `npm start`
10
+
11
+ Runs the app in the development mode.\
12
+ Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
13
+
14
+ The page will reload if you make edits.\
15
+ You will also see any lint errors in the console.
16
+
17
+ ### `npm test`
18
+
19
+ Launches the test runner in the interactive watch mode.\
20
+ See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
21
+
22
+ ### `npm run build`
23
+
24
+ Builds the app for production to the `build` folder.\
25
+ It correctly bundles React in production mode and optimizes the build for the best performance.
26
+
27
+ The build is minified and the filenames include the hashes.\
28
+ Your app is ready to be deployed!
29
+
30
+ See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
31
+
32
+ ### `npm run eject`
33
+
34
+ **Note: this is a one-way operation. Once you `eject`, you can’t go back!**
35
+
36
+ If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
37
+
38
+ Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own.
39
+
40
+ You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it.
41
+
42
+ ## Learn More
43
+
44
+ You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
45
+
46
+ To learn React, check out the [React documentation](https://reactjs.org/).
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "@testing-library/dom": "^10.4.0",
7
+ "@testing-library/jest-dom": "^6.6.3",
8
+ "@testing-library/react": "^16.3.0",
9
+ "@testing-library/user-event": "^13.5.0",
10
+ "@types/jest": "^27.5.2",
11
+ "@types/node": "^16.18.126",
12
+ "@types/react": "^19.1.2",
13
+ "@types/react-dom": "^19.1.2",
14
+ "react": "^19.1.0",
15
+ "react-dom": "^19.1.0",
16
+ "react-scripts": "5.0.1",
17
+ "typescript": "^4.9.5",
18
+ "web-vitals": "^2.1.4"
19
+ },
20
+ "scripts": {
21
+ "start": "react-scripts start",
22
+ "build": "react-scripts build",
23
+ "test": "react-scripts test",
24
+ "eject": "react-scripts eject"
25
+ },
26
+ "eslintConfig": {
27
+ "extends": [
28
+ "react-app",
29
+ "react-app/jest"
30
+ ]
31
+ },
32
+ "browserslist": {
33
+ "production": [
34
+ ">0.2%",
35
+ "not dead",
36
+ "not op_mini all"
37
+ ],
38
+ "development": [
39
+ "last 1 chrome version",
40
+ "last 1 firefox version",
41
+ "last 1 safari version"
42
+ ]
43
+ }
44
+ }
frontend/public/favicon.ico ADDED
frontend/public/index.html ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
7
+ <meta name="theme-color" content="#000000" />
8
+ <meta
9
+ name="description"
10
+ content="Web site created using create-react-app"
11
+ />
12
+ <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
13
+ <!--
14
+ manifest.json provides metadata used when your web app is installed on a
15
+ user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
16
+ -->
17
+ <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
18
+ <!--
19
+ Notice the use of %PUBLIC_URL% in the tags above.
20
+ It will be replaced with the URL of the `public` folder during the build.
21
+ Only files inside the `public` folder can be referenced from the HTML.
22
+
23
+ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
24
+ work correctly both with client-side routing and a non-root public URL.
25
+ Learn how to configure a non-root public URL by running `npm run build`.
26
+ -->
27
+ <title>React App</title>
28
+ </head>
29
+ <body>
30
+ <noscript>You need to enable JavaScript to run this app.</noscript>
31
+ <div id="root"></div>
32
+ <!--
33
+ This HTML file is a template.
34
+ If you open it directly in the browser, you will see an empty page.
35
+
36
+ You can add webfonts, meta tags, or analytics to this file.
37
+ The build step will place the bundled scripts into the <body> tag.
38
+
39
+ To begin the development, run `npm start` or `yarn start`.
40
+ To create a production bundle, use `npm run build` or `yarn build`.
41
+ -->
42
+ </body>
43
+ </html>
frontend/public/logo192.png ADDED
frontend/public/logo512.png ADDED
frontend/public/manifest.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "short_name": "React App",
3
+ "name": "Create React App Sample",
4
+ "icons": [
5
+ {
6
+ "src": "favicon.ico",
7
+ "sizes": "64x64 32x32 24x24 16x16",
8
+ "type": "image/x-icon"
9
+ },
10
+ {
11
+ "src": "logo192.png",
12
+ "type": "image/png",
13
+ "sizes": "192x192"
14
+ },
15
+ {
16
+ "src": "logo512.png",
17
+ "type": "image/png",
18
+ "sizes": "512x512"
19
+ }
20
+ ],
21
+ "start_url": ".",
22
+ "display": "standalone",
23
+ "theme_color": "#000000",
24
+ "background_color": "#ffffff"
25
+ }
frontend/public/robots.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # https://www.robotstxt.org/robotstxt.html
2
+ User-agent: *
3
+ Disallow:
frontend/src/App.css ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .App {
2
+ text-align: center;
3
+ max-width: 1200px;
4
+ margin: 0 auto;
5
+ padding: 20px;
6
+ }
7
+
8
+ .App-logo {
9
+ height: 40vmin;
10
+ pointer-events: none;
11
+ }
12
+
13
+ @media (prefers-reduced-motion: no-preference) {
14
+ .App-logo {
15
+ animation: App-logo-spin infinite 20s linear;
16
+ }
17
+ }
18
+
19
+ .App-header {
20
+ background-color: #282c34;
21
+ padding: 20px;
22
+ color: white;
23
+ margin-bottom: 30px;
24
+ }
25
+
26
+ .App-link {
27
+ color: #61dafb;
28
+ }
29
+
30
+ @keyframes App-logo-spin {
31
+ from {
32
+ transform: rotate(0deg);
33
+ }
34
+ to {
35
+ transform: rotate(360deg);
36
+ }
37
+ }
38
+
39
+ .App-main {
40
+ display: flex;
41
+ flex-direction: column;
42
+ gap: 30px;
43
+ }
44
+
45
+ .upload-section,
46
+ .query-section,
47
+ .results-section {
48
+ background-color: #f5f5f5;
49
+ padding: 20px;
50
+ border-radius: 8px;
51
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
52
+ }
53
+
54
+ input[type="file"],
55
+ input[type="text"] {
56
+ padding: 10px;
57
+ margin: 10px 0;
58
+ width: 100%;
59
+ max-width: 400px;
60
+ border: 1px solid #ddd;
61
+ border-radius: 4px;
62
+ }
63
+
64
+ button {
65
+ background-color: #4CAF50;
66
+ color: white;
67
+ padding: 10px 20px;
68
+ border: none;
69
+ border-radius: 4px;
70
+ cursor: pointer;
71
+ font-size: 16px;
72
+ transition: background-color 0.3s;
73
+ }
74
+
75
+ button:hover {
76
+ background-color: #45a049;
77
+ }
78
+
79
+ button:disabled {
80
+ background-color: #cccccc;
81
+ cursor: not-allowed;
82
+ }
83
+
84
+ .message {
85
+ padding: 10px;
86
+ margin: 10px 0;
87
+ border-radius: 4px;
88
+ background-color: #f8f9fa;
89
+ color: #333;
90
+ }
91
+
92
+ .llm-response {
93
+ background-color: #e8f5e9;
94
+ padding: 20px;
95
+ border-radius: 8px;
96
+ margin-bottom: 20px;
97
+ text-align: left;
98
+ }
99
+
100
+ .llm-response h3 {
101
+ color: #2e7d32;
102
+ margin-bottom: 10px;
103
+ }
104
+
105
+ .relevant-chunks {
106
+ text-align: left;
107
+ }
108
+
109
+ .relevant-chunks h3 {
110
+ color: #1976d2;
111
+ margin-bottom: 10px;
112
+ }
113
+
114
+ .result-item {
115
+ background-color: white;
116
+ padding: 15px;
117
+ margin: 10px 0;
118
+ border-radius: 4px;
119
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
120
+ }
121
+
122
+ .result-text {
123
+ margin-bottom: 5px;
124
+ font-size: 16px;
125
+ line-height: 1.5;
126
+ }
127
+
128
+ .result-score {
129
+ color: #666;
130
+ font-size: 14px;
131
+ font-style: italic;
132
+ }
frontend/src/App.test.tsx ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import { render, screen } from '@testing-library/react';
3
+ import App from './App';
4
+
5
+ test('renders RAG System heading', () => {
6
+   render(<App />);
7
+   const headingElement = screen.getByRole('heading', { name: /rag system/i });
8
+   expect(headingElement).toBeInTheDocument();
9
+ });
frontend/src/App.tsx ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState } from 'react';
2
+ import './App.css';
3
+
4
+ interface DocumentResponse {
5
+ text: string;
6
+ type: string;
7
+ score?: number;
8
+ }
9
+
10
+ function App() {
11
+ const [file, setFile] = useState<File | null>(null);
12
+ const [query, setQuery] = useState('');
13
+ const [results, setResults] = useState<DocumentResponse[]>([]);
14
+ const [loading, setLoading] = useState(false);
15
+ const [message, setMessage] = useState('');
16
+ const [sessionId, setSessionId] = useState<string | null>(null);
17
+ const [currentAnswer, setCurrentAnswer] = useState<string>('');
18
+
19
+ const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
20
+ if (e.target.files && e.target.files[0]) {
21
+ setFile(e.target.files[0]);
22
+ }
23
+ };
24
+
25
+ const handleUpload = async () => {
26
+ if (!file) {
27
+ setMessage('Please select a file first');
28
+ return;
29
+ }
30
+ // Wrap the selected file in form data under the 'file' field; `loading` disables both buttons meanwhile.
31
+ setLoading(true);
32
+ const formData = new FormData();
33
+ formData.append('file', file);
34
+ // POST the form data to the /upload endpoint; any non-OK status is routed to the catch below.
35
+ try {
36
+ const response = await fetch('http://localhost:9000/upload', {
37
+ method: 'POST',
38
+ body: formData,
39
+ });
40
+
41
+ if (!response.ok) {
42
+ throw new Error('Upload failed');
43
+ }
44
+ // On success: show the returned message, keep the returned session id, and clear earlier results/answer.
45
+ const data = await response.json();
46
+ setMessage(data.message);
47
+ setSessionId(data.session_id);
48
+ setResults([]);
49
+ setCurrentAnswer('');
50
+ } catch (error) {
51
+ setMessage('Error uploading file');
52
+ } finally {
53
+ setLoading(false);
54
+ }
55
+ };
56
+
57
+ const handleQuery = async () => {
58
+   if (!query.trim()) {
59
+     setMessage('Please enter a query');
60
+     return;
61
+   }
62
+   if (!sessionId) {
63
+     setMessage('Please upload a document first');
64
+     return;
65
+   }
66
+
67
+   // Apply one `data: <json>` line of the streamed response to component state.
68
+   const handleLine = (line: string) => {
69
+     if (!line.startsWith('data: ')) return;
70
+     const data = JSON.parse(line.slice(6));
71
+     if (data.type === 'token') {
72
+       setCurrentAnswer(prev => prev + data.text);
73
+     } else if (data.type === 'context') {
74
+       setResults(data.context);
75
+     }
76
+   };
77
+
78
+   setLoading(true);
79
+   setMessage(''); // drop any stale status/error left over from a previous action
80
+   setCurrentAnswer('');
81
+   try {
82
+     const response = await fetch(`http://localhost:9000/query/${sessionId}`, {
83
+       method: 'POST',
84
+       headers: {
85
+         'Content-Type': 'application/json',
86
+       },
87
+       body: JSON.stringify({ text: query, k: 4 }),
88
+     });
89
+
90
+     if (!response.ok) {
91
+       throw new Error('Query failed');
92
+     }
93
+
94
+     const reader = response.body?.getReader();
95
+     if (!reader) {
96
+       throw new Error('No reader available');
97
+     }
98
+     const decoder = new TextDecoder();
99
+     let buffer = '';
100
+
101
+     while (true) {
102
+       const { done, value } = await reader.read();
103
+       // At end of stream, flush the decoder so buffered bytes are not lost.
104
+       buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
105
+       const lines = buffer.split('\n');
106
+       // Hold back a partial trailing line unless the stream has ended.
107
+       buffer = done ? '' : lines.pop() || '';
108
+       lines.forEach(handleLine);
109
+       if (done) break;
110
+     }
111
+   } catch (error) {
112
+     setMessage('Error querying documents');
113
+   } finally {
114
+     setLoading(false);
115
+   }
116
+ };
117
+
118
+ return (
119
+ <div className="App">
120
+ <header className="App-header">
121
+ <h1>RAG System</h1>
122
+ </header>
123
+
124
+ <main className="App-main">
125
+ <section className="upload-section">
126
+ <h2>Upload PDF Document</h2>
127
+ <input type="file" accept=".pdf" onChange={handleFileChange} />
128
+ <button onClick={handleUpload} disabled={loading || !file}>
129
+ {loading ? 'Uploading...' : 'Upload'}
130
+ </button>
131
+ </section>
132
+
133
+ <section className="query-section">
134
+ <h2>Query Documents</h2>
135
+ <input
136
+ type="text"
137
+ value={query}
138
+ onChange={(e) => setQuery(e.target.value)}
139
+ placeholder="Enter your query..."
140
+ disabled={!sessionId}
141
+ />
142
+ <button onClick={handleQuery} disabled={loading || !query.trim() || !sessionId}>
143
+ {loading ? 'Searching...' : 'Search'}
144
+ </button>
145
+ </section>
146
+
147
+ {message && <div className="message">{message}</div>}
148
+
149
+ <section className="results-section">
150
+ <h2>Results</h2>
151
+ {currentAnswer && (
152
+ <div className="llm-response">
153
+ <h3>AI Response</h3>
154
+ <p className="result-text">{currentAnswer}</p>
155
+ </div>
156
+ )}
157
+
158
+ {results.length > 0 && (
159
+ <div className="relevant-chunks">
160
+ <h3>Relevant Document Chunks</h3>
161
+ {results.map((result, index) => (
162
+ <div key={index} className="result-item">
163
+ <p className="result-text">{result.text}</p>
164
+ {result.score !== undefined && (
165
+ <p className="result-score">Relevance Score: {result.score.toFixed(2)}</p>
166
+ )}
167
+ </div>
168
+ ))}
169
+ </div>
170
+ )}
171
+ </section>
172
+ </main>
173
+ </div>
174
+ );
175
+ }
176
+
177
+ export default App;
frontend/src/index.css ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ margin: 0;
3
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
4
+ 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
5
+ sans-serif;
6
+ -webkit-font-smoothing: antialiased;
7
+ -moz-osx-font-smoothing: grayscale;
8
+ }
9
+
10
+ code {
11
+ font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
12
+ monospace;
13
+ }
frontend/src/index.tsx ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import ReactDOM from 'react-dom/client';
3
+ import './index.css';
4
+ import App from './App';
5
+ import reportWebVitals from './reportWebVitals';
6
+
7
+ const root = ReactDOM.createRoot(
8
+ document.getElementById('root') as HTMLElement
9
+ );
10
+ root.render(
11
+ <React.StrictMode>
12
+ <App />
13
+ </React.StrictMode>
14
+ );
15
+
16
+ // If you want to start measuring performance in your app, pass a function
17
+ // to log results (for example: reportWebVitals(console.log))
18
+ // or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
19
+ reportWebVitals();
frontend/src/logo.svg ADDED
frontend/src/react-app-env.d.ts ADDED
@@ -0,0 +1 @@
 
 
1
+ /// <reference types="react-scripts" />
frontend/src/reportWebVitals.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ReportHandler } from 'web-vitals';
2
+
3
+ const reportWebVitals = (onPerfEntry?: ReportHandler) => {
4
+ if (onPerfEntry && onPerfEntry instanceof Function) {
5
+ import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
6
+ getCLS(onPerfEntry);
7
+ getFID(onPerfEntry);
8
+ getFCP(onPerfEntry);
9
+ getLCP(onPerfEntry);
10
+ getTTFB(onPerfEntry);
11
+ });
12
+ }
13
+ };
14
+
15
+ export default reportWebVitals;
frontend/src/setupTests.ts ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ // jest-dom adds custom jest matchers for asserting on DOM nodes.
2
+ // allows you to do things like:
3
+ // expect(element).toHaveTextContent(/react/i)
4
+ // learn more: https://github.com/testing-library/jest-dom
5
+ import '@testing-library/jest-dom';
frontend/tsconfig.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es5",
4
+ "lib": [
5
+ "dom",
6
+ "dom.iterable",
7
+ "esnext"
8
+ ],
9
+ "allowJs": true,
10
+ "skipLibCheck": true,
11
+ "esModuleInterop": true,
12
+ "allowSyntheticDefaultImports": true,
13
+ "strict": true,
14
+ "forceConsistentCasingInFileNames": true,
15
+ "noFallthroughCasesInSwitch": true,
16
+ "module": "esnext",
17
+ "moduleResolution": "node",
18
+ "resolveJsonModule": true,
19
+ "isolatedModules": true,
20
+ "noEmit": true,
21
+ "jsx": "react-jsx"
22
+ },
23
+ "include": [
24
+ "src"
25
+ ]
26
+ }