jk12p committed
Commit 1ac7b06 · verified · 1 Parent(s): 5dec005

Update app.py

Files changed (1):
  1. app.py +13 -12
app.py CHANGED
@@ -1,20 +1,24 @@
  import streamlit as st
  import torch
  import fitz  # PyMuPDF
+ import os
+ from dotenv import load_dotenv
  from sentence_transformers import SentenceTransformer
  import faiss
  import numpy as np
  from transformers import AutoTokenizer, AutoModelForCausalLM

  # --- CONFIG ---
- HF_TOKEN = "your_huggingface_token_here"  # Add your Hugging Face token
+ load_dotenv()
+ HF_TOKEN = os.getenv("HF_TOKEN")

  # Load tokenizer and model with optimizations
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
  model = AutoModelForCausalLM.from_pretrained(
      "google/gemma-2b-it",
-     torch_dtype=torch.float16,  # Use half-precision for less memory
-     device_map="auto"  # This will place the model on the best device (CPU/GPU)
+     torch_dtype=torch.float16,
+     device_map="auto",
+     token=HF_TOKEN
  )

  # Load sentence transformer model for embedding generation
@@ -25,7 +29,6 @@ st.title("🔍 RAG App using 🤖 Gemma 2B")

  uploaded_file = st.file_uploader("📄 Upload a PDF or TXT file", type=["pdf", "txt"])

- # Extract text from file (PDF/TXT)
  def extract_text(file):
      text = ""
      if file.type == "application/pdf":
@@ -36,11 +39,9 @@ def extract_text(file):
      text = file.read().decode("utf-8")
      return text

- # Split text into chunks for indexing
  def split_into_chunks(text, chunk_size=500):
      return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

- # Create FAISS index for fast retrieval
  def create_faiss_index(chunks):
      embeddings = embedder.encode(chunks)
      dim = embeddings.shape[1]
@@ -48,13 +49,11 @@ def create_faiss_index(chunks):
      index.add(np.array(embeddings))
      return index, embeddings

- # Retrieve top-k relevant chunks for the query
  def retrieve_chunks(query, chunks, index, embeddings, k=3):
      query_embedding = embedder.encode([query])
      D, I = index.search(np.array(query_embedding), k)
      return [chunks[i] for i in I[0]]

- # --- MAIN LOGIC ---
  if uploaded_file:
      st.success("✅ File uploaded successfully!")
      raw_text = extract_text(uploaded_file)
@@ -70,10 +69,12 @@ if uploaded_file:
      with st.spinner("Thinking..."):
          context = "\n".join(retrieve_chunks(user_question, chunks, index, embeddings))

-         # Generate response from Gemma 2B
-         input_ids = tokenizer.encode(f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {user_question}\nAnswer:", return_tensors="pt").to(model.device)
+         input_ids = tokenizer.encode(
+             f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {user_question}\nAnswer:",
+             return_tensors="pt"
+         ).to(model.device)

-         with torch.no_grad():  # Disable gradient computation for inference
+         with torch.no_grad():
              outputs = model.generate(input_ids, max_length=512, num_return_sequences=1, temperature=0.7)

          generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
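
After this change the token no longer lives in the source: app.py expects HF_TOKEN in the environment, either exported in the shell, defined in a .env file read by python-dotenv's load_dotenv, or, if this runs as a Hugging Face Space, configured as a repository secret. The sketch below is not part of the commit; it is a minimal, hypothetical check that mirrors the new CONFIG block and fails fast when the token is missing, with a placeholder token value in the comment.

    # check_token.py (hypothetical helper, not in the repository)
    # Assumes a .env file beside app.py containing, for example:
    #   HF_TOKEN=hf_your_token_here   (placeholder, use a real token)
    import os

    from dotenv import load_dotenv  # from the python-dotenv package

    load_dotenv()  # loads key=value pairs from .env into the process environment
    token = os.getenv("HF_TOKEN")

    if not token:
        raise SystemExit("HF_TOKEN is not set; export it or add it to .env")
    print("HF_TOKEN found; gated checkpoints such as google/gemma-2b-it can be downloaded with token=HF_TOKEN.")

When running locally the app still starts with streamlit run app.py; the only new dependency is python-dotenv, which needs to be listed in requirements.txt if it is not there already.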