Spaces:
Running
Running
File size: 9,852 Bytes
bf8b348 cc598bb cc7258d 637d303 cc7258d 8601c8e cc598bb 8601c8e cc598bb 8601c8e cc598bb 8601c8e 02f4e29 58e21db 02f4e29 58e21db 8601c8e 58e21db ac7a438 6f611ea ac7a438 6a6c714 bda38da 8601c8e 637d303 6a6c714 ac7a438 6f611ea ac7a438 cc7258d bda38da cc7258d ac7a438 8574ab5 ac7a438 6a6c714 ac7a438 6a6c714 8574ab5 bda38da 8601c8e ac7a438 6f611ea ac7a438 86a71ff ac7a438 cc7258d ac7a438 dabfdf4 f6c0f61 6e8ccc0 f6c0f61 df394b1 15dcde4 6e8ccc0 ac7a438 eaf959e ac7a438 58e21db 02f4e29 8601c8e 8574ab5 58e21db 8574ab5 8601c8e 637d303 56f7afc 8601c8e 56f7afc 3a184ff ac7a438 86a71ff ac7a438 86a71ff 6a6c714 86a71ff ac7a438 86a71ff ac7a438 8601c8e 637d303 8601c8e b24e947 0eb63e4 8601c8e 58e21db 8574ab5 ac7a438 8574ab5 02f4e29 58e21db 8601c8e cc598bb 8601c8e cc598bb 8601c8e 02f4e29 8601c8e 58e21db 8601c8e 02f4e29 110d1c8 02f4e29 cc598bb 02f4e29 cc598bb 02f4e29 8601c8e 02f4e29 8601c8e ec71ba3 02f4e29 8601c8e 02f4e29 8601c8e 02f4e29 8601c8e 02f4e29 978b7eb 8601c8e 02f4e29 8601c8e 02f4e29 8601c8e 02f4e29 8601c8e 02f4e29 8574ab5 02f4e29 8574ab5 8601c8e 02f4e29 8601c8e 02f4e29 ec71ba3 15a93b4 ec71ba3 8574ab5 02f4e29 8574ab5 8601c8e 02f4e29 978b7eb 02f4e29 c51e3fd 02f4e29 8574ab5 8601c8e 7d0de58 8601c8e 7d0de58 dabfdf4 637d303 86a71ff 8601c8e 6e8ccc0 86a71ff 8574ab5 86a71ff 8601c8e 86a71ff 8574ab5 86a71ff 6f611ea 86a71ff ac7a438 86a71ff 8601c8e 8574ab5 8601c8e 637d303 8601c8e 637d303 8601c8e 86a71ff 56f7afc 8574ab5 86a71ff 6a6c714 8601c8e 8574ab5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 |
import gc
import html
import logging
import os
import tempfile

import streamlit as st
import torch
from groq import APIError, Groq
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
# ---------------- CONFIGURATION ----------------
logging.basicConfig(level=logging.INFO)

# Load the Groq API key from Streamlit/Hugging Face secrets, falling back to
# the process environment so local runs work without a secrets file.
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY", os.environ.get("GROQ_API_KEY"))
GROQ_MODEL = "llama-3.1-8b-instant"

# Initialize the Groq client. The app degrades gracefully when no key is
# configured or construction fails: `client` stays None and the chat input
# is disabled further down.
# BUG FIX: the success-log string literal was broken across two physical
# lines (a syntax error); rejoined into a single literal. NOTE(review): the
# leading "β" here and in messages below looks like a mojibake'd emoji
# (probably a check mark) — confirm intended glyphs against the original file.
client = None
if GROQ_API_KEY:
    try:
        client = Groq(api_key=GROQ_API_KEY)
        logging.info("β Groq client initialized successfully.")
    except Exception as e:
        st.error(f"β Failed to initialize Groq client: {e}")
        client = None
else:
    st.warning("β οΈ GROQ_API_KEY not found. Please add it to Hugging Face secrets.")
# ---------------- STREAMLIT UI SETUP ----------------
st.set_page_config(page_title="PDF Assistant", page_icon="π", layout="wide")
# ---------------- CSS ----------------
# App-wide stylesheet injected once per rerun: locks page scrolling, hides the
# default Streamlit header/footer, pins the sidebar and a custom fixed header,
# gives the main pane its own scrollbar, and styles the chat bubbles.
# NOTE: no comments may be added inside the triple-quoted string below —
# they would become part of the injected CSS payload.
st.markdown("""
<style>
/* 1. GLOBAL RESET & SCROLL LOCK */
html, body {
overflow: hidden;
height: 100%;
margin: 0;
}
/* 2. HIDE DEFAULT STREAMLIT ELEMENTS */
header[data-testid="stHeader"] {
display: none;
}
footer {
display: none;
}
/* 3. SIDEBAR STYLING (INDEPENDENT LEFT PANEL SCROLL) */
[data-testid="stSidebar"] {
position: fixed;
top: 0;
left: 0;
height: 100vh;
width: 20rem;
overflow-y: auto !important;
z-index: 99999;
}
[data-testid="stSidebar"]::-webkit-scrollbar {
width: 6px;
}
[data-testid="stSidebar"]::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 3px;
}
/* 4. FIXED HEADER STYLING */
.fixed-header {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 6rem;
background-color: #0e1117;
z-index: 99998;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
}
/* 5. MAIN CONTENT SCROLLING (INDEPENDENT RIGHT PANEL SCROLL) */
.main .block-container {
margin-top: 6rem;
height: calc(100vh - 6rem);
overflow-y: auto;
padding-top: 1rem;
padding-bottom: 5rem;
}
.main .block-container::-webkit-scrollbar {
width: 8px;
}
.main .block-container::-webkit-scrollbar-thumb {
background: #2d3748;
border-radius: 4px;
}
/* 6. SIDEBAR BUTTON STYLING */
[data-testid="stSidebar"] .stButton button {
width: 100%;
border-radius: 8px;
font-weight: 600;
margin-bottom: 6px;
}
/* 7. HIDE UPLOADED FILE LIST & NAME */
[data-testid='stFileUploaderFile'] {
display: none;
}
section[data-testid="stFileUploader"] ul {
display: none;
}
section[data-testid="stFileUploader"] small {
display: none;
}
/* 8. CHAT BUBBLES */
.chat-user {
background: #2d3748;
padding: 12px;
border-radius: 10px 10px 2px 10px;
margin: 6px 0 6px auto;
max-width: 85%;
text-align: right;
color: #f0f2f6;
}
.chat-bot {
background: #1e3a8a;
padding: 12px;
border-radius: 10px 10px 10px 2px;
margin: 6px auto 6px 0;
max-width: 85%;
text-align: left;
color: #ffffff;
}
/* Sources CSS removed/hidden as it is no longer used */
.sources {
display: none;
}
/* 9. TITLE TEXT */
.title-text {
font-size: 2.5rem;
font-weight: 800;
margin: 0;
line-height: 1.2;
}
.creator-text {
font-size: 1rem;
font-weight: 500;
color: #cccccc;
}
.creator-text a {
color: #4da6ff;
text-decoration: none;
}
/* 10. INPUT FORM STYLING */
[data-testid="stForm"] {
border: none;
padding: 0;
}
</style>
""", unsafe_allow_html=True)
# ---------------- FIXED HEADER ----------------
# Pinned page banner (positioned by .fixed-header in the stylesheet):
# app title plus an author-credit link.
_HEADER_HTML = """
<div class="fixed-header">
<div class="title-text">π PDF Assistant</div>
<div class="creator-text">
by <a href="https://www.linkedin.com/in/abhishek-iitr/" target="_blank">Abhishek Saxena</a>
</div>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# ---------------- SESSION STATE ----------------
if "chat" not in st.session_state:
st.session_state.chat = []
if "vectorstore" not in st.session_state:
st.session_state.vectorstore = None
if "retriever" not in st.session_state:
st.session_state.retriever = None
if "uploaded_file_name" not in st.session_state:
st.session_state.uploaded_file_name = None
if "uploader_key" not in st.session_state:
st.session_state.uploader_key = 0
# ---------------- FUNCTIONS ----------------
def clear_chat_history():
    """Drop the conversation history while keeping the loaded PDF indexed."""
    st.session_state["chat"] = []
def clear_memory():
    """Forget the loaded PDF entirely.

    Clears the vector store, retriever and file name, bumps ``uploader_key``
    so the file_uploader widget is remounted (discarding the stale upload),
    then reclaims Python and — when a GPU is present — CUDA memory.
    """
    for key in ("vectorstore", "retriever", "uploaded_file_name"):
        st.session_state[key] = None
    st.session_state["uploader_key"] += 1
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def process_pdf(uploaded_file):
    """Index an uploaded PDF into an in-memory Chroma vector store.

    The upload is spooled to a temporary file (PyPDFLoader needs a real
    path), split into 800-char chunks with 60-char overlap, embedded on CPU
    with all-MiniLM-L6-v2, and the resulting vectorstore/retriever (top-5)
    are stored in session state.

    Returns the number of chunks indexed, or None on failure (an error is
    shown in the UI instead of raising).
    """
    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            path = tmp.name
        loader = PyPDFLoader(path)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=60)
        chunks = splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        vectorstore = Chroma.from_documents(chunks, embeddings)
        st.session_state.vectorstore = vectorstore
        st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        return len(chunks)
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return None
    finally:
        # BUG FIX: the temp file was previously unlinked only on the success
        # path, so any exception during load/split/embed leaked it.
        if path and os.path.exists(path):
            os.unlink(path)
def ask_question(question):
    """Answer *question* from the indexed PDF via retrieval + Groq chat.

    Returns an ``(answer, n_sources, error)`` triple: on success ``error``
    is None; on failure ``answer`` is None and ``error`` holds a message.
    """
    if client is None:
        return None, 0, "Groq client is not initialized."
    retriever = st.session_state.retriever
    if not retriever:
        return None, 0, "Upload PDF first."
    try:
        hits = retriever.invoke(question)
        context = "\n\n".join(doc.page_content for doc in hits)
        # Prompt pins the model to the retrieved context only.
        prompt = f"""
You are a strict RAG Q&A assistant.
Summarize the context based on user question and return best answer. If the answer is not found, reply: "I cannot find this in the PDF."
CONTEXT = {context}
QUESTION = {question}
Answer on your behalf, don't say based on the context...
"""
        messages = [
            {"role": "system", "content": "Use only the PDF content."},
            {"role": "user", "content": prompt},
        ]
        response = client.chat.completions.create(
            model=GROQ_MODEL,
            messages=messages,
            temperature=0.1,
        )
        answer = response.choices[0].message.content.strip()
        return answer, len(hits), None
    except Exception as e:
        return None, 0, f"Error: {str(e)}"
# ---------------- SIDEBAR ----------------
# BUG FIX: three string literals in this section were broken across physical
# lines (syntax errors in the scraped form); each is rejoined below.
# NOTE(review): the leading "β"/"β¬οΈ" glyphs look like mojibake'd emoji —
# confirm against the original file.
with st.sidebar:
    st.write("")
    if st.button("ποΈ Clear Chat History", use_container_width=True):
        clear_chat_history()
    if st.button("π₯ Clear PDF Memory", on_click=clear_memory, use_container_width=True):
        st.success("Memory Cleared!")
    st.markdown("---")

    # Label is hidden (label_visibility="collapsed") but kept for accessibility.
    # Keying the uploader on uploader_key lets clear_memory() remount it.
    upload_label = "β PDF Uploaded!" if st.session_state.uploaded_file_name else "Upload PDF"
    uploaded = st.file_uploader(
        upload_label, type=["pdf"], key=st.session_state.uploader_key, label_visibility="collapsed"
    )
    if uploaded:
        if uploaded.name != st.session_state.uploaded_file_name:
            # New file: reset active-file state and chat, then (re)index.
            st.session_state.uploaded_file_name = None
            st.session_state.chat = []
            with st.spinner(f"Processing '{uploaded.name}'..."):
                chunks = process_pdf(uploaded)
                if chunks:
                    st.session_state.uploaded_file_name = uploaded.name
                    st.success("β PDF Processed!")
                else:
                    st.error("β Failed.")
        else:
            st.success(f"β **Active:** `{uploaded.name}`")
    else:
        st.warning("β¬οΈ Upload a PDF to start chatting!")
# ---------------- INPUT AREA ----------------
disabled_input = st.session_state.uploaded_file_name is None or client is None
# Input Form
with st.form(key='chat_form', clear_on_submit=True):
col_input, col_btn = st.columns([0.85, 0.15], gap="small")
with col_input:
user_question = st.text_input(
"Ask a question",
placeholder="Ask a question about the loaded PDF...",
label_visibility="collapsed",
disabled=disabled_input
)
with col_btn:
submit_btn = st.form_submit_button("β€", disabled=disabled_input, use_container_width=True)
if submit_btn and user_question:
st.session_state.chat.append(("user", user_question))
with st.spinner("Thinking..."):
answer, sources, error = ask_question(user_question)
if answer:
# CHANGED: Removed the 'Context Chunks Used' HTML from the message
bot_msg = answer
st.session_state.chat.append(("bot", bot_msg))
else:
st.session_state.chat.append(("bot", f"π΄ **Error:** {error}"))
st.rerun()
# ---------------- CHAT HISTORY (REVERSED) ----------------
# Newest exchange first. SECURITY FIX: user-typed text is interpolated into
# markup rendered with unsafe_allow_html=True, so it must be HTML-escaped —
# previously raw "<"/">" (or a <script> tag) typed by the user was injected
# verbatim into the page.
if st.session_state.chat:
    st.markdown("---")
    for role, msg in reversed(st.session_state.chat):
        if role == "user":
            st.markdown(f"<div class='chat-user'>{html.escape(msg)}</div>", unsafe_allow_html=True)
        else:
            # Bot text is left unescaped so markdown emphasis in the
            # "**Error:**" messages keeps rendering as before.
            st.markdown(f"<div class='chat-bot'>{msg}</div>", unsafe_allow_html=True)