import gradio as gr
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
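# Note: on recent LangChain releases the embeddings import has moved to
# langchain_community.embeddings (or the separate langchain-huggingface
# package); the path above assumes an older LangChain version.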

def answer_from_pdf(pdf_file, user_question):
    # ──────────────────────
    # 1) PDF Text Extraction
    # ──────────────────────
    # gr.File may pass a tempfile-like object (with .name) or a plain path
    # string, depending on the Gradio version
    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else pdf_file
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    raw_text = ""
    for page in pdf_reader.pages:
        raw_text += page.extract_text() or ""  # extract_text() can return None

    # ──────────────────────────
    # 2) Chunking with LangChain
    # ──────────────────────────
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = splitter.split_text(raw_text)
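    # split_text returns a list of strings of at most ~1000 characters each,
    # with consecutive chunks overlapping by up to 200 characters so that
    # sentences cut at a chunk boundary still appear intact in one chunk.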

    # ──────────────────────────────
    # 3) Embedding & FAISS Indexing
    # ──────────────────────────────
    hf_embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    doc_embeddings = hf_embedder.embed_documents(chunks)
    emb_array = np.array(doc_embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(emb_array.shape[1])
    index.add(emb_array)
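    # IndexFlatL2 does an exact (brute-force) nearest-neighbour search on
    # Euclidean distance; for a single PDF's worth of chunks this is fast
    # and needs no training step.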

    # ──────────────────────────
    # 4) Load LLaMA model
    # ──────────────────────────
    gguf_path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    llama = Llama(model_path=gguf_path, n_ctx=2048)
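    # hf_hub_download caches the GGUF file locally, but Llama() still reloads
    # the weights into memory on every call to answer_from_pdf; see the
    # module-level caching sketch after this function.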

    # ──────────────────────────
    # 5) Retrieval & Prompting
    # ──────────────────────────
    system_msg = (
        "You are a knowledgeable assistant. "
        "Use the provided context to answer the user's question concisely. "
        "If the answer is not in the provided context, say \"I don't know.\" "
        "Avoid hallucinations and keep answers under 150 words."
    )
    q_emb = hf_embedder.embed_query(user_question)
    q_vec = np.array(q_emb, dtype=np.float32).reshape(1, -1)
    D, I = index.search(q_vec, k=3)
    context_chunks = [chunks[i] for i in I[0]]
    context_text = "\n\n".join(context_chunks)
    prompt = (
        f"SYSTEM:\n{system_msg}\n\n"
        f"CONTEXT:\n{context_text}\n\n"
        f"USER:\n{user_question}\n\n"
        "ASSISTANT:"
    )
    resp = llama.create_completion(
        prompt=prompt,
        max_tokens=256,
        temperature=0.7,
        stop=["\n\n"]
    )
    return resp["choices"][0]["text"].strip()
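
# answer_from_pdf re-creates the embedder and reloads the 8B GGUF model on
# every question, which dominates latency. A minimal caching sketch
# (hypothetical helpers get_llama/get_embedder; not wired into the function
# above):
from functools import lru_cache

@lru_cache(maxsize=1)
def get_llama():
    path = hf_hub_download(
        repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
        filename="Meta-Llama-3.1-8B-Instruct-Q6_K.gguf",
    )
    return Llama(model_path=path, n_ctx=2048)

@lru_cache(maxsize=1)
def get_embedder():
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )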

# Build a chat interface
with gr.Blocks() as demo:
    gr.Markdown("## PDF RAG Chatbot (LangChain + LLaMA)")
    upload = gr.File(label="Upload PDF")
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(placeholder="Type your question and hit Send")
    send_btn = gr.Button("Send")

    def chat_step(pdf_file, message, chat_history):
        response = answer_from_pdf(pdf_file, message)
        chat_history = chat_history + [(message, response)]
        return chat_history, ""
    send_btn.click(
        chat_step,
        inputs=[upload, user_input, chatbot],
        outputs=[chatbot, user_input]
    )

if __name__ == "__main__":
    demo.launch()
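
# The Space also needs its runtime dependencies installed; a plausible
# requirements.txt (package names only, any version pins are assumptions):
#
#   gradio
#   PyPDF2
#   langchain
#   sentence-transformers
#   faiss-cpu
#   numpy
#   huggingface_hub
#   llama-cpp-python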