DharavathSri committed on
Commit
f2c6c48
·
verified ·
1 Parent(s): 83f95fd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
+ # Custom CSS for styling
6
+ st.markdown("""
7
+ <style>
8
+ .main {
9
+ background-color: #f5f5f5;
10
+ }
11
+ .stTextInput>div>div>input {
12
+ background-color: #ffffff;
13
+ color: #000000;
14
+ }
15
+ .stButton>button {
16
+ background-color: #4CAF50;
17
+ color: white;
18
+ border-radius: 5px;
19
+ border: none;
20
+ padding: 10px 24px;
21
+ }
22
+ .stButton>button:hover {
23
+ background-color: #45a049;
24
+ }
25
+ .title {
26
+ font-size: 2.5em;
27
+ color: #2c3e50;
28
+ text-align: center;
29
+ margin-bottom: 0.5em;
30
+ }
31
+ .sidebar .sidebar-content {
32
+ background-color: #2c3e50;
33
+ color: white;
34
+ }
35
+ </style>
36
+ """, unsafe_allow_html=True)
37
+
38
+ # App Title
39
+ st.markdown('<p class="title">💬 Fine-Tuned LLM Chat</p>', unsafe_allow_html=True)
40
+
41
+ # Sidebar for settings
42
+ with st.sidebar:
43
+ st.header("⚙️ Settings")
44
+ model_name = st.selectbox(
45
+ "Select Model",
46
+ ["mistralai/Mistral-7B-v0.1", "meta-llama/Llama-2-7b-chat-hf"],
47
+ help="Choose a pre-trained model to fine-tune."
48
+ )
49
+ temperature = st.slider(
50
+ "Temperature",
51
+ min_value=0.1,
52
+ max_value=1.0,
53
+ value=0.7,
54
+ help="Controls randomness (lower = more deterministic)."
55
+ )
56
+ max_length = st.slider(
57
+ "Max Response Length",
58
+ min_value=50,
59
+ max_value=500,
60
+ value=150,
61
+ help="Maximum number of tokens in the response."
62
+ )
63
+
64
+ # Load model (cached to avoid reloading)
65
+ @st.cache_resource
66
+ def load_model(model_name):
67
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
68
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
69
+ return tokenizer, model
70
+
71
+ tokenizer, model = load_model(model_name)
72
+
73
+ # Initialize chat history
74
+ if "messages" not in st.session_state:
75
+ st.session_state.messages = []
76
+
77
+ # Display chat messages
78
+ for message in st.session_state.messages:
79
+ with st.chat_message(message["role"]):
80
+ st.markdown(message["content"])
81
+
82
+ # Chat input
83
+ if prompt := st.chat_input("Ask me anything..."):
84
+ st.session_state.messages.append({"role": "user", "content": prompt})
85
+ with st.chat_message("user"):
86
+ st.markdown(prompt)
87
+
88
+ # Generate response
89
+ with st.chat_message("assistant"):
90
+ with st.spinner("Thinking..."):
91
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
92
+ outputs = model.generate(
93
+ **inputs,
94
+ max_new_tokens=max_length,
95
+ temperature=temperature,
96
+ do_sample=True
97
+ )
98
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
+ st.markdown(response)
100
+ st.session_state.messages.append({"role": "assistant", "content": response})