import gradio as gr
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import joblib
import os
import traceback
from datetime import datetime

# langdetect is an optional dependency. LANGDETECT_AVAILABLE records whether
# the import succeeded so callers can skip the library and rely on the
# built-in heuristics when it is missing.
try:
    from langdetect import detect, LangDetectException
except ImportError:
    LANGDETECT_AVAILABLE = False
    print("⚠️ langdetect not available, using fallback language detection")
else:
    LANGDETECT_AVAILABLE = True

# ===============================
# Load assets
# ===============================
# Everything in this section runs at import time. The names bound here
# (df, index, clf, model, USER_FEEDBACK_FILE) are read as module globals by
# the functions defined further down in this file.
print("🔄 Loading data and models...")
# Reference corpus. classify_feedback selects rows of this frame by the
# positions FAISS returns and reads its 'Sentence' column, so the row order
# presumably has to match the order in which vectors were added to
# feedback.index — TODO confirm against the script that built the index.
df = pd.read_csv("clean_feedback.csv", encoding='utf-8')
print("✅ CSV loaded with columns:", df.columns.tolist())

# In the visible code this array is only used to print its shape; similarity
# search goes through the FAISS index below.
embeddings = np.load("embeddings.npy")
print("✅ Embeddings loaded with shape:", embeddings.shape)

index = faiss.read_index("feedback.index")
print("✅ FAISS index loaded")

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code. Only ship a feedback_model.pkl that comes from a trusted source.
clf = joblib.load("feedback_model.pkl")
print("✅ Sentiment model loaded")

# Encoder for incoming queries, pinned to CPU. Presumably the same model that
# produced the indexed vectors and the classifier's training features — TODO confirm.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device="cpu")
print("✅ SentenceTransformer ready")

# File to store user submissions
USER_FEEDBACK_FILE = "user_feedback.csv"

# Initialize CSV with proper columns if it doesn't exist, so later reads of
# the history file find a header row. 'utf-8-sig' writes a UTF-8 byte-order
# mark at the start of the file (presumably so spreadsheet tools detect the
# encoding of Urdu text — TODO confirm).
if not os.path.exists(USER_FEEDBACK_FILE):
    pd.DataFrame(columns=[
        "Timestamp", 
        "Sentence", 
        "Predicted_Sentiment", 
        "Confidence", 
        "Language"
    ]).to_csv(USER_FEEDBACK_FILE, index=False, encoding='utf-8-sig')

# ===============================
# Language Detection Function
# ===============================
# Common transliterated (Latin-script) Urdu words used as Roman Urdu markers.
# Stored as a frozenset so each word is counted at most once (the previous list
# contained 'bahut' twice, which double-counted a single occurrence) and so
# the lookup is a set intersection rather than a scan per word.
# NOTE(review): a few entries ('me', 'par', 'se') are also ordinary English
# words, so plain English containing them is reported as mixed; kept as-is to
# preserve the existing vocabulary.
_ROMAN_URDU_WORDS = frozenset({
    'hai', 'nahi', 'acha', 'achchha', 'bohot', 'bahut', 'bhi', 'kya',
    'kaise', 'theek', 'thik', 'matlab', 'samajh', 'bilkul', 'yar',
    'yaar', 'par', 'lekin', 'aur', 'ka', 'ki', 'ko', 'se', 'me',
    'mein', 'hain', 'tha', 'thi', 'gaya', 'gayi', 'karna', 'karo',
    'kuch', 'sab', 'zyada', 'kam', 'achha', 'bura',
})


def detect_language(text):
    """Classify the language/script of a feedback text with simple heuristics.

    The checks run in this order:
      1. Share of alphabetic characters in the Arabic Unicode block
         (U+0600-U+06FF), which contains the Urdu script.
      2. ``langdetect`` (only when it was importable) reporting ``'ur'``.
      3. Number of distinct known Roman Urdu marker words in the text.

    Args:
        text: The input string.

    Returns:
        One of ``"Urdu"``, ``"Mixed (Urdu+English)"``, ``"Roman Urdu"``,
        ``"Mixed (Roman Urdu+English)"``, ``"English"`` or ``"Unknown"``.
        ``"Unknown"`` is returned when the text has no alphabetic characters
        or when detection fails for any reason; the function does not raise.
    """
    try:
        letters = [ch for ch in text if ch.isalpha()]
        if not letters:
            return "Unknown"

        # Count only letters so the ratio stays within [0, 1]; digits,
        # punctuation and combining marks from the Arabic block are ignored.
        urdu_letters = sum(1 for ch in letters if '\u0600' <= ch <= '\u06FF')
        urdu_ratio = urdu_letters / len(letters)

        if urdu_ratio > 0.5:
            return "Urdu"
        if urdu_ratio > 0:
            return "Mixed (Urdu+English)"

        # No Urdu-script letters from here on.
        if LANGDETECT_AVAILABLE:
            try:
                if detect(text) == 'ur':
                    return "Urdu"
            except Exception:
                # Best effort: if langdetect cannot handle the input, fall
                # through to the word-list heuristic below.
                pass

        # Split on anything that is not a letter so punctuation attached to a
        # word ("acha," / "theek.") does not hide the match.
        lowered = text.lower()
        tokens = set("".join(ch if ch.isalpha() else " " for ch in lowered).split())
        matches = len(_ROMAN_URDU_WORDS & tokens)

        if matches >= 2:
            return "Roman Urdu"
        if matches == 1:
            return "Mixed (Roman Urdu+English)"
        return "English"

    except Exception as e:
        print(f"⚠️ Language detection error: {e}")
        return "Unknown"

# ===============================
# Core classification function
# ===============================
def _read_history_or_empty():
    """Return the saved submissions, or an empty DataFrame if the CSV cannot be read."""
    try:
        return pd.read_csv(USER_FEEDBACK_FILE, encoding='utf-8-sig')
    except Exception:
        # Best effort: the table is secondary to the message being reported.
        return pd.DataFrame()


def _message_only_outputs(message, history):
    """Build the 6-tuple that shows `message` and hides every result widget."""
    return (
        gr.update(value=message, visible=True),  # error box
        gr.update(value="", visible=False),      # similar sentences
        history,                                 # table
        gr.update(visible=False),                # sentiment badge
        gr.update(visible=False),                # confidence badge
        gr.update(visible=False),                # language badge
    )


def classify_feedback(text, top_k=5, progress=gr.Progress()):
    """Classify one feedback text, fetch similar stored sentences and log the submission.

    Args:
        text: Raw feedback entered by the user.
        top_k: Number of nearest neighbours requested from the FAISS index.
        progress: Gradio progress tracker, called as ``progress(fraction, desc=...)``.

    Returns:
        A 6-tuple in this fixed order: error-box update, similar-sentences
        update, history DataFrame, sentiment badge update, confidence badge
        update, language badge update.

        The function does not raise. On empty input a warning is shown; on any
        failure the traceback is printed and also rendered in the error box.
        NOTE(review): rendering the traceback exposes internals to end users;
        consider a generic message if the app is deployed publicly.
    """
    try:
        if not text or not text.strip():
            return _message_only_outputs(
                "⚠️ Please enter a feedback text.", _read_history_or_empty()
            )

        progress(0.1, desc="🔍 Analyzing text...")

        # Embed query
        query_emb = model.encode([text])
        progress(0.3, desc="🔍 Finding similar feedbacks...")

        # Retrieve similar sentences. FAISS fills missing neighbours with -1
        # when the index holds fewer than top_k vectors; drop those ids,
        # because df.iloc[-1] would silently return the last row instead.
        _distances, indices = index.search(query_emb, top_k)
        neighbour_ids = [int(i) for i in indices[0] if i >= 0]
        retrieved = df.iloc[neighbour_ids]
        progress(0.6, desc="🤖 Classifying sentiment...")

        # Predict sentiment & probability
        probs_all = clf.predict_proba(query_emb)[0]
        sentiment = clf.classes_[np.argmax(probs_all)]
        confidence = float(np.max(probs_all))

        # Detect language
        language = detect_language(text)
        progress(0.8, desc="💾 Saving results...")

        # Numbered plain-text list for the copyable textbox
        similar_sentences = retrieved['Sentence'].tolist()
        sentences_text = "\n".join(
            f"{i+1}. {s}" for i, s in enumerate(similar_sentences)
        )

        # Save user submission with timestamp and language
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        new_row = pd.DataFrame([{
            "Timestamp": timestamp,
            "Sentence": text,
            "Predicted_Sentiment": sentiment,
            "Confidence": round(confidence, 4),
            "Language": language
        }])

        # Read existing data and append with proper encoding. Skip the concat
        # when there is no history yet: an empty frame adds nothing, and
        # recent pandas versions warn about concatenating empty entries.
        existing = pd.read_csv(USER_FEEDBACK_FILE, encoding='utf-8-sig')
        if existing.empty:
            updated = new_row
        else:
            updated = pd.concat([existing, new_row], ignore_index=True)
        updated.to_csv(USER_FEEDBACK_FILE, index=False, encoding='utf-8-sig')

        progress(1.0, desc="✅ Complete!")

        print(f"✅ Prediction: {sentiment} ({confidence:.2f}) | Language: {language}")

        # Determine sentiment color
        sentiment_color = {
            "Positive": "🟢",
            "Negative": "🔴",
            "Neutral": "🟡"
        }.get(sentiment, "⚪")

        # Return formatted output
        return (
            gr.update(visible=False),  # error box
            gr.update(value=sentences_text, visible=True),  # similar sentences
            updated.sort_values('Timestamp', ascending=False),  # table
            gr.update(visible=True, value=f"{sentiment_color} **{sentiment}**"),  # sentiment badge
            gr.update(visible=True, value=f"**{confidence:.1%}**"),  # confidence badge
            gr.update(visible=True, value=f"**{language}**")  # language badge
        )

    except Exception:
        tb = traceback.format_exc()
        print("❌ Error:", tb)
        return _message_only_outputs(
            f"❌ **Error occurred:**\n```\n{tb}\n```", _read_history_or_empty()
        )

# ===============================
# Clear history function
# ===============================
def clear_history():
    """Reset the saved submission history to a header-only CSV.

    Returns:
        A 3-tuple ``(status message, DataFrame to display, CSV path)``.
        On success the DataFrame is empty. If the file cannot be rewritten,
        the message describes the error and the DataFrame holds the current
        file contents, or an empty table when the file cannot be read either.
        The function does not raise.
    """
    empty_df = pd.DataFrame(columns=[
        "Timestamp",
        "Sentence",
        "Predicted_Sentiment",
        "Confidence",
        "Language"
    ])
    try:
        empty_df.to_csv(USER_FEEDBACK_FILE, index=False, encoding='utf-8-sig')
    except Exception as e:
        message = f"❌ Error clearing history: {str(e)}"
        try:
            shown = pd.read_csv(USER_FEEDBACK_FILE, encoding='utf-8-sig')
        except Exception:
            # The file may be unreadable for the same reason the write
            # failed; still return a well-formed result instead of raising
            # from inside the error handler.
            shown = empty_df
        return message, shown, USER_FEEDBACK_FILE
    return "✅ History cleared successfully!", empty_df, USER_FEEDBACK_FILE

# ===============================
# Load initial data function
# ===============================
def load_initial_data():
    """Return the saved submissions, newest first, for the history table.

    Returns:
        The contents of the history CSV sorted by 'Timestamp' descending.
        An empty DataFrame with the five history columns is returned when
        the file is missing, has no rows, or cannot be loaded. The function
        does not raise; load failures are printed.
    """
    empty = pd.DataFrame(
        columns=["Timestamp", "Sentence", "Predicted_Sentiment", "Confidence", "Language"]
    )
    try:
        if not os.path.exists(USER_FEEDBACK_FILE):
            return empty
        history = pd.read_csv(USER_FEEDBACK_FILE, encoding='utf-8-sig')
        if len(history) == 0:
            return empty
        return history.sort_values('Timestamp', ascending=False)
    except Exception as exc:
        # A damaged history file must not stop the UI from starting, but the
        # reason is logged rather than silently discarded.
        print(f"⚠️ Could not load submission history: {exc}")
        return empty

# ===============================
# Gradio Interface
# ===============================
# Stylesheet passed to gr.Blocks(css=...) below.
# `.rtl-text` is attached to the feedback textbox through elem_classes and
# switches its textarea to right-to-left layout with Urdu-capable fonts.
# The remaining classes (.input-card, .result-card, .sentiment-badge,
# .badge-*, .stats-row, .stat-box) are not referenced by any component in the
# visible code.
custom_css = """
.rtl-text textarea {
    direction: rtl;
    text-align: right;
    font-family: 'Noto Nastaliq Urdu', 'Jameel Noori Nastaleeq', 'Arial', sans-serif;
    font-size: 16px;
}
.input-card {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    border-radius: 12px;
    padding: 20px;
    color: white;
}
.result-card {
    background: #f8f9fa;
    border-radius: 12px;
    padding: 20px;
    border-left: 4px solid #667eea;
}
.sentiment-badge {
    display: inline-block;
    padding: 8px 16px;
    border-radius: 20px;
    font-weight: bold;
    margin: 5px;
}
.badge-positive { background: #d4edda; color: #155724; }
.badge-negative { background: #f8d7da; color: #721c24; }
.badge-neutral { background: #fff3cd; color: #856404; }
.stats-row {
    display: flex;
    gap: 10px;
    margin: 10px 0;
}
.stat-box {
    flex: 1;
    background: white;
    padding: 15px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    text-align: center;
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_width=True) as demo:

    # ---- Page header ----
    gr.Markdown(
        """
        # 🎓 Student Feedback RAG System
        ### Multilingual Sentiment Analysis for Urdu, Roman Urdu, and English
        
        This intelligent system analyzes student feedback using **Retrieval-Augmented Generation (RAG)** 
        and provides similar examples from the database. All submissions are automatically saved with 
        timestamp, sentiment analysis, confidence scores, and automatic language detection.
        """
    )

    # ---- Input area (left column) and static help panel (right column) ----
    with gr.Row():
        with gr.Column(scale=2):
            with gr.Group():
                # The "rtl-text" class is styled by custom_css.
                input_text = gr.Textbox(
                    elem_classes=["rtl-text"],
                    label="✍️ Enter Student Feedback",
                    lines=4,
                    placeholder="اپنی رائے یہاں لکھیں | Type your feedback here in Urdu, Roman Urdu, or English...",
                    show_copy_button=True,
                )

                with gr.Row():
                    submit_btn = gr.Button("🔍 Analyze Feedback", variant="primary", size="lg", scale=2)
                    clear_input_btn = gr.Button("🧹 Clear", variant="secondary", size="lg", scale=1)

            # Warning/error text; stays hidden until a handler makes it visible.
            error_box = gr.Markdown(visible=False)

        with gr.Column(scale=1):
            gr.Markdown(
                """
                ### 🌐 Supported Languages
                
                | Language | Script | Status |
                |----------|--------|--------|
                | **Urdu** | اردو | ✅ Full Support |
                | **Roman Urdu** | Latin | ✅ Full Support |
                | **English** | English | ✅ Full Support |
                | **Mixed** | Mixed | ✅ Full Support |
                
                ### 🎯 Sentiment Classes
                - 🟢 **Positive** - Favorable feedback
                - 🔴 **Negative** - Critical feedback  
                - 🟡 **Neutral** - Balanced/Objective feedback
                """
            )

    # ---- Results (row and its widgets start hidden) ----
    with gr.Row(visible=False) as results_row:
        with gr.Column():
            gr.Markdown("## 📊 Analysis Results")

            # Three equal-width badges side by side.
            with gr.Row():
                with gr.Column(scale=1):
                    sentiment_badge = gr.Markdown(label="Predicted Sentiment", visible=False)
                with gr.Column(scale=1):
                    confidence_badge = gr.Markdown(label="Confidence", visible=False)
                with gr.Column(scale=1):
                    language_badge = gr.Markdown(label="Detected Language", visible=False)

            # Read-only, copyable list of the retrieved sentences.
            similar_sentences_box = gr.Textbox(
                label="📋 Similar Sentences from Database (Copy to Clipboard)",
                container=True,
                interactive=False,
                lines=6,
                max_lines=10,
                show_copy_button=True,
                visible=False,
            )

    # ---- Similar examples ----
    # In the visible code this component is only ever reset by the Clear
    # button; no handler writes content into it.
    gr.Markdown("---")
    gr.Markdown("## 🔍 Similar Examples from Database")
    examples_output = gr.Markdown()

    # ---- Submission history ----
    gr.Markdown("---")
    gr.Markdown("## 🗂️ Submission History")

    with gr.Row():
        with gr.Column(scale=4):
            # max_height (not height) caps the table's size.
            output_table = gr.Dataframe(
                label="Recent Submissions",
                max_height=300,
                show_copy_button=True,
                show_fullscreen_button=True,
                show_row_numbers=True,
                wrap=True,
            )
        with gr.Column(scale=1):
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear History", variant="stop", size="sm")
            with gr.Row():
                download_btn = gr.DownloadButton(
                    label="📥 Download CSV",
                    value=USER_FEEDBACK_FILE,
                    variant="secondary",
                    size="sm",
                )

            # Status line for the Clear History action.
            clear_output = gr.Markdown()

    # ---- Footer ----
    gr.Markdown(
        """
        ---
        ### 💡 Tips:
        - Type in **Urdu (اردو)**, **Roman Urdu**, or **English** - the system auto-detects the language
        - The system uses **FAISS** for fast similarity search across thousands of feedback entries
        - **Confidence scores** above 80% indicate high reliability predictions
        - All data is saved with **UTF-8 encoding** to properly handle Urdu script
        """
    )

    # ---- Event handlers ----
    def process_and_show(text, progress=gr.Progress()):
        """Run classify_feedback and map its six results, plus the row visibility, onto the UI."""
        outcome = classify_feedback(text, progress=progress)
        error_msg, sentences, table, sent_badge, conf_badge, lang_badge = outcome

        # The results row is shown only when the sentiment badge update asks
        # to be visible. The update may be a dict or an object exposing a
        # `visible` attribute; anything else keeps the row hidden.
        if isinstance(sent_badge, dict):
            show_results = sent_badge.get("visible", False)
        elif hasattr(sent_badge, 'visible'):
            show_results = sent_badge.visible
        else:
            show_results = False

        return {
            error_box: error_msg,
            similar_sentences_box: sentences,
            output_table: table,
            sentiment_badge: sent_badge,
            confidence_badge: conf_badge,
            language_badge: lang_badge,
            results_row: gr.update(visible=show_results),
        }

    # Components refreshed by an analysis run, shared by both triggers below.
    analysis_outputs = (
        error_box, similar_sentences_box, output_table,
        sentiment_badge, confidence_badge, language_badge, results_row,
    )

    submit_btn.click(
        fn=process_and_show,
        inputs=[input_text],
        outputs=list(analysis_outputs),
    )

    # Same handler for the textbox's own submit event.
    # NOTE(review): for a multi-line textbox Gradio presumably fires this on
    # Shift+Enter rather than plain Enter — confirm for the installed version.
    input_text.submit(
        fn=process_and_show,
        inputs=[input_text],
        outputs=list(analysis_outputs),
    )

    def clear_and_update():
        """Clear the stored history and return (status text, table data, download path)."""
        status, history, csv_path = clear_history()
        return status, history, csv_path

    clear_btn.click(
        fn=clear_and_update,
        inputs=[],
        outputs=[clear_output, output_table, download_btn],
    )

    def clear_input():
        """Empty the input box, hide the results row and error box, and blank the examples area."""
        hide = gr.update(visible=False)
        return "", hide, gr.update(visible=False), gr.update(value="")

    clear_input_btn.click(
        fn=clear_input,
        inputs=[],
        outputs=[input_text, results_row, error_box, examples_output],
    )

    # Fill the history table when the page loads.
    demo.load(
        fn=load_initial_data,
        inputs=[],
        outputs=[output_table],
    )

# Launch the app
if __name__ == "__main__":
    # Collected in one mapping so the serving configuration reads as a unit.
    launch_options = {
        "server_name": "0.0.0.0",  # bind to all network interfaces
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)