Spaces:

mic3333
/

dash-mcp

Sleeping

App Files Files Community

mic3333 committed on Sep 2

Commit

c4848ed

verified ·

1 Parent(s): aa35d25

Create app.py

Browse files

Files changed (1) hide show

app.py +419 -0

app.py ADDED Viewed

	@@ -0,0 +1,419 @@

+import os
+import base64
+import io
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from dash import Dash, html, dcc, Input, Output, State
+import dash_bootstrap_components as dbc
+# Langchain imports
+from langchain.llms import HuggingFacePipeline
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain.document_loaders import CSVLoader, DataFrameLoader
+from langchain.schema import Document
+# Initialize Dash app
+# The Bootstrap theme stylesheet is loaded for the dbc.* components used in the layout.
+app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+# Module-level handle on the app's underlying server object
+# (presumably so a WSGI host can import it -- confirm against the deployment).
+server = app.server
+# Initialize Langchain components
+@st.cache_resource
+def init_langchain():
+    """Initialize Langchain components"""
+    try:
+        # Use a lightweight model for embeddings
+        embeddings = HuggingFaceEmbeddings(
+            model_name="sentence-transformers/all-MiniLM-L6-v2",
+            model_kwargs={'device': 'cpu'}
+        )
+        # Initialize text splitter
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1000,
+            chunk_overlap=200
+        )
+        return embeddings, text_splitter
+    except Exception as e:
+        print(f"Error initializing Langchain: {e}")
+        return None, None
+# Global variables
+# Built once at import time; both names are None when init_langchain() failed.
+embeddings, text_splitter = init_langchain()
+# FAISS index for the uploaded dataset; stays None until create_vector_store() succeeds.
+# Module-level, so it is shared by every callback running in this process.
+vector_store = None
+# App layout
+app.layout = dbc.Container([
+    dbc.Row([
+        dbc.Col([
+            html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
+            html.P("Upload data, ask questions, and get AI-powered insights!",
+                   className="text-center text-muted"),
+            html.Hr(),
+        ], width=12)
+    ]),
+    dbc.Row([
+        dbc.Col([
+            dbc.Card([
+                dbc.CardBody([
+                    html.H4("📁 Data Upload", className="card-title"),
+                    dcc.Upload(
+                        id='upload-data',
+                        children=html.Div([
+                            'Drag and Drop or ',
+                            html.A('Select Files')
+                        ]),
+                        style={
+                            'width': '100%',
+                            'height': '60px',
+                            'lineHeight': '60px',
+                            'borderWidth': '1px',
+                            'borderStyle': 'dashed',
+                            'borderRadius': '5px',
+                            'textAlign': 'center',
+                            'margin': '10px'
+                        },
+                        multiple=False,
+                        accept='.csv,.xlsx,.txt'
+                    ),
+                    html.Div(id='upload-status', className="mt-2"),
+                    html.Hr(),
+                    html.H4("🤖 AI Assistant", className="card-title"),
+                    dbc.InputGroup([
+                        dbc.Input(
+                            id="ai-question",
+                            placeholder="Ask questions about your data...",
+                            type="text",
+                            style={"fontSize": "14px"}
+                        ),
+                        dbc.Button(
+                            "Ask AI",
+                            id="ask-button",
+                            color="primary",
+                            n_clicks=0
+                        )
+                    ]),
+                    html.Div(id="ai-response", className="mt-3"),
+                    html.Hr(),
+                    html.H4("📊 Quick Analytics", className="card-title"),
+                    dbc.ButtonGroup([
+                        dbc.Button("Summary Stats", id="stats-btn", size="sm"),
+                        dbc.Button("Correlations", id="corr-btn", size="sm"),
+                        dbc.Button("Missing Data", id="missing-btn", size="sm"),
+                    ], className="w-100"),
+                    html.Div(id="quick-analytics", className="mt-3")
+                ])
+            ])
+        ], width=4),
+        dbc.Col([
+            dbc.Card([
+                dbc.CardBody([
+                    html.H4("📈 Visualizations", className="card-title"),
+                    dcc.Graph(id='main-graph', style={'height': '400px'}),
+                ])
+            ]),
+            dbc.Card([
+                dbc.CardBody([
+                    html.H4("🔍 Data Explorer", className="card-title"),
+                    html.Div(id='data-table')
+                ])
+            ], className="mt-3")
+        ], width=8)
+    ], className="mt-4"),
+    # Store components
+    dcc.Store(id='stored-data'),
+    dcc.Store(id='data-context')
+], fluid=True)
+def create_vector_store(df):
+    """Create vector store from dataframe"""
+    global vector_store
+    if embeddings is None:
+        return False
+    try:
+        # Convert dataframe to documents
+        documents = []
+        # Add column information
+        col_info = f"Dataset has {len(df)} rows and {len(df.columns)} columns.\n"
+        col_info += f"Columns: {', '.join(df.columns)}\n"
+        col_info += f"Data types: {df.dtypes.to_string()}\n"
+        documents.append(Document(page_content=col_info, metadata={"type": "schema"}))
+        # Add summary statistics
+        summary = df.describe().to_string()
+        documents.append(Document(page_content=f"Summary statistics:\n{summary}",
+                                metadata={"type": "statistics"}))
+        # Add sample rows
+        sample_data = df.head(10).to_string()
+        documents.append(Document(page_content=f"Sample data:\n{sample_data}",
+                                metadata={"type": "sample"}))
+        # Add correlation information for numeric columns
+        numeric_cols = df.select_dtypes(include=['number']).columns
+        if len(numeric_cols) > 1:
+            corr = df[numeric_cols].corr().to_string()
+            documents.append(Document(page_content=f"Correlations:\n{corr}",
+                                    metadata={"type": "correlation"}))
+        # Create vector store
+        vector_store = FAISS.from_documents(documents, embeddings)
+        return True
+    except Exception as e:
+        print(f"Error creating vector store: {e}")
+        return False
+def get_ai_response(question, df):
+    """Get AI response using RAG"""
+    global vector_store
+    if vector_store is None:
+        return "Please upload data first to enable AI features."
+    try:
+        # Simple keyword-based responses for demo
+        question_lower = question.lower()
+        if "summary" in question_lower or "overview" in question_lower:
+            return f"""📊 **Data Summary**:
+            - **Shape**: {df.shape[0]} rows × {df.shape[1]} columns
+            - **Columns**: {', '.join(df.columns)}
+            - **Missing values**: {df.isnull().sum().sum()} total
+            - **Numeric columns**: {len(df.select_dtypes(include=['number']).columns)}
+            """
+        elif "correlation" in question_lower or "relationship" in question_lower:
+            numeric_cols = df.select_dtypes(include=['number']).columns
+            if len(numeric_cols) > 1:
+                corr = df[numeric_cols].corr()
+                # Find highest correlation
+                corr_vals = corr.abs().unstack().sort_values(ascending=False)
+                corr_vals = corr_vals[corr_vals < 1.0]  # Remove self-correlations
+                if not corr_vals.empty:
+                    top_corr = corr_vals.iloc[0]
+                    col1, col2 = corr_vals.index[0]
+                    return f"""🔗 **Correlation Analysis**:
+                    - Strongest relationship: **{col1}** and **{col2}** (r = {top_corr:.3f})
+                    - This suggests a {'strong' if top_corr > 0.7 else 'moderate' if top_corr > 0.5 else 'weak'} correlation
+                    """
+            return "No numeric columns found for correlation analysis."
+        elif "missing" in question_lower or "null" in question_lower:
+            missing = df.isnull().sum()
+            missing = missing[missing > 0]
+            if missing.empty:
+                return "✅ **Great news!** No missing values found in your dataset."
+            else:
+                return f"""⚠️ **Missing Data Found**:
+                {missing.to_string()}
+                **Recommendation**: Consider filling or removing missing values before analysis.
+                """
+        elif "recommend" in question_lower or "suggest" in question_lower:
+            suggestions = []
+            numeric_cols = df.select_dtypes(include=['number']).columns
+            categorical_cols = df.select_dtypes(include=['object']).columns
+            if len(numeric_cols) >= 2:
+                suggestions.append("📈 Try scatter plots to explore relationships between numeric variables")
+            if len(categorical_cols) > 0 and len(numeric_cols) > 0:
+                suggestions.append("📊 Create bar charts to compare numeric values across categories")
+            if len(numeric_cols) > 0:
+                suggestions.append("📉 Use histograms to understand data distributions")
+            return f"""💡 **Analysis Suggestions**:
+            {chr(10).join(['• ' + s for s in suggestions])}
+            """
+        else:
+            return f"""🤖 **AI Assistant**: I can help you with:
+            - Data summaries and overviews
+            - Correlation and relationship analysis
+            - Missing data detection
+            - Visualization recommendations
+            Try asking: "What's the summary?" or "Any missing data?"
+            """
+    except Exception as e:
+        return f"Error processing question: {str(e)}"
+def parse_contents(contents, filename):
+    """Parse uploaded file contents"""
+    content_type, content_string = contents.split(',')
+    decoded = base64.b64decode(content_string)
+    try:
+        if 'csv' in filename:
+            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
+        elif 'xls' in filename:
+            df = pd.read_excel(io.BytesIO(decoded))
+        else:
+            return None, "Unsupported file type"
+        return df, None
+    except Exception as e:
+        return None, f"Error processing file: {str(e)}"
+@app.callback(
+    [Output('stored-data', 'data'),
+     Output('upload-status', 'children'),
+     Output('data-table', 'children')],
+    [Input('upload-data', 'contents')],
+    [State('upload-data', 'filename')]
+)
+def update_data(contents, filename):
+    """Update data when file is uploaded"""
+    if contents is None:
+        return None, "", ""
+    df, error = parse_contents(contents, filename)
+    if error:
+        return None, dbc.Alert(error, color="danger"), ""
+    # Create vector store for AI
+    vector_success = create_vector_store(df)
+    # Create data table preview
+    table = dbc.Table.from_dataframe(
+        df.head(10),
+        striped=True,
+        bordered=True,
+        hover=True,
+        size='sm'
+    )
+    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
+    success_msg = dbc.Alert([
+        html.H6(f"✅ File uploaded successfully! {ai_status}"),
+        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
+        html.P(f"Columns: {', '.join(df.columns.tolist())}")
+    ], color="success")
+    return df.to_dict('records'), success_msg, table
+@app.callback(
+    Output('ai-response', 'children'),
+    [Input('ask-button', 'n_clicks')],
+    [State('ai-question', 'value'),
+     State('stored-data', 'data')]
+)
+def handle_ai_question(n_clicks, question, data):
+    """Handle AI question"""
+    if not n_clicks or not question or not data:
+        return ""
+    df = pd.DataFrame(data)
+    response = get_ai_response(question, df)
+    return dbc.Alert(
+        dcc.Markdown(response),
+        color="info"
+    )
+@app.callback(
+    Output('quick-analytics', 'children'),
+    [Input('stats-btn', 'n_clicks'),
+     Input('corr-btn', 'n_clicks'),
+     Input('missing-btn', 'n_clicks')],
+    [State('stored-data', 'data')]
+)
+def quick_analytics(stats_clicks, corr_clicks, missing_clicks, data):
+    """Handle quick analytics buttons"""
+    if not data:
+        return ""
+    df = pd.DataFrame(data)
+    ctx = callback_context
+    if not ctx.triggered:
+        return ""
+    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
+    if button_id == 'stats-btn':
+        stats = df.describe()
+        return dbc.Alert([
+            html.H6("📊 Summary Statistics"),
+            dbc.Table.from_dataframe(stats.reset_index(), size='sm')
+        ], color="light")
+    elif button_id == 'corr-btn':
+        numeric_df = df.select_dtypes(include=['number'])
+        if len(numeric_df.columns) > 1:
+            corr = numeric_df.corr()
+            fig = px.imshow(corr, text_auto=True, aspect="auto",
+                          title="Correlation Matrix")
+            return dcc.Graph(figure=fig, style={'height': '300px'})
+        return dbc.Alert("No numeric columns for correlation analysis", color="warning")
+    elif button_id == 'missing-btn':
+        missing = df.isnull().sum()
+        missing = missing[missing > 0]
+        if missing.empty:
+            return dbc.Alert("✅ No missing values!", color="success")
+        return dbc.Alert([
+            html.H6("⚠️ Missing Values"),
+            html.Pre(missing.to_string())
+        ], color="warning")
+    return ""
+@app.callback(
+    Output('main-graph', 'figure'),
+    [Input('stored-data', 'data')]
+)
+def update_main_graph(data):
+    """Update main visualization"""
+    if not data:
+        return {}
+    df = pd.DataFrame(data)
+    # Create a smart default visualization
+    numeric_cols = df.select_dtypes(include=['number']).columns
+    categorical_cols = df.select_dtypes(include=['object']).columns
+    if len(numeric_cols) >= 2:
+        # Scatter plot for numeric data
+        fig = px.scatter(df, x=numeric_cols[0], y=numeric_cols[1],
+                        title=f"Relationship: {numeric_cols[1]} vs {numeric_cols[0]}")
+    elif len(numeric_cols) >= 1 and len(categorical_cols) >= 1:
+        # Bar chart for mixed data
+        fig = px.bar(df, x=categorical_cols[0], y=numeric_cols[0],
+                    title=f"Distribution: {numeric_cols[0]} by {categorical_cols[0]}")
+    elif len(numeric_cols) >= 1:
+        # Histogram for single numeric
+        fig = px.histogram(df, x=numeric_cols[0],
+                         title=f"Distribution of {numeric_cols[0]}")
+    else:
+        # Default message
+        fig = go.Figure()
+        fig.add_annotation(text="Upload data to see visualizations",
+                         x=0.5, y=0.5, showarrow=False)
+    fig.update_layout(template="plotly_white")
+    return fig
+if __name__ == '__main__':
+    app.run_server(host='0.0.0.0', port=7860, debug=False)