Praanshull committed on
Commit 376aabc · verified · 1 Parent(s): 45d05c5

Upload 5 files

Files changed (5)
  1. app/__init__.py +18 -0
  2. app/inference.py +116 -0
  3. app/interface.py +207 -0
  4. app/model_loader.py +78 -0
  5. app/utils.py +170 -0
app/__init__.py ADDED
@@ -0,0 +1,18 @@
+ """
+ Multilingual Question Answering System
+ App package initialization
+ """
+
+ __version__ = "1.0.0"
+ __author__ = "Praanshull Verma"
+
+ from .model_loader import ModelLoader
+ from .inference import QAInference
+ from .utils import calculate_confidence, format_answer
+
+ __all__ = [
+     "ModelLoader",
+     "QAInference",
+     "calculate_confidence",
+     "format_answer"
+ ]
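
With this initializer in place, the package's public API is importable from `app` directly. A minimal sketch (assuming the repository root is on `PYTHONPATH` and the dependencies are installed):

    # Quick check of the package's public surface (not part of this commit)
    import app
    from app import ModelLoader, QAInference, calculate_confidence, format_answer

    print(app.__version__)  # "1.0.0"
    print(app.__all__)      # the four names re-exported above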
app/inference.py ADDED
@@ -0,0 +1,116 @@
+ """
+ Inference Module
+ Handles question answering predictions
+ """
+
+ import torch
+ from typing import Tuple
+
+
+ class QAInference:
+     """Handles question answering inference"""
+
+     def __init__(self, model, tokenizer, device):
+         """
+         Initialize QA Inference
+
+         Args:
+             model: Loaded model
+             tokenizer: Loaded tokenizer
+             device: Torch device
+         """
+         self.model = model
+         self.tokenizer = tokenizer
+         self.device = device
+
+     def answer_question(
+         self,
+         question: str,
+         context: str,
+         language: str = "English",
+         max_length: int = 64
+     ) -> Tuple[str, str]:
+         """
+         Generate an answer for the given question and context
+
+         Args:
+             question: Question text
+             context: Context/passage text
+             language: "English" or "German"
+             max_length: Maximum answer length
+
+         Returns:
+             Tuple of (answer, response_info)
+         """
+         if not question.strip() or not context.strip():
+             return "⚠️ Please provide both a question and context!", ""
+
+         try:
+             # Configure language
+             if language == "English":
+                 self.tokenizer.src_lang = "en_XX"
+                 self.tokenizer.tgt_lang = "en_XX"
+                 lang_code = self.tokenizer.lang_code_to_id["en_XX"]
+             else:
+                 self.tokenizer.src_lang = "de_DE"
+                 self.tokenizer.tgt_lang = "de_DE"
+                 lang_code = self.tokenizer.lang_code_to_id["de_DE"]
+
+             self.model.config.forced_bos_token_id = lang_code
+
+             # Prepare input
+             input_text = f"question: {question} context: {context}"
+             inputs = self.tokenizer(
+                 input_text,
+                 max_length=256,
+                 truncation=True,
+                 return_tensors="pt"
+             ).to(self.device)
+
+             # Generate answer
+             self.model.eval()
+             with torch.no_grad():
+                 outputs = self.model.generate(
+                     **inputs,
+                     max_length=max_length,
+                     num_beams=4,
+                     early_stopping=True,
+                     forced_bos_token_id=lang_code
+                 )
+
+             answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+             # Calculate confidence
+             confidence = self._calculate_confidence(answer, context)
+
+             # Format response info
+             response_info = f"""
+ ### 📊 Response Details
+ - **Language**: {language}
+ - **Answer Length**: {len(answer.split())} words
+ - **Confidence**: {confidence}
+ - **Model**: mBART-large-50 + LoRA
+ """
+
+             return answer, response_info
+
+         except Exception as e:
+             return f"❌ Error: {str(e)}", ""
+
+     def _calculate_confidence(self, answer: str, context: str) -> str:
+         """
+         Calculate answer confidence (simple heuristic)
+
+         Args:
+             answer: Generated answer
+             context: Input context
+
+         Returns:
+             Confidence level string
+         """
+         if len(answer.split()) < 2:
+             return "Low"
+         elif answer.lower() in context.lower():
+             return "High"
+         else:
+             return "Medium"
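
For reference, a minimal sketch of driving `QAInference` directly, assuming a fine-tuned checkpoint is available via the `ModelLoader` added later in this commit:

    from app.model_loader import ModelLoader
    from app.inference import QAInference

    loader = ModelLoader()                 # default path: models/multilingual_model (assumed to exist)
    model, tokenizer = loader.load()
    engine = QAInference(model, tokenizer, loader.device)

    answer, info = engine.answer_question(
        question="What is the capital of France?",
        context="Paris is the capital and most populous city of France.",
        language="English",
    )
    print(answer)   # expected: a short span such as "Paris"
    print(info)     # markdown-formatted response details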
app/interface.py ADDED
@@ -0,0 +1,207 @@
+ """
+ Gradio Interface Module
+ Defines the web interface layout and interactions
+ """
+
+ import gradio as gr
+ from .utils import create_performance_chart, create_metrics_table, get_example
+
+
+ # Custom CSS
+ CUSTOM_CSS = """
+ .gradio-container {
+     font-family: 'Arial', sans-serif;
+ }
+ .header {
+     text-align: center;
+     padding: 20px;
+     background: linear-gradient(90deg, #3498db, #e74c3c);
+     color: white;
+     border-radius: 10px;
+     margin-bottom: 20px;
+ }
+ """
+
+
+ def create_interface(inference_engine):
+     """
+     Create Gradio interface
+
+     Args:
+         inference_engine: QAInference instance
+
+     Returns:
+         Gradio Blocks interface
+     """
+
+     with gr.Blocks(css=CUSTOM_CSS) as demo:  # pass the CSS so the .header styling actually applies
+
+         # Header
+         gr.Markdown("""
+         <div class="header">
+             <h1>🌍 Multilingual Question Answering System</h1>
+             <p>Fine-tuned mBART-large with LoRA on SQuAD (English) and XQuAD (German)</p>
+             <p><i>Supporting English 🇬🇧 and German 🇩🇪</i></p>
+         </div>
+         """)
+
+         with gr.Tabs():
+
+             # Tab 1: Question Answering
+             with gr.Tab("❓ Ask Questions"):
+
+                 gr.Markdown("""### Enter your question and provide context for the model to extract the answer from:
+                 💡 Tips for Best Results:
+                 - ✅ Keep context under 300 words
+                 - ✅ Make sure the answer is explicitly stated in the context
+                 - ✅ Use clear, direct questions
+                 - ❌ Avoid questions requiring reasoning across multiple sentences
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=2):
+                         language_choice = gr.Radio(
+                             choices=["English", "German"],
+                             value="English",
+                             label="🌐 Select Language",
+                             info="Choose the language for your question and context"
+                         )
+
+                         question_input = gr.Textbox(
+                             label="📝 Question",
+                             placeholder="Enter your question here...",
+                             lines=2
+                         )
+
+                         context_input = gr.Textbox(
+                             label="📄 Context",
+                             placeholder="Provide the context/passage containing the answer...",
+                             lines=6
+                         )
+
+                         with gr.Row():
+                             submit_btn = gr.Button("🔍 Get Answer", variant="primary", size="lg")
+                             clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+
+                         gr.Markdown("### 💡 Try Examples:")
+                         example_type = gr.Radio(
+                             choices=["General Knowledge", "Historical", "Scientific"],
+                             value="General Knowledge",
+                             label="Example Type"
+                         )
+                         load_example_btn = gr.Button("📥 Load Example")
+
+                     with gr.Column(scale=1):
+                         gr.Markdown("### 🎯 Answer")
+                         answer_output = gr.Textbox(
+                             label="Model Answer",
+                             lines=3,
+                             interactive=False
+                         )
+                         response_details = gr.Markdown("")
+
+                 # Button actions
+                 submit_btn.click(
+                     fn=inference_engine.answer_question,
+                     inputs=[question_input, context_input, language_choice],
+                     outputs=[answer_output, response_details]
+                 )
+
+                 clear_btn.click(
+                     fn=lambda: ("", "", ""),
+                     outputs=[question_input, context_input, answer_output]
+                 )
+
+                 load_example_btn.click(
+                     fn=get_example,
+                     inputs=[example_type, language_choice],
+                     outputs=[question_input, context_input]
+                 )
+
+             # Tab 2: Performance Metrics
+             with gr.Tab("📊 Performance Metrics"):
+                 gr.Markdown("""
+                 ### Model Performance Analysis
+                 Evaluation results on the SQuAD (English) and XQuAD (German) test sets
+                 """)
+
+                 performance_plot = gr.Plot(
+                     value=create_performance_chart(),
+                     label="Performance Comparison"
+                 )
+
+                 gr.Markdown("### 📋 Detailed Metrics Table")
+                 metrics_df = create_metrics_table()
+                 metrics_table = gr.Dataframe(
+                     value=metrics_df,
+                     label="Performance Metrics by Language"
+                 )
+
+                 gr.Markdown("""
+                 ### 🔑 Key Insights
+
+                 ✅ **German Performance**: 107.2% of English performance (Avg EM+F1)
+                 - BLEU: 43.12 vs 37.79 (+5.33 points)
+                 - F1 Score: 0.6580 vs 0.6329 (+0.025)
+                 - Exact Match: 48.74% vs 43.60% (+5.14 points)
+
+                 ✅ **Strong Transfer Learning**: the model adapted to German with limited training data
+
+                 ✅ **Training Details**:
+                 - Base Model: facebook/mbart-large-50-many-to-many-mmt
+                 - Fine-tuning: LoRA (r=8, alpha=32)
+                 - English Training: 20,000 samples from SQuAD
+                 - German Training: ~950 samples from XQuAD
+                 - Total Training Time: ~2.5 hours on a T4 GPU
+                 """)
+
+             # Tab 3: About
+             with gr.Tab("ℹ️ About"):
+                 gr.Markdown("""
+                 # Multilingual Question Answering System
+
+                 ## 🎯 Project Overview
+                 A multilingual question answering system that extracts answers from a provided context in English and German.
+
+                 ## 🛠️ Architecture
+                 - **Base Model**: mBART-large-50-many-to-many-mmt (610M parameters)
+                 - **Fine-tuning Method**: LoRA (Low-Rank Adaptation)
+                 - **Trainable Parameters**: 1.77M (0.29% of total)
+                 - **Training Data**:
+                     - English: Stanford Question Answering Dataset (SQuAD)
+                     - German: Cross-lingual Question Answering Dataset (XQuAD)
+
+                 ## 🚀 Key Features
+                 - ✅ Bilingual support (English & German)
+                 - ✅ Fast inference (<1 second per query)
+                 - ✅ Memory-efficient with LoRA
+                 - ✅ High accuracy (>0.63 F1 score in both languages)
+
+                 ## 📈 Performance Highlights
+                 - 48.74% exact match on German with minimal training data
+                 - BLEU score of 43.12 on German (above the English score)
+                 - Demonstrated positive transfer learning across languages
+
+                 ## ⚠️ Known Limitations
+                 - Long contexts (>500 words) may degrade performance
+                 - Complex multi-hop reasoning questions may fail
+                 - Limited to extractive QA (the answer must appear in the context)
+
+                 ## 👨‍💻 Author
+                 Praanshull Verma
+                 - GitHub: Praanshull
+
+                 ## 📄 License
+                 MIT License
+                 """)
+
+         # Footer
+         gr.Markdown("""
+         ---
+         <div style="text-align: center; padding: 10px;">
+             <p>Built with ❤️ using HuggingFace Transformers, PEFT, and Gradio</p>
+             <p><i>Last Updated: December 2025</i></p>
+         </div>
+         """)
+
+     return demo
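
The commit itself adds no entry point, so as an illustration only, a hypothetical `app.py` (name assumed, not part of this upload) might wire the pieces together like this:

    # Hypothetical app.py tying the three modules together
    from app.model_loader import ModelLoader
    from app.inference import QAInference
    from app.interface import create_interface

    loader = ModelLoader()
    model, tokenizer = loader.load()
    engine = QAInference(model, tokenizer, loader.device)

    demo = create_interface(engine)
    demo.launch()   # serves the Gradio UI, by default on http://127.0.0.1:7860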
app/model_loader.py ADDED
@@ -0,0 +1,78 @@
+ """
+ Model Loading Module
+ Handles loading the mBART + LoRA model from disk
+ """
+
+ import torch
+ import gc
+ from pathlib import Path
+ from transformers import MBart50TokenizerFast, MBartForConditionalGeneration
+ from peft import PeftModel
+
+
+ class ModelLoader:
+     """Handles model and tokenizer loading"""
+
+     def __init__(self, model_path: str = None):
+         """
+         Initialize ModelLoader
+
+         Args:
+             model_path: Path to saved model directory
+         """
+         self.model_path = model_path or "models/multilingual_model"
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = None
+         self.tokenizer = None
+
+     def load(self):
+         """Load model and tokenizer from disk"""
+         print(f"🔧 Loading model from: {self.model_path}")
+
+         # Clear memory
+         torch.cuda.empty_cache()
+         gc.collect()
+
+         try:
+             # Load tokenizer
+             print("⏳ Loading tokenizer...")
+             self.tokenizer = MBart50TokenizerFast.from_pretrained(self.model_path)
+             print("✅ Tokenizer loaded")
+
+             # Load base model
+             print("⏳ Loading base mBART model...")
+             base_model = MBartForConditionalGeneration.from_pretrained(
+                 "facebook/mbart-large-50-many-to-many-mmt"
+             )
+             print("✅ Base model loaded")
+
+             # Load LoRA weights
+             print("⏳ Loading LoRA adapter...")
+             self.model = PeftModel.from_pretrained(base_model, self.model_path)
+             print("✅ LoRA weights loaded")
+
+             # Move to device
+             self.model = self.model.to(self.device)
+             self.model.eval()
+
+             print("\n✅ MODEL LOADED SUCCESSFULLY!")
+             print(f"💾 Device: {self.device}")
+             print(f"📊 Total parameters: {self.model.num_parameters():,}")
+
+             return self.model, self.tokenizer
+
+         except Exception as e:
+             print(f"\n❌ ERROR LOADING MODEL: {str(e)}")
+             raise
+
+     def get_model_info(self):
+         """Get model information"""
+         if self.model is None:
+             return None
+
+         return {
+             "device": str(self.device),
+             "parameters": self.model.num_parameters(),
+             "model_path": self.model_path,
+             "base_model": "facebook/mbart-large-50-many-to-many-mmt"
+         }
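
A short usage sketch for `ModelLoader` on its own (assuming the LoRA adapter and tokenizer were saved under the default models/multilingual_model directory):

    from app.model_loader import ModelLoader

    loader = ModelLoader()                # or ModelLoader("path/to/checkpoint")
    model, tokenizer = loader.load()      # downloads the 610M-parameter base model on first run

    info = loader.get_model_info()
    print(info["device"])                 # "cuda" if a GPU is visible, otherwise "cpu"
    print(f"{info['parameters']:,}")      # total parameter count (base + LoRA)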
app/utils.py ADDED
@@ -0,0 +1,170 @@
+ """
+ Utility Functions
+ Helper functions for the QA system
+ """
+
+ import pandas as pd
+ import plotly.graph_objects as go
+ from typing import Dict, Tuple
+
+
+ # Performance data from training
+ PERFORMANCE_DATA = {
+     'English': {
+         'BLEU': 37.79,
+         'ROUGE-1': 0.6282,
+         'ROUGE-2': 0.3710,
+         'ROUGE-L': 0.6272,
+         'Exact Match': 0.4360,
+         'F1 Score': 0.6329,
+         'Avg (EM+F1)': 0.5344
+     },
+     'German': {
+         'BLEU': 43.12,
+         'ROUGE-1': 0.6646,
+         'ROUGE-2': 0.4064,
+         'ROUGE-L': 0.6622,
+         'Exact Match': 0.4874,
+         'F1 Score': 0.6580,
+         'Avg (EM+F1)': 0.5727
+     }
+ }
+
+
+ def calculate_confidence(answer: str, context: str) -> str:
+     """
+     Calculate answer confidence level
+
+     Args:
+         answer: Generated answer
+         context: Input context
+
+     Returns:
+         Confidence level: "High", "Medium", or "Low"
+     """
+     if len(answer.split()) < 2:
+         return "Low"
+     elif answer.lower() in context.lower():
+         return "High"
+     else:
+         return "Medium"
+
+
+ def format_answer(answer: str, language: str, confidence: str) -> str:
+     """
+     Format answer with metadata
+
+     Args:
+         answer: Generated answer
+         language: Language used
+         confidence: Confidence level
+
+     Returns:
+         Formatted string with answer details
+     """
+     return f"""
+ ### 📊 Response Details
+ - **Language**: {language}
+ - **Answer Length**: {len(answer.split())} words
+ - **Confidence**: {confidence}
+ - **Model**: mBART-large-50 + LoRA
+ """
+
+
+ def create_performance_chart() -> go.Figure:
+     """
+     Create interactive performance comparison chart
+
+     Returns:
+         Plotly figure object
+     """
+     metrics = ['BLEU', 'ROUGE-L', 'Exact Match', 'F1 Score']
+
+     english_scores = [
+         PERFORMANCE_DATA['English']['BLEU'] / 100,  # rescale BLEU to 0-1 to share an axis with the other metrics
+         PERFORMANCE_DATA['English']['ROUGE-L'],
+         PERFORMANCE_DATA['English']['Exact Match'],
+         PERFORMANCE_DATA['English']['F1 Score']
+     ]
+
+     german_scores = [
+         PERFORMANCE_DATA['German']['BLEU'] / 100,
+         PERFORMANCE_DATA['German']['ROUGE-L'],
+         PERFORMANCE_DATA['German']['Exact Match'],
+         PERFORMANCE_DATA['German']['F1 Score']
+     ]
+
+     fig = go.Figure(data=[
+         go.Bar(name='English', x=metrics, y=english_scores, marker_color='#3498db'),
+         go.Bar(name='German', x=metrics, y=german_scores, marker_color='#e74c3c')
+     ])
+
+     fig.update_layout(
+         title='Model Performance Comparison: English vs German',
+         xaxis_title='Metrics',
+         yaxis_title='Score',
+         yaxis_range=[0, 1],
+         barmode='group',
+         template='plotly_white',
+         height=400,
+         font=dict(size=12)
+     )
+
+     return fig
+
+
+ def create_metrics_table() -> pd.DataFrame:
+     """
+     Create detailed metrics table
+
+     Returns:
+         Pandas DataFrame with metrics
+     """
+     df = pd.DataFrame(PERFORMANCE_DATA).T
+     df = df.round(4)
+     return df
+
+
+ def get_example(example_type: str, language: str) -> Tuple[str, str]:
+     """
+     Get example question and context
+
+     Args:
+         example_type: Type of example ("General Knowledge", "Historical", "Scientific")
+         language: "English" or "German"
+
+     Returns:
+         Tuple of (question, context)
+     """
+     examples = {
+         "English": {
+             "General Knowledge": (
+                 "What is the capital of France?",
+                 "Paris is the capital and most populous city of France. It has an area of 105 square kilometres and a population of 2,165,423 residents."
+             ),
+             "Historical": (
+                 "When was the Eiffel Tower built?",
+                 "The Eiffel Tower was constructed from 1887 to 1889 as the entrance arch to the 1889 World's Fair."
+             ),
+             "Scientific": (
+                 "What is the largest planet in our solar system?",
+                 "Jupiter is the largest planet in our solar system. It is a gas giant with a mass more than two and a half times that of all the other planets combined."
+             )
+         },
+         "German": {
+             "General Knowledge": (
+                 "Was ist die Hauptstadt von Deutschland?",
+                 "Berlin ist die Hauptstadt und größte Stadt Deutschlands mit etwa 3,7 Millionen Einwohnern."
+             ),
+             "Historical": (
+                 "Wann wurde der Berliner Fernsehturm gebaut?",
+                 "Der Berliner Fernsehturm wurde zwischen 1965 und 1969 erbaut und ist eines der bekanntesten Wahrzeichen Berlins."
+             ),
+             "Scientific": (
+                 "Was ist der größte Planet in unserem Sonnensystem?",
+                 "Jupiter ist der größte Planet in unserem Sonnensystem. Er ist ein Gasriese mit einer Masse, die mehr als zweieinhalb Mal so groß ist wie die aller anderen Planeten zusammen."
+             )
+         }
+     }
+
+     return examples[language][example_type]
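
Finally, a quick illustrative exercise of these helpers; the commented outputs follow directly from the heuristics defined in this file:

    from app.utils import get_example, calculate_confidence, create_metrics_table

    question, context = get_example("General Knowledge", "English")
    print(question)                                              # What is the capital of France?

    # The heuristic returns "High" only for multi-word answers found verbatim in the context
    print(calculate_confidence("most populous city", context))   # High
    print(calculate_confidence("Paris", context))                # Low: one-word answers always score Low

    print(create_metrics_table())                                # 2 rows (English, German) x 7 metric columns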