File size: 4,095 Bytes
a537907
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Model Testing Script
Quick tests to verify model is working correctly
"""

from app.model_loader import ModelLoader
from app.inference import QAInference


def test_english():
    """Print the English section banner and return the English QA fixtures.

    Despite the ``test_`` prefix this function asserts nothing itself; it
    only supplies the cases that ``run_tests`` feeds to the model.

    Returns:
        list[dict]: One dict per case with the string keys ``"question"``,
        ``"context"`` and ``"expected"``. Each ``"expected"`` value occurs
        verbatim in its ``"context"``.
    """
    print("\n" + "=" * 80)
    # The flag was stored as mojibake (UTF-8 bytes decoded with a single-byte
    # codepage); restored to the intended emoji.
    print("🇬🇧 TESTING ENGLISH")
    print("=" * 80)

    test_cases = [
        {
            "question": "What is the capital of France?",
            "context": "Paris is the capital and most populous city of France.",
            "expected": "Paris",
        },
        {
            "question": "When was the Eiffel Tower built?",
            "context": "The Eiffel Tower was constructed from 1887 to 1889.",
            "expected": "1887 to 1889",
        },
    ]

    return test_cases


def test_german():
    """Print the German section banner and return the German QA fixtures.

    Despite the ``test_`` prefix this function asserts nothing itself; it
    only supplies the cases that ``run_tests`` feeds to the model.

    Returns:
        list[dict]: One dict per case with the string keys ``"question"``,
        ``"context"`` and ``"expected"``. Each ``"expected"`` value occurs
        verbatim in its ``"context"``.
    """
    print("\n" + "=" * 80)
    # The flag was stored as mojibake (UTF-8 bytes decoded with a single-byte
    # codepage); restored to the intended emoji.
    print("🇩🇪 TESTING GERMAN")
    print("=" * 80)

    test_cases = [
        {
            "question": "Was ist die Hauptstadt von Deutschland?",
            "context": "Berlin ist die Hauptstadt von Deutschland.",
            "expected": "Berlin",
        },
        {
            "question": "Wann wurde der Berliner Fernsehturm gebaut?",
            "context": "Der Berliner Fernsehturm wurde zwischen 1965 und 1969 erbaut.",
            # Was "1965 bis 1969", which never appears in the context, so the
            # substring check in run_tests could not pass for an answer taken
            # from the context (assumes the model answers with a context
            # span - TODO confirm against QAInference).
            "expected": "1965 und 1969",
        },
    ]

    return test_cases


def _run_language_suite(inference, language, test_cases):
    """Run one language's fixtures through the model and print the results.

    Args:
        inference: Object exposing ``answer_question(question, context,
            language)`` that returns a 2-item result whose first item is the
            answer text.
        language: Label passed to ``answer_question`` and shown in the
            summary line (e.g. ``"English"``).
        test_cases: Sequence of dicts with the keys ``"question"``,
            ``"context"`` and ``"expected"``.

    Returns:
        tuple: ``(passed, total)`` counts for this language.
    """
    total = len(test_cases)
    passed = 0

    for i, test in enumerate(test_cases, 1):
        answer, _ = inference.answer_question(
            test["question"],
            test["context"],
            language,
        )

        print(f"\nTest {i}/{total}")
        print(f"Q: {test['question']}")
        print(f"Expected: {test['expected']}")
        print(f"Got: {answer}")

        # A case passes when the expected text appears anywhere in the answer,
        # ignoring case. A missing/empty answer is a failure, not a crash.
        if answer and test["expected"].lower() in answer.lower():
            print("✅ PASSED")
            passed += 1
        else:
            print("❌ FAILED")

    # Guard the percentage so an empty fixture list cannot divide by zero.
    percent = passed / total * 100 if total else 0.0
    print(f"\n📊 {language} Results: {passed}/{total} passed ({percent:.1f}%)")
    return passed, total


def run_tests():
    """Load the model and run the English and German smoke tests.

    Prints a banner, loads the model from ``models/multilingual_model``,
    then evaluates the fixtures from ``test_english`` and ``test_german``
    and prints per-case and per-language results.

    Returns:
        None. If the model cannot be loaded, the error and a hint are printed
        and the function returns early without running any case.
    """
    print("""
    ╔══════════════════════════════════════════════════════════════╗
    ║                                                              ║
    ║        🧪 MODEL TESTING SUITE 🧪                             ║
    ║                                                              ║
    ╚══════════════════════════════════════════════════════════════╝
    """)

    # Load model
    print("\n📂 Loading model...")
    try:
        loader = ModelLoader(model_path="models/multilingual_model")
        model, tokenizer = loader.load()
        inference = QAInference(model, tokenizer, loader.device)
    except Exception as e:
        # Script-level boundary: any load failure is reported to the user
        # with a hint instead of surfacing as a traceback.
        print(f"❌ Failed to load model: {e}")
        print("\n💡 Make sure model files exist in models/multilingual_model/")
        return
    print("✅ Model loaded successfully!\n")

    # Each fixture provider prints its own section header before its cases run.
    _run_language_suite(inference, "English", test_english())
    _run_language_suite(inference, "German", test_german())

    print("\n" + "=" * 80)
    print("✅ TESTING COMPLETE!")
    print("=" * 80)


# Run the suite only when this file is executed as a script, so importing the
# module (e.g. to reuse the fixtures) does not load the model.
if __name__ == "__main__":
    run_tests()