File size: 4,634 Bytes
651b18e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
"""
Test script to demonstrate SHORT vs LONG response types.
Shows the difference between WhatsApp (brief) and PDF (comprehensive) outputs.
"""

import sys
from pathlib import Path
from rag_pipeline import RAGPipeline, DocumentStore


def _count_sentences(text):
    """Return an approximate sentence count for *text*.

    A sentence end is a run of ``.``, ``!`` or ``?`` (optionally followed by
    closing quotes/brackets) that is followed by whitespace or the end of the
    text. Counting every raw ``.`` would treat decimals such as "7.5%" and
    each dot of an ellipsis as separate sentences.
    """
    import re  # local import keeps this block self-contained

    return len(re.findall(r"""[.!?]+(?=["')\]]*(?:\s|$))""", text))


def _mentions_law_reference(text):
    """Return True if *text* contains "section(s)" or "act(s)" as whole words.

    Whole-word matching avoids false positives from words that merely contain
    "act" (e.g. "fact", "exact", "transaction"). This is still a heuristic.
    """
    import re  # local import keeps this block self-contained

    return re.search(r"\b(?:sections?|acts?)\b", text, re.IGNORECASE) is not None


def test_response_types():
    """Ask one question in SHORT and LONG mode and print heuristic checks.

    Creates a ``DocumentStore`` for ``vector_store``, passes the PDFs it
    discovers under ``data`` to ``build_vector_store`` (``force_rebuild=False``),
    wraps it in a ``RAGPipeline`` and calls ``rag.query`` twice with
    ``response_type='short'`` and ``response_type='long'``.

    Each query runs in its own ``try`` block: any exception is printed with a
    traceback and the function continues, so a failure of one response type
    does not prevent the other (or the final summary) from being shown.

    Returns:
        None. All results are written to stdout; nothing is asserted.
    """
    import traceback

    rule = "=" * 80
    thin_rule = "-" * 80

    # Word-count thresholds used by the PASS/WARNING lines below.
    # NOTE(review): the SHORT limit (200) is looser than the printed
    # "50-150 words" target -- confirm that this slack is intended.
    short_max_words = 200
    long_min_words = 300

    print(rule)
    print("RESPONSE TYPE COMPARISON TEST")
    print(rule)

    # Initialize RAG pipeline
    print("\nInitializing RAG pipeline...")
    vector_store_path = Path("vector_store")
    doc_store = DocumentStore(
        persist_dir=vector_store_path,
        embedding_model="BAAI/bge-large-en-v1.5"
    )

    src = Path("data")
    pdfs = doc_store.discover_pdfs(src)
    doc_store.build_vector_store(pdfs, force_rebuild=False)

    rag = RAGPipeline(
        doc_store=doc_store,
        model="llama-3.3-70b-versatile",
        temperature=0.1,
        top_k=15,
    )

    print("✓ RAG pipeline initialized\n")

    # Test question
    question = "What are the personal income tax rates in Nigeria?"

    print(rule)
    print("TEST QUESTION:")
    print(question)
    print(rule)

    # Test SHORT response (WhatsApp)
    print("\n" + rule)
    print("SHORT RESPONSE (for WhatsApp)")
    print(rule)
    print("\nExpected: 3-4 concise sentences, immediate answer, key facts only\n")

    try:
        short_answer = rag.query(question, verbose=False, response_type='short')
        print(short_answer)

        # Quality checks for SHORT
        print("\n" + thin_rule)
        print("SHORT RESPONSE QUALITY CHECKS:")
        word_count = len(short_answer.split())
        sentence_count = _count_sentences(short_answer)
        has_numbers = any(char.isdigit() for char in short_answer)

        print(f"  Word count: {word_count} (target: 50-150 words for brief)")
        print(f"  Sentence count: ~{sentence_count}")
        print(f"  Contains numbers: {has_numbers}")

        if word_count <= short_max_words:
            print("  ✓ PASS: Response is concise")
        else:
            print("  ⚠️ WARNING: Response may be too long for WhatsApp")

    except Exception as e:
        # Best-effort demo script: report the failure and keep going so the
        # LONG response is still exercised.
        print(f"❌ ERROR: {e}")
        traceback.print_exc()

    # Test LONG response (PDF)
    print("\n\n" + rule)
    print("LONG RESPONSE (for PDF Report)")
    print(rule)
    print("\nExpected: Comprehensive with examples, calculations, tables, law references\n")

    try:
        long_answer = rag.query(question, verbose=False, response_type='long')
        print(long_answer)

        # Quality checks for LONG
        print("\n" + thin_rule)
        print("LONG RESPONSE QUALITY CHECKS:")
        lowered = long_answer.lower()  # lower-case once for all keyword checks
        word_count = len(long_answer.split())
        has_examples = 'example' in lowered or 'instance' in lowered
        has_calculations = '×' in long_answer or 'calculate' in lowered
        has_law_refs = _mentions_law_reference(long_answer)
        has_numbers = any(char.isdigit() for char in long_answer)

        print(f"  Word count: {word_count} (target: 300+ words for comprehensive)")
        print(f"  Contains examples: {has_examples}")
        print(f"  Contains calculations: {has_calculations}")
        print(f"  Contains law references: {has_law_refs}")
        print(f"  Contains numbers: {has_numbers}")

        if word_count >= long_min_words:
            print("  ✓ PASS: Response is comprehensive")
        else:
            print("  ⚠️ WARNING: Response may be too brief for PDF report")

        if has_examples and has_numbers:
            print("  ✓ PASS: Response includes examples and numbers")
        else:
            print("  ⚠️ WARNING: Response may lack examples or numbers")

    except Exception as e:
        # Same best-effort policy as above: report and fall through to the
        # summary.
        print(f"❌ ERROR: {e}")
        traceback.print_exc()

    # Summary
    print("\n\n" + rule)
    print("COMPARISON SUMMARY")
    print(rule)
    summary_lines = (
        "\nKEY DIFFERENCES:",
        "  SHORT (WhatsApp):",
        "    - 3-4 sentences",
        "    - Immediate answer",
        "    - Key facts only",
        "    - No examples or detailed calculations",
        "",
        "  LONG (PDF Report):",
        "    - Multiple paragraphs",
        "    - Detailed explanations",
        "    - Examples with step-by-step calculations",
        "    - Law references and edge cases",
        "    - Professional report format",
    )
    for line in summary_lines:
        print(line)
    print(rule)


# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_response_types()