Oluwaferanmi committed on
Commit
090df05
·
1 Parent(s): e75ba0c

Update the classifier function

Browse files
Files changed (2) hide show
  1. kaanta +0 -1
  2. transaction_classifier.py +13 -9
kaanta DELETED
@@ -1 +0,0 @@
1
- Subproject commit 5140abe64a725e0eda4de06ba52a34e31f5ce0f1
 
 
transaction_classifier.py CHANGED
@@ -26,23 +26,25 @@ class TransactionClassifier:
26
  Classifies bank transactions (from Mono API or manual entry) into tax categories
27
  """
28
 
29
- # Nigerian bank transaction patterns
30
  INCOME_PATTERNS = {
31
  'employment_income': [
32
  r'\bSALARY\b', r'\bWAGES\b', r'\bPAYROLL\b', r'\bSTIPEND\b',
33
- r'\bEMPLOYMENT\b', r'\bMONTHLY PAY\b', r'\bNET PAY\b'
 
34
  ],
35
  'business_income': [
36
  r'\bSALES\b', r'\bREVENUE\b', r'\bINVOICE\b', r'\bPAYMENT RECEIVED\b',
37
- r'\bCUSTOMER\b', r'\bCLIENT\b'
 
38
  ],
39
  'rental_income': [
40
  r'\bRENT RECEIVED\b', r'\bTENANT\b', r'\bLEASE PAYMENT\b',
41
- r'\bPROPERTY INCOME\b'
42
  ],
43
  'investment_income': [
44
  r'\bDIVIDEND\b', r'\bINTEREST\b', r'\bINVESTMENT\b',
45
- r'\bCOUPON\b', r'\bBOND\b'
46
  ]
47
  }
48
 
@@ -112,11 +114,13 @@ class TransactionClassifier:
112
  # Classify using pattern matching
113
  classification = self._classify_by_patterns(narration, tx_type, amount)
114
 
 
 
115
  # If confidence is low and RAG is available, use LLM
116
- if classification.confidence < 0.7 and self.rag:
117
- llm_classification = self._llm_classify(transaction)
118
- if llm_classification.confidence > classification.confidence:
119
- classification = llm_classification
120
 
121
  # Enrich original transaction
122
  return {
 
26
  Classifies bank transactions (from Mono API or manual entry) into tax categories
27
  """
28
 
29
+ # Nigerian bank transaction patterns (expanded for better coverage)
30
  INCOME_PATTERNS = {
31
  'employment_income': [
32
  r'\bSALARY\b', r'\bWAGES\b', r'\bPAYROLL\b', r'\bSTIPEND\b',
33
+ r'\bEMPLOYMENT\b', r'\bMONTHLY PAY\b', r'\bNET PAY\b',
34
+ r'\bGROSS PAY\b', r'\bEARNINGS\b', r'\bSALARY PAYMENT\b'
35
  ],
36
  'business_income': [
37
  r'\bSALES\b', r'\bREVENUE\b', r'\bINVOICE\b', r'\bPAYMENT RECEIVED\b',
38
+ r'\bCUSTOMER\b', r'\bCLIENT\b', r'\bPROJECT\b', r'\bCONSULTING\b',
39
+ r'\bFREELANCE\b', r'\bCONTRACT\b'
40
  ],
41
  'rental_income': [
42
  r'\bRENT RECEIVED\b', r'\bTENANT\b', r'\bLEASE PAYMENT\b',
43
+ r'\bPROPERTY INCOME\b', r'\bRENTAL\b'
44
  ],
45
  'investment_income': [
46
  r'\bDIVIDEND\b', r'\bINTEREST\b', r'\bINVESTMENT\b',
47
+ r'\bCOUPON\b', r'\bBOND\b', r'\bSTOCK\b', r'\bSHARE\b'
48
  ]
49
  }
50
 
 
114
  # Classify using pattern matching
115
  classification = self._classify_by_patterns(narration, tx_type, amount)
116
 
117
+ # DISABLED: LLM classification to avoid rate limits
118
+ # Only use pattern matching for now
119
  # If confidence is low and RAG is available, use LLM
120
+ # if classification.confidence < 0.7 and self.rag:
121
+ # llm_classification = self._llm_classify(transaction)
122
+ # if llm_classification.confidence > classification.confidence:
123
+ # classification = llm_classification
124
 
125
  # Enrich original transaction
126
  return {