Oluwaferanmi
committed on
Commit
·
090df05
1
Parent(s):
e75ba0c
Update the classifier function
Browse files
- kaanta +0 -1
- transaction_classifier.py +13 -9
kaanta
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
Subproject commit 5140abe64a725e0eda4de06ba52a34e31f5ce0f1
|
|
|
|
|
|
transaction_classifier.py
CHANGED
|
@@ -26,23 +26,25 @@ class TransactionClassifier:
|
|
| 26 |
Classifies bank transactions (from Mono API or manual entry) into tax categories
|
| 27 |
"""
|
| 28 |
|
| 29 |
-
# Nigerian bank transaction patterns
|
| 30 |
INCOME_PATTERNS = {
|
| 31 |
'employment_income': [
|
| 32 |
r'\bSALARY\b', r'\bWAGES\b', r'\bPAYROLL\b', r'\bSTIPEND\b',
|
| 33 |
-
r'\bEMPLOYMENT\b', r'\bMONTHLY PAY\b', r'\bNET PAY\b'
|
|
|
|
| 34 |
],
|
| 35 |
'business_income': [
|
| 36 |
r'\bSALES\b', r'\bREVENUE\b', r'\bINVOICE\b', r'\bPAYMENT RECEIVED\b',
|
| 37 |
-
r'\bCUSTOMER\b', r'\bCLIENT\b'
|
|
|
|
| 38 |
],
|
| 39 |
'rental_income': [
|
| 40 |
r'\bRENT RECEIVED\b', r'\bTENANT\b', r'\bLEASE PAYMENT\b',
|
| 41 |
-
r'\bPROPERTY INCOME\b'
|
| 42 |
],
|
| 43 |
'investment_income': [
|
| 44 |
r'\bDIVIDEND\b', r'\bINTEREST\b', r'\bINVESTMENT\b',
|
| 45 |
-
r'\bCOUPON\b', r'\bBOND\b'
|
| 46 |
]
|
| 47 |
}
|
| 48 |
|
|
@@ -112,11 +114,13 @@ class TransactionClassifier:
|
|
| 112 |
# Classify using pattern matching
|
| 113 |
classification = self._classify_by_patterns(narration, tx_type, amount)
|
| 114 |
|
|
|
|
|
|
|
| 115 |
# If confidence is low and RAG is available, use LLM
|
| 116 |
-
if classification.confidence < 0.7 and self.rag:
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
|
| 121 |
# Enrich original transaction
|
| 122 |
return {
|
|
|
|
| 26 |
Classifies bank transactions (from Mono API or manual entry) into tax categories
|
| 27 |
"""
|
| 28 |
|
| 29 |
+
# Nigerian bank transaction patterns (expanded for better coverage)
|
| 30 |
INCOME_PATTERNS = {
|
| 31 |
'employment_income': [
|
| 32 |
r'\bSALARY\b', r'\bWAGES\b', r'\bPAYROLL\b', r'\bSTIPEND\b',
|
| 33 |
+
r'\bEMPLOYMENT\b', r'\bMONTHLY PAY\b', r'\bNET PAY\b',
|
| 34 |
+
r'\bGROSS PAY\b', r'\bEARNINGS\b', r'\bSALARY PAYMENT\b'
|
| 35 |
],
|
| 36 |
'business_income': [
|
| 37 |
r'\bSALES\b', r'\bREVENUE\b', r'\bINVOICE\b', r'\bPAYMENT RECEIVED\b',
|
| 38 |
+
r'\bCUSTOMER\b', r'\bCLIENT\b', r'\bPROJECT\b', r'\bCONSULTING\b',
|
| 39 |
+
r'\bFREELANCE\b', r'\bCONTRACT\b'
|
| 40 |
],
|
| 41 |
'rental_income': [
|
| 42 |
r'\bRENT RECEIVED\b', r'\bTENANT\b', r'\bLEASE PAYMENT\b',
|
| 43 |
+
r'\bPROPERTY INCOME\b', r'\bRENTAL\b'
|
| 44 |
],
|
| 45 |
'investment_income': [
|
| 46 |
r'\bDIVIDEND\b', r'\bINTEREST\b', r'\bINVESTMENT\b',
|
| 47 |
+
r'\bCOUPON\b', r'\bBOND\b', r'\bSTOCK\b', r'\bSHARE\b'
|
| 48 |
]
|
| 49 |
}
|
| 50 |
|
|
|
|
| 114 |
# Classify using pattern matching
|
| 115 |
classification = self._classify_by_patterns(narration, tx_type, amount)
|
| 116 |
|
| 117 |
+
# DISABLED: LLM classification to avoid rate limits
|
| 118 |
+
# Only use pattern matching for now
|
| 119 |
# If confidence is low and RAG is available, use LLM
|
| 120 |
+
# if classification.confidence < 0.7 and self.rag:
|
| 121 |
+
# llm_classification = self._llm_classify(transaction)
|
| 122 |
+
# if llm_classification.confidence > classification.confidence:
|
| 123 |
+
# classification = llm_classification
|
| 124 |
|
| 125 |
# Enrich original transaction
|
| 126 |
return {
|