CaffeinatedCoding committed on
Commit
70b94cb
·
verified ·
1 Parent(s): a7b1897

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +15 -0
  2. requirements.txt +1 -0
  3. src/agent_v2.py +9 -24
  4. src/llm.py +118 -49
README.md CHANGED
@@ -352,8 +352,23 @@ NyayaSetu/
352
  │ └── test_api.py
353
  ├── .github/workflows/ci.yml ← pytest → lint → docker build → HF deploy → smoke test
354
  └── docker/Dockerfile
 
 
355
  ```
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  ---
358
 
359
  ## Setup & Reproduction
 
352
  │ └── test_api.py
353
  ├── .github/workflows/ci.yml ← pytest → lint → docker build → HF deploy → smoke test
354
  └── docker/Dockerfile
355
+
356
+
357
  ```
358
 
359
+ ## V2 Agent Architecture
360
+
361
+ **Pass 1 — Analyse:** LLM call to understand the message, detect tone/stage,
362
+ build structured fact web, update hypotheses, form targeted FAISS queries.
363
+
364
+ **Pass 2 — Retrieve:** Parallel FAISS search across 3 queries. No LLM call. ~5ms.
365
+
366
+ **Pass 3 — Respond:** Dynamically assembled prompt based on tone, stage, and
367
+ format needs + full case state + retrieved context.
368
+
369
+ **Conversation Memory:** Each session maintains a compressed summary + structured
370
+ fact web (parties, events, documents, amounts, hypotheses) updated every turn.
371
+
372
  ---
373
 
374
  ## Setup & Reproduction
requirements.txt CHANGED
@@ -5,6 +5,7 @@ huggingface_hub
5
  sentence-transformers
6
  numpy
7
  groq
 
8
  tenacity
9
  python-dotenv
10
  transformers
 
5
  sentence-transformers
6
  numpy
7
  groq
8
+ google-generativeai
9
  tenacity
10
  python-dotenv
11
  transformers
src/agent_v2.py CHANGED
@@ -25,15 +25,14 @@ from src.retrieval import retrieve
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
 
28
 
29
  logger = logging.getLogger(__name__)
30
 
31
- from groq import Groq
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
36
- _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
37
 
38
  # ── Session store ─────────────────────────────────────────
39
  sessions: Dict[str, Dict] = {}
@@ -165,17 +164,10 @@ Rules:
165
  - Update hypothesis confidence based on new evidence
166
  - search_queries must be specific legal questions for vector search"""
167
 
168
- response = _client.chat.completions.create(
169
- model="llama-3.3-70b-versatile",
170
- messages=[
171
- {"role": "system", "content": ANALYSIS_PROMPT},
172
- {"role": "user", "content": user_content}
173
- ],
174
- temperature=0.1,
175
- max_tokens=900
176
- )
177
-
178
- raw = response.choices[0].message.content.strip()
179
  raw = raw.replace("```json", "").replace("```", "").strip()
180
 
181
  try:
@@ -326,17 +318,10 @@ Instructions:
326
  - Opposition war-gaming: if giving strategy, include what the other side will argue
327
  {radar_instruction}"""
328
 
329
- response = _client.chat.completions.create(
330
- model="llama-3.3-70b-versatile",
331
- messages=[
332
- {"role": "system", "content": system_prompt},
333
- {"role": "user", "content": user_content}
334
- ],
335
- temperature=0.3,
336
- max_tokens=1500
337
- )
338
-
339
- return response.choices[0].message.content
340
 
341
 
342
  # ── Main entry point ──────────────────────────────────────
 
25
  from src.verify import verify_citations
26
  from src.system_prompt import build_prompt, ANALYSIS_PROMPT
27
  from src.ner import extract_entities, augment_query
28
+ from src.llm import call_llm_raw
29
 
30
  logger = logging.getLogger(__name__)
31
 
 
32
  from tenacity import retry, stop_after_attempt, wait_exponential
33
  from dotenv import load_dotenv
34
 
35
  load_dotenv()
 
36
 
37
  # ── Session store ─────────────────────────────────────────
38
  sessions: Dict[str, Dict] = {}
 
164
  - Update hypothesis confidence based on new evidence
165
  - search_queries must be specific legal questions for vector search"""
166
 
167
+ raw = call_llm_raw([
168
+ {"role": "system", "content": ANALYSIS_PROMPT},
169
+ {"role": "user", "content": user_content}
170
+ ]).strip()
 
 
 
 
 
 
 
171
  raw = raw.replace("```json", "").replace("```", "").strip()
172
 
173
  try:
 
318
  - Opposition war-gaming: if giving strategy, include what the other side will argue
319
  {radar_instruction}"""
320
 
321
+ return call_llm_raw([
322
+ {"role": "system", "content": system_prompt},
323
+ {"role": "user", "content": user_content}
324
+ ])
 
 
 
 
 
 
 
325
 
326
 
327
  # ── Main entry point ──────────────────────────────────────
src/llm.py CHANGED
@@ -1,67 +1,136 @@
1
  """
2
- LLM module. Single Groq API call with tenacity retry.
3
-
4
- WHY Groq? Free tier, fastest inference (~500 tokens/sec).
5
- WHY temperature=0.1? Lower = more deterministic, less hallucination.
6
- WHY one call per query? Multi-step chains add latency and failure points.
7
- Gemini is configured as backup if Groq fails permanently.
8
  """
9
 
10
  import os
11
- from groq import Groq
12
  from tenacity import retry, stop_after_attempt, wait_exponential
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- _client = Groq(api_key=os.getenv("GROQ_API_KEY"))
18
-
19
- SYSTEM_PROMPT = """You are NyayaSetu, an Indian legal research assistant.
20
-
21
- Rules you must follow:
22
- 1. Answer ONLY using the provided Supreme Court judgment excerpts
23
- 2. Never use outside knowledge
24
- 3. Quote directly from excerpts when making factual claims — use double quotes
25
- 4. Always cite the Judgment ID when referencing a case
26
- 5. If excerpts don't contain enough information, say so explicitly
27
- 6. End every response with: "NOTE: This is not legal advice. Consult a qualified advocate."
28
-
29
- Formatting rules follow these exactly:
30
- - Use numbered lists (1. 2. 3.) when listing multiple points or steps
31
- - Use bullet points (- item) for sub-points or supporting details
32
- - Use markdown tables (| Col | Col |) when comparing options side by side
33
- - Use **bold** for important terms, case names, and section numbers
34
- - Use headers (## Heading) to separate major sections in long answers
35
- - Never write everything as one long paragraph
36
- - Each distinct point must be on its own line
37
- - Always put a blank line between sections
38
  """
39
 
40
- @retry(
41
- stop=stop_after_attempt(3),
42
- wait=wait_exponential(multiplier=1, min=2, max=8)
43
- )
44
- def call_llm(query: str, context: str) -> str:
45
- """
46
- Call Groq Llama-3. Retries 3 times with exponential backoff.
47
- Raises LLMError after all retries fail — caller handles this.
48
- """
49
- user_message = f"""QUESTION: {query}
50
 
51
- SUPREME COURT JUDGMENT EXCERPTS:
52
- {context}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- Answer based only on the excerpts above. Cite judgment IDs.
55
- Use proper markdown formatting — numbered lists, bullet points, tables, bold text as appropriate."""
56
 
57
- response = _client.chat.completions.create(
 
 
58
  model="llama-3.3-70b-versatile",
59
- messages=[
60
- {"role": "system", "content": SYSTEM_PROMPT},
61
- {"role": "user", "content": user_message}
62
- ],
63
- temperature=0.1,
64
  max_tokens=1500
65
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ LLM module. Gemini Flash as primary, Groq as fallback.
3
+ Gemini works reliably from HF Spaces. Groq is backup.
 
 
 
 
4
  """
5
 
6
  import os
7
+ import logging
8
  from tenacity import retry, stop_after_attempt, wait_exponential
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # ── Gemini setup ──────────────────────────────────────────
15
+ import google.generativeai as genai
16
+
17
+ _gemini_client = None
18
+ _gemini_model = None
19
+
20
def _init_gemini() -> bool:
    """Configure the Gemini Flash model from GEMINI_API_KEY.

    Returns:
        True when the model handle is ready, False when the key is missing
        or configuration fails. Failures are logged, never raised, so the
        module can still fall back to Groq.
    """
    # Fix: the original declared `global _gemini_client, _gemini_model` but
    # never assigned _gemini_client anywhere — only the model handle is set.
    global _gemini_model
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        logger.warning("GEMINI_API_KEY not set")
        return False
    try:
        genai.configure(api_key=api_key)
        _gemini_model = genai.GenerativeModel("gemini-1.5-flash")
        logger.info("Gemini Flash ready")
        return True
    except Exception as e:
        logger.error(f"Gemini init failed: {e}")
        return False
34
+
35
+ # ── Groq setup ────────────────────────────────────────────
36
+ _groq_client = None
37
+
38
def _init_groq() -> bool:
    """Initialise the Groq client from GROQ_API_KEY (fallback provider).

    Returns:
        True when the client is ready, False when the key is missing or the
        SDK fails to initialise. Mirrors _init_gemini: log, never raise.
    """
    global _groq_client
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        # Consistency fix: the Gemini initialiser logs its missing key, but
        # this path used to fail silently — a configuration with no fallback
        # was invisible in the logs.
        logger.warning("GROQ_API_KEY not set")
        return False
    try:
        # Imported lazily: the groq package is only needed when a key exists.
        from groq import Groq
        _groq_client = Groq(api_key=api_key)
        logger.info("Groq ready as fallback")
        return True
    except Exception as e:
        logger.error(f"Groq init failed: {e}")
        return False
51
 
52
# Probe both providers once at import time; these flags gate the
# Gemini-first / Groq-fallback chain in _call_llm_with_fallback.
_gemini_ready = _init_gemini()
_groq_ready = _init_groq()

# Shared system prompt for the V1 call path (call_llm). The V2 agent builds
# its own prompts and goes through call_llm_raw instead.
SYSTEM_PROMPT = """You are NyayaSetu — a sharp, street-smart Indian legal advisor.
You work FOR the user. Your job is to find the angle, identify the leverage,
and tell the user exactly what to do — the way a senior lawyer would in a
private consultation, not the way a textbook would explain it.

Be direct. Be human. Vary your response style naturally.
Sometimes short and punchy. Sometimes detailed and structured.
Match the energy of what the user needs right now.

When citing sources, reference the Judgment ID naturally in your response.
Always end with: "Note: This is not legal advice. Consult a qualified advocate."
"""
67
 
 
 
 
 
 
 
 
 
 
 
68
 
69
def _call_gemini(messages: list) -> str:
    """Send an OpenAI-style messages list to Gemini Flash and return its text.

    Gemini takes a single prompt string, so the system message (if any) and
    all user messages are flattened into one prompt separated by blank lines.

    Args:
        messages: list of {"role": ..., "content": ...} dicts.

    Returns:
        The generated response text.
    """
    system = next((m["content"] for m in messages if m["role"] == "system"), "")
    user_parts = [m["content"] for m in messages if m["role"] == "user"]

    # Idiom fix: join outside the f-string instead of the chr(10) workaround
    # (which only existed because pre-3.12 f-strings forbid backslashes).
    # The resulting prompt bytes are identical.
    user_text = "\n".join(user_parts)
    full_prompt = f"{system}\n\n{user_text}"

    response = _gemini_model.generate_content(
        full_prompt,
        generation_config=genai.types.GenerationConfig(
            temperature=0.3,
            max_output_tokens=1500,
        ),
    )
    return response.text
85
 
 
 
86
 
87
def _call_groq(messages: list) -> str:
    """Fallback path: run the chat through Groq's Llama 3.3 70B model."""
    completion = _groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.3,
        max_tokens=1500,
    )
    # Groq returns a list of choices; only the first is requested/used.
    first_choice = completion.choices[0]
    return first_choice.message.content
96
+
97
+
98
@retry(stop=stop_after_attempt(2), wait=wait_exponential(min=1, max=4))
def call_llm(query: str, context: str) -> str:
    """Answer *query* against *context* via the provider chain (V1 entry point).

    Gemini is tried first, Groq second; the whole attempt is retried once
    with exponential backoff if both providers fail.
    """
    user_message = (
        f"QUESTION: {query}\n\n"
        f"SOURCES:\n{context}\n\n"
        "Answer based on sources. Cite judgment IDs."
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]
    return _call_llm_with_fallback(messages)
109
+
110
+
111
def call_llm_raw(messages: list) -> str:
    """Run a caller-assembled messages list through the provider chain.

    Thin passthrough used by the V2 agent (src/agent_v2.py) for its
    analyse (Pass 1) and respond (Pass 3) LLM calls.
    """
    return _call_llm_with_fallback(messages)
117
+
118
 
119
def _call_llm_with_fallback(messages: list) -> str:
    """Dispatch *messages* to Gemini first, falling back to Groq on any error.

    Args:
        messages: list of {"role": ..., "content": ...} dicts.

    Returns:
        The response text from whichever provider succeeded.

    Raises:
        RuntimeError: when both providers are unavailable or both fail.
            (Fix: was a bare ``Exception`` — RuntimeError is more specific
            and still caught by any existing ``except Exception`` handler.)
    """
    if _gemini_ready and _gemini_model:
        try:
            return _call_gemini(messages)
        except Exception as e:
            # Best-effort: any Gemini failure just routes us to the fallback.
            logger.warning(f"Gemini failed: {e}, trying Groq")

    if _groq_ready and _groq_client:
        try:
            return _call_groq(messages)
        except Exception as e:
            logger.error(f"Groq also failed: {e}")

    raise RuntimeError("All LLM providers failed")