snikhilesh committed on
Commit
e4dfd03
·
verified ·
1 Parent(s): 47598fb

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. backend/main.py +17 -418
  2. backend/main_full.py +445 -0
  3. backend/requirements.txt +1 -12
backend/main.py CHANGED
@@ -1,445 +1,44 @@
1
  """
2
- Medical Report Analysis Platform - Main Backend Application
3
- Comprehensive AI-powered medical document analysis with multi-model processing
4
- With HIPAA/GDPR Security & Compliance Features
5
  """
6
 
7
- from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks, Request, Depends
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from fastapi.responses import JSONResponse, FileResponse
10
- from fastapi.staticfiles import StaticFiles
11
- from pydantic import BaseModel
12
- from pathlib import Path
13
- from typing import List, Dict, Optional, Any
14
- import os
15
- import tempfile
16
- import logging
17
  from datetime import datetime
18
- import uuid
19
-
20
- # Import processing modules
21
- from pdf_processor import PDFProcessor
22
- from document_classifier import DocumentClassifier
23
- from model_router import ModelRouter
24
- from analysis_synthesizer import AnalysisSynthesizer
25
- from security import get_security_manager, ComplianceValidator, DataEncryption
26
-
27
- # Configure logging
28
- logging.basicConfig(
29
- level=logging.INFO,
30
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31
- )
32
- logger = logging.getLogger(__name__)
33
 
34
  # Initialize FastAPI app
35
  app = FastAPI(
36
  title="Medical Report Analysis Platform",
37
- description="HIPAA/GDPR Compliant AI-powered medical document analysis",
38
  version="2.0.0"
39
  )
40
 
41
- # CORS configuration
42
- app.add_middleware(
43
- CORSMiddleware,
44
- allow_origins=["*"], # Configure appropriately for production
45
- allow_credentials=True,
46
- allow_methods=["*"],
47
- allow_headers=["*"],
48
- )
49
-
50
- # Mount static files (frontend)
51
- static_dir = Path(__file__).parent / "static"
52
- if static_dir.exists():
53
- app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
54
- logger.info("Static files mounted successfully")
55
-
56
- # Initialize processing components
57
- pdf_processor = PDFProcessor()
58
- document_classifier = DocumentClassifier()
59
- model_router = ModelRouter()
60
- analysis_synthesizer = AnalysisSynthesizer()
61
-
62
- # Initialize security components
63
- security_manager = get_security_manager()
64
- compliance_validator = ComplianceValidator()
65
- data_encryption = DataEncryption()
66
-
67
- logger.info("Security and compliance features initialized")
68
-
69
- # Request/Response Models
70
- class AnalysisStatus(BaseModel):
71
- job_id: str
72
- status: str
73
- progress: float
74
- message: str
75
-
76
- class AnalysisResult(BaseModel):
77
- job_id: str
78
- document_type: str
79
- confidence: float
80
- analysis: Dict[str, Any]
81
- specialized_results: List[Dict[str, Any]]
82
- summary: str
83
- timestamp: str
84
-
85
- class HealthCheck(BaseModel):
86
- status: str
87
- version: str
88
- timestamp: str
89
-
90
- # In-memory job tracking (use Redis/database in production)
91
- job_tracker: Dict[str, Dict[str, Any]] = {}
92
-
93
-
94
- @app.get("/api", response_model=HealthCheck)
95
- async def api_root():
96
- """API health check endpoint"""
97
- return HealthCheck(
98
- status="healthy",
99
- version="1.0.0",
100
- timestamp=datetime.utcnow().isoformat()
101
- )
102
-
103
-
104
  @app.get("/")
105
  async def root():
106
- """Serve frontend"""
107
- static_dir = Path(__file__).parent / "static"
108
- index_file = static_dir / "index.html"
109
-
110
- if index_file.exists():
111
- return FileResponse(index_file)
112
- else:
113
- return {"message": "Medical Report Analysis Platform API", "version": "1.0.0"}
114
-
115
 
116
  @app.get("/health")
117
  async def health_check():
118
- """Detailed health check with component status"""
119
  return {
120
  "status": "healthy",
121
- "components": {
122
- "pdf_processor": "ready",
123
- "classifier": "ready",
124
- "model_router": "ready",
125
- "synthesizer": "ready",
126
- "security": "ready",
127
- "compliance": "active"
128
- },
129
  "timestamp": datetime.utcnow().isoformat()
130
  }
131
 
132
-
133
- @app.get("/compliance-status")
134
- async def get_compliance_status():
135
- """Get HIPAA/GDPR compliance status"""
136
- return compliance_validator.check_compliance()
137
-
138
-
139
- @app.post("/auth/login")
140
- async def login(email: str, password: str):
141
- """
142
- User authentication endpoint
143
- In production, validate credentials against secure database
144
- """
145
- # Demo authentication - in production, validate against database
146
- logger.warning("Demo authentication - implement secure auth in production")
147
-
148
- # For demo, accept any credentials
149
- user_id = str(uuid.uuid4())
150
- token = security_manager.create_access_token(user_id, email)
151
-
152
- return {
153
- "access_token": token,
154
- "token_type": "bearer",
155
- "user_id": user_id,
156
- "email": email
157
- }
158
-
159
-
160
- @app.post("/analyze", response_model=AnalysisStatus)
161
- async def analyze_document(
162
- request: Request,
163
- file: UploadFile = File(...),
164
- background_tasks: BackgroundTasks = BackgroundTasks(),
165
- current_user: Dict[str, Any] = Depends(security_manager.get_current_user)
166
- ):
167
- """
168
- Upload and analyze a medical document with audit logging
169
-
170
- This endpoint initiates the two-layer processing:
171
- - Layer 1: PDF extraction and classification
172
- - Layer 2: Specialized model analysis
173
-
174
- Security: Logs all PHI access for HIPAA compliance
175
- """
176
-
177
- # Generate unique job ID
178
- job_id = str(uuid.uuid4())
179
-
180
- # Audit log: Document upload
181
- client_ip = request.client.host if request.client else "unknown"
182
- security_manager.audit_logger.log_phi_access(
183
- user_id=current_user.get("user_id", "unknown"),
184
- document_id=job_id,
185
- action="UPLOAD",
186
- ip_address=client_ip
187
- )
188
-
189
- # Validate file type
190
- if not file.filename.lower().endswith('.pdf'):
191
- raise HTTPException(
192
- status_code=400,
193
- detail="Only PDF files are supported"
194
- )
195
-
196
- # Initialize job tracking
197
- job_tracker[job_id] = {
198
- "status": "processing",
199
- "progress": 0.0,
200
- "filename": file.filename,
201
- "user_id": current_user.get("user_id"),
202
- "created_at": datetime.utcnow().isoformat()
203
- }
204
-
205
- try:
206
- # Save uploaded file temporarily
207
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
208
- content = await file.read()
209
- tmp_file.write(content)
210
- tmp_file_path = tmp_file.name
211
-
212
- # Schedule background processing
213
- background_tasks.add_task(
214
- process_document_pipeline,
215
- job_id,
216
- tmp_file_path,
217
- file.filename,
218
- current_user.get("user_id")
219
- )
220
-
221
- logger.info(f"Analysis job {job_id} created for file: {file.filename}")
222
-
223
- return AnalysisStatus(
224
- job_id=job_id,
225
- status="processing",
226
- progress=0.0,
227
- message="Document uploaded successfully. Analysis in progress."
228
- )
229
-
230
- except Exception as e:
231
- logger.error(f"Error creating analysis job: {str(e)}")
232
- job_tracker[job_id]["status"] = "failed"
233
- job_tracker[job_id]["error"] = str(e)
234
-
235
- # Audit log: Failed upload
236
- security_manager.audit_logger.log_access(
237
- user_id=current_user.get("user_id", "unknown"),
238
- action="UPLOAD_FAILED",
239
- resource=f"document:{job_id}",
240
- ip_address=client_ip,
241
- status="FAILED",
242
- details={"error": str(e)}
243
- )
244
-
245
- raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
246
-
247
-
248
- @app.get("/status/{job_id}", response_model=AnalysisStatus)
249
- async def get_analysis_status(job_id: str):
250
- """Get the current status of an analysis job"""
251
-
252
- if job_id not in job_tracker:
253
- raise HTTPException(status_code=404, detail="Job not found")
254
-
255
- job_data = job_tracker[job_id]
256
-
257
- return AnalysisStatus(
258
- job_id=job_id,
259
- status=job_data["status"],
260
- progress=job_data.get("progress", 0.0),
261
- message=job_data.get("message", "Processing...")
262
- )
263
-
264
-
265
- @app.get("/results/{job_id}", response_model=AnalysisResult)
266
- async def get_analysis_results(job_id: str):
267
- """Retrieve the analysis results for a completed job"""
268
-
269
- if job_id not in job_tracker:
270
- raise HTTPException(status_code=404, detail="Job not found")
271
-
272
- job_data = job_tracker[job_id]
273
-
274
- if job_data["status"] != "completed":
275
- raise HTTPException(
276
- status_code=400,
277
- detail=f"Analysis not completed. Current status: {job_data['status']}"
278
- )
279
-
280
- return AnalysisResult(**job_data["result"])
281
-
282
-
283
- @app.get("/supported-models")
284
- async def get_supported_models():
285
- """Get list of supported medical AI models by domain"""
286
  return {
287
- "domains": {
288
- "clinical_notes": {
289
- "models": ["MedGemma 27B", "Bio_ClinicalBERT"],
290
- "tasks": ["summarization", "entity_extraction", "coding"]
291
- },
292
- "radiology": {
293
- "models": ["MedGemma 4B Multimodal", "MONAI"],
294
- "tasks": ["vqa", "report_generation", "segmentation"]
295
- },
296
- "pathology": {
297
- "models": ["Path Foundation", "UNI2-h"],
298
- "tasks": ["slide_classification", "embedding_generation"]
299
- },
300
- "cardiology": {
301
- "models": ["HuBERT-ECG"],
302
- "tasks": ["ecg_analysis", "event_prediction"]
303
- },
304
- "laboratory": {
305
- "models": ["DrLlama", "Lab-AI"],
306
- "tasks": ["normalization", "explanation"]
307
- },
308
- "drug_interactions": {
309
- "models": ["CatBoost DDI", "DrugGen"],
310
- "tasks": ["interaction_classification"]
311
- },
312
- "diagnosis": {
313
- "models": ["MedGemma 27B"],
314
- "tasks": ["differential_diagnosis", "triage"]
315
- },
316
- "coding": {
317
- "models": ["Rayyan Med Coding", "ICD-10 Predictors"],
318
- "tasks": ["icd10_extraction", "cpt_coding"]
319
- },
320
- "mental_health": {
321
- "models": ["MentalBERT"],
322
- "tasks": ["screening", "sentiment_analysis"]
323
- }
324
- }
325
  }
326
 
327
-
328
- async def process_document_pipeline(job_id: str, file_path: str, filename: str, user_id: str = "unknown"):
329
- """
330
- Background task for processing medical documents through the full pipeline
331
-
332
- Pipeline stages:
333
- 1. PDF Extraction (text, images, tables)
334
- 2. Document Classification
335
- 3. Intelligent Routing
336
- 4. Specialized Model Analysis
337
- 5. Result Synthesis
338
-
339
- Security: All stages logged for HIPAA compliance
340
- """
341
-
342
- try:
343
- # Stage 1: PDF Processing
344
- job_tracker[job_id]["progress"] = 0.1
345
- job_tracker[job_id]["message"] = "Extracting content from PDF..."
346
- logger.info(f"Job {job_id}: Starting PDF extraction")
347
-
348
- pdf_content = await pdf_processor.extract_content(file_path)
349
-
350
- # Stage 2: Document Classification
351
- job_tracker[job_id]["progress"] = 0.3
352
- job_tracker[job_id]["message"] = "Classifying document type..."
353
- logger.info(f"Job {job_id}: Classifying document")
354
-
355
- classification = await document_classifier.classify(pdf_content)
356
-
357
- # Audit log: Classification complete
358
- security_manager.audit_logger.log_phi_access(
359
- user_id=user_id,
360
- document_id=job_id,
361
- action="CLASSIFY",
362
- ip_address="internal"
363
- )
364
-
365
- # Stage 3: Model Routing
366
- job_tracker[job_id]["progress"] = 0.4
367
- job_tracker[job_id]["message"] = "Routing to specialized models..."
368
- logger.info(f"Job {job_id}: Routing to models - {classification['document_type']}")
369
-
370
- model_tasks = model_router.route(classification, pdf_content)
371
-
372
- # Stage 4: Specialized Analysis
373
- job_tracker[job_id]["progress"] = 0.5
374
- job_tracker[job_id]["message"] = "Running specialized analysis..."
375
- logger.info(f"Job {job_id}: Running {len(model_tasks)} specialized models")
376
-
377
- specialized_results = []
378
- for i, task in enumerate(model_tasks):
379
- result = await model_router.execute_task(task)
380
- specialized_results.append(result)
381
- progress = 0.5 + (0.3 * (i + 1) / len(model_tasks))
382
- job_tracker[job_id]["progress"] = progress
383
-
384
- # Stage 5: Result Synthesis
385
- job_tracker[job_id]["progress"] = 0.9
386
- job_tracker[job_id]["message"] = "Synthesizing results..."
387
- logger.info(f"Job {job_id}: Synthesizing results")
388
-
389
- final_analysis = await analysis_synthesizer.synthesize(
390
- classification,
391
- specialized_results,
392
- pdf_content
393
- )
394
-
395
- # Complete
396
- job_tracker[job_id]["progress"] = 1.0
397
- job_tracker[job_id]["status"] = "completed"
398
- job_tracker[job_id]["message"] = "Analysis complete"
399
- job_tracker[job_id]["result"] = {
400
- "job_id": job_id,
401
- "document_type": classification["document_type"],
402
- "confidence": classification["confidence"],
403
- "analysis": final_analysis,
404
- "specialized_results": specialized_results,
405
- "summary": final_analysis.get("summary", ""),
406
- "timestamp": datetime.utcnow().isoformat()
407
- }
408
-
409
- logger.info(f"Job {job_id}: Analysis completed successfully")
410
-
411
- # Audit log: Analysis complete
412
- security_manager.audit_logger.log_phi_access(
413
- user_id=user_id,
414
- document_id=job_id,
415
- action="ANALYSIS_COMPLETE",
416
- ip_address="internal"
417
- )
418
-
419
- # Secure cleanup of temporary file
420
- data_encryption.secure_delete(file_path)
421
-
422
- except Exception as e:
423
- logger.error(f"Job {job_id}: Analysis failed - {str(e)}")
424
- job_tracker[job_id]["status"] = "failed"
425
- job_tracker[job_id]["message"] = f"Analysis failed: {str(e)}"
426
- job_tracker[job_id]["error"] = str(e)
427
-
428
- # Audit log: Analysis failed
429
- security_manager.audit_logger.log_access(
430
- user_id=user_id,
431
- action="ANALYSIS_FAILED",
432
- resource=f"document:{job_id}",
433
- ip_address="internal",
434
- status="FAILED",
435
- details={"error": str(e)}
436
- )
437
-
438
- # Cleanup on error
439
- if os.path.exists(file_path):
440
- data_encryption.secure_delete(file_path)
441
-
442
-
443
  if __name__ == "__main__":
444
  import uvicorn
445
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  """
2
+ Medical Report Analysis Platform - Minimal Bootstrap
 
 
3
  """
4
 
5
from datetime import datetime, timezone

from fastapi import FastAPI
from fastapi.responses import JSONResponse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# FastAPI application instance for the minimal bootstrap service.
app = FastAPI(
    title="Medical Report Analysis Platform",
    description="AI-powered medical document analysis",
    version="2.0.0",
)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
@app.get("/")
async def root():
    """Report the API's identity, version, and availability."""
    banner = {
        "message": "Medical Report Analysis Platform API",
        "version": "2.0.0",
        "status": "online",
    }
    return banner
 
 
 
24
 
25
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns a liveness flag plus a timezone-aware UTC timestamp.
    (`datetime.utcnow()` is deprecated since Python 3.12; use
    `datetime.now(timezone.utc)` instead.)
    """
    return {
        "status": "healthy",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
32
 
33
@app.get("/api")
async def api_root():
    """API status endpoint (mirrors /health, adds the API version).

    Uses a timezone-aware UTC timestamp; `datetime.utcnow()` is
    deprecated since Python 3.12.
    """
    return {
        "status": "healthy",
        "version": "2.0.0",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
if __name__ == "__main__":
    # Local development entry point; port 7860 is the Hugging Face
    # Spaces convention.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
backend/main_full.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Medical Report Analysis Platform - Main Backend Application
3
+ Comprehensive AI-powered medical document analysis with multi-model processing
4
+ With HIPAA/GDPR Security & Compliance Features
5
+ """
6
+
7
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks, Request, Depends
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import JSONResponse, FileResponse
10
+ from fastapi.staticfiles import StaticFiles
11
+ from pydantic import BaseModel
12
+ from pathlib import Path
13
+ from typing import List, Dict, Optional, Any
14
+ import os
15
+ import tempfile
16
+ import logging
17
+ from datetime import datetime
18
+ import uuid
19
+
20
+ # Import processing modules
21
+ from pdf_processor import PDFProcessor
22
+ from document_classifier import DocumentClassifier
23
+ from model_router import ModelRouter
24
+ from analysis_synthesizer import AnalysisSynthesizer
25
+ from security import get_security_manager, ComplianceValidator, DataEncryption
26
+
27
# Configure process-wide logging once at import time; all modules share
# this format via the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
33
+
34
# FastAPI application instance for the full analysis service.
app = FastAPI(
    title="Medical Report Analysis Platform",
    description="HIPAA/GDPR Compliant AI-powered medical document analysis",
    version="2.0.0",
)

# CORS configuration.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers per the CORS spec — configure explicit origins
# for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Serve the pre-built frontend assets when a static bundle is present.
static_dir = Path(__file__).parent / "static"
if static_dir.exists():
    app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
    logger.info("Static files mounted successfully")

# Layer 1 / Layer 2 processing components.
pdf_processor = PDFProcessor()
document_classifier = DocumentClassifier()
model_router = ModelRouter()
analysis_synthesizer = AnalysisSynthesizer()

# Security and compliance components.
security_manager = get_security_manager()
compliance_validator = ComplianceValidator()
data_encryption = DataEncryption()

logger.info("Security and compliance features initialized")
68
+
69
+ # Request/Response Models
70
class AnalysisStatus(BaseModel):
    """Progress snapshot for an analysis job (returned by /analyze and /status)."""
    job_id: str
    status: str
    progress: float
    message: str
75
+
76
class AnalysisResult(BaseModel):
    """Final synthesized output of a completed analysis job (returned by /results)."""
    job_id: str
    document_type: str
    confidence: float
    analysis: Dict[str, Any]
    specialized_results: List[Dict[str, Any]]
    summary: str
    timestamp: str
84
+
85
class HealthCheck(BaseModel):
    """Simple service-liveness payload (returned by /api)."""
    status: str
    version: str
    timestamp: str
89
+
90
# In-memory job tracking (use Redis/database in production).
# Maps job_id -> {"status", "progress", "message", "result", ...};
# state is lost on process restart, so jobs are not durable.
job_tracker: Dict[str, Dict[str, Any]] = {}
92
+
93
+
94
@app.get("/api", response_model=HealthCheck)
async def api_root():
    """API health check endpoint.

    Reports the same version the FastAPI app declares (2.0.0);
    previously returned a stale "1.0.0".
    """
    return HealthCheck(
        status="healthy",
        version="2.0.0",
        timestamp=datetime.utcnow().isoformat()
    )
102
+
103
+
104
@app.get("/")
async def root():
    """Serve the bundled frontend, falling back to an API banner.

    The fallback version string is kept in sync with the app's declared
    version (2.0.0); it previously reported a stale "1.0.0".
    """
    static_dir = Path(__file__).parent / "static"
    index_file = static_dir / "index.html"

    if index_file.exists():
        return FileResponse(index_file)
    # No frontend build present — report API identity instead.
    return {"message": "Medical Report Analysis Platform API", "version": "2.0.0"}
114
+
115
+
116
@app.get("/health")
async def health_check():
    """Detailed health check reporting per-component readiness."""
    # Static readiness flags; components are constructed at import time,
    # so reaching this handler implies they initialized.
    component_status = {
        "pdf_processor": "ready",
        "classifier": "ready",
        "model_router": "ready",
        "synthesizer": "ready",
        "security": "ready",
        "compliance": "active",
    }
    return {
        "status": "healthy",
        "components": component_status,
        "timestamp": datetime.utcnow().isoformat(),
    }
131
+
132
+
133
@app.get("/compliance-status")
async def get_compliance_status():
    """Return the current HIPAA/GDPR compliance report from the validator."""
    return compliance_validator.check_compliance()
137
+
138
+
139
@app.post("/auth/login")
async def login(email: str, password: str):
    """Issue a bearer access token for the supplied credentials.

    NOTE(review): demo-only — any credentials are accepted and the
    password arrives as a plain query parameter; replace with real
    validation against a secure credential store before production.
    """
    logger.warning("Demo authentication - implement secure auth in production")

    # Mint a fresh user id per login and wrap it in a signed token.
    user_id = str(uuid.uuid4())
    token = security_manager.create_access_token(user_id, email)

    response = {
        "access_token": token,
        "token_type": "bearer",
        "user_id": user_id,
        "email": email,
    }
    return response
158
+
159
+
160
@app.post("/analyze", response_model=AnalysisStatus)
async def analyze_document(
    request: Request,
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = BackgroundTasks(),
    current_user: Dict[str, Any] = Depends(security_manager.get_current_user)
):
    """
    Upload and analyze a medical document with audit logging.

    This endpoint initiates the two-layer processing:
    - Layer 1: PDF extraction and classification
    - Layer 2: Specialized model analysis

    Security: Logs all PHI access for HIPAA compliance.

    Raises:
        HTTPException 400: if the upload is not a PDF.
        HTTPException 500: if the job could not be scheduled.
    """

    # Unique job id; doubles as the document id in audit logs.
    job_id = str(uuid.uuid4())

    # Audit log: document upload (request.client can be None behind
    # some ASGI servers/proxies).
    client_ip = request.client.host if request.client else "unknown"
    security_manager.audit_logger.log_phi_access(
        user_id=current_user.get("user_id", "unknown"),
        document_id=job_id,
        action="UPLOAD",
        ip_address=client_ip
    )

    # Validate file type. FIX: UploadFile.filename is Optional and may
    # be None, so guard it before calling .lower() (previously raised
    # AttributeError instead of a clean 400).
    if not file.filename or not file.filename.lower().endswith('.pdf'):
        raise HTTPException(
            status_code=400,
            detail="Only PDF files are supported"
        )

    # Initialize the tracking entry before scheduling work so /status
    # can answer immediately.
    job_tracker[job_id] = {
        "status": "processing",
        "progress": 0.0,
        "filename": file.filename,
        "user_id": current_user.get("user_id"),
        "created_at": datetime.utcnow().isoformat()
    }

    try:
        # Persist the upload to a temp file; the background pipeline
        # deletes it securely when finished.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            content = await file.read()
            tmp_file.write(content)
            tmp_file_path = tmp_file.name

        # Schedule background processing after the response is sent.
        background_tasks.add_task(
            process_document_pipeline,
            job_id,
            tmp_file_path,
            file.filename,
            current_user.get("user_id")
        )

        logger.info(f"Analysis job {job_id} created for file: {file.filename}")

        return AnalysisStatus(
            job_id=job_id,
            status="processing",
            progress=0.0,
            message="Document uploaded successfully. Analysis in progress."
        )

    except Exception as e:
        logger.error(f"Error creating analysis job: {str(e)}")
        job_tracker[job_id]["status"] = "failed"
        job_tracker[job_id]["error"] = str(e)

        # Audit log: failed upload
        security_manager.audit_logger.log_access(
            user_id=current_user.get("user_id", "unknown"),
            action="UPLOAD_FAILED",
            resource=f"document:{job_id}",
            ip_address=client_ip,
            status="FAILED",
            details={"error": str(e)}
        )

        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
246
+
247
+
248
@app.get("/status/{job_id}", response_model=AnalysisStatus)
async def get_analysis_status(job_id: str):
    """Return the live progress record for an analysis job."""
    job = job_tracker.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    return AnalysisStatus(
        job_id=job_id,
        status=job["status"],
        progress=job.get("progress", 0.0),
        message=job.get("message", "Processing..."),
    )
263
+
264
+
265
@app.get("/results/{job_id}", response_model=AnalysisResult)
async def get_analysis_results(job_id: str):
    """Return the synthesized result for a completed analysis job."""
    job = job_tracker.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")

    # Results only exist once the background pipeline has finished.
    if job["status"] != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Analysis not completed. Current status: {job['status']}",
        )

    return AnalysisResult(**job["result"])
281
+
282
+
283
@app.get("/supported-models")
async def get_supported_models():
    """List the specialized medical AI models available, keyed by domain."""
    # Static catalog of model assignments per medical domain.
    catalog = {
        "clinical_notes": {
            "models": ["MedGemma 27B", "Bio_ClinicalBERT"],
            "tasks": ["summarization", "entity_extraction", "coding"],
        },
        "radiology": {
            "models": ["MedGemma 4B Multimodal", "MONAI"],
            "tasks": ["vqa", "report_generation", "segmentation"],
        },
        "pathology": {
            "models": ["Path Foundation", "UNI2-h"],
            "tasks": ["slide_classification", "embedding_generation"],
        },
        "cardiology": {
            "models": ["HuBERT-ECG"],
            "tasks": ["ecg_analysis", "event_prediction"],
        },
        "laboratory": {
            "models": ["DrLlama", "Lab-AI"],
            "tasks": ["normalization", "explanation"],
        },
        "drug_interactions": {
            "models": ["CatBoost DDI", "DrugGen"],
            "tasks": ["interaction_classification"],
        },
        "diagnosis": {
            "models": ["MedGemma 27B"],
            "tasks": ["differential_diagnosis", "triage"],
        },
        "coding": {
            "models": ["Rayyan Med Coding", "ICD-10 Predictors"],
            "tasks": ["icd10_extraction", "cpt_coding"],
        },
        "mental_health": {
            "models": ["MentalBERT"],
            "tasks": ["screening", "sentiment_analysis"],
        },
    }
    return {"domains": catalog}
326
+
327
+
328
async def process_document_pipeline(job_id: str, file_path: str, filename: str, user_id: str = "unknown"):
    """
    Background task for processing medical documents through the full pipeline.

    Pipeline stages:
    1. PDF Extraction (text, images, tables)
    2. Document Classification
    3. Intelligent Routing
    4. Specialized Model Analysis
    5. Result Synthesis

    Progress and user-facing status messages are written into the shared
    ``job_tracker`` entry for ``job_id`` as each stage runs; on any
    failure the entry is marked "failed" and the error recorded.

    Security: All stages logged for HIPAA compliance. The temporary file
    at ``file_path`` is securely deleted on both success and failure.

    NOTE(review): ``filename`` is accepted but not used in the body —
    presumably kept for audit/signature symmetry with the caller.
    """

    try:
        # Stage 1: PDF Processing
        job_tracker[job_id]["progress"] = 0.1
        job_tracker[job_id]["message"] = "Extracting content from PDF..."
        logger.info(f"Job {job_id}: Starting PDF extraction")

        pdf_content = await pdf_processor.extract_content(file_path)

        # Stage 2: Document Classification
        job_tracker[job_id]["progress"] = 0.3
        job_tracker[job_id]["message"] = "Classifying document type..."
        logger.info(f"Job {job_id}: Classifying document")

        classification = await document_classifier.classify(pdf_content)

        # Audit log: Classification complete ("internal" marks server-side
        # access with no originating client IP).
        security_manager.audit_logger.log_phi_access(
            user_id=user_id,
            document_id=job_id,
            action="CLASSIFY",
            ip_address="internal"
        )

        # Stage 3: Model Routing
        job_tracker[job_id]["progress"] = 0.4
        job_tracker[job_id]["message"] = "Routing to specialized models..."
        logger.info(f"Job {job_id}: Routing to models - {classification['document_type']}")

        model_tasks = model_router.route(classification, pdf_content)

        # Stage 4: Specialized Analysis
        job_tracker[job_id]["progress"] = 0.5
        job_tracker[job_id]["message"] = "Running specialized analysis..."
        logger.info(f"Job {job_id}: Running {len(model_tasks)} specialized models")

        # Tasks run sequentially; progress advances linearly from 0.5 to
        # 0.8 across the task list (loop is skipped when the list is empty,
        # so the division below cannot be by zero).
        specialized_results = []
        for i, task in enumerate(model_tasks):
            result = await model_router.execute_task(task)
            specialized_results.append(result)
            progress = 0.5 + (0.3 * (i + 1) / len(model_tasks))
            job_tracker[job_id]["progress"] = progress

        # Stage 5: Result Synthesis
        job_tracker[job_id]["progress"] = 0.9
        job_tracker[job_id]["message"] = "Synthesizing results..."
        logger.info(f"Job {job_id}: Synthesizing results")

        final_analysis = await analysis_synthesizer.synthesize(
            classification,
            specialized_results,
            pdf_content
        )

        # Complete: store the payload /results/{job_id} will return.
        job_tracker[job_id]["progress"] = 1.0
        job_tracker[job_id]["status"] = "completed"
        job_tracker[job_id]["message"] = "Analysis complete"
        job_tracker[job_id]["result"] = {
            "job_id": job_id,
            "document_type": classification["document_type"],
            "confidence": classification["confidence"],
            "analysis": final_analysis,
            "specialized_results": specialized_results,
            "summary": final_analysis.get("summary", ""),
            "timestamp": datetime.utcnow().isoformat()
        }

        logger.info(f"Job {job_id}: Analysis completed successfully")

        # Audit log: Analysis complete
        security_manager.audit_logger.log_phi_access(
            user_id=user_id,
            document_id=job_id,
            action="ANALYSIS_COMPLETE",
            ip_address="internal"
        )

        # Secure cleanup of temporary file (PHI must not linger on disk).
        data_encryption.secure_delete(file_path)

    except Exception as e:
        logger.error(f"Job {job_id}: Analysis failed - {str(e)}")
        job_tracker[job_id]["status"] = "failed"
        job_tracker[job_id]["message"] = f"Analysis failed: {str(e)}"
        job_tracker[job_id]["error"] = str(e)

        # Audit log: Analysis failed
        security_manager.audit_logger.log_access(
            user_id=user_id,
            action="ANALYSIS_FAILED",
            resource=f"document:{job_id}",
            ip_address="internal",
            status="FAILED",
            details={"error": str(e)}
        )

        # Cleanup on error (file may already be gone if the failure
        # happened after the success-path delete).
        if os.path.exists(file_path):
            data_encryption.secure_delete(file_path)
441
+
442
+
443
if __name__ == "__main__":
    # Direct execution for local development / Hugging Face Spaces
    # (which expects the app on port 7860).
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
backend/requirements.txt CHANGED
@@ -1,15 +1,4 @@
1
  fastapi==0.109.0
2
- uvicorn[standard]==0.27.0
3
  python-multipart==0.0.6
4
  pydantic==2.5.3
5
- PyPDF2==3.0.1
6
- pdf2image==1.17.0
7
- Pillow==10.2.0
8
- pytesseract==0.3.10
9
- PyMuPDF==1.23.8
10
- requests==2.31.0
11
- aiofiles==23.2.1
12
- PyJWT==2.8.0
13
- python-docx==1.1.0
14
- numpy==1.26.4
15
- pandas==2.2.0
 
1
  fastapi==0.109.0
2
+ uvicorn==0.27.0
3
  python-multipart==0.0.6
4
  pydantic==2.5.3