afulara committed on
Commit
ae920ed
·
1 Parent(s): 1cea5a6

Better PDF upload paths

Browse files
Files changed (1) hide show
  1. backend/main.py +39 -25
backend/main.py CHANGED
@@ -117,39 +117,53 @@ def process_file(file_path: str, file_name: str):
117
 
118
  @app.post("/upload")
119
  async def upload_document(file: UploadFile = File(...)):
120
- if not file.filename.endswith('.pdf'):
121
  raise HTTPException(status_code=400, detail="Only PDF files are supported")
122
 
123
  try:
124
- # Create a temporary file
125
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
126
- content = await file.read()
127
- temp_file.write(content)
128
- temp_path = temp_file.name
129
-
130
- # Process the file
131
- texts = process_file(temp_path, file.filename)
132
 
133
- # Create a new session
134
- session_id = str(uuid4())
135
- vector_db = VectorDatabase()
136
- await vector_db.abuild_from_list(texts)
137
 
138
- # Store session data
139
- sessions[session_id] = {
140
- "vector_db": vector_db,
141
- "texts": texts
142
- }
143
 
144
- # Clean up
145
- os.unlink(temp_path)
 
146
 
147
- return {
148
- "session_id": session_id,
149
- "message": f"Document processed successfully. Added {len(texts)} chunks to the database."
150
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  except Exception as e:
152
- raise HTTPException(status_code=500, detail=str(e))
153
 
154
  @app.post("/query/{session_id}")
155
  async def query_documents(session_id: str, query: Query):
 
117
 
118
  @app.post("/upload")
119
  async def upload_document(file: UploadFile = File(...)):
120
+ if not file.filename.lower().endswith('.pdf'):
121
  raise HTTPException(status_code=400, detail="Only PDF files are supported")
122
 
123
  try:
124
+ # Read the file content directly into memory
125
+ content = await file.read()
 
 
 
 
 
 
126
 
127
+ # Create a temporary file in a directory we know exists
128
+ temp_dir = "/tmp" # Using /tmp which is writable in most environments
129
+ os.makedirs(temp_dir, exist_ok=True)
 
130
 
131
+ temp_path = os.path.join(temp_dir, f"upload_{file.filename}")
 
 
 
 
132
 
133
+ # Write the content to the temporary file
134
+ with open(temp_path, 'wb') as temp_file:
135
+ temp_file.write(content)
136
 
137
+ try:
138
+ # Process the file
139
+ texts = process_file(temp_path, file.filename)
140
+
141
+ # Create a new session
142
+ session_id = str(uuid4())
143
+ vector_db = VectorDatabase()
144
+ await vector_db.abuild_from_list(texts)
145
+
146
+ # Store session data
147
+ sessions[session_id] = {
148
+ "vector_db": vector_db,
149
+ "texts": texts
150
+ }
151
+
152
+ return {
153
+ "session_id": session_id,
154
+ "message": f"Document processed successfully. Added {len(texts)} chunks to the database."
155
+ }
156
+
157
+ finally:
158
+ # Clean up the temporary file
159
+ try:
160
+ if os.path.exists(temp_path):
161
+ os.unlink(temp_path)
162
+ except Exception as e:
163
+ print(f"Warning: Could not delete temporary file: {e}")
164
+
165
  except Exception as e:
166
+ raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
167
 
168
  @app.post("/query/{session_id}")
169
  async def query_documents(session_id: str, query: Query):