Spaces:
Sleeping
Sleeping
| import os | |
| import base64 | |
| from base64 import urlsafe_b64encode | |
| import requests | |
| from supabase_models import Supabase_Client | |
| from authenticate import get_access_token_v1 | |
# Entity types kept from the Document AI response; everything else is ignored.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Slash-separated line-item property types and the flat keys they are
# returned under (applied once, after all entities have been collected).
_LINE_ITEM_KEY_RENAMES = (
    ("line_item/description", "line_item_description"),
    ("line_item/quantity", "line_item_quantity"),
    ("line_item/amount", "line_item_amount"),
    ("line_item/unit_price", "line_item_unit_price"),
)


def _guess_mime_type(attachment_extension: str) -> str:
    """Return the MIME type to declare for an attachment with this extension."""
    import mimetypes  # function-scope import: the file does not import it

    guessed, _ = mimetypes.guess_type(f"attachment.{attachment_extension}")
    # Unknown extensions keep the previous "application/<ext>" form, so
    # "pdf" still maps to "application/pdf" either way.
    return guessed or f"application/{attachment_extension}"


def _append_entity(collected: dict, entity: dict) -> None:
    """Record one entity (or line-item property) under its type, if allowed."""
    entity_type = entity.get('type') or ""
    if entity_type not in _ALLOWED_ENTITY_TYPES:
        return
    collected.setdefault(entity_type, []).append({
        "mention_text": entity.get('mentionText') or "",
        "normalizedValue": entity.get('normalizedValue') or "",
    })


def _collect_entities(entities) -> dict:
    """Group allowed entities by type and flatten the line-item property keys."""
    collected = {}
    for entity in entities or ():
        _append_entity(collected, entity)
        # Line items carry their description/quantity/amount/unit price as
        # nested properties; collect those as entities in their own right.
        if entity.get('type') == 'line_item':
            for prop in entity.get('properties') or ():
                _append_entity(collected, prop)

    for slashed_key, flat_key in _LINE_ITEM_KEY_RENAMES:
        if slashed_key in collected:
            collected[flat_key] = collected.pop(slashed_key)
    return collected


async def extract_structure_store_message(user_id: str, message_id: str, attachment_id: str, attachment_extension: str, email: str):
    """Run a stored attachment through Document AI and return its entities.

    The file ``{message_id}_{attachment_id}.{attachment_extension}`` is
    downloaded from the ``receipt_radar`` storage, base64-encoded and posted
    to the Document AI ``:process`` endpoint of the processor identified by
    the ``PROJECT_ID`` and ``PROCESSOR_ID`` environment variables.

    Args:
        user_id: Stored under ``'user_id'`` in the result.
        message_id: Part of the stored file name; stored under ``'message_id'``.
        attachment_id: Part of the stored file name.
        attachment_extension: File extension without the dot; also used to
            pick the MIME type sent to the API.
        email: Stored under ``'email'`` in the result.

    Returns:
        ``None`` when ``attachment_id`` or ``message_id`` is empty. Otherwise
        a dict with ``'user_id'``, ``'email'``, ``'message_id'`` and, for each
        allowed entity type found, a list of
        ``{"mention_text": ..., "normalizedValue": ...}`` dicts. Line-item
        property keys use ``_`` instead of ``/`` (``line_item_description``).

    Raises:
        requests.HTTPError: If the Document AI endpoint answers with a 4xx/5xx
            status. Previously this surfaced as an ``AttributeError`` when the
            error payload had no ``'document'`` key.
    """
    if not (attachment_id and message_id):
        return None

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")

    # NOTE(review): the storage download and requests.post below are
    # synchronous calls inside an async function, and the POST has no
    # timeout; confirm whether callers rely on this blocking behaviour.
    supabase = Supabase_Client().instance
    file_bytes = supabase.storage.from_("receipt_radar").download(file_name)

    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": _guess_mime_type(attachment_extension),
            "content": urlsafe_b64encode(file_bytes).decode('utf-8'),
        },
    }

    # The bearer token is a credential and is deliberately not printed.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }
    response = requests.post(
        f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
        headers=headers,
        json=payload,
    )
    # Fail with the HTTP status instead of a confusing AttributeError later.
    response.raise_for_status()
    response_json = response.json()
    print("Response")
    print(response_json)

    document = response_json.get('document') or {}
    entities = document.get('entities', None)
    print('Printing entities')
    print(entities)

    document_entities = {'user_id': user_id}
    document_entities.update(_collect_entities(entities))
    document_entities['email'] = email
    document_entities['message_id'] = message_id

    print("Printing parsed json")
    print(document_entities)
    return document_entities
| # def extract_structure_store_message(user_id:str,message_id:str , attachment_id:str,attachment_extension:str,email:str): | |
| # if attachment_id and message_id: | |
| # project_id = os.getenv('PROJECT_ID') | |
| # processor_id = os.getenv('PROCESSOR_ID') | |
| # document_entities = {} | |
| # file_name = f"{message_id}_{attachment_id}.{attachment_extension}" | |
| # print(f"file_name: {file_name}") | |
| # supabase = Supabase_Client().instance | |
| # try: | |
| # response = supabase.storage.from_("receipt_radar").download( | |
| # file_name | |
| # ) | |
| # base64_data = urlsafe_b64encode(response).decode('utf-8') | |
| # payload = { | |
| # "skipHumanReview": True, | |
| # "rawDocument": { | |
| # "mimeType": f"application/{attachment_extension}", | |
| # "content": base64_data | |
| # } | |
| # } | |
| # access_token = get_access_token_v1() | |
| # print(access_token) | |
| # headers = { | |
| # 'Authorization': f'Bearer {access_token}', | |
| # 'Content-Type': 'application/json; charset=utf-8' | |
| # } | |
| # response = requests.post( | |
| # f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process', | |
| # headers=headers, | |
| # json=payload | |
| # ) | |
| # response_json = response.json() | |
| # allowed_entities = [ | |
| # "due_date", | |
| # "invoice_date", | |
| # "total_amount", | |
| # "total_tax_amount", | |
| # "receiver_name", | |
| # "invoice_id", | |
| # "currency", | |
| # "receiver_address", | |
| # "invoice_type", | |
| # "supplier_name", | |
| # "payment_terms", | |
| # "line_item", | |
| # "line_item/description", | |
| # "line_item/quantity", | |
| # "line_item/amount", | |
| # "line_item/unit_price" | |
| # ] | |
| # raw_text = response_json.get('document').get('text' , None) | |
| # entities = response_json.get('document').get('entities' , None) | |
| # document_entities['user_id'] = user_id | |
| # # insert_ocr_data_response = ( | |
| # # supabase.table("receipt_ocr_data") | |
| # # .insert({'user_id':user_id , 'message_id':message_id,'receipt_text':raw_text ,'email':email,'file_type':attachment_extension}) | |
| # # .execute() | |
| # # ) | |
| # print('Printing entities') | |
| # print(entities) | |
| # # if entities is not None: | |
| # # for ent in entities: | |
| # # if ent.get('type') is not None: | |
| # # if ent.get('type') in allowed_entities: | |
| # # mention_text = ent.get('mentionText') | |
| # # normalised_values = ent.get('normalizedValue') if 'normalizedValue' in ent else None | |
| # # document_entities[ent.get('type')] = {"mention_text":mention_text,"normalizedValue":normalised_values} | |
| # if entities is not None: | |
| # for ent in entities: | |
| # if ent.get('type') is not None: | |
| # entity_type = ent.get('type') or "" | |
| # # Check if the entity type is in the allowed list | |
| # if entity_type in allowed_entities: | |
| # mention_text = ent.get('mentionText') or "" | |
| # normalized_values = ent.get('normalizedValue') or "" | |
| # # Initialize a list for the entity type if not already present | |
| # if entity_type not in document_entities: | |
| # document_entities[entity_type] = [] | |
| # # Append the entity data to the list | |
| # document_entities[entity_type].append({ | |
| # "mention_text": mention_text, | |
| # "normalizedValue": normalized_values | |
| # }) | |
| # # Handling 'line_item' and its properties (line_item/description, line_item/quantity, etc.) | |
| # if entity_type == 'line_item' and 'properties' in ent: | |
| # for prop in ent['properties']: | |
| # prop_type = prop.get('type') or "" | |
| # if prop_type in allowed_entities: | |
| # mention_text = prop.get('mentionText') or "" | |
| # normalized_values = prop.get('normalizedValue') or "" | |
| # # Initialize a list for the property type if not already present | |
| # if prop_type not in document_entities: | |
| # document_entities[prop_type] = [] | |
| # # Append the property data to the list | |
| # document_entities[prop_type].append({ | |
| # "mention_text": mention_text, | |
| # "normalizedValue": normalized_values | |
| # }) | |
| # document_entities['line_item_description'] = document_entities['line_item/description'] | |
| # document_entities['line_item_quantity'] = document_entities['line_item/quantity'] | |
| # document_entities['line_item_amount'] = document_entities['line_item/amount'] | |
| # document_entities['line_item_unit_price'] = document_entities['line_item/unit_price'] | |
| # document_entities.pop('line_item/description', None) | |
| # document_entities.pop('line_item/quantity', None) | |
| # document_entities.pop('line_item/amount', None) | |
| # document_entities.pop('line_item/unit_price', None) | |
| # document_entities['email'] = email | |
| # document_entities['message_id'] = message_id | |
| # print("Printing parsed json") | |
| # print(document_entities) | |
| # # insert_data_response = ( | |
| # # supabase.table("document_ai_entities") | |
| # # .insert(document_entities) | |
| # # .execute() | |
| # # ) | |
| # # print(insert_data_response) | |
| # return document_entities | |
| # except Exception as e: | |
| # print(f"Error downloading or encoding file: {e}") |