Spaces:
Build error
Build error
| import streamlit as st | |
| from transformers import pipeline, BigBirdTokenizerFast, AutoModelForTokenClassification | |
def pii_app():
    """Render the Streamlit page that detects and highlights PII in user text.

    The page shows a title and a text area. Once the user has entered some
    text, a BigBird token-classification pipeline is run over it, each
    detected entity is listed with its type, and the original text is
    rendered with the detected spans highlighted.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title('PII Data Detection')
    text_input = st.text_area('Enter a Paragraph below to get list of PII in your text.')

    # Nothing to analyse yet: return before the (expensive) model load and
    # inference instead of running the classifier on an empty string.
    if not text_input or not text_input.strip():
        return

    # NOTE(review): ``block_size`` looks like a model-config option rather
    # than a tokenizer option; kept as in the original -- confirm it is needed.
    tokenizer = BigBirdTokenizerFast.from_pretrained("google/bigbird-roberta-base", block_size=2)
    model = AutoModelForTokenClassification.from_pretrained("vedantM/BigBird-PII")
    big_bird_classifier = pipeline(
        task="token-classification",
        model=model,
        aggregation_strategy="average",
        tokenizer=tokenizer,
    )
    output = big_bird_classifier(text_input)

    st.header('List of Entities:')
    for entity in output:
        st.write(f"Entity: {entity['word']}, Type: {entity['entity_group']}")

    highlighted_text = highlight_pii(text_input, output)
    st.header('\nPII Detected Output:')
    # ``highlighted_text`` contains <span> markup, so raw HTML must be enabled.
    st.markdown(highlighted_text, unsafe_allow_html=True)
def highlight_pii(text, entities):
    """Return *text* as HTML with every detected entity span highlighted.

    Args:
        text: The raw input string the entities were detected in.
        entities: Iterable of mappings, each with integer ``"start"`` and
            ``"end"`` keys giving character offsets into ``text``.

    Returns:
        A string in which each entity span is wrapped in a
        ``<span style="background-color: blue">`` element. All text taken
        from ``text`` is HTML-escaped, so markup typed by the user is shown
        literally instead of being interpreted when the result is rendered
        as raw HTML.

    Entities are processed in order of their start offset. An entity that
    overlaps an already highlighted span, or whose span is empty, is skipped.
    """
    import html  # local import keeps this block self-contained

    open_tag = '<span style="background-color: blue">'
    close_tag = "</span>"

    pieces = []
    cursor = 0  # index in ``text`` up to which output has already been emitted
    for entity in sorted(entities, key=lambda item: item["start"]):
        start_idx, end_idx = entity["start"], entity["end"]
        if start_idx < cursor or end_idx <= start_idx:
            # Overlapping or empty span: wrapping it would corrupt the markup.
            continue
        # quote=False: the text lands in element content, never in an
        # attribute, so only &, < and > need escaping.
        pieces.append(html.escape(text[cursor:start_idx], quote=False))
        pieces.append(
            open_tag + html.escape(text[start_idx:end_idx], quote=False) + close_tag
        )
        cursor = end_idx
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    pii_app()