Spaces:

pawlo2013
/

Pneumonia_3_Class

Running

App Files Files Community

pawlo2013 committed on Jun 19, 2024

Commit

66b11d3

1 Parent(s): 4326ce4

added attention rollout for visualisation of the ViT prediction

Browse files

Files changed (5) hide show

.history/app_20240617182329.py +72 -0
.history/app_20240617182353.py +72 -0
.history/app_20240617182506.py +72 -0
app.py +103 -9
requirements.txt +6 -1

.history/app_20240617182329.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import os
+import gradio as gr
+from PIL import Image
+import torch
+from transformers import ViTForImageClassification, ViTImageProcessor
+from datasets import load_dataset
# Image processor configuration comes from the base ViT checkpoint named below.
model_name_or_path = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

# Class names are read from the "label" feature of the training split.
dataset_path = "pawlo2013/chest_xray"
train_dataset = load_dataset(dataset_path, split="train")
class_names = train_dataset.features["label"].names

# Label maps in both directions, following the order of class_names.
_id2label = {str(position): name for position, name in enumerate(class_names)}
_label2id = {name: position for position, name in enumerate(class_names)}

# Classifier weights are loaded from the local ./models directory and the
# model is switched to evaluation mode right away.
model = ViTForImageClassification.from_pretrained(
    "./models",
    num_labels=len(class_names),
    id2label=_id2label,
    label2id=_label2id,
)
model.eval()
# Define the classification function
def classify_image(img_path):
    """Classify one image file and report the probability of every class.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        dict: ``"filename"`` holds the file's base name cut at its first dot;
        ``"probabilities"`` maps each entry of ``class_names`` to the softmax
        probability of the first (only) item in the batch.
    """
    # Force three channels so grayscale or palette files take the same path as
    # RGB ones, and close the file as soon as the pixels have been read.
    with Image.open(img_path) as raw_image:
        img = raw_image.convert("RGB")
    processed_input = processor(images=img, return_tensors="pt")
    with torch.no_grad():
        logits = model(**processed_input).logits
        probabilities = torch.softmax(logits, dim=1)[0].tolist()
    result = dict(zip(class_names, probabilities))
    filename = os.path.basename(img_path).split(".")[0]
    return {"filename": filename, "probabilities": result}
def format_output(output):
    """Unpack a classification result into a (filename text, probabilities) pair."""
    shown_name = f"{output['filename']}"
    class_probabilities = output["probabilities"]
    return shown_name, class_probabilities
# Function to load examples from a folder
def load_examples_from_folder(folder_path):
    """Return the paths of the image files located directly inside a folder.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        list[str]: ``folder_path`` joined with every entry whose name ends in
        ``.png``, ``.jpg`` or ``.jpeg`` (any letter case), sorted by name.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    # os.listdir returns entries in arbitrary order, so sort for a
    # deterministic result; lower-casing the name also accepts ".JPG" etc.
    return [
        os.path.join(folder_path, entry)
        for entry in sorted(os.listdir(folder_path))
        if entry.lower().endswith(image_suffixes)
    ]
# Example images are collected from the local examples folder.
examples_folder = "./examples"
examples = load_examples_from_folder(examples_folder)

# Texts shown at the top of the page.
_TITLE = "Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation"
_DESCRIPTION = "Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. Checkout the model in more details at https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class. The examples presented are take from the test set of [Kermany et al. (2018) dataset.](https://data.mendeley.com/datasets/rscbjbr9sj/2)"

# Widgets: an image input that passes a file path to the callback, a text box
# for the file name and a label widget for the class probabilities.
_image_input = gr.Image(type="filepath")
_result_outputs = [gr.Textbox(label="True Label (from filename)"), gr.Label()]

iface = gr.Interface(
    fn=lambda img: format_output(classify_image(img)),
    inputs=_image_input,
    outputs=_result_outputs,
    examples=examples,
    title=_TITLE,
    description=_DESCRIPTION,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()

.history/app_20240617182353.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import os
+import gradio as gr
+from PIL import Image
+import torch
+from transformers import ViTForImageClassification, ViTImageProcessor
+from datasets import load_dataset
# Image processor configuration comes from the base ViT checkpoint named below.
model_name_or_path = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

# Class names are read from the "label" feature of the training split.
dataset_path = "pawlo2013/chest_xray"
train_dataset = load_dataset(dataset_path, split="train")
class_names = train_dataset.features["label"].names

# Label maps in both directions, following the order of class_names.
_id2label = {str(position): name for position, name in enumerate(class_names)}
_label2id = {name: position for position, name in enumerate(class_names)}

# Classifier weights are loaded from the local ./models directory and the
# model is switched to evaluation mode right away.
model = ViTForImageClassification.from_pretrained(
    "./models",
    num_labels=len(class_names),
    id2label=_id2label,
    label2id=_label2id,
)
model.eval()
# Define the classification function
def classify_image(img_path):
    """Classify one image file and report the probability of every class.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        dict: ``"filename"`` holds the file's base name cut at its first dot;
        ``"probabilities"`` maps each entry of ``class_names`` to the softmax
        probability of the first (only) item in the batch.
    """
    # Force three channels so grayscale or palette files take the same path as
    # RGB ones, and close the file as soon as the pixels have been read.
    with Image.open(img_path) as raw_image:
        img = raw_image.convert("RGB")
    processed_input = processor(images=img, return_tensors="pt")
    with torch.no_grad():
        logits = model(**processed_input).logits
        probabilities = torch.softmax(logits, dim=1)[0].tolist()
    result = dict(zip(class_names, probabilities))
    filename = os.path.basename(img_path).split(".")[0]
    return {"filename": filename, "probabilities": result}
def format_output(output):
    """Unpack a classification result into a (filename text, probabilities) pair."""
    shown_name = f"{output['filename']}"
    class_probabilities = output["probabilities"]
    return shown_name, class_probabilities
# Function to load examples from a folder
def load_examples_from_folder(folder_path):
    """Return the paths of the image files located directly inside a folder.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        list[str]: ``folder_path`` joined with every entry whose name ends in
        ``.png``, ``.jpg`` or ``.jpeg`` (any letter case), sorted by name.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    # os.listdir returns entries in arbitrary order, so sort for a
    # deterministic result; lower-casing the name also accepts ".JPG" etc.
    return [
        os.path.join(folder_path, entry)
        for entry in sorted(os.listdir(folder_path))
        if entry.lower().endswith(image_suffixes)
    ]
# Example images are collected from the local examples folder.
examples_folder = "./examples"
examples = load_examples_from_folder(examples_folder)

# Create the Gradio interface: one image input (passed to the callback as a
# file path) and two outputs (file name text, class probabilities).
iface = gr.Interface(
    fn=lambda img: format_output(classify_image(img)),
    inputs=gr.Image(type="filepath"),
    outputs=[gr.Textbox(label="True Label (from filename)"), gr.Label()],
    examples=examples,
    title="Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation",
    # A Markdown link needs "]" and "(" to be adjacent; the space that used to
    # sit between them left the "here" link unrendered.
    description="Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. Checkout the model in more details [here](https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class). The examples presented are take from the test set of [Kermany et al. (2018) dataset.](https://data.mendeley.com/datasets/rscbjbr9sj/2)",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()

.history/app_20240617182506.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import os
+import gradio as gr
+from PIL import Image
+import torch
+from transformers import ViTForImageClassification, ViTImageProcessor
+from datasets import load_dataset
# Image processor configuration comes from the base ViT checkpoint named below.
model_name_or_path = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

# Class names are read from the "label" feature of the training split.
dataset_path = "pawlo2013/chest_xray"
train_dataset = load_dataset(dataset_path, split="train")
class_names = train_dataset.features["label"].names

# Label maps in both directions, following the order of class_names.
_id2label = {str(position): name for position, name in enumerate(class_names)}
_label2id = {name: position for position, name in enumerate(class_names)}

# Classifier weights are loaded from the local ./models directory and the
# model is switched to evaluation mode right away.
model = ViTForImageClassification.from_pretrained(
    "./models",
    num_labels=len(class_names),
    id2label=_id2label,
    label2id=_label2id,
)
model.eval()
# Define the classification function
def classify_image(img_path):
    """Classify one image file and report the probability of every class.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        dict: ``"filename"`` holds the file's base name cut at its first dot;
        ``"probabilities"`` maps each entry of ``class_names`` to the softmax
        probability of the first (only) item in the batch.
    """
    # Force three channels so grayscale or palette files take the same path as
    # RGB ones, and close the file as soon as the pixels have been read.
    with Image.open(img_path) as raw_image:
        img = raw_image.convert("RGB")
    processed_input = processor(images=img, return_tensors="pt")
    with torch.no_grad():
        logits = model(**processed_input).logits
        probabilities = torch.softmax(logits, dim=1)[0].tolist()
    result = dict(zip(class_names, probabilities))
    filename = os.path.basename(img_path).split(".")[0]
    return {"filename": filename, "probabilities": result}
def format_output(output):
    """Unpack a classification result into a (filename text, probabilities) pair."""
    shown_name = f"{output['filename']}"
    class_probabilities = output["probabilities"]
    return shown_name, class_probabilities
# Function to load examples from a folder
def load_examples_from_folder(folder_path):
    """Return the paths of the image files located directly inside a folder.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        list[str]: ``folder_path`` joined with every entry whose name ends in
        ``.png``, ``.jpg`` or ``.jpeg`` (any letter case), sorted by name.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    # os.listdir returns entries in arbitrary order, so sort for a
    # deterministic result; lower-casing the name also accepts ".JPG" etc.
    return [
        os.path.join(folder_path, entry)
        for entry in sorted(os.listdir(folder_path))
        if entry.lower().endswith(image_suffixes)
    ]
# Example images are collected from the local examples folder.
examples_folder = "./examples"
examples = load_examples_from_folder(examples_folder)

# Texts shown at the top of the page.
_TITLE = "Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation"
_DESCRIPTION = "Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. Checkout the model in more details [here](https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class). The examples presented are take from the test set of [Kermany et al. (2018) dataset.](https://data.mendeley.com/datasets/rscbjbr9sj/2)"

# Widgets: an image input that passes a file path to the callback, a text box
# for the file name and a label widget for the class probabilities.
_image_input = gr.Image(type="filepath")
_result_outputs = [gr.Textbox(label="True Label (from filename)"), gr.Label()]

iface = gr.Interface(
    fn=lambda img: format_output(classify_image(img)),
    inputs=_image_input,
    outputs=_result_outputs,
    examples=examples,
    title=_TITLE,
    description=_DESCRIPTION,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()

app.py CHANGED Viewed

@@ -4,6 +4,10 @@ from PIL import Image
 import torch
 from transformers import ViTForImageClassification, ViTImageProcessor
 from datasets import load_dataset
 # Model and processor configuration
 model_name_or_path = "google/vit-base-patch16-224-in21k"
@@ -27,21 +31,37 @@ model.eval()
 # Define the classification function
-def classify_image(img_path):
-    img = Image.open(img_path)
-    processed_input = processor(images=img, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**processed_input)
         logits = outputs.logits
         probabilities = torch.softmax(logits, dim=1)[0].tolist()
     result = {class_name: prob for class_name, prob in zip(class_names, probabilities)}
     filename = os.path.basename(img_path).split(".")[0]
-    return {"filename": filename, "probabilities": result}
 def format_output(output):
-    return f"{output['filename']}", output["probabilities"]
 # Function to load examples from a folder
@@ -53,20 +73,94 @@ def load_examples_from_folder(folder_path):
     return examples
 # Define the path to the examples folder
 examples_folder = "./examples"
 examples = load_examples_from_folder(examples_folder)
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=lambda img: format_output(classify_image(img)),
     inputs=gr.Image(type="filepath"),
-    outputs=[gr.Textbox(label="True Label (from filename)"), gr.Label()],
     examples=examples,
     title="Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation",
-    description="Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. Checkout the model in more details at https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class",
 )
 # Launch the app
 if __name__ == "__main__":
     iface.launch()

 import torch
 from transformers import ViTForImageClassification, ViTImageProcessor
 from datasets import load_dataset
+import matplotlib.pyplot as plt
+import numpy as np
+import cv2
 # Model and processor configuration
 model_name_or_path = "google/vit-base-patch16-224-in21k"
 # Define the classification function
# Define the classification function
def classify_and_visualize(
    img_path, device="cpu", discard_ratio=0.9, head_fusion="mean"
):
    """Classify one image file and build an attention heatmap for it.

    Args:
        img_path: Path of the image file to classify.
        device: Device the processed input is moved to; also forwarded to
            ``show_final_layer_attention_maps``.
        discard_ratio: Forwarded to ``show_final_layer_attention_maps``.
        head_fusion: Forwarded to ``show_final_layer_attention_maps``.

    Returns:
        dict: ``"filename"`` holds the file's base name cut at its first dot,
        ``"probabilities"`` maps each entry of ``class_names`` to its softmax
        probability, and ``"heatmap"`` is whatever
        ``show_final_layer_attention_maps`` returns for the processed input.
    """
    # Read the pixels inside a context manager so the file is closed as soon
    # as the RGB copy exists.
    with Image.open(img_path) as raw_image:
        img = raw_image.convert("RGB")
    processed_input = processor(images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        logits = model(**processed_input).logits
        probabilities = torch.softmax(logits, dim=1)[0].tolist()

    result = dict(zip(class_names, probabilities))
    filename = os.path.basename(img_path).split(".")[0]

    # Generate attention heatmap
    heatmap_img = show_final_layer_attention_maps(
        model, processed_input, device, discard_ratio, head_fusion
    )
    return {"filename": filename, "probabilities": result, "heatmap": heatmap_img}
 def format_output(output):
+    return (
+        f"{output['filename']}",
+        output["probabilities"],
+        gr.Image(value=output["heatmap"]),
+    )
 # Function to load examples from a folder
     return examples
def _rescale_to_unit_range(values):
    """Min-max scale a NumPy array to [0, 1]; a constant array maps to zeros."""
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # Avoid 0/0 (NaN) when every element is identical.
        return np.zeros_like(values)
    return (values - low) / span


# Function to show final layer attention maps
def show_final_layer_attention_maps(
    model, tensor, device, discard_ratio=0.6, head_fusion="max", only_last_layer=False
):
    """Blend an attention-rollout heatmap with the preprocessed input image.

    The model is run once with ``output_attentions=True``. For each attention
    layer the heads are fused, the weakest entries are zeroed, the identity is
    added, rows are renormalised, and the per-layer matrices are multiplied
    together. Row 0 of the product (minus its first column) is reshaped to a
    square grid, resized to the input resolution, coloured with the ``jet``
    colormap and mixed 40/60 with the input image.

    Despite the function name, all layers contribute unless
    ``only_last_layer`` is true.

    Args:
        model: Callable accepting ``**tensor`` plus ``output_attentions=True``
            and returning an object with an ``attentions`` sequence.
        tensor: Mapping with a ``"pixel_values"`` entry. It is expected to
            hold a single image: the leading axis is squeezed away and only
            batch item 0 is visualised.
        device: Device used for the intermediate tensors.
        discard_ratio: Fraction of each fused attention matrix's entries
            (smallest first) that is set to zero.
        head_fusion: ``"mean"``, ``"max"`` or ``"min"`` across the head axis.
        only_last_layer: Use only the last attention layer instead of all.

    Returns:
        PIL.Image.Image: 8-bit RGB overlay at the input image's resolution.

    Raises:
        ValueError: If ``head_fusion`` is not one of the supported modes.
    """
    head_fusers = {
        "mean": lambda att: att.mean(dim=1),
        "max": lambda att: att.max(dim=1)[0],
        "min": lambda att: att.min(dim=1)[0],
    }
    if head_fusion not in head_fusers:
        raise ValueError(
            f"head_fusion must be one of {sorted(head_fusers)}, got {head_fusion!r}"
        )
    fuse_heads = head_fusers[head_fusion]

    image = tensor["pixel_values"].to(device).squeeze(0)

    with torch.no_grad():
        # Forward pass through the model, asking for the attention weights
        outputs = model(**tensor, output_attentions=True)

        if only_last_layer:
            layers = outputs.attentions[-1:]
        else:
            layers = outputs.attentions

        num_tokens = outputs.attentions[0].size(-1)
        identity = torch.eye(num_tokens, device=device)
        rollout = torch.eye(num_tokens, device=device)

        for attention in layers:
            fused = fuse_heads(attention.to(device))

            # `flat` is a view of `fused`, so zeroing entries here edits
            # `fused` in place. Flat index 0 (the first token attending to
            # itself) is never discarded.
            flat = fused.view(fused.size(0), -1)
            _, lowest = flat.topk(
                int(flat.size(-1) * discard_ratio), dim=-1, largest=False
            )
            lowest = lowest[lowest != 0]
            flat[0, lowest] = 0

            layer_matrix = (fused + identity) / 2
            # keepdim=True divides every row by its OWN sum; without it the
            # sums broadcast along the last axis and rescale columns instead.
            layer_matrix = layer_matrix / layer_matrix.sum(dim=-1, keepdim=True)
            rollout = torch.matmul(layer_matrix, rollout)

        # Attention from token 0 to every other token, for batch item 0.
        mask = rollout[0, 0, 1:]

    # In case of 224x224 image, this brings us from 196 to 14
    # (assumes the remaining tokens form a square grid).
    grid = int(mask.size(-1) ** 0.5)
    mask = mask.reshape(grid, grid).cpu().numpy()

    # cv2.resize expects the target size as (width, height).
    height, width = image.shape[-2:]
    mask = _rescale_to_unit_range(cv2.resize(mask, (width, height)))
    heatmap = plt.cm.jet(mask)[:, :, :3]  # Apply colormap, drop alpha

    # Channels-last copy of the input image, scaled to [0, 1] for display.
    showed_img = _rescale_to_unit_range(
        image.permute(1, 2, 0).detach().cpu().numpy()
    )

    # Superimpose heatmap on the original image
    superimposed_img = heatmap * 0.4 + showed_img * 0.6
    return Image.fromarray((superimposed_img * 255).astype(np.uint8))
 # Define the path to the examples folder
 examples_folder = "./examples"
 examples = load_examples_from_folder(examples_folder)
 # Create the Gradio interface
 iface = gr.Interface(
+    fn=lambda img: format_output(classify_and_visualize(img)),
     inputs=gr.Image(type="filepath"),
+    outputs=[
+        gr.Textbox(label="True Label (from filename)"),
+        gr.Label(),
+        gr.Image(label="Attention Heatmap"),
+    ],
     examples=examples,
     title="Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation",
+    description="Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. Checkout the model in more details [here](https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class). The examples presented are taken from the test set of [Kermany et al. (2018) dataset.](https://data.mendeley.com/datasets/rscbjbr9sj/2.) The attention heatmap over all layers of the transfomer done by the attention rollout techinique by the implementation of [jacobgil](https://github.com/jacobgil/vit-explain).",
 )
 # Launch the app
 if __name__ == "__main__":
     iface.launch()

requirements.txt CHANGED Viewed

@@ -1,3 +1,8 @@
 torch
 transformers
-datasets

 torch
 transformers
+datasets
+numpy
+cv2
+PIL
+os
+matplotlib