update
app.py CHANGED

@@ -15,6 +15,8 @@ from tqdm import tqdm, trange
 import skimage.io as io
 import PIL.Image
 import gradio as gr
+
+
 N = type(None)
 V = np.array
 ARRAY = np.ndarray

@@ -228,47 +230,47 @@ clip_model, preprocess = clip.load("ViT-B/16", device=device, jit=False)
 from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained("imthanhlv/gpt2news")

-def inference(img, text, is_translate):
-    prefix_length = 10
-    model = ClipCaptionModel(prefix_length)
-    model_path = 'sat_019.pt'
-    model.load_state_dict(torch.load(model_path, map_location=CPU))
-    model = model.eval()
-    device = CUDA(0) if is_gpu else "cpu"
-    model = model.to(device)
-    use_beam_search = True
-    if is_translate:
-        # encode text
-        if text is None:
-            return "No text provided"
-        text = clip.tokenize([text]).to(device)
-        with torch.no_grad():
-            prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
-            prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
-        generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
-    [further removed lines (old 250-262) were not captured in the page extraction]
+def inference(img, text, is_translation):
+    prefix_length = 10
+    model = ClipCaptionModel(prefix_length)
+    model_path = 'sat_019.pt'
+    model.load_state_dict(torch.load(model_path, map_location=CPU))
+    model = model.eval()
+    device = CUDA(0) if is_gpu else "cpu"
+    model = model.to(device)
+    if is_translation:
+        # encode text
+        if text is None:
+            return "No text provided"
+        text = clip.tokenize([text]).to(device)
+        with torch.no_grad():
+            prefix = clip_model.encode_text(text).to(device, dtype=torch.float32)
+            prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
+        generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
+    else:
+        if img is None:
+            return "No image"
+        image = io.imread(img.name)
+        pil_image = PIL.Image.fromarray(image)
+        image = preprocess(pil_image).unsqueeze(0).to(device)
+
+        with torch.no_grad():
+            prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
+            prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
+        generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed, prompt="Một bức ảnh về")[0]

+    return generated_text_prefix

 title = "CLIP Dual encoder"
-description = "You can translate English
+description = "You can translate English to Vietnamese or generate Vietnamese caption from image"
 examples=[["drug.jpg","", False], ["", "What is your name?", True]]

 inputs = [
-    [removed lines (old 269-271) were not captured in the page extraction]
+    gr.inputs.Image(type="file", label="Image to generate Vietnamese caption", optional=True),
+    gr.inputs.Textbox(lines=2, placeholder="English sentence for translation"),
+    gr.inputs.Checkbox()
 ]

 gr.Interface(
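
The hunk ends at the opening of the gr.Interface( call, so the rest of the wiring is not shown in this commit view. Below is a minimal sketch of how the pieces defined above would typically be connected; the outputs component and the launch() call are assumptions, not part of the shown change.

# Sketch only: continues app.py after the diff above. The outputs component and
# launch() call are assumed, since the commit view cuts off at "gr.Interface(".
gr.Interface(
    fn=inference,            # function defined in the hunk above
    inputs=inputs,           # optional image file, English textbox, is_translation checkbox
    outputs=gr.outputs.Textbox(label="Vietnamese output"),  # assumed output component
    title=title,
    description=description,
    examples=examples,       # [["drug.jpg", "", False], ["", "What is your name?", True]]
).launch()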