isLinXu committed on
Commit
bfc98ed
·
1 Parent(s): 53db737
Files changed (2) hide show
  1. app.py +542 -0
  2. requirements.txt +20 -0
app.py ADDED
@@ -0,0 +1,542 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from PIL.Image import Image
7
+
8
import subprocess
import sys

# The OpenMMLab packages are installed at start-up, so this has to run before
# the mmengine/mmdet imports further down. pip is invoked through the running
# interpreter so the packages land in the environment that will import them,
# and an argument list is used so the version specifiers need no shell quoting.
# A failed install is not fatal here (same as the previous os.system calls);
# the subsequent import will report the missing package.
for _requirement in ('mmengine>=0.6.0', 'mmcv>=2.0.0rc4,<2.1.0', 'mmdet>=3.0.0,<4.0.0'):
    subprocess.run([sys.executable, "-m", "pip", "install", _requirement], check=False)
11
+
12
+ import fnmatch
13
+ import os
14
+
15
+ import PIL
16
+ import gradio as gr
17
+ from argparse import ArgumentParser
18
+
19
+ import torch
20
+ from mim import download
21
+ from mmengine.logging import print_log
22
+
23
+ from mmdet.apis import DetInferencer
24
+
25
+ import warnings
26
+
27
+ warnings.filterwarnings("ignore")
28
+
29
# Folder that receives the downloaded config and checkpoint files.
ckpt_path = "./checkpoint"
# exist_ok=True removes the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(ckpt_path, exist_ok=True)
32
+
33
+ mmdet_list = ['mask-rcnn_r50_fpn_albu-1x_coco', 'atss_r50_fpn_1x_coco', 'atss_r101_fpn_1x_coco',
34
+ 'autoassign_r50-caffe_fpn_1x_coco', 'boxinst_r50_fpn_ms-90k_coco', 'boxinst_r101_fpn_ms-90k_coco',
35
+ 'faster-rcnn_r50_fpn_carafe_1x_coco', 'mask-rcnn_r50_fpn_carafe_1x_coco',
36
+ 'cascade-rcnn_r50-caffe_fpn_1x_coco', 'cascade-rcnn_r50_fpn_1x_coco',
37
+ 'cascade-rcnn_r50_fpn_20e_coco', 'cascade-rcnn_r101-caffe_fpn_1x_coco',
38
+ 'cascade-rcnn_r101_fpn_1x_coco', 'cascade-rcnn_r101_fpn_20e_coco',
39
+ 'cascade-rcnn_x101-32x4d_fpn_1x_coco', 'cascade-rcnn_x101-32x4d_fpn_20e_coco',
40
+ 'cascade-rcnn_x101-64x4d_fpn_1x_coco', 'cascade-rcnn_x101_64x4d_fpn_20e_coco',
41
+ 'cascade-mask-rcnn_r50-caffe_fpn_1x_coco', 'cascade-mask-rcnn_r50_fpn_1x_coco',
42
+ 'cascade-mask-rcnn_r50_fpn_20e_coco', 'cascade-mask-rcnn_r101-caffe_fpn_1x_coco',
43
+ 'cascade-mask-rcnn_r101_fpn_1x_coco', 'cascade-mask-rcnn_r101_fpn_20e_coco',
44
+ 'cascade-mask-rcnn_x101-32x4d_fpn_1x_coco', 'cascade-mask-rcnn_x101-32x4d_fpn_20e_coco',
45
+ 'cascade-mask-rcnn_x101-64x4d_fpn_1x_coco', 'cascade-mask-rcnn_x101-64x4d_fpn_20e_coco',
46
+ 'cascade-mask-rcnn_r50-caffe_fpn_ms-3x_coco', 'cascade-mask-rcnn_r50_fpn_mstrain_3x_coco',
47
+ 'cascade-mask-rcnn_r101-caffe_fpn_ms-3x_coco', 'cascade-mask-rcnn_r101_fpn_ms-3x_coco',
48
+ 'cascade-mask-rcnn_x101-32x4d_fpn_ms-3x_coco', 'cascade-mask-rcnn_x101-32x8d_fpn_ms-3x_coco',
49
+ 'cascade-mask-rcnn_x101-64x4d_fpn_ms-3x_coco', 'cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco',
50
+ 'cascade-rpn_faster-rcnn_r50-caffe_fpn_1x_coco', 'centernet_r18-dcnv2_8xb16-crop512-140e_coco',
51
+ 'centernet_r18_8xb16-crop512-140e_coco', 'centernet-update_r50-caffe_fpn_ms-1x_coco',
52
+ 'centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco',
53
+ 'condinst_r50_fpn_ms-poly-90k_coco_instance', 'conditional-detr_r50_8xb2-50e_coco.py',
54
+ 'cornernet_hourglass104_10xb5-crop511-210e-mstest_coco',
55
+ 'cornernet_hourglass104_8xb6-210e-mstest_coco', 'cornernet_hourglass104_32xb3-210e-mstest_coco',
56
+ 'mask-rcnn_convnext-t-p4-w7_fpn_amp-ms-crop-3x_coco',
57
+ 'cascade-mask-rcnn_convnext-t-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco',
58
+ 'cascade-mask-rcnn_convnext-s-p4-w7_fpn_4conv1fc-giou_amp-ms-crop-3x_coco',
59
+ 'crowddet-rcnn_refine_r50_fpn_8xb2-30e_crowdhuman', 'crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman',
60
+ 'dab-detr_r50_8xb2-50e_coco.py', 'faster-rcnn_r50_fpn_dconv_c3-c5_1x_coco',
61
+ 'faster-rcnn_r50_fpn_dpool_1x_coco', 'faster-rcnn_r101-dconv-c3-c5_fpn_1x_coco',
62
+ 'faster-rcnn_x101-32x4d-dconv-c3-c5_fpn_1x_coco', 'mask-rcnn_r50_fpn_dconv_c3-c5_1x_coco',
63
+ 'mask-rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco', 'mask-rcnn_r101-dconv-c3-c5_fpn_1x_coco',
64
+ 'cascade-rcnn_r50_fpn_dconv_c3-c5_1x_coco', 'cascade-rcnn_r101-dconv-c3-c5_fpn_1x_coco',
65
+ 'cascade-mask-rcnn_r50_fpn_dconv_c3-c5_1x_coco', 'cascade-mask-rcnn_r101-dconv-c3-c5_fpn_1x_coco',
66
+ 'cascade-mask-rcnn_x101-32x4d-dconv-c3-c5_fpn_1x_coco', 'faster-rcnn_r50_fpn_mdconv_c3-c5_1x_coco',
67
+ 'faster-rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco', 'faster-rcnn_r50_fpn_mdpool_1x_coco',
68
+ 'mask-rcnn_r50_fpn_mdconv_c3-c5_1x_coco', 'mask-rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco',
69
+ 'ddod_r50_fpn_1x_coco', 'deformable-detr_r50_16xb2-50e_coco',
70
+ 'deformable-detr_refine_r50_16xb2-50e_coco', 'deformable-detr_refine_twostage_r50_16xb2-50e_coco',
71
+ 'cascade-rcnn_r50-rfp_1x_coco', 'cascade-rcnn_r50-sac_1x_coco',
72
+ 'detectors_cascade-rcnn_r50_1x_coco', 'htc_r50-rfp_1x_coco', 'htc_r50-sac_1x_coco',
73
+ 'detectors_htc-r50_1x_coco', 'detr_r50_8xb2-150e_coco', 'dino-4scale_r50_8xb2-12e_coco.py',
74
+ 'dino-4scale_r50_8xb2-24e_coco.py', 'dino-5scale_swin-l_8xb2-12e_coco.py',
75
+ 'dino-5scale_swin-l_8xb2-36e_coco.py', 'dh-faster-rcnn_r50_fpn_1x_coco',
76
+ 'atss_r50-caffe_fpn_dyhead_1x_coco', 'atss_r50_fpn_dyhead_1x_coco',
77
+ 'atss_swin-l-p4-w12_fpn_dyhead_ms-2x_coco', 'dynamic-rcnn_r50_fpn_1x_coco',
78
+ 'retinanet_effb3_fpn_8xb4-crop896-1x_coco', 'faster-rcnn_r50_fpn_attention_1111_1x_coco',
79
+ 'faster-rcnn_r50_fpn_attention_0010_1x_coco', 'faster-rcnn_r50_fpn_attention_1111_dcn_1x_coco',
80
+ 'faster-rcnn_r50_fpn_attention_0010_dcn_1x_coco', 'faster-rcnn_r50-caffe-c4_1x_coco',
81
+ 'faster-rcnn_r50-caffe-c4_mstrain_1x_coco', 'faster-rcnn_r50-caffe-dc5_1x_coco',
82
+ 'faster-rcnn_r50-caffe_fpn_1x_coco', 'faster-rcnn_r50_fpn_1x_coco',
83
+ 'faster-rcnn_r50_fpn_fp16_1x_coco', 'faster-rcnn_r50_fpn_2x_coco',
84
+ 'faster-rcnn_r101-caffe_fpn_1x_coco', 'faster-rcnn_r101_fpn_1x_coco',
85
+ 'faster-rcnn_r101_fpn_2x_coco', 'faster-rcnn_x101-32x4d_fpn_1x_coco',
86
+ 'faster-rcnn_x101-32x4d_fpn_2x_coco', 'faster-rcnn_x101-64x4d_fpn_1x_coco',
87
+ 'faster-rcnn_x101-64x4d_fpn_2x_coco', 'faster-rcnn_r50_fpn_iou_1x_coco',
88
+ 'faster-rcnn_r50_fpn_giou_1x_coco', 'faster-rcnn_r50_fpn_bounded_iou_1x_coco',
89
+ 'faster-rcnn_r50-caffe-dc5_mstrain_1x_coco', 'faster-rcnn_r50-caffe-dc5_mstrain_3x_coco',
90
+ 'faster-rcnn_r50-caffe_fpn_ms-2x_coco', 'faster-rcnn_r50-caffe_fpn_ms-3x_coco',
91
+ 'faster-rcnn_r50_fpn_mstrain_3x_coco', 'faster-rcnn_r101-caffe_fpn_ms-3x_coco',
92
+ 'faster-rcnn_r101_fpn_ms-3x_coco', 'faster-rcnn_x101-32x4d_fpn_ms-3x_coco',
93
+ 'faster-rcnn_x101-32x8d_fpn_ms-3x_coco', 'faster-rcnn_x101-64x4d_fpn_ms-3x_coco',
94
+ 'faster-rcnn_r50_fpn_tnr-pretrain_1x_coco', 'fcos_r50-caffe_fpn_gn-head_1x_coco',
95
+ 'fcos_r50-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco',
96
+ 'fcos_r50-dcn-caffe_fpn_gn-head-center-normbbox-centeronreg-giou_1x_coco',
97
+ 'fcos_r101-caffe_fpn_gn-head-1x_coco', 'fcos_r50-caffe_fpn_gn-head_ms-640-800-2x_coco',
98
+ 'fcos_r101-caffe_fpn_gn-head_ms-640-800-2x_coco', 'fcos_x101-64x4d_fpn_gn-head_ms-640-800-2x_coco',
99
+ 'fovea_r50_fpn_4xb4-1x_coco', 'fovea_r50_fpn_4xb4-2x_coco',
100
+ 'fovea_r50_fpn_gn-head-align_4xb4-2x_coco', 'fovea_r50_fpn_gn-head-align_ms-640-800-4xb4-2x_coco',
101
+ 'fovea_r101_fpn_4xb4-1x_coco', 'fovea_r101_fpn_4xb4-2x_coco',
102
+ 'fovea_r101_fpn_gn-head-align_4xb4-2x_coco',
103
+ 'fovea_r101_fpn_gn-head-align_ms-640-800-4xb4-2x_coco', 'faster-rcnn_r50_fpg_crop640-50e_coco',
104
+ 'faster-rcnn_r50_fpg-chn128_crop640-50e_coco', 'mask-rcnn_r50_fpg_crop640-50e_coco',
105
+ 'mask-rcnn_r50_fpg-chn128_crop640-50e_coco', 'retinanet_r50_fpg_crop640_50e_coco',
106
+ 'retinanet_r50_fpg-chn128_crop640_50e_coco', 'freeanchor_r50_fpn_1x_coco',
107
+ 'freeanchor_r101_fpn_1x_coco', 'freeanchor_x101-32x4d_fpn_1x_coco', 'fsaf_r50_fpn_1x_coco',
108
+ 'fsaf_r101_fpn_1x_coco', 'fsaf_x101-64x4d_fpn_1x_coco', 'mask-rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco',
109
+ 'mask-rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco', 'mask-rcnn_r101-gcb-r16-c3-c5_fpn_1x_coco',
110
+ 'mask-rcnn_r101-gcb-r4-c3-c5_fpn_1x_coco', 'mask-rcnn_r50_fpn_syncbn-backbone_1x_coco',
111
+ 'mask-rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco',
112
+ 'mask-rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco', 'mask-rcnn_r101-syncbn_fpn_1x_coco',
113
+ 'mask-rcnn_r101-syncbn-gcb-r16-c3-c5_fpn_1x_coco',
114
+ 'mask-rcnn_r101-syncbn-gcb-r4-c3-c5_fpn_1x_coco', 'mask-rcnn_x101-32x4d-syncbn_fpn_1x_coco',
115
+ 'mask-rcnn_x101-32x4d-syncbn-gcb-r16-c3-c5_fpn_1x_coco',
116
+ 'mask-rcnn_x101-32x4d-syncbn-gcb-r4-c3-c5_fpn_1x_coco',
117
+ 'cascade-mask-rcnn_x101-32x4d-syncbn_fpn_1x_coco',
118
+ 'cascade-mask-rcnn_x101-32x4d-syncbn-r16-gcb-c3-c5_fpn_1x_coco',
119
+ 'cascade-mask-rcnn_x101-32x4d-syncbn-r4-gcb-c3-c5_fpn_1x_coco',
120
+ 'cascade-mask-rcnn_x101-32x4d-syncbn-dconv-c3-c5_fpn_1x_coco',
121
+ 'cascade-mask-rcnn_x101-32x4d-syncbn-dconv-c3-c5-r16-gcb-c3-c5_fpn_1x_coco',
122
+ 'cascade-mask-rcnn_x101-32x4d-syncbn-dconv-c3-c5-r4-gcb-c3-c5_fpn_1x_coco', 'gfl_r50_fpn_1x_coco',
123
+ 'gfl_r50_fpn_ms-2x_coco', 'gfl_r101_fpn_ms-2x_coco', 'gfl_r101-dconv-c3-c5_fpn_ms-2x_coco',
124
+ 'gfl_x101-32x4d_fpn_ms-2x_coco', 'gfl_x101-32x4d-dconv-c4-c5_fpn_ms-2x_coco',
125
+ 'retinanet_r50_fpn_ghm-1x_coco', 'retinanet_r101_fpn_ghm-1x_coco',
126
+ 'retinanet_x101-32x4d_fpn_ghm-1x_coco', 'retinanet_x101-64x4d_fpn_ghm-1x_coco',
127
+ 'mask-rcnn_r50_fpn_gn-all_2x_coco', 'mask-rcnn_r50_fpn_gn-all_3x_coco',
128
+ 'mask-rcnn_r101_fpn_gn-all_2x_coco', 'mask-rcnn_r101_fpn_gn-all_3x_coco',
129
+ 'mask-rcnn_r50_fpn_gn-all_contrib_2x_coco', 'mask-rcnn_r50_fpn_gn-all_contrib_3x_coco',
130
+ 'faster-rcnn_r50_fpn_gn_ws-all_1x_coco', 'faster-rcnn_r101_fpn_gn-ws-all_1x_coco',
131
+ 'faster-rcnn_x50-32x4d_fpn_gn-ws-all_1x_coco', 'faster-rcnn_x101-32x4d_fpn_gn-ws-all_1x_coco',
132
+ 'mask-rcnn_r50_fpn_gn_ws-all_2x_coco', 'mask-rcnn_r101_fpn_gn-ws-all_2x_coco',
133
+ 'mask-rcnn_x50-32x4d_fpn_gn-ws-all_2x_coco', 'mask-rcnn_x101-32x4d_fpn_gn-ws-all_2x_coco',
134
+ 'mask-rcnn_r50_fpn_gn_ws-all_20_23_24e_coco', 'mask-rcnn_r101_fpn_gn-ws-all_20-23-24e_coco',
135
+ 'mask-rcnn_x50-32x4d_fpn_gn-ws-all_20-23-24e_coco',
136
+ 'mask-rcnn_x101-32x4d_fpn_gn-ws-all_20-23-24e_coco', 'grid-rcnn_r50_fpn_gn-head_2x_coco',
137
+ 'grid-rcnn_r101_fpn_gn-head_2x_coco', 'grid-rcnn_x101-32x4d_fpn_gn-head_2x_coco',
138
+ 'grid-rcnn_x101-64x4d_fpn_gn-head_2x_coco', 'faster-rcnn_r50_fpn_groie_1x_coco',
139
+ 'grid-rcnn_r50_fpn_gn-head-groie_1x_coco', 'mask-rcnn_r50_fpn_groie_1x_coco',
140
+ 'mask-rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco',
141
+ 'mask-rcnn_r101_fpn_syncbn-r4-gcb_c3-c5-groie_1x_coco', 'ga-rpn_r50-caffe_fpn_1x_coco',
142
+ 'ga-rpn_r101-caffe_fpn_1x_coco', 'ga-rpn_x101-32x4d_fpn_1x_coco', 'ga-rpn_x101-64x4d_fpn_1x_coco',
143
+ 'ga-faster-rcnn_r50-caffe_fpn_1x_coco', 'ga-faster-rcnn_r101-caffe_fpn_1x_coco',
144
+ 'ga-faster-rcnn_x101-32x4d_fpn_1x_coco', 'ga-faster-rcnn_x101-64x4d_fpn_1x_coco',
145
+ 'ga-retinanet_r50-caffe_fpn_1x_coco', 'ga-retinanet_r101-caffe_fpn_1x_coco',
146
+ 'ga-retinanet_x101-32x4d_fpn_1x_coco', 'ga-retinanet_x101-64x4d_fpn_1x_coco',
147
+ 'faster-rcnn_hrnetv2p-w18-1x_coco', 'faster-rcnn_hrnetv2p-w18-2x_coco',
148
+ 'faster-rcnn_hrnetv2p-w32-1x_coco', 'faster-rcnn_hrnetv2p-w32_2x_coco',
149
+ 'faster-rcnn_hrnetv2p-w40-1x_coco', 'faster-rcnn_hrnetv2p-w40_2x_coco',
150
+ 'mask-rcnn_hrnetv2p-w18-1x_coco', 'mask-rcnn_hrnetv2p-w18-2x_coco',
151
+ 'mask-rcnn_hrnetv2p-w32-1x_coco', 'mask-rcnn_hrnetv2p-w32-2x_coco',
152
+ 'mask-rcnn_hrnetv2p-w40_1x_coco', 'mask-rcnn_hrnetv2p-w40-2x_coco',
153
+ 'cascade-rcnn_hrnetv2p-w18-20e_coco', 'cascade-rcnn_hrnetv2p-w32-20e_coco',
154
+ 'cascade-rcnn_hrnetv2p-w40-20e_coco', 'cascade-mask-rcnn_hrnetv2p-w18_20e_coco',
155
+ 'cascade-mask-rcnn_hrnetv2p-w32_20e_coco', 'cascade-mask-rcnn_hrnetv2p-w40-20e_coco',
156
+ 'htc_hrnetv2p-w18_20e_coco', 'htc_hrnetv2p-w32_20e_coco', 'htc_hrnetv2p-w40_20e_coco',
157
+ 'fcos_hrnetv2p-w18-gn-head_4xb4-1x_coco', 'fcos_hrnetv2p-w18-gn-head_4xb4-2x_coco',
158
+ 'fcos_hrnetv2p-w32-gn-head_4xb4-1x_coco', 'fcos_hrnetv2p-w32-gn-head_4xb4-2x_coco',
159
+ 'fcos_hrnetv2p-w18-gn-head_ms-640-800-4xb4-2x_coco',
160
+ 'fcos_hrnetv2p-w32-gn-head_ms-640-800-4xb4-2x_coco',
161
+ 'fcos_hrnetv2p-w40-gn-head_ms-640-800-4xb4-2x_coco', 'htc_r50_fpn_1x_coco', 'htc_r50_fpn_20e_coco',
162
+ 'htc_r101_fpn_20e_coco', 'htc_x101-32x4d_fpn_16xb1-20e_coco', 'htc_x101-64x4d_fpn_16xb1-20e_coco',
163
+ 'htc_x101-64x4d-dconv-c3-c5_fpn_ms-400-1400-16xb1-20e_coco',
164
+ 'mask-rcnn_r50_fpn_instaboost_4x_coco', 'mask-rcnn_r101_fpn_instaboost-4x_coco',
165
+ 'mask-rcnn_x101-64x4d_fpn_instaboost-4x_coco', 'cascade-mask-rcnn_r50_fpn_instaboost_4x_coco',
166
+ 'lad_r101-paa-r50_fpn_2xb8_coco_1x', 'lad_r50-paa-r101_fpn_2xb8_coco_1x',
167
+ 'ld_r18-gflv1-r101_fpn_1x_coco', 'ld_r34-gflv1-r101_fpn_1x_coco', 'ld_r50-gflv1-r101_fpn_1x_coco',
168
+ 'ld_r101-gflv1-r101-dcn_fpn_2x_coco', 'libra-faster-rcnn_r50_fpn_1x_coco',
169
+ 'libra-faster-rcnn_r101_fpn_1x_coco', 'libra-faster-rcnn_x101-64x4d_fpn_1x_coco',
170
+ 'libra-retinanet_r50_fpn_1x_coco', 'mask-rcnn_r50_fpn_sample1e-3_ms-2x_lvis-v0.5',
171
+ 'mask-rcnn_r101_fpn_sample1e-3_ms-2x_lvis-v0.5',
172
+ 'mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-2x_lvis-v0.5',
173
+ 'mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-2x_lvis-v0.5',
174
+ 'mask-rcnn_r50_fpn_sample1e-3_ms-1x_lvis-v1', 'mask-rcnn_r101_fpn_sample1e-3_ms-1x_lvis-v1',
175
+ 'mask-rcnn_x101-32x4d_fpn_sample1e-3_ms-1x_lvis-v1',
176
+ 'mask-rcnn_x101-64x4d_fpn_sample1e-3_ms-1x_lvis-v1',
177
+ 'mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco-panoptic', 'mask2former_r101_8xb2-lsj-50e_coco',
178
+ 'mask2former_r101_8xb2-lsj-50e_coco-panoptic', 'mask2former_r50_8xb2-lsj-50e_coco-panoptic',
179
+ 'mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco-panoptic', 'mask2former_r50_8xb2-lsj-50e_coco',
180
+ 'mask2former_swin-l-p4-w12-384-in21k_16xb1-lsj-100e_coco-panoptic',
181
+ 'mask2former_swin-b-p4-w12-384-in21k_8xb2-lsj-50e_coco-panoptic',
182
+ 'mask2former_swin-b-p4-w12-384_8xb2-lsj-50e_coco-panoptic',
183
+ 'mask2former_swin-t-p4-w7-224_8xb2-lsj-50e_coco', 'mask2former_swin-s-p4-w7-224_8xb2-lsj-50e_coco',
184
+ 'mask-rcnn_r50-caffe_fpn_1x_coco', 'mask-rcnn_r50_fpn_1x_coco', 'mask-rcnn_r50_fpn_fp16_1x_coco',
185
+ 'mask-rcnn_r50_fpn_2x_coco', 'mask-rcnn_r101-caffe_fpn_1x_coco', 'mask-rcnn_r101_fpn_1x_coco',
186
+ 'mask-rcnn_r101_fpn_2x_coco', 'mask-rcnn_x101-32x4d_fpn_1x_coco',
187
+ 'mask-rcnn_x101-32x4d_fpn_2x_coco', 'mask-rcnn_x101-64x4d_fpn_1x_coco',
188
+ 'mask-rcnn_x101-64x4d_fpn_2x_coco', 'mask-rcnn_x101-32x8d_fpn_1x_coco',
189
+ 'mask-rcnn_r50-caffe_fpn_ms-poly-2x_coco', 'mask-rcnn_r50-caffe_fpn_ms-poly-3x_coco',
190
+ 'mask-rcnn_r50_fpn_mstrain-poly_3x_coco', 'mask-rcnn_r101_fpn_ms-poly-3x_coco',
191
+ 'mask-rcnn_r101-caffe_fpn_ms-poly-3x_coco', 'mask-rcnn_x101-32x4d_fpn_ms-poly-3x_coco',
192
+ 'mask-rcnn_x101-32x8d_fpn_ms-poly-1x_coco', 'mask-rcnn_x101-32x8d_fpn_ms-poly-3x_coco',
193
+ 'mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco', 'maskformer_r50_ms-16xb1-75e_coco',
194
+ 'maskformer_swin-l-p4-w12_64xb1-ms-300e_coco', 'ms-rcnn_r50-caffe_fpn_1x_coco',
195
+ 'ms-rcnn_r50-caffe_fpn_2x_coco', 'ms-rcnn_r101-caffe_fpn_1x_coco',
196
+ 'ms-rcnn_r101-caffe_fpn_2x_coco', 'ms-rcnn_x101-32x4d_fpn_1x_coco',
197
+ 'ms-rcnn_x101-64x4d_fpn_1x_coco', 'ms-rcnn_x101-64x4d_fpn_2x_coco',
198
+ 'nas-fcos_r50-caffe_fpn_nashead-gn-head_4xb4-1x_coco',
199
+ 'nas-fcos_r50-caffe_fpn_fcoshead-gn-head_4xb4-1x_coco', 'retinanet_r50_fpn_crop640-50e_coco',
200
+ 'retinanet_r50_nasfpn_crop640-50e_coco', 'faster-rcnn_r50_fpn_32x2_1x_openimages',
201
+ 'retinanet_r50_fpn_32xb2-1x_openimages', 'ssd300_32xb8-36e_openimages',
202
+ 'faster-rcnn_r50_fpn_32x2_1x_openimages_challenge', 'faster-rcnn_r50_fpn_32x2_cas_1x_openimages',
203
+ 'faster-rcnn_r50_fpn_32x2_cas_1x_openimages_challenge', 'paa_r50_fpn_1x_coco',
204
+ 'paa_r50_fpn_1.5x_coco', 'paa_r50_fpn_2x_coco', 'paa_r50_fpn_mstrain_3x_coco',
205
+ 'paa_r101_fpn_1x_coco', 'paa_r101_fpn_2x_coco', 'paa_r101_fpn_mstrain_3x_coco',
206
+ 'faster-rcnn_r50_pafpn_1x_coco', 'panoptic_fpn_r50_fpn_1x_coco',
207
+ 'panoptic_fpn_r50_fpn_mstrain_3x_coco', 'panoptic_fpn_r101_fpn_1x_coco',
208
+ 'panoptic_fpn_r101_fpn_mstrain_3x_coco', 'retinanet_pvt-t_fpn_1x_coco',
209
+ 'retinanet_pvt-s_fpn_1x_coco', 'retinanet_pvt-m_fpn_1x_coco', 'retinanet_pvtv2-b0_fpn_1x_coco',
210
+ 'retinanet_pvtv2-b1_fpn_1x_coco', 'retinanet_pvtv2-b2_fpn_1x_coco',
211
+ 'retinanet_pvtv2-b3_fpn_1x_coco', 'retinanet_pvtv2-b4_fpn_1x_coco',
212
+ 'retinanet_pvtv2-b5_fpn_1x_coco', 'pisa_faster_rcnn_r50_fpn_1x_coco',
213
+ 'pisa_faster_rcnn_x101_32x4d_fpn_1x_coco', 'pisa_mask_rcnn_r50_fpn_1x_coco',
214
+ 'pisa_retinanet_r50_fpn_1x_coco', 'pisa_retinanet_x101_32x4d_fpn_1x_coco', 'pisa_ssd300_coco',
215
+ 'pisa_ssd512_coco', 'point_rend_r50_caffe_fpn_mstrain_1x_coco',
216
+ 'point_rend_r50_caffe_fpn_mstrain_3x_coco', 'queryinst_r50_fpn_1x_coco',
217
+ 'queryinst_r50_fpn_ms-480-800-3x_coco', 'queryinst_r50_fpn_300-proposals_crop-ms-480-800-3x_coco',
218
+ 'queryinst_r101_fpn_ms-480-800-3x_coco',
219
+ 'queryinst_r101_fpn_300-proposals_crop-ms-480-800-3x_coco', 'mask-rcnn_regnetx-3.2GF_fpn_1x_coco',
220
+ 'mask-rcnn_regnetx-4GF_fpn_1x_coco', 'mask-rcnn_regnetx-6.4GF_fpn_1x_coco',
221
+ 'mask-rcnn_regnetx-8GF_fpn_1x_coco', 'mask-rcnn_regnetx-12GF_fpn_1x_coco',
222
+ 'mask-rcnn_regnetx-3.2GF-mdconv-c3-c5_fpn_1x_coco', 'faster-rcnn_regnetx-3.2GF_fpn_1x_coco',
223
+ 'faster-rcnn_regnetx-3.2GF_fpn_2x_coco', 'retinanet_regnetx-800MF_fpn_1x_coco',
224
+ 'retinanet_regnetx-1.6GF_fpn_1x_coco', 'retinanet_regnetx-3.2GF_fpn_1x_coco',
225
+ 'faster-rcnn_regnetx-400MF_fpn_ms-3x_coco', 'faster-rcnn_regnetx-800MF_fpn_ms-3x_coco',
226
+ 'faster-rcnn_regnetx-1.6GF_fpn_ms-3x_coco', 'faster-rcnn_regnetx-3.2GF_fpn_ms-3x_coco',
227
+ 'faster-rcnn_regnetx-4GF_fpn_ms-3x_coco', 'mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco',
228
+ 'mask-rcnn_regnetx-400MF_fpn_ms-poly-3x_coco', 'mask-rcnn_regnetx-800MF_fpn_ms-poly-3x_coco',
229
+ 'mask-rcnn_regnetx-1.6GF_fpn_ms-poly-3x_coco', 'mask-rcnn_regnetx-4GF_fpn_ms-poly-3x_coco',
230
+ 'cascade-mask-rcnn_regnetx-400MF_fpn_ms-3x_coco', 'cascade-mask-rcnn_regnetx-800MF_fpn_ms-3x_coco',
231
+ 'cascade-mask-rcnn_regnetx-1.6GF_fpn_ms-3x_coco', 'cascade-mask-rcnn_regnetx-3.2GF_fpn_ms-3x_coco',
232
+ 'cascade-mask-rcnn_regnetx-4GF_fpn_ms-3x_coco', 'reppoints-bbox_r50_fpn-gn_head-gn-grid_1x_coco',
233
+ 'reppoints-bbox_r50-center_fpn-gn_head-gn-grid_1x_coco', 'reppoints-moment_r50_fpn_1x_coco',
234
+ 'reppoints-moment_r50_fpn-gn_head-gn_1x_coco', 'reppoints-moment_r50_fpn-gn_head-gn_2x_coco',
235
+ 'reppoints-moment_r101_fpn-gn_head-gn_2x_coco',
236
+ 'reppoints-moment_r101-dconv-c3-c5_fpn-gn_head-gn_2x_coco',
237
+ 'reppoints-moment_x101-dconv-c3-c5_fpn-gn_head-gn_2x_coco', 'faster-rcnn_res2net-101_fpn_2x_coco',
238
+ 'mask-rcnn_res2net-101_fpn_2x_coco', 'cascade-rcnn_res2net-101_fpn_20e_coco',
239
+ 'cascade-mask-rcnn_res2net-101_fpn_20e_coco', 'htc_res2net-101_fpn_20e_coco',
240
+ 'faster-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco',
241
+ 'faster-rcnn_s101_fpn_syncbn-backbone+head_ms-range-1x_coco',
242
+ 'mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco',
243
+ 'mask-rcnn_s101_fpn_syncbn-backbone+head_ms-1x_coco',
244
+ 'cascade-rcnn_s50_fpn_syncbn-backbone+head_ms-range-1x_coco',
245
+ 'cascade-rcnn_s101_fpn_syncbn-backbone+head_ms-range-1x_coco',
246
+ 'cascade-mask-rcnn_s50_fpn_syncbn-backbone+head_ms-1x_coco',
247
+ 'cascade-mask-rcnn_s101_fpn_syncbn-backbone+head_ms-1x_coco',
248
+ 'faster-rcnn_r50_fpn_rsb-pretrain_1x_coco', 'cascade-mask-rcnn_r50_fpn_rsb-pretrain_1x_coco',
249
+ 'retinanet_r50-rsb-pre_fpn_1x_coco', 'mask-rcnn_r50_fpn_rsb-pretrain_1x_coco',
250
+ 'retinanet_r18_fpn_1x_coco', 'retinanet_r18_fpn_1xb8-1x_coco', 'retinanet_r50-caffe_fpn_1x_coco',
251
+ 'retinanet_r50_fpn_1x_coco', 'retinanet_r50_fpn_amp-1x_coco', 'retinanet_r50_fpn_2x_coco',
252
+ 'retinanet_r50_fpn_ms-640-800-3x_coco', 'retinanet_r101-caffe_fpn_1x_coco',
253
+ 'retinanet_r101-caffe_fpn_ms-3x_coco', 'retinanet_r101_fpn_1x_coco', 'retinanet_r101_fpn_2x_coco',
254
+ 'retinanet_r101_fpn_ms-640-800-3x_coco', 'retinanet_x101-32x4d_fpn_1x_coco',
255
+ 'retinanet_x101-32x4d_fpn_2x_coco', 'retinanet_x101-64x4d_fpn_1x_coco',
256
+ 'retinanet_x101-64x4d_fpn_2x_coco', 'retinanet_x101-64x4d_fpn_ms-640-800-3x_coco',
257
+ 'rpn_r50-caffe_fpn_1x_coco', 'rpn_r50_fpn_1x_coco', 'rpn_r50_fpn_2x_coco',
258
+ 'rpn_r101-caffe_fpn_1x_coco', 'rpn_x101-32x4d_fpn_1x_coco', 'rpn_x101-32x4d_fpn_2x_coco',
259
+ 'rpn_x101-64x4d_fpn_1x_coco', 'rpn_x101-64x4d_fpn_2x_coco', 'rtmdet_tiny_8xb32-300e_coco',
260
+ 'rtmdet_s_8xb32-300e_coco', 'rtmdet_m_8xb32-300e_coco', 'rtmdet_l_8xb32-300e_coco',
261
+ 'rtmdet_x_8xb32-300e_coco', 'rtmdet-ins_tiny_8xb32-300e_coco', 'rtmdet-ins_s_8xb32-300e_coco',
262
+ 'rtmdet-ins_m_8xb32-300e_coco', 'rtmdet-ins_l_8xb32-300e_coco', 'rtmdet-ins_x_8xb16-300e_coco',
263
+ 'sabl-faster-rcnn_r50_fpn_1x_coco', 'sabl-faster-rcnn_r101_fpn_1x_coco',
264
+ 'sabl-cascade-rcnn_r50_fpn_1x_coco', 'sabl-cascade-rcnn_r101_fpn_1x_coco',
265
+ 'sabl-retinanet_r50_fpn_1x_coco', 'sabl-retinanet_r50-gn_fpn_1x_coco',
266
+ 'sabl-retinanet_r101_fpn_1x_coco', 'sabl-retinanet_r101-gn_fpn_1x_coco',
267
+ 'sabl-retinanet_r101-gn_fpn_ms-640-800-2x_coco', 'sabl-retinanet_r101-gn_fpn_ms-480-960-2x_coco',
268
+ 'scnet_r50_fpn_1x_coco', 'scnet_r50_fpn_20e_coco', 'scnet_r101_fpn_20e_coco',
269
+ 'scnet_x101-64x4d_fpn_20e_coco', 'faster-rcnn_r50_fpn_gn-all_scratch_6x_coco',
270
+ 'mask-rcnn_r50_fpn_gn-all_scratch_6x_coco',
271
+ 'mask-rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1',
272
+ 'mask-rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1',
273
+ 'mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1',
274
+ 'mask-rcnn_r101_fpn_seesaw-loss-normed-mask_random-ms-2x_lvis-v1',
275
+ 'mask-rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1',
276
+ 'mask-rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1',
277
+ 'mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1',
278
+ 'mask-rcnn_r101_fpn_seesaw-loss-normed-mask_sample1e-3-ms-2x_lvis-v1',
279
+ 'cascade-mask-rcnn_r101_fpn_seesaw-loss_random-ms-2x_lvis-v1',
280
+ 'cascade-mask-rcnn_r101_fpn_seesaw-loss-normed-mask_random-ms-2x_lvis-v1',
281
+ 'cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1',
282
+ 'cascade-mask-rcnn_r101_fpn_seesaw-loss-normed-mask_sample1e-3-ms-2x_lvis-v1',
283
+ 'mask-rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco',
284
+ 'mask-rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco',
285
+ 'mask-rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco',
286
+ 'mask-rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco',
287
+ 'soft-teacher_faster-rcnn_r50-caffe_fpn_180k_semi-0.01-coco.py',
288
+ 'soft-teacher_faster-rcnn_r50-caffe_fpn_180k_semi-0.02-coco.py',
289
+ 'soft-teacher_faster-rcnn_r50-caffe_fpn_180k_semi-0.05-coco.py',
290
+ 'soft-teacher_faster-rcnn_r50-caffe_fpn_180k_semi-0.1-coco.py', 'sparse-rcnn_r50_fpn_1x_coco',
291
+ 'sparse-rcnn_r50_fpn_ms-480-800-3x_coco',
292
+ 'sparse-rcnn_r50_fpn_300-proposals_crop-ms-480-800-3x_coco',
293
+ 'sparse-rcnn_r101_fpn_ms-480-800-3x_coco',
294
+ 'sparse-rcnn_r101_fpn_300-proposals_crop-ms-480-800-3x_coco', 'decoupled-solo_r50_fpn_1x_coco',
295
+ 'decoupled-solo_r50_fpn_3x_coco', 'decoupled-solo-light_r50_fpn_3x_coco', 'solo_r50_fpn_3x_coco',
296
+ 'solo_r50_fpn_1x_coco', 'solov2_r50_fpn_1x_coco', 'solov2_r50_fpn_ms-3x_coco',
297
+ 'solov2_r101-dcn_fpn_ms-3x_coco', 'solov2_x101-dcn_fpn_ms-3x_coco',
298
+ 'solov2-light_r18_fpn_ms-3x_coco', 'solov2-light_r50_fpn_ms-3x_coco', 'ssd300_coco', 'ssd512_coco',
299
+ 'ssdlite_mobilenetv2-scratch_8xb24-600e_coco',
300
+ 'mask-rcnn_r50-caffe_fpn_rpn-2conv_4conv1fc_syncbn-all_lsj-100e_coco',
301
+ 'mask-rcnn_swin-s-p4-w7_fpn_amp-ms-crop-3x_coco', 'mask-rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco',
302
+ 'mask-rcnn_swin-t-p4-w7_fpn_1x_coco', 'mask-rcnn_swin-t-p4-w7_fpn_amp-ms-crop-3x_coco',
303
+ 'tridentnet_r50-caffe_1x_coco', 'tridentnet_r50-caffe_ms-1x_coco',
304
+ 'tridentnet_r50-caffe_ms-3x_coco', 'tood_r101_fpn_ms-2x_coco', 'tood_x101-64x4d_fpn_ms-2x_coco',
305
+ 'tood_r101-dconv-c3-c5_fpn_ms-2x_coco', 'tood_r50_fpn_anchor-based_1x_coco',
306
+ 'tood_r50_fpn_1x_coco', 'tood_r50_fpn_ms-2x_coco', 'vfnet_r50_fpn_1x_coco',
307
+ 'vfnet_r50_fpn_ms-2x_coco', 'vfnet_r50-mdconv-c3-c5_fpn_ms-2x_coco', 'vfnet_r101_fpn_1x_coco',
308
+ 'vfnet_r101_fpn_ms-2x_coco', 'vfnet_r101-mdconv-c3-c5_fpn_ms-2x_coco',
309
+ 'vfnet_x101-32x4d-mdconv-c3-c5_fpn_ms-2x_coco', 'vfnet_x101-64x4d-mdconv-c3-c5_fpn_ms-2x_coco',
310
+ 'yolact_r50_1x8_coco', 'yolact_r50_8x8_coco', 'yolact_r101_1x8_coco', 'yolov3_d53_320_273e_coco',
311
+ 'yolov3_d53_mstrain-416_273e_coco', 'yolov3_d53_mstrain-608_273e_coco',
312
+ 'yolov3_d53_fp16_mstrain-608_273e_coco', 'yolov3_mobilenetv2_8xb24-320-300e_coco',
313
+ 'yolov3_mobilenetv2_8xb24-ms-416-300e_coco', 'yolof_r50_c5_8x8_1x_coco', 'yolox_s_8x8_300e_coco',
314
+ 'yolox_l_8x8_300e_coco', 'yolox_x_8x8_300e_coco', 'yolox_tiny_8x8_300e_coco',
315
+ 'bytetrack_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_test-mot17halfval',
316
+ 'bytetrack_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_test-mot17test',
317
+ 'bytetrack_yolox_x_8xb4-amp-80e_crowdhuman-mot20train_test-mot20test',
318
+ 'strongsort_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_test-mot17halfval',
319
+ 'strongsort_yolox_x_8xb4-80e_crowdhuman-mot20train_test-mot20test',
320
+ 'ocsort_yolox_x_crowdhuman_mot17-private-half',
321
+ 'sort_faster-rcnn_r50_fpn_8xb2-4e_mot17halftrain_test-mot17halfval',
322
+ 'deepsort_faster-rcnn_r50_fpn_8xb2-4e_mot17halftrain_test-mot17halfval',
323
+ 'qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17halftrain_test-mot17halfval',
324
+ 'mask2former_r50_8xb2-8e_youtubevis2021', 'mask2former_r101_8xb2-8e_youtubevis2021',
325
+ 'mask2former_swin-l-p4-w12-384-in21k_8xb2-8e_youtubevis2021.py',
326
+ 'masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019',
327
+ 'masktrack-rcnn_mask-rcnn_r101_fpn_8xb1-12e_youtubevis2019',
328
+ 'masktrack-rcnn_mask-rcnn_x101_fpn_8xb1-12e_youtubevis2019',
329
+ 'masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2021',
330
+ 'masktrack-rcnn_mask-rcnn_r101_fpn_8xb1-12e_youtubevis2021',
331
+ 'masktrack-rcnn_mask-rcnn_x101_fpn_8xb1-12e_youtubevis2021',
332
+ 'glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365', 'glip_atss_swin-t_b_fpn_dyhead_pretrain_obj365',
333
+ 'glip_atss_swin-t_c_fpn_dyhead_pretrain_obj365-goldg',
334
+ 'glip_atss_swin-t_fpn_dyhead_pretrain_obj365-goldg-cc3m-sub',
335
+ 'glip_atss_swin-l_fpn_dyhead_pretrain_mixeddata']
336
+
337
def download_test_image():
    """Download the three sample pictures used as demo examples.

    Every picture is fetched with ``torch.hub.download_url_to_file`` and
    written to the current working directory under a fixed file name.
    """
    # (remote URL, local file name) pairs, downloaded in this order.
    sample_images = (
        ('https://user-images.githubusercontent.com/59380685/266264420-21575a83-4057-41cf-8a4a-b3ea6f332d79.jpg',
         'bus.jpg'),
        ('https://user-images.githubusercontent.com/59380685/266264536-82afdf58-6b9a-4568-b9df-551ee72cb6d9.jpg',
         'dogs.jpg'),
        ('https://user-images.githubusercontent.com/59380685/266264600-9d0c26ca-8ba6-45f2-b53b-4dc98460c43e.jpg',
         'zidane.jpg'),
    )
    for url, local_name in sample_images:
        torch.hub.download_url_to_file(url, local_name)
348
+
349
def parse_args(argv=None):
    """Parse the inference options and return them as a plain dict.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the process command line (``sys.argv[1:]``) is parsed, which is
            the previous behaviour.

    Returns:
        dict: Option values keyed by argparse destination name
        (``inputs``, ``model``, ``weights``, ``out_dir``, ``texts``,
        ``device``, ``pred_score_thr``, ``batch_size``, ``show``,
        ``no_save_vis``, ``no_save_pred``, ``print_result``, ``palette``,
        ``custom_entities``).
    """
    # NOTE: a stray ``gr.inputs.Dropdown(...)`` expression used to sit between
    # the first two options. It built a UI component that was immediately
    # discarded and had no effect on parsing, so it has been removed.
    parser = ArgumentParser()
    parser.add_argument(
        '--inputs', type=str, help='Input image file or folder path.')
    parser.add_argument(
        '--model',
        type=str,
        help='Config or checkpoint .pth file or the model name '
             'and alias defined in metafile. The model configuration '
             'file will try to read from .pth if the parameter is '
             'a .pth weights file.')
    parser.add_argument('--weights', default=None, help='Checkpoint file')
    parser.add_argument(
        '--out-dir',
        type=str,
        default='outputs',
        help='Output directory of images or prediction results.')
    parser.add_argument('--texts', help='text prompt', default=None)
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--pred-score-thr',
        type=float,
        default=0.3,
        help='bbox score threshold')
    parser.add_argument(
        '--batch-size', type=int, default=1, help='Inference batch size.')
    parser.add_argument(
        '--show',
        action='store_true',
        help='Display the image in a popup window.')
    parser.add_argument(
        '--no-save-vis',
        action='store_true',
        help='Do not save detection vis results')
    parser.add_argument(
        '--no-save-pred',
        action='store_true',
        help='Do not save detection json results')
    parser.add_argument(
        '--print-result',
        action='store_true',
        help='Whether to print the results.')
    parser.add_argument(
        '--palette',
        default='none',
        choices=['coco', 'voc', 'citys', 'random', 'none'],
        help='Color palette used for visualization')
    # only for GLIP
    parser.add_argument(
        '--custom-entities',
        '-c',
        action='store_true',
        help='Whether to customize entity names? '
             'If so, the input text should be '
             '"cls_name1 . cls_name2 . cls_name3 ." format')

    call_args = vars(parser.parse_args(argv))
    return call_args
410
+
411
+
412
def clear_folder(folder_path):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Files and symlinks are unlinked and sub-directories are removed
    recursively. Deletion is best-effort: an entry that cannot be removed is
    reported on stdout and the remaining entries are still processed.

    Args:
        folder_path: Directory whose contents should be removed. A path that
            is not an existing directory is treated as already empty.
    """
    import shutil

    if not os.path.isdir(folder_path):
        # Nothing to clear; os.listdir() below would raise on a missing folder.
        return

    had_failure = False
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            had_failure = True
            print(f"Failed to delete {file_path}. Reason: {e}")
    # Only claim success when every entry was actually removed.
    if not had_failure:
        print(f"Clear {folder_path} successfully.")
424
+
425
+
426
def download_cfg_checkpoint_model_name(model_name):
    """Replace the contents of ``./checkpoint`` with the files for ``model_name``.

    The folder is emptied with ``clear_folder`` first; ``download`` (imported
    from ``mim``) is then asked to fetch the ``mmdet`` config called
    ``model_name`` into that folder.

    Args:
        model_name: Config name handed to ``download`` via ``configs``.
    """
    target_dir = "./checkpoint"
    clear_folder(target_dir)
    download(package='mmdet', configs=[model_name], dest_root=target_dir)
431
+
432
def save_image(img, img_path):
    """Write an image to ``img_path`` with OpenCV.

    Args:
        img: Image to store. Expected to be a PIL image in practice (the
            Gradio input is declared with ``type="pil"``); any object that
            ``np.array`` turns into an RGB array is accepted as well.
        img_path: Destination file path; the extension selects the format.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    # Grayscale and palette images become 2-D arrays, which COLOR_RGB2BGR
    # rejects, so PIL inputs are normalised to 3-channel RGB first. Objects
    # without ``convert`` (e.g. an ndarray) are passed through unchanged.
    if hasattr(img, "convert"):
        img = img.convert("RGB")
    # OpenCV stores channels in BGR order.
    bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(img_path, bgr):
        raise OSError(f"Failed to write image to {img_path}")
437
+
438
def detect_objects(img_path, model, weights, out_dir, texts, device, pred_score_thr, batch_size, show, no_save_vis,
                   no_save_pred, print_result, palette, custom_entities):
    """Split the demo settings into inferencer constructor and call arguments.

    Despite its name this function runs no inference; it only assembles the
    two keyword dictionaries the caller passes on.

    The argument dict is built directly from the parameters. It used to start
    from ``parse_args()``, but every parsed key was overwritten below, so the
    only effect was to re-parse the web server's own command line on each
    request (and abort on any unknown option).

    Args:
        img_path: Path of the input image; stored under ``inputs``.
        model: Config path or model name. A value ending in ``.pth`` is
            treated as a weights file (see below).
        weights: Checkpoint path.
        out_dir: Output directory. Replaced by ``''`` when both
            ``no_save_vis`` and ``no_save_pred`` are set.
        texts: Text prompt, passed through unchanged.
        device: Device string, passed through unchanged.
        pred_score_thr: Score threshold; converted with ``float``.
        batch_size: Batch size; converted with ``int``.
        show, no_save_vis, no_save_pred, print_result, custom_entities:
            Flags, passed through unchanged.
        palette: Palette name. ``None`` (what an unselected radio group
            submits) falls back to ``'none'``, the command-line default.

    Returns:
        tuple: ``(init_args, call_args)``. ``init_args`` holds ``model``,
        ``weights``, ``device`` and ``palette``; ``call_args`` holds the
        remaining entries.
    """
    call_args = {
        'inputs': img_path,
        'model': model,
        'weights': weights,
        'out_dir': out_dir,
        'texts': texts,
        'device': device,
        'pred_score_thr': float(pred_score_thr),
        'batch_size': int(batch_size),
        'show': show,
        'no_save_vis': no_save_vis,
        'no_save_pred': no_save_pred,
        'print_result': print_result,
        'palette': 'none' if palette is None else palette,
        'custom_entities': custom_entities,
    }

    # Nothing will be written, so no output directory is needed.
    if call_args['no_save_vis'] and call_args['no_save_pred']:
        call_args['out_dir'] = ''

    # Guard against a missing model before inspecting its extension.
    if isinstance(call_args['model'], str) and call_args['model'].endswith('.pth'):
        print_log('The model is a weight file, automatically '
                  'assign the model to --weights')
        call_args['weights'] = call_args['model']
        call_args['model'] = None

    # These four keys configure the inferencer itself; everything left in
    # call_args is meant for the inference call.
    init_kws = ['model', 'weights', 'device', 'palette']
    init_args = {init_kw: call_args.pop(init_kw) for init_kw in init_kws}

    return init_args, call_args
471
+
472
+
473
def _find_downloaded_file(folder, patterns):
    """Return the path of the first file in ``folder`` matching one of ``patterns``.

    Patterns are tried in order, so earlier ones take precedence.

    Raises:
        FileNotFoundError: If no file matches any pattern.
    """
    names = sorted(os.listdir(folder))
    for pattern in patterns:
        matches = fnmatch.filter(names, pattern)
        if matches:
            return os.path.join(folder, matches[0])
    raise FileNotFoundError(f'No file matching {patterns} found in {folder}')


def main(inputs, model_name, out_dir, texts, device, pred_score_thr, batch_size, show, no_save_vis, no_save_pred,
         print_result, palette, custom_entities):
    """Run one detection request of the web demo and return the rendered image.

    The selected model's config and checkpoint are downloaded into
    ``./checkpoint``, the uploaded picture is stored as ``input_img.jpg``,
    a ``DetInferencer`` is built and called, and the saved visualization is
    loaded back.

    Args:
        inputs: Uploaded image (handed to ``save_image``).
        model_name: Name of the model to download. A trailing ``.py`` is
            ignored.
        out_dir, texts, device, pred_score_thr, batch_size, show,
        no_save_vis, no_save_pred, print_result, palette, custom_entities:
            Forwarded to ``detect_objects``.

    Returns:
        The visualization as a PIL image, or ``None`` when ``no_save_vis`` is
        set and therefore no visualization is written.

    Raises:
        FileNotFoundError: If the config or checkpoint is missing after the
            download.
    """
    # Some entries of the model list end in ".py". With the extension kept,
    # the "<name>*.py" pattern below could never match the config file
    # (presumably the download expects the bare config name as well).
    if model_name.endswith('.py'):
        model_name = model_name[:-len('.py')]

    download_cfg_checkpoint_model_name(model_name)
    img_path = "input_img.jpg"
    save_image(inputs, img_path)

    path = "./checkpoint"
    model = _find_downloaded_file(path, [model_name + "*.py"])
    # Checkpoint file names do not always start with the config name. The
    # folder is emptied before every download, so any .pth found there belongs
    # to the requested model; use it as a fallback.
    weights = _find_downloaded_file(path, [model_name + "*.pth", "*.pth"])

    init_args, call_args = detect_objects(img_path, model, weights, out_dir, texts, device, pred_score_thr, batch_size,
                                          show, no_save_vis, no_save_pred,
                                          print_result, palette, custom_entities)

    # TODO: Video and Webcam are currently not supported and
    #  may consume too much memory if your input folder has a lot of images.
    #  We will be optimized later.
    inferencer = DetInferencer(**init_args)
    inferencer(**call_args)

    if call_args['out_dir'] != '' and not (call_args['no_save_vis']
                                           and call_args['no_save_pred']):
        print_log(f'results have been saved at {call_args["out_dir"]}')

    if call_args['no_save_vis']:
        # No visualization was written for this request; opening the file
        # would either fail or return a stale image from an earlier run.
        return None

    # Same "<out_dir>/vis/<file name>" layout the previously hard-coded
    # './outputs/vis/' path relied on, but following the requested out_dir.
    img_out = PIL.Image.open(os.path.join(call_args['out_dir'], 'vis', img_path))
    return img_out
501
+
502
+
503
if __name__ == '__main__':
    # Fetch the sample pictures referenced by the examples below.
    download_test_image()
    # Each example fills the first five inputs: image, model, out_dir, texts,
    # device. Every model named here must be one of the dropdown choices.
    examples = [
        ['bus.jpg', 'rtmdet_tiny_8xb32-300e_coco', './outputs', '', "cpu"],
        ['dogs.jpg', 'mask-rcnn_r50_fpn_albu-1x_coco', './outputs', '', "cpu"],
        # Was a YOLOv8 config name, which is not in mmdet_list and therefore
        # not a selectable model.
        ['zidane.jpg', 'rtmdet_s_8xb32-300e_coco', './outputs', '', "cpu"]
    ]

    title = "MMDetection detection web demo"
    description = "<div align='center'><img src='https://raw.githubusercontent.com/open-mmlab/mmdetection/main/resources/mmdet-logo.png' width='450'/></div>" \
                  "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmdetection'>MMDetection</a> 是一个开源的物体检测工具箱,提供了丰富的检测模型和数据增强方式。" \
                  "OpenMMLab Detection Toolbox and Benchmark.</p>"
    article = "<p style='text-align: center'><a href='https://github.com/open-mmlab/mmdetection'>MMDetection</a></p>" \
              "<p style='text-align: center'><a href='https://github.com/isLinXu'>gradio build by gatilin</a></p>"

    # Components are taken from the top-level gradio namespace; the
    # gr.inputs / gr.outputs namespaces are deprecated. The order of the
    # inputs must match the parameter order of main().
    iface = gr.Interface(
        fn=main,
        inputs=[
            gr.Image(type="pil", label="input"),
            gr.Dropdown(choices=list(mmdet_list), label='Model',
                        value='rtmdet_tiny_8xb32-300e_coco'),
            gr.Textbox(label="out_dir", value="./outputs/"),
            gr.Textbox(label="texts", value=''),
            gr.Textbox(label="device", value="cpu"),
            gr.Slider(label="pred_score_thr", minimum=0.0, maximum=1.0, step=0.1, value=0.3),
            gr.Number(label="batch_size", value=1),
            gr.Checkbox(label="show"),
            gr.Checkbox(label="no_save_vis"),
            gr.Checkbox(label="no_save_pred"),
            gr.Checkbox(label="print_result"),
            # Preselect "none" (the command-line default) so the radio group
            # never submits an empty selection.
            gr.Radio(label="palette", choices=["coco", "voc", "citys", "random", "none"], value="none"),
            gr.Checkbox(label="custom_entities")
        ],
        outputs=gr.Image(type="pil"),
        examples=examples,
        title=title,
        description=description, article=article, allow_flagging="never"
    )

    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wget~=3.2
2
+ opencv-python~=4.6.0.66
3
+ numpy~=1.23.0
4
+ torch~=1.13.1
5
+ torchvision~=0.14.1
6
+ pillow~=9.4.0
7
+ gradio~=3.42.0
8
+ ultralytics~=8.0.169
9
+ pyyaml~=6.0
10
+ wandb~=0.13.11
11
+ tqdm~=4.65.0
12
+ matplotlib~=3.7.1
13
+ pandas~=2.0.0
14
+ seaborn~=0.12.2
15
+ requests~=2.31.0
16
+ psutil~=5.9.4
17
+ thop~=0.1.1-2209072238
18
+ timm~=0.9.2
19
+ super-gradients~=3.2.0
20
+ openmim