| dataset_kwargs: | |
| window_size: 512 | |
| window_stride: 384 | |
| debug: false | |
| inference_kwargs: | |
| im_size: 1024 | |
| window_size: 512 | |
| window_stride: 128 | |
| net_kwargs: | |
| backbone: dino_vits16 | |
| d_model: 384 | |
| decoder: | |
| drop_path_rate: 0.0 | |
| dropout: 0.1 | |
| n_cls: 31 | |
| n_layers: 1 | |
| name: mask_transformer | |
| distilled: false | |
| drop_path_rate: 0.1 | |
| dropout: 0.0 | |
| image_size: !!python/tuple | |
| - 512 | |
| - 512 | |
| n_cls: 31 | |
| n_heads: 3 | |
| n_layers: 12 | |
| normalization: deit | |
| patch_size: 16 | |
| val_dataset_kwargs: | |
| batch_size: 1 | |
| crop_size: 512 | |
| dataset: cityscapes | |
| image_size: 1024 | |
| normalization: deit | |
| num_workers: 10 | |
| split: val | |