
Commit 67f703b

release dino-eva 1280 checkpoint (#338)
1 parent: 5d19a5c

File tree

2 files changed: +64 -0 lines changed


projects/dino_eva/README.md (+1)
@@ -37,6 +37,7 @@ Here's the model card for `dino-eva` models, all the pretrained weights can be d
 | `dino-eva-02-B` | `eva02_B_pt_in21k_p14to16` | `1024x1024` | 12 | 55.8 | [config](./configs/dino-eva-02/dino_eva_02_vitdet_b_4attn_1024_lrd0p7_4scale_12ep.py) | [Huggingface](https://huggingface.co/IDEA-CVR/DINO-EVA/resolve/main/dino_eva_02_in21k_pretrain_vitdet_b_4attn_1024_lrd0p7_4scale_12ep.pth) |
 | `dino-eva-02-B` | `eva02_B_pt_in21k_p14to16` | `1536x1536` | 12 | 58.1 | [config](./configs/dino-eva-02/dino_eva_02_vitdet_b_6attn_win32_1536_lrd0p7_4scale_12ep.py) | [Huggingface](https://huggingface.co/IDEA-CVR/DINO-EVA/resolve/main/dino_eva_02_in21k_pretrain_vitdet_b_6attn_win32_1536_lrd0p7_4scale_12ep.pth) |
 | `dino-eva-02-L` | `eva02_L_pt_m38m_p14to16` | `1024x1024` | 12 | 58.9 | [config](./configs/dino-eva-02/dino_eva_02_vitdet_l_4attn_1024_lrd0p8_4scale_12ep.py) | [Huggingface](https://huggingface.co/IDEA-CVR/DINO-EVA/resolve/main/dino_eva_02_m38m_pretrain_vitdet_l_4attn_1024_lrd0p8_4scale_12ep.pth) |
+| `dino-eva-02-L` | `eva02_L_pt_m38m_p14to16` | `1280x1280` | 12 | 59.8 | [config](./configs/dino-eva-02/dino_eva_02_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.py) | [Huggingface](https://huggingface.co/IDEA-CVR/DINO-EVA/resolve/main/dino_eva_02_m38m_pretrain_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.pth) |
 | `dino-eva-02-L` | `eva02_L_m38m_to_o365` | `1536x1536` | 12 | 61.6 | [config](./configs/dino-eva-02/dino_eva_02_vitdet_l_8attn_1536_lrd0p8_4scale_12ep.py) | [Huggingface](https://huggingface.co/IDEA-CVR/DINO-EVA/resolve/main/dino_eva_02_o365_backbone_finetune_vitdet_l_8attn_lsj_1536_4scale_12ep.pth) |

 </div>
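
To fetch the newly released 1280x1280 checkpoint, something like the following should work (a minimal sketch using huggingface_hub; the repo id and filename are taken from the table row above):

from huggingface_hub import hf_hub_download

# download the dino-eva-02-L 1280x1280 checkpoint released in this commit
ckpt_path = hf_hub_download(
    repo_id="IDEA-CVR/DINO-EVA",
    filename="dino_eva_02_m38m_pretrain_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.pth",
)
print(ckpt_path)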
projects/dino_eva/configs/dino-eva-02/dino_eva_02_vitdet_l_4attn_1280_lrd0p8_4scale_12ep.py (+63)
@@ -0,0 +1,63 @@
from functools import partial
from detrex.config import get_config
from detrex.modeling.backbone.eva import get_vit_lr_decay_rate

from ..models.dino_eva_02 import model
from ..common.coco_loader_lsj_1280 import dataloader

# get default config
optimizer = get_config("common/optim.py").AdamW
lr_multiplier = get_config("common/coco_schedule.py").lr_multiplier_12ep
train = get_config("common/train.py").train


# modify model config
model.backbone.net.img_size = 1280
model.backbone.square_pad = 1280
model.backbone.net.patch_size = 16
model.backbone.net.window_size = 16
model.backbone.net.embed_dim = 1024
model.backbone.net.depth = 24
model.backbone.net.num_heads = 16
model.backbone.net.mlp_ratio = 4 * 2 / 3
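# mlp_ratio 4 * 2 / 3 follows EVA-02's SwiGLU FFN: the gated MLP has an
# extra weight matrix, so the hidden dim is scaled by 2/3 to keep the
# parameter count close to a standard mlp_ratio=4 block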
model.backbone.net.use_act_checkpoint = True
model.backbone.net.drop_path_rate = 0.4

# blocks 5, 11, 17, 23 use global attention; all other blocks are windowed
model.backbone.net.window_block_indexes = (
    list(range(0, 5)) + list(range(6, 11)) + list(range(12, 17)) + list(range(18, 23))
)
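# the four global-attention blocks (one closing each group of six) are the
# "4attn" in the config name; at img_size=1280 with patch_size=16 the
# feature map is 80x80 tokens, so window_size=16 partitions each windowed
# block into 5x5 windows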

# modify training config
train.init_checkpoint = "/path/to/eva02_L_pt_m38m_p14to16.pt"
train.output_dir = "./output/dino_eva_02_vitdet_l_4attn_1280_lrd0p8_4scale_12ep"

# max training iterations
train.max_iter = 90000


# gradient clipping for training
train.clip_grad.enabled = True
train.clip_grad.params.max_norm = 0.1
train.clip_grad.params.norm_type = 2

# set training devices
train.device = "cuda"
model.device = train.device

# modify optimizer config
optimizer.lr = 1e-4
optimizer.betas = (0.9, 0.999)
optimizer.weight_decay = 1e-4
optimizer.params.lr_factor_func = partial(get_vit_lr_decay_rate, lr_decay_rate=0.8, num_layers=24)
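# get_vit_lr_decay_rate (ViTDet-style layer-wise decay) scales each block's
# lr by lr_decay_rate ** (num_layers + 1 - layer_id), so earlier pretrained
# layers receive smaller updates; lr_decay_rate=0.8 is the "lrd0p8" in the
# config name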
optimizer.params.overrides = {}
optimizer.params.weight_decay_norm = None

# modify dataloader config
dataloader.train.num_workers = 16

# please note that this is the total batch size; suppose you're training
# on 4 GPUs, then the batch size for each GPU is 16 / 4 = 4
dataloader.train.total_batch_size = 16
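
A quick sanity check on the schedule (a back-of-the-envelope sketch; it assumes the standard COCO train2017 split with 118,287 images):

# 90k iterations at a total batch size of 16 cover COCO train2017
# roughly twelve times, matching the 12ep schedule in the config name
iters = 90000
total_batch_size = 16
coco_train_images = 118287  # size of COCO train2017
print(f"{iters * total_batch_size / coco_train_images:.1f} epochs")  # ~12.2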
