[New configs] Segformer_ade20k #3537

Open · wants to merge 2 commits into base: dev-1.x
77 changes: 77 additions & 0 deletions mmseg/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,77 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms import RandomFlip, RandomResize, Resize, TestTimeAug
from mmcv.transforms.loading import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler

from mmseg.datasets.ade import ADE20KDataset
from mmseg.datasets.transforms import (LoadAnnotations, PackSegInputs,
                                       PhotoMetricDistortion, RandomCrop)
from mmseg.evaluation import IoUMetric

# dataset settings
dataset_type = ADE20KDataset
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (512, 512)
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=LoadAnnotations),
    dict(
        type=RandomResize,
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PhotoMetricDistortion),
    dict(type=PackSegInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=Resize, scale=(2048, 512), keep_ratio=True),
    # Load annotations after ``Resize`` because the ground truth
    # does not need to be resized.
    dict(type=LoadAnnotations),
    dict(type=PackSegInputs)
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type=LoadImageFromFile, backend_args=None),
    dict(
        type=TestTimeAug,
        transforms=[[
            dict(type=Resize, scale_factor=r, keep_ratio=True)
            for r in img_ratios
        ],
                    [
                        dict(type=RandomFlip, prob=0., direction='horizontal'),
                        dict(type=RandomFlip, prob=1., direction='horizontal')
                    ], [dict(type=LoadAnnotations)],
                    [dict(type=PackSegInputs)]])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type=InfiniteSampler, shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
test_evaluator = val_evaluator
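
As a quick orientation for readers new to the pure-Python config style (not part of this PR): a downstream config can pull in this dataset base with ``read_base()`` and override only what it needs, mirroring the ``update`` pattern the SegFormer configs later in this PR use. The file placement and the alternative ``data_root`` below are hypothetical.

# Hypothetical downstream config (not in this PR), assumed to live in a
# sibling directory of _base_, e.g. mmseg/configs/segformer/.
from mmengine.config import read_base

with read_base():
    from .._base_.datasets.ade20k import *  # noqa: F401,F403

# Hypothetical override: point at a local copy of ADE20K and enlarge the
# training batch, keeping everything else from the base.
data_root = '/data/ADEChallengeData2016'  # hypothetical path
train_dataloader.update(  # noqa: F405
    batch_size=8, dataset=dict(data_root=data_root))
val_dataloader.update(dataset=dict(data_root=data_root))  # noqa: F405
test_dataloader = val_dataloader  # noqa: F405
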
77 changes: 77 additions & 0 deletions mmseg/configs/_base_/datasets/ade20k_640x640.py
@@ -0,0 +1,77 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.transforms import RandomFlip, RandomResize, Resize, TestTimeAug
from mmcv.transforms.loading import LoadImageFromFile
from mmengine.dataset.sampler import DefaultSampler, InfiniteSampler

from mmseg.datasets.ade import ADE20KDataset
from mmseg.datasets.transforms import (LoadAnnotations, PackSegInputs,
                                       PhotoMetricDistortion, RandomCrop)
from mmseg.evaluation import IoUMetric

# dataset settings
dataset_type = ADE20KDataset
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (640, 640)
train_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=LoadAnnotations),
    dict(
        type=RandomResize,
        scale=(2560, 640),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type=RandomCrop, crop_size=crop_size, cat_max_ratio=0.75),
    dict(type=RandomFlip, prob=0.5),
    dict(type=PhotoMetricDistortion),
    dict(type=PackSegInputs)
]
test_pipeline = [
    dict(type=LoadImageFromFile),
    dict(type=Resize, scale=(2560, 640), keep_ratio=True),
    # Load annotations after ``Resize`` because the ground truth
    # does not need to be resized.
    dict(type=LoadAnnotations),
    dict(type=PackSegInputs)
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type=LoadImageFromFile, backend_args=None),
    dict(
        type=TestTimeAug,
        transforms=[[
            dict(type=Resize, scale_factor=r, keep_ratio=True)
            for r in img_ratios
        ],
                    [
                        dict(type=RandomFlip, prob=0., direction='horizontal'),
                        dict(type=RandomFlip, prob=1., direction='horizontal')
                    ], [dict(type=LoadAnnotations)],
                    [dict(type=PackSegInputs)]])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type=InfiniteSampler, shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type=IoUMetric, iou_metrics=['mIoU'])
test_evaluator = val_evaluator
24 changes: 24 additions & 0 deletions mmseg/configs/_base_/default_runtime.py
@@ -0,0 +1,24 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.runner import LogProcessor
from mmengine.visualization import LocalVisBackend

from mmseg.models.segmentors import SegTTAModel
from mmseg.visualization import SegLocalVisualizer

default_scope = None
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

vis_backends = [dict(type=LocalVisBackend)]
visualizer = dict(
    type=SegLocalVisualizer, vis_backends=vis_backends, name='visualizer')
log_processor = dict(type=LogProcessor, window_size=50, by_epoch=False)

log_level = 'INFO'
load_from = None
resume = False

tta_model = dict(type=SegTTAModel)
49 changes: 49 additions & 0 deletions mmseg/configs/_base_/models/segformer_mit_b0.py
@@ -0,0 +1,49 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmseg.models.backbones import MixVisionTransformer
from mmseg.models.data_preprocessor import SegDataPreProcessor
from mmseg.models.decode_heads import SegformerHead
from mmseg.models.losses import CrossEntropyLoss
from mmseg.models.segmentors import EncoderDecoder

# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type=SegDataPreProcessor,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type=EncoderDecoder,
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type=MixVisionTransformer,
        in_channels=3,
        embed_dims=32,
        num_stages=4,
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        sr_ratios=[8, 4, 2, 1],
        out_indices=(0, 1, 2, 3),
        mlp_ratio=4,
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1),
    decode_head=dict(
        type=SegformerHead,
        in_channels=[32, 64, 160, 256],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type=CrossEntropyLoss, use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
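
Because this config references classes directly rather than registry strings, the model can be built straight from the MMSeg registry. A minimal smoke-test sketch (not part of this PR), assuming the ``model`` dict above is in scope (e.g. pasted into a script) and the SyncBN layers are reverted to plain BN for a single-process CPU run; the shape comment is an expectation, not a measured result.

import torch
from mmengine.model import revert_sync_batchnorm

from mmseg.registry import MODELS

segmentor = MODELS.build(model)  # ``model`` is the dict defined above
segmentor = revert_sync_batchnorm(segmentor)  # SyncBN -> BN for a CPU check
segmentor.eval()

with torch.no_grad():
    # 'tensor' mode returns raw decode-head logits, skipping pre/post-processing.
    logits = segmentor(torch.randn(1, 3, 512, 512), mode='tensor')
# SegFormer predicts at 1/4 resolution, so roughly (1, num_classes, 128, 128).
print(logits.shape)
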
35 changes: 35 additions & 0 deletions mmseg/configs/_base_/schedules/schedule_160k.py
@@ -0,0 +1,35 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
                            LoggerHook, ParamSchedulerHook)
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import PolyLR
from mmengine.runner.loops import IterBasedTrainLoop, TestLoop, ValLoop
from torch.optim.sgd import SGD

from mmseg.engine.hooks import SegVisualizationHook

# optimizer
optimizer = dict(type=SGD, lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type=OptimWrapper, optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type=PolyLR,
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=160000,
        by_epoch=False)
]
# training schedule for 160k
train_cfg = dict(type=IterBasedTrainLoop, max_iters=160000, val_interval=16000)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

default_hooks = dict(
    timer=dict(type=IterTimerHook),
    logger=dict(type=LoggerHook, interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type=ParamSchedulerHook),
    checkpoint=dict(type=CheckpointHook, by_epoch=False, interval=16000),
    sampler_seed=dict(type=DistSamplerSeedHook),
    visualization=dict(type=SegVisualizationHook))
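
For reference (not part of this PR), the poly policy configured above decays the learning rate roughly as lr_t = (lr_0 - eta_min) * (1 - t / T)^power + eta_min. mmengine applies the decay step by step, but the closed form below is a close approximation for eyeballing the schedule.

def poly_lr(step, base_lr=0.01, eta_min=1e-4, power=0.9, total_iters=160000):
    """Closed-form approximation of the PolyLR schedule configured above."""
    return (base_lr - eta_min) * (1 - step / total_iters)**power + eta_min


for step in (0, 40000, 80000, 120000, 160000):
    print(f'iter {step:>6d}: lr ~ {poly_lr(step):.5f}')
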
@@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .._base_.models.segformer_mit_b0 import *  # noqa: F401,F403
    from .._base_.datasets.ade20k import *  # noqa: F401,F403
    from .._base_.schedules.schedule_160k import *  # noqa: F401,F403
    from .._base_.default_runtime import *  # noqa: F401,F403

from mmengine.model.weight_init import PretrainedInit
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, PolyLR
from torch.optim.adamw import AdamW

crop_size = (512, 512)
data_preprocessor.update(size=crop_size)  # noqa: F405
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth'  # noqa
model.update(  # noqa: F405
    data_preprocessor=data_preprocessor,  # noqa: F405
    backbone=dict(init_cfg=dict(type=PretrainedInit, checkpoint=checkpoint)),
    decode_head=dict(num_classes=150))

optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW, lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01),
    paramwise_cfg=dict(
        custom_keys={
            'pos_block': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.),
            'head': dict(lr_mult=10.)
        }))

param_scheduler = [
    dict(type=LinearLR, start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type=PolyLR,
        eta_min=0.0,
        power=1.0,
        begin=1500,
        end=160000,
        by_epoch=False,
    )
]
train_dataloader.update(batch_size=2, num_workers=2)  # noqa: F405
val_dataloader.update(batch_size=1, num_workers=4)  # noqa: F405
test_dataloader = val_dataloader  # noqa: F405
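
For readers unfamiliar with running the new-style configs (not part of this PR): they go through mmengine's Config and Runner just like the old registry-string configs. A minimal sketch, assuming a recent mmengine that understands the lazy-import style and a hypothetical final path for this file.

from mmengine.config import Config
from mmengine.runner import Runner

# Hypothetical path; adjust to wherever this config lands in the repository.
cfg = Config.fromfile(
    'mmseg/configs/segformer/segformer_mit_b0_8xb2_160k_ade20k_512x512.py')
cfg.work_dir = './work_dirs/segformer_mit_b0_ade20k'  # hypothetical output dir

runner = Runner.from_cfg(cfg)
runner.train()
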
@@ -0,0 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth'  # noqa

# model settings
model.update(  # noqa: F405
    backbone=dict(
        init_cfg=dict(checkpoint=checkpoint),
        embed_dims=64,
        num_heads=[1, 2, 5, 8],
        num_layers=[2, 2, 2, 2]),
    decode_head=dict(in_channels=[64, 128, 320, 512]))
@@ -0,0 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth'  # noqa

# model settings
model.update(  # noqa: F405
    backbone=dict(
        init_cfg=dict(checkpoint=checkpoint),
        embed_dims=64,
        num_heads=[1, 2, 5, 8],
        num_layers=[3, 4, 6, 3]),
    decode_head=dict(in_channels=[64, 128, 320, 512]))
@@ -0,0 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth'  # noqa

# model settings
model.update(  # noqa: F405
    backbone=dict(
        init_cfg=dict(checkpoint=checkpoint),
        embed_dims=64,
        num_heads=[1, 2, 5, 8],
        num_layers=[3, 4, 18, 3]),
    decode_head=dict(in_channels=[64, 128, 320, 512]))
@@ -0,0 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth'  # noqa

# model settings
model.update(  # noqa: F405
    backbone=dict(
        init_cfg=dict(checkpoint=checkpoint),
        embed_dims=64,
        num_heads=[1, 2, 5, 8],
        num_layers=[3, 8, 27, 3]),
    decode_head=dict(in_channels=[64, 128, 320, 512]))
@@ -0,0 +1,16 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base

with read_base():
    from .segformer_mit_b0_8xb2_160k_ade20k_512x512 import *  # noqa: F401,F403

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth'  # noqa

# model settings
model.update(  # noqa: F405
    backbone=dict(
        init_cfg=dict(checkpoint=checkpoint),
        embed_dims=64,
        num_heads=[1, 2, 5, 8],
        num_layers=[3, 6, 40, 3]),
    decode_head=dict(in_channels=[64, 128, 320, 512]))
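
For quick comparison, the backbone settings the variant configs above use: B0 comes from the base model config, while B1-B5 widen the embedding to 64, deepen the stages, keep num_heads=[1, 2, 5, 8], and feed the head in_channels=[64, 128, 320, 512]. A short summary (not part of this PR), grounded in the diffs above; the key names are illustrative only.

# Summary of the MiT backbone settings used by the configs in this PR.
MIT_VARIANTS = {
    'mit_b0': dict(embed_dims=32, num_layers=[2, 2, 2, 2],
                   head_in_channels=[32, 64, 160, 256]),
    'mit_b1': dict(embed_dims=64, num_layers=[2, 2, 2, 2],
                   head_in_channels=[64, 128, 320, 512]),
    'mit_b2': dict(embed_dims=64, num_layers=[3, 4, 6, 3],
                   head_in_channels=[64, 128, 320, 512]),
    'mit_b3': dict(embed_dims=64, num_layers=[3, 4, 18, 3],
                   head_in_channels=[64, 128, 320, 512]),
    'mit_b4': dict(embed_dims=64, num_layers=[3, 8, 27, 3],
                   head_in_channels=[64, 128, 320, 512]),
    'mit_b5': dict(embed_dims=64, num_layers=[3, 6, 40, 3],
                   head_in_channels=[64, 128, 320, 512]),
}
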