YOLO11更换Backbone为ShuffleNetV2

关于ShuffleNetV2网络结构的介绍,可以参考原论文《ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design》。

下面是对应的实战部分。首先在ultralytics源码目录nn/modules下新建1个shufflenetv2.py文件,其代码如下:

#encoding:utf-8
import torch
import torch.nn as nn

def channel_shuffle(x, groups):
    """Interleave the channels of a 4-D tensor across ``groups`` groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back into a single channel
    axis, so that consecutive output channels come from different groups.

    Args:
        x: tensor whose size unpacks into (batch, channels, height, width).
        groups: number of channel groups to interleave.

    Returns:
        A tensor with the same shape as ``x`` and its channels reordered.
    """
    n, c, h, w = x.shape
    per_group = c // groups
    # (n, groups, per_group, h, w) -> swap the group axes -> (n, c, h, w)
    shuffled = x.view(n, groups, per_group, h, w).transpose(1, 2).contiguous()
    return shuffled.view(n, -1, h, w)


class CBRM(nn.Module):
    """Stem block: 3x3 stride-2 Conv -> BatchNorm -> ReLU, then 3x3 stride-2 MaxPool.

    Args:
        c1: number of input channels.
        c2: number of output channels.
    """

    def __init__(self, c1, c2):
        super().__init__()
        stem_layers = [
            nn.Conv2d(c1, c2, 3, 2, 1, bias=False),  # kernel 3, stride 2, padding 1
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stem_layers)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

    def forward(self, x):
        """Run the conv stem on ``x`` and max-pool the result."""
        features = self.conv(x)
        return self.maxpool(features)


class Shuffle_Block(nn.Module):
    """ShuffleNetV2-style two-branch unit followed by a 2-group channel shuffle.

    With ``stride == 1`` the input is split in half along the channel axis:
    the first half passes through unchanged and the second half goes through
    ``branch2``. With ``stride > 1`` the whole input is fed to both
    ``branch1`` and ``branch2`` (``branch1`` only exists in this case), and
    each branch applies a strided 3x3 depthwise convolution.

    Each branch produces ``ch_out // 2`` channels, so the concatenated output
    has ``2 * (ch_out // 2)`` channels.

    Args:
        ch_in: number of input channels.
        ch_out: requested number of output channels.
        stride: stride of the depthwise convolutions; must satisfy
            ``1 <= stride <= 2``.

    Raises:
        ValueError: if ``stride`` is out of range, or if ``stride == 1`` and
            ``ch_in != 2 * (ch_out // 2)`` (the pass-through half must match
            the width of ``branch2``).
    """

    def __init__(self, ch_in, ch_out, stride):
        super(Shuffle_Block, self).__init__()
        if not (1 <= stride <= 2):
            raise ValueError('illegal stride value')
        self.stride = stride
        branch_features = ch_out // 2
        # Raise instead of assert: asserts are stripped under `python -O`, which
        # would let an invalid block be built and fail later inside forward().
        if self.stride == 1 and ch_in != branch_features << 1:
            raise ValueError(
                'stride=1 requires ch_in == 2 * (ch_out // 2), '
                'got ch_in={}, ch_out={}'.format(ch_in, ch_out)
            )
        if self.stride > 1:
            # Strided path for the full input: depthwise 3x3 -> BN -> 1x1 -> BN -> ReLU.
            self.branch1 = nn.Sequential(
                self.depthwise_conv(ch_in, ch_in, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(ch_in),

                nn.Conv2d(ch_in, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )
        # 1x1 -> BN -> ReLU -> depthwise 3x3 -> BN -> 1x1 -> BN -> ReLU.
        # For stride 1 this branch only sees the second half of the channels.
        self.branch2 = nn.Sequential(
            nn.Conv2d(ch_in if (self.stride > 1) else branch_features,
                      branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        """Return a ``Conv2d`` with ``groups=i`` (one group per input channel)."""
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        """Apply the two branches, concatenate on the channel axis, then shuffle."""
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)  # split into two halves along dim 1 (channels)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
        out = channel_shuffle(out, 2)
        return out

然后在nn/modules/__init__.py中导入上述类:

from .transformer import (
    AIFI,
    ...
    TransformerLayer,
)
from .shufflenetv2 import CBRM, Shuffle_Block

__all__ = (
    "AIFI",
    ...
    "CBRM",
    "Shuffle_Block"
)

之后修改nn/tasks.py中的代码:

from ultralytics.nn.modules import (
    AIFI,
    ...
    CBRM,
    Shuffle_Block
)

def parse_model(d, ch, verbose=True):
    ...
    base_modules = frozenset(
        {
            Classify,
            Conv,
            ...
            CBRM,
            Shuffle_Block
        }
    )

除了将其放置在base_modules变量中,还可以使用如下的方式:

    ...
    elif m in frozenset({TorchVision, Index}):
        c2 = args[0]
        c1 = ch[f]
        args = [*args[1:]]
    elif m in (CBRM,Shuffle_Block):
        c1, c2 = ch[f], args[0]
        args = [c1,c2,*args[1:]]

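这里我们按照对应模块构造函数的要求重新组织参数:第1个为输入通道数c1,第2个为输出通道数c2,其余参数(如stride)原样传入。其中c1取自上一层的输出通道数ch[f],c2取自配置文件中的args[0]。需要注意的是,这种写法不会像base_modules分支那样按width系数对c2进行缩放,此时配置文件中填写的就是实际的输出通道数。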

对此可以使用如下的代码查看每层的输入与输出维度:

import torch
from ultralytics import YOLO

# Build the model from its YAML definition and print, for every top-level
# layer, the shape of its first input argument and of its output.
model = YOLO("yolo11.yaml").model
model.eval()

x = torch.randn(1, 3, 640, 640)

# One format string shared by the header and the rows so the columns line up.
ROW_FORMAT = "{:<4} {:<25} {:<25} {:>25}"
print(ROW_FORMAT.format("Idx", "Module", "Input Shape", "Output Shape"))
print("-" * 80)


def extract_shape(o):
    """Return the shape of a tensor, recursing into lists and tuples."""
    if isinstance(o, torch.Tensor):
        return list(o.shape)
    if isinstance(o, (list, tuple)):
        return [extract_shape(item) for item in o]
    return str(type(o))


def make_hook(idx):
    """Create a forward hook that prints one table row for layer ``idx``."""

    def hook_fn(module, inputs, output):
        # ``inputs`` is the tuple of positional arguments; report the first one.
        input_shape = extract_shape(inputs)[0]
        output_shape = extract_shape(output)
        print(ROW_FORMAT.format(idx, module.__class__.__name__, str(input_shape), str(output_shape)))

    return hook_fn


# Bind each layer's index when registering, instead of searching for it per call.
hooks = [layer.register_forward_hook(make_hook(i)) for i, layer in enumerate(model.model)]

try:
    # Only shapes are needed, so skip building the autograd graph.
    with torch.no_grad():
        _ = model(x)
finally:
    # Always detach the hooks, even if the forward pass raises.
    for h in hooks:
        h.remove()

之后在cfg/models/11目录下新建1个yolo11n-shufflenetV2.yaml的配置文件,其内容如下:

nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  # NOTE(review): the summaries below do not match this model's own training log
  # (203 layers, 1,244,192 parameters, 3.5 GFLOPs at scale 'n'); they appear to be
  # carried over from the stock YOLO11 config - re-measure before relying on them.
  n: [0.50, 0.25, 1024] # summary: 181 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 181 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 231 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 357 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 357 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# ShuffleNetV2 backbone (replaces the stock YOLO11 backbone)
# Feature-map sizes in the comments assume a 640*640 input.
backbone:
  # [from, repeats, module, args]
  - [-1, 1, CBRM, [32]]              # 0-P2/4           160*160
  - [-1, 1, Shuffle_Block, [128, 2]] # 1-P3/8           80*80
  - [-1, 3, Shuffle_Block, [128, 1]] # 2                80*80
  - [-1, 1, Shuffle_Block, [256, 2]] # 3-P4/16          40*40
  - [-1, 7, Shuffle_Block, [256, 1]] # 4                40*40
  - [-1, 1, Shuffle_Block, [512, 2]] # 5-P5/32          20*20
  - [-1, 3, Shuffle_Block, [512, 1]] # 6                20*20

# YOLO11n head (layer indices continue from the backbone, starting at 7)
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]    # 7   40*40
  - [[-1, 4], 1, Concat, [1]] # 8  cat backbone P4 (layer 4)   40*40
  - [-1, 2, C3k2, [512, False]] # 9                     40*40

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]   # 10   80*80
  - [[-1, 2], 1, Concat, [1]] # 11 cat backbone P3 (layer 2)   80*80
  - [-1, 2, C3k2, [256, False]] # 12 (P3/8-small)       80*80

  - [-1, 1, Conv, [256, 3, 2]]  # 13                    40*40
  - [[-1, 9], 1, Concat, [1]] # 14 cat head P4 (layer 9)       40*40
  - [-1, 2, C3k2, [512, False]] # 15 (P4/16-medium)     40*40

  - [-1, 1, Conv, [512, 3, 2]] # 16                     20*20
  - [[-1, 6], 1, Concat, [1]] # 17 cat backbone P5 (layer 6)   20*20
  - [-1, 2, C3k2, [1024, True]] # 18 (P5/32-large)      20*20
  - [[12, 15, 18], 1, Detect, [nc]] # 19 Detect(P3, P4, P5)

我们需要根据Backbone各层输出的特征图尺寸来调整Concat模块所拼接的层号。比如第1个Concat处的特征图尺寸为40x40,因此拼接的是Backbone中输出同为40x40的第4层。另外,这里选择的是yolo11n模型,其width为0.25,因此配置文件中的通道数均要乘以0.25,例如CBRM的32实际为8,Shuffle_Block的128实际为32。

最后就可以开始训练了:

from ultralytics import YOLO

# Build the model from the custom ShuffleNetV2 config and start training.
model = YOLO("yolo11n-shufflenetV2.yaml")
model.train(
    data="coco128.yaml",
    epochs=100,
)

其网络输出如下:


                   from  n    params  module                                       arguments
  0                  -1  1       232  ultralytics.nn.modules.shufflenetv2.CBRM     [3, 8]
  1                  -1  1       872  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[8, 32, 2]
  2                  -1  2      1504  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[32, 32, 1]
  3                  -1  1      3968  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[32, 64, 2]
  4                  -1  4     10112  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[64, 64, 1]
  5                  -1  1     14080  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[64, 128, 2]
  6                  -1  2     18304  ultralytics.nn.modules.shufflenetv2.Shuffle_Block[128, 128, 1]
  7                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']
  8             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]
  9                  -1  1     86720  ultralytics.nn.modules.block.C3k2            [192, 128, 1, False]
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']
 11             [-1, 2]  1         0  ultralytics.nn.modules.conv.Concat           [1]
 12                  -1  1     25952  ultralytics.nn.modules.block.C3k2            [160, 64, 1, False]
 13                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]
 14             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]
 15                  -1  1     86720  ultralytics.nn.modules.block.C3k2            [192, 128, 1, False]
 16                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]
 17             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]
 18                  -1  1    346112  ultralytics.nn.modules.block.C3k2            [256, 256, 1, True]
 19        [12, 15, 18]  1    464912  ultralytics.nn.modules.head.Detect           [80, [64, 128, 256]]
YOLO11n-shufflenetV2 summary: 203 layers, 1,244,192 parameters, 1,244,176 gradients, 3.5 GFLOPs

可以看到,Backbone已成功替换为ShuffleNetV2。

参考视频:

https://www.bilibili.com/video/BV1gn8BzEEGE/

如果喜欢这篇文章或对您有帮助，可以：[☕] 请我喝杯咖啡 | [💓] 小额赞助

码力全开 / YOLO11更换Backbone为ShuffleNetV2