For an introduction to the ShuffleNetV2 network architecture, see the reference.
Below is the hands-on part. First, create a shufflenetv2 module under the nn/modules directory of the ultralytics source tree, with the following code:
# encoding:utf-8
import torch
import torch.nn as nn


def channel_shuffle(x, groups):
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups

    # reshape: (N, C, H, W) -> (N, groups, C // groups, H, W)
    x = x.view(batchsize, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()

    # flatten back to (N, C, H, W)
    x = x.view(batchsize, -1, height, width)
    return x


class CBRM(nn.Module):  # Conv + BN + ReLU + MaxPool2d
    def __init__(self, c1, c2):  # ch_in, ch_out
        super(CBRM, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(c1, c2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(c2),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

    def forward(self, x):
        return self.maxpool(self.conv(x))


class Shuffle_Block(nn.Module):
    def __init__(self, ch_in, ch_out, stride):
        super(Shuffle_Block, self).__init__()
        if not (1 <= stride <= 2):
            raise ValueError('illegal stride value')
        self.stride = stride

        branch_features = ch_out // 2
        assert (self.stride != 1) or (ch_in == branch_features << 1)

        if self.stride > 1:
            # downsampling branch: depthwise 3x3 (stride 2) followed by pointwise 1x1
            self.branch1 = nn.Sequential(
                self.depthwise_conv(ch_in, ch_in, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(ch_in),
                nn.Conv2d(ch_in, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )

        # main branch: pointwise 1x1 -> depthwise 3x3 -> pointwise 1x1
        self.branch2 = nn.Sequential(
            nn.Conv2d(ch_in if (self.stride > 1) else branch_features,
                      branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)  # split along the channel dimension
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
        out = channel_shuffle(out, 2)
        return out
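As a quick sanity check, the following snippet (a minimal sketch, meant to be appended to the same shufflenetv2.py file) instantiates the two modules and confirms the expected shapes: CBRM downsamples to 1/4 resolution, a stride-2 Shuffle_Block halves the resolution and doubles the channels, and a stride-1 block keeps both unchanged.

# Minimal sanity check for the modules above; run this file directly to execute it.
if __name__ == "__main__":
    x = torch.randn(1, 3, 640, 640)

    stem = CBRM(3, 32)                       # stride-2 conv + stride-2 maxpool -> 1/4 resolution
    y = stem(x)
    print(y.shape)                           # torch.Size([1, 32, 160, 160])

    down = Shuffle_Block(32, 64, stride=2)   # downsampling block: 160x160 -> 80x80, 32 -> 64 channels
    y = down(y)
    print(y.shape)                           # torch.Size([1, 64, 80, 80])

    keep = Shuffle_Block(64, 64, stride=1)   # stride-1 block keeps shape and channels
    print(keep(y).shape)                     # torch.Size([1, 64, 80, 80])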
Then import the classes above in nn/modules/__init__.py:
from .transformer import (
    AIFI,
    ...
    TransformerLayer,
)
from .shufflenetv2 import CBRM, Shuffle_Block

__all__ = (
    "AIFI",
    ...
    "CBRM",
    "Shuffle_Block",
)
Next, modify the code in nn/tasks.py:
from ultralytics.nn.modules import (
    AIFI,
    ...
    CBRM,
    Shuffle_Block,
)


def parse_model(d, ch, verbose=True):
    ...
    base_modules = frozenset(
        {
            Classify,
            Conv,
            ...
            CBRM,
            Shuffle_Block,
        }
    )
Besides registering them in the base_modules set, you can also handle them with a dedicated branch, as follows:
        ...
        elif m in frozenset({TorchVision, Index}):
            c2 = args[0]
            c1 = ch[f]
            args = [*args[1:]]
        elif m in (CBRM, Shuffle_Block):
            c1, c2 = ch[f], args[0]
            args = [c1, c2, *args[1:]]
Here we construct the argument list so that it matches the module's constructor signature, i.e. input channels followed by output channels, where the input channel count is taken from the output of the previous layer.
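To make that rewriting concrete, here is a tiny standalone sketch of what the elif branch does; the values of ch, f and args are made up for illustration:

# Illustrative only: mimics the argument rewriting in the elif branch above.
ch = [3, 8, 32]          # hypothetical output channels of the layers parsed so far
f = -1                   # "from" index: take input from the previous layer
args = [64, 2]           # args from the YAML row: [out_channels, stride]

c1, c2 = ch[f], args[0]  # c1 is inherited from the previous layer's output channels
args = [c1, c2, *args[1:]]
print(args)              # [32, 64, 2] -> Shuffle_Block(ch_in=32, ch_out=64, stride=2)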
You can use the following code to inspect the input and output shapes of each layer:
import torch
from ultralytics import YOLO

model = YOLO("yolo11.yaml").model
model.eval()

x = torch.randn(1, 3, 640, 640)

print("{:<4} {:<25} {:>25} {:>25}".format("Idx", "Module", "Input Shape", "Output Shape"))
print("-" * 80)


def hook_fn(module, input, output):
    # only report the top-level layers held in model.model
    if module in model.model:
        def extract_shape(o):
            if isinstance(o, torch.Tensor):
                return list(o.shape)
            elif isinstance(o, (list, tuple)):
                return [extract_shape(x) for x in o]
            else:
                return str(type(o))

        input_shape = extract_shape(input)[0]
        output_shape = extract_shape(output)
        idx = list(model.model).index(module)
        print("{:<4} {:<25} {:>25} {:>25}".format(idx, module.__class__.__name__, str(input_shape), str(output_shape)))


# register a forward hook on every top-level layer, run one dummy forward pass, then clean up
hooks = [m.register_forward_hook(hook_fn) for m in model.model]
_ = model(x)
for h in hooks:
    h.remove()
Then create a new configuration file named yolo11n-shufflenetV2.yaml in the cfg/models/11 directory, with the following content:
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolo11n.yaml' will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 181 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 181 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 231 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 357 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 357 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, CBRM, [32]] # 0-P2/4 160*160
  - [-1, 1, Shuffle_Block, [128, 2]] # 1-P3/8 80*80
  - [-1, 3, Shuffle_Block, [128, 1]] # 2 80*80
  - [-1, 1, Shuffle_Block, [256, 2]] # 3-P4/16 40*40
  - [-1, 7, Shuffle_Block, [256, 1]] # 4 40*40
  - [-1, 1, Shuffle_Block, [512, 2]] # 5-P5/32 20*20
  - [-1, 3, Shuffle_Block, [512, 1]] # 6 20*20

# YOLO11n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]] # 7 40*40
  - [[-1, 4], 1, Concat, [1]] # 8 cat backbone P4 40*40
  - [-1, 2, C3k2, [512, False]] # 9 40*40

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]] # 10 80*80
  - [[-1, 2], 1, Concat, [1]] # 11 cat backbone P3 80*80
  - [-1, 2, C3k2, [256, False]] # 12 (P3/8-small) 80*80

  - [-1, 1, Conv, [256, 3, 2]] # 13 40*40
  - [[-1, 9], 1, Concat, [1]] # 14 cat head P4 40*40
  - [-1, 2, C3k2, [512, False]] # 15 (P4/16-medium) 40*40

  - [-1, 1, Conv, [512, 3, 2]] # 16 20*20
  - [[-1, 6], 1, Concat, [1]] # 17 cat head P5 20*20
  - [-1, 2, C3k2, [1024, True]] # 18 (P5/32-large) 20*20

  - [[12, 15, 18], 1, Detect, [nc]] # 19 Detect(P3, P4, P5)
The layer indices referenced by the Concat modules need to be adjusted to match the Backbone's output resolutions. For example, the first Concat operates at 40x40, so it concatenates with layer 4 of the Backbone. Also, since the yolo11n scale is selected, whose width factor is 0.25, every channel number in the YAML is scaled by 1/4 when the model is built.
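As a rough check of that channel arithmetic, the sketch below assumes the stock width scaling in parse_model, i.e. c2 is multiplied by the width factor and rounded up to a multiple of 8 by make_divisible:

# Rough channel arithmetic for the n scale (width = 0.25); illustrative only.
import math

def scaled(c2, width=0.25, divisor=8):
    # assumed to mirror make_divisible(c2 * width, 8) in ultralytics' parse_model
    return math.ceil(c2 * width / divisor) * divisor

for c2 in (32, 128, 256, 512):
    print(c2, "->", scaled(c2))
# 32 -> 8, 128 -> 32, 256 -> 64, 512 -> 128, matching the model summary printed below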
Finally, you can start training:
from ultralytics import YOLO

model = YOLO("yolo11n-shufflenetV2.yaml")
model.train(epochs=100, data="coco128.yaml")
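Once training finishes, the usual Ultralytics API can be used for evaluation and inference; a brief sketch (the image path is a placeholder):

# Sketch only: evaluate the trained model and run a quick inference.
metrics = model.val(data="coco128.yaml")
print(metrics.box.map50)            # mAP@0.5 on the validation split

results = model.predict("bus.jpg")  # "bus.jpg" is a placeholder image path
results[0].show()                   # visualize the detections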
When the model is built, the printed network structure is as follows:
                  from  n    params  module                                              arguments
  0                 -1  1       232  ultralytics.nn.modules.shufflenetv2.CBRM            [3, 8]
  1                 -1  1       872  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [8, 32, 2]
  2                 -1  2      1504  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [32, 32, 1]
  3                 -1  1      3968  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [32, 64, 2]
  4                 -1  4     10112  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [64, 64, 1]
  5                 -1  1     14080  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [64, 128, 2]
  6                 -1  2     18304  ultralytics.nn.modules.shufflenetv2.Shuffle_Block   [128, 128, 1]
  7                 -1  1         0  torch.nn.modules.upsampling.Upsample                [None, 2, 'nearest']
  8            [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat                  [1]
  9                 -1  1     86720  ultralytics.nn.modules.block.C3k2                   [192, 128, 1, False]
 10                 -1  1         0  torch.nn.modules.upsampling.Upsample                [None, 2, 'nearest']
 11            [-1, 2]  1         0  ultralytics.nn.modules.conv.Concat                  [1]
 12                 -1  1     25952  ultralytics.nn.modules.block.C3k2                   [160, 64, 1, False]
 13                 -1  1     36992  ultralytics.nn.modules.conv.Conv                    [64, 64, 3, 2]
 14            [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat                  [1]
 15                 -1  1     86720  ultralytics.nn.modules.block.C3k2                   [192, 128, 1, False]
 16                 -1  1    147712  ultralytics.nn.modules.conv.Conv                    [128, 128, 3, 2]
 17            [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat                  [1]
 18                 -1  1    346112  ultralytics.nn.modules.block.C3k2                   [256, 256, 1, True]
 19       [12, 15, 18]  1    464912  ultralytics.nn.modules.head.Detect                  [80, [64, 128, 256]]
YOLO11n-shufflenetV2 summary: 203 layers, 1,244,192 parameters, 1,244,176 gradients, 3.5 GFLOPs
As shown above, the Backbone has been successfully replaced with ShuffleNetV2.
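To put the 1.24M parameters / 3.5 GFLOPs in context, you can print the stock YOLO11n summary next to the modified model with model.info(); the exact numbers depend on the ultralytics version installed:

from ultralytics import YOLO

YOLO("yolo11n.yaml").info()               # stock YOLO11n, for comparison
YOLO("yolo11n-shufflenetV2.yaml").info()  # backbone replaced with ShuffleNetV2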