The main change needed is to switch use_global_stats
to true
. This switches to using the moving average.
layer { bottom: 'layerx' top: 'layerx-bn' name: 'layerx-bn' type: 'BatchNorm'
batch_norm_param {
use_global_stats: true # use pre-calculated average and variance
}
param { lr_mult: 0 }
param { lr_mult: 0 }
param { lr_mult: 0 }}
# channel-wise scale and bias are separate
layer { bottom: 'layerx-bn' top: 'layerx-bn' name: 'layerx-bn-scale' type: 'Scale',
scale_param {
bias_term: true
axis: 1 # scale separately for each channel
num_axes: 1 # ... but not spatially (default)
}}