Refinements and Improvements to CNNs for Image Classification
From the paper: Bag of Tricks for Image Classification with Convolutional Neural Networks
By: Tong He, Zhi Zhang, Hang Zhang, Zhongyue Zhang, Junyuan Xie, Mu Li
Much of the recent progress made in image classification research can be credited to training procedure refinements, such as changes in data augmentations and optimization methods. In the literature, however, most refinements are either briefly mentioned as implementation details or only visible in source code. In this paper, we will examine a collection of such refinements and empirically evaluate their impact on the final model accuracy through ablation study. We will show that, by combining these refinements together, we are able to improve various CNN models significantly. For example, we raise ResNet-50's top-1 validation accuracy from 75.3% to 79.29% on ImageNet. We will also demonstrate that improvement on image classification accuracy leads to better transfer learning performance in other application domains such as object detection and semantic segmentation.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
#export
from exp.nb_09 import *
bs = 64
path = datasets.untar_data(datasets.URLs.IMAGENETTE_160) # downloads and returns a path to folder
tfms = [make_rgb, ResizeFixed(128), to_byte_tensor, to_float_tensor] # transforms to be applied to images
il = ImageList.from_files(path, tfms=tfms) # Imagelist from files
sd = SplitData.split_by_func(il, partial(grandparent_splitter, valid_name="val")) # Splitdata by function
ll = label_by_func(sd, parent_labeler, proc_y=CategoryProcesser()) # label the data by parent folder
data = ll.to_databunch(bs, c_in=3, c_out=10)
callbacks = [partial(AvgStatsCallback, accuracy),
CudaCallback,
partial(BatchTransformXCallback, norm_imagenette)]
nfs = [64,64,128,256]
#export
def prev_pow_2(x): return 2**math.floor(math.log2(x))
for i in [1,3]:
print(f'{i}:', (2**math.floor(math.log2(i*3*3))))
Now we'll define a function that will automatically generate our model layers based on the geometry suggested in the BoT paper:
#export
def get_cnn_layers(data, nfs, layer, **kwargs):
def f(ni, nf, stride=2):
return layer(ni, nf, ks=3, stride=stride, **kwargs)
l1 = data.c_in # channels in from databunch
l2 = prev_pow_2(l1*3*3)
layers = [f(l1, l2, stride=1), # input channels, 2**input channels
f(l2, l2*2, stride=2),
f(l2*2, l2*4, stride=2)]
nfs = [l2*4] + nfs
layers += [f(nfs[i], nfs[i+1]) for i in range(len(nfs)-1)]
layers += [nn.AdaptiveAvgPool2d(1), Lambda(flatten), nn.Linear(nfs[-1], data.c_out)]
return layers
#export
def get_cnn_model(data, nfs, layer, **kwargs):
return nn.Sequential(*get_cnn_layers(data, nfs, layer, **kwargs))
def get_learn_run(data, nfs, layer, lr, cbs=None, opt_func=None, uniform=False, **kwargs):
model = get_cnn_model(data, nfs, layer, **kwargs)
init_cnn(model, uniform=uniform)
return get_runner(model, data, lr=lr, cbs=cbs, opt_func=opt_func)
sched = combine_scheds([0.3, 0.7], cos_1cycle_anneal(0.1, 0.3, 0.05))
learn, run = get_learn_run(data, nfs, conv_layer, lr=0.2, cbs=callbacks+[partial(ParamScheduler, 'lr', sched)])
run.fit(1, learn)
A function that prints a summary of our model's layers and their activation shapes would be very helpful.
We can do this by registering Hooks and sending a batch through the model, printing what happens at every stage:
#export
def model_summary(run, learn, data, find_all=False):
xb, yb = get_batch(data.valid_dl, run)
device = next(learn.model.parameters()).device
xb, yb = xb.to(device), yb.to(device)
hf = lambda hook,mod,inp,outp: print(f'Module:\n{mod}\nOutput Shape: {outp.shape}\n')
mods = find_mods(learn.model, is_lin_layer) if find_all else learn.model.children()
with Hooks(mods, hf) as hook: learn.model(xb)
model_summary(run, learn, data)
%time run.fit(5, learn)
nb_auto_export()