final

hyungjin
Commit b8f4c4e67a8c618780c05f2758b647a4e1d802e7 b8f4c4e6 1 parent b20b09cc
Showing 41 changed files with 2479 additions and 0 deletions
2015101094_전형진_최종보고서.pdf
source/.vscode/settings.json
source/LICENSE
source/README.md
source/__pycache__/lsq_int.cpython-36.pyc
source/__pycache__/lsq_sq.cpython-36.pyc
source/__pycache__/replace.cpython-36.pyc
source/__pycache__/replace_int.cpython-36.pyc
source/__pycache__/utils.cpython-36.pyc
source/lsq_int.py
source/lsq_sq.py
source/main.py
source/models/__pycache__/mobilenet.cpython-36.pyc
source/models/densenet.py
source/models/dla.py
source/models/dla_simple.py
source/models/dpn.py
source/models/efficientnet.py
source/models/googlenet.py
source/models/lenet.py
--- a/2015101094_전형진_최종보고서.pdf 0 → 100644
View file @b8f4c4e
+++ b/2015101094_전형진_최종보고서.pdf 0 → 100644
View file @b8f4c4e
--- a/source/.vscode/settings.json 0 → 100644
View file @b8f4c4e
+++ b/source/.vscode/settings.json 0 → 100644
View file @b8f4c4e
+ {
+     "python.pythonPath": "/home/chunjin1212/anaconda3/envs/torch/bin/python"
+ }
\ No newline at end of file
--- a/source/LICENSE 0 → 100644
View file @b8f4c4e
+++ b/source/LICENSE 0 → 100644
View file @b8f4c4e
+ MIT License
+ 
+ Copyright (c) 2017 liukuang
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
--- a/source/README.md 0 → 100644
View file @b8f4c4e
+++ b/source/README.md 0 → 100644
View file @b8f4c4e
+ # Train CIFAR10 with PyTorch
+ 
+ I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.
+ 
+ ## Prerequisites
+ - Python 3.6+
+ - PyTorch 1.0+
+ 
+ ## Training
+ ```
+ # Start training with: 
+ python main.py
+ 
+ # You can manually resume the training with: 
+ python main.py --resume <path/to/ckpt.pth> --lr=0.01
+ ```
+ 
+ ## Accuracy
+ | Model             | Acc.        |
+ | ----------------- | ----------- |
+ | [VGG16](https://arxiv.org/abs/1409.1556)              | 92.64%      |
+ | [ResNet18](https://arxiv.org/abs/1512.03385)          | 93.02%      |
+ | [ResNet50](https://arxiv.org/abs/1512.03385)          | 93.62%      |
+ | [ResNet101](https://arxiv.org/abs/1512.03385)         | 93.75%      |
+ | [RegNetX_200MF](https://arxiv.org/abs/2003.13678)     | 94.24%      |
+ | [RegNetY_400MF](https://arxiv.org/abs/2003.13678)     | 94.29%      |
+ | [MobileNetV2](https://arxiv.org/abs/1801.04381)       | 94.43%      |
+ | [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431)  | 94.73%      |
+ | [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431)  | 94.82%      |
+ | [SimpleDLA](https://arxiv.org/abs/1707.06484)         | 94.89%      |
+ | [DenseNet121](https://arxiv.org/abs/1608.06993)       | 95.04%      |
+ | [PreActResNet18](https://arxiv.org/abs/1603.05027)    | 95.11%      |
+ | [DPN92](https://arxiv.org/abs/1707.01629)             | 95.16%      |
+ | [DLA](https://arxiv.org/pdf/1707.06484.pdf)           | 95.47%      |
+ 
--- a/source/__pycache__/lsq_int.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/__pycache__/lsq_int.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/__pycache__/lsq_sq.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/__pycache__/lsq_sq.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/__pycache__/replace.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/__pycache__/replace.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/__pycache__/replace_int.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/__pycache__/replace_int.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/__pycache__/utils.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/__pycache__/utils.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/lsq_int.py 0 → 100644
View file @b8f4c4e
+++ b/source/lsq_int.py 0 → 100644
View file @b8f4c4e
--- a/source/lsq_sq.py 0 → 100644
View file @b8f4c4e
+++ b/source/lsq_sq.py 0 → 100644
View file @b8f4c4e
--- a/source/main.py 0 → 100644
View file @b8f4c4e
+++ b/source/main.py 0 → 100644
View file @b8f4c4e
+ '''Train CIFAR10 with PyTorch.'''
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ import torch.nn.functional as F
+ import torch.backends.cudnn as cudnn
+ 
+ import torchvision
+ import torchvision.transforms as transforms
+ 
+ import os
+ import argparse
+ 
+ from models.mobilenet import MobileNet1
+ from utils import progress_bar
+ from replace import replace_sq
+ from collections import OrderedDict
+ # from lsq_int import Input_Quantizer
+ from lsq_sq import Input_Quantizer
+ from replace_int import replace_int
+ 
+ 
+ parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
+ parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
+ parser.add_argument('--resume', '-r', default=None, type=str,
+                     help='resume from checkpoint')
+ parser.add_argument('--dir', default='default', type=str,
+                     help='save dir name')
+ parser.add_argument('--test', default=False, action='store_true',
+                     help='test version or not')
+ parser.add_argument('--qat', default=False, action='store_true',
+                     help='qat version or not')
+ args = parser.parse_args()
+ 
+ 
+ # Training
+ def train(epoch):
+     print('\nEpoch: %d' % epoch)
+     net.train()
+     train_loss = 0
+     correct = 0
+     total = 0
+     for batch_idx, (inputs, targets) in enumerate(trainloader):
+         inputs, targets = inputs.to(device), targets.to(device)
+         optimizer.zero_grad()
+         outputs = net(inputs)
+         loss = criterion(outputs, targets)
+         loss.backward()
+         optimizer.step()
+ 
+         train_loss += loss.item()
+         _, predicted = outputs.max(1)
+         total += targets.size(0)
+         correct += predicted.eq(targets).sum().item()
+         progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+                      % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
+ 
+ 
+ def test(epoch):
+     global best_acc
+     dir_name = args.dir
+     net.eval()
+     test_loss = 0
+     correct = 0
+     total = 0
+     with torch.no_grad():
+         for batch_idx, (inputs, targets) in enumerate(testloader):
+ 
+             inputs, targets = inputs.to(device), targets.to(device)
+             outputs = net(inputs)
+             loss = criterion(outputs, targets)
+ 
+             test_loss += loss.item()
+             _, predicted = outputs.max(1)
+             total += targets.size(0)
+             correct += predicted.eq(targets).sum().item()
+             progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+                          % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
+ 
+     # Save checkpoint.
+     acc = 100.*correct/total
+     if acc > best_acc:
+         print('Saving..')
+         state = {
+             'net': net.state_dict(),
+             'acc': acc,
+             'epoch': epoch,
+         }
+         if not os.path.isdir(dir_name):
+             os.mkdir(dir_name)
+         torch.save(state, f'./{dir_name}/ckpt.pth')
+         best_acc = acc
+     print('*** best Test Accuracy: ', best_acc)
+ 
+ if __name__ == '__main__':
+     device = 'cuda' if torch.cuda.is_available() else 'cpu'
+     best_acc = 0  # best test accuracy
+     start_epoch = 0  # start from epoch 0 or last checkpoint epoch
+ 
+     # Data
+     print('==> Preparing data..')
+     transform_train = transforms.Compose([
+         transforms.RandomCrop(32, padding=4),
+         transforms.RandomHorizontalFlip(),
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+     ])
+ 
+     transform_test = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+     ])
+ 
+     trainset = torchvision.datasets.CIFAR10(
+         root='./data', train=True, download=True, transform=transform_train)
+     trainloader = torch.utils.data.DataLoader(
+         trainset, batch_size=256, shuffle=True, num_workers=4)
+ 
+     testset = torchvision.datasets.CIFAR10(
+         root='./data', train=False, download=True, transform=transform_test)
+     testloader = torch.utils.data.DataLoader(
+         testset, batch_size=100, shuffle=False, num_workers=4)
+ 
+     classes = ('plane', 'car', 'bird', 'cat', 'deer',
+             'dog', 'frog', 'horse', 'ship', 'truck')
+ 
+     # Model
+     print('==> Building model..')
+     net = MobileNet1(3, 10)
+     net = net.to(device)
+ 
+     if args.qat:
+         net = replace_sq(model=net)
+         net = nn.Sequential(Input_Quantizer(abit=8, dequantize=True),
+                             net)
+ 
+     if args.resume:
+         # Load checkpoint.
+         print('==> Resuming from checkpoint..')
+         assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
+         checkpoint = torch.load(args.resume)
+         new_state_dict = OrderedDict()
+         for k, v in checkpoint['net'].items():
+             k = k.replace("module.", "")
+             new_state_dict[k] = v
+         net.load_state_dict(new_state_dict)
+         best_acc = 0.0
+         start_epoch = 0
+ 
+     print(net)
+     # replace_int(net)
+ 
+     if device == 'cuda':
+         net = torch.nn.DataParallel(net)
+         cudnn.benchmark = True
+         net.cuda()
+ 
+     criterion = nn.CrossEntropyLoss()
+     optimizer = optim.SGD(net.parameters(), lr=args.lr,
+                         momentum=0.9, weight_decay=5e-4)
+     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
+ 
+ 
+     for epoch in range(start_epoch, start_epoch+200):
+         if args.test:
+             test(epoch)
+             break
+         else:
+             train(epoch)
+             test(epoch)
+             scheduler.step()
--- a/source/models/__pycache__/mobilenet.cpython-36.pyc 0 → 100644
View file @b8f4c4e
+++ b/source/models/__pycache__/mobilenet.cpython-36.pyc 0 → 100644
View file @b8f4c4e
--- a/source/models/densenet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/densenet.py 0 → 100644
View file @b8f4c4e
+ '''DenseNet in PyTorch.'''
+ import math
+ 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class Bottleneck(nn.Module):
+     def __init__(self, in_planes, growth_rate):
+         super(Bottleneck, self).__init__()
+         self.bn1 = nn.BatchNorm2d(in_planes)
+         self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(4*growth_rate)
+         self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
+ 
+     def forward(self, x):
+         out = self.conv1(F.relu(self.bn1(x)))
+         out = self.conv2(F.relu(self.bn2(out)))
+         out = torch.cat([out,x], 1)
+         return out
+ 
+ 
+ class Transition(nn.Module):
+     def __init__(self, in_planes, out_planes):
+         super(Transition, self).__init__()
+         self.bn = nn.BatchNorm2d(in_planes)
+         self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
+ 
+     def forward(self, x):
+         out = self.conv(F.relu(self.bn(x)))
+         out = F.avg_pool2d(out, 2)
+         return out
+ 
+ 
+ class DenseNet(nn.Module):
+     def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
+         super(DenseNet, self).__init__()
+         self.growth_rate = growth_rate
+ 
+         num_planes = 2*growth_rate
+         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
+ 
+         self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
+         num_planes += nblocks[0]*growth_rate
+         out_planes = int(math.floor(num_planes*reduction))
+         self.trans1 = Transition(num_planes, out_planes)
+         num_planes = out_planes
+ 
+         self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
+         num_planes += nblocks[1]*growth_rate
+         out_planes = int(math.floor(num_planes*reduction))
+         self.trans2 = Transition(num_planes, out_planes)
+         num_planes = out_planes
+ 
+         self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
+         num_planes += nblocks[2]*growth_rate
+         out_planes = int(math.floor(num_planes*reduction))
+         self.trans3 = Transition(num_planes, out_planes)
+         num_planes = out_planes
+ 
+         self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
+         num_planes += nblocks[3]*growth_rate
+ 
+         self.bn = nn.BatchNorm2d(num_planes)
+         self.linear = nn.Linear(num_planes, num_classes)
+ 
+     def _make_dense_layers(self, block, in_planes, nblock):
+         layers = []
+         for i in range(nblock):
+             layers.append(block(in_planes, self.growth_rate))
+             in_planes += self.growth_rate
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = self.conv1(x)
+         out = self.trans1(self.dense1(out))
+         out = self.trans2(self.dense2(out))
+         out = self.trans3(self.dense3(out))
+         out = self.dense4(out)
+         out = F.avg_pool2d(F.relu(self.bn(out)), 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ def DenseNet121():
+     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
+ 
+ def DenseNet169():
+     return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
+ 
+ def DenseNet201():
+     return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
+ 
+ def DenseNet161():
+     return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
+ 
+ def densenet_cifar():
+     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
+ 
+ def test():
+     net = densenet_cifar()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y)
+ 
+ # test()
--- a/source/models/dla.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/dla.py 0 → 100644
View file @b8f4c4e
+ '''DLA in PyTorch.
+ 
+ Reference:
+     Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class BasicBlock(nn.Module):
+     expansion = 1
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(BasicBlock, self).__init__()
+         self.conv1 = nn.Conv2d(
+             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(self.expansion*planes)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.bn2(self.conv2(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class Root(nn.Module):
+     def __init__(self, in_channels, out_channels, kernel_size=1):
+         super(Root, self).__init__()
+         self.conv = nn.Conv2d(
+             in_channels, out_channels, kernel_size,
+             stride=1, padding=(kernel_size - 1) // 2, bias=False)
+         self.bn = nn.BatchNorm2d(out_channels)
+ 
+     def forward(self, xs):
+         x = torch.cat(xs, 1)
+         out = F.relu(self.bn(self.conv(x)))
+         return out
+ 
+ 
+ class Tree(nn.Module):
+     def __init__(self, block, in_channels, out_channels, level=1, stride=1):
+         super(Tree, self).__init__()
+         self.level = level
+         if level == 1:
+             self.root = Root(2*out_channels, out_channels)
+             self.left_node = block(in_channels, out_channels, stride=stride)
+             self.right_node = block(out_channels, out_channels, stride=1)
+         else:
+             self.root = Root((level+2)*out_channels, out_channels)
+             for i in reversed(range(1, level)):
+                 subtree = Tree(block, in_channels, out_channels,
+                                level=i, stride=stride)
+                 self.__setattr__('level_%d' % i, subtree)
+             self.prev_root = block(in_channels, out_channels, stride=stride)
+             self.left_node = block(out_channels, out_channels, stride=1)
+             self.right_node = block(out_channels, out_channels, stride=1)
+ 
+     def forward(self, x):
+         xs = [self.prev_root(x)] if self.level > 1 else []
+         for i in reversed(range(1, self.level)):
+             level_i = self.__getattr__('level_%d' % i)
+             x = level_i(x)
+             xs.append(x)
+         x = self.left_node(x)
+         xs.append(x)
+         x = self.right_node(x)
+         xs.append(x)
+         out = self.root(xs)
+         return out
+ 
+ 
+ class DLA(nn.Module):
+     def __init__(self, block=BasicBlock, num_classes=10):
+         super(DLA, self).__init__()
+         self.base = nn.Sequential(
+             nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(16),
+             nn.ReLU(True)
+         )
+ 
+         self.layer1 = nn.Sequential(
+             nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(16),
+             nn.ReLU(True)
+         )
+ 
+         self.layer2 = nn.Sequential(
+             nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(32),
+             nn.ReLU(True)
+         )
+ 
+         self.layer3 = Tree(block,  32,  64, level=1, stride=1)
+         self.layer4 = Tree(block,  64, 128, level=2, stride=2)
+         self.layer5 = Tree(block, 128, 256, level=2, stride=2)
+         self.layer6 = Tree(block, 256, 512, level=1, stride=2)
+         self.linear = nn.Linear(512, num_classes)
+ 
+     def forward(self, x):
+         out = self.base(x)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = self.layer5(out)
+         out = self.layer6(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def test():
+     net = DLA()
+     print(net)
+     x = torch.randn(1, 3, 32, 32)
+     y = net(x)
+     print(y.size())
+ 
+ 
+ if __name__ == '__main__':
+     test()
--- a/source/models/dla_simple.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/dla_simple.py 0 → 100644
View file @b8f4c4e
+ '''Simplified version of DLA in PyTorch.
+ 
+ Note this implementation is not identical to the original paper version.
+ But it seems to work fine.
+ 
+ See dla.py for the original paper version.
+ 
+ Reference:
+     Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class BasicBlock(nn.Module):
+     expansion = 1
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(BasicBlock, self).__init__()
+         self.conv1 = nn.Conv2d(
+             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(self.expansion*planes)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.bn2(self.conv2(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class Root(nn.Module):
+     def __init__(self, in_channels, out_channels, kernel_size=1):
+         super(Root, self).__init__()
+         self.conv = nn.Conv2d(
+             in_channels, out_channels, kernel_size,
+             stride=1, padding=(kernel_size - 1) // 2, bias=False)
+         self.bn = nn.BatchNorm2d(out_channels)
+ 
+     def forward(self, xs):
+         x = torch.cat(xs, 1)
+         out = F.relu(self.bn(self.conv(x)))
+         return out
+ 
+ 
+ class Tree(nn.Module):
+     def __init__(self, block, in_channels, out_channels, level=1, stride=1):
+         super(Tree, self).__init__()
+         self.root = Root(2*out_channels, out_channels)
+         if level == 1:
+             self.left_tree = block(in_channels, out_channels, stride=stride)
+             self.right_tree = block(out_channels, out_channels, stride=1)
+         else:
+             self.left_tree = Tree(block, in_channels,
+                                   out_channels, level=level-1, stride=stride)
+             self.right_tree = Tree(block, out_channels,
+                                    out_channels, level=level-1, stride=1)
+ 
+     def forward(self, x):
+         out1 = self.left_tree(x)
+         out2 = self.right_tree(out1)
+         out = self.root([out1, out2])
+         return out
+ 
+ 
+ class SimpleDLA(nn.Module):
+     def __init__(self, block=BasicBlock, num_classes=10):
+         super(SimpleDLA, self).__init__()
+         self.base = nn.Sequential(
+             nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(16),
+             nn.ReLU(True)
+         )
+ 
+         self.layer1 = nn.Sequential(
+             nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(16),
+             nn.ReLU(True)
+         )
+ 
+         self.layer2 = nn.Sequential(
+             nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
+             nn.BatchNorm2d(32),
+             nn.ReLU(True)
+         )
+ 
+         self.layer3 = Tree(block,  32,  64, level=1, stride=1)
+         self.layer4 = Tree(block,  64, 128, level=2, stride=2)
+         self.layer5 = Tree(block, 128, 256, level=2, stride=2)
+         self.layer6 = Tree(block, 256, 512, level=1, stride=2)
+         self.linear = nn.Linear(512, num_classes)
+ 
+     def forward(self, x):
+         out = self.base(x)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = self.layer5(out)
+         out = self.layer6(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def test():
+     net = SimpleDLA()
+     print(net)
+     x = torch.randn(1, 3, 32, 32)
+     y = net(x)
+     print(y.size())
+ 
+ 
+ if __name__ == '__main__':
+     test()
--- a/source/models/dpn.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/dpn.py 0 → 100644
View file @b8f4c4e
+ '''Dual Path Networks in PyTorch.'''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class Bottleneck(nn.Module):
+     def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
+         super(Bottleneck, self).__init__()
+         self.out_planes = out_planes
+         self.dense_depth = dense_depth
+ 
+         self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(in_planes)
+         self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
+         self.bn2 = nn.BatchNorm2d(in_planes)
+         self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
+ 
+         self.shortcut = nn.Sequential()
+         if first_layer:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(out_planes+dense_depth)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = self.bn3(self.conv3(out))
+         x = self.shortcut(x)
+         d = self.out_planes
+         out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class DPN(nn.Module):
+     def __init__(self, cfg):
+         super(DPN, self).__init__()
+         in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
+         num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
+ 
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.last_planes = 64
+         self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
+         self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
+         self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
+         self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
+         self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
+ 
+     def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
+         strides = [stride] + [1]*(num_blocks-1)
+         layers = []
+         for i,stride in enumerate(strides):
+             layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
+             self.last_planes = out_planes + (i+2) * dense_depth
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def DPN26():
+     cfg = {
+         'in_planes': (96,192,384,768),
+         'out_planes': (256,512,1024,2048),
+         'num_blocks': (2,2,2,2),
+         'dense_depth': (16,32,24,128)
+     }
+     return DPN(cfg)
+ 
+ def DPN92():
+     cfg = {
+         'in_planes': (96,192,384,768),
+         'out_planes': (256,512,1024,2048),
+         'num_blocks': (3,4,20,3),
+         'dense_depth': (16,32,24,128)
+     }
+     return DPN(cfg)
+ 
+ 
+ def test():
+     net = DPN92()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y)
+ 
+ # test()
--- a/source/models/efficientnet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/efficientnet.py 0 → 100644
View file @b8f4c4e
+ '''EfficientNet in PyTorch.
+ 
+ Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
+ 
+ Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ def swish(x):
+     return x * x.sigmoid()
+ 
+ 
+ def drop_connect(x, drop_ratio):
+     keep_ratio = 1.0 - drop_ratio
+     mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
+     mask.bernoulli_(keep_ratio)
+     x.div_(keep_ratio)
+     x.mul_(mask)
+     return x
+ 
+ 
+ class SE(nn.Module):
+     '''Squeeze-and-Excitation block with Swish.'''
+ 
+     def __init__(self, in_channels, se_channels):
+         super(SE, self).__init__()
+         self.se1 = nn.Conv2d(in_channels, se_channels,
+                              kernel_size=1, bias=True)
+         self.se2 = nn.Conv2d(se_channels, in_channels,
+                              kernel_size=1, bias=True)
+ 
+     def forward(self, x):
+         out = F.adaptive_avg_pool2d(x, (1, 1))
+         out = swish(self.se1(out))
+         out = self.se2(out).sigmoid()
+         out = x * out
+         return out
+ 
+ 
+ class Block(nn.Module):
+     '''expansion + depthwise + pointwise + squeeze-excitation'''
+ 
+     def __init__(self,
+                  in_channels,
+                  out_channels,
+                  kernel_size,
+                  stride,
+                  expand_ratio=1,
+                  se_ratio=0.,
+                  drop_rate=0.):
+         super(Block, self).__init__()
+         self.stride = stride
+         self.drop_rate = drop_rate
+         self.expand_ratio = expand_ratio
+ 
+         # Expansion
+         channels = expand_ratio * in_channels
+         self.conv1 = nn.Conv2d(in_channels,
+                                channels,
+                                kernel_size=1,
+                                stride=1,
+                                padding=0,
+                                bias=False)
+         self.bn1 = nn.BatchNorm2d(channels)
+ 
+         # Depthwise conv
+         self.conv2 = nn.Conv2d(channels,
+                                channels,
+                                kernel_size=kernel_size,
+                                stride=stride,
+                                padding=(1 if kernel_size == 3 else 2),
+                                groups=channels,
+                                bias=False)
+         self.bn2 = nn.BatchNorm2d(channels)
+ 
+         # SE layers
+         se_channels = int(in_channels * se_ratio)
+         self.se = SE(channels, se_channels)
+ 
+         # Output
+         self.conv3 = nn.Conv2d(channels,
+                                out_channels,
+                                kernel_size=1,
+                                stride=1,
+                                padding=0,
+                                bias=False)
+         self.bn3 = nn.BatchNorm2d(out_channels)
+ 
+         # Skip connection if in and out shapes are the same (MV-V2 style)
+         self.has_skip = (stride == 1) and (in_channels == out_channels)
+ 
+     def forward(self, x):
+         out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
+         out = swish(self.bn2(self.conv2(out)))
+         out = self.se(out)
+         out = self.bn3(self.conv3(out))
+         if self.has_skip:
+             if self.training and self.drop_rate > 0:
+                 out = drop_connect(out, self.drop_rate)
+             out = out + x
+         return out
+ 
+ 
+ class EfficientNet(nn.Module):
+     def __init__(self, cfg, num_classes=10):
+         super(EfficientNet, self).__init__()
+         self.cfg = cfg
+         self.conv1 = nn.Conv2d(3,
+                                32,
+                                kernel_size=3,
+                                stride=1,
+                                padding=1,
+                                bias=False)
+         self.bn1 = nn.BatchNorm2d(32)
+         self.layers = self._make_layers(in_channels=32)
+         self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)
+ 
+     def _make_layers(self, in_channels):
+         layers = []
+         cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
+                                      'stride']]
+         b = 0
+         blocks = sum(self.cfg['num_blocks'])
+         for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
+             strides = [stride] + [1] * (num_blocks - 1)
+             for stride in strides:
+                 drop_rate = self.cfg['drop_connect_rate'] * b / blocks
+                 layers.append(
+                     Block(in_channels,
+                           out_channels,
+                           kernel_size,
+                           stride,
+                           expansion,
+                           se_ratio=0.25,
+                           drop_rate=drop_rate))
+                 in_channels = out_channels
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = swish(self.bn1(self.conv1(x)))
+         out = self.layers(out)
+         out = F.adaptive_avg_pool2d(out, 1)
+         out = out.view(out.size(0), -1)
+         dropout_rate = self.cfg['dropout_rate']
+         if self.training and dropout_rate > 0:
+             out = F.dropout(out, p=dropout_rate)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def EfficientNetB0():
+     cfg = {
+         'num_blocks': [1, 2, 2, 3, 3, 4, 1],
+         'expansion': [1, 6, 6, 6, 6, 6, 6],
+         'out_channels': [16, 24, 40, 80, 112, 192, 320],
+         'kernel_size': [3, 3, 5, 3, 5, 5, 3],
+         'stride': [1, 2, 2, 2, 1, 2, 1],
+         'dropout_rate': 0.2,
+         'drop_connect_rate': 0.2,
+     }
+     return EfficientNet(cfg)
+ 
+ 
+ def test():
+     net = EfficientNetB0()
+     x = torch.randn(2, 3, 32, 32)
+     y = net(x)
+     print(y.shape)
+ 
+ 
+ # Run the smoke test only when this file is executed as a script.
+ if __name__ == '__main__':
+     test()
--- a/source/models/googlenet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/googlenet.py 0 → 100644
View file @b8f4c4e
+ '''GoogLeNet with PyTorch.'''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class Inception(nn.Module):
+     def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
+         super(Inception, self).__init__()
+         # 1x1 conv branch
+         self.b1 = nn.Sequential(
+             nn.Conv2d(in_planes, n1x1, kernel_size=1),
+             nn.BatchNorm2d(n1x1),
+             nn.ReLU(True),
+         )
+ 
+         # 1x1 conv -> 3x3 conv branch
+         self.b2 = nn.Sequential(
+             nn.Conv2d(in_planes, n3x3red, kernel_size=1),
+             nn.BatchNorm2d(n3x3red),
+             nn.ReLU(True),
+             nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
+             nn.BatchNorm2d(n3x3),
+             nn.ReLU(True),
+         )
+ 
+         # 1x1 conv -> 5x5 conv branch
+         self.b3 = nn.Sequential(
+             nn.Conv2d(in_planes, n5x5red, kernel_size=1),
+             nn.BatchNorm2d(n5x5red),
+             nn.ReLU(True),
+             nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
+             nn.BatchNorm2d(n5x5),
+             nn.ReLU(True),
+             nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
+             nn.BatchNorm2d(n5x5),
+             nn.ReLU(True),
+         )
+ 
+         # 3x3 pool -> 1x1 conv branch
+         self.b4 = nn.Sequential(
+             nn.MaxPool2d(3, stride=1, padding=1),
+             nn.Conv2d(in_planes, pool_planes, kernel_size=1),
+             nn.BatchNorm2d(pool_planes),
+             nn.ReLU(True),
+         )
+ 
+     def forward(self, x):
+         y1 = self.b1(x)
+         y2 = self.b2(x)
+         y3 = self.b3(x)
+         y4 = self.b4(x)
+         return torch.cat([y1,y2,y3,y4], 1)
+ 
+ 
+ class GoogLeNet(nn.Module):
+     def __init__(self):
+         super(GoogLeNet, self).__init__()
+         self.pre_layers = nn.Sequential(
+             nn.Conv2d(3, 192, kernel_size=3, padding=1),
+             nn.BatchNorm2d(192),
+             nn.ReLU(True),
+         )
+ 
+         self.a3 = Inception(192,  64,  96, 128, 16, 32, 32)
+         self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
+ 
+         self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+ 
+         self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
+         self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
+         self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
+         self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
+         self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
+ 
+         self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
+         self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
+ 
+         self.avgpool = nn.AvgPool2d(8, stride=1)
+         self.linear = nn.Linear(1024, 10)
+ 
+     def forward(self, x):
+         out = self.pre_layers(x)
+         out = self.a3(out)
+         out = self.b3(out)
+         out = self.maxpool(out)
+         out = self.a4(out)
+         out = self.b4(out)
+         out = self.c4(out)
+         out = self.d4(out)
+         out = self.e4(out)
+         out = self.maxpool(out)
+         out = self.a5(out)
+         out = self.b5(out)
+         out = self.avgpool(out)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def test():
+     net = GoogLeNet()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y.size())
+ 
+ # test()
--- a/source/models/lenet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/lenet.py 0 → 100644
View file @b8f4c4e
+ '''LeNet in PyTorch.'''
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ class LeNet(nn.Module):
+     def __init__(self):
+         super(LeNet, self).__init__()
+         self.conv1 = nn.Conv2d(3, 6, 5)
+         self.conv2 = nn.Conv2d(6, 16, 5)
+         self.fc1   = nn.Linear(16*5*5, 120)
+         self.fc2   = nn.Linear(120, 84)
+         self.fc3   = nn.Linear(84, 10)
+ 
+     def forward(self, x):
+         out = F.relu(self.conv1(x))
+         out = F.max_pool2d(out, 2)
+         out = F.relu(self.conv2(out))
+         out = F.max_pool2d(out, 2)
+         out = out.view(out.size(0), -1)
+         out = F.relu(self.fc1(out))
+         out = F.relu(self.fc2(out))
+         out = self.fc3(out)
+         return out
--- a/source/models/mobilenet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/mobilenet.py 0 → 100644
View file @b8f4c4e
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ class MobileNet1(nn.Module):
+     def __init__(self, inchannel=3, num_classes=10):
+         super(MobileNet1, self).__init__()
+         self.num_classes = num_classes
+ 
+         def conv_bn(inp, oup, stride):
+             return nn.Sequential(
+                 nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+                 nn.BatchNorm2d(oup),
+                 nn.Hardswish()
+                 #nn.Hardsigmoid(inplace=True)
+                 # nn.LeakyReLU(negative_slope=0.1, inplace=True)
+                 # nn.ReLU(inplace=True)
+             )
+ 
+         def conv_dw(inp, oup, stride):
+             return nn.Sequential(
+                 nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+                 nn.BatchNorm2d(inp),
+                 nn.Hardswish(),
+                 #nn.Hardsigmoid(inplace=True),
+                 # nn.LeakyReLU(negative_slope=0.1, inplace=True),
+                 # nn.ReLU(inplace=True),
+     
+                 nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+                 nn.BatchNorm2d(oup),
+                 nn.Hardswish()
+                 #nn.Hardsigmoid(inplace=True)
+                 # nn.LeakyReLU(negative_slope=0.1, inplace=True)
+                 # nn.ReLU(inplace=True),
+             )
+ 
+         self.model = nn.Sequential(
+             conv_bn(inchannel, 32, 1), 
+             conv_dw( 32,  64, 1),
+             conv_dw( 64, 128, 2),
+             conv_dw(128, 128, 1),
+             conv_dw(128, 256, 2),
+             conv_dw(256, 256, 1),
+             conv_dw(256, 512, 2),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 1024, 2),
+             conv_dw(1024, 1024, 1),
+             nn.AdaptiveAvgPool2d(1)
+         )
+         self.fc = nn.Linear(1024, self.num_classes)
+ 
+ 
+     def forward(self, x):
+         x, act_scale = self.model(x)
+         # x = self.model(x)
+         x = x.view(x.size(0), -1)
+         # x = self.fc(x)
+         x = self.fc(x, act_scale)
+         return x
--- a/source/models/mobilenetv2.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/mobilenetv2.py 0 → 100644
View file @b8f4c4e
+ '''MobileNetV2 in PyTorch.
+ 
+ See the paper "Inverted Residuals and Linear Bottlenecks:
+ Mobile Networks for Classification, Detection and Segmentation" for more details.
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class Block(nn.Module):
+     '''expand + depthwise + pointwise'''
+     def __init__(self, in_planes, out_planes, expansion, stride):
+         super(Block, self).__init__()
+         self.stride = stride
+ 
+         planes = expansion * in_planes
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
+         self.bn3 = nn.BatchNorm2d(out_planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride == 1 and in_planes != out_planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
+                 nn.BatchNorm2d(out_planes),
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = self.bn3(self.conv3(out))
+         out = out + self.shortcut(x) if self.stride==1 else out
+         return out
+ 
+ 
+ class MobileNetV2(nn.Module):
+     # (expansion, out_planes, num_blocks, stride)
+     cfg = [(1,  16, 1, 1),
+            (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
+            (6,  32, 3, 2),
+            (6,  64, 4, 2),
+            (6,  96, 3, 1),
+            (6, 160, 3, 2),
+            (6, 320, 1, 1)]
+ 
+     def __init__(self, num_classes=10):
+         super(MobileNetV2, self).__init__()
+         # NOTE: change conv1 stride 2 -> 1 for CIFAR10
+         self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(32)
+         self.layers = self._make_layers(in_planes=32)
+         self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
+         self.bn2 = nn.BatchNorm2d(1280)
+         self.linear = nn.Linear(1280, num_classes)
+ 
+     def _make_layers(self, in_planes):
+         layers = []
+         for expansion, out_planes, num_blocks, stride in self.cfg:
+             strides = [stride] + [1]*(num_blocks-1)
+             for stride in strides:
+                 layers.append(Block(in_planes, out_planes, expansion, stride))
+                 in_planes = out_planes
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layers(out)
+         out = F.relu(self.bn2(self.conv2(out)))
+         # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def test():
+     net = MobileNetV2()
+     x = torch.randn(2,3,32,32)
+     y = net(x)
+     print(y.size())
+ 
+ # test()
--- a/source/models/pnasnet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/pnasnet.py 0 → 100644
View file @b8f4c4e
+ '''PNASNet in PyTorch.
+ 
+ Paper: Progressive Neural Architecture Search
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class SepConv(nn.Module):
+     '''Separable Convolution.'''
+     def __init__(self, in_planes, out_planes, kernel_size, stride):
+         super(SepConv, self).__init__()
+         self.conv1 = nn.Conv2d(in_planes, out_planes,
+                                kernel_size, stride,
+                                padding=(kernel_size-1)//2,
+                                bias=False, groups=in_planes)
+         self.bn1 = nn.BatchNorm2d(out_planes)
+ 
+     def forward(self, x):
+         return self.bn1(self.conv1(x))
+ 
+ 
+ class CellA(nn.Module):
+     def __init__(self, in_planes, out_planes, stride=1):
+         super(CellA, self).__init__()
+         self.stride = stride
+         self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
+         if stride==2:
+             self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
+             self.bn1 = nn.BatchNorm2d(out_planes)
+ 
+     def forward(self, x):
+         y1 = self.sep_conv1(x)
+         y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
+         if self.stride==2:
+             y2 = self.bn1(self.conv1(y2))
+         return F.relu(y1+y2)
+ 
+ class CellB(nn.Module):
+     def __init__(self, in_planes, out_planes, stride=1):
+         super(CellB, self).__init__()
+         self.stride = stride
+         # Left branch
+         self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
+         self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
+         # Right branch
+         self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
+         if stride==2:
+             self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
+             self.bn1 = nn.BatchNorm2d(out_planes)
+         # Reduce channels
+         self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
+         self.bn2 = nn.BatchNorm2d(out_planes)
+ 
+     def forward(self, x):
+         # Left branch
+         y1 = self.sep_conv1(x)
+         y2 = self.sep_conv2(x)
+         # Right branch
+         y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
+         if self.stride==2:
+             y3 = self.bn1(self.conv1(y3))
+         y4 = self.sep_conv3(x)
+         # Concat & reduce channels
+         b1 = F.relu(y1+y2)
+         b2 = F.relu(y3+y4)
+         y = torch.cat([b1,b2], 1)
+         return F.relu(self.bn2(self.conv2(y)))
+ 
+ class PNASNet(nn.Module):
+     def __init__(self, cell_type, num_cells, num_planes):
+         super(PNASNet, self).__init__()
+         self.in_planes = num_planes
+         self.cell_type = cell_type
+ 
+         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(num_planes)
+ 
+         self.layer1 = self._make_layer(num_planes, num_cells=6)
+         self.layer2 = self._downsample(num_planes*2)
+         self.layer3 = self._make_layer(num_planes*2, num_cells=6)
+         self.layer4 = self._downsample(num_planes*4)
+         self.layer5 = self._make_layer(num_planes*4, num_cells=6)
+ 
+         self.linear = nn.Linear(num_planes*4, 10)
+ 
+     def _make_layer(self, planes, num_cells):
+         layers = []
+         for _ in range(num_cells):
+             layers.append(self.cell_type(self.in_planes, planes, stride=1))
+             self.in_planes = planes
+         return nn.Sequential(*layers)
+ 
+     def _downsample(self, planes):
+         layer = self.cell_type(self.in_planes, planes, stride=2)
+         self.in_planes = planes
+         return layer
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = self.layer5(out)
+         out = F.avg_pool2d(out, 8)
+         out = self.linear(out.view(out.size(0), -1))
+         return out
+ 
+ 
+ def PNASNetA():
+     return PNASNet(CellA, num_cells=6, num_planes=44)
+ 
+ def PNASNetB():
+     return PNASNet(CellB, num_cells=6, num_planes=32)
+ 
+ 
+ def test():
+     net = PNASNetB()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y)
+ 
+ # test()
--- a/source/models/preact_resnet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/preact_resnet.py 0 → 100644
View file @b8f4c4e
+ '''Pre-activation ResNet in PyTorch.
+ 
+ Reference:
+ [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+     Identity Mappings in Deep Residual Networks. arXiv:1603.05027
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class PreActBlock(nn.Module):
+     '''Pre-activation version of the BasicBlock.'''
+     expansion = 1
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(PreActBlock, self).__init__()
+         self.bn1 = nn.BatchNorm2d(in_planes)
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+ 
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(x))
+         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
+         out = self.conv1(out)
+         out = self.conv2(F.relu(self.bn2(out)))
+         out += shortcut
+         return out
+ 
+ 
+ class PreActBottleneck(nn.Module):
+     '''Pre-activation version of the original Bottleneck module.'''
+     expansion = 4
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(PreActBottleneck, self).__init__()
+         self.bn1 = nn.BatchNorm2d(in_planes)
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(planes)
+         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
+ 
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(x))
+         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
+         out = self.conv1(out)
+         out = self.conv2(F.relu(self.bn2(out)))
+         out = self.conv3(F.relu(self.bn3(out)))
+         out += shortcut
+         return out
+ 
+ 
+ class PreActResNet(nn.Module):
+     def __init__(self, block, num_blocks, num_classes=10):
+         super(PreActResNet, self).__init__()
+         self.in_planes = 64
+ 
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+         self.linear = nn.Linear(512*block.expansion, num_classes)
+ 
+     def _make_layer(self, block, planes, num_blocks, stride):
+         strides = [stride] + [1]*(num_blocks-1)
+         layers = []
+         for stride in strides:
+             layers.append(block(self.in_planes, planes, stride))
+             self.in_planes = planes * block.expansion
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = self.conv1(x)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def PreActResNet18():
+     return PreActResNet(PreActBlock, [2,2,2,2])
+ 
+ def PreActResNet34():
+     return PreActResNet(PreActBlock, [3,4,6,3])
+ 
+ def PreActResNet50():
+     return PreActResNet(PreActBottleneck, [3,4,6,3])
+ 
+ def PreActResNet101():
+     return PreActResNet(PreActBottleneck, [3,4,23,3])
+ 
+ def PreActResNet152():
+     return PreActResNet(PreActBottleneck, [3,8,36,3])
+ 
+ 
+ def test():
+     net = PreActResNet18()
+     y = net((torch.randn(1,3,32,32)))
+     print(y.size())
+ 
+ # test()
--- a/source/models/regnet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/regnet.py 0 → 100644
View file @b8f4c4e
+ '''RegNet in PyTorch.
+ 
+ Paper: "Designing Network Design Spaces".
+ 
+ Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class SE(nn.Module):
+     '''Squeeze-and-Excitation block.'''
+ 
+     def __init__(self, in_planes, se_planes):
+         super(SE, self).__init__()
+         self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
+         self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
+ 
+     def forward(self, x):
+         out = F.adaptive_avg_pool2d(x, (1, 1))
+         out = F.relu(self.se1(out))
+         out = self.se2(out).sigmoid()
+         out = x * out
+         return out
+ 
+ 
+ class Block(nn.Module):
+     def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
+         super(Block, self).__init__()
+         # 1x1
+         w_b = int(round(w_out * bottleneck_ratio))
+         self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(w_b)
+         # 3x3
+         num_groups = w_b // group_width
+         self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
+                                stride=stride, padding=1, groups=num_groups, bias=False)
+         self.bn2 = nn.BatchNorm2d(w_b)
+         # se
+         self.with_se = se_ratio > 0
+         if self.with_se:
+             w_se = int(round(w_in * se_ratio))
+             self.se = SE(w_b, w_se)
+         # 1x1
+         self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(w_out)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or w_in != w_out:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(w_in, w_out,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(w_out)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = F.relu(self.bn2(self.conv2(out)))
+         if self.with_se:
+             out = self.se(out)
+         out = self.bn3(self.conv3(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class RegNet(nn.Module):
+     def __init__(self, cfg, num_classes=10):
+         super(RegNet, self).__init__()
+         self.cfg = cfg
+         self.in_planes = 64
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.layer1 = self._make_layer(0)
+         self.layer2 = self._make_layer(1)
+         self.layer3 = self._make_layer(2)
+         self.layer4 = self._make_layer(3)
+         self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)
+ 
+     def _make_layer(self, idx):
+         depth = self.cfg['depths'][idx]
+         width = self.cfg['widths'][idx]
+         stride = self.cfg['strides'][idx]
+         group_width = self.cfg['group_width']
+         bottleneck_ratio = self.cfg['bottleneck_ratio']
+         se_ratio = self.cfg['se_ratio']
+ 
+         layers = []
+         for i in range(depth):
+             s = stride if i == 0 else 1
+             layers.append(Block(self.in_planes, width,
+                                 s, group_width, bottleneck_ratio, se_ratio))
+             self.in_planes = width
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = F.adaptive_avg_pool2d(out, (1, 1))
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def RegNetX_200MF():
+     cfg = {
+         'depths': [1, 1, 4, 7],
+         'widths': [24, 56, 152, 368],
+         'strides': [1, 1, 2, 2],
+         'group_width': 8,
+         'bottleneck_ratio': 1,
+         'se_ratio': 0,
+     }
+     return RegNet(cfg)
+ 
+ 
+ def RegNetX_400MF():
+     cfg = {
+         'depths': [1, 2, 7, 12],
+         'widths': [32, 64, 160, 384],
+         'strides': [1, 1, 2, 2],
+         'group_width': 16,
+         'bottleneck_ratio': 1,
+         'se_ratio': 0,
+     }
+     return RegNet(cfg)
+ 
+ 
+ def RegNetY_400MF():
+     cfg = {
+         'depths': [1, 2, 7, 12],
+         'widths': [32, 64, 160, 384],
+         'strides': [1, 1, 2, 2],
+         'group_width': 16,
+         'bottleneck_ratio': 1,
+         'se_ratio': 0.25,
+     }
+     return RegNet(cfg)
+ 
+ 
+ def test():
+     net = RegNetX_200MF()
+     print(net)
+     x = torch.randn(2, 3, 32, 32)
+     y = net(x)
+     print(y.shape)
+ 
+ 
+ # Run the smoke test only when this file is executed as a script.
+ if __name__ == '__main__':
+     test()
--- a/source/models/resnet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/resnet.py 0 → 100644
View file @b8f4c4e
+ '''ResNet in PyTorch.
+ 
+ For Pre-activation ResNet, see 'preact_resnet.py'.
+ 
+ Reference:
+ [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+     Deep Residual Learning for Image Recognition. arXiv:1512.03385
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class BasicBlock(nn.Module):
+     expansion = 1
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(BasicBlock, self).__init__()
+         self.conv1 = nn.Conv2d(
+             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(self.expansion*planes)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.bn2(self.conv2(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class Bottleneck(nn.Module):
+     expansion = 4
+ 
+     def __init__(self, in_planes, planes, stride=1):
+         super(Bottleneck, self).__init__()
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                                stride=stride, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv3 = nn.Conv2d(planes, self.expansion *
+                                planes, kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != self.expansion*planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*planes,
+                           kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(self.expansion*planes)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = self.bn3(self.conv3(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class ResNet(nn.Module):
+     def __init__(self, block, num_blocks, num_classes=10):
+         super(ResNet, self).__init__()
+         self.in_planes = 64
+ 
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
+         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+         self.linear = nn.Linear(512*block.expansion, num_classes)
+ 
+     def _make_layer(self, block, planes, num_blocks, stride):
+         strides = [stride] + [1]*(num_blocks-1)
+         layers = []
+         for stride in strides:
+             layers.append(block(self.in_planes, planes, stride))
+             self.in_planes = planes * block.expansion
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def ResNet18():
+     return ResNet(BasicBlock, [2, 2, 2, 2])
+ 
+ 
+ def ResNet34():
+     return ResNet(BasicBlock, [3, 4, 6, 3])
+ 
+ 
+ def ResNet50():
+     return ResNet(Bottleneck, [3, 4, 6, 3])
+ 
+ 
+ def ResNet101():
+     return ResNet(Bottleneck, [3, 4, 23, 3])
+ 
+ 
+ def ResNet152():
+     return ResNet(Bottleneck, [3, 8, 36, 3])
+ 
+ 
+ def test():
+     net = ResNet18()
+     y = net(torch.randn(1, 3, 32, 32))
+     print(y.size())
+ 
+ # test()
--- a/source/models/resnext.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/resnext.py 0 → 100644
View file @b8f4c4e
+ '''ResNeXt in PyTorch.
+ 
+ See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class Block(nn.Module):
+     '''Grouped convolution block.'''
+     expansion = 2
+ 
+     def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
+         super(Block, self).__init__()
+         group_width = cardinality * bottleneck_width
+         self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(group_width)
+         self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
+         self.bn2 = nn.BatchNorm2d(group_width)
+         self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(self.expansion*group_width)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != self.expansion*group_width:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(self.expansion*group_width)
+             )
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = self.bn3(self.conv3(out))
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class ResNeXt(nn.Module):
+     def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
+         super(ResNeXt, self).__init__()
+         self.cardinality = cardinality
+         self.bottleneck_width = bottleneck_width
+         self.in_planes = 64
+ 
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.layer1 = self._make_layer(num_blocks[0], 1)
+         self.layer2 = self._make_layer(num_blocks[1], 2)
+         self.layer3 = self._make_layer(num_blocks[2], 2)
+         # self.layer4 = self._make_layer(num_blocks[3], 2)
+         self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
+ 
+     def _make_layer(self, num_blocks, stride):
+         strides = [stride] + [1]*(num_blocks-1)
+         layers = []
+         for stride in strides:
+             layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
+             self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
+         # Increase bottleneck_width by 2 after each stage.
+         self.bottleneck_width *= 2
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         # out = self.layer4(out)
+         out = F.avg_pool2d(out, 8)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def ResNeXt29_2x64d():
+     return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)
+ 
+ def ResNeXt29_4x64d():
+     return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)
+ 
+ def ResNeXt29_8x64d():
+     return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)
+ 
+ def ResNeXt29_32x4d():
+     return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)
+ 
+ def test_resnext():
+     net = ResNeXt29_2x64d()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y.size())
+ 
+ # test_resnext()
--- a/source/models/senet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/senet.py 0 → 100644
View file @b8f4c4e
+ '''SENet in PyTorch.
+ 
+ SENet won the ILSVRC 2017 classification task. See the paper "Squeeze-and-Excitation Networks" for more details.
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class BasicBlock(nn.Module):
+     def __init__(self, in_planes, planes, stride=1):
+         super(BasicBlock, self).__init__()
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride != 1 or in_planes != planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
+                 nn.BatchNorm2d(planes)
+             )
+ 
+         # SE layers
+         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
+         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.bn2(self.conv2(out))
+ 
+         # Squeeze
+         w = F.avg_pool2d(out, out.size(2))
+         w = F.relu(self.fc1(w))
+         w = F.sigmoid(self.fc2(w))
+         # Excitation
+         out = out * w  # New broadcasting feature from v0.2!
+ 
+         out += self.shortcut(x)
+         out = F.relu(out)
+         return out
+ 
+ 
+ class PreActBlock(nn.Module):
+     def __init__(self, in_planes, planes, stride=1):
+         super(PreActBlock, self).__init__()
+         self.bn1 = nn.BatchNorm2d(in_planes)
+         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(planes)
+         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
+ 
+         if stride != 1 or in_planes != planes:
+             self.shortcut = nn.Sequential(
+                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
+             )
+ 
+         # SE layers
+         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
+         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(x))
+         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
+         out = self.conv1(out)
+         out = self.conv2(F.relu(self.bn2(out)))
+ 
+         # Squeeze
+         w = F.avg_pool2d(out, out.size(2))
+         w = F.relu(self.fc1(w))
+         w = F.sigmoid(self.fc2(w))
+         # Excitation
+         out = out * w
+ 
+         out += shortcut
+         return out
+ 
+ 
+ class SENet(nn.Module):
+     def __init__(self, block, num_blocks, num_classes=10):
+         super(SENet, self).__init__()
+         self.in_planes = 64
+ 
+         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(64)
+         self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
+         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
+         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
+         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
+         self.linear = nn.Linear(512, num_classes)
+ 
+     def _make_layer(self, block, planes, num_blocks, stride):
+         strides = [stride] + [1]*(num_blocks-1)
+         layers = []
+         for stride in strides:
+             layers.append(block(self.in_planes, planes, stride))
+             self.in_planes = planes
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.layer4(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def SENet18():
+     return SENet(PreActBlock, [2,2,2,2])
+ 
+ 
+ def test():
+     net = SENet18()
+     y = net(torch.randn(1,3,32,32))
+     print(y.size())
+ 
+ # test()
--- a/source/models/shufflenet.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/shufflenet.py 0 → 100644
View file @b8f4c4e
+ '''ShuffleNet in PyTorch.
+ 
+ See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class ShuffleBlock(nn.Module):
+     def __init__(self, groups):
+         super(ShuffleBlock, self).__init__()
+         self.groups = groups
+ 
+     def forward(self, x):
+         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
+         N,C,H,W = x.size()
+         g = self.groups
+         return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)
+ 
+ 
+ class Bottleneck(nn.Module):
+     def __init__(self, in_planes, out_planes, stride, groups):
+         super(Bottleneck, self).__init__()
+         self.stride = stride
+ 
+         mid_planes = out_planes/4
+         g = 1 if in_planes==24 else groups
+         self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
+         self.bn1 = nn.BatchNorm2d(mid_planes)
+         self.shuffle1 = ShuffleBlock(groups=g)
+         self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
+         self.bn2 = nn.BatchNorm2d(mid_planes)
+         self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
+         self.bn3 = nn.BatchNorm2d(out_planes)
+ 
+         self.shortcut = nn.Sequential()
+         if stride == 2:
+             self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.shuffle1(out)
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = self.bn3(self.conv3(out))
+         res = self.shortcut(x)
+         out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
+         return out
+ 
+ 
+ class ShuffleNet(nn.Module):
+     def __init__(self, cfg):
+         super(ShuffleNet, self).__init__()
+         out_planes = cfg['out_planes']
+         num_blocks = cfg['num_blocks']
+         groups = cfg['groups']
+ 
+         self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(24)
+         self.in_planes = 24
+         self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
+         self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
+         self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
+         self.linear = nn.Linear(out_planes[2], 10)
+ 
+     def _make_layer(self, out_planes, num_blocks, groups):
+         layers = []
+         for i in range(num_blocks):
+             stride = 2 if i == 0 else 1
+             cat_planes = self.in_planes if i == 0 else 0
+             layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
+             self.in_planes = out_planes
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ def ShuffleNetG2():
+     cfg = {
+         'out_planes': [200,400,800],
+         'num_blocks': [4,8,4],
+         'groups': 2
+     }
+     return ShuffleNet(cfg)
+ 
+ def ShuffleNetG3():
+     cfg = {
+         'out_planes': [240,480,960],
+         'num_blocks': [4,8,4],
+         'groups': 3
+     }
+     return ShuffleNet(cfg)
+ 
+ 
+ def test():
+     net = ShuffleNetG2()
+     x = torch.randn(1,3,32,32)
+     y = net(x)
+     print(y)
+ 
+ # test()
--- a/source/models/shufflenetv2.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/shufflenetv2.py 0 → 100644
View file @b8f4c4e
+ '''ShuffleNetV2 in PyTorch.
+ 
+ See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
+ '''
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class ShuffleBlock(nn.Module):
+     def __init__(self, groups=2):
+         super(ShuffleBlock, self).__init__()
+         self.groups = groups
+ 
+     def forward(self, x):
+         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
+         N, C, H, W = x.size()
+         g = self.groups
+         return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
+ 
+ 
+ class SplitBlock(nn.Module):
+     def __init__(self, ratio):
+         super(SplitBlock, self).__init__()
+         self.ratio = ratio
+ 
+     def forward(self, x):
+         c = int(x.size(1) * self.ratio)
+         return x[:, :c, :, :], x[:, c:, :, :]
+ 
+ 
+ class BasicBlock(nn.Module):
+     def __init__(self, in_channels, split_ratio=0.5):
+         super(BasicBlock, self).__init__()
+         self.split = SplitBlock(split_ratio)
+         in_channels = int(in_channels * split_ratio)
+         self.conv1 = nn.Conv2d(in_channels, in_channels,
+                                kernel_size=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(in_channels)
+         self.conv2 = nn.Conv2d(in_channels, in_channels,
+                                kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
+         self.bn2 = nn.BatchNorm2d(in_channels)
+         self.conv3 = nn.Conv2d(in_channels, in_channels,
+                                kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(in_channels)
+         self.shuffle = ShuffleBlock()
+ 
+     def forward(self, x):
+         x1, x2 = self.split(x)
+         out = F.relu(self.bn1(self.conv1(x2)))
+         out = self.bn2(self.conv2(out))
+         out = F.relu(self.bn3(self.conv3(out)))
+         out = torch.cat([x1, out], 1)
+         out = self.shuffle(out)
+         return out
+ 
+ 
+ class DownBlock(nn.Module):
+     def __init__(self, in_channels, out_channels):
+         super(DownBlock, self).__init__()
+         mid_channels = out_channels // 2
+         # left
+         self.conv1 = nn.Conv2d(in_channels, in_channels,
+                                kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
+         self.bn1 = nn.BatchNorm2d(in_channels)
+         self.conv2 = nn.Conv2d(in_channels, mid_channels,
+                                kernel_size=1, bias=False)
+         self.bn2 = nn.BatchNorm2d(mid_channels)
+         # right
+         self.conv3 = nn.Conv2d(in_channels, mid_channels,
+                                kernel_size=1, bias=False)
+         self.bn3 = nn.BatchNorm2d(mid_channels)
+         self.conv4 = nn.Conv2d(mid_channels, mid_channels,
+                                kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
+         self.bn4 = nn.BatchNorm2d(mid_channels)
+         self.conv5 = nn.Conv2d(mid_channels, mid_channels,
+                                kernel_size=1, bias=False)
+         self.bn5 = nn.BatchNorm2d(mid_channels)
+ 
+         self.shuffle = ShuffleBlock()
+ 
+     def forward(self, x):
+         # left
+         out1 = self.bn1(self.conv1(x))
+         out1 = F.relu(self.bn2(self.conv2(out1)))
+         # right
+         out2 = F.relu(self.bn3(self.conv3(x)))
+         out2 = self.bn4(self.conv4(out2))
+         out2 = F.relu(self.bn5(self.conv5(out2)))
+         # concat
+         out = torch.cat([out1, out2], 1)
+         out = self.shuffle(out)
+         return out
+ 
+ 
+ class ShuffleNetV2(nn.Module):
+     def __init__(self, net_size):
+         super(ShuffleNetV2, self).__init__()
+         out_channels = configs[net_size]['out_channels']
+         num_blocks = configs[net_size]['num_blocks']
+ 
+         self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
+                                stride=1, padding=1, bias=False)
+         self.bn1 = nn.BatchNorm2d(24)
+         self.in_channels = 24
+         self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
+         self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
+         self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
+         self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
+                                kernel_size=1, stride=1, padding=0, bias=False)
+         self.bn2 = nn.BatchNorm2d(out_channels[3])
+         self.linear = nn.Linear(out_channels[3], 10)
+ 
+     def _make_layer(self, out_channels, num_blocks):
+         layers = [DownBlock(self.in_channels, out_channels)]
+         for i in range(num_blocks):
+             layers.append(BasicBlock(out_channels))
+             self.in_channels = out_channels
+         return nn.Sequential(*layers)
+ 
+     def forward(self, x):
+         out = F.relu(self.bn1(self.conv1(x)))
+         # out = F.max_pool2d(out, 3, stride=2, padding=1)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = F.relu(self.bn2(self.conv2(out)))
+         out = F.avg_pool2d(out, 4)
+         out = out.view(out.size(0), -1)
+         out = self.linear(out)
+         return out
+ 
+ 
+ configs = {
+     0.5: {
+         'out_channels': (48, 96, 192, 1024),
+         'num_blocks': (3, 7, 3)
+     },
+ 
+     1: {
+         'out_channels': (116, 232, 464, 1024),
+         'num_blocks': (3, 7, 3)
+     },
+     1.5: {
+         'out_channels': (176, 352, 704, 1024),
+         'num_blocks': (3, 7, 3)
+     },
+     2: {
+         'out_channels': (224, 488, 976, 2048),
+         'num_blocks': (3, 7, 3)
+     }
+ }
+ 
+ 
+ def test():
+     net = ShuffleNetV2(net_size=0.5)
+     x = torch.randn(3, 3, 32, 32)
+     y = net(x)
+     print(y.shape)
+ 
+ 
+ # test()
--- a/source/models/vgg.py 0 → 100644
View file @b8f4c4e
+++ b/source/models/vgg.py 0 → 100644
View file @b8f4c4e
+ '''VGG11/13/16/19 in Pytorch.'''
+ import torch
+ import torch.nn as nn
+ 
+ 
+ cfg = {
+     'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+     'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+     'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
+     'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
+ }
+ 
+ 
+ class VGG(nn.Module):
+     def __init__(self, vgg_name):
+         super(VGG, self).__init__()
+         self.features = self._make_layers(cfg[vgg_name])
+         self.classifier = nn.Linear(512, 10)
+ 
+     def forward(self, x):
+         out = self.features(x)
+         out = out.view(out.size(0), -1)
+         out = self.classifier(out)
+         return out
+ 
+     def _make_layers(self, cfg):
+         layers = []
+         in_channels = 3
+         for x in cfg:
+             if x == 'M':
+                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+             else:
+                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
+                            nn.BatchNorm2d(x),
+                            nn.ReLU(inplace=True)]
+                 in_channels = x
+         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
+         return nn.Sequential(*layers)
+ 
+ 
+ def test():
+     net = VGG('VGG11')
+     x = torch.randn(2,3,32,32)
+     y = net(x)
+     print(y.size())
+ 
+ # test()
--- a/source/replace.py 0 → 100644
View file @b8f4c4e
+++ b/source/replace.py 0 → 100644
View file @b8f4c4e
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+ from lsq_sq import *
+ from models.mobilenet import *
+ 
+ # Counters incremented by replace_sq (declared `global` there): index of the most
+ # recently replaced Conv2d / activation module, -1 before any replacement.
+ conv_idx = -1
+ act_idx = -1
+ # NOTE(review): replace_sq does not declare `former_conv` global, so any assignment
+ # to that name inside it is function-local and this module-level value stays None.
+ former_conv = None
+ 
+ def replace_sq(model, bit_width=8):
+     global conv_idx, act_idx
+     
+     for name, module in model.named_children():
+         if isinstance(module, (nn.Sequential)): #- conventional
+             replace_sq(model.__dict__['_modules'][name], bit_width)
+ 
+         elif isinstance(module, nn.Conv2d):
+             former_conv = name
+             conv_idx += 1
+             bias = False if module.bias is None else True
+ 
+             model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
+                                                         module.kernel_size, stride=module.stride,
+                                                         padding=module.padding, dilation=module.dilation,
+                                                         groups=module.groups, bias=bias, wbit=bit_width)
+             model.__dict__['_modules'][name].weight = module.weight
+             if bias:
+                 model.__dict__['_modules'][name].bias = module.bias
+ 
+         elif isinstance(module, nn.BatchNorm2d):
+             model.__dict__['_modules'][former_conv].replace_bn(module)
+             model.__dict__['_modules'][name] = nn.Identity()
+ 
+         elif isinstance(module, nn.ReLU):
+             act_idx += 1
+             model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.Hardswish): 
+             act_idx += 1
+             model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.Hardsigmoid): 
+             act_idx += 1
+             model.__dict__['_modules'][name] = QHsigmoid(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.LeakyReLU): 
+             act_idx += 1
+             model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.Linear):           
+             bias = False if module.bias is None else True
+             model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
+             model.__dict__['_modules'][name].weight = module.weight
+             if bias:
+                 model.__dict__['_modules'][name].bias = module.bias
+ 
+         elif isinstance(module, nn.AdaptiveAvgPool2d):
+             model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
+ 
+         # elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
+         #     replace_sq(model.__dict__['_modules'][name], bit_width)
+ 
+         # elif isinstance(module, InvertedResidual): #mv2
+         #     replace_sq(model.__dict__['_modules'][name], bit_width)
+ 
+         else:
+             model.__dict__['_modules'][name] = module
+ 
+     return model
--- a/source/replace_int.py 0 → 100644
View file @b8f4c4e
+++ b/source/replace_int.py 0 → 100644
View file @b8f4c4e
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+ from lsq_int import *
+ from models.mobilenet import *
+ 
+ # Counters incremented by replace_int (declared `global` there): index of the most
+ # recently replaced Conv2d / activation module, -1 before any replacement.
+ conv_idx = -1
+ act_idx = -1
+ # NOTE(review): replace_int does not declare `former_conv` global, so any assignment
+ # to that name inside it is function-local and this module-level value stays None.
+ former_conv = None
+ 
+ def replace_int(model, bit_width=8):
+     global conv_idx, act_idx
+     
+     for name, module in model.named_children():
+         if isinstance(module, (nn.Sequential)): #- conventional
+             replace_int(model.__dict__['_modules'][name], bit_width)
+ 
+         elif isinstance(module, nn.Conv2d):
+             former_conv = name
+             conv_idx += 1
+             bias = False if module.bias is None else True
+ 
+             model.__dict__['_modules'][name] = FuseConv2dQ(module.in_channels, module.out_channels,
+                                                         module.kernel_size, stride=module.stride,
+                                                         padding=module.padding, dilation=module.dilation,
+                                                         groups=module.groups, bias=bias, wbit=bit_width)
+             model.__dict__['_modules'][name].weight = module.weight
+             if bias:
+                 model.__dict__['_modules'][name].bias = module.bias
+ 
+         elif isinstance(module, nn.BatchNorm2d):
+             model.__dict__['_modules'][former_conv].replace_bn(module)
+             model.__dict__['_modules'][name] = nn.Identity()
+ 
+         elif isinstance(module, nn.ReLU):
+             act_idx += 1
+             model.__dict__['_modules'][name] = QReLU(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.Hardswish): 
+             act_idx += 1
+             model.__dict__['_modules'][name] = QHswish(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.LeakyReLU): 
+             act_idx += 1
+             model.__dict__['_modules'][name] = QLeakyReLU(abit=bit_width, inplace=False, dequantize=True)
+ 
+         elif isinstance(module, nn.Linear):           
+             bias = False if module.bias is None else True
+             model.__dict__['_modules'][name] = QLinear(module.in_features, module.out_features, bias, wbit=bit_width)
+             model.__dict__['_modules'][name].weight = module.weight
+             if bias:
+                 model.__dict__['_modules'][name].bias = module.bias
+ 
+         elif isinstance(module, nn.AdaptiveAvgPool2d):
+             model.__dict__['_modules'][name] = QAvgPool2d(abit=bit_width, dequantize=True, output_size=module.output_size)
+ 
+         # elif isinstance(module, BasicBlock) or isinstance(module, Bottleneck): #- ResNet support
+         #     replace_sq(model.__dict__['_modules'][name], bit_width)
+ 
+         # elif isinstance(module, InvertedResidual): #mv2
+         #     replace_sq(model.__dict__['_modules'][name], bit_width)
+ 
+         else:
+             model.__dict__['_modules'][name] = module
+ 
+     return model
--- a/source/utils.py 0 → 100644
View file @b8f4c4e
+++ b/source/utils.py 0 → 100644
View file @b8f4c4e
+ '''Some helper functions for PyTorch, including:
+     - get_mean_and_std: calculate the mean and std value of dataset.
+     - init_params: net parameter initialization.
+     - progress_bar: progress bar mimic xlua.progress.
+ '''
+ import os
+ import sys
+ import time
+ import math
+ 
+ import torch
+ import torch.nn as nn
+ import torch.nn.init as init
+ 
+ 
+ def get_mean_and_std(dataset):
+     '''Compute the mean and std value of dataset.'''
+     dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
+     mean = torch.zeros(3)
+     std = torch.zeros(3)
+     print('==> Computing mean and std..')
+     for inputs, targets in dataloader:
+         for i in range(3):
+             mean[i] += inputs[:,i,:,:].mean()
+             std[i] += inputs[:,i,:,:].std()
+     mean.div_(len(dataset))
+     std.div_(len(dataset))
+     return mean, std
+ 
+ def init_params(net):
+     '''Init layer parameters.'''
+     for m in net.modules():
+         if isinstance(m, nn.Conv2d):
+             init.kaiming_normal(m.weight, mode='fan_out')
+             if m.bias:
+                 init.constant(m.bias, 0)
+         elif isinstance(m, nn.BatchNorm2d):
+             init.constant(m.weight, 1)
+             init.constant(m.bias, 0)
+         elif isinstance(m, nn.Linear):
+             init.normal(m.weight, std=1e-3)
+             if m.bias:
+                 init.constant(m.bias, 0)
+ 
+ 
+ _, term_width = os.popen('stty size', 'r').read().split()
+ term_width = int(term_width)
+ 
+ TOTAL_BAR_LENGTH = 65.
+ last_time = time.time()
+ begin_time = last_time
+ def progress_bar(current, total, msg=None):
+     global last_time, begin_time
+     if current == 0:
+         begin_time = time.time()  # Reset for new bar.
+ 
+     cur_len = int(TOTAL_BAR_LENGTH*current/total)
+     rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
+ 
+     sys.stdout.write(' [')
+     for i in range(cur_len):
+         sys.stdout.write('=')
+     sys.stdout.write('>')
+     for i in range(rest_len):
+         sys.stdout.write('.')
+     sys.stdout.write(']')
+ 
+     cur_time = time.time()
+     step_time = cur_time - last_time
+     last_time = cur_time
+     tot_time = cur_time - begin_time
+ 
+     L = []
+     L.append('  Step: %s' % format_time(step_time))
+     L.append(' | Tot: %s' % format_time(tot_time))
+     if msg:
+         L.append(' | ' + msg)
+ 
+     msg = ''.join(L)
+     sys.stdout.write(msg)
+     for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
+         sys.stdout.write(' ')
+ 
+     # Go back to the center of the bar.
+     for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
+         sys.stdout.write('\b')
+     sys.stdout.write(' %d/%d ' % (current+1, total))
+ 
+     if current < total-1:
+         sys.stdout.write('\r')
+     else:
+         sys.stdout.write('\n')
+     sys.stdout.flush()
+ 
+ def format_time(seconds):
+     days = int(seconds / 3600/24)
+     seconds = seconds - days*3600*24
+     hours = int(seconds / 3600)
+     seconds = seconds - hours*3600
+     minutes = int(seconds / 60)
+     seconds = seconds - minutes*60
+     secondsf = int(seconds)
+     seconds = seconds - secondsf
+     millis = int(seconds*1000)
+ 
+     f = ''
+     i = 1
+     if days > 0:
+         f += str(days) + 'D'
+         i += 1
+     if hours > 0 and i <= 2:
+         f += str(hours) + 'h'
+         i += 1
+     if minutes > 0 and i <= 2:
+         f += str(minutes) + 'm'
+         i += 1
+     if secondsf > 0 and i <= 2:
+         f += str(secondsf) + 's'
+         i += 1
+     if millis > 0 and i <= 2:
+         f += str(millis) + 'ms'
+         i += 1
+     if f == '':
+         f = '0ms'
+     return f
--- a/주간보고서/210322.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210322.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210329.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210329.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210405.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210405.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210412.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210412.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210419.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210419.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210426.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210426.docx 0 → 100644
View file @b8f4c4e
--- a/주간보고서/210503.docx 0 → 100644
View file @b8f4c4e
+++ b/주간보고서/210503.docx 0 → 100644
View file @b8f4c4e