# fast_auto_augment.py
import copy
import json
import time
import torch
import torch.nn as nn
import random
import torchvision.transforms as transforms
from torch.utils.data import Subset
from sklearn.model_selection import StratifiedShuffleSplit
from concurrent.futures import ProcessPoolExecutor
from transforms import *
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from utils import *

DEFAULT_CANDIDATES = [
    ShearXY,
    TranslateXY,
    Rotate,
    AutoContrast,
    Invert,
    Equalize,
    Solarize,
    Posterize,
    Contrast,
    Color,
    Brightness,
    Sharpness,
    Cutout,
    # SamplePairing,
]

def train_child(args, model, dataset, subset_indx, device=None):
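    """Train a child network on the D_M split; returns the last train_step
    result (top-1 acc, top-5 acc, loss)."""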
    optimizer = select_optimizer(args, model)
    scheduler = select_scheduler(args, optimizer)
    criterion = nn.CrossEntropyLoss()

    dataset.transform = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor()])
    subset = Subset(dataset, subset_indx)
    data_loader = get_inf_dataloader(args, subset)

    if device:
        model = model.to(device)
        criterion = criterion.to(device)
    elif args.use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()
        if torch.cuda.device_count() > 1:
            print('\n[+] Use {} GPUs'.format(torch.cuda.device_count()))
            model = nn.DataParallel(model)
        elif torch.cuda.device_count() == 1:
            print('\n[+] Use {} GPUs'.format(torch.cuda.device_count()))

    start_t = time.time()
    for step in range(args.start_step, args.max_step):
        batch = next(data_loader)
        _train_res = train_step(args, model, optimizer, scheduler, criterion, batch, step, None, device)
        if step % args.print_step == 0:
            print('\n[+] Training step: {}/{}\tElapsed time: {:.2f}min\tLearning rate: {}\tDevice name: {}'.format(
                step, args.max_step, (time.time()-start_t)/60, optimizer.param_groups[0]['lr'], torch.cuda.get_device_name(0)))
            print(' Acc@1 : {:.3f}%'.format(_train_res[0].data.cpu().numpy()[0]*100))
            print(' Acc@5 : {:.3f}%'.format(_train_res[1].data.cpu().numpy()[0]*100))
            print(' Loss : {}'.format(_train_res[2].data))

    return _train_res

def validate_child(args, model, dataset, subset_indx, transform, device=None):
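    """Evaluate the child model on the D_A split under the given transform;
    returns the validate() result, whose third element is the loss used to
    score sub-policies."""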
    criterion = nn.CrossEntropyLoss()

    if device:
        model = model.to(device)
        criterion = criterion.to(device)
    elif args.use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    dataset.transform = transform
    subset = Subset(dataset, subset_indx)
    data_loader = get_dataloader(args, subset, pin_memory=False)

    return validate(args, model, criterion, data_loader, 0, None, device)

def get_next_subpolicy(transform_candidates, op_per_subpolicy=2):
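    """Randomly sample a sub-policy of op_per_subpolicy operations, each with
    a random probability and magnitude in [0, 1)."""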
    n_candidates = len(transform_candidates)
    subpolicy = []

    for i in range(op_per_subpolicy):
        indx = random.randrange(n_candidates)
        prob = random.random()
        mag = random.random()
        subpolicy.append(transform_candidates[indx](prob, mag))

    subpolicy = transforms.Compose([
        *subpolicy,
        transforms.Resize(32),
        transforms.ToTensor()])

    return subpolicy

def search_subpolicies(args, transform_candidates, child_model, dataset, Da_indx, B, device):
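    """Random-search baseline: score B randomly sampled sub-policies by
    validation loss on D_A."""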
    subpolicies = []

    for b in range(B):
        subpolicy = get_next_subpolicy(transform_candidates)
        val_res = validate_child(args, child_model, dataset, Da_indx, subpolicy, device)
        subpolicies.append((subpolicy, val_res[2]))

    return subpolicies

def search_subpolicies_hyperopt(args, transform_candidates, child_model, dataset, Da_indx, B, device):
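    """Search B two-operation sub-policies with hyperopt's TPE, scoring each
    by the trained child model's validation loss on D_A (no retraining, as in
    Fast AutoAugment)."""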
    def _objective(sampled):
        subpolicy = [transform(prob, mag)
                     for transform, prob, mag in sampled]
        subpolicy = transforms.Compose([
            transforms.Resize(32),
            *subpolicy,
            transforms.ToTensor()])
        val_res = validate_child(args, child_model, dataset, Da_indx, subpolicy, device)
        loss = val_res[2].cpu().numpy()
        return {'loss': loss, 'status': STATUS_OK}

    space = [(hp.choice('transform1', transform_candidates), hp.uniform('prob1', 0, 1.0), hp.uniform('mag1', 0, 1.0)),
             (hp.choice('transform2', transform_candidates), hp.uniform('prob2', 0, 1.0), hp.uniform('mag2', 0, 1.0))]

    trials = Trials()
    best = fmin(_objective,
                space=space,
                algo=tpe.suggest,
                max_evals=B,
                trials=trials)

    subpolicies = []
    for t in trials.trials:
        vals = t['misc']['vals']
        subpolicy = [transform_candidates[vals['transform1'][0]](vals['prob1'][0], vals['mag1'][0]),
                     transform_candidates[vals['transform2'][0]](vals['prob2'][0], vals['mag2'][0])]
        subpolicy = transforms.Compose([
            ## baseline augmentation
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            ## policy
            *subpolicy,
            ## to tensor
            transforms.ToTensor()])
        subpolicies.append((subpolicy, t['result']['loss']))

    return subpolicies

def get_topn_subpolicies(subpolicies, N=10):
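    """Return the N sub-policies with the lowest validation loss."""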
    return sorted(subpolicies, key=lambda subpolicy: subpolicy[1])[:N]

def process_fn(args_str, model, dataset, Dm_indx, Da_indx, T, transform_candidates, B, N, k):
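    """Per-fold worker (runs in a subprocess pinned to one GPU): trains a
    child model on D_M, then runs T rounds of sub-policy search on D_A,
    keeping the top N of B candidates per round."""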
    kwargs = json.loads(args_str)
    args, kwargs = parse_args(kwargs)
    device_id = k % torch.cuda.device_count()
    device = torch.device('cuda:%d' % device_id)
    _transform = []

    print('[+] Child %d training started (GPU: %d)' % (k, device_id))

    # train child model
    child_model = copy.deepcopy(model)
    train_res = train_child(args, child_model, dataset, Dm_indx, device)

    # search sub-policies
    for t in range(T):
        #subpolicies = search_subpolicies(args, transform_candidates, child_model, dataset, Da_indx, B, device)
        subpolicies = search_subpolicies_hyperopt(args, transform_candidates, child_model, dataset, Da_indx, B, device)
        subpolicies = get_topn_subpolicies(subpolicies, N)
        _transform.extend([subpolicy[0] for subpolicy in subpolicies])

    return _transform

#fast_auto_augment(args, model, K=4, B=1, num_process=4)
def fast_auto_augment(args, model, transform_candidates=None, K=5, B=100, T=2, N=10, num_process=5):
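    """Fast AutoAugment entry point: split the training set into K
    (D_M, D_A) folds, run one child process per fold, and return a
    transforms.RandomChoice over all discovered sub-policies."""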
    args_str = json.dumps(args._asdict())
    dataset = get_dataset(args, None, 'trainval')
    num_process = min(torch.cuda.device_count(), num_process)
    transform, futures = [], []

    torch.multiprocessing.set_start_method('spawn', force=True)

    if not transform_candidates:
        transform_candidates = DEFAULT_CANDIDATES

    # split the training set into K (D_M, D_A) folds
    Dm_indexes, Da_indexes = split_dataset(args, dataset, K)

    with ProcessPoolExecutor(max_workers=num_process) as executor:
        for k, (Dm_indx, Da_indx) in enumerate(zip(Dm_indexes, Da_indexes)):
            future = executor.submit(process_fn,
                args_str, model, dataset, Dm_indx, Da_indx, T, transform_candidates, B, N, k)
            futures.append(future)

        for future in futures:
            transform.extend(future.result())

    transform = transforms.RandomChoice(transform)

    return transform
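

# Usage sketch (commented out, mirroring the example call above). Assumptions
# not defined in this file: `parse_args` and `get_dataset` come in via
# `from utils import *` (as used above in process_fn and fast_auto_augment);
# `MyNetwork` and 'config.json' are hypothetical placeholders for the base
# model class and the config path.
#
#   with open('config.json') as f:
#       args, _ = parse_args(json.load(f))
#   model = MyNetwork()  # placeholder: substitute the repo's model constructor
#   transform = fast_auto_augment(args, model, K=4, B=100, num_process=4)
#   # The returned transforms.RandomChoice can then be assigned as the
#   # training dataset's transform to apply the learned policies.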