Ayaka Koshibe

role reassignment tweaks

Change-Id: Ie6d412787330e67a13e605a34f0824cf70882f85
...@@ -33,7 +33,7 @@ public interface MastershipService { ...@@ -33,7 +33,7 @@ public interface MastershipService {
33 /** 33 /**
34 * Abandons mastership of the specified device on the local node thus 34 * Abandons mastership of the specified device on the local node thus
35 * forcing selection of a new master. If the local node is not a master 35 * forcing selection of a new master. If the local node is not a master
36 - * for this device, no action will be taken. 36 + * for this device, no master selection will occur.
37 * 37 *
38 * @param deviceId the identifier of the device 38 * @param deviceId the identifier of the device
39 */ 39 */
......
...@@ -66,12 +66,25 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD ...@@ -66,12 +66,25 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD
66 MastershipTerm getTermFor(DeviceId deviceId); 66 MastershipTerm getTermFor(DeviceId deviceId);
67 67
68 /** 68 /**
69 - * Revokes a controller instance's mastership over a device and hands 69 + * Sets a controller instance's mastership role to STANDBY for a device.
70 - * over mastership to another controller instance. 70 + * If the role is MASTER, another controller instance will be selected
71 + * as a candidate master.
71 * 72 *
72 * @param nodeId the controller instance identifier 73 * @param nodeId the controller instance identifier
73 - * @param deviceId device to revoke mastership for 74 + * @param deviceId device to revoke mastership role for
74 * @return a mastership event 75 * @return a mastership event
75 */ 76 */
76 - MastershipEvent unsetMaster(NodeId nodeId, DeviceId deviceId); 77 + MastershipEvent setStandby(NodeId nodeId, DeviceId deviceId);
78 +
79 + /**
80 + * Allows a controller instance to give up its current role for a device.
81 + * If the role is MASTER, another controller instance will be selected
82 + * as a candidate master.
83 + *
84 + * @param nodeId the controller instance identifier
85 + * @param deviceId device to revoke mastership role for
86 + * @return a mastership event
87 + */
88 + MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId);
89 +
77 } 90 }
......
...@@ -82,7 +82,7 @@ implements MastershipService, MastershipAdminService { ...@@ -82,7 +82,7 @@ implements MastershipService, MastershipAdminService {
82 if (role.equals(MastershipRole.MASTER)) { 82 if (role.equals(MastershipRole.MASTER)) {
83 event = store.setMaster(nodeId, deviceId); 83 event = store.setMaster(nodeId, deviceId);
84 } else { 84 } else {
85 - event = store.unsetMaster(nodeId, deviceId); 85 + event = store.setStandby(nodeId, deviceId);
86 } 86 }
87 87
88 if (event != null) { 88 if (event != null) {
...@@ -98,13 +98,10 @@ implements MastershipService, MastershipAdminService { ...@@ -98,13 +98,10 @@ implements MastershipService, MastershipAdminService {
98 98
99 @Override 99 @Override
100 public void relinquishMastership(DeviceId deviceId) { 100 public void relinquishMastership(DeviceId deviceId) {
101 - MastershipRole role = getLocalRole(deviceId); 101 + MastershipEvent event = null;
102 - if (!role.equals(MastershipRole.MASTER)) { 102 + event = store.relinquishRole(
103 - return; 103 + clusterService.getLocalNode().id(), deviceId);
104 - }
105 104
106 - MastershipEvent event = store.unsetMaster(
107 - clusterService.getLocalNode().id(), deviceId);
108 if (event != null) { 105 if (event != null) {
109 post(event); 106 post(event);
110 } 107 }
......
...@@ -142,7 +142,7 @@ public class DeviceManager ...@@ -142,7 +142,7 @@ public class DeviceManager
142 142
143 // Applies the specified role to the device; ignores NONE 143 // Applies the specified role to the device; ignores NONE
144 private void applyRole(DeviceId deviceId, MastershipRole newRole) { 144 private void applyRole(DeviceId deviceId, MastershipRole newRole) {
145 - if (newRole != MastershipRole.NONE) { 145 + if (newRole.equals(MastershipRole.NONE)) {
146 Device device = store.getDevice(deviceId); 146 Device device = store.getDevice(deviceId);
147 DeviceProvider provider = getProvider(device.providerId()); 147 DeviceProvider provider = getProvider(device.providerId());
148 if (provider != null) { 148 if (provider != null) {
...@@ -196,11 +196,8 @@ public class DeviceManager ...@@ -196,11 +196,8 @@ public class DeviceManager
196 DeviceEvent event = store.createOrUpdateDevice(provider().id(), 196 DeviceEvent event = store.createOrUpdateDevice(provider().id(),
197 deviceId, deviceDescription); 197 deviceId, deviceDescription);
198 198
199 - // If there was a change of any kind, trigger role selection
200 - // process.
201 if (event != null) { 199 if (event != null) {
202 log.info("Device {} connected", deviceId); 200 log.info("Device {} connected", deviceId);
203 - //mastershipService.requestRoleFor(deviceId);
204 provider().roleChanged(event.subject(), 201 provider().roleChanged(event.subject(),
205 mastershipService.requestRoleFor(deviceId)); 202 mastershipService.requestRoleFor(deviceId));
206 post(event); 203 post(event);
...@@ -212,11 +209,11 @@ public class DeviceManager ...@@ -212,11 +209,11 @@ public class DeviceManager
212 checkNotNull(deviceId, DEVICE_ID_NULL); 209 checkNotNull(deviceId, DEVICE_ID_NULL);
213 checkValidity(); 210 checkValidity();
214 DeviceEvent event = store.markOffline(deviceId); 211 DeviceEvent event = store.markOffline(deviceId);
212 + //we're no longer capable of being master or a candidate.
213 + mastershipService.relinquishMastership(deviceId);
215 214
216 - //we're no longer capable of mastership.
217 if (event != null) { 215 if (event != null) {
218 log.info("Device {} disconnected", deviceId); 216 log.info("Device {} disconnected", deviceId);
219 - mastershipService.relinquishMastership(deviceId);
220 post(event); 217 post(event);
221 } 218 }
222 } 219 }
...@@ -267,17 +264,23 @@ public class DeviceManager ...@@ -267,17 +264,23 @@ public class DeviceManager
267 } 264 }
268 265
269 // Intercepts mastership events 266 // Intercepts mastership events
270 - private class InternalMastershipListener 267 + private class InternalMastershipListener implements MastershipListener {
271 - implements MastershipListener { 268 +
272 @Override 269 @Override
273 public void event(MastershipEvent event) { 270 public void event(MastershipEvent event) {
274 - if (event.master().equals(clusterService.getLocalNode().id())) { 271 + DeviceId did = event.subject();
275 - MastershipTerm term = mastershipService.requestTermService() 272 + if (isAvailable(did)) {
276 - .getMastershipTerm(event.subject()); 273 + if (event.master().equals(clusterService.getLocalNode().id())) {
277 - clockService.setMastershipTerm(event.subject(), term); 274 + MastershipTerm term = termService.getMastershipTerm(did);
278 - applyRole(event.subject(), MastershipRole.MASTER); 275 + clockService.setMastershipTerm(did, term);
276 + applyRole(did, MastershipRole.MASTER);
277 + } else {
278 + applyRole(did, MastershipRole.STANDBY);
279 + }
279 } else { 280 } else {
280 - applyRole(event.subject(), MastershipRole.STANDBY); 281 + //device dead to node, give up
282 + mastershipService.relinquishMastership(did);
283 + applyRole(did, MastershipRole.STANDBY);
281 } 284 }
282 } 285 }
283 } 286 }
......
1 package org.onlab.onos.net.device.impl; 1 package org.onlab.onos.net.device.impl;
2 2
3 import com.google.common.collect.Sets; 3 import com.google.common.collect.Sets;
4 +
4 import org.junit.After; 5 import org.junit.After;
5 import org.junit.Before; 6 import org.junit.Before;
6 import org.junit.Ignore; 7 import org.junit.Ignore;
...@@ -258,7 +259,8 @@ public class DeviceManagerTest { ...@@ -258,7 +259,8 @@ public class DeviceManagerTest {
258 } 259 }
259 } 260 }
260 261
261 - private static class TestMastershipService extends MastershipServiceAdapter { 262 + private static class TestMastershipService
263 + extends MastershipServiceAdapter {
262 @Override 264 @Override
263 public MastershipRole getLocalRole(DeviceId deviceId) { 265 public MastershipRole getLocalRole(DeviceId deviceId) {
264 return MastershipRole.MASTER; 266 return MastershipRole.MASTER;
......
...@@ -10,7 +10,6 @@ import org.apache.felix.scr.annotations.Component; ...@@ -10,7 +10,6 @@ import org.apache.felix.scr.annotations.Component;
10 import org.apache.felix.scr.annotations.Deactivate; 10 import org.apache.felix.scr.annotations.Deactivate;
11 import org.apache.felix.scr.annotations.Reference; 11 import org.apache.felix.scr.annotations.Reference;
12 import org.apache.felix.scr.annotations.ReferenceCardinality; 12 import org.apache.felix.scr.annotations.ReferenceCardinality;
13 -import org.apache.felix.scr.annotations.ReferencePolicy;
14 import org.apache.felix.scr.annotations.Service; 13 import org.apache.felix.scr.annotations.Service;
15 import org.onlab.onos.cluster.ClusterService; 14 import org.onlab.onos.cluster.ClusterService;
16 import org.onlab.onos.cluster.MastershipEvent; 15 import org.onlab.onos.cluster.MastershipEvent;
...@@ -20,15 +19,16 @@ import org.onlab.onos.cluster.MastershipTerm; ...@@ -20,15 +19,16 @@ import org.onlab.onos.cluster.MastershipTerm;
20 import org.onlab.onos.cluster.NodeId; 19 import org.onlab.onos.cluster.NodeId;
21 import org.onlab.onos.net.DeviceId; 20 import org.onlab.onos.net.DeviceId;
22 import org.onlab.onos.net.MastershipRole; 21 import org.onlab.onos.net.MastershipRole;
23 -import org.onlab.onos.net.device.DeviceService;
24 import org.onlab.onos.store.common.AbstractHazelcastStore; 22 import org.onlab.onos.store.common.AbstractHazelcastStore;
25 23
26 import com.google.common.collect.ImmutableSet; 24 import com.google.common.collect.ImmutableSet;
27 import com.hazelcast.core.ILock; 25 import com.hazelcast.core.ILock;
28 import com.hazelcast.core.IMap; 26 import com.hazelcast.core.IMap;
27 +import com.hazelcast.core.MultiMap;
29 28
30 /** 29 /**
31 - * Distributed implementation of the cluster nodes store. 30 + * Distributed implementation of the mastership store. The store is
31 + * responsible for the master selection process.
32 */ 32 */
33 @Component(immediate = true) 33 @Component(immediate = true)
34 @Service 34 @Service
...@@ -38,35 +38,34 @@ implements MastershipStore { ...@@ -38,35 +38,34 @@ implements MastershipStore {
38 38
39 //arbitrary lock name 39 //arbitrary lock name
40 private static final String LOCK = "lock"; 40 private static final String LOCK = "lock";
41 - //initial term value 41 + //initial term/TTL value
42 private static final Integer INIT = 0; 42 private static final Integer INIT = 0;
43 - //placeholder non-null value
44 - private static final Byte NIL = 0x0;
45 43
46 //devices to masters 44 //devices to masters
47 - protected IMap<byte[], byte[]> rawMasters; 45 + protected IMap<byte[], byte[]> masters;
48 //devices to terms 46 //devices to terms
49 - protected IMap<byte[], Integer> rawTerms; 47 + protected IMap<byte[], Integer> terms;
50 - //collection of nodes. values are ignored, as it's used as a makeshift 'set' 48 +
51 - protected IMap<byte[], Byte> backups; 49 + //re-election related, disjoint-set structures:
50 + //device-to-nodes multimap of available (standby) nodes
51 + protected MultiMap<byte[], byte[]> standbys;
52 + //device-to-nodes multimap of nodes that have given up on the device
53 + protected MultiMap<byte[], byte[]> unusable;
52 54
53 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 55 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
54 protected ClusterService clusterService; 56 protected ClusterService clusterService;
55 57
56 - //FIXME: need to guarantee that this will be met, sans circular dependencies
57 - @Reference(policy = ReferencePolicy.DYNAMIC)
58 - protected DeviceService deviceService;
59 -
60 @Override 58 @Override
61 @Activate 59 @Activate
62 public void activate() { 60 public void activate() {
63 super.activate(); 61 super.activate();
64 62
65 - rawMasters = theInstance.getMap("masters"); 63 + masters = theInstance.getMap("masters");
66 - rawTerms = theInstance.getMap("terms"); 64 + terms = theInstance.getMap("terms");
67 - backups = theInstance.getMap("backups"); 65 + standbys = theInstance.getMultiMap("backups");
66 + unusable = theInstance.getMultiMap("unusable");
68 67
69 - rawMasters.addEntryListener(new RemoteMasterShipEventHandler(), true); 68 + masters.addEntryListener(new RemoteMasterShipEventHandler(), true);
70 69
71 log.info("Started"); 70 log.info("Started");
72 } 71 }
...@@ -77,6 +76,30 @@ implements MastershipStore { ...@@ -77,6 +76,30 @@ implements MastershipStore {
77 } 76 }
78 77
79 @Override 78 @Override
79 + public MastershipRole getRole(NodeId nodeId, DeviceId deviceId) {
80 + byte[] did = serialize(deviceId);
81 + byte[] nid = serialize(nodeId);
82 +
83 + NodeId current = deserialize(masters.get(did));
84 + if (current == null) {
85 + if (standbys.containsEntry(did, nid)) {
86 + //was previously standby, or set to standby from master
87 + return MastershipRole.STANDBY;
88 + } else {
89 + return MastershipRole.NONE;
90 + }
91 + } else {
92 + if (current.equals(nodeId)) {
93 + //*should* be in unusable, but this is not always the case
94 + return MastershipRole.MASTER;
95 + } else {
96 + //may be in backups or unusable from earlier retirement
97 + return MastershipRole.STANDBY;
98 + }
99 + }
100 + }
101 +
102 + @Override
80 public MastershipEvent setMaster(NodeId nodeId, DeviceId deviceId) { 103 public MastershipEvent setMaster(NodeId nodeId, DeviceId deviceId) {
81 byte [] did = serialize(deviceId); 104 byte [] did = serialize(deviceId);
82 byte [] nid = serialize(nodeId); 105 byte [] nid = serialize(nodeId);
...@@ -85,30 +108,31 @@ implements MastershipStore { ...@@ -85,30 +108,31 @@ implements MastershipStore {
85 lock.lock(); 108 lock.lock();
86 try { 109 try {
87 MastershipRole role = getRole(nodeId, deviceId); 110 MastershipRole role = getRole(nodeId, deviceId);
88 - Integer term = rawTerms.get(did);
89 switch (role) { 111 switch (role) {
90 case MASTER: 112 case MASTER:
113 + //reinforce mastership
114 + evict(nid, did);
91 return null; 115 return null;
92 case STANDBY: 116 case STANDBY:
93 - rawMasters.put(did, nid); 117 + //make current master standby
94 - rawTerms.put(did, ++term); 118 + byte [] current = masters.get(did);
95 - backups.putIfAbsent(nid, NIL); 119 + if (current != null) {
96 - break; 120 + backup(current, did);
97 - case NONE:
98 - rawMasters.put(did, nid);
99 - //new switch OR state transition after being orphaned
100 - if (term == null) {
101 - rawTerms.put(did, INIT);
102 - } else {
103 - rawTerms.put(did, ++term);
104 } 121 }
105 - backups.put(nid, NIL); 122 + //assign specified node as new master
106 - break; 123 + masters.put(did, nid);
124 + evict(nid, did);
125 + updateTerm(did);
126 + return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
127 + case NONE:
128 + masters.put(did, nid);
129 + evict(nid, did);
130 + updateTerm(did);
131 + return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
107 default: 132 default:
108 log.warn("unknown Mastership Role {}", role); 133 log.warn("unknown Mastership Role {}", role);
109 return null; 134 return null;
110 } 135 }
111 - return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
112 } finally { 136 } finally {
113 lock.unlock(); 137 lock.unlock();
114 } 138 }
...@@ -116,14 +140,14 @@ implements MastershipStore { ...@@ -116,14 +140,14 @@ implements MastershipStore {
116 140
117 @Override 141 @Override
118 public NodeId getMaster(DeviceId deviceId) { 142 public NodeId getMaster(DeviceId deviceId) {
119 - return deserialize(rawMasters.get(serialize(deviceId))); 143 + return deserialize(masters.get(serialize(deviceId)));
120 } 144 }
121 145
122 @Override 146 @Override
123 public Set<DeviceId> getDevices(NodeId nodeId) { 147 public Set<DeviceId> getDevices(NodeId nodeId) {
124 ImmutableSet.Builder<DeviceId> builder = ImmutableSet.builder(); 148 ImmutableSet.Builder<DeviceId> builder = ImmutableSet.builder();
125 149
126 - for (Map.Entry<byte[], byte[]> entry : rawMasters.entrySet()) { 150 + for (Map.Entry<byte[], byte[]> entry : masters.entrySet()) {
127 if (nodeId.equals(deserialize(entry.getValue()))) { 151 if (nodeId.equals(deserialize(entry.getValue()))) {
128 builder.add((DeviceId) deserialize(entry.getKey())); 152 builder.add((DeviceId) deserialize(entry.getKey()));
129 } 153 }
...@@ -134,11 +158,8 @@ implements MastershipStore { ...@@ -134,11 +158,8 @@ implements MastershipStore {
134 158
135 @Override 159 @Override
136 public MastershipRole requestRole(DeviceId deviceId) { 160 public MastershipRole requestRole(DeviceId deviceId) {
137 - // first to empty slot for device in master map is MASTER
138 - // depending on how backups are organized, might need to trigger election
139 - // so only controller doesn't set itself to backup for another device
140 - byte [] did = serialize(deviceId);
141 NodeId local = clusterService.getLocalNode().id(); 161 NodeId local = clusterService.getLocalNode().id();
162 + byte [] did = serialize(deviceId);
142 byte [] lnid = serialize(local); 163 byte [] lnid = serialize(local);
143 164
144 ILock lock = theInstance.getLock(LOCK); 165 ILock lock = theInstance.getLock(LOCK);
...@@ -147,15 +168,17 @@ implements MastershipStore { ...@@ -147,15 +168,17 @@ implements MastershipStore {
147 MastershipRole role = getRole(local, deviceId); 168 MastershipRole role = getRole(local, deviceId);
148 switch (role) { 169 switch (role) {
149 case MASTER: 170 case MASTER:
171 + evict(lnid, did);
150 break; 172 break;
151 case STANDBY: 173 case STANDBY:
152 - backups.put(lnid, NIL); 174 + backup(lnid, did);
153 - rawTerms.putIfAbsent(did, INIT); 175 + terms.putIfAbsent(did, INIT);
154 break; 176 break;
155 case NONE: 177 case NONE:
156 - rawMasters.put(did, lnid); 178 + //claim mastership
157 - rawTerms.putIfAbsent(did, INIT); 179 + masters.put(did, lnid);
158 - backups.put(lnid, NIL); 180 + evict(lnid, did);
181 + updateTerm(did);
159 role = MastershipRole.MASTER; 182 role = MastershipRole.MASTER;
160 break; 183 break;
161 default: 184 default:
...@@ -168,41 +191,21 @@ implements MastershipStore { ...@@ -168,41 +191,21 @@ implements MastershipStore {
168 } 191 }
169 192
170 @Override 193 @Override
171 - public MastershipRole getRole(NodeId nodeId, DeviceId deviceId) {
172 - byte[] did = serialize(deviceId);
173 -
174 - NodeId current = deserialize(rawMasters.get(did));
175 - MastershipRole role = null;
176 -
177 - if (current == null) {
178 - //IFF no controllers have claimed mastership over it
179 - role = MastershipRole.NONE;
180 - } else {
181 - if (current.equals(nodeId)) {
182 - role = MastershipRole.MASTER;
183 - } else {
184 - role = MastershipRole.STANDBY;
185 - }
186 - }
187 -
188 - return role;
189 - }
190 -
191 - @Override
192 public MastershipTerm getTermFor(DeviceId deviceId) { 194 public MastershipTerm getTermFor(DeviceId deviceId) {
193 byte[] did = serialize(deviceId); 195 byte[] did = serialize(deviceId);
194 - 196 + if ((masters.get(did) == null) ||
195 - if ((rawMasters.get(did) == null) || 197 + (terms.get(did) == null)) {
196 - (rawTerms.get(did) == null)) {
197 return null; 198 return null;
198 } 199 }
199 return MastershipTerm.of( 200 return MastershipTerm.of(
200 - (NodeId) deserialize(rawMasters.get(did)), rawTerms.get(did)); 201 + (NodeId) deserialize(masters.get(did)), terms.get(did));
201 } 202 }
202 203
203 @Override 204 @Override
204 - public MastershipEvent unsetMaster(NodeId nodeId, DeviceId deviceId) { 205 + public MastershipEvent setStandby(NodeId nodeId, DeviceId deviceId) {
205 byte [] did = serialize(deviceId); 206 byte [] did = serialize(deviceId);
207 + byte [] nid = serialize(nodeId);
208 + MastershipEvent event = null;
206 209
207 ILock lock = theInstance.getLock(LOCK); 210 ILock lock = theInstance.getLock(LOCK);
208 lock.lock(); 211 lock.lock();
...@@ -210,54 +213,113 @@ implements MastershipStore { ...@@ -210,54 +213,113 @@ implements MastershipStore {
210 MastershipRole role = getRole(nodeId, deviceId); 213 MastershipRole role = getRole(nodeId, deviceId);
211 switch (role) { 214 switch (role) {
212 case MASTER: 215 case MASTER:
213 - //hand off device to another 216 + event = reelect(nodeId, deviceId);
214 - NodeId backup = reelect(nodeId, deviceId); 217 + backup(nid, did);
215 - if (backup == null) { 218 + break;
216 - //goes back to NONE
217 - rawMasters.remove(did);
218 - } else {
219 - //goes to STANDBY for local, MASTER for someone else
220 - Integer term = rawTerms.get(did);
221 - rawMasters.put(did, serialize(backup));
222 - rawTerms.put(did, ++term);
223 - return new MastershipEvent(MASTER_CHANGED, deviceId, backup);
224 - }
225 case STANDBY: 219 case STANDBY:
220 + //fall through to reinforce role
226 case NONE: 221 case NONE:
222 + backup(nid, did);
227 break; 223 break;
228 default: 224 default:
229 log.warn("unknown Mastership Role {}", role); 225 log.warn("unknown Mastership Role {}", role);
230 } 226 }
231 - return null; 227 + return event;
232 } finally { 228 } finally {
233 lock.unlock(); 229 lock.unlock();
234 } 230 }
235 } 231 }
236 232
237 - //helper for "re-electing" a new master for a given device 233 + @Override
238 - private NodeId reelect(NodeId current, DeviceId deviceId) { 234 + public MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId) {
235 + byte [] did = serialize(deviceId);
236 + byte [] nid = serialize(nodeId);
237 + MastershipEvent event = null;
239 238
240 - for (byte [] node : backups.keySet()) { 239 + ILock lock = theInstance.getLock(LOCK);
241 - NodeId nid = deserialize(node); 240 + lock.lock();
242 - //if a device dies we shouldn't pick another master for it. 241 + try {
243 - if (!current.equals(nid) && (deviceService.isAvailable(deviceId))) { 242 + MastershipRole role = getRole(nodeId, deviceId);
244 - return nid; 243 + switch (role) {
244 + case MASTER:
245 + event = reelect(nodeId, deviceId);
246 + evict(nid, did);
247 + break;
248 + case STANDBY:
249 + //fall through to reinforce relinquishment
250 + case NONE:
251 + evict(nid, did);
252 + break;
253 + default:
254 + log.warn("unknown Mastership Role {}", role);
245 } 255 }
256 + return event;
257 + } finally {
258 + lock.unlock();
246 } 259 }
247 - return null;
248 } 260 }
249 261
250 - //adds node to pool(s) of backup 262 + //helper to fetch a new master candidate for a given device.
251 - private void backup(NodeId nodeId, DeviceId deviceId) { 263 + private MastershipEvent reelect(NodeId current, DeviceId deviceId) {
252 - //TODO might be useful to isolate out this function and reelect() if we 264 + byte [] did = serialize(deviceId);
253 - //get more backup/election schemes 265 + byte [] nid = serialize(current);
266 +
267 + //if this were a queue it'd be neater.
268 + byte [] backup = null;
269 + for (byte [] n : standbys.get(serialize(deviceId))) {
270 + if (!current.equals(deserialize(n))) {
271 + backup = n;
272 + break;
273 + }
274 + }
275 +
276 + if (backup == null) {
277 + masters.remove(did, nid);
278 + return null;
279 + } else {
280 + masters.put(did, backup);
281 + evict(backup, did);
282 + Integer term = terms.get(did);
283 + terms.put(did, ++term);
284 + return new MastershipEvent(
285 + MASTER_CHANGED, deviceId, (NodeId) deserialize(backup));
286 + }
287 + }
288 +
289 + //adds node to the pool of backups and removes it from unusable.
290 + private void backup(byte [] nodeId, byte [] deviceId) {
291 + if (!standbys.containsEntry(deviceId, nodeId)) {
292 + standbys.put(deviceId, nodeId);
293 + }
294 + if (unusable.containsEntry(deviceId, nodeId)) {
295 + unusable.remove(deviceId, nodeId);
296 + }
297 + }
298 +
299 + //adds node to unusable and evicts it from backup pool.
300 + private void evict(byte [] nodeId, byte [] deviceId) {
301 + if (!unusable.containsEntry(deviceId, nodeId)) {
302 + unusable.put(deviceId, nodeId);
303 + }
304 + if (standbys.containsEntry(deviceId, nodeId)) {
305 + standbys.remove(deviceId, nodeId);
306 + }
307 + }
308 +
309 + //adds or updates term information.
310 + private void updateTerm(byte [] deviceId) {
311 + Integer term = terms.get(deviceId);
312 + if (term == null) {
313 + terms.put(deviceId, INIT);
314 + } else {
315 + terms.put(deviceId, ++term);
316 + }
254 } 317 }
255 318
256 private class RemoteMasterShipEventHandler extends RemoteEventHandler<DeviceId, NodeId> { 319 private class RemoteMasterShipEventHandler extends RemoteEventHandler<DeviceId, NodeId> {
257 320
258 @Override 321 @Override
259 protected void onAdd(DeviceId deviceId, NodeId nodeId) { 322 protected void onAdd(DeviceId deviceId, NodeId nodeId) {
260 - //only addition indicates a change in mastership
261 notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId)); 323 notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
262 } 324 }
263 325
...@@ -268,6 +330,7 @@ implements MastershipStore { ...@@ -268,6 +330,7 @@ implements MastershipStore {
268 330
269 @Override 331 @Override
270 protected void onUpdate(DeviceId deviceId, NodeId oldNodeId, NodeId nodeId) { 332 protected void onUpdate(DeviceId deviceId, NodeId oldNodeId, NodeId nodeId) {
333 + //only addition indicates a change in mastership
271 //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId)); 334 //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
272 } 335 }
273 } 336 }
......
...@@ -5,7 +5,7 @@ import static org.junit.Assert.assertNull; ...@@ -5,7 +5,7 @@ import static org.junit.Assert.assertNull;
5 import static org.junit.Assert.assertTrue; 5 import static org.junit.Assert.assertTrue;
6 import static org.onlab.onos.net.MastershipRole.*; 6 import static org.onlab.onos.net.MastershipRole.*;
7 7
8 -import java.util.List; 8 +import java.util.Map;
9 import java.util.Set; 9 import java.util.Set;
10 import java.util.concurrent.CountDownLatch; 10 import java.util.concurrent.CountDownLatch;
11 import java.util.concurrent.TimeUnit; 11 import java.util.concurrent.TimeUnit;
...@@ -26,13 +26,7 @@ import org.onlab.onos.cluster.MastershipEvent.Type; ...@@ -26,13 +26,7 @@ import org.onlab.onos.cluster.MastershipEvent.Type;
26 import org.onlab.onos.cluster.MastershipStoreDelegate; 26 import org.onlab.onos.cluster.MastershipStoreDelegate;
27 import org.onlab.onos.cluster.MastershipTerm; 27 import org.onlab.onos.cluster.MastershipTerm;
28 import org.onlab.onos.cluster.NodeId; 28 import org.onlab.onos.cluster.NodeId;
29 -import org.onlab.onos.net.Device;
30 import org.onlab.onos.net.DeviceId; 29 import org.onlab.onos.net.DeviceId;
31 -import org.onlab.onos.net.MastershipRole;
32 -import org.onlab.onos.net.Port;
33 -import org.onlab.onos.net.PortNumber;
34 -import org.onlab.onos.net.device.DeviceListener;
35 -import org.onlab.onos.net.device.DeviceService;
36 import org.onlab.onos.store.common.StoreManager; 30 import org.onlab.onos.store.common.StoreManager;
37 import org.onlab.onos.store.common.StoreService; 31 import org.onlab.onos.store.common.StoreService;
38 import org.onlab.onos.store.common.TestStoreManager; 32 import org.onlab.onos.store.common.TestStoreManager;
...@@ -87,7 +81,6 @@ public class DistributedMastershipStoreTest { ...@@ -87,7 +81,6 @@ public class DistributedMastershipStoreTest {
87 81
88 dms = new TestDistributedMastershipStore(storeMgr, serializationMgr); 82 dms = new TestDistributedMastershipStore(storeMgr, serializationMgr);
89 dms.clusterService = new TestClusterService(); 83 dms.clusterService = new TestClusterService();
90 - dms.deviceService = new TestDeviceService();
91 dms.activate(); 84 dms.activate();
92 85
93 testStore = (TestDistributedMastershipStore) dms; 86 testStore = (TestDistributedMastershipStore) dms;
...@@ -105,14 +98,14 @@ public class DistributedMastershipStoreTest { ...@@ -105,14 +98,14 @@ public class DistributedMastershipStoreTest {
105 @Test 98 @Test
106 public void getRole() { 99 public void getRole() {
107 assertEquals("wrong role:", NONE, dms.getRole(N1, DID1)); 100 assertEquals("wrong role:", NONE, dms.getRole(N1, DID1));
108 - testStore.put(DID1, N1, true, true, true); 101 + testStore.put(DID1, N1, true, false, true);
109 assertEquals("wrong role:", MASTER, dms.getRole(N1, DID1)); 102 assertEquals("wrong role:", MASTER, dms.getRole(N1, DID1));
110 assertEquals("wrong role:", STANDBY, dms.getRole(N2, DID1)); 103 assertEquals("wrong role:", STANDBY, dms.getRole(N2, DID1));
111 } 104 }
112 105
113 @Test 106 @Test
114 public void getMaster() { 107 public void getMaster() {
115 - assertTrue("wrong store state:", dms.rawMasters.isEmpty()); 108 + assertTrue("wrong store state:", dms.masters.isEmpty());
116 109
117 testStore.put(DID1, N1, true, false, false); 110 testStore.put(DID1, N1, true, false, false);
118 assertEquals("wrong master:", N1, dms.getMaster(DID1)); 111 assertEquals("wrong master:", N1, dms.getMaster(DID1));
...@@ -121,7 +114,7 @@ public class DistributedMastershipStoreTest { ...@@ -121,7 +114,7 @@ public class DistributedMastershipStoreTest {
121 114
122 @Test 115 @Test
123 public void getDevices() { 116 public void getDevices() {
124 - assertTrue("wrong store state:", dms.rawMasters.isEmpty()); 117 + assertTrue("wrong store state:", dms.masters.isEmpty());
125 118
126 testStore.put(DID1, N1, true, false, false); 119 testStore.put(DID1, N1, true, false, false);
127 testStore.put(DID2, N1, true, false, false); 120 testStore.put(DID2, N1, true, false, false);
...@@ -139,20 +132,17 @@ public class DistributedMastershipStoreTest { ...@@ -139,20 +132,17 @@ public class DistributedMastershipStoreTest {
139 //if already MASTER, nothing should happen 132 //if already MASTER, nothing should happen
140 testStore.put(DID2, N1, true, false, false); 133 testStore.put(DID2, N1, true, false, false);
141 assertEquals("wrong role for MASTER:", MASTER, dms.requestRole(DID2)); 134 assertEquals("wrong role for MASTER:", MASTER, dms.requestRole(DID2));
142 - assertTrue("wrong state for store:",
143 - dms.backups.isEmpty() & dms.rawTerms.isEmpty());
144 135
145 //populate maps with DID1, N1 thru NONE case 136 //populate maps with DID1, N1 thru NONE case
146 assertEquals("wrong role for NONE:", MASTER, dms.requestRole(DID1)); 137 assertEquals("wrong role for NONE:", MASTER, dms.requestRole(DID1));
147 - assertTrue("wrong state for store:", 138 + assertTrue("wrong state for store:", !dms.terms.isEmpty());
148 - !dms.backups.isEmpty() & !dms.rawTerms.isEmpty());
149 assertEquals("wrong term", 139 assertEquals("wrong term",
150 MastershipTerm.of(N1, 0), dms.getTermFor(DID1)); 140 MastershipTerm.of(N1, 0), dms.getTermFor(DID1));
151 141
152 //CN2 now local. DID2 has N1 as MASTER so N2 is STANDBY 142 //CN2 now local. DID2 has N1 as MASTER so N2 is STANDBY
153 testStore.setCurrent(CN2); 143 testStore.setCurrent(CN2);
154 assertEquals("wrong role for STANDBY:", STANDBY, dms.requestRole(DID2)); 144 assertEquals("wrong role for STANDBY:", STANDBY, dms.requestRole(DID2));
155 - assertEquals("wrong number of entries:", 2, dms.rawTerms.size()); 145 + assertEquals("wrong number of entries:", 2, dms.terms.size());
156 146
157 //change term and requestRole() again; should persist 147 //change term and requestRole() again; should persist
158 testStore.increment(DID2); 148 testStore.increment(DID2);
...@@ -181,35 +171,42 @@ public class DistributedMastershipStoreTest { ...@@ -181,35 +171,42 @@ public class DistributedMastershipStoreTest {
181 } 171 }
182 172
183 @Test 173 @Test
184 - public void unsetMaster() { 174 + public void relinquishRole() {
185 //populate maps with DID1, N1 as MASTER thru NONE case 175 //populate maps with DID1, N1 as MASTER thru NONE case
186 testStore.setCurrent(CN1); 176 testStore.setCurrent(CN1);
187 assertEquals("wrong role for NONE:", MASTER, dms.requestRole(DID1)); 177 assertEquals("wrong role for NONE:", MASTER, dms.requestRole(DID1));
188 //no backup, no new MASTER/event 178 //no backup, no new MASTER/event
189 - assertNull("wrong event:", dms.unsetMaster(N1, DID1)); 179 + assertNull("wrong event:", dms.relinquishRole(N1, DID1));
190 180
191 dms.requestRole(DID1); 181 dms.requestRole(DID1);
192 - ((TestDeviceService) dms.deviceService).active.add(DID1);
193 182
194 //add backup CN2, get it elected MASTER by relinquishing 183 //add backup CN2, get it elected MASTER by relinquishing
195 testStore.setCurrent(CN2); 184 testStore.setCurrent(CN2);
196 - dms.requestRole(DID1); 185 + assertEquals("wrong role for NONE:", STANDBY, dms.requestRole(DID1));
197 - assertEquals("wrong event:", Type.MASTER_CHANGED, dms.unsetMaster(N1, DID1).type()); 186 + assertEquals("wrong event:", Type.MASTER_CHANGED, dms.relinquishRole(N1, DID1).type());
198 assertEquals("wrong master", N2, dms.getMaster(DID1)); 187 assertEquals("wrong master", N2, dms.getMaster(DID1));
199 188
200 //STANDBY - nothing here, either 189 //STANDBY - nothing here, either
201 - assertNull("wrong event:", dms.unsetMaster(N1, DID1)); 190 + assertNull("wrong event:", dms.relinquishRole(N1, DID1));
202 assertEquals("wrong role for node:", STANDBY, dms.getRole(N1, DID1)); 191 assertEquals("wrong role for node:", STANDBY, dms.getRole(N1, DID1));
203 192
193 + //all nodes "give up" on device, which goes back to NONE.
194 + assertNull("wrong event:", dms.relinquishRole(N2, DID1));
195 + assertEquals("wrong role for node:", NONE, dms.getRole(N2, DID1));
196 + assertEquals("wrong role for node:", NONE, dms.getRole(N1, DID1));
197 +
198 + assertEquals("wrong number of retired nodes", 2, dms.unusable.size());
199 +
200 + //bring nodes back
201 + assertEquals("wrong role for NONE:", MASTER, dms.requestRole(DID1));
202 + testStore.setCurrent(CN1);
203 + assertEquals("wrong role for NONE:", STANDBY, dms.requestRole(DID1));
204 + assertEquals("wrong number of backup nodes", 1, dms.standbys.size());
205 +
204 //NONE - nothing happens 206 //NONE - nothing happens
205 - assertNull("wrong event:", dms.unsetMaster(N1, DID2)); 207 + assertNull("wrong event:", dms.relinquishRole(N1, DID2));
206 assertEquals("wrong role for node:", NONE, dms.getRole(N1, DID2)); 208 assertEquals("wrong role for node:", NONE, dms.getRole(N1, DID2));
207 209
208 - //for a device that turned off (not active) - status to NONE
209 - ((TestDeviceService) dms.deviceService).active.clear();
210 - assertNull("extraneous event:", dms.unsetMaster(N2, DID1));
211 - assertEquals("wrong role", NONE, dms.getRole(N2, DID1));
212 -
213 } 210 }
214 211
215 @Ignore("Ignore until Delegate spec. is clear.") 212 @Ignore("Ignore until Delegate spec. is clear.")
...@@ -244,36 +241,55 @@ public class DistributedMastershipStoreTest { ...@@ -244,36 +241,55 @@ public class DistributedMastershipStoreTest {
244 241
245 //helper to populate master/backup structures 242 //helper to populate master/backup structures
246 public void put(DeviceId dev, NodeId node, 243 public void put(DeviceId dev, NodeId node,
247 - boolean store, boolean backup, boolean term) { 244 + boolean master, boolean backup, boolean term) {
248 - if (store) { 245 + byte [] n = serialize(node);
249 - dms.rawMasters.put(serialize(dev), serialize(node)); 246 + byte [] d = serialize(dev);
247 +
248 + if (master) {
249 + dms.masters.put(d, n);
250 + dms.unusable.put(d, n);
251 + dms.standbys.remove(d, n);
250 } 252 }
251 if (backup) { 253 if (backup) {
252 - dms.backups.put(serialize(node), (byte) 0); 254 + dms.standbys.put(d, n);
255 + dms.masters.remove(d, n);
256 + dms.unusable.remove(d, n);
253 } 257 }
254 if (term) { 258 if (term) {
255 - dms.rawTerms.put(serialize(dev), 0); 259 + dms.terms.put(d, 0);
260 + }
261 + }
262 +
263 + public void dump() {
264 + System.out.println("standbys");
265 + for (Map.Entry<byte [], byte []> e : standbys.entrySet()) {
266 + System.out.println(deserialize(e.getKey()) + ":" + deserialize(e.getValue()));
267 + }
268 + System.out.println("unusable");
269 + for (Map.Entry<byte [], byte []> e : unusable.entrySet()) {
270 + System.out.println(deserialize(e.getKey()) + ":" + deserialize(e.getValue()));
256 } 271 }
257 } 272 }
258 273
259 //clears structures 274 //clears structures
260 public void reset(boolean store, boolean backup, boolean term) { 275 public void reset(boolean store, boolean backup, boolean term) {
261 if (store) { 276 if (store) {
262 - dms.rawMasters.clear(); 277 + dms.masters.clear();
278 + dms.unusable.clear();
263 } 279 }
264 if (backup) { 280 if (backup) {
265 - dms.backups.clear(); 281 + dms.standbys.clear();
266 } 282 }
267 if (term) { 283 if (term) {
268 - dms.rawTerms.clear(); 284 + dms.terms.clear();
269 } 285 }
270 } 286 }
271 287
272 //increment term for a device 288 //increment term for a device
273 public void increment(DeviceId dev) { 289 public void increment(DeviceId dev) {
274 - Integer t = dms.rawTerms.get(serialize(dev)); 290 + Integer t = dms.terms.get(serialize(dev));
275 if (t != null) { 291 if (t != null) {
276 - dms.rawTerms.put(serialize(dev), ++t); 292 + dms.terms.put(serialize(dev), ++t);
277 } 293 }
278 } 294 }
279 295
...@@ -317,52 +333,4 @@ public class DistributedMastershipStoreTest { ...@@ -317,52 +333,4 @@ public class DistributedMastershipStoreTest {
317 333
318 } 334 }
319 335
320 - private class TestDeviceService implements DeviceService {
321 -
322 - Set<DeviceId> active = Sets.newHashSet();
323 -
324 - @Override
325 - public int getDeviceCount() {
326 - return 0;
327 - }
328 -
329 - @Override
330 - public Iterable<Device> getDevices() {
331 - return null;
332 - }
333 -
334 - @Override
335 - public Device getDevice(DeviceId deviceId) {
336 - return null;
337 - }
338 -
339 - @Override
340 - public MastershipRole getRole(DeviceId deviceId) {
341 - return null;
342 - }
343 -
344 - @Override
345 - public List<Port> getPorts(DeviceId deviceId) {
346 - return null;
347 - }
348 -
349 - @Override
350 - public Port getPort(DeviceId deviceId, PortNumber portNumber) {
351 - return null;
352 - }
353 -
354 - @Override
355 - public boolean isAvailable(DeviceId deviceId) {
356 - return active.contains(deviceId);
357 - }
358 -
359 - @Override
360 - public void addListener(DeviceListener listener) {
361 - }
362 -
363 - @Override
364 - public void removeListener(DeviceListener listener) {
365 - }
366 -
367 - }
368 } 336 }
......
...@@ -174,7 +174,7 @@ public class SimpleMastershipStore ...@@ -174,7 +174,7 @@ public class SimpleMastershipStore
174 } 174 }
175 175
176 @Override 176 @Override
177 - public MastershipEvent unsetMaster(NodeId nodeId, DeviceId deviceId) { 177 + public MastershipEvent setStandby(NodeId nodeId, DeviceId deviceId) {
178 MastershipRole role = getRole(nodeId, deviceId); 178 MastershipRole role = getRole(nodeId, deviceId);
179 synchronized (this) { 179 synchronized (this) {
180 switch (role) { 180 switch (role) {
...@@ -214,4 +214,9 @@ public class SimpleMastershipStore ...@@ -214,4 +214,9 @@ public class SimpleMastershipStore
214 return backup; 214 return backup;
215 } 215 }
216 216
217 + @Override
218 + public MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId) {
219 + return setStandby(nodeId, deviceId);
220 + }
221 +
217 } 222 }
......
...@@ -129,22 +129,22 @@ public class SimpleMastershipStoreTest { ...@@ -129,22 +129,22 @@ public class SimpleMastershipStoreTest {
129 public void unsetMaster() { 129 public void unsetMaster() {
130 //NONE - record backup but take no other action 130 //NONE - record backup but take no other action
131 put(DID1, N1, false, false); 131 put(DID1, N1, false, false);
132 - sms.unsetMaster(N1, DID1); 132 + sms.setStandby(N1, DID1);
133 assertTrue("not backed up", sms.backups.contains(N1)); 133 assertTrue("not backed up", sms.backups.contains(N1));
134 sms.termMap.clear(); 134 sms.termMap.clear();
135 - sms.unsetMaster(N1, DID1); 135 + sms.setStandby(N1, DID1);
136 assertTrue("term not set", sms.termMap.containsKey(DID1)); 136 assertTrue("term not set", sms.termMap.containsKey(DID1));
137 137
138 //no backup, MASTER 138 //no backup, MASTER
139 put(DID1, N1, true, true); 139 put(DID1, N1, true, true);
140 - assertNull("wrong event", sms.unsetMaster(N1, DID1)); 140 + assertNull("wrong event", sms.setStandby(N1, DID1));
141 assertNull("wrong node", sms.masterMap.get(DID1)); 141 assertNull("wrong node", sms.masterMap.get(DID1));
142 142
143 //backup, switch 143 //backup, switch
144 sms.masterMap.clear(); 144 sms.masterMap.clear();
145 put(DID1, N1, true, true); 145 put(DID1, N1, true, true);
146 put(DID2, N2, true, true); 146 put(DID2, N2, true, true);
147 - assertEquals("wrong event", MASTER_CHANGED, sms.unsetMaster(N1, DID1).type()); 147 + assertEquals("wrong event", MASTER_CHANGED, sms.setStandby(N1, DID1).type());
148 } 148 }
149 149
150 //helper to populate master/backup structures 150 //helper to populate master/backup structures
......
...@@ -981,11 +981,13 @@ class OFChannelHandler extends IdleStateAwareChannelHandler { ...@@ -981,11 +981,13 @@ class OFChannelHandler extends IdleStateAwareChannelHandler {
981 // switch was a duplicate-dpid, calling the method below would clear 981 // switch was a duplicate-dpid, calling the method below would clear
982 // all state for the original switch (with the same dpid), 982 // all state for the original switch (with the same dpid),
983 // which we obviously don't want. 983 // which we obviously don't want.
984 + log.info("{}:removal called");
984 sw.removeConnectedSwitch(); 985 sw.removeConnectedSwitch();
985 } else { 986 } else {
986 // A duplicate was disconnected on this ChannelHandler, 987 // A duplicate was disconnected on this ChannelHandler,
987 // this is the same switch reconnecting, but the original state was 988 // this is the same switch reconnecting, but the original state was
988 // not cleaned up - XXX check liveness of original ChannelHandler 989 // not cleaned up - XXX check liveness of original ChannelHandler
990 + log.info("{}:duplicate found");
989 duplicateDpidFound = Boolean.FALSE; 991 duplicateDpidFound = Boolean.FALSE;
990 } 992 }
991 } else { 993 } else {
......
...@@ -307,9 +307,11 @@ public class OpenFlowControllerImpl implements OpenFlowController { ...@@ -307,9 +307,11 @@ public class OpenFlowControllerImpl implements OpenFlowController {
307 connectedSwitches.remove(dpid); 307 connectedSwitches.remove(dpid);
308 OpenFlowSwitch sw = activeMasterSwitches.remove(dpid); 308 OpenFlowSwitch sw = activeMasterSwitches.remove(dpid);
309 if (sw == null) { 309 if (sw == null) {
310 + log.warn("sw was null for {}", dpid);
310 sw = activeEqualSwitches.remove(dpid); 311 sw = activeEqualSwitches.remove(dpid);
311 } 312 }
312 for (OpenFlowSwitchListener l : ofSwitchListener) { 313 for (OpenFlowSwitchListener l : ofSwitchListener) {
314 + log.warn("removal for {}", dpid);
313 l.switchRemoved(dpid); 315 l.switchRemoved(dpid);
314 } 316 }
315 } 317 }
......