fixes for mastership handoff race conditions
Change-Id: Ifed733df1bdc3b144b6a341a9322838ea2aacd72
Showing
9 changed files
with
159 additions
and
37 deletions
... | @@ -104,7 +104,6 @@ implements MastershipService, MastershipAdminService { | ... | @@ -104,7 +104,6 @@ implements MastershipService, MastershipAdminService { |
104 | MastershipEvent event = null; | 104 | MastershipEvent event = null; |
105 | event = store.relinquishRole( | 105 | event = store.relinquishRole( |
106 | clusterService.getLocalNode().id(), deviceId); | 106 | clusterService.getLocalNode().id(), deviceId); |
107 | - | ||
108 | if (event != null) { | 107 | if (event != null) { |
109 | post(event); | 108 | post(event); |
110 | } | 109 | } |
... | @@ -229,7 +228,8 @@ implements MastershipService, MastershipAdminService { | ... | @@ -229,7 +228,8 @@ implements MastershipService, MastershipAdminService { |
229 | return true; | 228 | return true; |
230 | } | 229 | } |
231 | //else { | 230 | //else { |
232 | - //FIXME: break tie for equal-sized clusters, can we use hz's functions? | 231 | + //FIXME: break tie for equal-sized clusters, |
232 | + // maybe by number of connected switches | ||
233 | // } | 233 | // } |
234 | return false; | 234 | return false; |
235 | } | 235 | } | ... | ... |
... | @@ -161,6 +161,9 @@ public class DeviceManager | ... | @@ -161,6 +161,9 @@ public class DeviceManager |
161 | @Override | 161 | @Override |
162 | public void removeDevice(DeviceId deviceId) { | 162 | public void removeDevice(DeviceId deviceId) { |
163 | checkNotNull(deviceId, DEVICE_ID_NULL); | 163 | checkNotNull(deviceId, DEVICE_ID_NULL); |
164 | + // XXX is this intended to apply to the full global topology? | ||
165 | + // if so, we probably don't want the fact that we aren't | ||
166 | + // MASTER to get in the way, as it would do now. | ||
164 | DeviceEvent event = store.removeDevice(deviceId); | 167 | DeviceEvent event = store.removeDevice(deviceId); |
165 | if (event != null) { | 168 | if (event != null) { |
166 | log.info("Device {} administratively removed", deviceId); | 169 | log.info("Device {} administratively removed", deviceId); |
... | @@ -203,19 +206,21 @@ public class DeviceManager | ... | @@ -203,19 +206,21 @@ public class DeviceManager |
203 | log.info("Device {} connected", deviceId); | 206 | log.info("Device {} connected", deviceId); |
204 | // check my Role | 207 | // check my Role |
205 | MastershipRole role = mastershipService.requestRoleFor(deviceId); | 208 | MastershipRole role = mastershipService.requestRoleFor(deviceId); |
206 | - | 209 | + log.info("## - our role for {} is {} [master is {}]", deviceId, role, |
210 | + mastershipService.getMasterFor(deviceId)); | ||
207 | if (role != MastershipRole.MASTER) { | 211 | if (role != MastershipRole.MASTER) { |
208 | // TODO: Do we need to explicitly tell the Provider that | 212 | // TODO: Do we need to explicitly tell the Provider that |
209 | // this instance is no longer the MASTER? probably not | 213 | // this instance is no longer the MASTER? probably not |
210 | return; | 214 | return; |
211 | } | 215 | } |
212 | - | ||
213 | MastershipTerm term = mastershipService.requestTermService() | 216 | MastershipTerm term = mastershipService.requestTermService() |
214 | .getMastershipTerm(deviceId); | 217 | .getMastershipTerm(deviceId); |
218 | + | ||
215 | if (!term.master().equals(clusterService.getLocalNode().id())) { | 219 | if (!term.master().equals(clusterService.getLocalNode().id())) { |
216 | // lost mastership after requestRole told this instance was MASTER. | 220 | // lost mastership after requestRole told this instance was MASTER. |
217 | return; | 221 | return; |
218 | } | 222 | } |
223 | + | ||
219 | // tell clock provider if this instance is the master | 224 | // tell clock provider if this instance is the master |
220 | deviceClockProviderService.setMastershipTerm(deviceId, term); | 225 | deviceClockProviderService.setMastershipTerm(deviceId, term); |
221 | 226 | ||
... | @@ -256,19 +261,32 @@ public class DeviceManager | ... | @@ -256,19 +261,32 @@ public class DeviceManager |
256 | // but if I was the last STANDBY connection, etc. and no one else | 261 | // but if I was the last STANDBY connection, etc. and no one else |
257 | // was there to mark the device offline, this instance may need to | 262 | // was there to mark the device offline, this instance may need to |
258 | // temporarily request for Master Role and mark offline. | 263 | // temporarily request for Master Role and mark offline. |
264 | + log.info("## for {} role is {}", deviceId, mastershipService.getLocalRole(deviceId)); | ||
259 | if (!mastershipService.getLocalRole(deviceId).equals(MastershipRole.MASTER)) { | 265 | if (!mastershipService.getLocalRole(deviceId).equals(MastershipRole.MASTER)) { |
260 | log.debug("Device {} disconnected, but I am not the master", deviceId); | 266 | log.debug("Device {} disconnected, but I am not the master", deviceId); |
261 | //let go of ability to be backup | 267 | //let go of ability to be backup |
262 | mastershipService.relinquishMastership(deviceId); | 268 | mastershipService.relinquishMastership(deviceId); |
263 | return; | 269 | return; |
264 | } | 270 | } |
265 | - DeviceEvent event = store.markOffline(deviceId); | ||
266 | - //relinquish master role and ability to be backup. | ||
267 | - mastershipService.relinquishMastership(deviceId); | ||
268 | 271 | ||
269 | - if (event != null) { | 272 | + DeviceEvent event = null; |
270 | - log.info("Device {} disconnected", deviceId); | 273 | + try { |
271 | - post(event); | 274 | + event = store.markOffline(deviceId); |
275 | + } catch (IllegalStateException e) { | ||
276 | + //there are times when this node will correctly have mastership, BUT | ||
277 | + //that isn't reflected in the ClockManager before the device disconnects. | ||
278 | + //we want to let go of the device anyways, so make sure this happens. | ||
279 | + MastershipTerm term = termService.getMastershipTerm(deviceId); | ||
280 | + deviceClockProviderService.setMastershipTerm(deviceId, term); | ||
281 | + event = store.markOffline(deviceId); | ||
282 | + } finally { | ||
283 | + //relinquish master role and ability to be backup. | ||
284 | + mastershipService.relinquishMastership(deviceId); | ||
285 | + | ||
286 | + if (event != null) { | ||
287 | + log.info("Device {} disconnected", deviceId); | ||
288 | + post(event); | ||
289 | + } | ||
272 | } | 290 | } |
273 | } | 291 | } |
274 | 292 | ||
... | @@ -279,7 +297,15 @@ public class DeviceManager | ... | @@ -279,7 +297,15 @@ public class DeviceManager |
279 | checkNotNull(portDescriptions, | 297 | checkNotNull(portDescriptions, |
280 | "Port descriptions list cannot be null"); | 298 | "Port descriptions list cannot be null"); |
281 | checkValidity(); | 299 | checkValidity(); |
300 | + //XXX what's this doing here? | ||
282 | this.provider().id(); | 301 | this.provider().id(); |
302 | + | ||
303 | + if (!mastershipService.getLocalRole(deviceId).equals(MastershipRole.MASTER)) { | ||
304 | + // TODO If we become master, then we'll trigger something to update this | ||
305 | + // info to fix any inconsistencies that may result during the handoff. | ||
306 | + return; | ||
307 | + } | ||
308 | + | ||
283 | List<DeviceEvent> events = store.updatePorts(this.provider().id(), | 309 | List<DeviceEvent> events = store.updatePorts(this.provider().id(), |
284 | deviceId, portDescriptions); | 310 | deviceId, portDescriptions); |
285 | for (DeviceEvent event : events) { | 311 | for (DeviceEvent event : events) { |
... | @@ -293,6 +319,12 @@ public class DeviceManager | ... | @@ -293,6 +319,12 @@ public class DeviceManager |
293 | checkNotNull(deviceId, DEVICE_ID_NULL); | 319 | checkNotNull(deviceId, DEVICE_ID_NULL); |
294 | checkNotNull(portDescription, PORT_DESCRIPTION_NULL); | 320 | checkNotNull(portDescription, PORT_DESCRIPTION_NULL); |
295 | checkValidity(); | 321 | checkValidity(); |
322 | + | ||
323 | + if (!mastershipService.getLocalRole(deviceId).equals(MastershipRole.MASTER)) { | ||
324 | + // TODO If we become master, then we'll trigger something to update this | ||
325 | + // info to fix any inconsistencies that may result during the handoff. | ||
326 | + return; | ||
327 | + } | ||
296 | DeviceEvent event = store.updatePortStatus(this.provider().id(), | 328 | DeviceEvent event = store.updatePortStatus(this.provider().id(), |
297 | deviceId, portDescription); | 329 | deviceId, portDescription); |
298 | if (event != null) { | 330 | if (event != null) { |
... | @@ -328,27 +360,37 @@ public class DeviceManager | ... | @@ -328,27 +360,37 @@ public class DeviceManager |
328 | final DeviceId did = event.subject(); | 360 | final DeviceId did = event.subject(); |
329 | final NodeId myNodeId = clusterService.getLocalNode().id(); | 361 | final NodeId myNodeId = clusterService.getLocalNode().id(); |
330 | 362 | ||
363 | + log.info("## got Mastershipevent for dev {}", did); | ||
331 | if (myNodeId.equals(event.master())) { | 364 | if (myNodeId.equals(event.master())) { |
332 | MastershipTerm term = termService.getMastershipTerm(did); | 365 | MastershipTerm term = termService.getMastershipTerm(did); |
333 | 366 | ||
334 | - if (term.master().equals(myNodeId)) { | 367 | + if (!myNodeId.equals(term.master())) { |
335 | - // only set the new term if I am the master | 368 | + // something went wrong in consistency, let go |
336 | - deviceClockProviderService.setMastershipTerm(did, term); | 369 | + mastershipService.relinquishMastership(did); |
370 | + applyRole(did, MastershipRole.STANDBY); | ||
371 | + return; | ||
337 | } | 372 | } |
338 | 373 | ||
374 | + log.info("## setting term for CPS as new master for {}", did); | ||
375 | + // only set the new term if I am the master | ||
376 | + deviceClockProviderService.setMastershipTerm(did, term); | ||
377 | + | ||
339 | // FIXME: we should check that the device is connected on our end. | 378 | // FIXME: we should check that the device is connected on our end. |
340 | // currently, this is not straight forward as the actual switch | 379 | // currently, this is not straight forward as the actual switch |
341 | - // implementation is hidden from the registry. | 380 | + // implementation is hidden from the registry. Maybe we can ask the |
342 | - if (!isAvailable(did)) { | 381 | + // provider. |
382 | + // if the device is null here, we are the first master to claim the | ||
383 | + // device. No worries, the DeviceManager will create one soon. | ||
384 | + Device device = getDevice(did); | ||
385 | + if ((device != null) && !isAvailable(did)) { | ||
343 | //flag the device as online. Is there a better way to do this? | 386 | //flag the device as online. Is there a better way to do this? |
344 | - Device device = getDevice(did); | ||
345 | store.createOrUpdateDevice(device.providerId(), did, | 387 | store.createOrUpdateDevice(device.providerId(), did, |
346 | new DefaultDeviceDescription( | 388 | new DefaultDeviceDescription( |
347 | did.uri(), device.type(), device.manufacturer(), | 389 | did.uri(), device.type(), device.manufacturer(), |
348 | device.hwVersion(), device.swVersion(), | 390 | device.hwVersion(), device.swVersion(), |
349 | device.serialNumber())); | 391 | device.serialNumber())); |
350 | } | 392 | } |
351 | - | 393 | + //TODO re-collect device information to fix potential staleness |
352 | applyRole(did, MastershipRole.MASTER); | 394 | applyRole(did, MastershipRole.MASTER); |
353 | } else { | 395 | } else { |
354 | applyRole(did, MastershipRole.STANDBY); | 396 | applyRole(did, MastershipRole.STANDBY); | ... | ... |
... | @@ -16,6 +16,7 @@ import org.onlab.onos.event.EventDeliveryService; | ... | @@ -16,6 +16,7 @@ import org.onlab.onos.event.EventDeliveryService; |
16 | import org.onlab.onos.net.ConnectPoint; | 16 | import org.onlab.onos.net.ConnectPoint; |
17 | import org.onlab.onos.net.DeviceId; | 17 | import org.onlab.onos.net.DeviceId; |
18 | import org.onlab.onos.net.Link; | 18 | import org.onlab.onos.net.Link; |
19 | +import org.onlab.onos.net.MastershipRole; | ||
19 | import org.onlab.onos.net.device.DeviceEvent; | 20 | import org.onlab.onos.net.device.DeviceEvent; |
20 | import org.onlab.onos.net.device.DeviceListener; | 21 | import org.onlab.onos.net.device.DeviceListener; |
21 | import org.onlab.onos.net.device.DeviceService; | 22 | import org.onlab.onos.net.device.DeviceService; |
... | @@ -139,11 +140,17 @@ public class LinkManager | ... | @@ -139,11 +140,17 @@ public class LinkManager |
139 | 140 | ||
140 | @Override | 141 | @Override |
141 | public void removeLinks(ConnectPoint connectPoint) { | 142 | public void removeLinks(ConnectPoint connectPoint) { |
143 | + if (deviceService.getRole(connectPoint.deviceId()) != MastershipRole.MASTER) { | ||
144 | + return; | ||
145 | + } | ||
142 | removeLinks(getLinks(connectPoint)); | 146 | removeLinks(getLinks(connectPoint)); |
143 | } | 147 | } |
144 | 148 | ||
145 | @Override | 149 | @Override |
146 | public void removeLinks(DeviceId deviceId) { | 150 | public void removeLinks(DeviceId deviceId) { |
151 | + if (deviceService.getRole(deviceId) != MastershipRole.MASTER) { | ||
152 | + return; | ||
153 | + } | ||
147 | removeLinks(getDeviceLinks(deviceId)); | 154 | removeLinks(getDeviceLinks(deviceId)); |
148 | } | 155 | } |
149 | 156 | ||
... | @@ -189,6 +196,15 @@ public class LinkManager | ... | @@ -189,6 +196,15 @@ public class LinkManager |
189 | public void linkDetected(LinkDescription linkDescription) { | 196 | public void linkDetected(LinkDescription linkDescription) { |
190 | checkNotNull(linkDescription, LINK_DESC_NULL); | 197 | checkNotNull(linkDescription, LINK_DESC_NULL); |
191 | checkValidity(); | 198 | checkValidity(); |
199 | + | ||
200 | + ConnectPoint src = linkDescription.src(); | ||
201 | + ConnectPoint dst = linkDescription.dst(); | ||
202 | + // if we aren't master for the device associated with the ConnectPoint | ||
203 | + // we probably shouldn't be doing this. | ||
204 | + if ((deviceService.getRole(src.deviceId()) != MastershipRole.MASTER) || | ||
205 | + (deviceService.getRole(dst.deviceId()) != MastershipRole.MASTER)) { | ||
206 | + return; | ||
207 | + } | ||
192 | LinkEvent event = store.createOrUpdateLink(provider().id(), | 208 | LinkEvent event = store.createOrUpdateLink(provider().id(), |
193 | linkDescription); | 209 | linkDescription); |
194 | if (event != null) { | 210 | if (event != null) { |
... | @@ -201,6 +217,15 @@ public class LinkManager | ... | @@ -201,6 +217,15 @@ public class LinkManager |
201 | public void linkVanished(LinkDescription linkDescription) { | 217 | public void linkVanished(LinkDescription linkDescription) { |
202 | checkNotNull(linkDescription, LINK_DESC_NULL); | 218 | checkNotNull(linkDescription, LINK_DESC_NULL); |
203 | checkValidity(); | 219 | checkValidity(); |
220 | + | ||
221 | + ConnectPoint src = linkDescription.src(); | ||
222 | + ConnectPoint dst = linkDescription.dst(); | ||
223 | + // if we aren't master for the device associated with the ConnectPoint | ||
224 | + // we probably shouldn't be doing this. | ||
225 | + if ((deviceService.getRole(src.deviceId()) != MastershipRole.MASTER) || | ||
226 | + (deviceService.getRole(dst.deviceId()) != MastershipRole.MASTER)) { | ||
227 | + return; | ||
228 | + } | ||
204 | LinkEvent event = store.removeLink(linkDescription.src(), | 229 | LinkEvent event = store.removeLink(linkDescription.src(), |
205 | linkDescription.dst()); | 230 | linkDescription.dst()); |
206 | if (event != null) { | 231 | if (event != null) { |
... | @@ -213,6 +238,11 @@ public class LinkManager | ... | @@ -213,6 +238,11 @@ public class LinkManager |
213 | public void linksVanished(ConnectPoint connectPoint) { | 238 | public void linksVanished(ConnectPoint connectPoint) { |
214 | checkNotNull(connectPoint, "Connect point cannot be null"); | 239 | checkNotNull(connectPoint, "Connect point cannot be null"); |
215 | checkValidity(); | 240 | checkValidity(); |
241 | + // if we aren't master for the device associated with the ConnectPoint | ||
242 | + // we probably shouldn't be doing this. | ||
243 | + if (deviceService.getRole(connectPoint.deviceId()) != MastershipRole.MASTER) { | ||
244 | + return; | ||
245 | + } | ||
216 | log.info("Links for connection point {} vanished", connectPoint); | 246 | log.info("Links for connection point {} vanished", connectPoint); |
217 | removeLinks(getLinks(connectPoint)); | 247 | removeLinks(getLinks(connectPoint)); |
218 | } | 248 | } |
... | @@ -221,6 +251,11 @@ public class LinkManager | ... | @@ -221,6 +251,11 @@ public class LinkManager |
221 | public void linksVanished(DeviceId deviceId) { | 251 | public void linksVanished(DeviceId deviceId) { |
222 | checkNotNull(deviceId, DEVICE_ID_NULL); | 252 | checkNotNull(deviceId, DEVICE_ID_NULL); |
223 | checkValidity(); | 253 | checkValidity(); |
254 | + // if we aren't master for the device associated with the ConnectPoint | ||
255 | + // we probably shouldn't be doing this. | ||
256 | + if (deviceService.getRole(deviceId) != MastershipRole.MASTER) { | ||
257 | + return; | ||
258 | + } | ||
224 | log.info("Links for device {} vanished", deviceId); | 259 | log.info("Links for device {} vanished", deviceId); |
225 | removeLinks(getDeviceLinks(deviceId)); | 260 | removeLinks(getDeviceLinks(deviceId)); |
226 | } | 261 | } | ... | ... |
... | @@ -59,6 +59,7 @@ public class LinkManagerTest { | ... | @@ -59,6 +59,7 @@ public class LinkManagerTest { |
59 | protected LinkProviderService providerService; | 59 | protected LinkProviderService providerService; |
60 | protected TestProvider provider; | 60 | protected TestProvider provider; |
61 | protected TestListener listener = new TestListener(); | 61 | protected TestListener listener = new TestListener(); |
62 | + protected DeviceManager devmgr = new TestDeviceManager(); | ||
62 | 63 | ||
63 | @Before | 64 | @Before |
64 | public void setUp() { | 65 | public void setUp() { |
... | @@ -68,7 +69,7 @@ public class LinkManagerTest { | ... | @@ -68,7 +69,7 @@ public class LinkManagerTest { |
68 | registry = mgr; | 69 | registry = mgr; |
69 | mgr.store = new SimpleLinkStore(); | 70 | mgr.store = new SimpleLinkStore(); |
70 | mgr.eventDispatcher = new TestEventDispatcher(); | 71 | mgr.eventDispatcher = new TestEventDispatcher(); |
71 | - mgr.deviceService = new DeviceManager(); | 72 | + mgr.deviceService = devmgr; |
72 | mgr.activate(); | 73 | mgr.activate(); |
73 | 74 | ||
74 | service.addListener(listener); | 75 | service.addListener(listener); |
... | @@ -259,4 +260,11 @@ public class LinkManagerTest { | ... | @@ -259,4 +260,11 @@ public class LinkManagerTest { |
259 | } | 260 | } |
260 | } | 261 | } |
261 | 262 | ||
263 | + private static class TestDeviceManager extends DeviceManager { | ||
264 | + @Override | ||
265 | + public MastershipRole getRole(DeviceId deviceId) { | ||
266 | + return MastershipRole.MASTER; | ||
267 | + } | ||
268 | + } | ||
269 | + | ||
262 | } | 270 | } | ... | ... |
... | @@ -44,6 +44,8 @@ public class DeviceClockManager implements DeviceClockService, DeviceClockProvid | ... | @@ -44,6 +44,8 @@ public class DeviceClockManager implements DeviceClockService, DeviceClockProvid |
44 | @Override | 44 | @Override |
45 | public Timestamp getTimestamp(DeviceId deviceId) { | 45 | public Timestamp getTimestamp(DeviceId deviceId) { |
46 | MastershipTerm term = deviceMastershipTerms.get(deviceId); | 46 | MastershipTerm term = deviceMastershipTerms.get(deviceId); |
47 | + log.info("term info for {} is: {}", deviceId, term); | ||
48 | + | ||
47 | if (term == null) { | 49 | if (term == null) { |
48 | throw new IllegalStateException("Requesting timestamp for a deviceId without mastership"); | 50 | throw new IllegalStateException("Requesting timestamp for a deviceId without mastership"); |
49 | } | 51 | } |
... | @@ -52,6 +54,7 @@ public class DeviceClockManager implements DeviceClockService, DeviceClockProvid | ... | @@ -52,6 +54,7 @@ public class DeviceClockManager implements DeviceClockService, DeviceClockProvid |
52 | 54 | ||
53 | @Override | 55 | @Override |
54 | public void setMastershipTerm(DeviceId deviceId, MastershipTerm term) { | 56 | public void setMastershipTerm(DeviceId deviceId, MastershipTerm term) { |
57 | + log.info("adding term info {} {}", deviceId, term.master()); | ||
55 | deviceMastershipTerms.put(deviceId, term); | 58 | deviceMastershipTerms.put(deviceId, term); |
56 | } | 59 | } |
57 | } | 60 | } | ... | ... |
... | @@ -390,6 +390,7 @@ public class GossipDeviceStore | ... | @@ -390,6 +390,7 @@ public class GossipDeviceStore |
390 | List<PortDescription> portDescriptions) { | 390 | List<PortDescription> portDescriptions) { |
391 | 391 | ||
392 | final Timestamp newTimestamp = deviceClockService.getTimestamp(deviceId); | 392 | final Timestamp newTimestamp = deviceClockService.getTimestamp(deviceId); |
393 | + log.info("timestamp for {} {}", deviceId, newTimestamp); | ||
393 | 394 | ||
394 | final Timestamped<List<PortDescription>> timestampedInput | 395 | final Timestamped<List<PortDescription>> timestampedInput |
395 | = new Timestamped<>(portDescriptions, newTimestamp); | 396 | = new Timestamped<>(portDescriptions, newTimestamp); | ... | ... |
... | @@ -360,7 +360,14 @@ public class GossipLinkStore | ... | @@ -360,7 +360,14 @@ public class GossipLinkStore |
360 | final LinkKey key = linkKey(src, dst); | 360 | final LinkKey key = linkKey(src, dst); |
361 | 361 | ||
362 | DeviceId dstDeviceId = dst.deviceId(); | 362 | DeviceId dstDeviceId = dst.deviceId(); |
363 | - Timestamp timestamp = deviceClockService.getTimestamp(dstDeviceId); | 363 | + Timestamp timestamp = null; |
364 | + try { | ||
365 | + timestamp = deviceClockService.getTimestamp(dstDeviceId); | ||
366 | + } catch (IllegalStateException e) { | ||
367 | + //there are times when this is called before mastership | ||
368 | + // handoff correctly completes. | ||
369 | + return null; | ||
370 | + } | ||
364 | 371 | ||
365 | LinkEvent event = removeLinkInternal(key, timestamp); | 372 | LinkEvent event = removeLinkInternal(key, timestamp); |
366 | 373 | ... | ... |
... | @@ -29,7 +29,10 @@ import org.onlab.onos.store.serializers.KryoSerializer; | ... | @@ -29,7 +29,10 @@ import org.onlab.onos.store.serializers.KryoSerializer; |
29 | import org.onlab.util.KryoPool; | 29 | import org.onlab.util.KryoPool; |
30 | 30 | ||
31 | import com.google.common.collect.ImmutableSet; | 31 | import com.google.common.collect.ImmutableSet; |
32 | +import com.hazelcast.core.EntryEvent; | ||
33 | +import com.hazelcast.core.EntryListener; | ||
32 | import com.hazelcast.core.IAtomicLong; | 34 | import com.hazelcast.core.IAtomicLong; |
35 | +import com.hazelcast.core.MapEvent; | ||
33 | 36 | ||
34 | import static org.onlab.onos.net.MastershipRole.*; | 37 | import static org.onlab.onos.net.MastershipRole.*; |
35 | 38 | ||
... | @@ -78,7 +81,7 @@ implements MastershipStore { | ... | @@ -78,7 +81,7 @@ implements MastershipStore { |
78 | roleMap = new SMap(theInstance.getMap("nodeRoles"), this.serializer); | 81 | roleMap = new SMap(theInstance.getMap("nodeRoles"), this.serializer); |
79 | terms = new SMap(theInstance.getMap("terms"), this.serializer); | 82 | terms = new SMap(theInstance.getMap("terms"), this.serializer); |
80 | clusterSize = theInstance.getAtomicLong("clustersize"); | 83 | clusterSize = theInstance.getAtomicLong("clustersize"); |
81 | - // roleMap.addEntryListener(new RemoteMasterShipEventHandler(), true); | 84 | + roleMap.addEntryListener((new RemoteMasterShipEventHandler()), true); |
82 | 85 | ||
83 | log.info("Started"); | 86 | log.info("Started"); |
84 | } | 87 | } |
... | @@ -207,6 +210,7 @@ implements MastershipStore { | ... | @@ -207,6 +210,7 @@ implements MastershipStore { |
207 | rv.reassign(local, NONE, STANDBY); | 210 | rv.reassign(local, NONE, STANDBY); |
208 | roleMap.put(deviceId, rv); | 211 | roleMap.put(deviceId, rv); |
209 | terms.putIfAbsent(deviceId, INIT); | 212 | terms.putIfAbsent(deviceId, INIT); |
213 | + | ||
210 | break; | 214 | break; |
211 | case NONE: | 215 | case NONE: |
212 | //claim mastership | 216 | //claim mastership |
... | @@ -289,7 +293,8 @@ implements MastershipStore { | ... | @@ -289,7 +293,8 @@ implements MastershipStore { |
289 | } | 293 | } |
290 | 294 | ||
291 | //helper to fetch a new master candidate for a given device. | 295 | //helper to fetch a new master candidate for a given device. |
292 | - private MastershipEvent reelect(NodeId current, DeviceId deviceId, RoleValue rv) { | 296 | + private MastershipEvent reelect( |
297 | + NodeId current, DeviceId deviceId, RoleValue rv) { | ||
293 | 298 | ||
294 | //if this is an queue it'd be neater. | 299 | //if this is an queue it'd be neater. |
295 | NodeId backup = null; | 300 | NodeId backup = null; |
... | @@ -301,17 +306,18 @@ implements MastershipStore { | ... | @@ -301,17 +306,18 @@ implements MastershipStore { |
301 | } | 306 | } |
302 | 307 | ||
303 | if (backup == null) { | 308 | if (backup == null) { |
309 | + log.info("{} giving up and going to NONE for {}", current, deviceId); | ||
304 | rv.remove(MASTER, current); | 310 | rv.remove(MASTER, current); |
305 | roleMap.put(deviceId, rv); | 311 | roleMap.put(deviceId, rv); |
306 | return null; | 312 | return null; |
307 | } else { | 313 | } else { |
314 | + log.info("{} trying to pass mastership for {} to {}", current, deviceId, backup); | ||
308 | rv.replace(current, backup, MASTER); | 315 | rv.replace(current, backup, MASTER); |
309 | rv.reassign(backup, STANDBY, NONE); | 316 | rv.reassign(backup, STANDBY, NONE); |
310 | roleMap.put(deviceId, rv); | 317 | roleMap.put(deviceId, rv); |
311 | Integer term = terms.get(deviceId); | 318 | Integer term = terms.get(deviceId); |
312 | terms.put(deviceId, ++term); | 319 | terms.put(deviceId, ++term); |
313 | - return new MastershipEvent( | 320 | + return new MastershipEvent(MASTER_CHANGED, deviceId, backup); |
314 | - MASTER_CHANGED, deviceId, backup); | ||
315 | } | 321 | } |
316 | } | 322 | } |
317 | 323 | ||
... | @@ -346,30 +352,51 @@ implements MastershipStore { | ... | @@ -346,30 +352,51 @@ implements MastershipStore { |
346 | 352 | ||
347 | //adds or updates term information. | 353 | //adds or updates term information. |
348 | private void updateTerm(DeviceId deviceId) { | 354 | private void updateTerm(DeviceId deviceId) { |
349 | - Integer term = terms.get(deviceId); | 355 | + terms.lock(deviceId); |
350 | - if (term == null) { | 356 | + try { |
351 | - terms.put(deviceId, INIT); | 357 | + Integer term = terms.get(deviceId); |
352 | - } else { | 358 | + if (term == null) { |
353 | - terms.put(deviceId, ++term); | 359 | + terms.put(deviceId, INIT); |
360 | + } else { | ||
361 | + terms.put(deviceId, ++term); | ||
362 | + } | ||
363 | + } finally { | ||
364 | + terms.unlock(deviceId); | ||
354 | } | 365 | } |
355 | } | 366 | } |
356 | 367 | ||
357 | - private class RemoteMasterShipEventHandler extends RemoteEventHandler<DeviceId, NodeId> { | 368 | + private class RemoteMasterShipEventHandler implements EntryListener<DeviceId, RoleValue> { |
369 | + | ||
370 | + @Override | ||
371 | + public void entryAdded(EntryEvent<DeviceId, RoleValue> event) { | ||
372 | + } | ||
373 | + | ||
374 | + @Override | ||
375 | + public void entryRemoved(EntryEvent<DeviceId, RoleValue> event) { | ||
376 | + } | ||
377 | + | ||
378 | + @Override | ||
379 | + public void entryUpdated(EntryEvent<DeviceId, RoleValue> event) { | ||
380 | + NodeId myId = clusterService.getLocalNode().id(); | ||
381 | + NodeId node = event.getValue().get(MASTER); | ||
382 | + if (myId.equals(node)) { | ||
383 | + // XXX or do we just let it get sent and caught by ourself? | ||
384 | + return; | ||
385 | + } | ||
386 | + notifyDelegate(new MastershipEvent( | ||
387 | + MASTER_CHANGED, event.getKey(), event.getValue().get(MASTER))); | ||
388 | + } | ||
358 | 389 | ||
359 | @Override | 390 | @Override |
360 | - protected void onAdd(DeviceId deviceId, NodeId nodeId) { | 391 | + public void entryEvicted(EntryEvent<DeviceId, RoleValue> event) { |
361 | - notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId)); | ||
362 | } | 392 | } |
363 | 393 | ||
364 | @Override | 394 | @Override |
365 | - protected void onRemove(DeviceId deviceId, NodeId nodeId) { | 395 | + public void mapEvicted(MapEvent event) { |
366 | - //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId)); | ||
367 | } | 396 | } |
368 | 397 | ||
369 | @Override | 398 | @Override |
370 | - protected void onUpdate(DeviceId deviceId, NodeId oldNodeId, NodeId nodeId) { | 399 | + public void mapCleared(MapEvent event) { |
371 | - //only addition indicates a change in mastership | ||
372 | - //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId)); | ||
373 | } | 400 | } |
374 | } | 401 | } |
375 | 402 | ... | ... |
... | @@ -94,7 +94,6 @@ public final class KryoPoolUtil { | ... | @@ -94,7 +94,6 @@ public final class KryoPoolUtil { |
94 | .register(ConnectPoint.class, new ConnectPointSerializer()) | 94 | .register(ConnectPoint.class, new ConnectPointSerializer()) |
95 | .register(DefaultLink.class, new DefaultLinkSerializer()) | 95 | .register(DefaultLink.class, new DefaultLinkSerializer()) |
96 | .register(MastershipTerm.class, new MastershipTermSerializer()) | 96 | .register(MastershipTerm.class, new MastershipTermSerializer()) |
97 | - .register(MastershipRole.class, new MastershipRoleSerializer()) | ||
98 | .register(HostLocation.class, new HostLocationSerializer()) | 97 | .register(HostLocation.class, new HostLocationSerializer()) |
99 | 98 | ||
100 | .build(); | 99 | .build(); | ... | ... |
-
Please register or login to post a comment