Brian O'Connor
Committed by Gerrit Code Review

ONOS-3350 AbstractOFSwitch race fixes

Protecting callers with synchronized block during
role request and reply methods.

Change-Id: Ie82f84d1d462923c9f410e6950e846ee3b05551c
...@@ -29,14 +29,10 @@ public interface OpenFlowSwitch { ...@@ -29,14 +29,10 @@ public interface OpenFlowSwitch {
29 29
30 /** 30 /**
31 * Writes the message to the driver. 31 * Writes the message to the driver.
32 - * 32 + * <p>
33 - * Note: 33 + * Note: Messages may be silently dropped/lost due to IOExceptions or
34 - * Calling {@link #sendMsg(OFMessage)} does NOT guarantee the messages to be 34 + * role. If this is a concern, then a caller should use barriers.
35 - * transmitted on the wire in order, especially during role transition. 35 + * </p>
36 - * The messages may be reordered at the switch side.
37 - *
38 - * Calling {@link #sendMsg(List)} guarantee the messages inside the list
39 - * to be transmitted on the wire in order.
40 * 36 *
41 * @param msg the message to write 37 * @param msg the message to write
42 */ 38 */
...@@ -44,6 +40,10 @@ public interface OpenFlowSwitch { ...@@ -44,6 +40,10 @@ public interface OpenFlowSwitch {
44 40
45 /** 41 /**
46 * Writes the OFMessage list to the driver. 42 * Writes the OFMessage list to the driver.
43 + * <p>
44 + * Note: Messages may be silently dropped/lost due to IOExceptions or
45 + * role. If this is a concern, then a caller should use barriers.
46 + * </p>
47 * 47 *
48 * @param msgs the messages to be written 48 * @param msgs the messages to be written
49 */ 49 */
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
16 16
17 package org.onosproject.openflow.controller.driver; 17 package org.onosproject.openflow.controller.driver;
18 18
19 +import com.google.common.collect.Lists;
19 import org.jboss.netty.channel.Channel; 20 import org.jboss.netty.channel.Channel;
20 import org.onlab.packet.IpAddress; 21 import org.onlab.packet.IpAddress;
21 import org.onosproject.net.Device; 22 import org.onosproject.net.Device;
...@@ -46,6 +47,7 @@ import java.util.ArrayList; ...@@ -46,6 +47,7 @@ import java.util.ArrayList;
46 import java.util.Collections; 47 import java.util.Collections;
47 import java.util.List; 48 import java.util.List;
48 import java.util.concurrent.atomic.AtomicInteger; 49 import java.util.concurrent.atomic.AtomicInteger;
50 +import java.util.concurrent.atomic.AtomicReference;
49 import java.util.stream.Collectors; 51 import java.util.stream.Collectors;
50 52
51 /** 53 /**
...@@ -74,12 +76,14 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -74,12 +76,14 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
74 76
75 private RoleHandler roleMan; 77 private RoleHandler roleMan;
76 78
77 - protected RoleState role; 79 + // TODO this is accessed from multiple threads, but volatile may have performance implications
80 + protected volatile RoleState role;
78 81
79 protected OFFeaturesReply features; 82 protected OFFeaturesReply features;
80 protected OFDescStatsReply desc; 83 protected OFDescStatsReply desc;
81 84
82 - List<OFMessage> messagesPendingMastership; 85 + private final AtomicReference<List<OFMessage>> messagesPendingMastership
86 + = new AtomicReference<>();
83 87
84 @Override 88 @Override
85 public void init(Dpid dpid, OFDescStatsReply desc, OFVersion ofv) { 89 public void init(Dpid dpid, OFDescStatsReply desc, OFVersion ofv) {
...@@ -104,15 +108,53 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -104,15 +108,53 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
104 108
105 @Override 109 @Override
106 public final void sendMsg(List<OFMessage> msgs) { 110 public final void sendMsg(List<OFMessage> msgs) {
107 - if (role == RoleState.MASTER && channel.isConnected()) { 111 + /*
112 + It is possible that in this block, we transition to SLAVE/EQUAL.
113 + If this is the case, the supplied messages will race with the
114 + RoleRequest message, and they could be rejected by the switch.
115 + In the interest of performance, we will not protect this block with
116 + a synchronization primitive, because the message would have just been
117 + dropped anyway.
118 + */
119 + if (role == RoleState.MASTER) {
120 + // fast path send when we are master
121 +
122 + sendMsgsOnChannel(msgs);
123 + return;
124 + }
125 + // check to see if mastership transition is in progress
126 + synchronized (messagesPendingMastership) {
127 + /*
128 + messagesPendingMastership is used as synchronization variable for
129 + all mastership related changes. In this block, mastership (including
130 + role update) will have either occurred or not.
131 + */
132 + if (role == RoleState.MASTER) {
133 + // transition to MASTER complete, send messages
134 + sendMsgsOnChannel(msgs);
135 + return;
136 + }
137 +
138 + List<OFMessage> messages = messagesPendingMastership.get();
139 + if (messages != null) {
140 + // we are transitioning to MASTER, so add messages to queue
141 + messages.addAll(msgs);
142 + log.debug("Enqueue message for switch {}. queue size after is {}",
143 + dpid, messages.size());
144 + } else {
145 + // not transitioning to MASTER
146 + log.warn("Dropping message for switch {} (role: {}, connected: {}): {}",
147 + dpid, role, channel.isConnected(), msgs);
148 + }
149 + }
150 + }
151 +
152 + private void sendMsgsOnChannel(List<OFMessage> msgs) {
153 + if (channel.isConnected()) {
108 channel.write(msgs); 154 channel.write(msgs);
109 - } else if (messagesPendingMastership != null) {
110 - messagesPendingMastership.addAll(msgs);
111 - log.debug("Enqueue message for switch {}. queue size after is {}",
112 - dpid, messagesPendingMastership.size());
113 } else { 155 } else {
114 - log.warn("Dropping message for switch {} (role: {}, connected: {}): {}", 156 + log.warn("Dropping messages for switch {} because channel is not connected: {}",
115 - dpid, role, channel.isConnected(), msgs); 157 + dpid, msgs);
116 } 158 }
117 } 159 }
118 160
...@@ -120,7 +162,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -120,7 +162,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
120 public final void sendRoleRequest(OFMessage msg) { 162 public final void sendRoleRequest(OFMessage msg) {
121 if (msg instanceof OFRoleRequest || 163 if (msg instanceof OFRoleRequest ||
122 msg instanceof OFNiciraControllerRoleRequest) { 164 msg instanceof OFNiciraControllerRoleRequest) {
123 - channel.write(Collections.singletonList(msg)); 165 + sendMsgsOnChannel(Collections.singletonList(msg));
124 return; 166 return;
125 } 167 }
126 throw new IllegalArgumentException("Someone is trying to send " + 168 throw new IllegalArgumentException("Someone is trying to send " +
...@@ -130,7 +172,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -130,7 +172,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
130 @Override 172 @Override
131 public final void sendHandshakeMessage(OFMessage message) { 173 public final void sendHandshakeMessage(OFMessage message) {
132 if (!this.isDriverHandshakeComplete()) { 174 if (!this.isDriverHandshakeComplete()) {
133 - channel.write(Collections.singletonList(message)); 175 + sendMsgsOnChannel(Collections.singletonList(message));
134 } 176 }
135 } 177 }
136 178
...@@ -239,11 +281,16 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -239,11 +281,16 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
239 @Override 281 @Override
240 public final void transitionToMasterSwitch() { 282 public final void transitionToMasterSwitch() {
241 this.agent.transitionToMasterSwitch(dpid); 283 this.agent.transitionToMasterSwitch(dpid);
242 - if (messagesPendingMastership != null) { 284 + synchronized (messagesPendingMastership) {
243 - this.sendMsg(messagesPendingMastership); 285 + List<OFMessage> messages = messagesPendingMastership.get();
244 - log.debug("Sending {} pending messages to switch {}", 286 + if (messages != null) {
245 - messagesPendingMastership.size(), dpid); 287 + this.sendMsg(messages);
246 - messagesPendingMastership = null; 288 + log.debug("Sending {} pending messages to switch {}",
289 + messages.size(), dpid);
290 + messagesPendingMastership.set(null);
291 + }
292 + // perform role transition after clearing messages queue
293 + this.role = RoleState.MASTER;
247 } 294 }
248 } 295 }
249 296
...@@ -287,17 +334,27 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -287,17 +334,27 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
287 @Override 334 @Override
288 public void setRole(RoleState role) { 335 public void setRole(RoleState role) {
289 try { 336 try {
337 + if (role == RoleState.SLAVE || role == RoleState.EQUAL) {
338 + // perform role transition to SLAVE/EQUAL before sending role request
339 + this.role = role;
340 + }
290 if (this.roleMan.sendRoleRequest(role, RoleRecvStatus.MATCHED_SET_ROLE)) { 341 if (this.roleMan.sendRoleRequest(role, RoleRecvStatus.MATCHED_SET_ROLE)) {
291 log.debug("Sending role {} to switch {}", role, getStringId()); 342 log.debug("Sending role {} to switch {}", role, getStringId());
292 - if (role == RoleState.SLAVE || role == RoleState.EQUAL) { 343 + if (role == RoleState.MASTER) {
293 - this.role = role; 344 + synchronized (messagesPendingMastership) {
294 - } else { 345 + if (messagesPendingMastership.get() == null) {
295 - if (messagesPendingMastership == null) { 346 + log.debug("Initializing new message queue for switch {}", dpid);
296 - log.debug("Initializing new queue for switch {}", dpid); 347 + /*
297 - messagesPendingMastership = new ArrayList<>(); 348 + The presence of messagesPendingMastership indicates that
349 + a switch is currently transitioning to MASTER, but
350 + is still awaiting role reply from switch.
351 + */
352 + messagesPendingMastership.set(Lists.newArrayList());
353 + }
298 } 354 }
299 } 355 }
300 - } else { 356 + } else if (role == RoleState.MASTER) {
357 + // role request not support; transition switch to MASTER
301 this.role = role; 358 this.role = role;
302 } 359 }
303 } catch (IOException e) { 360 } catch (IOException e) {
...@@ -307,6 +364,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -307,6 +364,7 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
307 364
308 @Override 365 @Override
309 public void reassertRole() { 366 public void reassertRole() {
367 + // TODO should messages be sent directly or queue during reassertion?
310 if (this.getRole() == RoleState.MASTER) { 368 if (this.getRole() == RoleState.MASTER) {
311 log.warn("Received permission error from switch {} while " + 369 log.warn("Received permission error from switch {} while " +
312 "being master. Reasserting master role.", 370 "being master. Reasserting master role.",
...@@ -315,18 +373,15 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -315,18 +373,15 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
315 } 373 }
316 } 374 }
317 375
318 -
319 -
320 @Override 376 @Override
321 public void handleRole(OFMessage m) throws SwitchStateException { 377 public void handleRole(OFMessage m) throws SwitchStateException {
322 RoleReplyInfo rri = roleMan.extractOFRoleReply((OFRoleReply) m); 378 RoleReplyInfo rri = roleMan.extractOFRoleReply((OFRoleReply) m);
323 RoleRecvStatus rrs = roleMan.deliverRoleReply(rri); 379 RoleRecvStatus rrs = roleMan.deliverRoleReply(rri);
324 if (rrs == RoleRecvStatus.MATCHED_SET_ROLE) { 380 if (rrs == RoleRecvStatus.MATCHED_SET_ROLE) {
325 if (rri.getRole() == RoleState.MASTER) { 381 if (rri.getRole() == RoleState.MASTER) {
326 - this.role = rri.getRole();
327 this.transitionToMasterSwitch(); 382 this.transitionToMasterSwitch();
328 } else if (rri.getRole() == RoleState.EQUAL || 383 } else if (rri.getRole() == RoleState.EQUAL ||
329 - rri.getRole() == RoleState.SLAVE) { 384 + rri.getRole() == RoleState.SLAVE) {
330 this.transitionToEqualSwitch(); 385 this.transitionToEqualSwitch();
331 } 386 }
332 } else { 387 } else {
...@@ -348,10 +403,9 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -348,10 +403,9 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
348 new RoleReplyInfo(r, null, m.getXid())); 403 new RoleReplyInfo(r, null, m.getXid()));
349 if (rrs == RoleRecvStatus.MATCHED_SET_ROLE) { 404 if (rrs == RoleRecvStatus.MATCHED_SET_ROLE) {
350 if (r == RoleState.MASTER) { 405 if (r == RoleState.MASTER) {
351 - this.role = r;
352 this.transitionToMasterSwitch(); 406 this.transitionToMasterSwitch();
353 } else if (r == RoleState.EQUAL || 407 } else if (r == RoleState.EQUAL ||
354 - r == RoleState.SLAVE) { 408 + r == RoleState.SLAVE) {
355 this.transitionToEqualSwitch(); 409 this.transitionToEqualSwitch();
356 } 410 }
357 } else { 411 } else {
...@@ -369,8 +423,6 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -369,8 +423,6 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
369 return true; 423 return true;
370 } 424 }
371 425
372 -
373 -
374 @Override 426 @Override
375 public final void setAgent(OpenFlowAgent ag) { 427 public final void setAgent(OpenFlowAgent ag) {
376 if (this.agent == null) { 428 if (this.agent == null) {
...@@ -398,9 +450,8 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -398,9 +450,8 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
398 @Override 450 @Override
399 public List<OFPortDesc> getPorts() { 451 public List<OFPortDesc> getPorts() {
400 return this.ports.stream() 452 return this.ports.stream()
401 - .flatMap((portReply) -> (portReply.getEntries().stream())) 453 + .flatMap(portReply -> portReply.getEntries().stream())
402 .collect(Collectors.toList()); 454 .collect(Collectors.toList());
403 - //return Collections.unmodifiableList(ports.getEntries());
404 } 455 }
405 456
406 @Override 457 @Override
...@@ -408,13 +459,11 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -408,13 +459,11 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
408 return this.desc.getMfrDesc(); 459 return this.desc.getMfrDesc();
409 } 460 }
410 461
411 -
412 @Override 462 @Override
413 public String datapathDescription() { 463 public String datapathDescription() {
414 return this.desc.getDpDesc(); 464 return this.desc.getDpDesc();
415 } 465 }
416 466
417 -
418 @Override 467 @Override
419 public String hardwareDescription() { 468 public String hardwareDescription() {
420 return this.desc.getHwDesc(); 469 return this.desc.getHwDesc();
...@@ -430,20 +479,15 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour ...@@ -430,20 +479,15 @@ public abstract class AbstractOpenFlowSwitch extends AbstractHandlerBehaviour
430 return this.desc.getSerialNum(); 479 return this.desc.getSerialNum();
431 } 480 }
432 481
433 -
434 @Override 482 @Override
435 public Device.Type deviceType() { 483 public Device.Type deviceType() {
436 return Device.Type.SWITCH; 484 return Device.Type.SWITCH;
437 } 485 }
438 486
439 -
440 @Override 487 @Override
441 public String toString() { 488 public String toString() {
442 return this.getClass().getName() + " [" + ((channel != null) 489 return this.getClass().getName() + " [" + ((channel != null)
443 ? channel.getRemoteAddress() : "?") 490 ? channel.getRemoteAddress() : "?")
444 + " DPID[" + ((getStringId() != null) ? getStringId() : "?") + "]]"; 491 + " DPID[" + ((getStringId() != null) ? getStringId() : "?") + "]]";
445 } 492 }
446 -
447 -
448 -
449 } 493 }
......