HIGUCHI Yuta
Committed by Gerrit Code Review

Remove all the mastership roles when a Node leaves the cluster

- Fix for ONOS-1189

Change-Id: I695ccd6bf2ff12da3702d1a982e377b7082c9341
......@@ -113,4 +113,12 @@ public interface MastershipStore extends Store<MastershipEvent, MastershipStoreD
*/
MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId);
/**
 * Removes all the roles for the specified controller instance.
 * If the role was MASTER, another controller instance will be selected
 * as a candidate master.
 * <p>
 * Unlike {@link #relinquishRole(NodeId, DeviceId)}, this method is not
 * scoped to a single device and does not return a
 * {@link MastershipEvent} to the caller.
 *
 * @param nodeId the controller instance identifier
 */
void relinquishAllRole(NodeId nodeId);
}
......
......@@ -304,38 +304,18 @@ public class MastershipManager
case INSTANCE_REMOVED:
case INSTANCE_DEACTIVATED:
ControllerNode node = event.subject();
log.info("instance {} removed/deactivated", node);
store.relinquishAllRole(node.id());
if (node.equals(clusterService.getLocalNode())) {
//If we are in smaller cluster, relinquish and return
for (DeviceId device : getDevicesOf(node.id())) {
if (!isInMajority()) {
//own DeviceManager should catch event and tell switch
store.relinquishRole(node.id(), device);
}
}
log.info("broke off from cluster, relinquished devices");
break;
}
// if we are the larger one and the removed node(s) are brain dead,
// force relinquish on behalf of disabled node.
// check network channel to do this?
for (DeviceId device : getDevicesOf(node.id())) {
//some things to check:
// 1. we didn't break off as well while we're at it
// 2. others don't pile in and try too - maybe a lock
if (isInMajority()) {
store.relinquishRole(node.id(), device);
}
}
clusterSize.decrementAndGet();
log.info("instance {} removed/deactivated", event.subject());
break;
default:
log.warn("unknown cluster event {}", event);
}
}
// Can be removed if we go with naive split-brain handling: only majority
// assigns mastership
private boolean isInMajority() {
if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
return true;
......
......@@ -19,8 +19,11 @@ import static org.onosproject.mastership.MastershipEvent.Type.MASTER_CHANGED;
import static org.onosproject.mastership.MastershipEvent.Type.BACKUPS_CHANGED;
import static org.apache.commons.lang3.concurrent.ConcurrentUtils.putIfAbsent;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.felix.scr.annotations.Activate;
......@@ -360,6 +363,26 @@ public class DistributedMastershipStore
}
}
/**
 * Relinquishes every MASTER or STANDBY role that the given node holds
 * in {@code roleMap}, one device at a time, and then hands the collected
 * events to the delegate in a single batch.
 *
 * @param nodeId the controller instance identifier
 */
@Override
public void relinquishAllRole(NodeId nodeId) {
    final List<MastershipEvent> relinquished = new ArrayList<>();

    for (Entry<DeviceId, RoleValue> roleEntry : roleMap.entrySet()) {
        final DeviceId device = roleEntry.getKey();
        final RoleValue roles = roleEntry.getValue();

        // Devices on which the node holds neither role need no action.
        final boolean holdsRole = roles.contains(MASTER, nodeId)
                || roles.contains(STANDBY, nodeId);
        if (!holdsRole) {
            continue;
        }

        // relinquishRole may return null; only real events are forwarded.
        final MastershipEvent outcome = relinquishRole(nodeId, device);
        if (outcome == null) {
            continue;
        }
        relinquished.add(outcome);
    }

    notifyDelegate(relinquished);
}
// TODO: Consider moving this into RoleValue as a method.
// Helper to fetch a new master candidate for a given device.
private NodeId reelect(
......
......@@ -352,4 +352,27 @@ public class SimpleMastershipStore
}
return null;
}
/**
 * Relinquishes every role the given node holds in this store: each device
 * for which it is the recorded master in {@code masterMap} and each device
 * whose {@code backups} entry contains it. The non-null events returned
 * by {@code relinquishRole} are passed to the delegate in one batch.
 *
 * @param nodeId the controller instance identifier
 */
@Override
public synchronized void relinquishAllRole(NodeId nodeId) {
    // Gather the affected devices up front, before any relinquishRole call
    // (presumably because relinquishRole updates these maps - TODO confirm).
    final Set<DeviceId> affected = new HashSet<>();
    masterMap.forEach((device, master) -> {
        if (nodeId.equals(master)) {
            affected.add(device);
        }
    });
    backups.forEach((device, standbys) -> {
        if (standbys.contains(nodeId)) {
            affected.add(device);
        }
    });

    final List<MastershipEvent> raised = new ArrayList<>();
    for (DeviceId device : affected) {
        final MastershipEvent outcome = relinquishRole(nodeId, device);
        // relinquishRole may return null; only real events are forwarded.
        if (outcome != null) {
            raised.add(outcome);
        }
    }
    notifyDelegate(raised);
}
}
......