Thomas Vachuska

Added web-socket fail-over and fixed a defect in mastership balancing.

Change-Id: I14eeb17fdc7970140287b51915c5accc24cf855b
...@@ -17,8 +17,6 @@ package org.onlab.onos.cluster.impl; ...@@ -17,8 +17,6 @@ package org.onlab.onos.cluster.impl;
17 17
18 import com.codahale.metrics.Timer; 18 import com.codahale.metrics.Timer;
19 import com.codahale.metrics.Timer.Context; 19 import com.codahale.metrics.Timer.Context;
20 -import com.google.common.collect.HashMultimap;
21 -import com.google.common.collect.Multimap;
22 import org.apache.felix.scr.annotations.Activate; 20 import org.apache.felix.scr.annotations.Activate;
23 import org.apache.felix.scr.annotations.Component; 21 import org.apache.felix.scr.annotations.Component;
24 import org.apache.felix.scr.annotations.Deactivate; 22 import org.apache.felix.scr.annotations.Deactivate;
...@@ -48,8 +46,11 @@ import org.onlab.onos.net.MastershipRole; ...@@ -48,8 +46,11 @@ import org.onlab.onos.net.MastershipRole;
48 import org.slf4j.Logger; 46 import org.slf4j.Logger;
49 47
50 import java.util.Collection; 48 import java.util.Collection;
49 +import java.util.HashMap;
50 +import java.util.HashSet;
51 import java.util.Iterator; 51 import java.util.Iterator;
52 import java.util.List; 52 import java.util.List;
53 +import java.util.Map;
53 import java.util.Set; 54 import java.util.Set;
54 import java.util.concurrent.atomic.AtomicInteger; 55 import java.util.concurrent.atomic.AtomicInteger;
55 56
...@@ -57,6 +58,7 @@ import static com.google.common.base.Preconditions.checkNotNull; ...@@ -57,6 +58,7 @@ import static com.google.common.base.Preconditions.checkNotNull;
57 import static com.google.common.collect.Lists.newArrayList; 58 import static com.google.common.collect.Lists.newArrayList;
58 import static org.onlab.metrics.MetricsUtil.startTimer; 59 import static org.onlab.metrics.MetricsUtil.startTimer;
59 import static org.onlab.metrics.MetricsUtil.stopTimer; 60 import static org.onlab.metrics.MetricsUtil.stopTimer;
61 +import static org.onlab.onos.cluster.ControllerNode.State.ACTIVE;
60 import static org.onlab.onos.net.MastershipRole.MASTER; 62 import static org.onlab.onos.net.MastershipRole.MASTER;
61 import static org.slf4j.LoggerFactory.getLogger; 63 import static org.slf4j.LoggerFactory.getLogger;
62 64
...@@ -208,31 +210,34 @@ public class MastershipManager ...@@ -208,31 +210,34 @@ public class MastershipManager
208 @Override 210 @Override
209 public void balanceRoles() { 211 public void balanceRoles() {
210 List<ControllerNode> nodes = newArrayList(clusterService.getNodes()); 212 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
211 - Multimap<ControllerNode, DeviceId> controllerDevices = HashMultimap.create(); 213 + Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>();
212 int deviceCount = 0; 214 int deviceCount = 0;
213 215
214 // Create buckets reflecting current ownership. 216 // Create buckets reflecting current ownership.
215 for (ControllerNode node : nodes) { 217 for (ControllerNode node : nodes) {
216 - Set<DeviceId> devicesOf = getDevicesOf(node.id()); 218 + if (clusterService.getState(node.id()) == ACTIVE) {
219 + Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id()));
217 deviceCount += devicesOf.size(); 220 deviceCount += devicesOf.size();
218 - controllerDevices.putAll(node, devicesOf); 221 + controllerDevices.put(node, devicesOf);
219 log.info("Node {} has {} devices.", node.id(), devicesOf.size()); 222 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
220 } 223 }
224 + }
221 225
222 - int rounds = nodes.size(); 226 + // Now re-balance the buckets until they are roughly even.
227 + int rounds = controllerDevices.keySet().size();
223 for (int i = 0; i < rounds; i++) { 228 for (int i = 0; i < rounds; i++) {
224 // Iterate over the buckets and find the smallest and the largest. 229 // Iterate over the buckets and find the smallest and the largest.
225 - ControllerNode smallest = findBucket(true, nodes, controllerDevices); 230 + ControllerNode smallest = findBucket(true, controllerDevices);
226 - ControllerNode largest = findBucket(false, nodes, controllerDevices); 231 + ControllerNode largest = findBucket(false, controllerDevices);
227 balanceBuckets(smallest, largest, controllerDevices, deviceCount); 232 balanceBuckets(smallest, largest, controllerDevices, deviceCount);
228 } 233 }
229 } 234 }
230 235
231 - private ControllerNode findBucket(boolean min, Collection<ControllerNode> nodes, 236 + private ControllerNode findBucket(boolean min,
232 - Multimap<ControllerNode, DeviceId> controllerDevices) { 237 + Map<ControllerNode, Set<DeviceId>> controllerDevices) {
233 int xSize = min ? Integer.MAX_VALUE : -1; 238 int xSize = min ? Integer.MAX_VALUE : -1;
234 ControllerNode xNode = null; 239 ControllerNode xNode = null;
235 - for (ControllerNode node : nodes) { 240 + for (ControllerNode node : controllerDevices.keySet()) {
236 int size = controllerDevices.get(node).size(); 241 int size = controllerDevices.get(node).size();
237 if ((min && size < xSize) || (!min && size > xSize)) { 242 if ((min && size < xSize) || (!min && size > xSize)) {
238 xSize = size; 243 xSize = size;
...@@ -243,7 +248,7 @@ public class MastershipManager ...@@ -243,7 +248,7 @@ public class MastershipManager
243 } 248 }
244 249
245 private void balanceBuckets(ControllerNode smallest, ControllerNode largest, 250 private void balanceBuckets(ControllerNode smallest, ControllerNode largest,
246 - Multimap<ControllerNode, DeviceId> controllerDevices, 251 + Map<ControllerNode, Set<DeviceId>> controllerDevices,
247 int deviceCount) { 252 int deviceCount) {
248 Collection<DeviceId> minBucket = controllerDevices.get(smallest); 253 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
249 Collection<DeviceId> maxBucket = controllerDevices.get(largest); 254 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
...@@ -262,7 +267,7 @@ public class MastershipManager ...@@ -262,7 +267,7 @@ public class MastershipManager
262 DeviceId deviceId = it.next(); 267 DeviceId deviceId = it.next();
263 log.info("Setting {} as the master for {}", smallest.id(), deviceId); 268 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
264 setRole(smallest.id(), deviceId, MASTER); 269 setRole(smallest.id(), deviceId, MASTER);
265 - controllerDevices.put(smallest, deviceId); 270 + controllerDevices.get(smallest).add(deviceId);
266 it.remove(); 271 it.remove();
267 i++; 272 i++;
268 } 273 }
......
...@@ -197,6 +197,7 @@ ...@@ -197,6 +197,7 @@
197 summaryPane, 197 summaryPane,
198 detailPane, 198 detailPane,
199 antTimer = null, 199 antTimer = null,
200 + guiSuccessor = null,
200 onosInstances = {}, 201 onosInstances = {},
201 onosOrder = [], 202 onosOrder = [],
202 oiBox, 203 oiBox,
...@@ -620,7 +621,7 @@ ...@@ -620,7 +621,7 @@
620 var inst = data.payload, 621 var inst = data.payload,
621 id = inst.id; 622 id = inst.id;
622 if (onosInstances[id]) { 623 if (onosInstances[id]) {
623 - logicError('ONOS instance already added: ' + id); 624 + updateInstance(data);
624 return; 625 return;
625 } 626 }
626 onosInstances[id] = inst; 627 onosInstances[id] = inst;
...@@ -635,7 +636,7 @@ ...@@ -635,7 +636,7 @@
635 d; 636 d;
636 637
637 if (network.lookup[id]) { 638 if (network.lookup[id]) {
638 - logicError('Device already added: ' + id); 639 + updateDevice(data);
639 return; 640 return;
640 } 641 }
641 642
...@@ -2177,16 +2178,38 @@ ...@@ -2177,16 +2178,38 @@
2177 // ============================== 2178 // ==============================
2178 // Web-Socket for live data 2179 // Web-Socket for live data
2179 2180
2181 + function findGuiSuccessor() {
2182 + var idx = -1;
2183 + onosOrder.forEach(function (d, i) {
2184 + if (d.uiAttached) {
2185 + idx = i;
2186 + }
2187 + });
2188 +
2189 + for (var i = 0; i < onosOrder.length - 1; i++) {
2190 + var ni = (idx + 1 + i) % onosOrder.length;
2191 + if (onosOrder[ni].online) {
2192 + return onosOrder[ni].ip;
2193 + }
2194 + }
2195 + return null;
2196 + }
2197 +
2180 function webSockUrl() { 2198 function webSockUrl() {
2181 - return document.location.toString() 2199 + var url = document.location.toString()
2182 .replace(/\#.*/, '') 2200 .replace(/\#.*/, '')
2183 .replace('http://', 'ws://') 2201 .replace('http://', 'ws://')
2184 .replace('https://', 'wss://') 2202 .replace('https://', 'wss://')
2185 .replace('index.html', config.webSockUrl); 2203 .replace('index.html', config.webSockUrl);
2204 + if (guiSuccessor) {
2205 + url = url.replace(location.hostname, guiSuccessor);
2206 + }
2207 + return url;
2186 } 2208 }
2187 2209
2188 webSock = { 2210 webSock = {
2189 ws : null, 2211 ws : null,
2212 + retries: 0,
2190 2213
2191 connect : function() { 2214 connect : function() {
2192 webSock.ws = new WebSocket(webSockUrl()); 2215 webSock.ws = new WebSocket(webSockUrl());
...@@ -2195,6 +2218,7 @@ ...@@ -2195,6 +2218,7 @@
2195 noWebSock(false); 2218 noWebSock(false);
2196 requestSummary(); 2219 requestSummary();
2197 showInstances(); 2220 showInstances();
2221 + webSock.retries = 0;
2198 }; 2222 };
2199 2223
2200 webSock.ws.onmessage = function(m) { 2224 webSock.ws.onmessage = function(m) {
...@@ -2206,7 +2230,13 @@ ...@@ -2206,7 +2230,13 @@
2206 2230
2207 webSock.ws.onclose = function(m) { 2231 webSock.ws.onclose = function(m) {
2208 webSock.ws = null; 2232 webSock.ws = null;
2233 + guiSuccessor = findGuiSuccessor();
2234 + if (guiSuccessor && webSock.retries < onosOrder.length) {
2235 + webSock.retries++;
2236 + webSock.connect();
2237 + } else {
2209 noWebSock(true); 2238 noWebSock(true);
2239 + }
2210 }; 2240 };
2211 }, 2241 },
2212 2242
......