Madan Jampani
Committed by Pavlin Radoslavov

LeadershipService: Support for a leaderBoard.

Change-Id: I0dd8267e104466ec65a2c67d23d1c4d923cad266

Change-Id: I6bc548510400eacabb12482f8fba1b7f2abb0604
...@@ -11,12 +11,12 @@ public class Leadership { ...@@ -11,12 +11,12 @@ public class Leadership {
11 11
12 private final String topic; 12 private final String topic;
13 private final ControllerNode leader; 13 private final ControllerNode leader;
14 - private final long term; 14 + private final long epoch;
15 15
16 - public Leadership(String topic, ControllerNode leader, long term) { 16 + public Leadership(String topic, ControllerNode leader, long epoch) {
17 this.topic = topic; 17 this.topic = topic;
18 this.leader = leader; 18 this.leader = leader;
19 - this.term = term; 19 + this.epoch = epoch;
20 } 20 }
21 21
22 /** 22 /**
...@@ -36,16 +36,16 @@ public class Leadership { ...@@ -36,16 +36,16 @@ public class Leadership {
36 } 36 }
37 37
38 /** 38 /**
39 - * The term number associated with this leadership. 39 + * The epoch when the leadership was assumed.
40 - * @return leadership term 40 + * @return leadership epoch
41 */ 41 */
42 - public long term() { 42 + public long epoch() {
43 - return term; 43 + return epoch;
44 } 44 }
45 45
46 @Override 46 @Override
47 public int hashCode() { 47 public int hashCode() {
48 - return Objects.hash(topic, leader, term); 48 + return Objects.hash(topic, leader, epoch);
49 } 49 }
50 50
51 @Override 51 @Override
...@@ -53,7 +53,7 @@ public class Leadership { ...@@ -53,7 +53,7 @@ public class Leadership {
53 return MoreObjects.toStringHelper(this.getClass()) 53 return MoreObjects.toStringHelper(this.getClass())
54 .add("topic", topic) 54 .add("topic", topic)
55 .add("leader", leader) 55 .add("leader", leader)
56 - .add("term", term) 56 + .add("epoch", epoch)
57 .toString(); 57 .toString();
58 } 58 }
59 } 59 }
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -17,14 +17,21 @@ package org.onlab.onos.cluster; ...@@ -17,14 +17,21 @@ package org.onlab.onos.cluster;
17 17
18 /** 18 /**
19 * Service for leader election. 19 * Service for leader election.
20 - Leadership contents are organized around topics. ONOS instance can join the 20 + Leadership contests are organized around topics. An instance can join the
21 - * leadership race for a topic or withdraw from a race it has previously joined 21 + * leadership race for a topic or withdraw from a race it has previously joined.
22 - * Once in the race, the instance can get asynchronously notified 22 + * Listeners can be added to receive notifications asynchronously for various
23 - * of leadership election results. 23 + * leadership contests.
24 */ 24 */
25 public interface LeadershipService { 25 public interface LeadershipService {
26 26
27 /** 27 /**
28 + * Gets the most recent leader for the topic.
29 + * @param path topic
30 + * @return node who is the leader, null if no such topic exists.
31 + */
32 + ControllerNode getLeader(String path);
33 +
34 + /**
28 * Joins the leadership contest. 35 * Joins the leadership contest.
29 * @param path topic for which this controller node wishes to be a leader. 36 * @param path topic for which this controller node wishes to be a leader.
30 */ 37 */
......
...@@ -76,6 +76,15 @@ public interface Lock { ...@@ -76,6 +76,15 @@ public interface Lock {
76 boolean isLocked(); 76 boolean isLocked();
77 77
78 /** 78 /**
79 + * Returns the epoch for this lock.
80 + * If this lock is currently locked, i.e. isLocked() returns true, epoch signifies the logical time
81 + * when the lock was acquired. The concept of epoch lets one come up with a global ordering for all
82 + * lock acquisition events.
83 + * @return epoch
84 + */
85 + long epoch();
86 +
87 + /**
79 * Releases the lock. 88 * Releases the lock.
80 */ 89 */
81 void unlock(); 90 void unlock();
......
...@@ -5,6 +5,7 @@ import static com.google.common.base.Verify.verifyNotNull; ...@@ -5,6 +5,7 @@ import static com.google.common.base.Verify.verifyNotNull;
5 import static org.onlab.util.Tools.namedThreads; 5 import static org.onlab.util.Tools.namedThreads;
6 import static org.slf4j.LoggerFactory.getLogger; 6 import static org.slf4j.LoggerFactory.getLogger;
7 7
8 +import java.io.IOException;
8 import java.util.Map; 9 import java.util.Map;
9 import java.util.Set; 10 import java.util.Set;
10 import java.util.concurrent.Executors; 11 import java.util.concurrent.Executors;
...@@ -23,9 +24,16 @@ import org.onlab.onos.cluster.Leadership; ...@@ -23,9 +24,16 @@ import org.onlab.onos.cluster.Leadership;
23 import org.onlab.onos.cluster.LeadershipEvent; 24 import org.onlab.onos.cluster.LeadershipEvent;
24 import org.onlab.onos.cluster.LeadershipEventListener; 25 import org.onlab.onos.cluster.LeadershipEventListener;
25 import org.onlab.onos.cluster.LeadershipService; 26 import org.onlab.onos.cluster.LeadershipService;
27 +import org.onlab.onos.store.cluster.messaging.ClusterCommunicationService;
28 +import org.onlab.onos.store.cluster.messaging.ClusterMessage;
29 +import org.onlab.onos.store.cluster.messaging.ClusterMessageHandler;
30 +import org.onlab.onos.store.cluster.messaging.MessageSubject;
31 +import org.onlab.onos.store.serializers.KryoNamespaces;
32 +import org.onlab.onos.store.serializers.KryoSerializer;
26 import org.onlab.onos.store.service.Lock; 33 import org.onlab.onos.store.service.Lock;
27 import org.onlab.onos.store.service.LockService; 34 import org.onlab.onos.store.service.LockService;
28 import org.onlab.onos.store.service.impl.DistributedLockManager; 35 import org.onlab.onos.store.service.impl.DistributedLockManager;
36 +import org.onlab.util.KryoNamespace;
29 import org.slf4j.Logger; 37 import org.slf4j.Logger;
30 38
31 import com.google.common.collect.Maps; 39 import com.google.common.collect.Maps;
...@@ -45,32 +53,83 @@ public class LeadershipManager implements LeadershipService { ...@@ -45,32 +53,83 @@ public class LeadershipManager implements LeadershipService {
45 private static final int TERM_DURATION_MS = 53 private static final int TERM_DURATION_MS =
46 DistributedLockManager.DEAD_LOCK_TIMEOUT_MS; 54 DistributedLockManager.DEAD_LOCK_TIMEOUT_MS;
47 55
56 + // Time to wait before retrying leadership after
57 + // an unexpected error.
58 + private static final int WAIT_BEFORE_RETRY_MS = 2000;
59 +
48 // TODO: Appropriate Thread pool sizing. 60 // TODO: Appropriate Thread pool sizing.
49 private static final ScheduledExecutorService THREAD_POOL = 61 private static final ScheduledExecutorService THREAD_POOL =
50 Executors.newScheduledThreadPool(25, namedThreads("leadership-manager-%d")); 62 Executors.newScheduledThreadPool(25, namedThreads("leadership-manager-%d"));
51 63
64 + private static final MessageSubject LEADERSHIP_UPDATES =
65 + new MessageSubject("leadership-contest-updates");
66 +
52 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 67 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
53 private ClusterService clusterService; 68 private ClusterService clusterService;
54 69
55 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY) 70 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
71 + private ClusterCommunicationService clusterCommunicator;
72 +
73 + @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
56 private LockService lockService; 74 private LockService lockService;
57 75
58 - private Map<String, Lock> openContests = Maps.newHashMap(); 76 + private final Map<String, Leadership> leaderBoard = Maps.newHashMap();
59 - private Set<LeadershipEventListener> listeners = Sets.newIdentityHashSet(); 77 +
78 + private final Map<String, Lock> openContests = Maps.newHashMap();
79 + private final Set<LeadershipEventListener> listeners = Sets.newIdentityHashSet();
60 private ControllerNode localNode; 80 private ControllerNode localNode;
61 81
82 + private final LeadershipEventListener peerAdvertiser = new PeerAdvertiser();
83 + private final LeadershipEventListener leaderBoardUpdater = new LeaderBoardUpdater();
84 +
85 + public static final KryoSerializer SERIALIZER = new KryoSerializer() {
86 + @Override
87 + protected void setupKryoPool() {
88 + serializerPool = KryoNamespace.newBuilder()
89 + .register(KryoNamespaces.API)
90 + .build()
91 + .populate(1);
92 + }
93 + };
94 +
62 @Activate 95 @Activate
63 public void activate() { 96 public void activate() {
64 localNode = clusterService.getLocalNode(); 97 localNode = clusterService.getLocalNode();
98 +
99 + addListener(peerAdvertiser);
100 + addListener(leaderBoardUpdater);
101 +
102 + clusterCommunicator.addSubscriber(
103 + LEADERSHIP_UPDATES,
104 + new PeerAdvertisementHandler());
105 +
65 log.info("Started."); 106 log.info("Started.");
66 } 107 }
67 108
68 @Deactivate 109 @Deactivate
69 public void deactivate() { 110 public void deactivate() {
111 + removeListener(peerAdvertiser);
112 + removeListener(leaderBoardUpdater);
113 +
114 + clusterCommunicator.removeSubscriber(LEADERSHIP_UPDATES);
115 +
70 THREAD_POOL.shutdown(); 116 THREAD_POOL.shutdown();
117 +
71 log.info("Stopped."); 118 log.info("Stopped.");
72 } 119 }
73 120
121 +
122 + @Override
123 + public ControllerNode getLeader(String path) {
124 + synchronized (leaderBoard) {
125 + Leadership leadership = leaderBoard.get(path);
126 + if (leadership != null) {
127 + return leadership.leader();
128 + }
129 + }
130 + return null;
131 + }
132 +
74 @Override 133 @Override
75 public void runForLeadership(String path) { 134 public void runForLeadership(String path) {
76 checkArgument(path != null); 135 checkArgument(path != null);
...@@ -94,8 +153,7 @@ public class LeadershipManager implements LeadershipService { ...@@ -94,8 +153,7 @@ public class LeadershipManager implements LeadershipService {
94 notifyListeners( 153 notifyListeners(
95 new LeadershipEvent( 154 new LeadershipEvent(
96 LeadershipEvent.Type.LEADER_BOOTED, 155 LeadershipEvent.Type.LEADER_BOOTED,
97 - new Leadership(lock.path(), localNode, 0))); 156 + new Leadership(lock.path(), localNode, lock.epoch())));
98 - // FIXME: Should set the correct term information.
99 } 157 }
100 } 158 }
101 159
...@@ -123,26 +181,31 @@ public class LeadershipManager implements LeadershipService { ...@@ -123,26 +181,31 @@ public class LeadershipManager implements LeadershipService {
123 lock.lockAsync(TERM_DURATION_MS).whenComplete((response, error) -> { 181 lock.lockAsync(TERM_DURATION_MS).whenComplete((response, error) -> {
124 if (error == null) { 182 if (error == null) {
125 THREAD_POOL.schedule( 183 THREAD_POOL.schedule(
126 - new RelectionTask(lock), 184 + new ReelectionTask(lock),
127 TERM_DURATION_MS / 2, 185 TERM_DURATION_MS / 2,
128 TimeUnit.MILLISECONDS); 186 TimeUnit.MILLISECONDS);
129 notifyListeners( 187 notifyListeners(
130 new LeadershipEvent( 188 new LeadershipEvent(
131 LeadershipEvent.Type.LEADER_ELECTED, 189 LeadershipEvent.Type.LEADER_ELECTED,
132 - new Leadership(lock.path(), localNode, 0))); 190 + new Leadership(lock.path(), localNode, lock.epoch())));
191 + return;
133 } else { 192 } else {
134 - log.error("Failed to acquire lock for {}", path, error); 193 + log.warn("Failed to acquire lock for {}. Will retry in {} sec", path, WAIT_BEFORE_RETRY_MS, error);
135 - // retry 194 + try {
195 + Thread.sleep(WAIT_BEFORE_RETRY_MS);
136 tryAcquireLeadership(path); 196 tryAcquireLeadership(path);
197 + } catch (InterruptedException e) {
198 + Thread.currentThread().interrupt();
199 + }
137 } 200 }
138 }); 201 });
139 } 202 }
140 203
141 - private class RelectionTask implements Runnable { 204 + private class ReelectionTask implements Runnable {
142 205
143 private final Lock lock; 206 private final Lock lock;
144 207
145 - public RelectionTask(Lock lock) { 208 + public ReelectionTask(Lock lock) {
146 this.lock = lock; 209 this.lock = lock;
147 } 210 }
148 211
...@@ -152,17 +215,69 @@ public class LeadershipManager implements LeadershipService { ...@@ -152,17 +215,69 @@ public class LeadershipManager implements LeadershipService {
152 notifyListeners( 215 notifyListeners(
153 new LeadershipEvent( 216 new LeadershipEvent(
154 LeadershipEvent.Type.LEADER_REELECTED, 217 LeadershipEvent.Type.LEADER_REELECTED,
155 - new Leadership(lock.path(), localNode, 0))); 218 + new Leadership(lock.path(), localNode, lock.epoch())));
156 THREAD_POOL.schedule(this, TERM_DURATION_MS / 2, TimeUnit.MILLISECONDS); 219 THREAD_POOL.schedule(this, TERM_DURATION_MS / 2, TimeUnit.MILLISECONDS);
157 } else { 220 } else {
158 if (openContests.containsKey(lock.path())) { 221 if (openContests.containsKey(lock.path())) {
159 notifyListeners( 222 notifyListeners(
160 new LeadershipEvent( 223 new LeadershipEvent(
161 LeadershipEvent.Type.LEADER_BOOTED, 224 LeadershipEvent.Type.LEADER_BOOTED,
162 - new Leadership(lock.path(), localNode, 0))); 225 + new Leadership(lock.path(), localNode, lock.epoch())));
163 tryAcquireLeadership(lock.path()); 226 tryAcquireLeadership(lock.path());
164 } 227 }
165 } 228 }
166 } 229 }
167 } 230 }
231 +
232 + private class PeerAdvertiser implements LeadershipEventListener {
233 + @Override
234 + public void event(LeadershipEvent event) {
235 + // publish events originating on this host.
236 + if (event.subject().leader().equals(localNode)) {
237 + try {
238 + clusterCommunicator.broadcast(
239 + new ClusterMessage(
240 + localNode.id(),
241 + LEADERSHIP_UPDATES,
242 + SERIALIZER.encode(event)));
243 + } catch (IOException e) {
244 + log.error("Failed to broadcast leadership update message", e);
245 + }
246 + }
247 + }
248 + }
249 +
250 + private class PeerAdvertisementHandler implements ClusterMessageHandler {
251 + @Override
252 + public void handle(ClusterMessage message) {
253 + LeadershipEvent event = SERIALIZER.decode(message.payload());
254 + log.debug("Received {} from {}", event, message.sender());
255 + notifyListeners(event);
256 + }
257 + }
258 +
259 + private class LeaderBoardUpdater implements LeadershipEventListener {
260 + @Override
261 + public void event(LeadershipEvent event) {
262 + Leadership leadershipUpdate = event.subject();
263 + synchronized (leaderBoard) {
264 + Leadership currentLeadership = leaderBoard.get(leadershipUpdate.topic());
265 + switch (event.type()) {
266 + case LEADER_ELECTED:
267 + case LEADER_REELECTED:
268 + if (currentLeadership == null || currentLeadership.epoch() < leadershipUpdate.epoch()) {
269 + leaderBoard.put(leadershipUpdate.topic(), leadershipUpdate);
270 + }
271 + break;
272 + case LEADER_BOOTED:
273 + if (currentLeadership != null && currentLeadership.epoch() <= leadershipUpdate.epoch()) {
274 + leaderBoard.remove(leadershipUpdate.topic());
275 + }
276 + break;
277 + default:
278 + break;
279 + }
280 + }
281 + }
282 + }
168 } 283 }
...\ No newline at end of file ...\ No newline at end of file
......
1 package org.onlab.onos.store.service.impl; 1 package org.onlab.onos.store.service.impl;
2 2
3 +import static com.google.common.base.Verify.verify;
3 import static org.slf4j.LoggerFactory.getLogger; 4 import static org.slf4j.LoggerFactory.getLogger;
4 5
5 import java.nio.charset.StandardCharsets; 6 import java.nio.charset.StandardCharsets;
7 +import java.util.Arrays;
6 import java.util.UUID; 8 import java.util.UUID;
7 import java.util.concurrent.CompletableFuture; 9 import java.util.concurrent.CompletableFuture;
8 import java.util.concurrent.ExecutionException; 10 import java.util.concurrent.ExecutionException;
...@@ -15,6 +17,7 @@ import org.onlab.onos.cluster.ClusterService; ...@@ -15,6 +17,7 @@ import org.onlab.onos.cluster.ClusterService;
15 import org.onlab.onos.store.service.DatabaseException; 17 import org.onlab.onos.store.service.DatabaseException;
16 import org.onlab.onos.store.service.DatabaseService; 18 import org.onlab.onos.store.service.DatabaseService;
17 import org.onlab.onos.store.service.Lock; 19 import org.onlab.onos.store.service.Lock;
20 +import org.onlab.onos.store.service.VersionedValue;
18 import org.slf4j.Logger; 21 import org.slf4j.Logger;
19 22
20 /** 23 /**
...@@ -29,6 +32,7 @@ public class DistributedLock implements Lock { ...@@ -29,6 +32,7 @@ public class DistributedLock implements Lock {
29 private final String path; 32 private final String path;
30 private DateTime lockExpirationTime; 33 private DateTime lockExpirationTime;
31 private AtomicBoolean isLocked = new AtomicBoolean(false); 34 private AtomicBoolean isLocked = new AtomicBoolean(false);
35 + private volatile long epoch = 0;
32 private byte[] lockId; 36 private byte[] lockId;
33 37
34 public DistributedLock( 38 public DistributedLock(
...@@ -74,6 +78,10 @@ public class DistributedLock implements Lock { ...@@ -74,6 +78,10 @@ public class DistributedLock implements Lock {
74 DistributedLockManager.ONOS_LOCK_TABLE_NAME, 78 DistributedLockManager.ONOS_LOCK_TABLE_NAME,
75 path, 79 path,
76 lockId)) { 80 lockId)) {
81 + VersionedValue vv =
82 + databaseService.get(DistributedLockManager.ONOS_LOCK_TABLE_NAME, path);
83 + verify(Arrays.equals(vv.value(), lockId));
84 + epoch = vv.version();
77 isLocked.set(true); 85 isLocked.set(true);
78 lockExpirationTime = DateTime.now().plusMillis(leaseDurationMillis); 86 lockExpirationTime = DateTime.now().plusMillis(leaseDurationMillis);
79 return true; 87 return true;
...@@ -121,6 +129,11 @@ public class DistributedLock implements Lock { ...@@ -121,6 +129,11 @@ public class DistributedLock implements Lock {
121 } 129 }
122 130
123 @Override 131 @Override
132 + public long epoch() {
133 + return epoch;
134 + }
135 +
136 + @Override
124 public void unlock() { 137 public void unlock() {
125 if (!isLocked()) { 138 if (!isLocked()) {
126 return; 139 return;
......
...@@ -25,6 +25,8 @@ import java.util.LinkedList; ...@@ -25,6 +25,8 @@ import java.util.LinkedList;
25 25
26 import org.onlab.onos.cluster.ControllerNode; 26 import org.onlab.onos.cluster.ControllerNode;
27 import org.onlab.onos.cluster.DefaultControllerNode; 27 import org.onlab.onos.cluster.DefaultControllerNode;
28 +import org.onlab.onos.cluster.Leadership;
29 +import org.onlab.onos.cluster.LeadershipEvent;
28 import org.onlab.onos.cluster.NodeId; 30 import org.onlab.onos.cluster.NodeId;
29 import org.onlab.onos.cluster.RoleInfo; 31 import org.onlab.onos.cluster.RoleInfo;
30 import org.onlab.onos.core.DefaultApplicationId; 32 import org.onlab.onos.core.DefaultApplicationId;
...@@ -166,6 +168,9 @@ public final class KryoNamespaces { ...@@ -166,6 +168,9 @@ public final class KryoNamespaces {
166 Link.Type.class, 168 Link.Type.class,
167 Link.State.class, 169 Link.State.class,
168 Timestamp.class, 170 Timestamp.class,
171 + Leadership.class,
172 + LeadershipEvent.class,
173 + LeadershipEvent.Type.class,
169 HostId.class, 174 HostId.class,
170 HostDescription.class, 175 HostDescription.class,
171 DefaultHostDescription.class, 176 DefaultHostDescription.class,
......