Committed by
Yuta Higuchi
Support for running copycat on a subset of ONOS cluster nodes.
This change ensures DatabaseService on each node does the right thing, irrespective of whether the node is part of Raft cluster or not. Change-Id: I1e8976d56b3a2892d5c7ecbb46c247770a633860
Showing
3 changed files
with
103 additions
and
59 deletions
... | @@ -13,6 +13,7 @@ import java.util.Vector; | ... | @@ -13,6 +13,7 @@ import java.util.Vector; |
13 | 13 | ||
14 | import net.kuujo.copycat.cluster.TcpClusterConfig; | 14 | import net.kuujo.copycat.cluster.TcpClusterConfig; |
15 | import net.kuujo.copycat.cluster.TcpMember; | 15 | import net.kuujo.copycat.cluster.TcpMember; |
16 | +import net.kuujo.copycat.event.LeaderElectEvent; | ||
16 | import net.kuujo.copycat.internal.log.ConfigurationEntry; | 17 | import net.kuujo.copycat.internal.log.ConfigurationEntry; |
17 | import net.kuujo.copycat.internal.log.CopycatEntry; | 18 | import net.kuujo.copycat.internal.log.CopycatEntry; |
18 | import net.kuujo.copycat.internal.log.OperationEntry; | 19 | import net.kuujo.copycat.internal.log.OperationEntry; |
... | @@ -103,6 +104,7 @@ public class ClusterMessagingProtocol | ... | @@ -103,6 +104,7 @@ public class ClusterMessagingProtocol |
103 | .register(OperationEntry.class) | 104 | .register(OperationEntry.class) |
104 | .register(TcpClusterConfig.class) | 105 | .register(TcpClusterConfig.class) |
105 | .register(TcpMember.class) | 106 | .register(TcpMember.class) |
107 | + .register(LeaderElectEvent.class) | ||
106 | .build(); | 108 | .build(); |
107 | 109 | ||
108 | private static final KryoNamespace DATABASE = KryoNamespace.newBuilder() | 110 | private static final KryoNamespace DATABASE = KryoNamespace.newBuilder() | ... | ... |
... | @@ -13,13 +13,15 @@ import java.util.concurrent.ExecutionException; | ... | @@ -13,13 +13,15 @@ import java.util.concurrent.ExecutionException; |
13 | import java.util.concurrent.TimeUnit; | 13 | import java.util.concurrent.TimeUnit; |
14 | import java.util.concurrent.TimeoutException; | 14 | import java.util.concurrent.TimeoutException; |
15 | 15 | ||
16 | +import net.kuujo.copycat.cluster.Member; | ||
16 | import net.kuujo.copycat.cluster.TcpMember; | 17 | import net.kuujo.copycat.cluster.TcpMember; |
17 | -import net.kuujo.copycat.event.EventHandler; | ||
18 | import net.kuujo.copycat.event.LeaderElectEvent; | 18 | import net.kuujo.copycat.event.LeaderElectEvent; |
19 | import net.kuujo.copycat.protocol.SubmitRequest; | 19 | import net.kuujo.copycat.protocol.SubmitRequest; |
20 | import net.kuujo.copycat.protocol.SubmitResponse; | 20 | import net.kuujo.copycat.protocol.SubmitResponse; |
21 | import net.kuujo.copycat.spi.protocol.ProtocolClient; | 21 | import net.kuujo.copycat.spi.protocol.ProtocolClient; |
22 | 22 | ||
23 | +import org.onlab.onos.store.cluster.messaging.ClusterMessage; | ||
24 | +import org.onlab.onos.store.cluster.messaging.ClusterMessageHandler; | ||
23 | import org.onlab.onos.store.service.BatchReadRequest; | 25 | import org.onlab.onos.store.service.BatchReadRequest; |
24 | import org.onlab.onos.store.service.BatchWriteRequest; | 26 | import org.onlab.onos.store.service.BatchWriteRequest; |
25 | import org.onlab.onos.store.service.DatabaseException; | 27 | import org.onlab.onos.store.service.DatabaseException; |
... | @@ -31,7 +33,7 @@ import org.slf4j.Logger; | ... | @@ -31,7 +33,7 @@ import org.slf4j.Logger; |
31 | /** | 33 | /** |
32 | * Client for interacting with the Copycat Raft cluster. | 34 | * Client for interacting with the Copycat Raft cluster. |
33 | */ | 35 | */ |
34 | -public class DatabaseClient implements EventHandler<LeaderElectEvent> { | 36 | +public class DatabaseClient implements ClusterMessageHandler { |
35 | 37 | ||
36 | private static final int RETRIES = 5; | 38 | private static final int RETRIES = 5; |
37 | 39 | ||
... | @@ -41,24 +43,28 @@ public class DatabaseClient implements EventHandler<LeaderElectEvent> { | ... | @@ -41,24 +43,28 @@ public class DatabaseClient implements EventHandler<LeaderElectEvent> { |
41 | 43 | ||
42 | private final DatabaseProtocolService protocol; | 44 | private final DatabaseProtocolService protocol; |
43 | private volatile ProtocolClient client = null; | 45 | private volatile ProtocolClient client = null; |
44 | - private volatile TcpMember currentLeader = null; | 46 | + private volatile Member currentLeader = null; |
45 | - | 47 | + private volatile long currentLeaderTerm = 0; |
46 | 48 | ||
47 | public DatabaseClient(DatabaseProtocolService protocol) { | 49 | public DatabaseClient(DatabaseProtocolService protocol) { |
48 | this.protocol = checkNotNull(protocol); | 50 | this.protocol = checkNotNull(protocol); |
49 | } | 51 | } |
50 | 52 | ||
51 | - // FIXME This handler relies on a fact that local node is part of Raft cluster | ||
52 | @Override | 53 | @Override |
53 | - public void handle(LeaderElectEvent event) { | 54 | + public void handle(ClusterMessage message) { |
54 | - final TcpMember newLeader = event.leader(); | 55 | + LeaderElectEvent event = |
55 | - if (newLeader != null && !newLeader.equals(currentLeader)) { | 56 | + ClusterMessagingProtocol.SERIALIZER.decode(message.payload()); |
56 | - log.info("{} became the new leader", newLeader); | 57 | + TcpMember newLeader = event.leader(); |
58 | + long newLeaderTerm = event.term(); | ||
59 | + if (newLeader != null && !newLeader.equals(currentLeader) && newLeaderTerm > currentLeaderTerm) { | ||
60 | + log.info("New leader detected. Leader: {}, term: {}", newLeader, newLeaderTerm); | ||
57 | ProtocolClient prevClient = client; | 61 | ProtocolClient prevClient = client; |
58 | - ProtocolClient newclient = protocol.createClient(newLeader); | 62 | + ProtocolClient newClient = protocol.createClient(newLeader); |
59 | - newclient.connect(); | 63 | + newClient.connect(); |
60 | - client = newclient; | 64 | + client = newClient; |
61 | currentLeader = newLeader; | 65 | currentLeader = newLeader; |
66 | + currentLeaderTerm = newLeaderTerm; | ||
67 | + | ||
62 | if (prevClient != null) { | 68 | if (prevClient != null) { |
63 | prevClient.close(); | 69 | prevClient.close(); |
64 | } | 70 | } |
... | @@ -80,7 +86,6 @@ public class DatabaseClient implements EventHandler<LeaderElectEvent> { | ... | @@ -80,7 +86,6 @@ public class DatabaseClient implements EventHandler<LeaderElectEvent> { |
80 | while (currentLeader == null) { | 86 | while (currentLeader == null) { |
81 | Thread.sleep(200); | 87 | Thread.sleep(200); |
82 | } | 88 | } |
83 | - log.info("Leader appeared: {}", currentLeader); | ||
84 | return; | 89 | return; |
85 | } catch (InterruptedException e) { | 90 | } catch (InterruptedException e) { |
86 | log.error("Interrupted while waiting for Leader", e); | 91 | log.error("Interrupted while waiting for Leader", e); | ... | ... |
... | @@ -19,6 +19,7 @@ import net.kuujo.copycat.cluster.Member; | ... | @@ -19,6 +19,7 @@ import net.kuujo.copycat.cluster.Member; |
19 | import net.kuujo.copycat.cluster.TcpCluster; | 19 | import net.kuujo.copycat.cluster.TcpCluster; |
20 | import net.kuujo.copycat.cluster.TcpClusterConfig; | 20 | import net.kuujo.copycat.cluster.TcpClusterConfig; |
21 | import net.kuujo.copycat.cluster.TcpMember; | 21 | import net.kuujo.copycat.cluster.TcpMember; |
22 | +import net.kuujo.copycat.event.EventHandler; | ||
22 | import net.kuujo.copycat.event.LeaderElectEvent; | 23 | import net.kuujo.copycat.event.LeaderElectEvent; |
23 | import net.kuujo.copycat.log.Log; | 24 | import net.kuujo.copycat.log.Log; |
24 | 25 | ||
... | @@ -35,6 +36,8 @@ import org.onlab.onos.cluster.ControllerNode; | ... | @@ -35,6 +36,8 @@ import org.onlab.onos.cluster.ControllerNode; |
35 | import org.onlab.onos.cluster.DefaultControllerNode; | 36 | import org.onlab.onos.cluster.DefaultControllerNode; |
36 | import org.onlab.onos.cluster.NodeId; | 37 | import org.onlab.onos.cluster.NodeId; |
37 | import org.onlab.onos.store.cluster.messaging.ClusterCommunicationService; | 38 | import org.onlab.onos.store.cluster.messaging.ClusterCommunicationService; |
39 | +import org.onlab.onos.store.cluster.messaging.ClusterMessage; | ||
40 | +import org.onlab.onos.store.cluster.messaging.MessageSubject; | ||
38 | import org.onlab.onos.store.service.BatchReadRequest; | 41 | import org.onlab.onos.store.service.BatchReadRequest; |
39 | import org.onlab.onos.store.service.BatchReadResult; | 42 | import org.onlab.onos.store.service.BatchReadResult; |
40 | import org.onlab.onos.store.service.BatchWriteRequest; | 43 | import org.onlab.onos.store.service.BatchWriteRequest; |
... | @@ -86,6 +89,9 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -86,6 +89,9 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
86 | // initial member configuration file path | 89 | // initial member configuration file path |
87 | private String initialMemberConfig = DEFAULT_MEMBER_FILE; | 90 | private String initialMemberConfig = DEFAULT_MEMBER_FILE; |
88 | 91 | ||
92 | + public static final MessageSubject RAFT_LEADER_ELECTION_EVENT = | ||
93 | + new MessageSubject("raft-leader-election-event"); | ||
94 | + | ||
89 | private Copycat copycat; | 95 | private Copycat copycat; |
90 | private DatabaseClient client; | 96 | private DatabaseClient client; |
91 | 97 | ||
... | @@ -102,8 +108,6 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -102,8 +108,6 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
102 | @Activate | 108 | @Activate |
103 | public void activate() throws InterruptedException, ExecutionException { | 109 | public void activate() throws InterruptedException, ExecutionException { |
104 | 110 | ||
105 | - // TODO: Not every node should be part of the consensus ring. | ||
106 | - | ||
107 | // load tablet configuration | 111 | // load tablet configuration |
108 | File file = new File(CONFIG_DIR, initialMemberConfig); | 112 | File file = new File(CONFIG_DIR, initialMemberConfig); |
109 | log.info("Loading config: {}", file.getAbsolutePath()); | 113 | log.info("Loading config: {}", file.getAbsolutePath()); |
... | @@ -117,16 +121,16 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -117,16 +121,16 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
117 | 121 | ||
118 | // load default tablet configuration and start copycat | 122 | // load default tablet configuration and start copycat |
119 | clusterConfig = new TcpClusterConfig(); | 123 | clusterConfig = new TcpClusterConfig(); |
120 | - Set<DefaultControllerNode> defaultMember = tabletMembers.get(DEFAULT_TABLET); | 124 | + Set<DefaultControllerNode> defaultMembers = tabletMembers.get(DEFAULT_TABLET); |
121 | - if (defaultMember == null || defaultMember.isEmpty()) { | 125 | + if (defaultMembers == null || defaultMembers.isEmpty()) { |
122 | - log.error("No member found in [{}] tablet configuration.", | 126 | + log.error("No members found in [{}] tablet configuration.", |
123 | DEFAULT_TABLET); | 127 | DEFAULT_TABLET); |
124 | throw new IllegalStateException("No member found in tablet configuration"); | 128 | throw new IllegalStateException("No member found in tablet configuration"); |
125 | 129 | ||
126 | } | 130 | } |
127 | 131 | ||
128 | final ControllerNode localNode = clusterService.getLocalNode(); | 132 | final ControllerNode localNode = clusterService.getLocalNode(); |
129 | - for (ControllerNode member : defaultMember) { | 133 | + for (ControllerNode member : defaultMembers) { |
130 | final TcpMember tcpMember = new TcpMember(member.ip().toString(), | 134 | final TcpMember tcpMember = new TcpMember(member.ip().toString(), |
131 | member.tcpPort()); | 135 | member.tcpPort()); |
132 | if (localNode.equals(member)) { | 136 | if (localNode.equals(member)) { |
... | @@ -136,6 +140,61 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -136,6 +140,61 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
136 | } | 140 | } |
137 | } | 141 | } |
138 | 142 | ||
143 | + if (clusterConfig.getLocalMember() != null) { | ||
144 | + | ||
145 | + // Wait for a minimum viable Raft cluster to boot up. | ||
146 | + waitForClusterQuorum(); | ||
147 | + | ||
148 | + final TcpCluster cluster; | ||
149 | + synchronized (clusterConfig) { | ||
150 | + // Create the cluster. | ||
151 | + cluster = new TcpCluster(clusterConfig); | ||
152 | + } | ||
153 | + log.info("Starting cluster: {}", cluster); | ||
154 | + | ||
155 | + DatabaseEntryExpirationTracker expirationTracker = | ||
156 | + new DatabaseEntryExpirationTracker( | ||
157 | + clusterConfig.getLocalMember(), | ||
158 | + clusterService.getLocalNode(), | ||
159 | + clusterCommunicator, | ||
160 | + this); | ||
161 | + | ||
162 | + DatabaseStateMachine stateMachine = new DatabaseStateMachine(); | ||
163 | + stateMachine.addEventListener(expirationTracker); | ||
164 | + Log consensusLog = new MapDBLog(LOG_FILE_PREFIX + localNode.id(), | ||
165 | + ClusterMessagingProtocol.SERIALIZER); | ||
166 | + | ||
167 | + copycat = new Copycat(stateMachine, consensusLog, cluster, copycatMessagingProtocol); | ||
168 | + copycat.event(LeaderElectEvent.class).registerHandler(new RaftLeaderElectionMonitor()); | ||
169 | + copycat.event(LeaderElectEvent.class).registerHandler(expirationTracker); | ||
170 | + } | ||
171 | + | ||
172 | + client = new DatabaseClient(copycatMessagingProtocol); | ||
173 | + clusterCommunicator.addSubscriber(RAFT_LEADER_ELECTION_EVENT, client); | ||
174 | + | ||
175 | + // Starts copycat if this node is a participant | ||
176 | + // of the Raft cluster. | ||
177 | + if (copycat != null) { | ||
178 | + copycat.start().get(); | ||
179 | + } | ||
180 | + | ||
181 | + client.waitForLeader(); | ||
182 | + log.info("Started."); | ||
183 | + } | ||
184 | + | ||
185 | + @Deactivate | ||
186 | + public void deactivate() { | ||
187 | + clusterService.removeListener(clusterEventListener); | ||
188 | + // TODO: ClusterCommunicationService must support more than one | ||
189 | + // handler per message subject. | ||
190 | + clusterCommunicator.removeSubscriber(RAFT_LEADER_ELECTION_EVENT); | ||
191 | + if (copycat != null) { | ||
192 | + copycat.stop(); | ||
193 | + } | ||
194 | + log.info("Stopped."); | ||
195 | + } | ||
196 | + | ||
197 | + private void waitForClusterQuorum() { | ||
139 | // note: from this point beyond, clusterConfig requires synchronization | 198 | // note: from this point beyond, clusterConfig requires synchronization |
140 | clusterEventLatch = new CountDownLatch(1); | 199 | clusterEventLatch = new CountDownLatch(1); |
141 | clusterEventListener = new InternalClusterEventListener(); | 200 | clusterEventListener = new InternalClusterEventListener(); |
... | @@ -153,46 +212,6 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -153,46 +212,6 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
153 | log.info("Interrupted waiting for others", e); | 212 | log.info("Interrupted waiting for others", e); |
154 | } | 213 | } |
155 | } | 214 | } |
156 | - | ||
157 | - final TcpCluster cluster; | ||
158 | - synchronized (clusterConfig) { | ||
159 | - // Create the cluster. | ||
160 | - cluster = new TcpCluster(clusterConfig); | ||
161 | - } | ||
162 | - log.info("Starting cluster: {}", cluster); | ||
163 | - | ||
164 | - DatabaseEntryExpirationTracker expirationTracker = | ||
165 | - new DatabaseEntryExpirationTracker( | ||
166 | - clusterConfig.getLocalMember(), | ||
167 | - clusterService.getLocalNode(), | ||
168 | - clusterCommunicator, | ||
169 | - this); | ||
170 | - | ||
171 | - DatabaseStateMachine stateMachine = new DatabaseStateMachine(); | ||
172 | - stateMachine.addEventListener(expirationTracker); | ||
173 | - Log consensusLog = new MapDBLog(LOG_FILE_PREFIX + localNode.id(), | ||
174 | - ClusterMessagingProtocol.SERIALIZER); | ||
175 | - | ||
176 | - copycat = new Copycat(stateMachine, consensusLog, cluster, copycatMessagingProtocol); | ||
177 | - | ||
178 | - client = new DatabaseClient(copycatMessagingProtocol); | ||
179 | - | ||
180 | - | ||
181 | - copycat.event(LeaderElectEvent.class).registerHandler(client); | ||
182 | - copycat.event(LeaderElectEvent.class).registerHandler(expirationTracker); | ||
183 | - | ||
184 | - copycat.start().get(); | ||
185 | - | ||
186 | - client.waitForLeader(); | ||
187 | - | ||
188 | - log.info("Started."); | ||
189 | - } | ||
190 | - | ||
191 | - @Deactivate | ||
192 | - public void deactivate() { | ||
193 | - clusterService.removeListener(clusterEventListener); | ||
194 | - copycat.stop(); | ||
195 | - log.info("Stopped."); | ||
196 | } | 215 | } |
197 | 216 | ||
198 | @Override | 217 | @Override |
... | @@ -353,6 +372,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -353,6 +372,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
353 | } | 372 | } |
354 | } | 373 | } |
355 | 374 | ||
375 | + private final class RaftLeaderElectionMonitor implements EventHandler<LeaderElectEvent> { | ||
376 | + @Override | ||
377 | + public void handle(LeaderElectEvent event) { | ||
378 | + try { | ||
379 | + if (clusterConfig.getLocalMember() != null && event.leader().equals(clusterConfig.getLocalMember())) { | ||
380 | + // This node just became the leader. | ||
381 | + clusterCommunicator.broadcastIncludeSelf( | ||
382 | + new ClusterMessage( | ||
383 | + clusterService.getLocalNode().id(), | ||
384 | + RAFT_LEADER_ELECTION_EVENT, | ||
385 | + ClusterMessagingProtocol.SERIALIZER.encode(event))); | ||
386 | + } | ||
387 | + } catch (IOException e) { | ||
388 | + log.error("Failed to broadcast raft leadership change event", e); | ||
389 | + } | ||
390 | + } | ||
391 | + } | ||
392 | + | ||
356 | private final class InternalClusterEventListener | 393 | private final class InternalClusterEventListener |
357 | implements ClusterEventListener { | 394 | implements ClusterEventListener { |
358 | 395 | ... | ... |
-
Please register or login to post a comment