Madan Jampani
Committed by Gerrit Code Review

ClusterService implementation that relies on accrual failure detector for determ…

…ining node up/down status.
Initially off by default, until futher testing is done.

Change-Id: I0ac8850d76af717e7804d4503bedb227d5894a0a
...@@ -32,6 +32,13 @@ ...@@ -32,6 +32,13 @@
32 <description>ONOS Gossip based distributed store subsystems</description> 32 <description>ONOS Gossip based distributed store subsystems</description>
33 33
34 <dependencies> 34 <dependencies>
35 +
36 + <dependency>
37 + <groupId>org.apache.commons</groupId>
38 + <artifactId>commons-math3</artifactId>
39 + <version>3.2</version>
40 + </dependency>
41 +
35 <dependency> 42 <dependency>
36 <groupId>org.onosproject</groupId> 43 <groupId>org.onosproject</groupId>
37 <artifactId>onos-core-serializers</artifactId> 44 <artifactId>onos-core-serializers</artifactId>
......
1 +package org.onosproject.store.cluster.impl;
2 +
3 +import static com.google.common.base.Preconditions.checkArgument;
4 +import static com.google.common.base.Preconditions.checkNotNull;
5 +
6 +import java.util.Map;
7 +
8 +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
9 +import org.onosproject.cluster.NodeId;
10 +
11 +import com.google.common.collect.Maps;
12 +
13 +/**
14 + * Phi Accrual failure detector.
15 + * <p>
16 + * Based on a paper titled: "The φ Accrual Failure Detector" by Hayashibara, et al.
17 + */
18 +public class PhiAccrualFailureDetector {
19 + private final Map<NodeId, History> states = Maps.newConcurrentMap();
20 +
21 + // TODO: make these configurable.
22 + private static final int WINDOW_SIZE = 250;
23 + private static final int MIN_SAMPLES = 25;
24 +
25 + // If a node does not have any heartbeats, this is the phi
26 + // value to report. Indicates the node is inactive (from the
27 + // detectors perspective.
28 + private static final double BOOTSTRAP_PHI_VALUE = 100.0;
29 +
30 + /**
31 + * Report a new heart beat for the specified node id.
32 + * @param nodeId node id
33 + */
34 + public void report(NodeId nodeId) {
35 + report(nodeId, System.currentTimeMillis());
36 + }
37 +
38 + /**
39 + * Report a new heart beat for the specified node id.
40 + * @param nodeId node id
41 + * @param arrivalTime arrival time
42 + */
43 + public void report(NodeId nodeId, long arrivalTime) {
44 + checkNotNull(nodeId, "NodeId must not be null");
45 + checkArgument(arrivalTime >= 0, "arrivalTime must not be negative");
46 + History nodeState =
47 + states.computeIfAbsent(nodeId, key -> new History());
48 + synchronized (nodeState) {
49 + long latestHeartbeat = nodeState.latestHeartbeatTime();
50 + if (latestHeartbeat != -1) {
51 + nodeState.samples().addValue(arrivalTime - latestHeartbeat);
52 + }
53 + nodeState.setLatestHeartbeatTime(arrivalTime);
54 + }
55 + }
56 +
57 + /**
58 + * Compute phi for the specified node id.
59 + * @param nodeId node id
60 + * @return phi value
61 + */
62 + public Double phi(NodeId nodeId) {
63 + if (!states.containsKey(nodeId)) {
64 + return BOOTSTRAP_PHI_VALUE;
65 + }
66 + checkNotNull(nodeId, "NodeId must not be null");
67 + History nodeState = states.get(nodeId);
68 + synchronized (nodeState) {
69 + long latestHeartbeat = nodeState.latestHeartbeatTime();
70 + DescriptiveStatistics samples = nodeState.samples();
71 + if (latestHeartbeat == -1 || samples.getN() < MIN_SAMPLES) {
72 + return 0.0;
73 + }
74 + return computePhi(samples, latestHeartbeat, System.currentTimeMillis());
75 + }
76 + }
77 +
78 + private double computePhi(DescriptiveStatistics samples, long tLast, long tNow) {
79 + long size = samples.getN();
80 + long t = tNow - tLast;
81 + return (size > 0)
82 + ? (1.0 / Math.log(10.0)) * t / samples.getMean()
83 + : BOOTSTRAP_PHI_VALUE;
84 + }
85 +
86 + private static class History {
87 + DescriptiveStatistics samples =
88 + new DescriptiveStatistics(WINDOW_SIZE);
89 + long lastHeartbeatTime = -1;
90 +
91 + public DescriptiveStatistics samples() {
92 + return samples;
93 + }
94 +
95 + public long latestHeartbeatTime() {
96 + return lastHeartbeatTime;
97 + }
98 +
99 + public void setLatestHeartbeatTime(long value) {
100 + lastHeartbeatTime = value;
101 + }
102 + }
103 +}
...\ No newline at end of file ...\ No newline at end of file
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
33 <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle> 33 <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle>
34 <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle> 34 <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle>
35 <bundle>mvn:commons-pool/commons-pool/1.6</bundle> 35 <bundle>mvn:commons-pool/commons-pool/1.6</bundle>
36 + <bundle>mvn:org.apache.commons/commons-math3/3.2</bundle>
36 37
37 <bundle>mvn:joda-time/joda-time/2.5</bundle> 38 <bundle>mvn:joda-time/joda-time/2.5</bundle>
38 39
......