Committed by
Gerrit Code Review
ClusterService implementation that relies on accrual failure detector for determ…
…ining node up/down status. Initially off by default, until futher testing is done. Change-Id: I0ac8850d76af717e7804d4503bedb227d5894a0a
Showing
4 changed files
with
111 additions
and
0 deletions
... | @@ -32,6 +32,13 @@ | ... | @@ -32,6 +32,13 @@ |
32 | <description>ONOS Gossip based distributed store subsystems</description> | 32 | <description>ONOS Gossip based distributed store subsystems</description> |
33 | 33 | ||
34 | <dependencies> | 34 | <dependencies> |
35 | + | ||
36 | + <dependency> | ||
37 | + <groupId>org.apache.commons</groupId> | ||
38 | + <artifactId>commons-math3</artifactId> | ||
39 | + <version>3.2</version> | ||
40 | + </dependency> | ||
41 | + | ||
35 | <dependency> | 42 | <dependency> |
36 | <groupId>org.onosproject</groupId> | 43 | <groupId>org.onosproject</groupId> |
37 | <artifactId>onos-core-serializers</artifactId> | 44 | <artifactId>onos-core-serializers</artifactId> | ... | ... |
This diff is collapsed. Click to expand it.
core/store/dist/src/main/java/org/onosproject/store/cluster/impl/PhiAccrualFailureDetector.java
0 → 100644
1 | +package org.onosproject.store.cluster.impl; | ||
2 | + | ||
3 | +import static com.google.common.base.Preconditions.checkArgument; | ||
4 | +import static com.google.common.base.Preconditions.checkNotNull; | ||
5 | + | ||
6 | +import java.util.Map; | ||
7 | + | ||
8 | +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; | ||
9 | +import org.onosproject.cluster.NodeId; | ||
10 | + | ||
11 | +import com.google.common.collect.Maps; | ||
12 | + | ||
13 | +/** | ||
14 | + * Phi Accrual failure detector. | ||
15 | + * <p> | ||
16 | + * Based on a paper titled: "The φ Accrual Failure Detector" by Hayashibara, et al. | ||
17 | + */ | ||
18 | +public class PhiAccrualFailureDetector { | ||
19 | + private final Map<NodeId, History> states = Maps.newConcurrentMap(); | ||
20 | + | ||
21 | + // TODO: make these configurable. | ||
22 | + private static final int WINDOW_SIZE = 250; | ||
23 | + private static final int MIN_SAMPLES = 25; | ||
24 | + | ||
25 | + // If a node does not have any heartbeats, this is the phi | ||
26 | + // value to report. Indicates the node is inactive (from the | ||
27 | + // detectors perspective. | ||
28 | + private static final double BOOTSTRAP_PHI_VALUE = 100.0; | ||
29 | + | ||
30 | + /** | ||
31 | + * Report a new heart beat for the specified node id. | ||
32 | + * @param nodeId node id | ||
33 | + */ | ||
34 | + public void report(NodeId nodeId) { | ||
35 | + report(nodeId, System.currentTimeMillis()); | ||
36 | + } | ||
37 | + | ||
38 | + /** | ||
39 | + * Report a new heart beat for the specified node id. | ||
40 | + * @param nodeId node id | ||
41 | + * @param arrivalTime arrival time | ||
42 | + */ | ||
43 | + public void report(NodeId nodeId, long arrivalTime) { | ||
44 | + checkNotNull(nodeId, "NodeId must not be null"); | ||
45 | + checkArgument(arrivalTime >= 0, "arrivalTime must not be negative"); | ||
46 | + History nodeState = | ||
47 | + states.computeIfAbsent(nodeId, key -> new History()); | ||
48 | + synchronized (nodeState) { | ||
49 | + long latestHeartbeat = nodeState.latestHeartbeatTime(); | ||
50 | + if (latestHeartbeat != -1) { | ||
51 | + nodeState.samples().addValue(arrivalTime - latestHeartbeat); | ||
52 | + } | ||
53 | + nodeState.setLatestHeartbeatTime(arrivalTime); | ||
54 | + } | ||
55 | + } | ||
56 | + | ||
57 | + /** | ||
58 | + * Compute phi for the specified node id. | ||
59 | + * @param nodeId node id | ||
60 | + * @return phi value | ||
61 | + */ | ||
62 | + public Double phi(NodeId nodeId) { | ||
63 | + if (!states.containsKey(nodeId)) { | ||
64 | + return BOOTSTRAP_PHI_VALUE; | ||
65 | + } | ||
66 | + checkNotNull(nodeId, "NodeId must not be null"); | ||
67 | + History nodeState = states.get(nodeId); | ||
68 | + synchronized (nodeState) { | ||
69 | + long latestHeartbeat = nodeState.latestHeartbeatTime(); | ||
70 | + DescriptiveStatistics samples = nodeState.samples(); | ||
71 | + if (latestHeartbeat == -1 || samples.getN() < MIN_SAMPLES) { | ||
72 | + return 0.0; | ||
73 | + } | ||
74 | + return computePhi(samples, latestHeartbeat, System.currentTimeMillis()); | ||
75 | + } | ||
76 | + } | ||
77 | + | ||
78 | + private double computePhi(DescriptiveStatistics samples, long tLast, long tNow) { | ||
79 | + long size = samples.getN(); | ||
80 | + long t = tNow - tLast; | ||
81 | + return (size > 0) | ||
82 | + ? (1.0 / Math.log(10.0)) * t / samples.getMean() | ||
83 | + : BOOTSTRAP_PHI_VALUE; | ||
84 | + } | ||
85 | + | ||
86 | + private static class History { | ||
87 | + DescriptiveStatistics samples = | ||
88 | + new DescriptiveStatistics(WINDOW_SIZE); | ||
89 | + long lastHeartbeatTime = -1; | ||
90 | + | ||
91 | + public DescriptiveStatistics samples() { | ||
92 | + return samples; | ||
93 | + } | ||
94 | + | ||
95 | + public long latestHeartbeatTime() { | ||
96 | + return lastHeartbeatTime; | ||
97 | + } | ||
98 | + | ||
99 | + public void setLatestHeartbeatTime(long value) { | ||
100 | + lastHeartbeatTime = value; | ||
101 | + } | ||
102 | + } | ||
103 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -33,6 +33,7 @@ | ... | @@ -33,6 +33,7 @@ |
33 | <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle> | 33 | <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle> |
34 | <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle> | 34 | <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle> |
35 | <bundle>mvn:commons-pool/commons-pool/1.6</bundle> | 35 | <bundle>mvn:commons-pool/commons-pool/1.6</bundle> |
36 | + <bundle>mvn:org.apache.commons/commons-math3/3.2</bundle> | ||
36 | 37 | ||
37 | <bundle>mvn:joda-time/joda-time/2.5</bundle> | 38 | <bundle>mvn:joda-time/joda-time/2.5</bundle> |
38 | 39 | ... | ... |
-
Please register or login to post a comment