Committed by
Gerrit Code Review
ClusterService implementation that relies on accrual failure detector for determining node up/down status.
Initially off by default, until further testing is done. Change-Id: I0ac8850d76af717e7804d4503bedb227d5894a0a
Showing
4 changed files
with
111 additions
and
0 deletions
| ... | @@ -32,6 +32,13 @@ | ... | @@ -32,6 +32,13 @@ |
| 32 | <description>ONOS Gossip based distributed store subsystems</description> | 32 | <description>ONOS Gossip based distributed store subsystems</description> |
| 33 | 33 | ||
| 34 | <dependencies> | 34 | <dependencies> |
| 35 | + | ||
| 36 | + <dependency> | ||
| 37 | + <groupId>org.apache.commons</groupId> | ||
| 38 | + <artifactId>commons-math3</artifactId> | ||
| 39 | + <version>3.2</version> | ||
| 40 | + </dependency> | ||
| 41 | + | ||
| 35 | <dependency> | 42 | <dependency> |
| 36 | <groupId>org.onosproject</groupId> | 43 | <groupId>org.onosproject</groupId> |
| 37 | <artifactId>onos-core-serializers</artifactId> | 44 | <artifactId>onos-core-serializers</artifactId> | ... | ... |
This diff is collapsed. Click to expand it.
core/store/dist/src/main/java/org/onosproject/store/cluster/impl/PhiAccrualFailureDetector.java
0 → 100644
| 1 | +package org.onosproject.store.cluster.impl; | ||
| 2 | + | ||
| 3 | +import static com.google.common.base.Preconditions.checkArgument; | ||
| 4 | +import static com.google.common.base.Preconditions.checkNotNull; | ||
| 5 | + | ||
| 6 | +import java.util.Map; | ||
| 7 | + | ||
| 8 | +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; | ||
| 9 | +import org.onosproject.cluster.NodeId; | ||
| 10 | + | ||
| 11 | +import com.google.common.collect.Maps; | ||
| 12 | + | ||
| 13 | +/** | ||
| 14 | + * Phi Accrual failure detector. | ||
| 15 | + * <p> | ||
| 16 | + * Based on a paper titled: "The φ Accrual Failure Detector" by Hayashibara, et al. | ||
| 17 | + */ | ||
| 18 | +public class PhiAccrualFailureDetector { | ||
| 19 | + private final Map<NodeId, History> states = Maps.newConcurrentMap(); | ||
| 20 | + | ||
| 21 | + // TODO: make these configurable. | ||
| 22 | + private static final int WINDOW_SIZE = 250; | ||
| 23 | + private static final int MIN_SAMPLES = 25; | ||
| 24 | + | ||
| 25 | + // If a node does not have any heartbeats, this is the phi | ||
| 26 | + // value to report. Indicates the node is inactive (from the | ||
| 27 | + // detectors perspective. | ||
| 28 | + private static final double BOOTSTRAP_PHI_VALUE = 100.0; | ||
| 29 | + | ||
| 30 | + /** | ||
| 31 | + * Report a new heart beat for the specified node id. | ||
| 32 | + * @param nodeId node id | ||
| 33 | + */ | ||
| 34 | + public void report(NodeId nodeId) { | ||
| 35 | + report(nodeId, System.currentTimeMillis()); | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + /** | ||
| 39 | + * Report a new heart beat for the specified node id. | ||
| 40 | + * @param nodeId node id | ||
| 41 | + * @param arrivalTime arrival time | ||
| 42 | + */ | ||
| 43 | + public void report(NodeId nodeId, long arrivalTime) { | ||
| 44 | + checkNotNull(nodeId, "NodeId must not be null"); | ||
| 45 | + checkArgument(arrivalTime >= 0, "arrivalTime must not be negative"); | ||
| 46 | + History nodeState = | ||
| 47 | + states.computeIfAbsent(nodeId, key -> new History()); | ||
| 48 | + synchronized (nodeState) { | ||
| 49 | + long latestHeartbeat = nodeState.latestHeartbeatTime(); | ||
| 50 | + if (latestHeartbeat != -1) { | ||
| 51 | + nodeState.samples().addValue(arrivalTime - latestHeartbeat); | ||
| 52 | + } | ||
| 53 | + nodeState.setLatestHeartbeatTime(arrivalTime); | ||
| 54 | + } | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + /** | ||
| 58 | + * Compute phi for the specified node id. | ||
| 59 | + * @param nodeId node id | ||
| 60 | + * @return phi value | ||
| 61 | + */ | ||
| 62 | + public Double phi(NodeId nodeId) { | ||
| 63 | + if (!states.containsKey(nodeId)) { | ||
| 64 | + return BOOTSTRAP_PHI_VALUE; | ||
| 65 | + } | ||
| 66 | + checkNotNull(nodeId, "NodeId must not be null"); | ||
| 67 | + History nodeState = states.get(nodeId); | ||
| 68 | + synchronized (nodeState) { | ||
| 69 | + long latestHeartbeat = nodeState.latestHeartbeatTime(); | ||
| 70 | + DescriptiveStatistics samples = nodeState.samples(); | ||
| 71 | + if (latestHeartbeat == -1 || samples.getN() < MIN_SAMPLES) { | ||
| 72 | + return 0.0; | ||
| 73 | + } | ||
| 74 | + return computePhi(samples, latestHeartbeat, System.currentTimeMillis()); | ||
| 75 | + } | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + private double computePhi(DescriptiveStatistics samples, long tLast, long tNow) { | ||
| 79 | + long size = samples.getN(); | ||
| 80 | + long t = tNow - tLast; | ||
| 81 | + return (size > 0) | ||
| 82 | + ? (1.0 / Math.log(10.0)) * t / samples.getMean() | ||
| 83 | + : BOOTSTRAP_PHI_VALUE; | ||
| 84 | + } | ||
| 85 | + | ||
| 86 | + private static class History { | ||
| 87 | + DescriptiveStatistics samples = | ||
| 88 | + new DescriptiveStatistics(WINDOW_SIZE); | ||
| 89 | + long lastHeartbeatTime = -1; | ||
| 90 | + | ||
| 91 | + public DescriptiveStatistics samples() { | ||
| 92 | + return samples; | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + public long latestHeartbeatTime() { | ||
| 96 | + return lastHeartbeatTime; | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + public void setLatestHeartbeatTime(long value) { | ||
| 100 | + lastHeartbeatTime = value; | ||
| 101 | + } | ||
| 102 | + } | ||
| 103 | +} | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| ... | @@ -33,6 +33,7 @@ | ... | @@ -33,6 +33,7 @@ |
| 33 | <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle> | 33 | <bundle>mvn:io.netty/netty-codec/4.0.23.Final</bundle> |
| 34 | <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle> | 34 | <bundle>mvn:io.netty/netty-transport-native-epoll/4.0.23.Final</bundle> |
| 35 | <bundle>mvn:commons-pool/commons-pool/1.6</bundle> | 35 | <bundle>mvn:commons-pool/commons-pool/1.6</bundle> |
| 36 | + <bundle>mvn:org.apache.commons/commons-math3/3.2</bundle> | ||
| 36 | 37 | ||
| 37 | <bundle>mvn:joda-time/joda-time/2.5</bundle> | 38 | <bundle>mvn:joda-time/joda-time/2.5</bundle> |
| 38 | 39 | ... | ... |
-
Please register or login to post a comment