Madan Jampani
Committed by Pavlin Radoslavov

1. DatabaseManager activate will attempt to listTables to ensure store is in good shape.

2. lock and tryLock can now throw InterruptedExceptions.

Change-Id: Ifa766ad441f677a4071b68d8f6caa564cf320869

Change-Id: I318ff762a96b261737831f6bd7c200b384c638e9

Change-Id: I0f509703520b3187931fa3669cd8213a91e85c96
...@@ -160,7 +160,7 @@ public class FooComponent { ...@@ -160,7 +160,7 @@ public class FooComponent {
160 } 160 }
161 } 161 }
162 162
163 - private void lockUnlock() { 163 + private void lockUnlock() throws InterruptedException {
164 try { 164 try {
165 final String locksTable = "onos-locks"; 165 final String locksTable = "onos-locks";
166 166
......
...@@ -105,11 +105,11 @@ public class SdnIp implements SdnIpService { ...@@ -105,11 +105,11 @@ public class SdnIp implements SdnIpService {
105 new ThreadFactoryBuilder() 105 new ThreadFactoryBuilder()
106 .setNameFormat("sdnip-leader-election-%d").build()); 106 .setNameFormat("sdnip-leader-election-%d").build());
107 leaderElectionExecutor.execute(new Runnable() { 107 leaderElectionExecutor.execute(new Runnable() {
108 - @Override 108 + @Override
109 - public void run() { 109 + public void run() {
110 - doLeaderElectionThread(); 110 + doLeaderElectionThread();
111 - } 111 + }
112 - }); 112 + });
113 113
114 // Manually set the instance as the leader to allow testing 114 // Manually set the instance as the leader to allow testing
115 // TODO change this when we get a leader election 115 // TODO change this when we get a leader election
...@@ -174,22 +174,22 @@ public class SdnIp implements SdnIpService { ...@@ -174,22 +174,22 @@ public class SdnIp implements SdnIpService {
174 log.debug("SDN-IP Leader Election begin"); 174 log.debug("SDN-IP Leader Election begin");
175 175
176 // Block until it becomes the leader 176 // Block until it becomes the leader
177 - leaderLock.lock(LEASE_DURATION_MS); 177 + try {
178 - 178 + leaderLock.lock(LEASE_DURATION_MS);
179 - // This instance is the leader 179 +
180 - log.info("SDN-IP Leader Elected"); 180 + // This instance is the leader
181 - intentSynchronizer.leaderChanged(true); 181 + log.info("SDN-IP Leader Elected");
182 - 182 + intentSynchronizer.leaderChanged(true);
183 - // Keep extending the expiration until shutdown 183 +
184 - int extensionFailedCountdown = LEASE_EXTEND_RETRY_MAX - 1; 184 + // Keep extending the expiration until shutdown
185 - 185 + int extensionFailedCountdown = LEASE_EXTEND_RETRY_MAX - 1;
186 - // 186 +
187 - // Keep periodically extending the lock expiration. 187 + //
188 - // If there are multiple back-to-back failures to extend (with 188 + // Keep periodically extending the lock expiration.
189 - // extra sleep time between retrials), then release the lock. 189 + // If there are multiple back-to-back failures to extend (with
190 - // 190 + // extra sleep time between retrials), then release the lock.
191 - while (!isShutdown) { 191 + //
192 - try { 192 + while (!isShutdown) {
193 Thread.sleep(LEASE_DURATION_MS / LEASE_EXTEND_RETRY_MAX); 193 Thread.sleep(LEASE_DURATION_MS / LEASE_EXTEND_RETRY_MAX);
194 if (leaderLock.extendExpiration(LEASE_DURATION_MS)) { 194 if (leaderLock.extendExpiration(LEASE_DURATION_MS)) {
195 log.trace("SDN-IP Leader Extended"); 195 log.trace("SDN-IP Leader Extended");
...@@ -211,13 +211,12 @@ public class SdnIp implements SdnIpService { ...@@ -211,13 +211,12 @@ public class SdnIp implements SdnIpService {
211 break; // Try again to get the lock 211 break; // Try again to get the lock
212 } 212 }
213 } 213 }
214 - } catch (InterruptedException e) {
215 - // Thread interrupted. Time to shutdown
216 - log.debug("SDN-IP Leader Interrupted");
217 } 214 }
215 + } catch (InterruptedException e) {
216 + // Thread interrupted. Time to shutdown
217 + log.debug("SDN-IP Leader Interrupted");
218 } 218 }
219 } 219 }
220 -
221 // If we reach here, the instance was shutdown 220 // If we reach here, the instance was shutdown
222 intentSynchronizer.leaderChanged(false); 221 intentSynchronizer.leaderChanged(false);
223 leaderLock.unlock(); 222 leaderLock.unlock();
......
...@@ -31,8 +31,9 @@ public interface Lock { ...@@ -31,8 +31,9 @@ public interface Lock {
31 * lock after granting it, before automatically releasing it if it hasn't 31 * lock after granting it, before automatically releasing it if it hasn't
32 * already been released by invoking unlock(). Must be in the range 32 * already been released by invoking unlock(). Must be in the range
33 * (0, LockManager.MAX_LEASE_MILLIS] 33 * (0, LockManager.MAX_LEASE_MILLIS]
34 + * @throws InterruptedException if the thread is interrupted while waiting
34 */ 35 */
35 - void lock(int leaseDurationMillis); 36 + void lock(int leaseDurationMillis) throws InterruptedException;
36 37
37 /** 38 /**
38 * Acquires the lock only if it is free at the time of invocation. 39 * Acquires the lock only if it is free at the time of invocation.
...@@ -54,8 +55,9 @@ public interface Lock { ...@@ -54,8 +55,9 @@ public interface Lock {
54 * (0, LockManager.MAX_LEASE_MILLIS] 55 * (0, LockManager.MAX_LEASE_MILLIS]
55 * @return true if the lock was acquired and false if the waiting time 56 * @return true if the lock was acquired and false if the waiting time
56 * elapsed before the lock was acquired 57 * elapsed before the lock was acquired
58 + * @throws InterruptedException if the thread is interrupted while waiting
57 */ 59 */
58 - boolean tryLock(long waitTimeMillis, int leaseDurationMillis); 60 + boolean tryLock(long waitTimeMillis, int leaseDurationMillis) throws InterruptedException;
59 61
60 /** 62 /**
61 * Returns true if this Lock instance currently holds the lock. 63 * Returns true if this Lock instance currently holds the lock.
......
...@@ -179,6 +179,11 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { ...@@ -179,6 +179,11 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService {
179 } 179 }
180 180
181 client.waitForLeader(); 181 client.waitForLeader();
182 +
183 + // Try and list the tables to verify database manager is
184 + // in a state where it can serve requests.
185 + tryTableListing();
186 +
182 log.info("Started."); 187 log.info("Started.");
183 } 188 }
184 189
...@@ -214,6 +219,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { ...@@ -214,6 +219,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService {
214 } 219 }
215 } 220 }
216 221
222 + private void tryTableListing() {
223 + int retries = 0;
224 + do {
225 + try {
226 + listTables();
227 + return;
228 + } catch (DatabaseException e) {
229 + if (retries == 10) {
230 + log.error("Failed to listTables after multiple attempts. Giving up.", e);
231 + throw e;
232 + } else {
233 + log.debug("Failed to listTables. Will retry...", e);
234 + retries++;
235 + }
236 + }
237 + } while (true);
238 + }
239 +
217 @Override 240 @Override
218 public boolean createTable(String name) { 241 public boolean createTable(String name) {
219 return client.createTable(name); 242 return client.createTable(name);
......
...@@ -12,6 +12,7 @@ import java.util.concurrent.atomic.AtomicBoolean; ...@@ -12,6 +12,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
12 12
13 import org.joda.time.DateTime; 13 import org.joda.time.DateTime;
14 import org.onlab.onos.cluster.ClusterService; 14 import org.onlab.onos.cluster.ClusterService;
15 +import org.onlab.onos.store.service.DatabaseException;
15 import org.onlab.onos.store.service.DatabaseService; 16 import org.onlab.onos.store.service.DatabaseService;
16 import org.onlab.onos.store.service.Lock; 17 import org.onlab.onos.store.service.Lock;
17 import org.slf4j.Logger; 18 import org.slf4j.Logger;
...@@ -23,8 +24,6 @@ public class DistributedLock implements Lock { ...@@ -23,8 +24,6 @@ public class DistributedLock implements Lock {
23 24
24 private final Logger log = getLogger(getClass()); 25 private final Logger log = getLogger(getClass());
25 26
26 - private static final long MAX_WAIT_TIME_MS = 100000000L;
27 -
28 private final DistributedLockManager lockManager; 27 private final DistributedLockManager lockManager;
29 private final DatabaseService databaseService; 28 private final DatabaseService databaseService;
30 private final String path; 29 private final String path;
...@@ -53,13 +52,17 @@ public class DistributedLock implements Lock { ...@@ -53,13 +52,17 @@ public class DistributedLock implements Lock {
53 } 52 }
54 53
55 @Override 54 @Override
56 - public void lock(int leaseDurationMillis) { 55 + public void lock(int leaseDurationMillis) throws InterruptedException {
57 if (isLocked() && lockExpirationTime.isAfter(DateTime.now().plusMillis(leaseDurationMillis))) { 56 if (isLocked() && lockExpirationTime.isAfter(DateTime.now().plusMillis(leaseDurationMillis))) {
58 - // Nothing to do.
59 - // Current expiration time is beyond what is requested.
60 return; 57 return;
61 } else { 58 } else {
62 - tryLock(MAX_WAIT_TIME_MS, leaseDurationMillis); 59 + CompletableFuture<DateTime> future =
60 + lockManager.lockIfAvailable(this, leaseDurationMillis);
61 + try {
62 + lockExpirationTime = future.get();
63 + } catch (ExecutionException e) {
64 + throw new DatabaseException(e);
65 + }
63 } 66 }
64 } 67 }
65 68
...@@ -79,22 +82,17 @@ public class DistributedLock implements Lock { ...@@ -79,22 +82,17 @@ public class DistributedLock implements Lock {
79 @Override 82 @Override
80 public boolean tryLock( 83 public boolean tryLock(
81 long waitTimeMillis, 84 long waitTimeMillis,
82 - int leaseDurationMillis) { 85 + int leaseDurationMillis) throws InterruptedException {
83 if (tryLock(leaseDurationMillis)) { 86 if (tryLock(leaseDurationMillis)) {
84 return true; 87 return true;
85 } 88 }
86 -
87 CompletableFuture<DateTime> future = 89 CompletableFuture<DateTime> future =
88 lockManager.lockIfAvailable(this, waitTimeMillis, leaseDurationMillis); 90 lockManager.lockIfAvailable(this, waitTimeMillis, leaseDurationMillis);
89 try { 91 try {
90 lockExpirationTime = future.get(waitTimeMillis, TimeUnit.MILLISECONDS); 92 lockExpirationTime = future.get(waitTimeMillis, TimeUnit.MILLISECONDS);
91 return true; 93 return true;
92 - } catch (ExecutionException | InterruptedException e) { 94 + } catch (ExecutionException e) {
93 - log.error("Encountered an exception trying to acquire lock for " + path, e); 95 + throw new DatabaseException(e);
94 - // TODO: ExecutionException could indicate something
95 - // wrong with the backing database.
96 - // Throw an exception?
97 - return false;
98 } catch (TimeoutException e) { 96 } catch (TimeoutException e) {
99 log.debug("Timed out waiting to acquire lock for {}", path); 97 log.debug("Timed out waiting to acquire lock for {}", path);
100 return false; 98 return false;
......
...@@ -64,23 +64,22 @@ public class DistributedLockManager implements LockService { ...@@ -64,23 +64,22 @@ public class DistributedLockManager implements LockService {
64 @Activate 64 @Activate
65 public void activate() { 65 public void activate() {
66 try { 66 try {
67 - Set<String> tableNames = databaseAdminService.listTables(); 67 + Set<String> tables = databaseAdminService.listTables();
68 - if (!tableNames.contains(ONOS_LOCK_TABLE_NAME)) { 68 +
69 + if (!tables.contains(ONOS_LOCK_TABLE_NAME)) {
69 if (databaseAdminService.createTable(ONOS_LOCK_TABLE_NAME, DEAD_LOCK_TIMEOUT_MS)) { 70 if (databaseAdminService.createTable(ONOS_LOCK_TABLE_NAME, DEAD_LOCK_TIMEOUT_MS)) {
70 log.info("Created {} table.", ONOS_LOCK_TABLE_NAME); 71 log.info("Created {} table.", ONOS_LOCK_TABLE_NAME);
71 } 72 }
72 } 73 }
73 } catch (DatabaseException e) { 74 } catch (DatabaseException e) {
74 log.error("DistributedLockManager#activate failed.", e); 75 log.error("DistributedLockManager#activate failed.", e);
75 - throw e;
76 } 76 }
77 77
78 - clusterCommunicator.addSubscriber( 78 + clusterCommunicator.addSubscriber(
79 - DatabaseStateMachine.DATABASE_UPDATE_EVENTS, 79 + DatabaseStateMachine.DATABASE_UPDATE_EVENTS,
80 - new LockEventMessageListener()); 80 + new LockEventMessageListener());
81 -
82 - log.info("Started.");
83 81
82 + log.info("Started");
84 } 83 }
85 84
86 @Deactivate 85 @Deactivate
...@@ -119,7 +118,31 @@ public class DistributedLockManager implements LockService { ...@@ -119,7 +118,31 @@ public class DistributedLockManager implements LockService {
119 long waitTimeMillis, 118 long waitTimeMillis,
120 int leaseDurationMillis) { 119 int leaseDurationMillis) {
121 CompletableFuture<DateTime> future = new CompletableFuture<>(); 120 CompletableFuture<DateTime> future = new CompletableFuture<>();
122 - locksToAcquire.put(lock.path(), new LockRequest(lock, waitTimeMillis, leaseDurationMillis, future)); 121 + LockRequest request = new LockRequest(
122 + lock,
123 + leaseDurationMillis,
124 + DateTime.now().plusMillis(leaseDurationMillis),
125 + future);
126 + locksToAcquire.put(lock.path(), request);
127 + return future;
128 + }
129 +
130 + /**
131 + * Attempts to acquire the lock as soon as it becomes available.
132 + * @param lock lock to acquire.
133 + * @param leaseDurationMillis the duration for which to acquire the lock initially.
134 + * @return Future lease expiration date.
135 + */
136 + protected CompletableFuture<DateTime> lockIfAvailable(
137 + Lock lock,
138 + int leaseDurationMillis) {
139 + CompletableFuture<DateTime> future = new CompletableFuture<>();
140 + LockRequest request = new LockRequest(
141 + lock,
142 + leaseDurationMillis,
143 + DateTime.now().plusYears(100),
144 + future);
145 + locksToAcquire.put(lock.path(), request);
123 return future; 146 return future;
124 } 147 }
125 148
...@@ -182,11 +205,14 @@ public class DistributedLockManager implements LockService { ...@@ -182,11 +205,14 @@ public class DistributedLockManager implements LockService {
182 private final int leaseDurationMillis; 205 private final int leaseDurationMillis;
183 private final CompletableFuture<DateTime> future; 206 private final CompletableFuture<DateTime> future;
184 207
185 - public LockRequest(Lock lock, long waitTimeMillis, 208 + public LockRequest(
186 - int leaseDurationMillis, CompletableFuture<DateTime> future) { 209 + Lock lock,
210 + int leaseDurationMillis,
211 + DateTime requestExpirationTime,
212 + CompletableFuture<DateTime> future) {
187 213
188 this.lock = lock; 214 this.lock = lock;
189 - this.requestExpirationTime = DateTime.now().plusMillis((int) waitTimeMillis); 215 + this.requestExpirationTime = requestExpirationTime;
190 this.leaseDurationMillis = leaseDurationMillis; 216 this.leaseDurationMillis = leaseDurationMillis;
191 this.future = future; 217 this.future = future;
192 } 218 }
......