Committed by
Pavlin Radoslavov
1. DatabaseManager activate will attempt to listTables to ensure store is in good shape.
2. lock and tryLock can now throw InterruptedExceptions. Change-Id: Ifa766ad441f677a4071b68d8f6caa564cf320869 Change-Id: I318ff762a96b261737831f6bd7c200b384c638e9 Change-Id: I0f509703520b3187931fa3669cd8213a91e85c96
Showing
6 changed files
with
102 additions
and
54 deletions
... | @@ -160,7 +160,7 @@ public class FooComponent { | ... | @@ -160,7 +160,7 @@ public class FooComponent { |
160 | } | 160 | } |
161 | } | 161 | } |
162 | 162 | ||
163 | - private void lockUnlock() { | 163 | + private void lockUnlock() throws InterruptedException { |
164 | try { | 164 | try { |
165 | final String locksTable = "onos-locks"; | 165 | final String locksTable = "onos-locks"; |
166 | 166 | ... | ... |
... | @@ -105,11 +105,11 @@ public class SdnIp implements SdnIpService { | ... | @@ -105,11 +105,11 @@ public class SdnIp implements SdnIpService { |
105 | new ThreadFactoryBuilder() | 105 | new ThreadFactoryBuilder() |
106 | .setNameFormat("sdnip-leader-election-%d").build()); | 106 | .setNameFormat("sdnip-leader-election-%d").build()); |
107 | leaderElectionExecutor.execute(new Runnable() { | 107 | leaderElectionExecutor.execute(new Runnable() { |
108 | - @Override | 108 | + @Override |
109 | - public void run() { | 109 | + public void run() { |
110 | - doLeaderElectionThread(); | 110 | + doLeaderElectionThread(); |
111 | - } | 111 | + } |
112 | - }); | 112 | + }); |
113 | 113 | ||
114 | // Manually set the instance as the leader to allow testing | 114 | // Manually set the instance as the leader to allow testing |
115 | // TODO change this when we get a leader election | 115 | // TODO change this when we get a leader election |
... | @@ -174,22 +174,22 @@ public class SdnIp implements SdnIpService { | ... | @@ -174,22 +174,22 @@ public class SdnIp implements SdnIpService { |
174 | log.debug("SDN-IP Leader Election begin"); | 174 | log.debug("SDN-IP Leader Election begin"); |
175 | 175 | ||
176 | // Block until it becomes the leader | 176 | // Block until it becomes the leader |
177 | - leaderLock.lock(LEASE_DURATION_MS); | 177 | + try { |
178 | - | 178 | + leaderLock.lock(LEASE_DURATION_MS); |
179 | - // This instance is the leader | 179 | + |
180 | - log.info("SDN-IP Leader Elected"); | 180 | + // This instance is the leader |
181 | - intentSynchronizer.leaderChanged(true); | 181 | + log.info("SDN-IP Leader Elected"); |
182 | - | 182 | + intentSynchronizer.leaderChanged(true); |
183 | - // Keep extending the expiration until shutdown | 183 | + |
184 | - int extensionFailedCountdown = LEASE_EXTEND_RETRY_MAX - 1; | 184 | + // Keep extending the expiration until shutdown |
185 | - | 185 | + int extensionFailedCountdown = LEASE_EXTEND_RETRY_MAX - 1; |
186 | - // | 186 | + |
187 | - // Keep periodically extending the lock expiration. | 187 | + // |
188 | - // If there are multiple back-to-back failures to extend (with | 188 | + // Keep periodically extending the lock expiration. |
189 | - // extra sleep time between retrials), then release the lock. | 189 | + // If there are multiple back-to-back failures to extend (with |
190 | - // | 190 | + // extra sleep time between retrials), then release the lock. |
191 | - while (!isShutdown) { | 191 | + // |
192 | - try { | 192 | + while (!isShutdown) { |
193 | Thread.sleep(LEASE_DURATION_MS / LEASE_EXTEND_RETRY_MAX); | 193 | Thread.sleep(LEASE_DURATION_MS / LEASE_EXTEND_RETRY_MAX); |
194 | if (leaderLock.extendExpiration(LEASE_DURATION_MS)) { | 194 | if (leaderLock.extendExpiration(LEASE_DURATION_MS)) { |
195 | log.trace("SDN-IP Leader Extended"); | 195 | log.trace("SDN-IP Leader Extended"); |
... | @@ -211,13 +211,12 @@ public class SdnIp implements SdnIpService { | ... | @@ -211,13 +211,12 @@ public class SdnIp implements SdnIpService { |
211 | break; // Try again to get the lock | 211 | break; // Try again to get the lock |
212 | } | 212 | } |
213 | } | 213 | } |
214 | - } catch (InterruptedException e) { | ||
215 | - // Thread interrupted. Time to shutdown | ||
216 | - log.debug("SDN-IP Leader Interrupted"); | ||
217 | } | 214 | } |
215 | + } catch (InterruptedException e) { | ||
216 | + // Thread interrupted. Time to shutdown | ||
217 | + log.debug("SDN-IP Leader Interrupted"); | ||
218 | } | 218 | } |
219 | } | 219 | } |
220 | - | ||
221 | // If we reach here, the instance was shutdown | 220 | // If we reach here, the instance was shutdown |
222 | intentSynchronizer.leaderChanged(false); | 221 | intentSynchronizer.leaderChanged(false); |
223 | leaderLock.unlock(); | 222 | leaderLock.unlock(); | ... | ... |
... | @@ -31,8 +31,9 @@ public interface Lock { | ... | @@ -31,8 +31,9 @@ public interface Lock { |
31 | * lock after granting it, before automatically releasing it if it hasn't | 31 | * lock after granting it, before automatically releasing it if it hasn't |
32 | * already been released by invoking unlock(). Must be in the range | 32 | * already been released by invoking unlock(). Must be in the range |
33 | * (0, LockManager.MAX_LEASE_MILLIS] | 33 | * (0, LockManager.MAX_LEASE_MILLIS] |
34 | + * @throws InterruptedException if the thread is interrupted while waiting | ||
34 | */ | 35 | */ |
35 | - void lock(int leaseDurationMillis); | 36 | + void lock(int leaseDurationMillis) throws InterruptedException; |
36 | 37 | ||
37 | /** | 38 | /** |
38 | * Acquires the lock only if it is free at the time of invocation. | 39 | * Acquires the lock only if it is free at the time of invocation. |
... | @@ -54,8 +55,9 @@ public interface Lock { | ... | @@ -54,8 +55,9 @@ public interface Lock { |
54 | * (0, LockManager.MAX_LEASE_MILLIS] | 55 | * (0, LockManager.MAX_LEASE_MILLIS] |
55 | * @return true if the lock was acquired and false if the waiting time | 56 | * @return true if the lock was acquired and false if the waiting time |
56 | * elapsed before the lock was acquired | 57 | * elapsed before the lock was acquired |
58 | + * @throws InterruptedException if the thread is interrupted while waiting | ||
57 | */ | 59 | */ |
58 | - boolean tryLock(long waitTimeMillis, int leaseDurationMillis); | 60 | + boolean tryLock(long waitTimeMillis, int leaseDurationMillis) throws InterruptedException; |
59 | 61 | ||
60 | /** | 62 | /** |
61 | * Returns true if this Lock instance currently holds the lock. | 63 | * Returns true if this Lock instance currently holds the lock. | ... | ... |
... | @@ -179,6 +179,11 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -179,6 +179,11 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
179 | } | 179 | } |
180 | 180 | ||
181 | client.waitForLeader(); | 181 | client.waitForLeader(); |
182 | + | ||
183 | + // Try and list the tables to verify database manager is | ||
184 | + // in a state where it can serve requests. | ||
185 | + tryTableListing(); | ||
186 | + | ||
182 | log.info("Started."); | 187 | log.info("Started."); |
183 | } | 188 | } |
184 | 189 | ||
... | @@ -214,6 +219,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { | ... | @@ -214,6 +219,24 @@ public class DatabaseManager implements DatabaseService, DatabaseAdminService { |
214 | } | 219 | } |
215 | } | 220 | } |
216 | 221 | ||
222 | + private void tryTableListing() { | ||
223 | + int retries = 0; | ||
224 | + do { | ||
225 | + try { | ||
226 | + listTables(); | ||
227 | + return; | ||
228 | + } catch (DatabaseException e) { | ||
229 | + if (retries == 10) { | ||
230 | + log.error("Failed to listTables after multiple attempts. Giving up.", e); | ||
231 | + throw e; | ||
232 | + } else { | ||
233 | + log.debug("Failed to listTables. Will retry...", e); | ||
234 | + retries++; | ||
235 | + } | ||
236 | + } | ||
237 | + } while (true); | ||
238 | + } | ||
239 | + | ||
217 | @Override | 240 | @Override |
218 | public boolean createTable(String name) { | 241 | public boolean createTable(String name) { |
219 | return client.createTable(name); | 242 | return client.createTable(name); | ... | ... |
... | @@ -12,6 +12,7 @@ import java.util.concurrent.atomic.AtomicBoolean; | ... | @@ -12,6 +12,7 @@ import java.util.concurrent.atomic.AtomicBoolean; |
12 | 12 | ||
13 | import org.joda.time.DateTime; | 13 | import org.joda.time.DateTime; |
14 | import org.onlab.onos.cluster.ClusterService; | 14 | import org.onlab.onos.cluster.ClusterService; |
15 | +import org.onlab.onos.store.service.DatabaseException; | ||
15 | import org.onlab.onos.store.service.DatabaseService; | 16 | import org.onlab.onos.store.service.DatabaseService; |
16 | import org.onlab.onos.store.service.Lock; | 17 | import org.onlab.onos.store.service.Lock; |
17 | import org.slf4j.Logger; | 18 | import org.slf4j.Logger; |
... | @@ -23,8 +24,6 @@ public class DistributedLock implements Lock { | ... | @@ -23,8 +24,6 @@ public class DistributedLock implements Lock { |
23 | 24 | ||
24 | private final Logger log = getLogger(getClass()); | 25 | private final Logger log = getLogger(getClass()); |
25 | 26 | ||
26 | - private static final long MAX_WAIT_TIME_MS = 100000000L; | ||
27 | - | ||
28 | private final DistributedLockManager lockManager; | 27 | private final DistributedLockManager lockManager; |
29 | private final DatabaseService databaseService; | 28 | private final DatabaseService databaseService; |
30 | private final String path; | 29 | private final String path; |
... | @@ -53,13 +52,17 @@ public class DistributedLock implements Lock { | ... | @@ -53,13 +52,17 @@ public class DistributedLock implements Lock { |
53 | } | 52 | } |
54 | 53 | ||
55 | @Override | 54 | @Override |
56 | - public void lock(int leaseDurationMillis) { | 55 | + public void lock(int leaseDurationMillis) throws InterruptedException { |
57 | if (isLocked() && lockExpirationTime.isAfter(DateTime.now().plusMillis(leaseDurationMillis))) { | 56 | if (isLocked() && lockExpirationTime.isAfter(DateTime.now().plusMillis(leaseDurationMillis))) { |
58 | - // Nothing to do. | ||
59 | - // Current expiration time is beyond what is requested. | ||
60 | return; | 57 | return; |
61 | } else { | 58 | } else { |
62 | - tryLock(MAX_WAIT_TIME_MS, leaseDurationMillis); | 59 | + CompletableFuture<DateTime> future = |
60 | + lockManager.lockIfAvailable(this, leaseDurationMillis); | ||
61 | + try { | ||
62 | + lockExpirationTime = future.get(); | ||
63 | + } catch (ExecutionException e) { | ||
64 | + throw new DatabaseException(e); | ||
65 | + } | ||
63 | } | 66 | } |
64 | } | 67 | } |
65 | 68 | ||
... | @@ -79,22 +82,17 @@ public class DistributedLock implements Lock { | ... | @@ -79,22 +82,17 @@ public class DistributedLock implements Lock { |
79 | @Override | 82 | @Override |
80 | public boolean tryLock( | 83 | public boolean tryLock( |
81 | long waitTimeMillis, | 84 | long waitTimeMillis, |
82 | - int leaseDurationMillis) { | 85 | + int leaseDurationMillis) throws InterruptedException { |
83 | if (tryLock(leaseDurationMillis)) { | 86 | if (tryLock(leaseDurationMillis)) { |
84 | return true; | 87 | return true; |
85 | } | 88 | } |
86 | - | ||
87 | CompletableFuture<DateTime> future = | 89 | CompletableFuture<DateTime> future = |
88 | lockManager.lockIfAvailable(this, waitTimeMillis, leaseDurationMillis); | 90 | lockManager.lockIfAvailable(this, waitTimeMillis, leaseDurationMillis); |
89 | try { | 91 | try { |
90 | lockExpirationTime = future.get(waitTimeMillis, TimeUnit.MILLISECONDS); | 92 | lockExpirationTime = future.get(waitTimeMillis, TimeUnit.MILLISECONDS); |
91 | return true; | 93 | return true; |
92 | - } catch (ExecutionException | InterruptedException e) { | 94 | + } catch (ExecutionException e) { |
93 | - log.error("Encountered an exception trying to acquire lock for " + path, e); | 95 | + throw new DatabaseException(e); |
94 | - // TODO: ExecutionException could indicate something | ||
95 | - // wrong with the backing database. | ||
96 | - // Throw an exception? | ||
97 | - return false; | ||
98 | } catch (TimeoutException e) { | 96 | } catch (TimeoutException e) { |
99 | log.debug("Timed out waiting to acquire lock for {}", path); | 97 | log.debug("Timed out waiting to acquire lock for {}", path); |
100 | return false; | 98 | return false; | ... | ... |
... | @@ -64,23 +64,22 @@ public class DistributedLockManager implements LockService { | ... | @@ -64,23 +64,22 @@ public class DistributedLockManager implements LockService { |
64 | @Activate | 64 | @Activate |
65 | public void activate() { | 65 | public void activate() { |
66 | try { | 66 | try { |
67 | - Set<String> tableNames = databaseAdminService.listTables(); | 67 | + Set<String> tables = databaseAdminService.listTables(); |
68 | - if (!tableNames.contains(ONOS_LOCK_TABLE_NAME)) { | 68 | + |
69 | + if (!tables.contains(ONOS_LOCK_TABLE_NAME)) { | ||
69 | if (databaseAdminService.createTable(ONOS_LOCK_TABLE_NAME, DEAD_LOCK_TIMEOUT_MS)) { | 70 | if (databaseAdminService.createTable(ONOS_LOCK_TABLE_NAME, DEAD_LOCK_TIMEOUT_MS)) { |
70 | log.info("Created {} table.", ONOS_LOCK_TABLE_NAME); | 71 | log.info("Created {} table.", ONOS_LOCK_TABLE_NAME); |
71 | } | 72 | } |
72 | } | 73 | } |
73 | } catch (DatabaseException e) { | 74 | } catch (DatabaseException e) { |
74 | log.error("DistributedLockManager#activate failed.", e); | 75 | log.error("DistributedLockManager#activate failed.", e); |
75 | - throw e; | ||
76 | } | 76 | } |
77 | 77 | ||
78 | - clusterCommunicator.addSubscriber( | 78 | + clusterCommunicator.addSubscriber( |
79 | - DatabaseStateMachine.DATABASE_UPDATE_EVENTS, | 79 | + DatabaseStateMachine.DATABASE_UPDATE_EVENTS, |
80 | - new LockEventMessageListener()); | 80 | + new LockEventMessageListener()); |
81 | - | ||
82 | - log.info("Started."); | ||
83 | 81 | ||
82 | + log.info("Started"); | ||
84 | } | 83 | } |
85 | 84 | ||
86 | @Deactivate | 85 | @Deactivate |
... | @@ -119,7 +118,31 @@ public class DistributedLockManager implements LockService { | ... | @@ -119,7 +118,31 @@ public class DistributedLockManager implements LockService { |
119 | long waitTimeMillis, | 118 | long waitTimeMillis, |
120 | int leaseDurationMillis) { | 119 | int leaseDurationMillis) { |
121 | CompletableFuture<DateTime> future = new CompletableFuture<>(); | 120 | CompletableFuture<DateTime> future = new CompletableFuture<>(); |
122 | - locksToAcquire.put(lock.path(), new LockRequest(lock, waitTimeMillis, leaseDurationMillis, future)); | 121 | + LockRequest request = new LockRequest( |
122 | + lock, | ||
123 | + leaseDurationMillis, | ||
124 | + DateTime.now().plusMillis(leaseDurationMillis), | ||
125 | + future); | ||
126 | + locksToAcquire.put(lock.path(), request); | ||
127 | + return future; | ||
128 | + } | ||
129 | + | ||
130 | + /** | ||
131 | + * Attempts to acquire the lock as soon as it becomes available. | ||
132 | + * @param lock lock to acquire. | ||
133 | + * @param leaseDurationMillis the duration for which to acquire the lock initially. | ||
134 | + * @return Future lease expiration date. | ||
135 | + */ | ||
136 | + protected CompletableFuture<DateTime> lockIfAvailable( | ||
137 | + Lock lock, | ||
138 | + int leaseDurationMillis) { | ||
139 | + CompletableFuture<DateTime> future = new CompletableFuture<>(); | ||
140 | + LockRequest request = new LockRequest( | ||
141 | + lock, | ||
142 | + leaseDurationMillis, | ||
143 | + DateTime.now().plusYears(100), | ||
144 | + future); | ||
145 | + locksToAcquire.put(lock.path(), request); | ||
123 | return future; | 146 | return future; |
124 | } | 147 | } |
125 | 148 | ||
... | @@ -182,11 +205,14 @@ public class DistributedLockManager implements LockService { | ... | @@ -182,11 +205,14 @@ public class DistributedLockManager implements LockService { |
182 | private final int leaseDurationMillis; | 205 | private final int leaseDurationMillis; |
183 | private final CompletableFuture<DateTime> future; | 206 | private final CompletableFuture<DateTime> future; |
184 | 207 | ||
185 | - public LockRequest(Lock lock, long waitTimeMillis, | 208 | + public LockRequest( |
186 | - int leaseDurationMillis, CompletableFuture<DateTime> future) { | 209 | + Lock lock, |
210 | + int leaseDurationMillis, | ||
211 | + DateTime requestExpirationTime, | ||
212 | + CompletableFuture<DateTime> future) { | ||
187 | 213 | ||
188 | this.lock = lock; | 214 | this.lock = lock; |
189 | - this.requestExpirationTime = DateTime.now().plusMillis((int) waitTimeMillis); | 215 | + this.requestExpirationTime = requestExpirationTime; |
190 | this.leaseDurationMillis = leaseDurationMillis; | 216 | this.leaseDurationMillis = leaseDurationMillis; |
191 | this.future = future; | 217 | this.future = future; |
192 | } | 218 | } | ... | ... |
-
Please register or login to post a comment