/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.net.unix;

import java.io.Closeable;
import java.io.EOFException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.nio.channels.ClosedChannelException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.TreeMap;
import java.util.Map;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.lang.SystemUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.NativeCodeLoader;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Uninterruptibles;

/**
 * The DomainSocketWatcher watches a set of domain sockets to see when they
 * become readable, or closed.  When one of those events happens, it makes a
 * callback.
 *
 * All polling and all handler callbacks for watched sockets happen on a
 * single daemon thread ({@link #watcherThread}).  Callers of
 * {@link #add(DomainSocket, Handler)} and {@link #remove(DomainSocket)} queue
 * a request, wake that thread up, and block until the request has been
 * processed.  If the watcher thread terminates for any reason, the watcher
 * is marked closed and all blocked callers are released.
 *
 * See {@link DomainSocket} for more information about UNIX domain sockets.
 */
@InterfaceAudience.LimitedPrivate("HDFS")
public final class DomainSocketWatcher implements Closeable {
  static {
    if (SystemUtils.IS_OS_WINDOWS) {
      loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
    } else if (!NativeCodeLoader.isNativeCodeLoaded()) {
      loadingFailureReason = "libhadoop cannot be loaded.";
    } else {
      String problem;
      try {
        anchorNative();
        problem = null;
      } catch (Throwable t) {
        problem = "DomainSocketWatcher#anchorNative got error: " +
            t.getMessage();
      }
      loadingFailureReason = problem;
    }
  }

  static Log LOG = LogFactory.getLog(DomainSocketWatcher.class);

  /**
   * The reason why DomainSocketWatcher is not available, or null if it is
   * available.
   */
  private final static String loadingFailureReason;

  /**
   * Initializes the native library code.
   */
  private static native void anchorNative();

  /**
   * @return The reason why DomainSocketWatcher cannot be used on this
   *         platform, or null if it is available.
   */
  public static String getLoadingFailureReason() {
    return loadingFailureReason;
  }

  public interface Handler {
    /**
     * Handles an event on a socket.  An event may be the socket becoming
     * readable, or the remote end being closed.
     *
     * @param sock    The socket that the event occurred on.
     * @return        Whether we should close the socket.
     */
    boolean handle(DomainSocket sock);
  }

  /**
   * Handler for {@code notificationSockets[1]}, the read end of the socket
   * pair which is used to wake up the watcher thread.
   */
  private class NotificationHandler implements Handler {
    /**
     * Consumes the wake-up byte written by {@link #kick()} and clears the
     * {@code kicked} flag.  On EOF or any other I/O error the watcher is
     * marked closed and the socket is reported as needing to be closed.
     *
     * @param sock    The notification socket.
     * @return        True if the read failed and the socket should be closed.
     */
    @Override
    public boolean handle(DomainSocket sock) {
      assert(lock.isHeldByCurrentThread());
      try {
        kicked = false;
        if (LOG.isTraceEnabled()) {
          LOG.trace(DomainSocketWatcher.this +
              ": NotificationHandler: doing a read on " + sock.fd);
        }
        if (sock.getInputStream().read() == -1) {
          if (LOG.isTraceEnabled()) {
            LOG.trace(DomainSocketWatcher.this +
                ": NotificationHandler: got EOF on " + sock.fd);
          }
          throw new EOFException();
        }
        if (LOG.isTraceEnabled()) {
          LOG.trace(DomainSocketWatcher.this +
              ": NotificationHandler: read succeeded on " + sock.fd);
        }
        return false;
      } catch (IOException e) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(DomainSocketWatcher.this +
              ": NotificationHandler: setting closed to " +
              "true for " + sock.fd);
        }
        closed = true;
        return true;
      }
    }
  }

  /**
   * A watched socket paired with the handler to invoke for its events.
   */
  private static class Entry {
    final DomainSocket socket;
    final Handler handler;

    Entry(DomainSocket socket, Handler handler) {
      this.socket = socket;
      this.handler = handler;
    }

    DomainSocket getDomainSocket() {
      return socket;
    }

    Handler getHandler() {
      return handler;
    }
  }

  /**
   * The FdSet is a set of file descriptors that gets passed to poll(2).
   * It contains a native memory segment, so that we don't have to copy
   * in the poll0 function.
   */
  private static class FdSet {
    // Value returned by alloc0(); presumably an opaque handle that the
    // native methods below use to find the native memory segment.
    private long data;

    private native static long alloc0();

    FdSet() {
      data = alloc0();
    }

    /**
     * Add a file descriptor to the set.
     *
     * @param fd   The file descriptor to add.
     */
    native void add(int fd);

    /**
     * Remove a file descriptor from the set.
     *
     * @param fd   The file descriptor to remove.
     */
    native void remove(int fd);

    /**
     * Get an array containing all the FDs marked as readable.
     * Also clear the state of all FDs.
     *
     * @return     An array containing all of the currently readable file
     *             descriptors.
     */
    native int[] getAndClearReadableFds();

    /**
     * Close the object and de-allocate the memory used.
     */
    native void close();
  }

  /**
   * Lock which protects toAdd, toRemove, and closed.
   */
  private final ReentrantLock lock = new ReentrantLock();

  /**
   * Condition variable which indicates that toAdd and toRemove have been
   * processed.
   */
  private final Condition processedCond = lock.newCondition();

  /**
   * Entries to add.
   */
  private final LinkedList<Entry> toAdd =
      new LinkedList<Entry>();

  /**
   * Entries to remove.
   */
  private final TreeMap<Integer, DomainSocket> toRemove =
      new TreeMap<Integer, DomainSocket>();

  /**
   * Maximum length of time to go between checking whether the interrupted
   * bit has been set for this thread.
   */
  private final int interruptCheckPeriodMs;

  /**
   * A pair of sockets used to wake up the thread after it has called poll(2).
   */
  private final DomainSocket notificationSockets[];

  /**
   * Whether or not this DomainSocketWatcher is closed.
   */
  private boolean closed = false;

  /**
   * True if we have written a byte to the notification socket.  We should not
   * write anything else to the socket until the notification handler has had a
   * chance to run.  Otherwise, our thread might block, causing deadlock.
   * See HADOOP-11333 for details.
   */
  private boolean kicked = false;

  /**
   * Create a DomainSocketWatcher and start its watcher thread.
   *
   * @param interruptCheckPeriodMs  Maximum time, in milliseconds, that the
   *                                watcher thread spends in a single poll
   *                                before re-checking its interrupted bit.
   *                                Must be positive.
   * @param src                     Prefix used in the watcher thread's name.
   * @throws IOException                    If the notification socket pair
   *                                        cannot be created.
   * @throws UnsupportedOperationException  If the native support is not
   *                                        available (see
   *                                        {@link #getLoadingFailureReason()}).
   */
  public DomainSocketWatcher(int interruptCheckPeriodMs, String src)
      throws IOException {
    if (loadingFailureReason != null) {
      throw new UnsupportedOperationException(loadingFailureReason);
    }
    Preconditions.checkArgument(interruptCheckPeriodMs > 0);
    this.interruptCheckPeriodMs = interruptCheckPeriodMs;
    notificationSockets = DomainSocket.socketpair();
    watcherThread.setDaemon(true);
    watcherThread.setName(src + " DomainSocketWatcher");
    watcherThread
        .setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
          @Override
          public void uncaughtException(Thread thread, Throwable t) {
            LOG.error(thread + " terminating on unexpected exception", t);
          }
        });
    watcherThread.start();
  }

  /**
   * Close the DomainSocketWatcher and wait for its thread to terminate.
   *
   * If there is more than one close, all but the first will be ignored.
   * A call made after the watcher thread has already terminated on its own
   * is ignored as well, because the thread marks the watcher closed on exit.
   */
  @Override
  public void close() throws IOException {
    lock.lock();
    try {
      if (closed) return;
      if (LOG.isDebugEnabled()) {
        LOG.debug(this + ": closing");
      }
      closed = true;
    } finally {
      lock.unlock();
    }
    // Close notificationSockets[0], so that notificationSockets[1] gets an EOF
    // event.  This will wake up the thread immediately if it is blocked inside
    // the poll(2) system call.
    notificationSockets[0].close();
    // Wait for the watcher thread to terminate.
    Uninterruptibles.joinUninterruptibly(watcherThread);
  }

  @VisibleForTesting
  public boolean isClosed() {
    lock.lock();
    try {
      return closed;
    } finally {
      lock.unlock();
    }
  }

  /**
   * Add a socket.
   *
   * Blocks until the watcher thread has picked up the request.  If the
   * watcher is already closed, or the socket is already closed, the handler
   * is invoked immediately on the calling thread instead.
   *
   * @param sock     The socket to add.  It is an error to re-add a socket that
   *                   we are already watching.
   * @param handler  The handler to associate with this socket.  This may be
   *                   called any time after this function is called.
   */
  public void add(DomainSocket sock, Handler handler) {
    lock.lock();
    try {
      if (closed) {
        handler.handle(sock);
        IOUtils.cleanup(LOG, sock);
        return;
      }
      Entry entry = new Entry(sock, handler);
      try {
        sock.refCount.reference();
      } catch (ClosedChannelException e1) {
        // If the socket is already closed before we add it, invoke the
        // handler immediately.  Then we're done.
        handler.handle(sock);
        return;
      }
      toAdd.add(entry);
      kick();
      // Wait until the watcher thread has taken the entry out of toAdd.  The
      // condition may be signalled for other requests, so re-check each time.
      do {
        processedCond.awaitUninterruptibly();
      } while (toAdd.contains(entry));
    } finally {
      lock.unlock();
    }
  }

  /**
   * Remove a socket.  Its handler will be called.
   *
   * Blocks until the watcher thread has processed the request.  Does nothing
   * if the watcher is already closed.
   *
   * @param sock     The socket to remove.
   */
  public void remove(DomainSocket sock) {
    lock.lock();
    try {
      if (closed) return;
      toRemove.put(sock.fd, sock);
      kick();
      // Wait until the watcher thread has taken the fd out of toRemove.
      do {
        processedCond.awaitUninterruptibly();
      } while (toRemove.containsKey(sock.fd));
    } finally {
      lock.unlock();
    }
  }

  /**
   * Wake up the DomainSocketWatcher thread.
   *
   * Writes a single byte to notificationSockets[0], unless a previous byte
   * is still waiting to be consumed by the notification handler.
   */
  private void kick() {
    assert(lock.isHeldByCurrentThread());

    if (kicked) {
      return;
    }

    try {
      notificationSockets[0].getOutputStream().write(0);
      kicked = true;
    } catch (IOException e) {
      // A failed write is expected once the watcher has been closed.
      if (!closed) {
        LOG.error(this + ": error writing to notificationSockets[0]", e);
      }
    }
  }

  /**
   * Send callback and return whether or not the domain socket was closed as a
   * result of processing.
   *
   * @param caller reason for call
   * @param entries mapping of file descriptor to entry
   * @param fdSet set of file descriptors
   * @param fd file descriptor
   * @return true if the domain socket was closed as a result of processing
   */
  private boolean sendCallback(String caller, TreeMap<Integer, Entry> entries,
      FdSet fdSet, int fd) {
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + ": " + caller + " starting sendCallback for fd " + fd);
    }
    Entry entry = entries.get(fd);
    Preconditions.checkNotNull(entry,
        this + ": fdSet contained " + fd + ", which we were " +
        "not tracking.");
    DomainSocket sock = entry.getDomainSocket();
    if (entry.getHandler().handle(sock)) {
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": " + caller + ": closing fd " + fd +
            " at the request of the handler.");
      }
      // The socket is going away, so a pending remove() for it is satisfied.
      if (toRemove.remove(fd) != null) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(this + ": " + caller + " : sendCallback processed fd " +
            fd  + " in toRemove.");
        }
      }
      try {
        sock.refCount.unreferenceCheckClosed();
      } catch (IOException e) {
        Preconditions.checkArgument(false,
            this + ": file descriptor " + sock.fd + " was closed while " +
            "still in the poll(2) loop.");
      }
      IOUtils.cleanup(LOG, sock);
      fdSet.remove(fd);
      return true;
    } else {
      if (LOG.isTraceEnabled()) {
        LOG.trace(this + ": " + caller + ": sendCallback not " +
            "closing fd " + fd);
      }
      return false;
    }
  }

  /**
   * Send callback, and if the domain socket was closed as a result of
   * processing, then also remove the entry for the file descriptor.
   *
   * @param caller reason for call
   * @param entries mapping of file descriptor to entry
   * @param fdSet set of file descriptors
   * @param fd file descriptor
   */
  private void sendCallbackAndRemove(String caller,
      TreeMap<Integer, Entry> entries, FdSet fdSet, int fd) {
    if (sendCallback(caller, entries, fdSet, fd)) {
      entries.remove(fd);
    }
  }

  /**
   * Mark the watcher as closed and release every request that the watcher
   * thread will never get to process.
   *
   * Called by the watcher thread, with the lock held, as it terminates.
   * Without this, a thread blocked in add() or remove() would wait on
   * processedCond forever after an abnormal termination of the watcher
   * thread, and later calls would queue requests that nothing services.
   */
  private void abandonPendingRequests() {
    assert(lock.isHeldByCurrentThread());
    closed = true;
    try {
      for (Iterator<Entry> iter = toAdd.iterator(); iter.hasNext(); ) {
        Entry entry = iter.next();
        iter.remove();
        DomainSocket sock = entry.getDomainSocket();
        try {
          // Give back the reference which add() took for this entry.
          sock.refCount.unreferenceCheckClosed();
        } catch (IOException e) {
          // Presumably the socket was closed while it was queued; it is
          // about to be closed below anyway, so there is nothing to do.
          if (LOG.isTraceEnabled()) {
            LOG.trace(this + ": fd " + sock.fd + " was closed while " +
                "waiting to be added.");
          }
        }
        // Same treatment that add() gives a socket when the watcher is
        // already closed: notify the handler, then close the socket.
        entry.getHandler().handle(sock);
        IOUtils.cleanup(LOG, sock);
      }
    } finally {
      // Even if a handler threw, make sure that no waiter is left behind.
      toAdd.clear();
      toRemove.clear();
      processedCond.signalAll();
    }
  }

  @VisibleForTesting
  final Thread watcherThread = new Thread(new Runnable() {
    @Override
    public void run() {
      // Inside this anonymous class, "this" is the Runnable, so the watcher
      // must be named explicitly to get its toString() into log messages.
      final DomainSocketWatcher watcher = DomainSocketWatcher.this;
      if (LOG.isDebugEnabled()) {
        LOG.debug(watcher + ": starting with interruptCheckPeriodMs = " +
            interruptCheckPeriodMs);
      }
      final TreeMap<Integer, Entry> entries = new TreeMap<Integer, Entry>();
      FdSet fdSet = new FdSet();
      addNotificationSocket(entries, fdSet);
      try {
        while (true) {
          lock.lock();
          try {
            for (int fd : fdSet.getAndClearReadableFds()) {
              sendCallbackAndRemove("getAndClearReadableFds", entries, fdSet,
                  fd);
            }
            if (!(toAdd.isEmpty() && toRemove.isEmpty())) {
              // Handle pending additions (before pending removes).
              for (Iterator<Entry> iter = toAdd.iterator(); iter.hasNext(); ) {
                Entry entry = iter.next();
                DomainSocket sock = entry.getDomainSocket();
                Entry prevEntry = entries.put(sock.fd, entry);
                Preconditions.checkState(prevEntry == null,
                    watcher + ": tried to watch a file descriptor that we " +
                    "were already watching: " + sock);
                if (LOG.isTraceEnabled()) {
                  LOG.trace(watcher + ": adding fd " + sock.fd);
                }
                fdSet.add(sock.fd);
                iter.remove();
              }
              // Handle pending removals
              while (true) {
                Map.Entry<Integer, DomainSocket> entry = toRemove.firstEntry();
                if (entry == null) break;
                sendCallbackAndRemove("handlePendingRemovals",
                    entries, fdSet, entry.getValue().fd);
              }
              processedCond.signalAll();
            }
            // Check if the thread should terminate.  Doing this check now is
            // easier than at the beginning of the loop, since we know toAdd and
            // toRemove are now empty and processedCond has been notified if it
            // needed to be.
            if (closed) {
              if (LOG.isDebugEnabled()) {
                LOG.debug(watcher.toString() + " thread terminating.");
              }
              return;
            }
            // Check if someone sent our thread an InterruptedException while we
            // were waiting in poll().
            if (Thread.interrupted()) {
              throw new InterruptedException();
            }
          } finally {
            lock.unlock();
          }
          doPoll0(interruptCheckPeriodMs, fdSet);
        }
      } catch (InterruptedException e) {
        LOG.info(watcher.toString() + " terminating on InterruptedException");
      } catch (Throwable e) {
        LOG.error(watcher.toString() + " terminating on exception", e);
      } finally {
        lock.lock();
        try {
          // Make sure a byte is available, so that the handler for
          // notificationSockets[1] does not block when it does its read below.
          kick();
          for (Entry entry : entries.values()) {
            // We do not remove from entries as we iterate, because that can
            // cause a ConcurrentModificationException.
            sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
          }
          entries.clear();
          fdSet.close();
        } finally {
          try {
            // This thread is the only one which services toAdd and toRemove.
            // Whatever made it exit, release the callers waiting on it.
            abandonPendingRequests();
          } finally {
            lock.unlock();
          }
        }
      }
    }
  });

  /**
   * Start watching notificationSockets[1], so that a write to (or the close
   * of) notificationSockets[0] wakes up the watcher thread.
   *
   * @param entries mapping of file descriptor to entry
   * @param fdSet set of file descriptors
   */
  private void addNotificationSocket(final TreeMap<Integer, Entry> entries,
      FdSet fdSet) {
    entries.put(notificationSockets[1].fd,
        new Entry(notificationSockets[1], new NotificationHandler()));
    try {
      notificationSockets[1].refCount.reference();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    fdSet.add(notificationSockets[1].fd);
    if (LOG.isTraceEnabled()) {
      LOG.trace(this + ": adding notificationSocket " +
          notificationSockets[1].fd + ", connected to " +
          notificationSockets[0].fd);
    }
  }

  @Override
  public String toString() {
    return "DomainSocketWatcher(" + System.identityHashCode(this) + ")";
  }

  /**
   * Poll the given file descriptors for readability.
   *
   * @param maxWaitMs  Presumably the maximum time to wait, in milliseconds;
   *                   the watcher thread passes interruptCheckPeriodMs.
   * @param readFds    The set of file descriptors to poll.
   */
  private static native int doPoll0(int maxWaitMs, FdSet readFds)
      throws IOException;
}