/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.net;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.util.ReflectionUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 * <p>Access to the tree goes through {@link #netlock}: the public mutators
 * ({@link #add(Node)}, {@link #remove(Node)}) take the write lock, and most
 * public queries take the read lock.
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable
public class NetworkTopology {
  public final static String DEFAULT_RACK = "/default-rack";
  public final static int DEFAULT_HOST_LEVEL = 2;
  public static final Log LOG =
    LogFactory.getLog(NetworkTopology.class);

  /** Thrown when an operation would leave, or finds, the tree inconsistent. */
  public static class InvalidTopologyException extends RuntimeException {
    private static final long serialVersionUID = 1L;
    public InvalidTopologyException(String msg) {
      super(msg);
    }
  }

  /**
   * Get an instance of NetworkTopology based on the value of the configuration
   * parameter net.topology.impl.
   *
   * @param conf the configuration to be used
   * @return an instance of NetworkTopology
   */
  public static NetworkTopology getInstance(Configuration conf){
    return ReflectionUtils.newInstance(
        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
        NetworkTopology.class, NetworkTopology.class), conf);
  }

  /** InnerNode represents a switch/router of a data center or rack.
   * Different from a leaf node, it has non-null children.
   */
  static class InnerNode extends NodeBase {
    protected List<Node> children=new ArrayList<Node>();
    /** Number of leaves in the subtree rooted at this node. */
    private int numOfLeaves;

    /** Construct an InnerNode from a path-like string */
    InnerNode(String path) {
      super(path);
    }

    /** Construct an InnerNode from its name and its network location */
    InnerNode(String name, String location) {
      super(name, location);
    }

    /** Construct an InnerNode
     * from its name, its network location, its parent, and its level */
    InnerNode(String name, String location, InnerNode parent, int level) {
      super(name, location, parent, level);
    }

    /** @return its children (the live internal list, not a copy) */
    List<Node> getChildren() {return children;}

    /** @return the number of children this node has */
    int getNumOfChildren() {
      return children.size();
    }

    /** Judge if this node represents a rack
     * @return true if it has no child or its children are not InnerNodes
     */
    boolean isRack() {
      if (children.isEmpty()) {
        return true;
      }

      // Only the first child is inspected to classify this node.
      Node firstChild = children.get(0);
      if (firstChild instanceof InnerNode) {
        return false;
      }

      return true;
    }

    /** Judge if this node is an ancestor of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is an ancestor of <i>n</i>
     */
    boolean isAncestor(Node n) {
      // A path equal to the separator is always an ancestor. Otherwise
      // compare with a trailing separator so "/rack1" is not taken for an
      // ancestor of "/rack10".
      return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
        (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
        startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
    }

    /** Judge if this node is the parent of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is the parent of <i>n</i>
     */
    boolean isParent(Node n) {
      return n.getNetworkLocation().equals(getPath(this));
    }

    /* Return a child name of this node who is an ancestor of node <i>n</i> */
    private String getNextAncestorName(Node n) {
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(
                                           this + " is not an ancestor of " + n);
      }
      // Strip this node's path, then keep only the first remaining component.
      String name = n.getNetworkLocation().substring(getPath(this).length());
      if (name.charAt(0) == PATH_SEPARATOR) {
        name = name.substring(1);
      }
      int index=name.indexOf(PATH_SEPARATOR);
      if (index !=-1)
        name = name.substring(0, index);
      return name;
    }

    /** Add node <i>n</i> to the subtree of this node
     * @param n node to be added
     * @return true if the node is added; false if a child with the same name
     *         already existed (the existing child is replaced by <i>n</i>)
     * @exception IllegalArgumentException if this node is not an ancestor
     *            of <i>n</i>
     */
    boolean add(Node n) {
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()+", which is located at "
                +n.getNetworkLocation()+", is not a descendent of "
                +getPath(this));
      if (isParent(n)) {
        // this node is the parent of n; add n directly
        n.setParent(this);
        n.setLevel(this.level+1);
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            // same name already present: swap in n, leaf count is unchanged
            children.set(i, n);
            return false;
          }
        }
        children.add(n);
        numOfLeaves++;
        return true;
      } else {
        // find the next ancestor node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode == null) {
          // create a new InnerNode
          parentNode = createParentNode(parentName);
          children.add(parentNode);
        }
        // add n to the subtree of the next ancestor node
        if (parentNode.add(n)) {
          numOfLeaves++;
          return true;
        } else {
          return false;
        }
      }
    }

    /**
     * Creates a parent node to be added to the list of children.
     * Creates a node using the InnerNode four argument constructor specifying
     * the name, location, parent, and level of this node.
     *
     * <p>To be overridden in subclasses for specific InnerNode implementations,
     * as alternative to overriding the full {@link #add(Node)} method.
     *
     * @param parentName The name of the parent node
     * @return A new inner node
     * @see InnerNode#InnerNode(String, String, InnerNode, int)
     */
    protected InnerNode createParentNode(String parentName) {
      return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
    }

    /** Remove node <i>n</i> from the subtree of this node
     * @param n node to be deleted
     * @return true if the node is deleted; false otherwise
     * @exception IllegalArgumentException if this node is not an ancestor
     *            of <i>n</i>
     */
    boolean remove(Node n) {
      String parent = n.getNetworkLocation();
      String currentPath = getPath(this);
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()
                                           +", which is located at "
                                           +parent+", is not a descendent of "+currentPath);
      if (isParent(n)) {
        // this node is the parent of n; remove n directly
        for(int i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(n.getName())) {
            children.remove(i);
            numOfLeaves--;
            n.setParent(null);
            return true;
          }
        }
        return false;
      } else {
        // find the next ancestor node: the parent node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        int i;
        for(i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode==null) {
          return false;
        }
        // remove n from the parent node
        boolean isRemoved = parentNode.remove(n);
        // if the parent node has no children, remove the parent node too
        if (isRemoved) {
          if (parentNode.getNumOfChildren() == 0) {
            // i still indexes parentNode because the search loop broke on it
            children.remove(i);
          }
          numOfLeaves--;
        }
        return isRemoved;
      }
    } // end of remove

    /** Given a node's string representation, return a reference to the node
     * @param loc string location of the form /rack/node, relative to this
     *            node and without a leading separator; null or empty means
     *            this node itself
     * @return null if the node is not found or the childnode is there but
     * not an instance of {@link InnerNode}
     */
    private Node getLoc(String loc) {
      if (loc == null || loc.length() == 0) return this;

      // path[0] is the child name, path[1] (if any) the remainder below it
      String[] path = loc.split(PATH_SEPARATOR_STR, 2);
      Node childnode = null;
      for(int i=0; i<children.size(); i++) {
        if (children.get(i).getName().equals(path[0])) {
          childnode = children.get(i);
          // add() keeps child names unique, so the first match is the only one
          break;
        }
      }
      if (childnode == null) return null; // non-existing node
      if (path.length == 1) return childnode;
      if (childnode instanceof InnerNode) {
        return ((InnerNode)childnode).getLoc(path[1]);
      } else {
        return null;
      }
    }

    /** get <i>leafIndex</i> leaf of this subtree
     * if it is not in the <i>excludedNode</i>
     *
     * @param leafIndex an indexed leaf of the node
     * @param excludedNode an excluded node (can be null)
     * @return the leaf at <i>leafIndex</i> counted with the excluded node
     *         (or its whole subtree) skipped; null if the index is out of
     *         range
     */
    Node getLeaf(int leafIndex, Node excludedNode) {
      int count=0;
      // check if the excluded node a leaf
      boolean isLeaf =
        excludedNode == null || !(excludedNode instanceof InnerNode);
      // calculate the total number of excluded leaf nodes
      int numOfExcludedLeaves =
        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
      if (isLeafParent()) { // children are leaves
        if (isLeaf) { // excluded node is a leaf node
          int excludedIndex = children.indexOf(excludedNode);
          if (excludedIndex != -1 && leafIndex >= 0) {
            // excluded node is one of the children so adjust the leaf index
            leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
          }
        }
        // range check
        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
          return null;
        }
        return children.get(leafIndex);
      } else {
        for(int i=0; i<children.size(); i++) {
          InnerNode child = (InnerNode)children.get(i);
          if (excludedNode == null || excludedNode != child) {
            // not the excludedNode
            int numOfLeaves = child.getNumOfLeaves();
            if (excludedNode != null && child.isAncestor(excludedNode)) {
              numOfLeaves -= numOfExcludedLeaves;
            }
            if (count+numOfLeaves > leafIndex) {
              // the leaf is in the child subtree
              return child.getLeaf(leafIndex-count, excludedNode);
            } else {
              // go to the next child
              count = count+numOfLeaves;
            }
          } else { // it is the excludedNode
            // skip it and set the excludedNode to be null
            excludedNode = null;
          }
        }
        return null;
      }
    }

    /**
     * Tells {@link #getLeaf(int, Node)} whether the children of this node
     * are leaves. Default implementation calls {@link #isRack()}.
     *
     * @return true if children are leaves, false otherwise
     */
    protected boolean isLeafParent() {
      return isRack();
    }

    /**
      * Determine if children are leaves, default implementation calls {@link #isRack()}
      * <p>To be overridden in subclasses for specific InnerNode implementations,
      * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
      *
      * @return true if children are leaves, false otherwise
      */
    protected boolean areChildrenLeaves() {
      return isRack();
    }

    /**
     * Get number of leaves.
     */
    int getNumOfLeaves() {
      return numOfLeaves;
    }
  } // end of InnerNode

  /**
   * the root cluster map
   */
  InnerNode clusterMap;
  /** Depth of all leaf nodes */
  private int depthOfAllLeaves = -1;
  /** rack counter */
  protected int numOfRacks = 0;

  /**
   * Whether or not this cluster has ever consisted of more than 1 rack,
   * according to the NetworkTopology.
   */
  private boolean clusterEverBeenMultiRack = false;

  /** the lock used to manage access */
  protected ReadWriteLock netlock = new ReentrantReadWriteLock();

  public NetworkTopology() {
    clusterMap = new InnerNode(InnerNode.ROOT);
  }

  /** Add a leaf node
   * Update node counter & rack counter if necessary
   * @param node node to be added; can be null
   * @exception IllegalArgumentException if add a node to a leave
   *            or node to be added is not a leaf
   * @exception InvalidTopologyException if the node's depth differs from the
   *            depth of the leaves already in the topology
   */
  public void add(Node node) {
    if (node==null) return;
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to add an inner node: "+NodeBase.getPath(node));
    }
    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
    netlock.writeLock().lock();
    try {
      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
        // Build the topology dump only on failure; it walks every leaf and
        // is too expensive to compute on each successful add.
        LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
            " at depth " + newDepth + " to topology:\n" + this.toString());
        throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
            ": You cannot have a rack and a non-rack node at the same " +
            "level of the network topology.");
      }
      Node rack = getNodeForNetworkLocation(node);
      if (rack != null && !(rack instanceof InnerNode)) {
        throw new IllegalArgumentException("Unexpected data node "
                                           + node.toString()
                                           + " at an illegal network location");
      }
      if (clusterMap.add(node)) {
        LOG.info("Adding a new node: "+NodeBase.getPath(node));
        if (rack == null) {
          // the node's location did not exist before, so a rack was created
          incrementRacks();
        }
        // node is known not to be an InnerNode here (checked above)
        if (depthOfAllLeaves == -1) {
          depthOfAllLeaves = node.getLevel();
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /** Bump the rack counter and latch the "ever multi-rack" flag. */
  protected void incrementRacks() {
    numOfRacks++;
    if (!clusterEverBeenMultiRack && numOfRacks > 1) {
      clusterEverBeenMultiRack = true;
    }
  }

  /**
   * Return a reference to the node given its string representation.
   * Default implementation delegates to {@link #getNode(String)}.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full {@link #add(Node)}
   * method.
   *
   * @param node The string representation of this node's network location is
   * used to retrieve a Node object.
   * @return a reference to the node; null if the node is not in the tree
   *
   * @see #add(Node)
   * @see #getNode(String)
   */
  protected Node getNodeForNetworkLocation(Node node) {
    return getNode(node.getNetworkLocation());
  }

  /**
   * Given a string representation of a rack, return its children
   * @param loc a path-like string representation of a rack
   * @return a newly allocated list with all the node's children; null if
   *         <i>loc</i> is not in the tree or does not name an inner node
   */
  public List<Node> getDatanodesInRack(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc)) {
        // getLoc expects a path relative to the root, without the leading '/'
        loc = loc.substring(1);
      }
      Node rack = clusterMap.getLoc(loc);
      if (!(rack instanceof InnerNode)) {
        // covers both "not found" (null) and "path names a leaf"
        return null;
      }
      return new ArrayList<Node>(((InnerNode) rack).getChildren());
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Remove a node
   * Update node counter and rack counter if necessary
   * @param node node to be removed; can be null
   * @exception IllegalArgumentException if node is an inner node
   */
  public void remove(Node node) {
    if (node==null) return;
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to remove an inner node: "+NodeBase.getPath(node));
    }
    LOG.info("Removing a node: "+NodeBase.getPath(node));
    netlock.writeLock().lock();
    try {
      if (clusterMap.remove(node)) {
        // InnerNode.remove drops inner nodes left without children, so a
        // missing rack here means the removed node was its last member.
        InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
        if (rack == null) {
          numOfRacks--;
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /** Check if the tree contains node <i>node</i>
   *
   * @param node a node
   * @return true if <i>node</i> is already in the tree; false otherwise
   */
  public boolean contains(Node node) {
    if (node == null) return false;
    netlock.readLock().lock();
    try {
      // Walk up the parent chain, at most getLevel() steps, looking for the
      // root of this topology.
      Node parent = node.getParent();
      for (int level = node.getLevel(); parent != null && level > 0;
           parent = parent.getParent(), level--) {
        if (parent == clusterMap) {
          return true;
        }
      }
    } finally {
      netlock.readLock().unlock();
    }
    return false;
  }

  /** Given a string representation of a node, return its reference
   *
   * @param loc
   *          a path-like string representation of a node
   * @return a reference to the node; null if the node is not in the tree
   */
  public Node getNode(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc))
        loc = loc.substring(1);
      return clusterMap.getLoc(loc);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * @return true if this cluster has ever consisted of multiple racks, even if
   * it is not now a multi-rack cluster.
   */
  public boolean hasClusterEverBeenMultiRack() {
    return clusterEverBeenMultiRack;
  }

  /** Given a string representation of a network location, return the
   * string representation of its rack.
   *
   * The default implementation returns <i>loc</i> unchanged.
   * To be overridden in subclasses for specific NetworkTopology
   * implementations.
   *
   * @param loc
   *          a path-like string representation of a network location
   * @return a rack string
   */
  public String getRack(String loc) {
    return loc;
  }

  /** @return the total number of racks */
  public int getNumOfRacks() {
    netlock.readLock().lock();
    try {
      return numOfRacks;
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** @return the total number of leaf nodes */
  public int getNumOfLeaves() {
    netlock.readLock().lock();
    try {
      return clusterMap.getNumOfLeaves();
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Return the distance between two nodes
   * It is assumed that the distance from one node to its parent is 1
   * The distance between two nodes is calculated by summing up their distances
   * to their closest common ancestor.
   * @param node1 one node; must not be null unless it is the same reference
   *              as node2
   * @param node2 another node; must not be null unless it is the same
   *              reference as node1
   * @return the distance between node1 and node2 which is zero if they are the same
   *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
   */
  public int getDistance(Node node1, Node node2) {
    if (node1 == node2) {
      return 0;
    }
    Node n1=node1, n2=node2;
    int dis = 0;
    netlock.readLock().lock();
    try {
      int level1=node1.getLevel(), level2=node2.getLevel();
      // bring the deeper node up to the level of the shallower one
      while(n1!=null && level1>level2) {
        n1 = n1.getParent();
        level1--;
        dis++;
      }
      while(n2!=null && level2>level1) {
        n2 = n2.getParent();
        level2--;
        dis++;
      }
      // climb both sides together until they share a parent
      while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
        n1=n1.getParent();
        n2=n2.getParent();
        dis+=2;
      }
    } finally {
      netlock.readLock().unlock();
    }
    if (n1==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
      return Integer.MAX_VALUE;
    }
    if (n2==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
      return Integer.MAX_VALUE;
    }
    // the final hop from each side up to the shared parent
    return dis+2;
  }

  /** Check if two nodes are on the same rack
   * @param node1 one node (can be null)
   * @param node2 another node (can be null)
   * @return true if node1 and node2 are on the same rack; false otherwise,
   *         including when either node is null
   */
  public boolean isOnSameRack( Node node1,  Node node2) {
    if (node1 == null || node2 == null) {
      return false;
    }

    netlock.readLock().lock();
    try {
      return isSameParents(node1, node2);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Check if network topology is aware of NodeGroup
   */
  public boolean isNodeGroupAware() {
    return false;
  }

  /**
   * Return false directly as not aware of NodeGroup, to be overridden in
   * sub-class
   */
  public boolean isOnSameNodeGroup(Node node1, Node node2) {
    return false;
  }

  /**
   * Compare the parents of each node for equality
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full
   * {@link #isOnSameRack(Node, Node)} method.
   *
   * @param node1 the first node to compare
   * @param node2 the second node to compare
   * @return true if their parents are equal, false otherwise
   *
   * @see #isOnSameRack(Node, Node)
   */
  protected boolean isSameParents(Node node1, Node node2) {
    return node1.getParent()==node2.getParent();
  }

  private static final Random r = new Random();

  @VisibleForTesting
  void setRandomSeed(long seed) {
    r.setSeed(seed);
  }

  /** randomly choose one node from <i>scope</i>
   * if scope starts with ~, choose one from the all nodes except for the
   * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
   * @param scope range of nodes from which a node will be chosen
   * @return the chosen node
   */
  public Node chooseRandom(String scope) {
    netlock.readLock().lock();
    try {
      if (scope.startsWith("~")) {
        return chooseRandom(NodeBase.ROOT, scope.substring(1));
      } else {
        return chooseRandom(scope, null);
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Choose a random leaf under <i>scope</i>, skipping everything under
   * <i>excludedScope</i>.
   *
   * @param scope path of the subtree to choose from
   * @param excludedScope path of the subtree (or leaf) to skip; can be null
   * @return the chosen leaf; the node at <i>scope</i> itself (possibly null)
   *         when it is not an inner node; null when <i>scope</i> starts with
   *         <i>excludedScope</i>
   * @exception InvalidTopologyException if no leaf is left to choose from
   */
  private Node chooseRandom(String scope, String excludedScope){
    if (excludedScope != null) {
      if (scope.startsWith(excludedScope)) {
        return null;
      }
      if (!excludedScope.startsWith(scope)) {
        // the exclusion lies outside scope, so it cannot affect the choice
        excludedScope = null;
      }
    }
    Node node = getNode(scope);
    if (!(node instanceof InnerNode)) {
      return node;
    }
    InnerNode innerNode = (InnerNode)node;
    int numOfDatanodes = innerNode.getNumOfLeaves();
    Node excludedNode = null;
    if (excludedScope != null) {
      excludedNode = getNode(excludedScope);
      if (excludedNode instanceof InnerNode) {
        numOfDatanodes -= ((InnerNode)excludedNode).getNumOfLeaves();
      } else if (excludedNode != null) {
        numOfDatanodes -= 1;
      }
      // An excluded scope that is not in the tree removes nothing: every
      // leaf under scope stays eligible.
    }
    if (numOfDatanodes == 0) {
      throw new InvalidTopologyException(
          "Failed to find datanode (scope=\"" + String.valueOf(scope) +
          "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
    }
    int leaveIndex = r.nextInt(numOfDatanodes);
    return innerNode.getLeaf(leaveIndex, excludedNode);
  }

  /** return leaves in <i>scope</i>
   * @param scope a path string
   * @return leaves nodes under specific scope; an empty list if
   *         <i>scope</i> is not in the tree
   */
  public List<Node> getLeaves(String scope) {
    List<Node> leafNodes = new ArrayList<Node>();
    // Hold the read lock across lookup and traversal so the subtree cannot
    // change between the leaf count and the getLeaf calls.
    netlock.readLock().lock();
    try {
      Node node = getNode(scope);
      if (node instanceof InnerNode) {
        InnerNode innerNode = (InnerNode) node;
        int numOfLeaves = innerNode.getNumOfLeaves();
        for (int i = 0; i < numOfLeaves; i++) {
          leafNodes.add(innerNode.getLeaf(i, null));
        }
      } else if (node != null) {
        // scope names a single leaf
        leafNodes.add(node);
      }
    } finally {
      netlock.readLock().unlock();
    }
    return leafNodes;
  }

  /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
   * if scope starts with ~, return the number of nodes that are not
   * in <i>scope</i> and <i>excludedNodes</i>;
   * @param scope a path string that may start with ~
   * @param excludedNodes a list of nodes
   * @return number of available nodes
   */
  public int countNumOfAvailableNodes(String scope,
                                      Collection<Node> excludedNodes) {
    boolean isExcluded=false;
    if (scope.startsWith("~")) {
      isExcluded=true;
      scope=scope.substring(1);
    }
    scope = NodeBase.normalize(scope);
    int excludedCountInScope = 0;  // excluded nodes that lie inside scope
    int excludedCountOffScope = 0; // excluded nodes that lie outside scope
    netlock.readLock().lock();
    try {
      for (Node node : excludedNodes) {
        // re-resolve by path so only nodes present in the tree are counted
        node = getNode(NodeBase.getPath(node));
        if (node == null) {
          continue;
        }
        if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
            .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
          excludedCountInScope++;
        } else {
          excludedCountOffScope++;
        }
      }
      Node n = getNode(scope);
      int scopeNodeCount = 0;
      if (n != null) {
        // a leaf scope counts as one node
        scopeNodeCount++;
      }
      if (n instanceof InnerNode) {
        scopeNodeCount=((InnerNode)n).getNumOfLeaves();
      }
      if (isExcluded) {
        return clusterMap.getNumOfLeaves() - scopeNodeCount
            - excludedCountOffScope;
      } else {
        return scopeNodeCount - excludedCountInScope;
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** convert a network tree to a string */
  @Override
  public String toString() {
    StringBuilder tree = new StringBuilder();
    // Hold the read lock for the whole dump so the leaf count and the leaves
    // come from the same state of the tree. add() and remove() call this
    // while holding the write lock, as they already did for debug logging.
    netlock.readLock().lock();
    try {
      // print the number of racks
      tree.append("Number of racks: ");
      tree.append(numOfRacks);
      tree.append("\n");
      // print the number of leaves
      int numOfLeaves = getNumOfLeaves();
      tree.append("Expected number of leaves:");
      tree.append(numOfLeaves);
      tree.append("\n");
      // print nodes
      for(int i=0; i<numOfLeaves; i++) {
        tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
        tree.append("\n");
      }
    } finally {
      netlock.readLock().unlock();
    }
    return tree.toString();
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the first part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part before the last separator
   * @exception StringIndexOutOfBoundsException if networkLocation contains
   *            no separator
   */
  public static String getFirstHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(0, index);
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the second part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part from the last separator (inclusive) to the end
   * @exception StringIndexOutOfBoundsException if networkLocation contains
   *            no separator
   */
  public static String getLastHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(index);
  }

  /**
   * Returns an integer weight which specifies how far away {node} is away from
   * {reader}. A lower value signifies that a node is closer.
   *
   * @param reader Node where data will be read
   * @param node Replica of data
   * @return weight
   */
  protected int getWeight(Node reader, Node node) {
    // 0 is local, 1 is same rack, 2 is off rack
    // Start off by initializing to off rack
    int weight = 2;
    if (reader != null) {
      if (reader == node) {
        weight = 0;
      } else if (isOnSameRack(reader, node)) {
        weight = 1;
      }
    }
    return weight;
  }

  /**
   * Sort nodes array by network distance to <i>reader</i>.
   * <p/>
   * In a three-level topology, a node can be either local, on the same rack,
   * or on a different rack from the reader. Sorting the nodes based on network
   * distance from the reader reduces network traffic and improves
   * performance.
   * <p/>
   * As an additional twist, we also randomize the nodes at each network
   * distance. This helps with load balancing when there is data skew.
   *
   * @param reader Node where data will be read
   * @param nodes Available replicas with the requested data
   * @param activeLen Number of active nodes at the front of the array
   */
  public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
    // Compute the weight of every active node
    int[] weights = new int[activeLen];
    for (int i=0; i<activeLen; i++) {
      weights[i] = getWeight(reader, nodes[i]);
    }
    // Add weight/node pairs to a TreeMap to sort
    TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
    for (int i=0; i<activeLen; i++) {
      int weight = weights[i];
      Node node = nodes[i];
      List<Node> list = tree.get(weight);
      if (list == null) {
        list = Lists.newArrayListWithExpectedSize(1);
        tree.put(weight, list);
      }
      list.add(node);
    }

    // Write the buckets back in ascending weight order, shuffling within
    // each bucket.
    int idx = 0;
    for (List<Node> list: tree.values()) {
      if (list != null) {
        Collections.shuffle(list, r);
        for (Node n: list) {
          nodes[idx] = n;
          idx++;
        }
      }
    }
    Preconditions.checkState(idx == activeLen,
        "Sorted the wrong number of nodes!");
  }
}