/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.net;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.util.ReflectionUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 * <p>All mutations of the tree take the write side of {@link #netlock};
 * the public query methods take the read side.
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable
public class NetworkTopology {
  public final static String DEFAULT_RACK = "/default-rack";
  public final static int DEFAULT_HOST_LEVEL = 2;
  public static final Log LOG =
    LogFactory.getLog(NetworkTopology.class);

  /** Thrown when an operation would leave the topology in an invalid shape. */
  public static class InvalidTopologyException extends RuntimeException {
    private static final long serialVersionUID = 1L;
    public InvalidTopologyException(String msg) {
      super(msg);
    }
  }

  /**
   * Get an instance of NetworkTopology based on the value of the configuration
   * parameter net.topology.impl.
   *
   * @param conf the configuration to be used
   * @return an instance of NetworkTopology
   */
  public static NetworkTopology getInstance(Configuration conf){
    return ReflectionUtils.newInstance(
        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
        NetworkTopology.class, NetworkTopology.class), conf);
  }

  /** InnerNode represents a switch/router of a data center or rack.
   * Different from a leaf node, it has non-null children.
   *
   * <p>Children are kept twice: in {@link #children} (ordered, used for
   * index-based leaf selection) and in {@code childrenMap} (by name, used for
   * lookup). Both structures must always be updated together.
   */
  static class InnerNode extends NodeBase {
    protected List<Node> children=new ArrayList<Node>();
    private Map<String, Node> childrenMap = new HashMap<String, Node>();
    /** Number of leaves in the whole subtree rooted at this node. */
    private int numOfLeaves;

    /** Construct an InnerNode from a path-like string */
    InnerNode(String path) {
      super(path);
    }

    /** Construct an InnerNode from its name and its network location */
    InnerNode(String name, String location) {
      super(name, location);
    }

    /** Construct an InnerNode
     * from its name, its network location, its parent, and its level */
    InnerNode(String name, String location, InnerNode parent, int level) {
      super(name, location, parent, level);
    }

    /** @return its children (the live internal list, not a copy) */
    List<Node> getChildren() {return children;}

    /** @return the number of children this node has */
    int getNumOfChildren() {
      return children.size();
    }

    /** Judge if this node represents a rack
     * @return true if it has no child or its children are not InnerNodes
     */
    boolean isRack() {
      if (children.isEmpty()) {
        return true;
      }

      // Only the first child is inspected to decide what kind of children
      // this node holds.
      Node firstChild = children.get(0);
      if (firstChild instanceof InnerNode) {
        return false;
      }

      return true;
    }

    /** Judge if this node is an ancestor of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is an ancestor of <i>n</i>
     */
    boolean isAncestor(Node n) {
      // A separator is appended to both sides so that "/rack1" is not
      // mistaken for an ancestor of a node located at "/rack10".
      return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
        (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
        startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
    }

    /** Judge if this node is the parent of node <i>n</i>
     *
     * @param n a node
     * @return true if this node is the parent of <i>n</i>
     */
    boolean isParent(Node n) {
      return n.getNetworkLocation().equals(getPath(this));
    }

    /* Return a child name of this node who is an ancestor of node <i>n</i> */
    private String getNextAncestorName(Node n) {
      if (!isAncestor(n)) {
        throw new IllegalArgumentException(
                                           this + " is not an ancestor of " + n);
      }
      // Strip this node's own path, then keep only the first remaining
      // path component.
      String name = n.getNetworkLocation().substring(getPath(this).length());
      if (name.charAt(0) == PATH_SEPARATOR) {
        name = name.substring(1);
      }
      int index=name.indexOf(PATH_SEPARATOR);
      if (index !=-1)
        name = name.substring(0, index);
      return name;
    }

    /** Add node <i>n</i> to the subtree of this node
     * @param n node to be added
     * @return true if the node is added; false if a child with the same name
     *         already existed and was replaced by <i>n</i>
     * @throws IllegalArgumentException if this node is not an ancestor of
     *         <i>n</i>
     */
    boolean add(Node n) {
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()+", which is located at "
                +n.getNetworkLocation()+", is not a decendent of "
                +getPath(this));
      if (isParent(n)) {
        // this node is the parent of n; add n directly
        n.setParent(this);
        n.setLevel(this.level+1);
        Node prev = childrenMap.put(n.getName(), n);
        if (prev != null) {
          // A child with this name already exists: swap it in place so the
          // leaf count and the child's index stay unchanged.
          for(int i=0; i<children.size(); i++) {
            if (children.get(i).getName().equals(n.getName())) {
              children.set(i, n);
              return false;
            }
          }
        }
        children.add(n);
        numOfLeaves++;
        return true;
      } else {
        // find the next ancestor node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = (InnerNode)childrenMap.get(parentName);
        if (parentNode == null) {
          // create a new InnerNode
          parentNode = createParentNode(parentName);
          children.add(parentNode);
          childrenMap.put(parentNode.getName(), parentNode);
        }
        // add n to the subtree of the next ancestor node
        if (parentNode.add(n)) {
          numOfLeaves++;
          return true;
        } else {
          return false;
        }
      }
    }

    /**
     * Creates a parent node to be added to the list of children.
     * Creates a node using the InnerNode four argument constructor specifying
     * the name, location, parent, and level of this node.
     *
     * <p>To be overridden in subclasses for specific InnerNode implementations,
     * as alternative to overriding the full {@link #add(Node)} method.
     *
     * @param parentName The name of the parent node
     * @return A new inner node
     * @see InnerNode#InnerNode(String, String, InnerNode, int)
     */
    protected InnerNode createParentNode(String parentName) {
      return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
    }

    /** Remove node <i>n</i> from the subtree of this node
     * @param n node to be deleted
     * @return true if the node is deleted; false otherwise
     * @throws IllegalArgumentException if this node is not an ancestor of
     *         <i>n</i>
     */
    boolean remove(Node n) {
      String parent = n.getNetworkLocation();
      String currentPath = getPath(this);
      if (!isAncestor(n))
        throw new IllegalArgumentException(n.getName()
                                           +", which is located at "
                                           +parent+", is not a descendent of "+currentPath);
      if (isParent(n)) {
        // this node is the parent of n; remove n directly
        if (childrenMap.containsKey(n.getName())) {
          for (int i=0; i<children.size(); i++) {
            if (children.get(i).getName().equals(n.getName())) {
              children.remove(i);
              childrenMap.remove(n.getName());
              numOfLeaves--;
              n.setParent(null);
              return true;
            }
          }
        }
        return false;
      } else {
        // find the next ancestor node: the parent node
        String parentName = getNextAncestorName(n);
        InnerNode parentNode = null;
        int i;
        for(i=0; i<children.size(); i++) {
          if (children.get(i).getName().equals(parentName)) {
            parentNode = (InnerNode)children.get(i);
            break;
          }
        }
        if (parentNode==null) {
          return false;
        }
        // remove n from the parent node
        boolean isRemoved = parentNode.remove(n);
        // if the parent node has no children, remove the parent node too
        if (isRemoved) {
          if (parentNode.getNumOfChildren() == 0) {
            Node prev = children.remove(i);
            childrenMap.remove(prev.getName());
          }
          numOfLeaves--;
        }
        return isRemoved;
      }
    } // end of remove

    /** Given a node's string representation, return a reference to the node
     * @param loc string location of the form /rack/node, relative to this
     *            node and without a leading separator; null or empty means
     *            this node itself
     * @return null if the node is not found or the childnode is there but
     * not an instance of {@link InnerNode}
     */
    private Node getLoc(String loc) {
      if (loc == null || loc.length() == 0) return this;

      // path[0] is the next component, path[1] (if any) is the remainder.
      String[] path = loc.split(PATH_SEPARATOR_STR, 2);
      Node childnode = childrenMap.get(path[0]);
      if (childnode == null) return null; // non-existing node
      if (path.length == 1) return childnode;
      if (childnode instanceof InnerNode) {
        return ((InnerNode)childnode).getLoc(path[1]);
      } else {
        return null;
      }
    }

    /** get <i>leafIndex</i> leaf of this subtree
     * if it is not in the <i>excludedNode</i>
     *
     * @param leafIndex an indexed leaf of the node
     * @param excludedNode an excluded node (can be null)
     * @return the leaf at <i>leafIndex</i> counted over the leaves of this
     *         subtree that are outside <i>excludedNode</i>, or null if
     *         <i>leafIndex</i> is out of range
     */
    Node getLeaf(int leafIndex, Node excludedNode) {
      int count=0;
      // check if the excluded node a leaf
      boolean isLeaf =
        excludedNode == null || !(excludedNode instanceof InnerNode);
      // calculate the total number of excluded leaf nodes
      int numOfExcludedLeaves =
        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
      if (isLeafParent()) { // children are leaves
        if (isLeaf) { // excluded node is a leaf node
          if (excludedNode != null &&
              childrenMap.containsKey(excludedNode.getName())) {
            int excludedIndex = children.indexOf(excludedNode);
            if (excludedIndex != -1 && leafIndex >= 0) {
              // excluded node is one of the children so adjust the leaf index
              leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
            }
          }
        }
        // range check
        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
          return null;
        }
        return children.get(leafIndex);
      } else {
        for(int i=0; i<children.size(); i++) {
          InnerNode child = (InnerNode)children.get(i);
          if (excludedNode == null || excludedNode != child) {
            // not the excludedNode
            int numOfLeaves = child.getNumOfLeaves();
            if (excludedNode != null && child.isAncestor(excludedNode)) {
              numOfLeaves -= numOfExcludedLeaves;
            }
            if (count+numOfLeaves > leafIndex) {
              // the leaf is in the child subtree
              return child.getLeaf(leafIndex-count, excludedNode);
            } else {
              // go to the next child
              count = count+numOfLeaves;
            }
          } else { // it is the excluededNode
            // skip it and set the excludedNode to be null
            excludedNode = null;
          }
        }
        return null;
      }
    }

    /**
     * Tell {@link #getLeaf(int, Node)} whether the children of this node are
     * leaves; the default implementation calls {@link #isRack()}.
     *
     * @return true if children are leaves, false otherwise
     */
    protected boolean isLeafParent() {
      return isRack();
    }

    /**
      * Determine if children a leaves, default implementation calls {@link #isRack()}
      * <p>To be overridden in subclasses for specific InnerNode implementations,
      * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
      *
      * @return true if children are leaves, false otherwise
      */
    protected boolean areChildrenLeaves() {
      return isRack();
    }

    /**
     * Get number of leaves.
     */
    int getNumOfLeaves() {
      return numOfLeaves;
    }
  } // end of InnerNode

  /**
   * the root cluster map
   */
  InnerNode clusterMap;
  /** Depth of all leaf nodes */
  private int depthOfAllLeaves = -1;
  /** rack counter */
  protected int numOfRacks = 0;

  /**
   * Whether or not this cluster has ever consisted of more than 1 rack,
   * according to the NetworkTopology.
   */
  private boolean clusterEverBeenMultiRack = false;

  /** the lock used to manage access */
  protected ReadWriteLock netlock = new ReentrantReadWriteLock(true);

  public NetworkTopology() {
    clusterMap = new InnerNode(InnerNode.ROOT);
  }

  /** Add a leaf node
   * Update node counter & rack counter if necessary
   * @param node node to be added; can be null
   * @exception IllegalArgumentException if add a node to a leave
   *            or node to be added is not a leaf
   * @exception InvalidTopologyException if the node would sit at a different
   *            depth than the leaves already in the topology
   */
  public void add(Node node) {
    if (node==null) return;
    int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
    netlock.writeLock().lock();
    try {
      if( node instanceof InnerNode ) {
        throw new IllegalArgumentException(
          "Not allow to add an inner node: "+NodeBase.getPath(node));
      }
      if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
        LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
            " at depth " + newDepth + " to topology:\n" + this.toString());
        throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
            ": You cannot have a rack and a non-rack node at the same " +
            "level of the network topology.");
      }
      // Looked up before the insert so that "rack == null" below means the
      // rack did not exist yet and is created by this add.
      Node rack = getNodeForNetworkLocation(node);
      if (rack != null && !(rack instanceof InnerNode)) {
        throw new IllegalArgumentException("Unexpected data node "
                                           + node.toString()
                                           + " at an illegal network location");
      }
      if (clusterMap.add(node)) {
        LOG.info("Adding a new node: "+NodeBase.getPath(node));
        if (rack == null) {
          incrementRacks();
        }
        // node is known to be a leaf here (inner nodes were rejected above),
        // so the first successful add fixes the depth of all leaves.
        if (depthOfAllLeaves == -1) {
          depthOfAllLeaves = node.getLevel();
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /**
   * Increase the rack counter and record whether the cluster has ever held
   * more than one rack.
   */
  protected void incrementRacks() {
    numOfRacks++;
    if (!clusterEverBeenMultiRack && numOfRacks > 1) {
      clusterEverBeenMultiRack = true;
    }
  }

  /**
   * Return a reference to the node given its string representation.
   * Default implementation delegates to {@link #getNode(String)}.
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full {@link #add(Node)}
   * method.
   *
   * @param node The string representation of this node's network location is
   * used to retrieve a Node object.
   * @return a reference to the node; null if the node is not in the tree
   *
   * @see #add(Node)
   * @see #getNode(String)
   */
  protected Node getNodeForNetworkLocation(Node node) {
    return getNode(node.getNetworkLocation());
  }

  /**
   * Given a string representation of a rack, return its children
   * @param loc a path-like string representation of a rack
   * @return a newly allocated list with all the node's children, or null if
   *         <i>loc</i> does not name an inner node of the tree
   */
  public List<Node> getDatanodesInRack(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc)) {
        // getLoc expects a path without the leading separator
        loc = loc.substring(1);
      }
      Node rack = clusterMap.getLoc(loc);
      // Covers both "not found" (null) and "found, but it is a leaf"; the
      // latter used to surface as a ClassCastException.
      if (!(rack instanceof InnerNode)) {
        return null;
      }
      return new ArrayList<Node>(((InnerNode) rack).getChildren());
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Remove a node
   * Update node counter and rack counter if necessary
   * @param node node to be removed; can be null
   * @exception IllegalArgumentException if the node is an inner node
   */
  public void remove(Node node) {
    if (node==null) return;
    if( node instanceof InnerNode ) {
      throw new IllegalArgumentException(
        "Not allow to remove an inner node: "+NodeBase.getPath(node));
    }
    LOG.info("Removing a node: "+NodeBase.getPath(node));
    netlock.writeLock().lock();
    try {
      if (clusterMap.remove(node)) {
        // InnerNode.remove prunes inner nodes that became empty, so a
        // missing rack here means this was its last node.
        InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
        if (rack == null) {
          numOfRacks--;
        }
      }
      if(LOG.isDebugEnabled()) {
        LOG.debug("NetworkTopology became:\n" + this.toString());
      }
    } finally {
      netlock.writeLock().unlock();
    }
  }

  /** Check if the tree contains node <i>node</i>
   *
   * @param node a node
   * @return true if <i>node</i> is already in the tree; false otherwise
   */
  public boolean contains(Node node) {
    if (node == null) return false;
    netlock.readLock().lock();
    try {
      // Walk up the parent chain, at most "level" steps, looking for the
      // root of this topology.
      Node parent = node.getParent();
      for (int level = node.getLevel(); parent != null && level > 0;
           parent = parent.getParent(), level--) {
        if (parent == clusterMap) {
          return true;
        }
      }
    } finally {
      netlock.readLock().unlock();
    }
    return false;
  }

  /** Given a string representation of a node, return its reference
   *
   * @param loc
   *          a path-like string representation of a node
   * @return a reference to the node; null if the node is not in the tree
   */
  public Node getNode(String loc) {
    netlock.readLock().lock();
    try {
      loc = NodeBase.normalize(loc);
      if (!NodeBase.ROOT.equals(loc))
        loc = loc.substring(1);
      return clusterMap.getLoc(loc);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * @return true if this cluster has ever consisted of multiple racks, even if
   * it is not now a multi-rack cluster.
   */
  public boolean hasClusterEverBeenMultiRack() {
    return clusterEverBeenMultiRack;
  }

  /** Given a string representation of a network location, return the
   * string representation of its rack. This implementation returns
   * <i>loc</i> unchanged.
   *
   * To be overridden in subclasses for specific NetworkTopology
   * implementations.
   * @param loc
   *          a path-like string representation of a network location
   * @return a rack string
   */
  public String getRack(String loc) {
    return loc;
  }

  /** @return the total number of racks */
  public int getNumOfRacks() {
    netlock.readLock().lock();
    try {
      return numOfRacks;
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** @return the total number of leaf nodes */
  public int getNumOfLeaves() {
    netlock.readLock().lock();
    try {
      return clusterMap.getNumOfLeaves();
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** Return the distance between two nodes
   * It is assumed that the distance from one node to its parent is 1
   * The distance between two nodes is calculated by summing up their distances
   * to their closest common ancestor.
   *
   * <p>For two distinct nodes the result always includes the two hops up to
   * the shared parent, so it is only meaningful when neither node is an
   * ancestor of the other.
   *
   * @param node1 one node
   * @param node2 another node
   * @return the distance between node1 and node2 which is zero if they are the same
   *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
   *  (including the case where exactly one of them is null)
   */
  public int getDistance(Node node1, Node node2) {
    if (node1 == node2) {
      return 0;
    }
    if (node1 == null || node2 == null) {
      // A null node cannot belong to the cluster; report it the same way as
      // any other unknown node instead of failing with a NullPointerException.
      LOG.warn("One of the nodes is a null pointer");
      return Integer.MAX_VALUE;
    }
    Node n1=node1, n2=node2;
    int dis = 0;
    netlock.readLock().lock();
    try {
      int level1=node1.getLevel(), level2=node2.getLevel();
      // Bring the deeper node up to the level of the shallower one.
      while(n1!=null && level1>level2) {
        n1 = n1.getParent();
        level1--;
        dis++;
      }
      while(n2!=null && level2>level1) {
        n2 = n2.getParent();
        level2--;
        dis++;
      }
      // Climb both sides in lock step until they share a parent.
      while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
        n1=n1.getParent();
        n2=n2.getParent();
        dis+=2;
      }
    } finally {
      netlock.readLock().unlock();
    }
    if (n1==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
      return Integer.MAX_VALUE;
    }
    if (n2==null) {
      LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
      return Integer.MAX_VALUE;
    }
    return dis+2;
  }

  /** Check if two nodes are on the same rack
   * @param node1 one node (can be null)
   * @param node2 another node (can be null)
   * @return true if node1 and node2 are on the same rack; false otherwise,
   *         including when either node is null
   */
  public boolean isOnSameRack( Node node1,  Node node2) {
    if (node1 == null || node2 == null) {
      return false;
    }

    netlock.readLock().lock();
    try {
      return isSameParents(node1, node2);
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Check if network topology is aware of NodeGroup
   */
  public boolean isNodeGroupAware() {
    return false;
  }

  /**
   * Return false directly as not aware of NodeGroup, to be override in sub-class
   */
  public boolean isOnSameNodeGroup(Node node1, Node node2) {
    return false;
  }

  /**
   * Compare the parents of each node for equality
   *
   * <p>To be overridden in subclasses for specific NetworkTopology
   * implementations, as alternative to overriding the full
   * {@link #isOnSameRack(Node, Node)} method.
   *
   * @param node1 the first node to compare
   * @param node2 the second node to compare
   * @return true if their parents are equal, false otherwise
   *
   * @see #isOnSameRack(Node, Node)
   */
  protected boolean isSameParents(Node node1, Node node2) {
    return node1.getParent()==node2.getParent();
  }

  private static final Random r = new Random();

  @VisibleForTesting
  void setRandomSeed(long seed) {
    r.setSeed(seed);
  }

  /** randomly choose one node from <i>scope</i>
   * if scope starts with ~, choose one from the all nodes except for the
   * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
   * @param scope range of nodes from which a node will be chosen
   * @return the chosen node
   * @exception InvalidTopologyException if no node is left to choose from
   */
  public Node chooseRandom(String scope) {
    netlock.readLock().lock();
    try {
      if (scope.startsWith("~")) {
        return chooseRandom(NodeBase.ROOT, scope.substring(1));
      } else {
        return chooseRandom(scope, null);
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /**
   * Randomly choose a leaf under <i>scope</i> that is not under
   * <i>excludedScope</i>.
   *
   * @param scope path of the subtree to choose from
   * @param excludedScope path of a subtree to skip; can be null
   * @return the chosen node; null if <i>scope</i> lies inside
   *         <i>excludedScope</i> or does not exist; the scope node itself if
   *         it is a leaf
   * @exception InvalidTopologyException if no candidate leaf remains
   */
  private Node chooseRandom(String scope, String excludedScope){
    if (excludedScope != null) {
      if (scope.startsWith(excludedScope)) {
        return null;
      }
      if (!excludedScope.startsWith(scope)) {
        // the excluded scope is outside of scope; nothing to exclude
        excludedScope = null;
      }
    }
    Node node = getNode(scope);
    if (!(node instanceof InnerNode)) {
      return node;
    }
    InnerNode innerNode = (InnerNode)node;
    int numOfDatanodes = innerNode.getNumOfLeaves();
    Node excludedNode = null;
    if (excludedScope != null) {
      excludedNode = getNode(excludedScope);
      if (excludedNode instanceof InnerNode) {
        numOfDatanodes -= ((InnerNode)excludedNode).getNumOfLeaves();
      } else if (excludedNode != null) {
        // Only an existing leaf removes a candidate; an excluded scope that
        // resolves to nothing must not shrink the candidate range.
        numOfDatanodes -= 1;
      }
    }
    // "<= 0" rather than "== 0": Random.nextInt rejects non-positive bounds
    // with a bare IllegalArgumentException.
    if (numOfDatanodes <= 0) {
      throw new InvalidTopologyException(
          "Failed to find datanode (scope=\"" + String.valueOf(scope) +
          "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
    }
    int leaveIndex = r.nextInt(numOfDatanodes);
    return innerNode.getLeaf(leaveIndex, excludedNode);
  }

  /** return leaves in <i>scope</i>
   * @param scope a path string
   * @return leaves nodes under specific scope; an empty list if
   *         <i>scope</i> is not in the tree
   */
  public List<Node> getLeaves(String scope) {
    List<Node> leafNodes = new ArrayList<Node>();
    // Hold the read lock for the whole traversal so the leaf count and the
    // indexed look-ups observe the same tree.
    netlock.readLock().lock();
    try {
      Node node = getNode(scope);
      if (node instanceof InnerNode) {
        InnerNode innerNode = (InnerNode) node;
        for (int i=0;i<innerNode.getNumOfLeaves();i++) {
          leafNodes.add(innerNode.getLeaf(i, null));
        }
      } else if (node != null) {
        // scope names a single leaf
        leafNodes.add(node);
      }
    } finally {
      netlock.readLock().unlock();
    }
    return leafNodes;
  }

  /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
   * if scope starts with ~, return the number of nodes that are not
   * in <i>scope</i> and <i>excludedNodes</i>;
   * @param scope a path string that may start with ~
   * @param excludedNodes a list of nodes
   * @return number of available nodes
   */
  public int countNumOfAvailableNodes(String scope,
                                      Collection<Node> excludedNodes) {
    boolean isExcluded=false;
    if (scope.startsWith("~")) {
      isExcluded=true;
      scope=scope.substring(1);
    }
    scope = NodeBase.normalize(scope);
    int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes
    int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes
    netlock.readLock().lock();
    try {
      for (Node node : excludedNodes) {
        // Re-resolve through the tree so nodes that are not part of the
        // topology are ignored.
        node = getNode(NodeBase.getPath(node));
        if (node == null) {
          continue;
        }
        if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR)
            .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) {
          excludedCountInScope++;
        } else {
          excludedCountOffScope++;
        }
      }
      Node n = getNode(scope);
      int scopeNodeCount = 0;
      if (n != null) {
        scopeNodeCount++;
      }
      if (n instanceof InnerNode) {
        scopeNodeCount=((InnerNode)n).getNumOfLeaves();
      }
      if (isExcluded) {
        return clusterMap.getNumOfLeaves() - scopeNodeCount
            - excludedCountOffScope;
      } else {
        return scopeNodeCount - excludedCountInScope;
      }
    } finally {
      netlock.readLock().unlock();
    }
  }

  /** convert a network tree to a string */
  @Override
  public String toString() {
    // print the number of racks
    StringBuilder tree = new StringBuilder();
    tree.append("Number of racks: ");
    tree.append(numOfRacks);
    tree.append("\n");
    // print the number of leaves
    int numOfLeaves = getNumOfLeaves();
    tree.append("Expected number of leaves:");
    tree.append(numOfLeaves);
    tree.append("\n");
    // print nodes
    for(int i=0; i<numOfLeaves; i++) {
      tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
      tree.append("\n");
    }
    return tree.toString();
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the first part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part of <i>networkLocation</i> before its last separator
   * @throws StringIndexOutOfBoundsException if <i>networkLocation</i>
   *         contains no separator
   */
  public static String getFirstHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(0, index);
  }

  /**
   * Divide networklocation string into two parts by last separator, and get
   * the second part here.
   *
   * @param networkLocation a path-like string containing at least one
   *        separator
   * @return the part of <i>networkLocation</i> starting at its last
   *         separator (the separator is included)
   * @throws StringIndexOutOfBoundsException if <i>networkLocation</i>
   *         contains no separator
   */
  public static String getLastHalf(String networkLocation) {
    int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
    return networkLocation.substring(index);
  }

  /**
   * Returns an integer weight which specifies how far away {node} is away from
   * {reader}. A lower value signifies that a node is closer.
   *
   * @param reader Node where data will be read
   * @param node Replica of data
   * @return weight
   */
  protected int getWeight(Node reader, Node node) {
    // 0 is local, 1 is same rack, 2 is off rack
    // Start off by initializing to off rack
    int weight = 2;
    if (reader != null) {
      if (reader.equals(node)) {
        weight = 0;
      } else if (isOnSameRack(reader, node)) {
        weight = 1;
      }
    }
    return weight;
  }

  /**
   * Sort nodes array by network distance to <i>reader</i>.
   * <p/>
   * In a three-level topology, a node can be either local, on the same rack,
   * or on a different rack from the reader. Sorting the nodes based on network
   * distance from the reader reduces network traffic and improves
   * performance.
   * <p/>
   * As an additional twist, we also randomize the nodes at each network
   * distance. This helps with load balancing when there is data skew.
   *
   * @param reader Node where data will be read
   * @param nodes Available replicas with the requested data; the first
   *        <i>activeLen</i> entries are reordered in place
   * @param activeLen Number of active nodes at the front of the array
   */
  public void sortByDistance(Node reader, Node[] nodes, int activeLen) {
    // Sort weights for the nodes array
    int[] weights = new int[activeLen];
    for (int i=0; i<activeLen; i++) {
      weights[i] = getWeight(reader, nodes[i]);
    }
    // Add weight/node pairs to a TreeMap to sort
    TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
    for (int i=0; i<activeLen; i++) {
      int weight = weights[i];
      Node node = nodes[i];
      List<Node> list = tree.get(weight);
      if (list == null) {
        list = Lists.newArrayListWithExpectedSize(1);
        tree.put(weight, list);
      }
      list.add(node);
    }

    // Write the buckets back in ascending weight order, shuffling inside
    // each bucket.
    int idx = 0;
    for (List<Node> list: tree.values()) {
      if (list != null) {
        Collections.shuffle(list, r);
        for (Node n: list) {
          nodes[idx] = n;
          idx++;
        }
      }
    }
    Preconditions.checkState(idx == activeLen,
        "Sorted the wrong number of nodes!");
  }
}