001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.net; 019 020import java.util.ArrayList; 021import java.util.List; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Random; 026import java.util.TreeMap; 027import java.util.concurrent.locks.ReadWriteLock; 028import java.util.concurrent.locks.ReentrantReadWriteLock; 029 030import org.apache.commons.logging.Log; 031import org.apache.commons.logging.LogFactory; 032import org.apache.hadoop.classification.InterfaceAudience; 033import org.apache.hadoop.classification.InterfaceStability; 034import org.apache.hadoop.conf.Configuration; 035import org.apache.hadoop.fs.CommonConfigurationKeysPublic; 036import org.apache.hadoop.util.ReflectionUtils; 037 038import com.google.common.base.Preconditions; 039import com.google.common.collect.Lists; 040 041/** The class represents a cluster of computer with a tree hierarchical 042 * network topology. 043 * For example, a cluster may be consists of many data centers filled 044 * with racks of computers. 045 * In a network topology, leaves represent data nodes (computers) and inner 046 * nodes represent switches/routers that manage traffic in/out of data centers 047 * or racks. 048 * 049 */ 050@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 051@InterfaceStability.Unstable 052public class NetworkTopology { 053 public final static String DEFAULT_RACK = "/default-rack"; 054 public final static int DEFAULT_HOST_LEVEL = 2; 055 public static final Log LOG = 056 LogFactory.getLog(NetworkTopology.class); 057 058 public static class InvalidTopologyException extends RuntimeException { 059 private static final long serialVersionUID = 1L; 060 public InvalidTopologyException(String msg) { 061 super(msg); 062 } 063 } 064 065 /** 066 * Get an instance of NetworkTopology based on the value of the configuration 067 * parameter net.topology.impl. 068 * 069 * @param conf the configuration to be used 070 * @return an instance of NetworkTopology 071 */ 072 public static NetworkTopology getInstance(Configuration conf){ 073 return ReflectionUtils.newInstance( 074 conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY, 075 NetworkTopology.class, NetworkTopology.class), conf); 076 } 077 078 /** InnerNode represents a switch/router of a data center or rack. 079 * Different from a leaf node, it has non-null children. 080 */ 081 static class InnerNode extends NodeBase { 082 protected List<Node> children=new ArrayList<Node>(); 083 private int numOfLeaves; 084 085 /** Construct an InnerNode from a path-like string */ 086 InnerNode(String path) { 087 super(path); 088 } 089 090 /** Construct an InnerNode from its name and its network location */ 091 InnerNode(String name, String location) { 092 super(name, location); 093 } 094 095 /** Construct an InnerNode 096 * from its name, its network location, its parent, and its level */ 097 InnerNode(String name, String location, InnerNode parent, int level) { 098 super(name, location, parent, level); 099 } 100 101 /** @return its children */ 102 List<Node> getChildren() {return children;} 103 104 /** @return the number of children this node has */ 105 int getNumOfChildren() { 106 return children.size(); 107 } 108 109 /** Judge if this node represents a rack 110 * @return true if it has no child or its children are not InnerNodes 111 */ 112 boolean isRack() { 113 if (children.isEmpty()) { 114 return true; 115 } 116 117 Node firstChild = children.get(0); 118 if (firstChild instanceof InnerNode) { 119 return false; 120 } 121 122 return true; 123 } 124 125 /** Judge if this node is an ancestor of node <i>n</i> 126 * 127 * @param n a node 128 * @return true if this node is an ancestor of <i>n</i> 129 */ 130 boolean isAncestor(Node n) { 131 return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) || 132 (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR). 133 startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR); 134 } 135 136 /** Judge if this node is the parent of node <i>n</i> 137 * 138 * @param n a node 139 * @return true if this node is the parent of <i>n</i> 140 */ 141 boolean isParent(Node n) { 142 return n.getNetworkLocation().equals(getPath(this)); 143 } 144 145 /* Return a child name of this node who is an ancestor of node <i>n</i> */ 146 private String getNextAncestorName(Node n) { 147 if (!isAncestor(n)) { 148 throw new IllegalArgumentException( 149 this + "is not an ancestor of " + n); 150 } 151 String name = n.getNetworkLocation().substring(getPath(this).length()); 152 if (name.charAt(0) == PATH_SEPARATOR) { 153 name = name.substring(1); 154 } 155 int index=name.indexOf(PATH_SEPARATOR); 156 if (index !=-1) 157 name = name.substring(0, index); 158 return name; 159 } 160 161 /** Add node <i>n</i> to the subtree of this node 162 * @param n node to be added 163 * @return true if the node is added; false otherwise 164 */ 165 boolean add(Node n) { 166 if (!isAncestor(n)) 167 throw new IllegalArgumentException(n.getName()+", which is located at " 168 +n.getNetworkLocation()+", is not a decendent of " 169 +getPath(this)); 170 if (isParent(n)) { 171 // this node is the parent of n; add n directly 172 n.setParent(this); 173 n.setLevel(this.level+1); 174 for(int i=0; i<children.size(); i++) { 175 if (children.get(i).getName().equals(n.getName())) { 176 children.set(i, n); 177 return false; 178 } 179 } 180 children.add(n); 181 numOfLeaves++; 182 return true; 183 } else { 184 // find the next ancestor node 185 String parentName = getNextAncestorName(n); 186 InnerNode parentNode = null; 187 for(int i=0; i<children.size(); i++) { 188 if (children.get(i).getName().equals(parentName)) { 189 parentNode = (InnerNode)children.get(i); 190 break; 191 } 192 } 193 if (parentNode == null) { 194 // create a new InnerNode 195 parentNode = createParentNode(parentName); 196 children.add(parentNode); 197 } 198 // add n to the subtree of the next ancestor node 199 if (parentNode.add(n)) { 200 numOfLeaves++; 201 return true; 202 } else { 203 return false; 204 } 205 } 206 } 207 208 /** 209 * Creates a parent node to be added to the list of children. 210 * Creates a node using the InnerNode four argument constructor specifying 211 * the name, location, parent, and level of this node. 212 * 213 * <p>To be overridden in subclasses for specific InnerNode implementations, 214 * as alternative to overriding the full {@link #add(Node)} method. 215 * 216 * @param parentName The name of the parent node 217 * @return A new inner node 218 * @see InnerNode#InnerNode(String, String, InnerNode, int) 219 */ 220 protected InnerNode createParentNode(String parentName) { 221 return new InnerNode(parentName, getPath(this), this, this.getLevel()+1); 222 } 223 224 /** Remove node <i>n</i> from the subtree of this node 225 * @param n node to be deleted 226 * @return true if the node is deleted; false otherwise 227 */ 228 boolean remove(Node n) { 229 String parent = n.getNetworkLocation(); 230 String currentPath = getPath(this); 231 if (!isAncestor(n)) 232 throw new IllegalArgumentException(n.getName() 233 +", which is located at " 234 +parent+", is not a descendent of "+currentPath); 235 if (isParent(n)) { 236 // this node is the parent of n; remove n directly 237 for(int i=0; i<children.size(); i++) { 238 if (children.get(i).getName().equals(n.getName())) { 239 children.remove(i); 240 numOfLeaves--; 241 n.setParent(null); 242 return true; 243 } 244 } 245 return false; 246 } else { 247 // find the next ancestor node: the parent node 248 String parentName = getNextAncestorName(n); 249 InnerNode parentNode = null; 250 int i; 251 for(i=0; i<children.size(); i++) { 252 if (children.get(i).getName().equals(parentName)) { 253 parentNode = (InnerNode)children.get(i); 254 break; 255 } 256 } 257 if (parentNode==null) { 258 return false; 259 } 260 // remove n from the parent node 261 boolean isRemoved = parentNode.remove(n); 262 // if the parent node has no children, remove the parent node too 263 if (isRemoved) { 264 if (parentNode.getNumOfChildren() == 0) { 265 children.remove(i); 266 } 267 numOfLeaves--; 268 } 269 return isRemoved; 270 } 271 } // end of remove 272 273 /** Given a node's string representation, return a reference to the node 274 * @param loc string location of the form /rack/node 275 * @return null if the node is not found or the childnode is there but 276 * not an instance of {@link InnerNode} 277 */ 278 private Node getLoc(String loc) { 279 if (loc == null || loc.length() == 0) return this; 280 281 String[] path = loc.split(PATH_SEPARATOR_STR, 2); 282 Node childnode = null; 283 for(int i=0; i<children.size(); i++) { 284 if (children.get(i).getName().equals(path[0])) { 285 childnode = children.get(i); 286 } 287 } 288 if (childnode == null) return null; // non-existing node 289 if (path.length == 1) return childnode; 290 if (childnode instanceof InnerNode) { 291 return ((InnerNode)childnode).getLoc(path[1]); 292 } else { 293 return null; 294 } 295 } 296 297 /** get <i>leafIndex</i> leaf of this subtree 298 * if it is not in the <i>excludedNode</i> 299 * 300 * @param leafIndex an indexed leaf of the node 301 * @param excludedNode an excluded node (can be null) 302 * @return 303 */ 304 Node getLeaf(int leafIndex, Node excludedNode) { 305 int count=0; 306 // check if the excluded node a leaf 307 boolean isLeaf = 308 excludedNode == null || !(excludedNode instanceof InnerNode); 309 // calculate the total number of excluded leaf nodes 310 int numOfExcludedLeaves = 311 isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves(); 312 if (isLeafParent()) { // children are leaves 313 if (isLeaf) { // excluded node is a leaf node 314 int excludedIndex = children.indexOf(excludedNode); 315 if (excludedIndex != -1 && leafIndex >= 0) { 316 // excluded node is one of the children so adjust the leaf index 317 leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex; 318 } 319 } 320 // range check 321 if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) { 322 return null; 323 } 324 return children.get(leafIndex); 325 } else { 326 for(int i=0; i<children.size(); i++) { 327 InnerNode child = (InnerNode)children.get(i); 328 if (excludedNode == null || excludedNode != child) { 329 // not the excludedNode 330 int numOfLeaves = child.getNumOfLeaves(); 331 if (excludedNode != null && child.isAncestor(excludedNode)) { 332 numOfLeaves -= numOfExcludedLeaves; 333 } 334 if (count+numOfLeaves > leafIndex) { 335 // the leaf is in the child subtree 336 return child.getLeaf(leafIndex-count, excludedNode); 337 } else { 338 // go to the next child 339 count = count+numOfLeaves; 340 } 341 } else { // it is the excluededNode 342 // skip it and set the excludedNode to be null 343 excludedNode = null; 344 } 345 } 346 return null; 347 } 348 } 349 350 protected boolean isLeafParent() { 351 return isRack(); 352 } 353 354 /** 355 * Determine if children a leaves, default implementation calls {@link #isRack()} 356 * <p>To be overridden in subclasses for specific InnerNode implementations, 357 * as alternative to overriding the full {@link #getLeaf(int, Node)} method. 358 * 359 * @return true if children are leaves, false otherwise 360 */ 361 protected boolean areChildrenLeaves() { 362 return isRack(); 363 } 364 365 /** 366 * Get number of leaves. 367 */ 368 int getNumOfLeaves() { 369 return numOfLeaves; 370 } 371 } // end of InnerNode 372 373 /** 374 * the root cluster map 375 */ 376 InnerNode clusterMap; 377 /** Depth of all leaf nodes */ 378 private int depthOfAllLeaves = -1; 379 /** rack counter */ 380 protected int numOfRacks = 0; 381 /** the lock used to manage access */ 382 protected ReadWriteLock netlock = new ReentrantReadWriteLock(); 383 384 public NetworkTopology() { 385 clusterMap = new InnerNode(InnerNode.ROOT); 386 } 387 388 /** Add a leaf node 389 * Update node counter & rack counter if necessary 390 * @param node node to be added; can be null 391 * @exception IllegalArgumentException if add a node to a leave 392 or node to be added is not a leaf 393 */ 394 public void add(Node node) { 395 if (node==null) return; 396 String oldTopoStr = this.toString(); 397 if( node instanceof InnerNode ) { 398 throw new IllegalArgumentException( 399 "Not allow to add an inner node: "+NodeBase.getPath(node)); 400 } 401 int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1; 402 netlock.writeLock().lock(); 403 try { 404 if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) { 405 LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) + 406 " at depth " + newDepth + " to topology:\n" + oldTopoStr); 407 throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) + 408 ": You cannot have a rack and a non-rack node at the same " + 409 "level of the network topology."); 410 } 411 Node rack = getNodeForNetworkLocation(node); 412 if (rack != null && !(rack instanceof InnerNode)) { 413 throw new IllegalArgumentException("Unexpected data node " 414 + node.toString() 415 + " at an illegal network location"); 416 } 417 if (clusterMap.add(node)) { 418 LOG.info("Adding a new node: "+NodeBase.getPath(node)); 419 if (rack == null) { 420 numOfRacks++; 421 } 422 if (!(node instanceof InnerNode)) { 423 if (depthOfAllLeaves == -1) { 424 depthOfAllLeaves = node.getLevel(); 425 } 426 } 427 } 428 if(LOG.isDebugEnabled()) { 429 LOG.debug("NetworkTopology became:\n" + this.toString()); 430 } 431 } finally { 432 netlock.writeLock().unlock(); 433 } 434 } 435 436 /** 437 * Return a reference to the node given its string representation. 438 * Default implementation delegates to {@link #getNode(String)}. 439 * 440 * <p>To be overridden in subclasses for specific NetworkTopology 441 * implementations, as alternative to overriding the full {@link #add(Node)} 442 * method. 443 * 444 * @param node The string representation of this node's network location is 445 * used to retrieve a Node object. 446 * @return a reference to the node; null if the node is not in the tree 447 * 448 * @see #add(Node) 449 * @see #getNode(String) 450 */ 451 protected Node getNodeForNetworkLocation(Node node) { 452 return getNode(node.getNetworkLocation()); 453 } 454 455 /** 456 * Given a string representation of a rack, return its children 457 * @param loc a path-like string representation of a rack 458 * @return a newly allocated list with all the node's children 459 */ 460 public List<Node> getDatanodesInRack(String loc) { 461 netlock.readLock().lock(); 462 try { 463 loc = NodeBase.normalize(loc); 464 if (!NodeBase.ROOT.equals(loc)) { 465 loc = loc.substring(1); 466 } 467 InnerNode rack = (InnerNode) clusterMap.getLoc(loc); 468 if (rack == null) { 469 return null; 470 } 471 return new ArrayList<Node>(rack.getChildren()); 472 } finally { 473 netlock.readLock().unlock(); 474 } 475 } 476 477 /** Remove a node 478 * Update node counter and rack counter if necessary 479 * @param node node to be removed; can be null 480 */ 481 public void remove(Node node) { 482 if (node==null) return; 483 if( node instanceof InnerNode ) { 484 throw new IllegalArgumentException( 485 "Not allow to remove an inner node: "+NodeBase.getPath(node)); 486 } 487 LOG.info("Removing a node: "+NodeBase.getPath(node)); 488 netlock.writeLock().lock(); 489 try { 490 if (clusterMap.remove(node)) { 491 InnerNode rack = (InnerNode)getNode(node.getNetworkLocation()); 492 if (rack == null) { 493 numOfRacks--; 494 } 495 } 496 if(LOG.isDebugEnabled()) { 497 LOG.debug("NetworkTopology became:\n" + this.toString()); 498 } 499 } finally { 500 netlock.writeLock().unlock(); 501 } 502 } 503 504 /** Check if the tree contains node <i>node</i> 505 * 506 * @param node a node 507 * @return true if <i>node</i> is already in the tree; false otherwise 508 */ 509 public boolean contains(Node node) { 510 if (node == null) return false; 511 netlock.readLock().lock(); 512 try { 513 Node parent = node.getParent(); 514 for (int level = node.getLevel(); parent != null && level > 0; 515 parent = parent.getParent(), level--) { 516 if (parent == clusterMap) { 517 return true; 518 } 519 } 520 } finally { 521 netlock.readLock().unlock(); 522 } 523 return false; 524 } 525 526 /** Given a string representation of a node, return its reference 527 * 528 * @param loc 529 * a path-like string representation of a node 530 * @return a reference to the node; null if the node is not in the tree 531 */ 532 public Node getNode(String loc) { 533 netlock.readLock().lock(); 534 try { 535 loc = NodeBase.normalize(loc); 536 if (!NodeBase.ROOT.equals(loc)) 537 loc = loc.substring(1); 538 return clusterMap.getLoc(loc); 539 } finally { 540 netlock.readLock().unlock(); 541 } 542 } 543 544 /** Given a string representation of a rack for a specific network 545 * location 546 * 547 * To be overridden in subclasses for specific NetworkTopology 548 * implementations, as alternative to overriding the full 549 * {@link #getRack(String)} method. 550 * @param loc 551 * a path-like string representation of a network location 552 * @return a rack string 553 */ 554 public String getRack(String loc) { 555 return loc; 556 } 557 558 /** @return the total number of racks */ 559 public int getNumOfRacks() { 560 netlock.readLock().lock(); 561 try { 562 return numOfRacks; 563 } finally { 564 netlock.readLock().unlock(); 565 } 566 } 567 568 /** @return the total number of leaf nodes */ 569 public int getNumOfLeaves() { 570 netlock.readLock().lock(); 571 try { 572 return clusterMap.getNumOfLeaves(); 573 } finally { 574 netlock.readLock().unlock(); 575 } 576 } 577 578 /** Return the distance between two nodes 579 * It is assumed that the distance from one node to its parent is 1 580 * The distance between two nodes is calculated by summing up their distances 581 * to their closest common ancestor. 582 * @param node1 one node 583 * @param node2 another node 584 * @return the distance between node1 and node2 which is zero if they are the same 585 * or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster 586 */ 587 public int getDistance(Node node1, Node node2) { 588 if (node1 == node2) { 589 return 0; 590 } 591 Node n1=node1, n2=node2; 592 int dis = 0; 593 netlock.readLock().lock(); 594 try { 595 int level1=node1.getLevel(), level2=node2.getLevel(); 596 while(n1!=null && level1>level2) { 597 n1 = n1.getParent(); 598 level1--; 599 dis++; 600 } 601 while(n2!=null && level2>level1) { 602 n2 = n2.getParent(); 603 level2--; 604 dis++; 605 } 606 while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) { 607 n1=n1.getParent(); 608 n2=n2.getParent(); 609 dis+=2; 610 } 611 } finally { 612 netlock.readLock().unlock(); 613 } 614 if (n1==null) { 615 LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1)); 616 return Integer.MAX_VALUE; 617 } 618 if (n2==null) { 619 LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2)); 620 return Integer.MAX_VALUE; 621 } 622 return dis+2; 623 } 624 625 /** Check if two nodes are on the same rack 626 * @param node1 one node (can be null) 627 * @param node2 another node (can be null) 628 * @return true if node1 and node2 are on the same rack; false otherwise 629 * @exception IllegalArgumentException when either node1 or node2 is null, or 630 * node1 or node2 do not belong to the cluster 631 */ 632 public boolean isOnSameRack( Node node1, Node node2) { 633 if (node1 == null || node2 == null) { 634 return false; 635 } 636 637 netlock.readLock().lock(); 638 try { 639 return isSameParents(node1, node2); 640 } finally { 641 netlock.readLock().unlock(); 642 } 643 } 644 645 /** 646 * Check if network topology is aware of NodeGroup 647 */ 648 public boolean isNodeGroupAware() { 649 return false; 650 } 651 652 /** 653 * Return false directly as not aware of NodeGroup, to be override in sub-class 654 */ 655 public boolean isOnSameNodeGroup(Node node1, Node node2) { 656 return false; 657 } 658 659 /** 660 * Compare the parents of each node for equality 661 * 662 * <p>To be overridden in subclasses for specific NetworkTopology 663 * implementations, as alternative to overriding the full 664 * {@link #isOnSameRack(Node, Node)} method. 665 * 666 * @param node1 the first node to compare 667 * @param node2 the second node to compare 668 * @return true if their parents are equal, false otherwise 669 * 670 * @see #isOnSameRack(Node, Node) 671 */ 672 protected boolean isSameParents(Node node1, Node node2) { 673 return node1.getParent()==node2.getParent(); 674 } 675 676 private static final ThreadLocal<Random> r = new ThreadLocal<Random>(); 677 678 /** 679 * Getter for thread-local Random, which provides better performance than 680 * a shared Random (even though Random is thread-safe). 681 * 682 * @return Thread-local Random. 683 */ 684 protected Random getRandom() { 685 Random rand = r.get(); 686 if (rand == null) { 687 rand = new Random(); 688 r.set(rand); 689 } 690 return rand; 691 } 692 693 /** randomly choose one node from <i>scope</i> 694 * if scope starts with ~, choose one from the all nodes except for the 695 * ones in <i>scope</i>; otherwise, choose one from <i>scope</i> 696 * @param scope range of nodes from which a node will be chosen 697 * @return the chosen node 698 */ 699 public Node chooseRandom(String scope) { 700 netlock.readLock().lock(); 701 try { 702 if (scope.startsWith("~")) { 703 return chooseRandom(NodeBase.ROOT, scope.substring(1)); 704 } else { 705 return chooseRandom(scope, null); 706 } 707 } finally { 708 netlock.readLock().unlock(); 709 } 710 } 711 712 private Node chooseRandom(String scope, String excludedScope){ 713 if (excludedScope != null) { 714 if (scope.startsWith(excludedScope)) { 715 return null; 716 } 717 if (!excludedScope.startsWith(scope)) { 718 excludedScope = null; 719 } 720 } 721 Node node = getNode(scope); 722 if (!(node instanceof InnerNode)) { 723 return node; 724 } 725 InnerNode innerNode = (InnerNode)node; 726 int numOfDatanodes = innerNode.getNumOfLeaves(); 727 if (excludedScope == null) { 728 node = null; 729 } else { 730 node = getNode(excludedScope); 731 if (!(node instanceof InnerNode)) { 732 numOfDatanodes -= 1; 733 } else { 734 numOfDatanodes -= ((InnerNode)node).getNumOfLeaves(); 735 } 736 } 737 if (numOfDatanodes == 0) { 738 throw new InvalidTopologyException( 739 "Failed to find datanode (scope=\"" + String.valueOf(scope) + 740 "\" excludedScope=\"" + String.valueOf(excludedScope) + "\")."); 741 } 742 int leaveIndex = getRandom().nextInt(numOfDatanodes); 743 return innerNode.getLeaf(leaveIndex, node); 744 } 745 746 /** return leaves in <i>scope</i> 747 * @param scope a path string 748 * @return leaves nodes under specific scope 749 */ 750 public List<Node> getLeaves(String scope) { 751 Node node = getNode(scope); 752 List<Node> leafNodes = new ArrayList<Node>(); 753 if (!(node instanceof InnerNode)) { 754 leafNodes.add(node); 755 } else { 756 InnerNode innerNode = (InnerNode) node; 757 for (int i=0;i<innerNode.getNumOfLeaves();i++) { 758 leafNodes.add(innerNode.getLeaf(i, null)); 759 } 760 } 761 return leafNodes; 762 } 763 764 /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i> 765 * if scope starts with ~, return the number of nodes that are not 766 * in <i>scope</i> and <i>excludedNodes</i>; 767 * @param scope a path string that may start with ~ 768 * @param excludedNodes a list of nodes 769 * @return number of available nodes 770 */ 771 public int countNumOfAvailableNodes(String scope, 772 Collection<Node> excludedNodes) { 773 boolean isExcluded=false; 774 if (scope.startsWith("~")) { 775 isExcluded=true; 776 scope=scope.substring(1); 777 } 778 scope = NodeBase.normalize(scope); 779 int count=0; // the number of nodes in both scope & excludedNodes 780 netlock.readLock().lock(); 781 try { 782 for(Node node:excludedNodes) { 783 if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR). 784 startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) { 785 count++; 786 } 787 } 788 Node n=getNode(scope); 789 int scopeNodeCount=1; 790 if (n instanceof InnerNode) { 791 scopeNodeCount=((InnerNode)n).getNumOfLeaves(); 792 } 793 if (isExcluded) { 794 return clusterMap.getNumOfLeaves()- 795 scopeNodeCount-excludedNodes.size()+count; 796 } else { 797 return scopeNodeCount-count; 798 } 799 } finally { 800 netlock.readLock().unlock(); 801 } 802 } 803 804 /** convert a network tree to a string */ 805 @Override 806 public String toString() { 807 // print the number of racks 808 StringBuilder tree = new StringBuilder(); 809 tree.append("Number of racks: "); 810 tree.append(numOfRacks); 811 tree.append("\n"); 812 // print the number of leaves 813 int numOfLeaves = getNumOfLeaves(); 814 tree.append("Expected number of leaves:"); 815 tree.append(numOfLeaves); 816 tree.append("\n"); 817 // print nodes 818 for(int i=0; i<numOfLeaves; i++) { 819 tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null))); 820 tree.append("\n"); 821 } 822 return tree.toString(); 823 } 824 825 /** 826 * Divide networklocation string into two parts by last separator, and get 827 * the first part here. 828 * 829 * @param networkLocation 830 * @return 831 */ 832 public static String getFirstHalf(String networkLocation) { 833 int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); 834 return networkLocation.substring(0, index); 835 } 836 837 /** 838 * Divide networklocation string into two parts by last separator, and get 839 * the second part here. 840 * 841 * @param networkLocation 842 * @return 843 */ 844 public static String getLastHalf(String networkLocation) { 845 int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR); 846 return networkLocation.substring(index); 847 } 848 849 /** 850 * Returns an integer weight which specifies how far away {node} is away from 851 * {reader}. A lower value signifies that a node is closer. 852 * 853 * @param reader Node where data will be read 854 * @param node Replica of data 855 * @return weight 856 */ 857 protected int getWeight(Node reader, Node node) { 858 // 0 is local, 1 is same rack, 2 is off rack 859 // Start off by initializing to off rack 860 int weight = 2; 861 if (reader != null) { 862 if (reader == node) { 863 weight = 0; 864 } else if (isOnSameRack(reader, node)) { 865 weight = 1; 866 } 867 } 868 return weight; 869 } 870 871 /** 872 * Sort nodes array by network distance to <i>reader</i>. 873 * <p/> 874 * In a three-level topology, a node can be either local, on the same rack, or 875 * on a different rack from the reader. Sorting the nodes based on network 876 * distance from the reader reduces network traffic and improves performance. 877 * <p/> 878 * As an additional twist, we also randomize the nodes at each network 879 * distance using the provided random seed. This helps with load balancing 880 * when there is data skew. 881 * 882 * @param reader Node where data will be read 883 * @param nodes Available replicas with the requested data 884 * @param seed Used to seed the pseudo-random generator that randomizes the 885 * set of nodes at each network distance. 886 */ 887 public void sortByDistance(Node reader, Node[] nodes, 888 int activeLen, long seed) { 889 /** Sort weights for the nodes array */ 890 int[] weights = new int[activeLen]; 891 for (int i=0; i<activeLen; i++) { 892 weights[i] = getWeight(reader, nodes[i]); 893 } 894 // Add weight/node pairs to a TreeMap to sort 895 TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>(); 896 for (int i=0; i<activeLen; i++) { 897 int weight = weights[i]; 898 Node node = nodes[i]; 899 List<Node> list = tree.get(weight); 900 if (list == null) { 901 list = Lists.newArrayListWithExpectedSize(1); 902 tree.put(weight, list); 903 } 904 list.add(node); 905 } 906 907 // Seed is normally the block id 908 // This means we use the same pseudo-random order for each block, for 909 // potentially better page cache usage. 910 Random rand = getRandom(); 911 rand.setSeed(seed); 912 int idx = 0; 913 for (List<Node> list: tree.values()) { 914 if (list != null) { 915 Collections.shuffle(list, rand); 916 for (Node n: list) { 917 nodes[idx] = n; 918 idx++; 919 } 920 } 921 } 922 Preconditions.checkState(idx == activeLen, 923 "Sorted the wrong number of nodes!"); 924 } 925}