001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.net;
019    
020    import java.util.ArrayList;
021    import java.util.List;
022    import java.util.Collection;
023    import java.util.List;
024    import java.util.Random;
025    import java.util.concurrent.locks.ReadWriteLock;
026    import java.util.concurrent.locks.ReentrantReadWriteLock;
027    
028    import org.apache.commons.logging.Log;
029    import org.apache.commons.logging.LogFactory;
030    import org.apache.hadoop.classification.InterfaceAudience;
031    import org.apache.hadoop.classification.InterfaceStability;
032    import org.apache.hadoop.conf.Configuration;
033    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
034    import org.apache.hadoop.util.ReflectionUtils;
035    
/** The class represents a cluster of computers with a tree hierarchical
 * network topology.
 * For example, a cluster may consist of many data centers filled
 * with racks of computers.
 * In a network topology, leaves represent data nodes (computers) and inner
 * nodes represent switches/routers that manage traffic in/out of data centers
 * or racks.
 *
 */
045    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
046    @InterfaceStability.Unstable
047    public class NetworkTopology {
  /** Path-like name of the default rack. */
  public final static String DEFAULT_RACK = "/default-rack";
  /** Default host level; not referenced elsewhere in this class. */
  public final static int DEFAULT_HOST_LEVEL = 2;
  /** Logger shared by this class. */
  public static final Log LOG = 
    LogFactory.getLog(NetworkTopology.class);
052        
053      public static class InvalidTopologyException extends RuntimeException {
054        private static final long serialVersionUID = 1L;
055        public InvalidTopologyException(String msg) {
056          super(msg);
057        }
058      }
059      
060      /**
061       * Get an instance of NetworkTopology based on the value of the configuration
062       * parameter net.topology.impl.
063       * 
064       * @param conf the configuration to be used
065       * @return an instance of NetworkTopology
066       */
067      public static NetworkTopology getInstance(Configuration conf){
068        return ReflectionUtils.newInstance(
069            conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
070            NetworkTopology.class, NetworkTopology.class), conf);
071      }
072    
073      /** InnerNode represents a switch/router of a data center or rack.
074       * Different from a leaf node, it has non-null children.
075       */
076      static class InnerNode extends NodeBase {
    /** Direct children of this node (leaves or other inner nodes). */
    protected List<Node> children=new ArrayList<Node>();
    /** Number of leaves in the subtree below this node; kept up to date by add/remove. */
    private int numOfLeaves;
079            
    /** Construct an InnerNode from a path-like string.
     * @param path path-like string, handed to the NodeBase constructor
     */
    InnerNode(String path) {
      super(path);
    }

    /** Construct an InnerNode from its name and its network location.
     * @param name name of this node
     * @param location network location of this node
     */
    InnerNode(String name, String location) {
      super(name, location);
    }

    /** Construct an InnerNode
     * from its name, its network location, its parent, and its level.
     * @param name name of this node
     * @param location network location of this node
     * @param parent the inner node this node hangs under
     * @param level level of this node in the tree
     */
    InnerNode(String name, String location, InnerNode parent, int level) {
      super(name, location, parent, level);
    }
095            
096        /** @return its children */
097        List<Node> getChildren() {return children;}
098            
099        /** @return the number of children this node has */
100        int getNumOfChildren() {
101          return children.size();
102        }
103            
104        /** Judge if this node represents a rack 
105         * @return true if it has no child or its children are not InnerNodes
106         */ 
107        boolean isRack() {
108          if (children.isEmpty()) {
109            return true;
110          }
111                
112          Node firstChild = children.get(0);
113          if (firstChild instanceof InnerNode) {
114            return false;
115          }
116                
117          return true;
118        }
119            
120        /** Judge if this node is an ancestor of node <i>n</i>
121         * 
122         * @param n a node
123         * @return true if this node is an ancestor of <i>n</i>
124         */
125        boolean isAncestor(Node n) {
126          return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) ||
127            (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR).
128            startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR);
129        }
130            
131        /** Judge if this node is the parent of node <i>n</i>
132         * 
133         * @param n a node
134         * @return true if this node is the parent of <i>n</i>
135         */
136        boolean isParent(Node n) {
137          return n.getNetworkLocation().equals(getPath(this));
138        }
139            
140        /* Return a child name of this node who is an ancestor of node <i>n</i> */
141        private String getNextAncestorName(Node n) {
142          if (!isAncestor(n)) {
143            throw new IllegalArgumentException(
144                                               this + "is not an ancestor of " + n);
145          }
146          String name = n.getNetworkLocation().substring(getPath(this).length());
147          if (name.charAt(0) == PATH_SEPARATOR) {
148            name = name.substring(1);
149          }
150          int index=name.indexOf(PATH_SEPARATOR);
151          if (index !=-1)
152            name = name.substring(0, index);
153          return name;
154        }
155            
156        /** Add node <i>n</i> to the subtree of this node 
157         * @param n node to be added
158         * @return true if the node is added; false otherwise
159         */
160        boolean add(Node n) {
161          if (!isAncestor(n))
162            throw new IllegalArgumentException(n.getName()+", which is located at "
163                    +n.getNetworkLocation()+", is not a decendent of "
164                    +getPath(this));
165          if (isParent(n)) {
166            // this node is the parent of n; add n directly
167            n.setParent(this);
168            n.setLevel(this.level+1);
169            for(int i=0; i<children.size(); i++) {
170              if (children.get(i).getName().equals(n.getName())) {
171                children.set(i, n);
172                return false;
173              }
174            }
175            children.add(n);
176            numOfLeaves++;
177            return true;
178          } else {
179            // find the next ancestor node
180            String parentName = getNextAncestorName(n);
181            InnerNode parentNode = null;
182            for(int i=0; i<children.size(); i++) {
183              if (children.get(i).getName().equals(parentName)) {
184                parentNode = (InnerNode)children.get(i);
185                break;
186              }
187            }
188            if (parentNode == null) {
189              // create a new InnerNode
190              parentNode = createParentNode(parentName);
191              children.add(parentNode);
192            }
193            // add n to the subtree of the next ancestor node
194            if (parentNode.add(n)) {
195              numOfLeaves++;
196              return true;
197            } else {
198              return false;
199            }
200          }
201        }
202    
203        /**
204         * Creates a parent node to be added to the list of children.  
205         * Creates a node using the InnerNode four argument constructor specifying 
206         * the name, location, parent, and level of this node.
207         * 
208         * <p>To be overridden in subclasses for specific InnerNode implementations,
209         * as alternative to overriding the full {@link #add(Node)} method.
210         * 
211         * @param parentName The name of the parent node
212         * @return A new inner node
213         * @see InnerNode#InnerNode(String, String, InnerNode, int)
214         */
215        protected InnerNode createParentNode(String parentName) {
216          return new InnerNode(parentName, getPath(this), this, this.getLevel()+1);
217        }
218    
219        /** Remove node <i>n</i> from the subtree of this node
220         * @param n node to be deleted 
221         * @return true if the node is deleted; false otherwise
222         */
223        boolean remove(Node n) {
224          String parent = n.getNetworkLocation();
225          String currentPath = getPath(this);
226          if (!isAncestor(n))
227            throw new IllegalArgumentException(n.getName()
228                                               +", which is located at "
229                                               +parent+", is not a descendent of "+currentPath);
230          if (isParent(n)) {
231            // this node is the parent of n; remove n directly
232            for(int i=0; i<children.size(); i++) {
233              if (children.get(i).getName().equals(n.getName())) {
234                children.remove(i);
235                numOfLeaves--;
236                n.setParent(null);
237                return true;
238              }
239            }
240            return false;
241          } else {
242            // find the next ancestor node: the parent node
243            String parentName = getNextAncestorName(n);
244            InnerNode parentNode = null;
245            int i;
246            for(i=0; i<children.size(); i++) {
247              if (children.get(i).getName().equals(parentName)) {
248                parentNode = (InnerNode)children.get(i);
249                break;
250              }
251            }
252            if (parentNode==null) {
253              return false;
254            }
255            // remove n from the parent node
256            boolean isRemoved = parentNode.remove(n);
257            // if the parent node has no children, remove the parent node too
258            if (isRemoved) {
259              if (parentNode.getNumOfChildren() == 0) {
260                children.remove(i);
261              }
262              numOfLeaves--;
263            }
264            return isRemoved;
265          }
266        } // end of remove
267            
268        /** Given a node's string representation, return a reference to the node
269         * @param loc string location of the form /rack/node
270         * @return null if the node is not found or the childnode is there but
271         * not an instance of {@link InnerNode}
272         */
273        private Node getLoc(String loc) {
274          if (loc == null || loc.length() == 0) return this;
275                
276          String[] path = loc.split(PATH_SEPARATOR_STR, 2);
277          Node childnode = null;
278          for(int i=0; i<children.size(); i++) {
279            if (children.get(i).getName().equals(path[0])) {
280              childnode = children.get(i);
281            }
282          }
283          if (childnode == null) return null; // non-existing node
284          if (path.length == 1) return childnode;
285          if (childnode instanceof InnerNode) {
286            return ((InnerNode)childnode).getLoc(path[1]);
287          } else {
288            return null;
289          }
290        }
291            
    /** Get the <i>leafIndex</i>-th leaf of this subtree, counting only the
     * leaves that are not covered by <i>excludedNode</i>.
     *
     * @param leafIndex index of the wanted leaf among the non-excluded leaves
     * @param excludedNode an excluded node, either a leaf or an inner node
     *                     whose whole subtree is skipped (can be null)
     * @return the selected leaf; null if <i>leafIndex</i> is out of range
     */
    Node getLeaf(int leafIndex, Node excludedNode) {
      int count=0;
      // check if the excluded node is a leaf (a null excluded node is
      // treated as a leaf here as well)
      boolean isLeaf =
        excludedNode == null || !(excludedNode instanceof InnerNode);
      // calculate the total number of excluded leaf nodes; the value is
      // only consulted below when excludedNode is not null
      int numOfExcludedLeaves =
        isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves();
      if (isLeafParent()) { // children are leaves
        if (isLeaf) { // excluded node is a leaf node
          int excludedIndex = children.indexOf(excludedNode);
          if (excludedIndex != -1 && leafIndex >= 0) {
            // excluded node is one of the children so adjust the leaf index:
            // indexes at or after the excluded child shift by one
            leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex;
          }
        }
        // range check
        if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) {
          return null;
        }
        return children.get(leafIndex);
      } else {
        // children are inner nodes: walk them, accumulating in count the
        // number of eligible leaves already passed
        for(int i=0; i<children.size(); i++) {
          InnerNode child = (InnerNode)children.get(i);
          if (excludedNode == null || excludedNode != child) {
            // not the excludedNode
            int numOfLeaves = child.getNumOfLeaves();
            if (excludedNode != null && child.isAncestor(excludedNode)) {
              // the excluded node lives inside this child: discount its leaves
              numOfLeaves -= numOfExcludedLeaves;
            }
            if (count+numOfLeaves > leafIndex) {
              // the leaf is in the child subtree
              return child.getLeaf(leafIndex-count, excludedNode);
            } else {
              // go to the next child
              count = count+numOfLeaves;
            }
          } else { // it is the excludedNode
            // skip it and set the excludedNode to be null
            excludedNode = null;
          }
        }
        return null;
      }
    }
344        
345        protected boolean isLeafParent() {
346          return isRack();
347        }
348    
349        /**
350          * Determine if children a leaves, default implementation calls {@link #isRack()}
351          * <p>To be overridden in subclasses for specific InnerNode implementations,
352          * as alternative to overriding the full {@link #getLeaf(int, Node)} method.
353          * 
354          * @return true if children are leaves, false otherwise
355          */
356        protected boolean areChildrenLeaves() {
357          return isRack();
358        }
359    
360        /**
361         * Get number of leaves.
362         */
363        int getNumOfLeaves() {
364          return numOfLeaves;
365        }
366      } // end of InnerNode
367    
  /**
   * the root cluster map: the root inner node of the topology tree,
   * created in the constructor
   */
  InnerNode clusterMap;
  /** Depth of all leaf nodes; -1 until the first leaf has been added */
  private int depthOfAllLeaves = -1;
  /** rack counter, adjusted by add/remove while the write lock is held */
  protected int numOfRacks = 0;
  /** the lock used to manage access to the tree and the counters */
  protected ReadWriteLock netlock = new ReentrantReadWriteLock();
378    
379      public NetworkTopology() {
380        clusterMap = new InnerNode(InnerNode.ROOT);
381      }
382    
383      /** Add a leaf node
384       * Update node counter & rack counter if necessary
385       * @param node node to be added; can be null
386       * @exception IllegalArgumentException if add a node to a leave 
387                                             or node to be added is not a leaf
388       */
389      public void add(Node node) {
390        if (node==null) return;
391        String oldTopoStr = this.toString();
392        if( node instanceof InnerNode ) {
393          throw new IllegalArgumentException(
394            "Not allow to add an inner node: "+NodeBase.getPath(node));
395        }
396        int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
397        netlock.writeLock().lock();
398        try {
399          if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
400            LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
401                " at depth " + newDepth + " to topology:\n" + oldTopoStr);
402            throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
403                ": You cannot have a rack and a non-rack node at the same " +
404                "level of the network topology.");
405          }
406          Node rack = getNodeForNetworkLocation(node);
407          if (rack != null && !(rack instanceof InnerNode)) {
408            throw new IllegalArgumentException("Unexpected data node " 
409                                               + node.toString() 
410                                               + " at an illegal network location");
411          }
412          if (clusterMap.add(node)) {
413            LOG.info("Adding a new node: "+NodeBase.getPath(node));
414            if (rack == null) {
415              numOfRacks++;
416            }
417            if (!(node instanceof InnerNode)) {
418              if (depthOfAllLeaves == -1) {
419                depthOfAllLeaves = node.getLevel();
420              }
421            }
422          }
423          if(LOG.isDebugEnabled()) {
424            LOG.debug("NetworkTopology became:\n" + this.toString());
425          }
426        } finally {
427          netlock.writeLock().unlock();
428        }
429      }
430      
431      /**
432       * Return a reference to the node given its string representation.
433       * Default implementation delegates to {@link #getNode(String)}.
434       * 
435       * <p>To be overridden in subclasses for specific NetworkTopology 
436       * implementations, as alternative to overriding the full {@link #add(Node)}
437       *  method.
438       * 
439       * @param node The string representation of this node's network location is
440       * used to retrieve a Node object. 
441       * @return a reference to the node; null if the node is not in the tree
442       * 
443       * @see #add(Node)
444       * @see #getNode(String)
445       */
446      protected Node getNodeForNetworkLocation(Node node) {
447        return getNode(node.getNetworkLocation());
448      }
449      
450      /**
451       * Given a string representation of a rack, return its children
452       * @param loc a path-like string representation of a rack
453       * @return a newly allocated list with all the node's children
454       */
455      public List<Node> getDatanodesInRack(String loc) {
456        netlock.readLock().lock();
457        try {
458          loc = NodeBase.normalize(loc);
459          if (!NodeBase.ROOT.equals(loc)) {
460            loc = loc.substring(1);
461          }
462          InnerNode rack = (InnerNode) clusterMap.getLoc(loc);
463          if (rack == null) {
464            return null;
465          }
466          return new ArrayList<Node>(rack.getChildren());
467        } finally {
468          netlock.readLock().unlock();
469        }
470      }
471    
472      /** Remove a node
473       * Update node counter and rack counter if necessary
474       * @param node node to be removed; can be null
475       */ 
476      public void remove(Node node) {
477        if (node==null) return;
478        if( node instanceof InnerNode ) {
479          throw new IllegalArgumentException(
480            "Not allow to remove an inner node: "+NodeBase.getPath(node));
481        }
482        LOG.info("Removing a node: "+NodeBase.getPath(node));
483        netlock.writeLock().lock();
484        try {
485          if (clusterMap.remove(node)) {
486            InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
487            if (rack == null) {
488              numOfRacks--;
489            }
490          }
491          if(LOG.isDebugEnabled()) {
492            LOG.debug("NetworkTopology became:\n" + this.toString());
493          }
494        } finally {
495          netlock.writeLock().unlock();
496        }
497      }
498    
499      /** Check if the tree contains node <i>node</i>
500       * 
501       * @param node a node
502       * @return true if <i>node</i> is already in the tree; false otherwise
503       */
504      public boolean contains(Node node) {
505        if (node == null) return false;
506        netlock.readLock().lock();
507        try {
508          Node parent = node.getParent();
509          for (int level = node.getLevel(); parent != null && level > 0;
510               parent = parent.getParent(), level--) {
511            if (parent == clusterMap) {
512              return true;
513            }
514          }
515        } finally {
516          netlock.readLock().unlock();
517        }
518        return false; 
519      }
520        
521      /** Given a string representation of a node, return its reference
522       * 
523       * @param loc
524       *          a path-like string representation of a node
525       * @return a reference to the node; null if the node is not in the tree
526       */
527      public Node getNode(String loc) {
528        netlock.readLock().lock();
529        try {
530          loc = NodeBase.normalize(loc);
531          if (!NodeBase.ROOT.equals(loc))
532            loc = loc.substring(1);
533          return clusterMap.getLoc(loc);
534        } finally {
535          netlock.readLock().unlock();
536        }
537      }
538      
539      /** Given a string representation of a rack for a specific network
540       *  location
541       * 
542       * To be overridden in subclasses for specific NetworkTopology 
543       * implementations, as alternative to overriding the full 
544       * {@link #getRack(String)} method.
545       * @param loc
546       *          a path-like string representation of a network location
547       * @return a rack string
548       */
549      public String getRack(String loc) {
550        return loc;
551      }
552      
553      /** @return the total number of racks */
554      public int getNumOfRacks() {
555        netlock.readLock().lock();
556        try {
557          return numOfRacks;
558        } finally {
559          netlock.readLock().unlock();
560        }
561      }
562    
563      /** @return the total number of leaf nodes */
564      public int getNumOfLeaves() {
565        netlock.readLock().lock();
566        try {
567          return clusterMap.getNumOfLeaves();
568        } finally {
569          netlock.readLock().unlock();
570        }
571      }
572    
573      /** Return the distance between two nodes
574       * It is assumed that the distance from one node to its parent is 1
575       * The distance between two nodes is calculated by summing up their distances
576       * to their closest common ancestor.
577       * @param node1 one node
578       * @param node2 another node
579       * @return the distance between node1 and node2 which is zero if they are the same
580       *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
581       */
582      public int getDistance(Node node1, Node node2) {
583        if (node1 == node2) {
584          return 0;
585        }
586        Node n1=node1, n2=node2;
587        int dis = 0;
588        netlock.readLock().lock();
589        try {
590          int level1=node1.getLevel(), level2=node2.getLevel();
591          while(n1!=null && level1>level2) {
592            n1 = n1.getParent();
593            level1--;
594            dis++;
595          }
596          while(n2!=null && level2>level1) {
597            n2 = n2.getParent();
598            level2--;
599            dis++;
600          }
601          while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
602            n1=n1.getParent();
603            n2=n2.getParent();
604            dis+=2;
605          }
606        } finally {
607          netlock.readLock().unlock();
608        }
609        if (n1==null) {
610          LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
611          return Integer.MAX_VALUE;
612        }
613        if (n2==null) {
614          LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
615          return Integer.MAX_VALUE;
616        }
617        return dis+2;
618      }
619    
620      /** Check if two nodes are on the same rack
621       * @param node1 one node (can be null)
622       * @param node2 another node (can be null)
623       * @return true if node1 and node2 are on the same rack; false otherwise
624       * @exception IllegalArgumentException when either node1 or node2 is null, or
625       * node1 or node2 do not belong to the cluster
626       */
627      public boolean isOnSameRack( Node node1,  Node node2) {
628        if (node1 == null || node2 == null) {
629          return false;
630        }
631          
632        netlock.readLock().lock();
633        try {
634          return isSameParents(node1, node2);
635        } finally {
636          netlock.readLock().unlock();
637        }
638      }
639      
640      /**
641       * Check if network topology is aware of NodeGroup
642       */
643      public boolean isNodeGroupAware() {
644        return false;
645      }
646      
647      /** 
648       * Return false directly as not aware of NodeGroup, to be override in sub-class
649       */
650      public boolean isOnSameNodeGroup(Node node1, Node node2) {
651        return false;
652      }
653    
654      /**
655       * Compare the parents of each node for equality
656       * 
657       * <p>To be overridden in subclasses for specific NetworkTopology 
658       * implementations, as alternative to overriding the full 
659       * {@link #isOnSameRack(Node, Node)} method.
660       * 
661       * @param node1 the first node to compare
662       * @param node2 the second node to compare
663       * @return true if their parents are equal, false otherwise
664       * 
665       * @see #isOnSameRack(Node, Node)
666       */
667      protected boolean isSameParents(Node node1, Node node2) {
668        return node1.getParent()==node2.getParent();
669      }
670    
  /** Shared random source used when picking nodes at random. */
  final protected static Random r = new Random();
672      /** randomly choose one node from <i>scope</i>
673       * if scope starts with ~, choose one from the all nodes except for the
674       * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
675       * @param scope range of nodes from which a node will be chosen
676       * @return the chosen node
677       */
678      public Node chooseRandom(String scope) {
679        netlock.readLock().lock();
680        try {
681          if (scope.startsWith("~")) {
682            return chooseRandom(NodeBase.ROOT, scope.substring(1));
683          } else {
684            return chooseRandom(scope, null);
685          }
686        } finally {
687          netlock.readLock().unlock();
688        }
689      }
690    
691      private Node chooseRandom(String scope, String excludedScope){
692        if (excludedScope != null) {
693          if (scope.startsWith(excludedScope)) {
694            return null;
695          }
696          if (!excludedScope.startsWith(scope)) {
697            excludedScope = null;
698          }
699        }
700        Node node = getNode(scope);
701        if (!(node instanceof InnerNode)) {
702          return node;
703        }
704        InnerNode innerNode = (InnerNode)node;
705        int numOfDatanodes = innerNode.getNumOfLeaves();
706        if (excludedScope == null) {
707          node = null;
708        } else {
709          node = getNode(excludedScope);
710          if (!(node instanceof InnerNode)) {
711            numOfDatanodes -= 1;
712          } else {
713            numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
714          }
715        }
716        int leaveIndex = r.nextInt(numOfDatanodes);
717        return innerNode.getLeaf(leaveIndex, node);
718      }
719    
720      /** return leaves in <i>scope</i>
721       * @param scope a path string
722       * @return leaves nodes under specific scope
723       */
724      public List<Node> getLeaves(String scope) {
725        Node node = getNode(scope);
726        List<Node> leafNodes = new ArrayList<Node>();
727        if (!(node instanceof InnerNode)) {
728          leafNodes.add(node);
729        } else {
730          InnerNode innerNode = (InnerNode) node;
731          for (int i=0;i<innerNode.getNumOfLeaves();i++) {
732            leafNodes.add(innerNode.getLeaf(i, null));
733          }
734        }
735        return leafNodes;
736      }
737    
738      /** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i>
739       * if scope starts with ~, return the number of nodes that are not
740       * in <i>scope</i> and <i>excludedNodes</i>; 
741       * @param scope a path string that may start with ~
742       * @param excludedNodes a list of nodes
743       * @return number of available nodes
744       */
745      public int countNumOfAvailableNodes(String scope,
746                                          Collection<Node> excludedNodes) {
747        boolean isExcluded=false;
748        if (scope.startsWith("~")) {
749          isExcluded=true;
750          scope=scope.substring(1);
751        }
752        scope = NodeBase.normalize(scope);
753        int count=0; // the number of nodes in both scope & excludedNodes
754        netlock.readLock().lock();
755        try {
756          for(Node node:excludedNodes) {
757            if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR).
758                startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) {
759              count++;
760            }
761          }
762          Node n=getNode(scope);
763          int scopeNodeCount=1;
764          if (n instanceof InnerNode) {
765            scopeNodeCount=((InnerNode)n).getNumOfLeaves();
766          }
767          if (isExcluded) {
768            return clusterMap.getNumOfLeaves()-
769              scopeNodeCount-excludedNodes.size()+count;
770          } else {
771            return scopeNodeCount-count;
772          }
773        } finally {
774          netlock.readLock().unlock();
775        }
776      }
777    
778      /** convert a network tree to a string */
779      @Override
780      public String toString() {
781        // print the number of racks
782        StringBuilder tree = new StringBuilder();
783        tree.append("Number of racks: ");
784        tree.append(numOfRacks);
785        tree.append("\n");
786        // print the number of leaves
787        int numOfLeaves = getNumOfLeaves();
788        tree.append("Expected number of leaves:");
789        tree.append(numOfLeaves);
790        tree.append("\n");
791        // print nodes
792        for(int i=0; i<numOfLeaves; i++) {
793          tree.append(NodeBase.getPath(clusterMap.getLeaf(i, null)));
794          tree.append("\n");
795        }
796        return tree.toString();
797      }
798      
799      /**
800       * Divide networklocation string into two parts by last separator, and get 
801       * the first part here.
802       * 
803       * @param networkLocation
804       * @return
805       */
806      public static String getFirstHalf(String networkLocation) {
807        int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
808        return networkLocation.substring(0, index);
809      }
810    
811      /**
812       * Divide networklocation string into two parts by last separator, and get 
813       * the second part here.
814       * 
815       * @param networkLocation
816       * @return
817       */
818      public static String getLastHalf(String networkLocation) {
819        int index = networkLocation.lastIndexOf(NodeBase.PATH_SEPARATOR_STR);
820        return networkLocation.substring(index);
821      }
822    
823      /** swap two array items */
824      static protected void swap(Node[] nodes, int i, int j) {
825        Node tempNode;
826        tempNode = nodes[j];
827        nodes[j] = nodes[i];
828        nodes[i] = tempNode;
829      }
830      
  /** Sort nodes array by their distances to <i>reader</i>.
   * It linearly scans the array, if a local node is found, swap it with
   * the first element of the array.
   * If a local rack node is found, swap it with the first element following
   * the local node.
   * If neither local node or local rack node is found (or <i>reader</i> is
   * null), put a random replica location at position 0.
   * It leaves the rest nodes untouched.
   * @param reader the node that wishes to read a block from one of the nodes
   * @param nodes the list of nodes containing data for the reader; modified
   *              in place
   */
  public void pseudoSortByDistance( Node reader, Node[] nodes ) {
    // next free slot at the front of the array; becomes 1 once the local
    // node has been placed at position 0
    int tempIndex = 0;
    // index of the first local-rack node seen, -1 while none has been found
    int localRackNode = -1;
    if (reader != null ) {
      //scan the array to find the local node & local rack node
      for(int i=0; i<nodes.length; i++) {
        if(tempIndex == 0 && reader == nodes[i]) { //local node
          //swap the local node and the node at position 0
          if( i != 0 ) {
            swap(nodes, tempIndex, i);
          }
          tempIndex=1;
          if(localRackNode != -1 ) {
            // a local-rack node was already found; if it was sitting at
            // position 0 the swap above has just moved it to position i
            if(localRackNode == 0) {
              localRackNode = i;
            }
            break;
          }
        } else if(localRackNode == -1 && isOnSameRack(reader, nodes[i])) {
          //local rack
          localRackNode = i;
          // stop scanning once the local node has been placed as well
          if(tempIndex != 0 ) break;
        }
      }

      // swap the local rack node and the node at position tempIndex
      if(localRackNode != -1 && localRackNode != tempIndex ) {
        swap(nodes, tempIndex, localRackNode);
        tempIndex++;
      }
    }
    
    // put a random node at position 0 if it is not a local/local-rack node
    if(tempIndex == 0 && localRackNode == -1 && nodes.length != 0) {
      swap(nodes, 0, r.nextInt(nodes.length));
    }
  }
879      
880    }