001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.util;
019    
020    import java.io.PrintStream;
021    import java.util.ConcurrentModificationException;
022    import java.util.Iterator;
023    
024    import org.apache.hadoop.HadoopIllegalArgumentException;
025    import org.apache.hadoop.classification.InterfaceAudience;
026    import org.apache.hadoop.util.StringUtils;
027    
028    import com.google.common.annotations.VisibleForTesting;
029    
030    /**
031     * A low memory footprint {@link GSet} implementation,
032     * which uses an array for storing the elements
033     * and linked lists for collision resolution.
034     *
035     * No rehash will be performed.
036     * Therefore, the internal array will never be resized.
037     *
038     * This class does not support null element.
039     *
040     * This class is not thread safe.
041     *
042     * @param <K> Key type for looking up the elements
043     * @param <E> Element type, which must be
044     *       (1) a subclass of K, and
045     *       (2) implementing {@link LinkedElement} interface.
046     */
047    @InterfaceAudience.Private
048    public class LightWeightGSet<K, E extends K> implements GSet<K, E> {
049      /**
050       * Elements of {@link LightWeightGSet}.
051       */
052      public static interface LinkedElement {
053        /** Set the next element. */
054        public void setNext(LinkedElement next);
055    
056        /** Get the next element. */
057        public LinkedElement getNext();
058      }
059    
060      static final int MAX_ARRAY_LENGTH = 1 << 30; //prevent int overflow problem
061      static final int MIN_ARRAY_LENGTH = 1;
062    
063      /**
064       * An internal array of entries, which are the rows of the hash table.
065       * The size must be a power of two.
066       */
067      private final LinkedElement[] entries;
068      /** A mask for computing the array index from the hash value of an element. */
069      private final int hash_mask;
070      /** The size of the set (not the entry array). */
071      private int size = 0;
072      /** Modification version for fail-fast.
073       * @see ConcurrentModificationException
074       */
075      private int modification = 0;
076    
077      /**
078       * @param recommended_length Recommended size of the internal array.
079       */
080      public LightWeightGSet(final int recommended_length) {
081        final int actual = actualArrayLength(recommended_length);
082        if (LOG.isDebugEnabled()) {
083          LOG.debug("recommended=" + recommended_length + ", actual=" + actual);
084        }
085        entries = new LinkedElement[actual];
086        hash_mask = entries.length - 1;
087      }
088    
089      //compute actual length
090      private static int actualArrayLength(int recommended) {
091        if (recommended > MAX_ARRAY_LENGTH) {
092          return MAX_ARRAY_LENGTH;
093        } else if (recommended < MIN_ARRAY_LENGTH) {
094          return MIN_ARRAY_LENGTH;
095        } else {
096          final int a = Integer.highestOneBit(recommended);
097          return a == recommended? a: a << 1;
098        }
099      }
100    
101      @Override
102      public int size() {
103        return size;
104      }
105    
106      private int getIndex(final K key) {
107        return key.hashCode() & hash_mask;
108      }
109    
110      private E convert(final LinkedElement e){
111        @SuppressWarnings("unchecked")
112        final E r = (E)e;
113        return r;
114      }
115    
116      @Override
117      public E get(final K key) {
118        //validate key
119        if (key == null) {
120          throw new NullPointerException("key == null");
121        }
122    
123        //find element
124        final int index = getIndex(key);
125        for(LinkedElement e = entries[index]; e != null; e = e.getNext()) {
126          if (e.equals(key)) {
127            return convert(e);
128          }
129        }
130        //element not found
131        return null;
132      }
133    
134      @Override
135      public boolean contains(final K key) {
136        return get(key) != null;
137      }
138    
139      @Override
140      public E put(final E element) {
141        //validate element
142        if (element == null) {
143          throw new NullPointerException("Null element is not supported.");
144        }
145        if (!(element instanceof LinkedElement)) {
146          throw new HadoopIllegalArgumentException(
147              "!(element instanceof LinkedElement), element.getClass()="
148              + element.getClass());
149        }
150        final LinkedElement e = (LinkedElement)element;
151    
152        //find index
153        final int index = getIndex(element);
154    
155        //remove if it already exists
156        final E existing = remove(index, element);
157    
158        //insert the element to the head of the linked list
159        modification++;
160        size++;
161        e.setNext(entries[index]);
162        entries[index] = e;
163    
164        return existing;
165      }
166    
167      /**
168       * Remove the element corresponding to the key,
169       * given key.hashCode() == index.
170       *
171       * @return If such element exists, return it.
172       *         Otherwise, return null.
173       */
174      private E remove(final int index, final K key) {
175        if (entries[index] == null) {
176          return null;
177        } else if (entries[index].equals(key)) {
178          //remove the head of the linked list
179          modification++;
180          size--;
181          final LinkedElement e = entries[index];
182          entries[index] = e.getNext();
183          e.setNext(null);
184          return convert(e);
185        } else {
186          //head != null and key is not equal to head
187          //search the element
188          LinkedElement prev = entries[index];
189          for(LinkedElement curr = prev.getNext(); curr != null; ) {
190            if (curr.equals(key)) {
191              //found the element, remove it
192              modification++;
193              size--;
194              prev.setNext(curr.getNext());
195              curr.setNext(null);
196              return convert(curr);
197            } else {
198              prev = curr;
199              curr = curr.getNext();
200            }
201          }
202          //element not found
203          return null;
204        }
205      }
206    
207      @Override
208      public E remove(final K key) {
209        //validate key
210        if (key == null) {
211          throw new NullPointerException("key == null");
212        }
213        return remove(getIndex(key), key);
214      }
215    
216      @Override
217      public Iterator<E> iterator() {
218        return new SetIterator();
219      }
220    
221      @Override
222      public String toString() {
223        final StringBuilder b = new StringBuilder(getClass().getSimpleName());
224        b.append("(size=").append(size)
225         .append(String.format(", %08x", hash_mask))
226         .append(", modification=").append(modification)
227         .append(", entries.length=").append(entries.length)
228         .append(")");
229        return b.toString();
230      }
231    
232      /** Print detailed information of this object. */
233      public void printDetails(final PrintStream out) {
234        out.print(this + ", entries = [");
235        for(int i = 0; i < entries.length; i++) {
236          if (entries[i] != null) {
237            LinkedElement e = entries[i];
238            out.print("\n  " + i + ": " + e);
239            for(e = e.getNext(); e != null; e = e.getNext()) {
240              out.print(" -> " + e);
241            }
242          }
243        }
244        out.println("\n]");
245      }
246    
247      private class SetIterator implements Iterator<E> {
248        /** The starting modification for fail-fast. */
249        private final int startModification = modification;
250        /** The current index of the entry array. */
251        private int index = -1;
252        /** The next element to return. */
253        private LinkedElement next = nextNonemptyEntry();
254    
255        /** Find the next nonempty entry starting at (index + 1). */
256        private LinkedElement nextNonemptyEntry() {
257          for(index++; index < entries.length && entries[index] == null; index++);
258          return index < entries.length? entries[index]: null;
259        }
260    
261        @Override
262        public boolean hasNext() {
263          return next != null;
264        }
265    
266        @Override
267        public E next() {
268          if (modification != startModification) {
269            throw new ConcurrentModificationException("modification=" + modification
270                + " != startModification = " + startModification);
271          }
272    
273          final E e = convert(next);
274    
275          //find the next element
276          final LinkedElement n = next.getNext();
277          next = n != null? n: nextNonemptyEntry();
278    
279          return e;
280        }
281    
282        @Override
283        public void remove() {
284          throw new UnsupportedOperationException("Remove is not supported.");
285        }
286      }
287      
288      /**
289       * Let t = percentage of max memory.
290       * Let e = round(log_2 t).
291       * Then, we choose capacity = 2^e/(size of reference),
292       * unless it is outside the close interval [1, 2^30].
293       */
294      public static int computeCapacity(double percentage, String mapName) {
295        return computeCapacity(Runtime.getRuntime().maxMemory(), percentage,
296            mapName);
297      }
298      
299      @VisibleForTesting
300      static int computeCapacity(long maxMemory, double percentage,
301          String mapName) {
302        if (percentage > 100.0 || percentage < 0.0) {
303          throw new HadoopIllegalArgumentException("Percentage " + percentage
304              + " must be greater than or equal to 0 "
305              + " and less than or equal to 100");
306        }
307        if (maxMemory < 0) {
308          throw new HadoopIllegalArgumentException("Memory " + maxMemory
309              + " must be greater than or equal to 0");
310        }
311        if (percentage == 0.0 || maxMemory == 0) {
312          return 0;
313        }
314        //VM detection
315        //See http://java.sun.com/docs/hotspot/HotSpotFAQ.html#64bit_detection
316        final String vmBit = System.getProperty("sun.arch.data.model");
317    
318        //Percentage of max memory
319        final double percentDivisor = 100.0/percentage;
320        final double percentMemory = maxMemory/percentDivisor;
321        
322        //compute capacity
323        final int e1 = (int)(Math.log(percentMemory)/Math.log(2.0) + 0.5);
324        final int e2 = e1 - ("32".equals(vmBit)? 2: 3);
325        final int exponent = e2 < 0? 0: e2 > 30? 30: e2;
326        final int c = 1 << exponent;
327    
328        LOG.info("Computing capacity for map " + mapName);
329        LOG.info("VM type       = " + vmBit + "-bit");
330        LOG.info(percentage + "% max memory = "
331            + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1));
332        LOG.info("capacity      = 2^" + exponent + " = " + c + " entries");
333        return c;
334      }
335      
336      public void clear() {
337        for (int i = 0; i < entries.length; i++) {
338          entries[i] = null;
339        }
340        size = 0;
341      }
342    }