package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jspecify.annotations.Nullable;

import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 Iterates through a Node and its tree of descendants, in document order, and returns nodes of the specified type. This
 iterator supports structural changes to the tree during the traversal, such as {@link Node#remove()},
 {@link Node#replaceWith(Node)}, {@link Node#wrap(String)}, etc.
 <p>See also the {@link org.jsoup.select.NodeTraversor NodeTraversor} if {@code head} and {@code tail} callbacks are
 desired for each node.</p>
 @param <T> the node type that this iterator emits
 @since 1.17.1
 */
public class NodeIterator<T extends Node> implements Iterator<T> {
    private Node root;                     // root / starting node
    private @Nullable T next;              // the next node to return; null if not yet looked up, or if exhausted
    private Node current;                  // the current (last emitted) node
    private Node previous;                 // the previously emitted node; used to recover from structural changes
    private @Nullable Node currentParent;  // the current node's parent; used to detect structural changes
    private final Class<T> type;           // the desired node class type

    /**
     Create a NodeIterator that will iterate the supplied node, and all of its descendants. The returned {@link #next}
     type will be filtered to the input type.
     @param start initial node
     @param type node type to filter for
     */
    public NodeIterator(Node start, Class<T> type) {
        Validate.notNull(start);
        Validate.notNull(type);
        this.type = type;

        restart(start);
    }

    /**
     Create a NodeIterator that will iterate the supplied node, and all of its descendants. All node types will be
     returned.
     @param start initial node
     @return a new iterator over {@code start} and its descendants
     */
    public static NodeIterator<Node> from(Node start) {
        return new NodeIterator<>(start, Node.class);
    }

    /**
     Restart this Iterator from the specified start node. Will act as if it were newly constructed. Useful, for
     example, to save some GC if the iterator is used in a tight loop.
     @param start the new start node.
     */
    public void restart(Node start) {
        // Always reset the pending node: if it were only assigned when start matches the type, a node buffered by an
        // earlier hasNext() on the previous traversal would leak out of the first next() of this one.
        if (type.isInstance(start)) {
            //noinspection unchecked
            next = (T) start; // first next() will be the start node
        } else {
            next = null; // start is not emitted; the first match is found lazily, from start's descendants
        }

        root = previous = current = start;
        currentParent = current.parent();
    }

    /**
     Tests if there is another node of the desired type to emit, looking it up first if required.
     @return true if a subsequent call to {@link #next()} will return a node
     */
    @Override
    public boolean hasNext() {
        maybeFindNext();
        return next != null;
    }

    /**
     Get the next node of the desired type, in document order.
     @return the next node
     @throws NoSuchElementException if the traversal is complete
     */
    @Override
    public T next() {
        maybeFindNext();
        if (next == null) throw new NoSuchElementException();

        T result = next;
        previous = current;
        current = next;
        currentParent = current.parent(); // remembered so that a later detach of current can be detected
        next = null; // consumed; the following node is looked up on the next hasNext() / next() call
        return result;
    }

    /**
     If next is null, looks for and sets next. If next is still null after this, we have reached the end.
     */
    private void maybeFindNext() {
        if (next != null) return;

        // change detected (removed or replaced), redo from previous
        if (currentParent != null && !current.hasParent())
            current = previous;

        next = findNextNode();
    }

    /**
     Walks forward from the current node, in document order, until a node of the desired type is found.
     @return the next matching node, or null if the traversal of the root's tree is complete
     */
    private @Nullable T findNextNode() {
        Node node = current;
        while (true) {
            if (node.childNodeSize() > 0)
                node = node.childNode(0);               // descend children
            else if (root.equals(node))
                node = null;                            // complete when all children of root are fully visited
            else if (node.nextSibling() != null)
                node = node.nextSibling();              // in a descendant with no more children; traverse
            else {
                while (true) {
                    node = node.parent();               // pop out of descendants
                    if (node == null || root.equals(node))
                        return null;                    // got back to root; complete
                    if (node.nextSibling() != null) {
                        node = node.nextSibling();      // traverse
                        break;
                    }
                }
            }
            if (node == null)
                return null;                            // reached the end

            if (type.isInstance(node))
                //noinspection unchecked
                return (T) node;
        }
    }

    /**
     Removes the current node via {@link Node#remove()}. The current node is the node most recently returned by
     {@link #next()}; if {@code next()} has not been called since construction or {@link #restart(Node)}, it is the
     start node.
     */
    @Override
    public void remove() {
        current.remove();
    }
}