import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;

/*
 * SD2x Homework #2
 * This class represents a single HTML tag.
 * Please do not change this code! Your solution will be evaluated using this version of the class.
 */
public class HtmlTag {

    /** Tag name, stored in lower case (for example {@code "div"} or {@code "!--"}). */
    protected final String element;

    /** The open/close flag exactly as passed to the constructor. */
    protected final boolean openTag;

    /**
     * Creates a tag.
     *
     * @param element   tag name; it is lower-cased before being stored
     * @param isOpenTag {@code true} for an opening tag, {@code false} for a closing tag
     * @throws NullPointerException if {@code element} is {@code null}
     */
    public HtmlTag(String element, boolean isOpenTag) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale,
        // so the name always compares correctly against SELF_CLOSING_TAGS.
        this.element = element.toLowerCase(java.util.Locale.ROOT);
        openTag = isOpenTag;
    }

    /** Returns the lower-cased tag name. */
    public String getElement() {
        return element;
    }

    /**
     * Returns {@code true} only when this tag was created as an opening tag and its
     * name is not listed in {@link #SELF_CLOSING_TAGS}.
     */
    public boolean isOpenTag() {
        return openTag && !isSelfClosing();
    }

    /**
     * Tells whether {@code other} has the same name (ignoring case) and the opposite
     * open/close flag.
     *
     * @param other the tag to compare with; may be {@code null}
     * @return {@code false} when {@code other} is {@code null}
     */
    public boolean matches(HtmlTag other) {
        return other != null
                && element.equalsIgnoreCase(other.element)
                && openTag != other.openTag;
    }

    /** Returns {@code true} when the tag name is listed in {@link #SELF_CLOSING_TAGS}. */
    public boolean isSelfClosing() {
        return SELF_CLOSING_TAGS.contains(element);
    }

    /** Two tags are equal when both the name and the open/close flag are equal. */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof HtmlTag) {
            HtmlTag other = (HtmlTag) obj;
            return element.equals(other.element) && openTag == other.openTag;
        }
        return false;
    }

    /** Hash code built from the same two fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return 31 * element.hashCode() + (openTag ? 1 : 0);
    }

    /**
     * Renders the tag as {@code <name>} or {@code </name>}; a tag named {@code "!--"}
     * is rendered with the body {@code "!-- --"}.
     */
    @Override
    public String toString() {
        return "<" + (openTag ? "" : "/")
                + (element.equals("!--") ? "!-- --" : element) + ">";
    }

    /**
     * The remaining fields and functions are related to HTML file parsing.
     */

    // a set of tags that don't need to be matched (self-closing)
    protected static final Set<String> SELF_CLOSING_TAGS = new HashSet<String>(
            Arrays.asList("!doctype", "!--", "?xml", "xml", "area", "base",
                    "basefont", "br", "col", "frame", "hr", "img",
                    "input", "link", "meta", "param"));

    // characters that end the tag name and start the attribute list
    protected static final String WHITESPACE = " \f\n\r\t";

    /**
     * Splits {@code text} into the tags it contains, in order of appearance.
     *
     * @param text the text to scan
     * @return a queue of the tags found; empty when none are found
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static Queue<HtmlTag> tokenize(String text) {
        StringBuffer buf = new StringBuffer(text);
        Queue<HtmlTag> queue = new LinkedList<HtmlTag>();

        // nextTag consumes the buffer up to and including each tag it returns.
        HtmlTag nextTag = nextTag(buf);
        while (nextTag != null) {
            queue.add(nextTag);
            nextTag = nextTag(buf);
        }
        return queue;
    }

    /**
     * Extracts the first tag from {@code buf} and deletes everything up to and
     * including that tag's closing bracket from the buffer.
     *
     * @param buf the remaining text; modified by this call
     * @return the next tag, or {@code null} when there is no further complete tag
     *         (including the case of a comment that is opened but never closed)
     */
    protected static HtmlTag nextTag(StringBuffer buf) {
        int openBracket = buf.indexOf("<");
        if (openBracket < 0) {
            return null;
        }

        // Search for '>' only after the '<' so that a stray '>' earlier in the text
        // does not stop tokenizing.
        int closeBracket = buf.indexOf(">", openBracket + 1);
        if (closeBracket < 0) {
            return null;
        }

        // check for HTML comments: <!-- -->
        int commentIndex = openBracket + 4;
        if (commentIndex <= buf.length()
                && buf.substring(openBracket + 1, commentIndex).equals("!--")) {
            // look for closing comment tag -->
            closeBracket = buf.indexOf("-->", commentIndex);
            if (closeBracket < 0) {
                return null;
            }
            // The inserted space separates "!--" from the comment text so the
            // attribute-stripping step below leaves exactly "!--" as the name.
            buf.insert(commentIndex, " ");
            closeBracket += 3; // advance to the closing bracket > (1 for the insert, 2 for "--")
        }

        String element = buf.substring(openBracket + 1, closeBracket).trim();

        // remove attributes: cut the name at the earliest whitespace character
        for (int i = 0; i < WHITESPACE.length(); i++) {
            int attributeIndex = element.indexOf(WHITESPACE.charAt(i));
            if (attributeIndex >= 0) {
                element = element.substring(0, attributeIndex);
            }
        }

        // determine whether opening or closing tag
        boolean isOpenTag = true;
        if (element.startsWith("/")) {
            isOpenTag = false;
            element = element.substring(1);
        }

        // drop everything except letters, digits, '!' and '-' (e.g. the '/' in "br/")
        element = element.replaceAll("[^a-zA-Z0-9!-]+", "");

        buf.delete(0, closeBracket + 1);
        return new HtmlTag(element, isOpenTag);
    }
}