|
|
-
-
- import java.util.Arrays;
- import java.util.HashSet;
- import java.util.LinkedList;
- import java.util.Queue;
- import java.util.Set;
-
- /*
- * SD2x Homework #2
- * This class represents a single HTML tag.
- * Please do not change this code! Your solution will be evaluated using this version of the class.
- */
-
/**
 * A single HTML tag, identified by its lower-cased element name and by whether it was written
 * as an opening tag ({@code <p>}) or a closing tag ({@code </p>}).
 *
 * <p>Instances are immutable. The static {@link #tokenize(String)} method extracts the sequence
 * of tags from a piece of HTML text.
 */
public class HtmlTag {

    /** Element name, always stored lower-cased (for example "body", "br" or "!--"). */
    protected final String element;

    /** {@code true} if the tag was constructed as an opening tag, {@code false} for a closing tag. */
    protected final boolean openTag;

    /**
     * Creates a tag.
     *
     * @param element   the element name; it is lower-cased before being stored
     * @param isOpenTag {@code true} for an opening tag, {@code false} for a closing tag
     * @throws NullPointerException if {@code element} is {@code null}
     */
    public HtmlTag(String element, boolean isOpenTag) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        // (under a Turkish locale "IMG" would otherwise not become "img").
        this.element = element.toLowerCase(java.util.Locale.ROOT);
        openTag = isOpenTag;
    }

    /** @return the lower-cased element name of this tag */
    public String getElement() {
        return element;
    }

    /**
     * @return {@code true} only if this is an opening tag that is not self-closing, i.e. one
     *         that still needs a matching closing tag
     */
    public boolean isOpenTag() {
        return openTag && !isSelfClosing();
    }

    /**
     * Tells whether {@code other} is the counterpart of this tag: same element name, but one of
     * the two is an opening tag and the other a closing tag.
     *
     * @param other the tag to compare with; may be {@code null}
     * @return {@code true} if the two tags form an open/close pair
     */
    public boolean matches(HtmlTag other) {
        return other != null
                && element.equalsIgnoreCase(other.element)
                && openTag != other.openTag;
    }

    /** @return {@code true} if the element name is listed in {@link #SELF_CLOSING_TAGS} */
    public boolean isSelfClosing() {
        return SELF_CLOSING_TAGS.contains(element);
    }

    /**
     * Two tags are equal when they have the same element name and the same open/close flag.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof HtmlTag) {
            HtmlTag other = (HtmlTag) obj;
            return element.equals(other.element)
                    && openTag == other.openTag;
        }
        return false;
    }

    /**
     * Hash code derived from the same two fields that {@link #equals(Object)} compares, so that
     * equal tags behave correctly in hash-based collections.
     */
    @Override
    public int hashCode() {
        return 31 * element.hashCode() + (openTag ? 1 : 0);
    }

    /**
     * @return the tag in angle-bracket form, e.g. {@code <p>} or {@code </p>}; a comment tag is
     *         rendered as {@code <!-- -->}
     */
    @Override
    public String toString() {
        return "<" + (openTag ? "" : "/")
                + (element.equals("!--") ? "!-- --" : element) + ">";
    }

    /**
     * The remaining fields and functions are related to HTML file parsing.
     */

    // a set of tags that don't need to be matched (self-closing)
    protected static final Set<String> SELF_CLOSING_TAGS = new HashSet<String>(
            Arrays.asList("!doctype", "!--", "?xml", "xml", "area", "base",
                    "basefont", "br", "col", "frame", "hr", "img",
                    "input", "link", "meta", "param"));

    // characters that separate an element name from its attributes
    protected static final String WHITESPACE = " \f\n\r\t";

    /**
     * Splits HTML text into its tags, in document order. Text between tags is discarded.
     *
     * @param text the HTML text to scan
     * @return a queue of the tags found; empty if the text contains no complete tag
     */
    public static Queue<HtmlTag> tokenize(String text) {
        StringBuffer buf = new StringBuffer(text);
        Queue<HtmlTag> queue = new LinkedList<HtmlTag>();

        HtmlTag nextTag = nextTag(buf);
        while (nextTag != null) {
            queue.add(nextTag);
            nextTag = nextTag(buf);
        }
        return queue;
    }

    /**
     * Extracts the first tag from {@code buf} and removes everything up to and including that
     * tag's closing bracket from the buffer.
     *
     * @param buf the remaining unparsed text; modified in place
     * @return the next tag, or {@code null} if no complete tag (or no terminated comment) remains
     */
    protected static HtmlTag nextTag(StringBuffer buf) {
        int openBracket = buf.indexOf("<");
        if (openBracket < 0) {
            return null;
        }
        // Look for '>' only after the opening bracket, so a stray '>' in the preceding text
        // (e.g. "1 > 0 <b>") is not mistaken for the end of the tag.
        int closeBracket = buf.indexOf(">", openBracket + 1);
        if (closeBracket < 0) {
            return null;
        }

        // check for HTML comments: <!-- -->
        int commentIndex = openBracket + 4;
        if (commentIndex <= buf.length()
                && buf.substring(openBracket + 1, commentIndex).equals("!--")) {
            // look for closing comment tag -->
            int commentEnd = buf.indexOf("-->", commentIndex);
            if (commentEnd < 0) {
                return null;
            }
            // Insert a space after "<!--" so the comment text is cut off as if it were an
            // attribute; the insertion shifts the final '>' from commentEnd + 2 to commentEnd + 3.
            buf.insert(commentIndex, " ");
            closeBracket = commentEnd + 3;
        }

        String element = stripAttributes(buf.substring(openBracket + 1, closeBracket).trim());

        // determine whether opening or closing tag
        boolean isOpenTag = true;
        if (element.startsWith("/")) {
            isOpenTag = false;
            element = element.substring(1);
        }
        // drop any remaining punctuation, such as the trailing '/' of "<br/>"
        element = element.replaceAll("[^a-zA-Z0-9!-]+", "");

        buf.delete(0, closeBracket + 1);
        return new HtmlTag(element, isOpenTag);
    }

    // Returns the part of tagText before its first WHITESPACE character (the bare element name).
    private static String stripAttributes(String tagText) {
        for (int i = 0; i < tagText.length(); i++) {
            if (WHITESPACE.indexOf(tagText.charAt(i)) >= 0) {
                return tagText.substring(0, i);
            }
        }
        return tagText;
    }
}
|