This repository acts as a personal archive for my solutions to the edX course *Data Structures and Software Design* from PennX.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123 lines
4.0 KiB

  1. import java.util.Arrays;
  2. import java.util.HashSet;
  3. import java.util.LinkedList;
  4. import java.util.Queue;
  5. import java.util.Set;
  6. /*
  7. * SD2x Homework #2
  8. * This class represents a single HTML tag.
  9. * Please do not change this code! Your solution will be evaluated using this version of the class.
  10. */
  11. public class HtmlTag {
  12. protected final String element;
  13. protected final boolean openTag;
  14. public HtmlTag(String element, boolean isOpenTag) {
  15. this.element = element.toLowerCase();
  16. openTag = isOpenTag;
  17. }
  18. public String getElement() {
  19. return element;
  20. }
  21. public boolean isOpenTag() {
  22. return openTag && !isSelfClosing();
  23. }
  24. public boolean matches(HtmlTag other) {
  25. return other != null
  26. && element.equalsIgnoreCase(other.element)
  27. && openTag != other.openTag;
  28. }
  29. public boolean isSelfClosing() {
  30. return SELF_CLOSING_TAGS.contains(element);
  31. }
  32. public boolean equals(Object obj) {
  33. if (obj instanceof HtmlTag) {
  34. HtmlTag other = (HtmlTag) obj;
  35. return element.equals(other.element)
  36. && openTag == other.openTag;
  37. }
  38. return false;
  39. }
  40. public String toString() {
  41. return "<" + (openTag ? "" : "/")
  42. + (element.equals("!--") ? "!-- --" : element) + ">";
  43. }
  44. /**
  45. * The remaining fields and functions are related to HTML file parsing.
  46. */
  47. // a set of tags that don't need to be matched (self-closing)
  48. protected static final Set<String> SELF_CLOSING_TAGS = new HashSet<String>(
  49. Arrays.asList("!doctype", "!--", "?xml", "xml", "area", "base",
  50. "basefont", "br", "col", "frame", "hr", "img",
  51. "input", "link", "meta", "param"));
  52. protected static final String WHITESPACE = " \f\n\r\t";
  53. public static Queue<HtmlTag> tokenize(String text) {
  54. StringBuffer buf = new StringBuffer(text);
  55. Queue<HtmlTag> queue = new LinkedList<HtmlTag>();
  56. HtmlTag nextTag = nextTag(buf);
  57. while (nextTag != null) {
  58. queue.add(nextTag);
  59. nextTag = nextTag(buf);
  60. }
  61. return queue;
  62. }
  63. protected static HtmlTag nextTag(StringBuffer buf) {
  64. int openBracket = buf.indexOf("<");
  65. int closeBracket = buf.indexOf(">");
  66. if (openBracket >= 0 && closeBracket > openBracket) {
  67. // check for HTML comments: <!-- -->
  68. int commentIndex = openBracket + 4;
  69. if (commentIndex <= buf.length()
  70. && buf.substring(openBracket + 1, commentIndex).equals("!--")) {
  71. // look for closing comment tag -->
  72. closeBracket = buf.indexOf("-->", commentIndex);
  73. if (closeBracket < 0) {
  74. return null;
  75. } else {
  76. buf.insert(commentIndex, " ");
  77. closeBracket += 3; // advance to the closing bracket >
  78. }
  79. }
  80. String element = buf.substring(openBracket + 1, closeBracket).trim();
  81. // remove attributes
  82. for (int i = 0; i < WHITESPACE.length(); i++) {
  83. int attributeIndex = element.indexOf(WHITESPACE.charAt(i));
  84. if (attributeIndex >= 0) {
  85. element = element.substring(0, attributeIndex);
  86. }
  87. }
  88. // determine whether opening or closing tag
  89. boolean isOpenTag = true;
  90. int checkForClosing = element.indexOf("/");
  91. if (checkForClosing == 0) {
  92. isOpenTag = false;
  93. element = element.substring(1);
  94. }
  95. element = element.replaceAll("[^a-zA-Z0-9!-]+", "");
  96. buf.delete(0, closeBracket + 1);
  97. return new HtmlTag(element, isOpenTag);
  98. } else {
  99. return null;
  100. }
  101. }
  102. }