import java.util.TreeMap; import java.util.Iterator; import java.util.Set; import java.net.URL; import java.util.StringTokenizer; /** * A class to count the number of occurences of HTML tags in an html file.
* This class illustrates the use of a TreeMap. */ public class HTMLTagCounter { // The map to store the list of tags and their number of occurences // A tree map allows the ordering of the entry. We provide our own ordering // since we want first the begin tags (such as ) in alphabetical order // and then the end tags (such as ) also in alphabetical order. private TreeMap HTMLTagMap; // is an HTML tag currently read? private boolean inATag; // the HTML tag currently extracted private String currentTag; /** * Construct the list of HTML tags and their number of occurences * in an html file. The file is specified by its url. Examples of url's are * http://www.seattlecentral.org * file:///C:/temp/myFile.html (on a windows machine) * @param url the location of the html file */ public HTMLTagCounter(String URLName) { // Read the list of tags from the html file InputURL input = new InputURL(URLName); HTMLTagMap = new TreeMap(new HTMLTagComparator()); // No tag initially currentTag = ""; // Read the html file line by line String line; while( (line=input.readLine())!=null) { // add the tags in the current line to the map addHTMLTagsIn(line); } } /** * Add (in alphabetical order) all of the HTML tags in this line to the map */ private void addHTMLTagsIn(String line) { // Look for < if the start of a tag has not been found yet // (return if there is no < in the line) if (!inATag) { } // At this point, a tag has been found // The tag is the first word up to the first > (unless we already have // a tag and are just looking for >). // There is no tag if currentTag is "" or "/" // Use a StringTokenizer to extract the first word // Look for the end of the tag (>) // Look for tags in what is left in line } /** * add a tag to the list of tags and update the number of occurences * of that tag * @param tag the HTML tag (given as a String) */ private void addHTMLTag(String tag) { // Make the tag all upper case // Is it already in the map? // If not, create a new entry } /** * Return in a String the list of all of the tags and their occurences */ public String toString() { // Write the tags with their number of occurences return null; //CHANGE THIS } /** * Return as a String an entry of the map. * @param tag the key of the entry in the map */ private String entryAsAString(String tag) { return null; //CHANGE THIS } /** * Return the number of occurences of a tag * @param tag the HTML tag whose number of occurences is returned */ public int numberOfOccurences(String tag) { // Format the tag according to what is in the map // Add the delimeters < and > if not present // Find it in the map return 0; //CHANGE THIS } /** * To test the class */ public static void main(String[] args) { uwcse.io.Input input = new uwcse.io.Input(); // Get the url String urlName = input.readString("URL (e.g. http://www.washington.edu): "); // Count the tags in this file HTMLTagCounter c = new HTMLTagCounter(urlName); System.out.println(c.toString()); // Count tag by tag String tag; do { tag=input.readString("Tag to count (0 to stop): "); if (!tag.equals("0")) System.out.println(tag+": "+c.numberOfOccurences(tag)); }while(!tag.equals("0")); } }