Διαίρεση συμβολοσειρών με πρότυπα

Παράδειγμα: επεξεργασία αρχείων πρόσβασης σε ιστοσελίδες

/*
 * Collect and print Web statistics
 * D. Spinellis, January 2004
 */

import java.util.*;
import java.util.regex.*;
import java.io.*;

/**
 * Reads a Web server access log and prints, for every host, hour,
 * request URL and referrer URL found in it, the number of log lines
 * in which that value appears.
 */
class WebStats {

        /**
         * Increment by one the counter that map keeps for a captured group.
         * The key is the part of s delimited by the matcher's start and end
         * offsets for the given group; a key seen for the first time
         * starts at 1.
         *
         * @param map   counters, keyed by String with Integer values
         * @param s     the string the matcher was applied to
         * @param m     a matcher holding a successful match
         * @param group number of the capturing group to count
         */
        static void increment(Map map, String s, Matcher m, int group) {
                String member = s.substring(m.start(group), m.end(group));
                Integer count = (Integer)map.get(member);
                int updated = (count == null) ? 1 : count.intValue() + 1;
                // Explicit boxing: the file sticks to pre-Java 5 constructs
                map.put(member, new Integer(updated));
        }

        /**
         * Print a blank line, the title, and then one "value key" line
         * for every entry of the map, in the map's own iteration order.
         *
         * @param title heading printed above the entries
         * @param map   the entries to print
         */
        static void list(String title, Map map) {
                System.out.println("\n" + title);
                for (Iterator it = map.entrySet().iterator(); it.hasNext(); ) {
                        Map.Entry entry = (Map.Entry)it.next();
                        System.out.println(entry.getValue() + " " + entry.getKey());
                }
        }

        /**
         * Program entry point: process the log file named by the single
         * command-line argument and terminate with exit status 1 on any
         * failure.
         */
        public static void main(String args[]) {
                int status = run(args);
                if (status != 0)
                        System.exit(status);
        }

        /**
         * Do the work of main without terminating the JVM.
         * Lines that do not match the log format are reported on standard
         * output as "Invalid line: ..." and otherwise skipped.
         *
         * @param args command-line arguments; exactly one (the log file) is expected
         * @return 0 when the statistics were printed, 1 when the arguments
         *         were wrong, the pattern was invalid, or the file could
         *         not be opened or read (a message is printed on standard error)
         */
        static int run(String args[]) {
                if (args.length != 1) {
                        System.err.println("Usage: WebStats file");
                        return 1;
                }

                Pattern cre;            // Compiled RE
                try {
                        cre = compileLogPattern();
                } catch (PatternSyntaxException e) {
                        System.err.println("Invalid RE syntax: " + e.getDescription());
                        return 1;
                }

                BufferedReader in;
                try {
                        in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
                } catch (FileNotFoundException e) {
                        // The file name is args[0]; args[1] does not exist here
                        System.err.println("Unable to open file " + args[0] + ": " + e.getMessage());
                        return 1;
                }

                HashMap host = new HashMap();
                HashMap hour = new HashMap();
                HashMap request = new HashMap();
                HashMap referrer = new HashMap();
                try {
                        String s;
                        while ((s = in.readLine()) != null) {
                                Matcher m = cre.matcher(s);
                                if (!m.matches())
                                        System.out.println("Invalid line: " + s);
                                else {
                                        increment(host, s, m, 1);
                                        increment(hour, s, m, 7);
                                        increment(request, s, m, 11);
                                        increment(referrer, s, m, 15);
                                }
                        }
                } catch (IOException e) {
                        System.err.println("Error reading line: " + e.getMessage());
                        return 1;
                } finally {
                        try {
                                in.close();
                        } catch (IOException ignored) {
                                // All input has been consumed (or an error was
                                // already reported); a failed close changes nothing
                        }
                }

                list("Host Access Counts", host);
                list("Hourly Access Counts", hour);
                list("Request URL Access Counts", request);
                list("Referrer URL Access Counts", referrer);
                return 0;
        }

        /**
         * Compile the regular expression describing one access log line.
         * The capturing groups are numbered as annotated below.
         */
        private static Pattern compileLogPattern() {
                // A standard log line is a line like:
                // 192.168.136.16 - - [26/Jan/2004:19:45:48 +0200] "GET /c136.html HTTP/1.1" 200 1674 "http://office/c120.html" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"
                return Pattern.compile(
                        "([-\\w.]+)\\s+" +      // 1. Host
                        "([-\\w]+)\\s+" +       // 2. Logname
                        "([-\\w]+)\\s+" +       // 3. User
                        "\\[(\\d+)/" +          // 4. Date
                        "(\\w+)/" +             // 5. Month
                        "(\\d+):" +             // 6. Year
                        "(\\d+):" +             // 7. Hour
                        "(\\d+)" +              // 8. Minute
                        "([^]]+?)\\]\\s+" +     // 9. Rest of time
                        "\"([-\\w]+)\\s*" +     // 10. Request verb
                        "([^\\s]*)" +           // 11. Request URL
                        "([^\"]*?)\"\\s+" +     // 12. Request protocol etc.
                        "(\\d+)\\s+" +          // 13. Status
                        "([-\\d]+)\\s+" +       // 14. Bytes
                        "\"([^\"]*)\"\\s+" +    // 15. Referrer URL
                        "\"([^\"]*)\""          // 16. Client
                );
        }
}