Παράδειγμα: επεξεργασία αρχείων καταγραφής πρόσβασης σε ιστοσελίδες
/*
* Collect and print Web statistics
* D. Spinellis, 2004-2024
*/
import java.util.*;
import java.util.regex.*;
import java.io.*;
/**
 * Reads a Web server access log in the combined log format and prints
 * how many times each host, hour of day, request URL, and referrer URL
 * occurs, each report ordered by descending count.
 */
class WebStats {
    /**
     * Increment by one the count stored in the map for a member.
     * The member is the text of the specified capturing group, taken from
     * the string s at the offsets reported by the matcher.
     *
     * @param map counts keyed by member; a missing member starts at 1
     * @param s the string the matcher was applied to
     * @param m a matcher that has already matched successfully against s
     * @param group the index of the capturing group holding the member
     */
    static void increment(Map<String, Integer> map, String s, Matcher m, int group) {
        String member = s.substring(m.start(group), m.end(group));
        // merge() inserts 1 for a new member and adds 1 to an existing count.
        map.merge(member, 1, Integer::sum);
    }

    /**
     * List the contents of the given map, in the map's own iteration order.
     *
     * @param title heading printed (after an empty line) before the entries
     * @param map counts to print, one "count key" pair per line
     */
    static void list(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        for (Map.Entry<String, Integer> e : map.entrySet()) {
            System.out.println(e.getValue() + " " + e.getKey());
        }
    }

    /**
     * List the contents of the given map ordered by descending value.
     * Entries with equal values are all listed, ordered by key, so that
     * no entry is lost and the output is deterministic.
     *
     * @param title heading printed (after an empty line) before the entries
     * @param map counts to print, one "count key" pair per line
     */
    static void sortedList(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        // Comparing by value alone would make entries with the same count
        // indistinguishable; the key acts as a tie-breaker.
        Comparator<Map.Entry<String, Integer>> byCountThenKey =
            Map.Entry.<String, Integer>comparingByValue(Comparator.reverseOrder())
                .thenComparing(Map.Entry.comparingByKey());
        var valueOrder = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        valueOrder.sort(byCountThenKey);
        for (Map.Entry<String, Integer> e : valueOrder) {
            System.out.println(e.getValue() + " " + e.getKey());
        }
    }

    /**
     * Program entry point: process the log file named by the single
     * command-line argument and print the four access-count reports.
     * Lines that do not match the expected format are reported on the
     * standard output and otherwise ignored.
     * Exits with status 1 on a usage error, an invalid regular expression,
     * or a failure to open or read the file.
     *
     * @param args command-line arguments; exactly one file name is expected
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: WebStats file");
            System.exit(1);
        }

        Pattern cre = null; // Compiled RE
        try {
            // A standard log line is a line like:
            // 192.168.136.16 - - [26/Jan/2004:19:45:48 +0200] "GET /c136.html HTTP/1.1" 200 1674 "http://office/c120.html" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"
            // Pattern.COMMENTS makes the regex engine skip the whitespace
            // and the trailing "# ..." comments in the text block.
            String patternString = """
                ([-\\w.]+)\\s+ # 1. Host
                ([-\\w]+)\\s+ # 2. Logname
                ([-\\w]+)\\s+ # 3. User
                \\[(\\d+)/ # 4. Date
                (\\w+)/ # 5. Month
                (\\d+): # 6. Year
                (\\d+): # 7. Hour
                (\\d+) # 8. Minute
                ([^]]+?)\\]\\s+ # 9. Rest of time
                \"([-\\w]+)\\s* # 10. Request verb
                ([^\\s]*) # 11. Request URL
                ([^\"]*?)\"\\s+ # 12. Request protocol etc.
                (\\d+)\\s+ # 13. Status
                ([-\\d]+)\\s+ # 14. Bytes
                \"([^\"]*)\"\\s+ # 15. Referrer URL
                \"([^\"]*)\" # 16. Client
                """;
            cre = Pattern.compile(patternString, Pattern.COMMENTS);
        } catch (PatternSyntaxException e) {
            System.err.println("Invalid RE syntax: " + e.getDescription());
            System.exit(1);
        }

        var host = new HashMap<String, Integer>();
        var hour = new HashMap<String, Integer>();
        var request = new HashMap<String, Integer>();
        var referrer = new HashMap<String, Integer>();

        // try-with-resources closes the reader on every exit path.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0])))) {
            String s;
            while ((s = in.readLine()) != null) {
                Matcher m = cre.matcher(s);
                if (!m.matches()) {
                    System.out.println("Invalid line: " + s);
                } else {
                    increment(host, s, m, 1);
                    increment(hour, s, m, 7);
                    increment(request, s, m, 11);
                    increment(referrer, s, m, 15);
                }
            }
        } catch (FileNotFoundException e) {
            // The file name is the only argument, i.e. args[0].
            System.err.println("Unable to open file " + args[0] + ": " + e.getMessage());
            System.exit(1);
        } catch (IOException e) {
            System.err.println("Error reading line: " + e.getMessage());
            System.exit(1);
        }

        sortedList("Host Access Counts", host);
        sortedList("Hourly Access Counts", hour);
        sortedList("Request URL Access Counts", request);
        sortedList("Referrer URL Access Counts", referrer);
    }
}