将Observer和Observable这两个类配合使用,可以产生一些很好的效果。 我们先看看Observer,它是一个接口,只有一个方法update(Observable o, Object arg);只有当Observable的子类先调用setChanged()、再调用notifyObservers(Object)时,这个方法才会被调用。 我们看例程:
//Main.java
import java.io.*; import java.util.*; import java.util.zip.*;
/**
 * Command-line demo that lists the files below a directory by observing a
 * FileWalker. Every path reported by the walker is printed as an absolute path.
 */
class Main implements Observer {
    /**
     * Registers this object as an observer of a new FileWalker and walks
     * {@code dir} with it.
     *
     * @param dir                the directory (or single file) to walk
     * @param includeDirectories whether directories are reported as well as files
     */
    Main(File dir, boolean includeDirectories) {
        FileWalker fw = new FileWalker();
        fw.addObserver(this);
        System.out.println("Main.Main()[1]");
        fw.walk(dir, includeDirectories);
        System.out.println("Main.Main()[2]");
    }

    /**
     * Simply prints out the absolute path of the reported file.
     *
     * @param o   the observable that sent the notification (unused)
     * @param arg the reported entry; must be a {@link File}
     */
    public void update(Observable o, Object arg) {
        File f = (File) arg;
        System.out.println(f.getAbsolutePath());
    }

    /**
     * Entry point. Expects a directory as the first argument and an optional
     * {@code -d} flag as the second; prints a usage line and exits with
     * status 1 otherwise.
     */
    public static void main(String[] args) {
        if (args.length < 1 || args.length > 2) {
            System.err.println("Usage: java Main <dir> [-d]");
            System.exit(1);
        }
        boolean includeDirectories = args.length == 2 && args[1].equals("-d");
        new Main(new File(args[0]), includeDirectories);
    }
}
/**
 * Walks a directory tree recursively and notifies its observers of every
 * entry it encounters. The notification argument is the {@link File} found.
 */
class FileWalker extends Observable {
    /**
     * Walks {@code dir} and everything below it.
     *
     * If includeDirectories is false, the walker does not notify the observers
     * when it encounters a directory. Encountered files are always reported.
     * A path that is not a directory is reported as a file.
     *
     * @param dir                the directory (or single file) to walk
     * @param includeDirectories whether directories are reported to observers
     */
    void walk(File dir, boolean includeDirectories) {
        if (dir.isDirectory()) {
            if (includeDirectories) {
                System.out.println("FileWalker.walk()[1]");
                setChanged();
                notifyObservers(dir);
            }
            String[] filenames = dir.list();

            // Recursively walk all subdirectories.
            // File.list() returns null when the directory cannot be read.
            if (filenames != null) {
                for (int i = 0; i < filenames.length; i++) {
                    System.out.println("FileWalker.walk()[2]");
                    walk(new File(dir, filenames[i]), includeDirectories);
                }
            }
        } else {
            System.out.println("FileWalker.walk()[3]");
            setChanged();
            notifyObservers(dir);
        }
    }
}
分析:该程序如果直接用File.list()方法和isDirectory()也同样可以实现目录列表显示的功能;这里是为了演示Observable和Observer的用法,我们来看看它的流程: 该程序主要有两个类Main和FileWalker,前者主要是使用FileWalker类,并在update()方法中显示出文件列表。
1、Main implements 了Observer,并实现了update()方法;它新建了一个FileWalker对象,然后调用其从Observable超类继承来的addObserver(this)方法。 2、再在Main中调用了walk这个主要的方法。 3、walk是本程序的核心部分。它用了File的isDirectory()、list()等方法和一个递归调用walk(new File(dir, filenames[i]), includeDirectories),并通过setChanged()和notifyObservers(dir)来触发Observer的update(),这就是本程序的核心。
下面我们再看一个例程:
import java.net.*; import java.io.*; import java.util.*;
class Main implements Observer { Main(String u, int depth) { try { URL url = new URL(Spider.adjustIfDir(u)); Spider spider = new Spider(url, depth);
spider.addObserver(this); spider.thread.join(); // Wait for spider to finish. } catch (MalformedURLException e) { } catch (InterruptedException e) { } }
// This method is called immediately whenever the spider // discovers a new URL. It should return as quickly as // possible since it is holding up the spider. public void update(Observable o, Object arg) { SpiderArgs warg = (SpiderArgs)arg; for (int i=0; i System.out.print(" "); } System.out.println(warg.dst); }
public static void main(String[] args) { if (args.length != 2) { System.err.println("Usage: java Main "); } else { new Main(args[0], Integer.parseInt(args[1])); } } }
class Spider extends Observable implements Runnable { Hashtable walked = new Hashtable(); int maxDepth; URL homeURL; String host; int port; Thread thread;
Spider(URL url, int depth) { homeURL = url; maxDepth = depth; host = url.getHost(); port = getPort(url);
// Start spider thread. thread = new Thread(this); thread.start(); }
void walk(URL url, int curDepth) throws IOException { Vector v = findLinks(url); // Remove duplicates for (int i=v.size()-1; i>=0; i--) { try { URL ur = new URL(url, (String)v.elementAt(i)); if (walked.get(ur) != null || !ur.getProtocol().equals("http") || !(getPort(ur) == port) || !ur.getHost().equals(host)) { v.removeElementAt(i); } else { walked.put(ur, ur); setChanged(); notifyObservers(new SpiderArgs(url, ur, curDepth)); } } catch (MalformedURLException e) { } }
// Now walk each of the links in url. if (curDepth < maxDepth) { for (int i=0; i URL ur = null; try { ur = new URL(url, (String)v.elementAt(i)); walk(ur, curDepth + 1); } catch (MalformedURLException e) { } catch (IOException e) { System.out.println("*** " + url + " -> " + ur); } } } }
// Finds all the links in 'url' and returns them in a vector. Vector findLinks(URL url) throws IOException { Vector v = new Vector(); BufferedReader in = new BufferedReader( new InputStreamReader(url.openStream())); String line; String lineLC;
while ((line = in.readLine()) != null) { while (line != null) { int p = line.indexOf(" if (p < 0) { p = line.indexOf(" if (p < 0) { break; } }
// Make sure the > is on the same line. int q = 0; while ((q=line.indexOf(">", p)) < 0) { String l = in.readLine(); if (l == null) { // EOF reached. return v; } line += l; } String u = getLink(in, line, p);
if (u != null && u.length() > 0) { v.addElement(adjustIfDir(u)); } // Continue looking for links on the line. line = line.substring(q+1); } } in.close(); return v; }
// Returns the port number of 'url'. If the port number is // not defined, returns the default HTTP port number. int getPort(URL url) { int p = url.getPort(); if (p == -1) { p = 80; } return p; }
// This method implements a heuristic for URLs that are probably // directories. If the last component of the URL does not contain // a dot and does not end with a "/", then it is explicitly // converted to a directory by appending a "/". static String adjustIfDir(String s) { int p = s.lastIndexOf("/") + 1;
if (!s.endsWith("/") && s.indexOf(".", p) < 0) { s += "/"; } return s; }
// Extracts the tag from s and then returns the remainder of // the line. String getLink(BufferedReader in, String s, int p) throws IOException { int e;
// Find the href attribute. p = s.indexOf("href="); if (p < 0) { p = s.indexOf("HREF="); if (p < 0) { // No href so skip the tag. return null; } }
// Skip the "href=" p += 5; int q = -1; if (s.charAt(p) == '"') { p++; q = s.indexOf('"', p); } else { q = s.indexOf(' ', p); int q2 = s.indexOf('>', p); if (Math.min(q, q2) < 0 && Math.max(q, q2) >= 0) { // If one is > 0 and the other < 0, use the > 0 one. q = Math.max(q, q2); }
// Use the smaller of the two. q = Math.min(q, q2); }
// Could not complete the href tag for some reason // so skip the tag. if (q < 0) { return null; } s = s.substring(p, q);
// Remove the reference, if any. p = s.indexOf('#'); if (p == 0) { return null; } else if (p > 0) { s = s.substring(0, p); } return s; }
public void run() { try { walk(homeURL, 0); } catch (IOException e) { System.out.println("*** " + homeURL); } } }
/**
 * Notification payload describing one discovered link: the page it was found
 * on, the URL it points to, and the depth passed in by the caller.
 */
class SpiderArgs {
    /** The page on which the link was found. */
    URL src;

    /** The URL the link points to. */
    URL dst;

    /** The depth value supplied when this object was created. */
    int depth;

    /**
     * Stores the three values without copying or validating them.
     *
     * @param src   the page on which the link was found
     * @param dst   the URL the link points to
     * @param depth the depth to record for this link
     */
    SpiderArgs(URL src, URL dst, int depth) {
        this.depth = depth;
        this.dst = dst;
        this.src = src;
    }
}
该程序主要采用Runnable线程技术和一些字符串判断,以递归的方式取得从一个页面出发能到达的所有链接。当然此程序还是有一些小小的漏洞,例如用Frontpage做出的、使用area href的链接没有被考虑在内。其实对它们的解析方法也是一样的。
|
|