Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

handler-parser.h

Go to the documentation of this file.
00001 //***************************************************************************
00002 // This source code is copyrighted 2002 by Google Inc.  All rights
00003 // reserved.  You are given a limited license to use this source code for
00004 // purposes of participating in the Google programming contest.  If you
00005 // choose to use or distribute the source code for any other purpose, you
00006 // must either (1) first obtain written approval from Google, or (2)
00007 // prominently display the foregoing copyright notice and the following
00008 // warranty and liability disclaimer on each copy used or distributed.
00009 // 
00010 // The source code and repository (the "Software") is provided "AS IS",
00011 // with no warranty, express or implied, including but not limited to the
00012 // implied warranties of merchantability and fitness for a particular
00013 // use.  In no event shall Google Inc. be liable for any damages, direct
00014 // or indirect, even if advised of the possibility of such damages.
00015 //***************************************************************************
00016 
00017 
00018 #ifndef _PARSE_HANDLER_H_
00019 #define _PARSE_HANDLER_H_
00020 #include "document.h"
00021 
00022 // Allowed pattern of calls on a ParseHandler is:
00023 //    ( ( NewDocument
00024 //        ( AddHeader  |
00025 //          AddBody    |
00026 //          AddTerm    |
00027 //          AddNumber  |
00028 //          AddTitle   |
00029 //          AddBaseURL |
00030 //          AddAnchor  |
00031 //          AnchorDone |
00032 //          ChangeFontColor    |
00033 //          ChangeFontColorEnd |
00034 //          ChangeBGColor      |
00035 //          ChangeBGColorEnd   |
00036 //          AddImage   |
00037 //          AddForm    |
00038 //          AddApplet  |
00039 //          AddArea    |
00040 //          AddFrame   |
00041 //          AddMeta    |
00042 //          AddBodyDone)*
00043 //        EndDocument
00044 //      ) | (Flush Checkpoint?)?
00045 //   )*
00046 // I.e., all of the "Add*" calls are bracketed by NewDocument/EndDocument,
00047 // and "Flush" is not bracketed by NewDocument/EndDocument.
00048 
00049 class ParseHandler {  // Parser callback interface; every hook below has a default body.
00050  public:
00051   virtual ~ParseHandler() {}
00052 
00053   // These are called before and after we parse a document.
00054   virtual void NewDocument(const Document* doc) {}
00055   virtual void EndDocument(const Document* doc) {}
00056 
00057   // This is called when the Parser is told to Flush().
00058   // It will never be called within a NewDocument/EndDocument pair.
00059   virtual void Flush() {}
00060 
00061   // The intended use is to parse the header lines of HTTP pages,
00062   // but it can be called with any associative data found on a page.
00063   // If valuelen is 0, key holds the status line ("HTTP/1.0 200 OK").
00064   virtual void AddHeader(const char* key, int keylen,
00065                          const char* value, int valuelen) {}
00066 
00067   // This tells you info about the status line (first line of header).
00068   virtual void AddResponseCode(int response_code) {}
00069 
00070   virtual void AddTerm(const char* term, int termlen, int face, int size) { }
00071 
00072   virtual void AddPunctuation(const char * text, int leng,
00073                               int face, int size) { }
00074 
00075   // These are pretty HTML-specific.  They're used to handle anchors.
00076   // AddAnchor is called with the HREF of the anchor, while AnchorDone
00077   // is called upon seeing the </A>.  You can use this information to
00078   // have AddTerm() treat intervening words as anchor text.
00079   virtual void AddBaseURL(const char* baseurl, int baseurllen) {}
00080   virtual void AddAnchor(const char* href, int hreflen) {} 
00081   virtual void AddLocalName(const char * name, int namelen) {}
00082   virtual void AnchorDone() {}
00083   // The following four methods are used to track color changes.
00084   virtual void ChangeFontColor(const char * color, int colorlen) {}
00085   virtual void ChangeFontColorEnd() {}
00086   virtual void ChangeBGColor(const char * color, int colorlen) {}
00087   virtual void ChangeBGColorEnd() {}
00088 
00089   // Also HTML-specific: the "entire" field of an img tag or perhaps imgmap.
00090   virtual void AddImage(const char* tag, int taglen) {}
00091   // NOTE(review): presumably the height/width attribute text, despite the "src" name -- confirm.
00092   virtual void AddImageHeight(const char* src, int srclen) {}
00093   virtual void AddImageWidth(const char* src, int srclen) {}
00094 
00095   // "Embedded" applications
00096   virtual void AddApplet(const char* src, int srclen) {}
00097   virtual void AddAppletDone() {}
00098   virtual void AddIFrame(const char* src, int srclen) {}
00099   virtual void AddIFrameDone() {}
00100 
00101   // Hard to imagine how these would be used outside HTML.  The variable
00102   // name indicates which field of the given tag we're interested in.
00103   // Note: by default, we treat frames and areas the same as anchors:
00104   virtual void AddFrame(const char* src, int srclen) { 
00105     AddAnchor(src, srclen); AnchorDone();       // no anchor text for us
00106   }
00107   virtual void AddArea(const char* href, int hreflen) {
00108     AddAnchor(href, hreflen); AnchorDone();       // no anchor text for us
00109   } 
00110 
00111   // Similar in some ways to AddHeader, but it doesn't try to separate
00112   // the key or value.
00113   virtual void AddMeta(const char* meta, int metalen) {}
00114 
00115   virtual void AddFrameset(const char* fields, int fieldslen) {}
00116   virtual void AddFramesetDone() {}
00117 
00118   // Use with caution: only the first call within a given document is valid.
00119   virtual void AddBody(const char* body, int bodylen) {}
00120   virtual void AddBodyDone() {}
00121 
00122   // These are called when we reach a P, /P respectively.  For most
00123   // applications, we probably just want to do the same thing.
00124   virtual void ParagraphStart(const char* fields, int fieldlen) { }
00125   virtual void ParagraphEnd() { }
00126 
00127   virtual void AddBreak() { }
00128   virtual void AddHorizontalRule() { }
00129 
00130   virtual void AddListItem() { }
00131   virtual void AddUnorderedList() { }
00132   virtual void AddOrderedList() { }
00133   virtual void AddListDone() { }
00134 
00135   virtual void AddDiv(const char * fields, int fieldlen) { }
00136   virtual void AddDivDone(const char * fields, int fieldlen) { }  // NOTE(review): takes fields, unlike e.g. AddFormDone()
00137   virtual void AddSpan(const char * fields, int fieldlen) { }
00138   virtual void AddSpanDone(const char * fields, int fieldlen) { }  // NOTE(review): takes fields, unlike e.g. AddFormDone()
00139 
00140   virtual void AddTable() { }
00141   virtual void AddTableDone() { }
00142   virtual void AddCaption() { }
00143   virtual void AddCaptionDone() { }
00144   virtual void AddTableHCell(const char* fields, int fieldlen) { }
00145   virtual void AddTableDCell(const char* fields, int fieldlen) { }
00146   virtual void AddTableCellDone() { }
00147   virtual void AddTableRow() { }
00148   virtual void AddTableRowDone() { }
00149 
00150   virtual void AddForm(const char * fields, int fieldlen) { }
00151   virtual void AddFormDone() { }
00152   virtual void AddSelect(const char * fields, int fieldlen) { }
00153   virtual void AddSelectDone() { }
00154   virtual void AddOption(const char * fields, int fieldlen) { }
00155   virtual void AddOptionDone() { }
00156   virtual void AddTextArea(const char * fields, int fieldlen) { }
00157   virtual void AddTextAreaDone() { }
00158   virtual void AddInput(const char * fields, int fieldlen) { }
00159   
00160   virtual void AddHeading(int hnum) { }
00161   virtual void AddHeadingDone() { }
00162 
00163   virtual void AddNoframes() { }
00164   virtual void AddNoframesDone() { }
00165 
00166   virtual void WhitespaceEndedTerm() { }
00167 
00168   virtual void AddObject(const char * fields, int fieldlen) { }
00169   virtual void AddObjectDone() { }
00170   virtual void AddParam(const char * fields, int fieldlen) { }
00171   virtual void AddEmbed(const char * fields, int fieldlen) { }
00172 
00173   virtual void AddHead(const char * fields, int fieldlen) { }
00174   virtual void AddHeadDone() { }
00175 };
00176 
00177 #endif /* #ifndef _PARSE_HANDLER_H_ */

Generated on Wed May 29 11:37:14 2002 for MarkovPR by doxygen1.2.15