Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

parsehandler-preparsecat.cc

Go to the documentation of this file.
00001 //***************************************************************************
00002 // This source code is copyrighted 2002 by Google Inc.  All rights
00003 // reserved.  You are given a limited license to use this source code for
00004 // purposes of participating in the Google programming contest.  If you
00005 // choose to use or distribute the source code for any other purpose, you
00006 // must either (1) first obtain written approval from Google, or (2)
00007 // prominently display the foregoing copyright notice and the following
00008 // warranty and liability disclaimer on each copy used or distributed.
00009 // 
00010 // The source code and repository (the "Software") is provided "AS IS",
00011 // with no warranty, express or implied, including but not limited to the
00012 // implied warranties of merchantability and fitness for a particular
00013 // use.  In no event shall Google Inc. be liable for any damages, direct
00014 // or indirect, even if advised of the possibility of such damages.
00015 //***************************************************************************
00016 
00017 
00018 // This is for testing the preparser.  It prints to
00019 // stdout most callbacks that it gets.
00020 
00021 #include <stdio.h>
00022 #include "handler-parser.h"        // where parsehandlers are defined
00023 
00024 
00025 class PreparseCatParseHandler : public ParseHandler {
00026  public:
00027   virtual void NewDocument(const Document* doc) {
00028     printf("New document: '%s'\n", doc->url());
00029     in_anchor_ = false;            // initial value for this document
00030     just_put_whitespace_ = false;
00031     just_put_term_or_punc_ = false;
00032   }
00033 
00034   virtual void EndDocument(const Document* doc) {
00035     printf("End of document\n");
00036   }
00037 
00038   virtual void AddHeader(const char* key, int keylen,
00039                          const char* value, int valuelen) {
00040     printf("Header: '%.*s'='%.*s'\n", keylen, key, valuelen, value);
00041     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00042   }
00043 
00044   virtual void AddResponseCode(int response_code) {
00045     printf("AddResponseCode: %d\n", response_code);
00046     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00047   }
00048 
00049   virtual void AddTerm(const char* term, int termlen, int face, int size) {
00050     printf("AddTerm: '%.*s' (face=%d, size=%d%s)\n",
00051            termlen, term, face, size, in_anchor_ ? ", in anchor" : "");
00052     just_put_whitespace_ = false; 
00053     just_put_term_or_punc_ = true;
00054    }
00055 
00056   virtual void AddBaseURL(const char* baseurl, int baseurllen) {
00057     printf("AddBaseURL: '%.*s'\n", baseurllen, baseurl);
00058     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00059    }
00060 
00061   virtual void AddAnchor(const char* href, int hreflen) {
00062     printf("AddAnchor: '%.*s'\n", hreflen, href);
00063     in_anchor_ = true;
00064     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00065    }
00066 
00067   virtual void AddLocalName(const char * name, int namelen) {
00068     printf("AddLocalName: '%.*s'\n", namelen, name);
00069     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00070    }
00071 
00072   virtual void AnchorDone() {
00073     printf("Done with anchor text\n");
00074     in_anchor_ = false;
00075     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00076    }
00077 
00078   virtual void AddImage(const char* tag, int taglen) {
00079     printf("AddImage: '%.*s'\n", taglen, tag);
00080     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00081    }
00082 
00083   virtual void AddApplet(const char* code, int codelen) {
00084     printf("AddApplet: '%.*s'\n", codelen, code);
00085     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00086    }
00087 
00088   virtual void AddIFrame(const char *text, int textlen) {
00089     printf("AddIFrame: '%.*s'\n", textlen, text);
00090     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00091    }
00092 
00093   virtual void AddIFrameDone() {
00094     printf("AddIFrameDone\n");
00095     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00096    }
00097 
00098   virtual void AddArea(const char* href, int hreflen) {
00099     printf("AddArea: '%.*s'\n", hreflen, href);
00100     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00101    }
00102 
00103   virtual void AddFrame(const char* src, int srclen) {
00104     printf("AddFrame: '%.*s'\n", srclen, src);
00105     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00106    }
00107 
00108   virtual void AddFrameset(const char* src, int srclen) {
00109     printf("AddFrameset: '%.*s'\n", srclen, src);
00110     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00111    }
00112 
00113   virtual void AddFramesetDone() { 
00114     printf("AddFramesetDone\n"); 
00115     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00116   }
00117 
00118   virtual void AddMeta(const char* meta, int metalen) {
00119     printf("AddMeta: '%.*s'\n", metalen, meta);
00120     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00121    }
00122 
00123   virtual void AddBody(const char* body, int bodylen) {
00124     printf("AddBody: '%.*s'\n", bodylen, body);
00125     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00126    }
00127 
00128   virtual void AddBodyDone() {
00129     printf("AddBodyDone\n");
00130     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00131    }
00132 
00133   virtual void ParagraphStart(const char *fields, int fieldlen) {
00134     printf("Paragraph Start: '%.*s'\n", fieldlen, fields);
00135     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00136    }
00137 
00138   virtual void ParagraphEnd() {
00139     printf("Paragraph End\n");
00140     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00141    }
00142 
00143   virtual void AddPunctuation(const char * text, int textlen, int face,
00144                                 int size) {
00145     printf("AddPunctuation: %.*s (face=%d, size=%d%s)\n",
00146            textlen, text, face, size, in_anchor_ ? ", in anchor" : "");
00147            
00148     just_put_whitespace_ = false; 
00149     just_put_term_or_punc_ = true;
00150    }
00151 
00152   // These are useful for tables...
00153   virtual void AddTable() { 
00154     printf("Table\n"); 
00155     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00156   }
00157 
00158   virtual void AddTableDone() { 
00159     printf("TableDone\n"); 
00160     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00161   }
00162 
00163   virtual void AddCaption() { 
00164     printf("Caption\n"); 
00165     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00166   }
00167 
00168   virtual void AddCaptionDone() { 
00169     printf("CaptionDone\n"); 
00170     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00171   }
00172 
00173   virtual void AddTableHCell(const char* fields, int fieldlen) {
00174     printf("TableHCell: %.*s\n", fieldlen, fields); 
00175     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00176   }
00177 
00178   virtual void AddTableDCell(const char* fields, int fieldlen) {
00179     printf("TableDCell: %.*s\n", fieldlen, fields); 
00180     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00181   }
00182 
00183   virtual void AddTableRow() { 
00184     printf("TableRow\n"); 
00185     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00186   }
00187 
00188   virtual void AddTableRowDone() { 
00189     printf("TableRowDone\n"); 
00190     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00191   }
00192 
00193   // Forms
00194   virtual void AddForm(const char * fields, int fieldlen) {
00195     printf("Form\n");
00196     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00197    }
00198 
00199   virtual void AddFormDone() { 
00200     printf("FormDone\n"); 
00201     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00202   }
00203 
00204   virtual void AddSelect(const char * fields, int fieldlen) {
00205     printf("Select\n");
00206     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00207    }
00208   virtual void AddSelectDone() { 
00209     printf("SelectDone\n"); 
00210     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00211   }
00212 
00213   virtual void AddNoframes() { 
00214     printf("Noframes\n"); 
00215     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00216   }
00217 
00218   virtual void AddNoframesDone() { 
00219     printf("NoframesDone\n"); 
00220     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00221   }
00222 
00223   virtual void AddOption(const char * fields, int fieldlen) {
00224     printf("Option: '%.*s'\n", fieldlen, fields);
00225     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00226    }
00227 
00228   virtual void AddOptionDone() { 
00229     printf("OptionDone\n"); 
00230     just_put_whitespace_ = false; just_put_term_or_punc_ = false; 
00231   }
00232 
00233   virtual void ChangeFontColor(const char* color, int len) {
00234     printf("ChangeFontColor: '%.*s'\n", len, color);
00235     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00236    }
00237 
00238   virtual void ChangeBGColor(const char* color, int len) {
00239     printf("ChangeBGColor: '%.*s'\n", len, color);
00240     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00241    }
00242 
00243   virtual void ChangeFontColorEnd() {
00244     printf("ChangeFontColorEnd\n");
00245     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00246    }
00247 
00248   virtual void ChangeBGColorEnd() {
00249     printf("ChangeBGColorEnd\n");
00250     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00251    }
00252 
00253   virtual void AddBreak() {
00254     printf("AddBreak\n");
00255     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00256    }
00257 
00258   virtual void AddHorizontalRule() {
00259     printf("AddHorizontalRule\n");
00260     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00261    }
00262 
00263   virtual void AddListItem() {
00264     printf("AddListItem\n");
00265     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00266    }
00267 
00268   virtual void AddUnorderedList() {
00269     printf("AddUnorderedList\n");
00270     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00271    }
00272 
00273   virtual void AddOrderedList() {
00274     printf("AddOrderedList\n");
00275     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00276    }
00277 
00278   virtual void AddListDone() {
00279     printf("AddListDone\n");
00280     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00281    }
00282 
00283   virtual void AddTextAreaDone() {
00284     printf("AddTextAreaDone\n");
00285     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00286    }
00287 
00288   virtual void AddTableCellDone() {
00289     printf("AddTableCellDone\n");
00290     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00291    }
00292 
00293   virtual void AddDiv(const char* fields, int fieldlen) {
00294     printf("AddDiv: '%.*s'\n", fieldlen, fields);
00295     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00296    }
00297 
00298   virtual void AddDivDone(const char * fields, int fieldlen) {
00299     printf("AddDivDone: '%.*s'\n", fieldlen, fields);
00300     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00301    }
00302 
00303   virtual void AddSpan(const char* fields, int fieldlen) {
00304     printf("AddSpan: '%.*s'\n", fieldlen, fields);
00305     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00306    }
00307 
00308   virtual void AddSpanDone(const char * fields, int fieldlen) {
00309     printf("AddSpanDone: '%.*s'\n", fieldlen, fields);
00310     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00311    }
00312 
00313   virtual void AddInput(const char* fields, int fieldlen) {
00314     printf("AddInput: '%.*s'\n", fieldlen, fields);
00315     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00316    }
00317 
00318   virtual void AddHeading(int hnum) {
00319     printf("AddHeading: %d\n", hnum);
00320     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00321    }
00322 
00323   virtual void AddHeadingDone() {
00324     printf("AddHeadingDone\n");
00325     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00326    }
00327 
00328   virtual void WhitespaceEndedTerm() {
00329     if (just_put_term_or_punc_ && ! just_put_whitespace_) 
00330       printf("WhitespaceEndedTerm\n");
00331     just_put_whitespace_ = true;
00332    }
00333 
00334   virtual void AddObject(const char* fields, int fieldlen) {
00335     printf("AddObject\n");
00336     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00337    }
00338 
00339   virtual void AddObjectDone() {
00340     printf("AddObjectDone\n");
00341     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00342    }
00343 
00344   virtual void AddParam(const char* fields, int fieldlen) {
00345     printf("AddParam\n");
00346     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00347    }
00348 
00349   virtual void AddEmbed(const char* fields, int fieldlen) {
00350     printf("AddEmbed\n");
00351     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00352    }
00353 
00354   virtual void AddHead(const char* fields, int fieldlen) {
00355     printf("AddHead\n");
00356     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00357    }
00358 
00359   virtual void AddHeadDone() {
00360     printf("AddHeadDone\n");
00361     just_put_whitespace_ = false; just_put_term_or_punc_ = false;
00362    }
00363 
00364  private:
00365   bool in_anchor_;
00366   bool just_put_whitespace_;
00367   bool just_put_term_or_punc_;
00368 };
00369 
00370 ParseHandler* MakeCatHandler() {
00371   return new PreparseCatParseHandler;
00372 }

Generated on Wed May 29 11:37:15 2002 for MarkovPR by doxygen 1.2.15