Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

urlfilter.h

Go to the documentation of this file.
00001 /** @file urlfilter.h */
00002 /* 
00003  * Copyright (C) 2002 Laird Breyer
00004  *  
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  * 
00019  * Author:   Laird Breyer <laird@lbreyer.com>
00020  */
00021 
00022 #ifndef _URL_FILTER_H_
00023 #define _URL_FILTER_H_
00024 #include <string.h>
00025 #include <stdexcept>
00026 #include "content-type.h"
00027 
00028 #define SLASHBIT 0x80  // bit mask 0x80; where it is applied is not visible in this header
00029 // Capacities used for the character buffers below; every buffer in this header is declared as LENn+1 bytes (presumably room for a terminating NUL).
00030 #define STRINGBUF_LEN0 511   // buffers of 512 bytes
00031 #define STRINGBUF_LEN1 1023  // buffers of 1024 bytes
00032 #define STRINGBUF_LEN2 2047  // buffers of 2048 bytes
00033 #define STRINGBUF_LEN3 4095  // buffers of 4096 bytes
00034 
00035 /// Contains the parts of a URL as separate strings, each held in its own fixed-size buffer of STRINGBUF_LEN1+1 bytes.
00036 struct URLComponents {
00037   char scheme[STRINGBUF_LEN1+1];  ///< scheme part
00038   char netloc[STRINGBUF_LEN1+1];  ///< netloc part (presumably the network location / host -- confirm against the parser)
00039   char query[STRINGBUF_LEN1+1];   ///< query part
00040   char params[STRINGBUF_LEN1+1];  ///< params part
00041   char path[STRINGBUF_LEN1+1];    ///< path part
00042   /// Writes 0 to the first byte of every buffer, making each part an empty C string; the remaining bytes are left untouched.
00043   void Clear() { scheme[0] = netloc[0] = query[0] = params[0] = path[0] = 0; }
00044 };
00045 
00046 /// Knows how to parse a URL string and related functions. Only declarations are visible here; the implementation lives elsewhere.
00047 class URLFilter {
00048  public:
00049   URLFilter(bool rs);  ///< NOTE(review): the meaning of rs is not visible here; presumably it initialises one of the flags below -- confirm in the implementation
00050   const char* DeindexURL(const char *anurl);   ///< NOTE(review): result presumably points into deindex_scratchbuf and is overwritten by the next call -- confirm
00051   const char* CompressURL(const char *anurl);  ///< NOTE(review): result presumably points into comp_scratchbuf and is overwritten by the next call -- confirm
00052   void ParseURL(const char *anurl, char *schemebuf,   // one input URL, five caller-supplied output buffers
00053                 char *netlocbuf, char *querybuf,      // NOTE(review): no buffer sizes are passed; callers presumably must
00054                 char *paramsbuf, char *pathbuf);      // provide at least STRINGBUF_LEN1+1 bytes each (as URLComponents does) -- confirm
00055   void NormalizeURLPath(char *apath);  ///< takes a non-const buffer, so the path is presumably rewritten in place -- confirm
00056   ContentType ClassifyURLPath(const char *path);  ///< returns a ContentType (presumably declared in content-type.h)
00057   const char * FormatURL(const char *anurl, int anurl_len,   // anurl is passed with an explicit length; baseurl and foundtype are non-const pointers
00058                          URLComponents *baseurl, ContentType *foundtype) throw (domain_error);  // NOTE(review): domain_error is unqualified (presumably std::domain_error via a using-directive elsewhere -- confirm); dynamic exception specifications were removed in C++17
00059 
00060  private:
00061   // Numbered working buffers; which method uses which one is not visible in this header.
00062   char scratchbuf0[STRINGBUF_LEN0+1];  // 512 bytes
00063   char scratchbuf1[STRINGBUF_LEN2+1];  // 2048 bytes
00064   char scratchbuf2[STRINGBUF_LEN2+1];  // 2048 bytes
00065   char scratchbuf3[STRINGBUF_LEN2+1];  // 2048 bytes
00066   char scratchbuf4[STRINGBUF_LEN3+1];  // 4096 bytes
00067   char scratchbuf5[STRINGBUF_LEN1+1];  // 1024 bytes
00068   char scratchbuf6[STRINGBUF_LEN1+1];  // 1024 bytes
00069   // Named working buffers; the names suggest CompressURL, ParseURL and DeindexURL respectively -- confirm in the implementation.
00070   char comp_scratchbuf[STRINGBUF_LEN2+1];     // 2048 bytes
00071   char parse_scratchbuf[STRINGBUF_LEN1+1];    // 1024 bytes
00072   char deindex_scratchbuf[STRINGBUF_LEN2+1];  // 2048 bytes
00073   // Boolean options; where they are set and read is not visible in this header.
00074   struct {
00075     bool remove_html_suffix;
00076     bool rearrange_components;
00077   } flags;
00078 };
00079 #endif

Generated on Wed May 29 11:37:16 2002 for MarkovPR by doxygen 1.2.15