Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

trie.h

Go to the documentation of this file.
00001 /** @file trie.h */
00002 /* 
00003  * Copyright (C) 2002 Laird Breyer
00004  *  
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  * 
00019  * Author:   Laird Breyer <laird@lbreyer.com>
00020  */
00021 
00022 #ifndef _TRIE_H_
00023 #define _TRIE_H_
00024 #include <iostream>
00025 #include "simplehash.h"
00026 #include <stdexcept>
00027 
00028 #define STRINGBUF_LEN0 511   // The four STRINGBUF_LEN* values are 2^9-1, 2^10-1, 2^11-1 and 2^12-1.
00029 #define STRINGBUF_LEN1 1023  // NOTE(review): none of these macros is referenced inside this header;
00030 #define STRINGBUF_LEN2 2047  // presumably they size string buffers in the including sources, leaving
00031 #define STRINGBUF_LEN3 4095  // one byte for a terminator in a power-of-two array -- TODO confirm.
00032 
00033 /// Hashtable used to navigate trie: an alias for SimpleHashTable<char*> (template from simplehash.h); Trie holds a pointer to one in its jumptable member.
00034 typedef SimpleHashTable<char*> SimpleCharPtrHashTable;
00035 
00036 /// Stores URL strings by superposition of common prefixes
00037 /**
00038  * This class implements a classic trie structure (Knuth, Vol. 3)
00039  * which consists of a very long string space together with a hashtable
00040  * (jumptable) which allows navigation.
00041  *
00042  * The trie allows large space savings by storing common string prefixes only
00043  * once.
00044  */
00045 class Trie {  // NOTE(review): holds raw pointers (bigs, jumptable) but declares no destructor or copy control here; ownership cannot be determined from this header.
00046  public:
00047   Trie(unsigned long slen, long jlen);  ///< Constructor. Presumably slen sizes the string space and jlen the jumptable -- TODO confirm against the implementation.
00048   ptrdiff_t FindURL(const char *url);  ///< Presumably looks up url in the trie; the meaning of the returned ptrdiff_t (and any not-found value) is not visible here -- TODO confirm.
00049   ptrdiff_t InsertURL(const char *url) throw (overflow_error);  ///< Declared to throw only overflow_error. NOTE(review): dynamic exception specifications are deprecated since C++11 and removed in C++17.
00050 
00051   void Statistics(ostream& o);  ///< Takes an output stream by reference; presumably reports the statistics counters to it -- TODO confirm. Note ostream is unqualified and no using-directive is visible in this header.
00052 
00053   uint32 StatsCumulativeStringSize() 
00054     { return stats_cumulative_string_size; }  ///< Returns the stats_cumulative_string_size counter by value (accessor is not const-qualified).
00055   uint32 StatsBigstringInsertions()
00056     { return stats_bigstring_insertions; }  ///< Returns the stats_bigstring_insertions counter by value (accessor is not const-qualified).
00057   uint32 StatsJumptableInsertions()
00058     { return stats_jumptable_insertions; }  ///< Returns the stats_jumptable_insertions counter by value (accessor is not const-qualified).
00059 
00060   char* bigs;  ///< Public raw pointer; presumably the "very long string space" mentioned in the class comment -- TODO confirm.
00061 
00062  protected:
00063 
00064   unsigned long slen_;  ///< Presumably stores the slen constructor argument -- TODO confirm.
00065   unsigned long end_of_bigs;  ///< Presumably the offset of the first unused position in bigs -- TODO confirm.
00066 
00067   SimpleCharPtrHashTable *jumptable;  ///< Pointer to the hashtable (SimpleHashTable<char*>) that the class comment calls the jumptable.
00068 
00069   uint32 stats_cumulative_string_size;  ///< Counter returned by StatsCumulativeStringSize().
00070   uint32 stats_bigstring_insertions;  ///< Counter returned by StatsBigstringInsertions().
00071   uint32 stats_jumptable_insertions;  ///< Counter returned by StatsJumptableInsertions().
00072 
00073 };
00074 #endif

Generated on Wed May 29 11:37:16 2002 for MarkovPR by doxygen1.2.15