summaryrefslogtreecommitdiff
path: root/library/qdawg.cpp
Unidiff
Diffstat (limited to 'library/qdawg.cpp') (more/less context) (ignore whitespace changes)
-rw-r--r--library/qdawg.cpp2
1 files changed, 0 insertions, 2 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp
index af5dc82..2ea5734 100644
--- a/library/qdawg.cpp
+++ b/library/qdawg.cpp
@@ -1,407 +1,405 @@
1/********************************************************************** 1/**********************************************************************
2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. 2** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
3** 3**
4** This file is part of the Qtopia Environment. 4** This file is part of the Qtopia Environment.
5** 5**
6** This file may be distributed and/or modified under the terms of the 6** This file may be distributed and/or modified under the terms of the
7** GNU General Public License version 2 as published by the Free Software 7** GNU General Public License version 2 as published by the Free Software
8** Foundation and appearing in the file LICENSE.GPL included in the 8** Foundation and appearing in the file LICENSE.GPL included in the
9** packaging of this file. 9** packaging of this file.
10** 10**
11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE 11** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 12** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
13** 13**
14** See http://www.trolltech.com/gpl/ for GPL licensing information. 14** See http://www.trolltech.com/gpl/ for GPL licensing information.
15** 15**
16** Contact info@trolltech.com if any conditions of this licensing are 16** Contact info@trolltech.com if any conditions of this licensing are
17** not clear to you. 17** not clear to you.
18** 18**
19**********************************************************************/ 19**********************************************************************/
20#include "qdawg.h" 20#include "qdawg.h"
21#include <qintdict.h> 21#include <qintdict.h>
22#include <qvaluelist.h>
23#include <qtextstream.h>
24#include <qfile.h> 22#include <qfile.h>
25#include <qtl.h> 23#include <qtl.h>
26 24
27#include <limits.h> 25#include <limits.h>
28#include <stdio.h> 26#include <stdio.h>
29 27
30// for mmap 28// for mmap
31#include <sys/types.h> 29#include <sys/types.h>
32#include <sys/stat.h> 30#include <sys/stat.h>
33#include <sys/mman.h> 31#include <sys/mman.h>
34#include <fcntl.h> 32#include <fcntl.h>
35#include <errno.h> 33#include <errno.h>
36#include <unistd.h> 34#include <unistd.h>
37 35
38class QDawgPrivate; 36class QDawgPrivate;
39class QTrie; 37class QTrie;
40 38
41typedef QValueList<QTrie*> TrieClub; 39typedef QValueList<QTrie*> TrieClub;
42typedef QIntDict<TrieClub> TrieClubDirectory; 40typedef QIntDict<TrieClub> TrieClubDirectory;
43 41
44class TriePtr { 42class TriePtr {
45public: 43public:
46 QChar letter; 44 QChar letter;
47 QTrie* p; 45 QTrie* p;
48 int operator <(const TriePtr& o) const; 46 int operator <(const TriePtr& o) const;
49 int operator >(const TriePtr& o) const; 47 int operator >(const TriePtr& o) const;
50 int operator <=(const TriePtr& o) const; 48 int operator <=(const TriePtr& o) const;
51}; 49};
52 50
53class TrieList : public QValueList<TriePtr> { 51class TrieList : public QValueList<TriePtr> {
54 bool sorted; 52 bool sorted;
55public: 53public:
56 TrieList() 54 TrieList()
57 { 55 {
58 sorted=TRUE; 56 sorted=TRUE;
59 } 57 }
60 58
61 QTrie* findAdd(QChar c); 59 QTrie* findAdd(QChar c);
62 bool equal(TrieList& l); 60 bool equal(TrieList& l);
63 61
64 void sort() 62 void sort()
65 { 63 {
66 if ( !sorted ) { 64 if ( !sorted ) {
67 qHeapSort(*this); 65 qHeapSort(*this);
68 sorted = TRUE; 66 sorted = TRUE;
69 } 67 }
70 } 68 }
71}; 69};
72 70
73// A fast but memory-wasting temporary class. The Dawg is the goal. 71// A fast but memory-wasting temporary class. The Dawg is the goal.
74class QTrie { 72class QTrie {
75public: 73public:
76 QTrie(); 74 QTrie();
77 ~QTrie(); 75 ~QTrie();
78 76
79 void insertWord(const QString& s, uint index=0); 77 void insertWord(const QString& s, uint index=0);
80 bool equal(QTrie* o); 78 bool equal(QTrie* o);
81 void dump(int indent=0); 79 void dump(int indent=0);
82 80
83private: 81private:
84 TrieList children; 82 TrieList children;
85 bool isword; 83 bool isword;
86 84
87 friend class QDawgPrivate; 85 friend class QDawgPrivate;
88 int maxdepth; 86 int maxdepth;
89 int decendants; 87 int decendants;
90 int key; 88 int key;
91 void distributeKeys(TrieClubDirectory& directory); 89 void distributeKeys(TrieClubDirectory& directory);
92 QTrie* clubLeader(TrieClubDirectory& directory); 90 QTrie* clubLeader(TrieClubDirectory& directory);
93 int collectKeys(); 91 int collectKeys();
94 friend class TriePtr; 92 friend class TriePtr;
95 friend class TrieList; 93 friend class TrieList;
96}; 94};
97 95
98QTrie::QTrie() 96QTrie::QTrie()
99{ 97{
100 key = 0; 98 key = 0;
101 isword = FALSE; 99 isword = FALSE;
102} 100}
103 101
104QTrie::~QTrie() 102QTrie::~QTrie()
105{ 103{
106 // NOTE: we do not delete the children - after conversion to DAWG 104 // NOTE: we do not delete the children - after conversion to DAWG
107 // it's too difficult. The QTrie's are deleted via the directory. 105 // it's too difficult. The QTrie's are deleted via the directory.
108} 106}
109 107
110void QTrie::insertWord(const QString& s, uint index) 108void QTrie::insertWord(const QString& s, uint index)
111{ 109{
112 if ( index == s.length() ) { 110 if ( index == s.length() ) {
113 isword = TRUE; 111 isword = TRUE;
114 } else { 112 } else {
115 QTrie* t = children.findAdd(s[index]); 113 QTrie* t = children.findAdd(s[index]);
116 t->insertWord(s,index+1); 114 t->insertWord(s,index+1);
117 } 115 }
118} 116}
119 117
120bool QTrie::equal(QTrie* o) 118bool QTrie::equal(QTrie* o)
121{ 119{
122 if ( o == this ) return TRUE; 120 if ( o == this ) return TRUE;
123 if ( isword != o->isword ) 121 if ( isword != o->isword )
124 return FALSE; 122 return FALSE;
125 return children.equal(o->children); 123 return children.equal(o->children);
126} 124}
127 125
128void QTrie::dump(int indent) 126void QTrie::dump(int indent)
129{ 127{
130 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 128 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
131 QTrie* s = (*it).p; 129 QTrie* s = (*it).p;
132 for (int in=0; in<indent; in++) 130 for (int in=0; in<indent; in++)
133 fputc(' ',stderr); 131 fputc(' ',stderr);
134 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(), 132 fprintf(stderr," %c %d %s %p\n",(*it).letter.unicode(),
135 s->key,s->isword?"word":"",s); 133 s->key,s->isword?"word":"",s);
136 s->dump(indent+2); 134 s->dump(indent+2);
137 } 135 }
138} 136}
139 137
140void QTrie::distributeKeys(TrieClubDirectory& directory) 138void QTrie::distributeKeys(TrieClubDirectory& directory)
141{ 139{
142 maxdepth = INT_MIN; 140 maxdepth = INT_MIN;
143 decendants = children.count(); 141 decendants = children.count();
144 key = 0; 142 key = 0;
145 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 143 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
146 QTrie* s = (*it).p; 144 QTrie* s = (*it).p;
147 QChar l = (*it).letter; 145 QChar l = (*it).letter;
148 s->distributeKeys(directory); 146 s->distributeKeys(directory);
149 key = key*64+l.unicode()+s->key*5; 147 key = key*64+l.unicode()+s->key*5;
150 decendants += s->decendants; 148 decendants += s->decendants;
151 if ( s->maxdepth+1 > maxdepth ) 149 if ( s->maxdepth+1 > maxdepth )
152 maxdepth = s->maxdepth+1; 150 maxdepth = s->maxdepth+1;
153 } 151 }
154 if ( decendants ) { 152 if ( decendants ) {
155 key += decendants + maxdepth*256 + children.count() * 65536; 153 key += decendants + maxdepth*256 + children.count() * 65536;
156 if ( !key ) key++; // unlikely 154 if ( !key ) key++; // unlikely
157 } 155 }
158 TrieClub* c = directory[key]; 156 TrieClub* c = directory[key];
159 if ( !c ) directory.insert(key, (c = new TrieClub) ); 157 if ( !c ) directory.insert(key, (c = new TrieClub) );
160 c->prepend(this); 158 c->prepend(this);
161} 159}
162 160
163QTrie* QTrie::clubLeader(TrieClubDirectory& directory) 161QTrie* QTrie::clubLeader(TrieClubDirectory& directory)
164{ 162{
165 if ( !key ) return directory[0]->first(); 163 if ( !key ) return directory[0]->first();
166 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) { 164 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) {
167 QTrie* t= (*it).p->clubLeader(directory); 165 QTrie* t= (*it).p->clubLeader(directory);
168 (*it).p = t; 166 (*it).p = t;
169 } 167 }
170 TrieClub *club = directory[key]; 168 TrieClub *club = directory[key];
171 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 169 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
172 QTrie* o = *it; 170 QTrie* o = *it;
173 if ( o->equal(this) ) 171 if ( o->equal(this) )
174 return o; 172 return o;
175 } 173 }
176 return this; 174 return this;
177} 175}
178 176
179int QTrie::collectKeys() 177int QTrie::collectKeys()
180{ 178{
181 int n=0; 179 int n=0;
182 if ( key ) key=0,n+=children.count(); 180 if ( key ) key=0,n+=children.count();
183 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it) 181 for (TrieList::Iterator it=children.begin(); it!=children.end(); ++it)
184 n += (*it).p->collectKeys(); 182 n += (*it).p->collectKeys();
185 return n; 183 return n;
186} 184}
187 185
188int TriePtr::operator <(const TriePtr& o) const 186int TriePtr::operator <(const TriePtr& o) const
189 { return letter < o.letter; } 187 { return letter < o.letter; }
190int TriePtr::operator >(const TriePtr& o) const 188int TriePtr::operator >(const TriePtr& o) const
191 { return letter > o.letter; } 189 { return letter > o.letter; }
192int TriePtr::operator <=(const TriePtr& o) const 190int TriePtr::operator <=(const TriePtr& o) const
193 { return letter <= o.letter; } 191 { return letter <= o.letter; }
194 192
195bool TrieList::equal(TrieList& l) 193bool TrieList::equal(TrieList& l)
196{ 194{
197 if ( count() != l.count() ) 195 if ( count() != l.count() )
198 return FALSE; 196 return FALSE;
199 sort(); l.sort(); 197 sort(); l.sort();
200 ConstIterator it2 = begin(); 198 ConstIterator it2 = begin();
201 ConstIterator it = l.begin(); 199 ConstIterator it = l.begin();
202 for( ; it != l.end(); ++it, ++it2 ) 200 for( ; it != l.end(); ++it, ++it2 )
203 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) 201 if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) )
204 return FALSE; 202 return FALSE;
205 return TRUE; 203 return TRUE;
206} 204}
207QTrie* TrieList::findAdd(QChar c) 205QTrie* TrieList::findAdd(QChar c)
208{ 206{
209 for (Iterator it=begin(); it!=end(); ++it) { 207 for (Iterator it=begin(); it!=end(); ++it) {
210 if ( (*it).letter == c ) 208 if ( (*it).letter == c )
211 return (*it).p; 209 return (*it).p;
212 } 210 }
213 TriePtr p; 211 TriePtr p;
214 p.p = new QTrie; 212 p.p = new QTrie;
215 p.letter = c; 213 p.letter = c;
216 prepend(p); 214 prepend(p);
217 sorted=FALSE; 215 sorted=FALSE;
218 sort(); 216 sort();
219 return p.p; 217 return p.p;
220} 218}
221 219
222static const char* dawg_sig = "QDAWG100"; 220static const char* dawg_sig = "QDAWG100";
223 221
224class QDawgPrivate { 222class QDawgPrivate {
225public: 223public:
226 QDawgPrivate(QIODevice* dev) 224 QDawgPrivate(QIODevice* dev)
227 { 225 {
228 QDataStream ds(dev); 226 QDataStream ds(dev);
229 char sig[8]; 227 char sig[8];
230 ds.readRawBytes(sig,8); 228 ds.readRawBytes(sig,8);
231 if ( !strncmp(dawg_sig,sig,8) ) { 229 if ( !strncmp(dawg_sig,sig,8) ) {
232 uint n; 230 uint n;
233 char* nn; 231 char* nn;
234 ds.readBytes(nn,n); 232 ds.readBytes(nn,n);
235 233
236 // #### endianness problem ignored. 234 // #### endianness problem ignored.
237 node = (QDawg::Node*)nn; 235 node = (QDawg::Node*)nn;
238 nodes = n / sizeof(QDawg::Node); 236 nodes = n / sizeof(QDawg::Node);
239 } else { 237 } else {
240 node = 0; 238 node = 0;
241 } 239 }
242 } 240 }
243 241
244 bool ok() const { return node; } 242 bool ok() const { return node; }
245 243
246 QDawgPrivate(uchar* mem) 244 QDawgPrivate(uchar* mem)
247 { 245 {
248 if ( !strncmp(dawg_sig,(char*)mem,8) ) { 246 if ( !strncmp(dawg_sig,(char*)mem,8) ) {
249 mem += 8; 247 mem += 8;
250 248
251 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3]; 249 int n = ((mem[0]*256+mem[1])*256+mem[2])*256+mem[3];
252 mem += 4; 250 mem += 4;
253 251
254 // #### endianness problem ignored. 252 // #### endianness problem ignored.
255 node = (QDawg::Node*)((char*)mem); 253 node = (QDawg::Node*)((char*)mem);
256 nodes = n / sizeof(QDawg::Node); 254 nodes = n / sizeof(QDawg::Node);
257 } 255 }
258 } 256 }
259 257
260 QDawgPrivate(QTrie* t) // destroys the QTrie. 258 QDawgPrivate(QTrie* t) // destroys the QTrie.
261 { 259 {
262 TrieClubDirectory directory(9973); 260 TrieClubDirectory directory(9973);
263 t->distributeKeys(directory); 261 t->distributeKeys(directory);
264 QTrie* l = t->clubLeader(directory); 262 QTrie* l = t->clubLeader(directory);
265 ASSERT(l==t); 263 ASSERT(l==t);
266 generateArray(t); 264 generateArray(t);
267 265
268 TrieClub *club; 266 TrieClub *club;
269 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit) 267 for (QIntDictIterator<TrieClub> dit(directory); (club=dit); ++dit)
270 { 268 {
271 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) { 269 for (TrieClub::Iterator it = club->begin(); it != club->end(); ++it) {
272 delete *it; 270 delete *it;
273 } 271 }
274 delete club; 272 delete club;
275 } 273 }
276 } 274 }
277 275
278 bool write(QIODevice* dev) 276 bool write(QIODevice* dev)
279 { 277 {
280 QDataStream ds(dev); 278 QDataStream ds(dev);
281 ds.writeRawBytes(dawg_sig,8); 279 ds.writeRawBytes(dawg_sig,8);
282 // #### endianness problem ignored. 280 // #### endianness problem ignored.
283 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes); 281 ds.writeBytes((char*)node,sizeof(QDawg::Node)*nodes);
284 return dev->state() == IO_Ok; 282 return dev->state() == IO_Ok;
285 } 283 }
286 284
287 void dumpWords(int nid=0, int index=0) 285 void dumpWords(int nid=0, int index=0)
288 { 286 {
289 static char word[256]; // ick latin1 287 static char word[256]; // ick latin1
290 int i=0; 288 int i=0;
291 do { 289 do {
292 QDawg::Node& n = node[nid+i]; 290 QDawg::Node& n = node[nid+i];
293 word[index] = n.let; 291 word[index] = n.let;
294 if ( n.isword ) 292 if ( n.isword )
295 fprintf(stderr,"%.*s\n",index+1,word); 293 fprintf(stderr,"%.*s\n",index+1,word);
296 if ( n.offset ) dumpWords(n.offset+nid+i,index+1); 294 if ( n.offset ) dumpWords(n.offset+nid+i,index+1);
297 } while (!node[nid+i++].islast); 295 } while (!node[nid+i++].islast);
298 } 296 }
299 297
300 void dump(int nid=0, int indent=0) 298 void dump(int nid=0, int indent=0)
301 { 299 {
302 int i=0; 300 int i=0;
303 do { 301 do {
304 QDawg::Node& n = node[nid+i]; 302 QDawg::Node& n = node[nid+i];
305 fprintf(stderr,"%d: ",nid+i); 303 fprintf(stderr,"%d: ",nid+i);
306 for (int in=0; in<indent; in++) 304 for (int in=0; in<indent; in++)
307 fputc(' ',stderr); 305 fputc(' ',stderr);
308 fprintf(stderr," %c %d %d %d\n",n.let, 306 fprintf(stderr," %c %d %d %d\n",n.let,
309 n.isword,n.islast,n.offset); 307 n.isword,n.islast,n.offset);
310 if ( n.offset ) dump(n.offset+nid+i,indent+2); 308 if ( n.offset ) dump(n.offset+nid+i,indent+2);
311 } while (!node[nid+i++].islast); 309 } while (!node[nid+i++].islast);
312 } 310 }
313 311
314 int countWords(int nid=0) 312 int countWords(int nid=0)
315 { 313 {
316 int t=0; 314 int t=0;
317 int i=0; 315 int i=0;
318 do { 316 do {
319 QDawg::Node& n = node[nid+i]; 317 QDawg::Node& n = node[nid+i];
320 if ( n.isword ) 318 if ( n.isword )
321 t++; 319 t++;
322 if ( n.offset ) 320 if ( n.offset )
323 t+=countWords(n.offset+nid+i); 321 t+=countWords(n.offset+nid+i);
324 } while (!node[nid+i++].islast); 322 } while (!node[nid+i++].islast);
325 return t; 323 return t;
326 } 324 }
327 325
328 bool contains(const QString& s, int nid=0, int index=0) const 326 bool contains(const QString& s, int nid=0, int index=0) const
329 { 327 {
330 int i=0; 328 int i=0;
331 do { 329 do {
332 QDawg::Node& n = node[nid+i]; 330 QDawg::Node& n = node[nid+i];
333 if ( s[index] == QChar((ushort)n.let) ) { 331 if ( s[index] == QChar((ushort)n.let) ) {
334 if ( n.isword && index == (int)s.length()-1 ) 332 if ( n.isword && index == (int)s.length()-1 )
335 return TRUE; 333 return TRUE;
336 if ( n.offset ) 334 if ( n.offset )
337 return contains(s,n.offset+nid+i,index+1); 335 return contains(s,n.offset+nid+i,index+1);
338 } 336 }
339 } while (!node[nid+i++].islast); 337 } while (!node[nid+i++].islast);
340 return FALSE; 338 return FALSE;
341 } 339 }
342 340
343 void appendAllWords(QStringList& list, int nid=0, QString s="") const 341 void appendAllWords(QStringList& list, int nid=0, QString s="") const
344 { 342 {
345 int i=0; 343 int i=0;
346 int next = s.length(); 344 int next = s.length();
347 do { 345 do {
348 QDawg::Node& n = node[nid+i]; 346 QDawg::Node& n = node[nid+i];
349 s[next] = QChar((ushort)n.let); 347 s[next] = QChar((ushort)n.let);
350 if ( n.isword ) 348 if ( n.isword )
351 list.append(s); 349 list.append(s);
352 if ( n.offset ) 350 if ( n.offset )
353 appendAllWords(list, n.offset+nid+i, s); 351 appendAllWords(list, n.offset+nid+i, s);
354 } while (!node[nid+i++].islast); 352 } while (!node[nid+i++].islast);
355 } 353 }
356 354
357 const QDawg::Node* root() { return node; } 355 const QDawg::Node* root() { return node; }
358 356
359private: 357private:
360 void generateArray(QTrie* t) 358 void generateArray(QTrie* t)
361 { 359 {
362 nodes = 0; 360 nodes = 0;
363 int n = t->collectKeys(); 361 int n = t->collectKeys();
364 node = new QDawg::Node[n]; 362 node = new QDawg::Node[n];
365 appendToArray(t); 363 appendToArray(t);
366 ASSERT(n == nodes); 364 ASSERT(n == nodes);
367 } 365 }
368 366
369 int appendToArray(QTrie* t) 367 int appendToArray(QTrie* t)
370 { 368 {
371 if ( !t->key ) { 369 if ( !t->key ) {
372 if ( !t->children.count() ) 370 if ( !t->children.count() )
373 return 0; 371 return 0;
374 t->key = nodes; 372 t->key = nodes;
375 nodes += t->children.count(); 373 nodes += t->children.count();
376 QDawg::Node* n = &node[t->key-1]; 374 QDawg::Node* n = &node[t->key-1];
377 int here = t->key; 375 int here = t->key;
378 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) { 376 for (TrieList::Iterator it=t->children.begin(); it!=t->children.end(); ++it) {
379 QTrie* s = (*it).p; 377 QTrie* s = (*it).p;
380 ++n; 378 ++n;
381 n->let = (*it).letter.unicode(); 379 n->let = (*it).letter.unicode();
382 n->isword = s->isword; 380 n->isword = s->isword;
383 n->islast = 0; 381 n->islast = 0;
384 n->offset = appendToArray(s); 382 n->offset = appendToArray(s);
385 if ( n->offset ) { 383 if ( n->offset ) {
386 int t = n->offset-here; 384 int t = n->offset-here;
387 n->offset=t; 385 n->offset=t;
388 if ( n->offset != t ) 386 if ( n->offset != t )
389 qWarning("Overflow: too many words"); 387 qWarning("Overflow: too many words");
390 } 388 }
391 here++; 389 here++;
392 } 390 }
393 n->islast = 1; 391 n->islast = 1;
394 } 392 }
395 return t->key; 393 return t->key;
396 } 394 }
397 395
398private: 396private:
399 int nodes; 397 int nodes;
400 QDawg::Node *node; 398 QDawg::Node *node;
401}; 399};
402 400
403/*! 401/*!
404 \class QDawg qdawg.h 402 \class QDawg qdawg.h
405 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. 403 \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph.
406 404
407 A DAWG provides very fast look-up of words in a word list. 405 A DAWG provides very fast look-up of words in a word list.