author | zecke <zecke> | 2002-09-10 12:09:49 (UTC) |
---|---|---|
committer | zecke <zecke> | 2002-09-10 12:09:49 (UTC) |
commit | 6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4 (patch) (unidiff) | |
tree | 6ebc93c6432f4ed9d00ef1448b6a047ef522a79a /library/qdawg.cpp | |
parent | d10cddb3c9ce75bc90b14add14bc133737fe35aa (diff) | |
download | opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.zip opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.tar.gz opie-6b77a1cdb9536b1c135eb86d53a6b2c22c19b0a4.tar.bz2 |
Qtopia1-6 merge
still to test
bic changes to be resolved
more changes to be made?
-rw-r--r-- | library/qdawg.cpp | 123 |
1 files changed, 120 insertions, 3 deletions
diff --git a/library/qdawg.cpp b/library/qdawg.cpp index 3615693..af5dc82 100644 --- a/library/qdawg.cpp +++ b/library/qdawg.cpp | |||
@@ -1,16 +1,16 @@ | |||
1 | /********************************************************************** | 1 | /********************************************************************** |
2 | ** Copyright (C) 2000 Trolltech AS. All rights reserved. | 2 | ** Copyright (C) 2000-2002 Trolltech AS. All rights reserved. |
3 | ** | 3 | ** |
4 | ** This file is part of Qtopia Environment. | 4 | ** This file is part of the Qtopia Environment. |
5 | ** | 5 | ** |
6 | ** This file may be distributed and/or modified under the terms of the | 6 | ** This file may be distributed and/or modified under the terms of the |
7 | ** GNU General Public License version 2 as published by the Free Software | 7 | ** GNU General Public License version 2 as published by the Free Software |
8 | ** Foundation and appearing in the file LICENSE.GPL included in the | 8 | ** Foundation and appearing in the file LICENSE.GPL included in the |
9 | ** packaging of this file. | 9 | ** packaging of this file. |
10 | ** | 10 | ** |
11 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE | 11 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE |
12 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | 12 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
13 | ** | 13 | ** |
14 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. | 14 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. |
15 | ** | 15 | ** |
16 | ** Contact info@trolltech.com if any conditions of this licensing are | 16 | ** Contact info@trolltech.com if any conditions of this licensing are |
@@ -187,25 +187,25 @@ int QTrie::collectKeys() | |||
187 | 187 | ||
188 | int TriePtr::operator <(const TriePtr& o) const | 188 | int TriePtr::operator <(const TriePtr& o) const |
189 | { return letter < o.letter; } | 189 | { return letter < o.letter; } |
190 | int TriePtr::operator >(const TriePtr& o) const | 190 | int TriePtr::operator >(const TriePtr& o) const |
191 | { return letter > o.letter; } | 191 | { return letter > o.letter; } |
192 | int TriePtr::operator <=(const TriePtr& o) const | 192 | int TriePtr::operator <=(const TriePtr& o) const |
193 | { return letter <= o.letter; } | 193 | { return letter <= o.letter; } |
194 | 194 | ||
195 | bool TrieList::equal(TrieList& l) | 195 | bool TrieList::equal(TrieList& l) |
196 | { | 196 | { |
197 | if ( count() != l.count() ) | 197 | if ( count() != l.count() ) |
198 | return FALSE; | 198 | return FALSE; |
199 | sort(); l.sort(); | 199 | sort(); l.sort(); |
200 | ConstIterator it2 = begin(); | 200 | ConstIterator it2 = begin(); |
201 | ConstIterator it = l.begin(); | 201 | ConstIterator it = l.begin(); |
202 | for( ; it != l.end(); ++it, ++it2 ) | 202 | for( ; it != l.end(); ++it, ++it2 ) |
203 | if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) | 203 | if ( (*it).letter != (*it2).letter || ! (*it).p->equal((*it2).p) ) |
204 | return FALSE; | 204 | return FALSE; |
205 | return TRUE; | 205 | return TRUE; |
206 | } | 206 | } |
207 | QTrie* TrieList::findAdd(QChar c) | 207 | QTrie* TrieList::findAdd(QChar c) |
208 | { | 208 | { |
209 | for (Iterator it=begin(); it!=end(); ++it) { | 209 | for (Iterator it=begin(); it!=end(); ++it) { |
210 | if ( (*it).letter == c ) | 210 | if ( (*it).letter == c ) |
211 | return (*it).p; | 211 | return (*it).p; |
@@ -391,120 +391,237 @@ private: | |||
391 | here++; | 391 | here++; |
392 | } | 392 | } |
393 | n->islast = 1; | 393 | n->islast = 1; |
394 | } | 394 | } |
395 | return t->key; | 395 | return t->key; |
396 | } | 396 | } |
397 | 397 | ||
398 | private: | 398 | private: |
399 | int nodes; | 399 | int nodes; |
400 | QDawg::Node *node; | 400 | QDawg::Node *node; |
401 | }; | 401 | }; |
402 | 402 | ||
403 | /*! | ||
404 | \class QDawg qdawg.h | ||
405 | \brief The QDawg class provides an implementation of a Directed Acyclic Word Graph. | ||
403 | 406 | ||
407 | A DAWG provides very fast look-up of words in a word list. | ||
408 | |||
409 | The word list is created using readFile(), read() or | ||
410 | createFromWords(). A list of all the DAWG's words is returned by | ||
411 | allWords(), and the total number of words is returned by | ||
412 | countWords(). Use contains() to see if a particular word is in the | ||
413 | DAWG. The root \link qdawg-node.html node\endlink is returned by root(). | ||
414 | |||
415 | A global DAWG is maintained for the current locale. See the | ||
416 | \l Global class for details. | ||
417 | |||
418 | The structure of a DAWG is a graph of \link qdawg-node.html | ||
419 | Nodes\endlink. There are no cycles in the graph (since there are no | ||
420 | inifinitely repeating words). Each \link qdawg-node.html | ||
421 | Node\endlink is a member of a list of \link qdawg-node.html | ||
422 | Nodes\endlink called a child list. Each \link qdawg-node.html | ||
423 | Node\endlink in the child list has a \e letter, an \e isWord flag, | ||
424 | at most one \e jump arc, and at most one arc to the next child in | ||
425 | the list. | ||
426 | |||
427 | If you traverse the \link qdawg-node.html Nodes\endlink in a DAWG, | ||
428 | starting from the root(), and you concatenate all the letters from | ||
429 | the single child in each child list that you visit, at every \link | ||
430 | qdawg-node.html Node\endlink which has the isWord flag set your | ||
431 | concatenation will be a word in the list represented by the DAWG. | ||
432 | |||
433 | For example, the DAWG below represents the word list: | ||
434 | ban, band, can, cane, cans, pan, pane, pans. | ||
435 | |||
436 | This structuring not only provides O(1) lookup of words in the word list, | ||
437 | but also produces a smaller storage file than a plain text file word list. | ||
438 | |||
439 | \img qdawg.png | ||
440 | */ | ||
441 | |||
442 | /*! | ||
443 | Constructs a new empty DAWG. | ||
444 | */ | ||
404 | QDawg::QDawg() | 445 | QDawg::QDawg() |
405 | { | 446 | { |
406 | d = 0; | 447 | d = 0; |
407 | } | 448 | } |
408 | 449 | ||
450 | /*! | ||
451 | Deletes the DAWG. | ||
452 | */ | ||
409 | QDawg::~QDawg() | 453 | QDawg::~QDawg() |
410 | { | 454 | { |
411 | delete d; | 455 | delete d; |
412 | } | 456 | } |
413 | 457 | ||
458 | /*! | ||
459 | \overload | ||
460 | Replaces all the DAWG's words with words read from \a dev. | ||
461 | */ | ||
414 | bool QDawg::createFromWords(QIODevice* dev) | 462 | bool QDawg::createFromWords(QIODevice* dev) |
415 | { | 463 | { |
416 | delete d; | 464 | delete d; |
417 | 465 | ||
418 | QTextStream i(dev); | 466 | QTextStream i(dev); |
419 | QTrie* trie = new QTrie; | 467 | QTrie* trie = new QTrie; |
420 | int n=0; | 468 | int n=0; |
421 | while (!i.atEnd()) { | 469 | while (!i.atEnd()) { |
422 | trie->insertWord(QString::fromUtf8(i.readLine())); | 470 | trie->insertWord(QString::fromUtf8(i.readLine())); |
423 | n++; | 471 | n++; |
424 | } | 472 | } |
425 | if ( n ) | 473 | if ( n ) |
426 | d = new QDawgPrivate(trie); | 474 | d = new QDawgPrivate(trie); |
427 | else | 475 | else |
428 | d = 0; | 476 | d = 0; |
429 | return TRUE; | 477 | return TRUE; |
430 | } | 478 | } |
431 | 479 | ||
480 | /*! | ||
481 | Replaces all the DAWG's words with the words in the \a list. | ||
482 | */ | ||
432 | void QDawg::createFromWords(const QStringList& list) | 483 | void QDawg::createFromWords(const QStringList& list) |
433 | { | 484 | { |
434 | delete d; | 485 | delete d; |
435 | 486 | ||
436 | if ( list.count() ) { | 487 | if ( list.count() ) { |
437 | QTrie* trie = new QTrie; | 488 | QTrie* trie = new QTrie; |
438 | for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { | 489 | for (QStringList::ConstIterator it = list.begin(); it != list.end(); ++it) { |
439 | trie->insertWord(*it); | 490 | trie->insertWord(*it); |
440 | } | 491 | } |
441 | d = new QDawgPrivate(trie); | 492 | d = new QDawgPrivate(trie); |
442 | } else { | 493 | } else { |
443 | d = 0; | 494 | d = 0; |
444 | } | 495 | } |
445 | } | 496 | } |
446 | 497 | ||
498 | /*! | ||
499 | Returns a list of all the words in the DAWG. | ||
500 | */ | ||
447 | QStringList QDawg::allWords() const | 501 | QStringList QDawg::allWords() const |
448 | { | 502 | { |
449 | QStringList result; | 503 | QStringList result; |
450 | if ( d ) d->appendAllWords(result); | 504 | if ( d ) d->appendAllWords(result); |
451 | return result; | 505 | return result; |
452 | } | 506 | } |
453 | 507 | ||
454 | 508 | ||
509 | /*! | ||
510 | Replaces the DAWG with the DAWG in \a filename. | ||
511 | The file is memory-mapped. | ||
512 | |||
513 | \sa write() | ||
514 | */ | ||
455 | bool QDawg::readFile(const QString& filename) | 515 | bool QDawg::readFile(const QString& filename) |
456 | { | 516 | { |
457 | delete d; | 517 | delete d; |
458 | d = 0; | 518 | d = 0; |
459 | int f = ::open( QFile::encodeName(filename), O_RDONLY ); | 519 | int f = ::open( QFile::encodeName(filename), O_RDONLY ); |
460 | if ( f < 0 ) | 520 | if ( f < 0 ) |
461 | return FALSE; | 521 | return FALSE; |
462 | struct stat st; | 522 | struct stat st; |
463 | if ( !fstat( f, &st ) ) { | 523 | if ( !fstat( f, &st ) ) { |
464 | char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file | 524 | char * tmp = (char*)mmap( 0, st.st_size, // any address, whole file |
465 | PROT_READ, // read-only memory | 525 | PROT_READ, // read-only memory |
466 | MAP_FILE | MAP_PRIVATE, // swap-backed map from file | 526 | MAP_FILE | MAP_PRIVATE, // swap-backed map from file |
467 | f, 0 ); // from offset 0 of f | 527 | f, 0 ); // from offset 0 of f |
468 | if ( tmp && tmp != (char*)MAP_FAILED ) | 528 | if ( tmp && tmp != (char*)MAP_FAILED ) |
469 | d = new QDawgPrivate((uchar*)tmp); | 529 | d = new QDawgPrivate((uchar*)tmp); |
470 | } | 530 | } |
471 | ::close( f ); | 531 | ::close( f ); |
472 | return d; | 532 | return d; |
473 | } | 533 | } |
474 | 534 | ||
535 | /*! | ||
536 | Replaces the DAWG with the DAWG in \a dev. | ||
537 | The file is memory-mapped. | ||
538 | |||
539 | \sa write() | ||
540 | */ | ||
475 | bool QDawg::read(QIODevice* dev) | 541 | bool QDawg::read(QIODevice* dev) |
476 | { | 542 | { |
477 | delete d; | 543 | delete d; |
478 | d = new QDawgPrivate(dev); | 544 | d = new QDawgPrivate(dev); |
479 | if ( d->ok() ) | 545 | if ( d->ok() ) |
480 | return TRUE; | 546 | return TRUE; |
481 | delete d; | 547 | delete d; |
482 | d = 0; | 548 | d = 0; |
483 | return FALSE; | 549 | return FALSE; |
484 | } | 550 | } |
485 | 551 | ||
552 | /*! | ||
553 | Writes the DAWG to \a dev, in a custom QDAWG format. | ||
554 | */ | ||
486 | bool QDawg::write(QIODevice* dev) const | 555 | bool QDawg::write(QIODevice* dev) const |
487 | { | 556 | { |
488 | return d ? d->write(dev) : TRUE; | 557 | return d ? d->write(dev) : TRUE; |
489 | } | 558 | } |
490 | 559 | ||
560 | /*! | ||
561 | Returns the number of words in the DAWG. | ||
562 | */ | ||
491 | int QDawg::countWords() const | 563 | int QDawg::countWords() const |
492 | { | 564 | { |
493 | return d ? d->countWords() : 0; | 565 | return d ? d->countWords() : 0; |
494 | } | 566 | } |
495 | 567 | ||
568 | /*! | ||
569 | Returns the root \link qdawg-node.html Node\endlink of the DAWG. | ||
570 | */ | ||
496 | const QDawg::Node* QDawg::root() const | 571 | const QDawg::Node* QDawg::root() const |
497 | { | 572 | { |
498 | return d ? d->root() : 0; | 573 | return d ? d->root() : 0; |
499 | } | 574 | } |
500 | 575 | ||
576 | /*! | ||
577 | Returns TRUE if the DAWG contains the word \a s; otherwise returns | ||
578 | FALSE. | ||
579 | */ | ||
501 | bool QDawg::contains(const QString& s) const | 580 | bool QDawg::contains(const QString& s) const |
502 | { | 581 | { |
503 | return d ? d->contains(s) : FALSE; | 582 | return d ? d->contains(s) : FALSE; |
504 | } | 583 | } |
505 | 584 | ||
585 | /*! | ||
586 | \internal | ||
587 | |||
588 | For debugging: prints out the DAWG contents. | ||
589 | */ | ||
506 | void QDawg::dump() const | 590 | void QDawg::dump() const |
507 | { | 591 | { |
508 | if ( d ) d->dump(); | 592 | if ( d ) d->dump(); |
509 | } | 593 | } |
510 | 594 | ||
595 | /*! | ||
596 | \class QDawg::Node qdawg.h | ||
597 | \brief The QDawg::Node class represents one node of a QDawg. | ||
598 | */ | ||
599 | |||
600 | /*! | ||
601 | \fn QChar QDawg::Node::letter() const | ||
602 | |||
603 | Returns this Node's letter. | ||
604 | */ | ||
605 | /*! | ||
606 | \fn bool QDawg::Node::isWord() const | ||
607 | |||
608 | Returns TRUE if this Node is the end of a word; otherwise returns | ||
609 | FALSE. | ||
610 | */ | ||
611 | /*! | ||
612 | \fn bool QDawg::Node::isLast() const | ||
613 | |||
614 | Returns TRUE if this Node is the last in the child list; otherwise | ||
615 | returns FALSE. | ||
616 | */ | ||
617 | /*! | ||
618 | \fn const Node* QDawg::Node::next() const | ||
619 | |||
620 | Returns the next child Node in the child list or 0 if the current | ||
621 | Node isLast(). | ||
622 | */ | ||
623 | /*! | ||
624 | \fn const Node* QDawg::Node::jump() const | ||
625 | |||
626 | Returns the node connected to this Node. | ||
627 | */ | ||