diff options
author | Lars Knoll <lars.knoll@nokia.com> | 2009-03-23 10:18:55 +0100 |
---|---|---|
committer | Simon Hausmann <simon.hausmann@nokia.com> | 2009-03-23 10:18:55 +0100 |
commit | e5fcad302d86d316390c6b0f62759a067313e8a9 (patch) | |
tree | c2afbf6f1066b6ce261f14341cf6d310e5595bc1 /tools/assistant/lib/qhelpsearchindexreader_clucene.cpp | |
download | qt4-tools-e5fcad302d86d316390c6b0f62759a067313e8a9.tar.gz |
Long live Qt 4.5!
Diffstat (limited to 'tools/assistant/lib/qhelpsearchindexreader_clucene.cpp')
-rw-r--r-- | tools/assistant/lib/qhelpsearchindexreader_clucene.cpp | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp new file mode 100644 index 0000000000..82a3a17d3f --- /dev/null +++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp @@ -0,0 +1,392 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the Qt Assistant of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the either Technology Preview License Agreement or the +** Beta Release License Agreement. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain +** additional rights. These rights are described in the Nokia Qt LGPL +** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this +** package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qhelpenginecore.h" +#include "fulltextsearch/qsearchable_p.h" +#include "fulltextsearch/qqueryparser_p.h" +#include "fulltextsearch/qindexreader_p.h" +#include "qhelpsearchindexreader_clucene_p.h" + +#include <QtCore/QDir> +#include <QtCore/QSet> +#include <QtCore/QString> +#include <QtCore/QFileInfo> +#include <QtCore/QStringList> +#include <QtCore/QTextStream> +#include <QtCore/QMutexLocker> + +QT_BEGIN_NAMESPACE + +namespace qt { + namespace fulltextsearch { + namespace clucene { + +QHelpSearchIndexReader::QHelpSearchIndexReader() + : QThread() + , m_cancel(false) +{ + // nothing todo +} + +QHelpSearchIndexReader::~QHelpSearchIndexReader() +{ + mutex.lock(); + this->m_cancel = true; + waitCondition.wakeOne(); + mutex.unlock(); + + wait(); +} + +void QHelpSearchIndexReader::cancelSearching() +{ + mutex.lock(); + this->m_cancel = true; + mutex.unlock(); +} + +void QHelpSearchIndexReader::search(const QString &collectionFile, + const QString &indexFilesFolder, + const QList<QHelpSearchQuery> &queryList) +{ + QMutexLocker lock(&mutex); + + this->hitList.clear(); + this->m_cancel = false; + this->m_query = queryList; + this->m_collectionFile = collectionFile; + this->m_indexFilesFolder = indexFilesFolder; + + start(QThread::NormalPriority); +} + +int QHelpSearchIndexReader::hitsCount() const +{ + return hitList.count(); +} + +QHelpSearchEngine::SearchHit QHelpSearchIndexReader::hit(int index) const +{ + return hitList.at(index); +} + +void QHelpSearchIndexReader::run() +{ + mutex.lock(); + + if (m_cancel) { + mutex.unlock(); + return; + } + + const QString collectionFile(this->m_collectionFile); + const QList<QHelpSearchQuery> &queryList = this->m_query; + const QString indexPath(m_indexFilesFolder); + + mutex.unlock(); + + QHelpEngineCore engine(collectionFile, 0); + if (!engine.setupData()) + return; + + QFileInfo fInfo(indexPath); + if (fInfo.exists() && !fInfo.isWritable()) { + qWarning("Full Text Search, could not read index (missing permissions)."); + return; + } + + if(QCLuceneIndexReader::indexExists(indexPath)) { + mutex.lock(); + if (m_cancel) { + mutex.unlock(); + return; + } + mutex.unlock(); + + emit searchingStarted(); + +#if !defined(QT_NO_EXCEPTIONS) + try { +#endif + QCLuceneBooleanQuery booleanQuery; + if (!buildQuery(booleanQuery, queryList)) { + emit searchingFinished(0); + return; + } + + const QStringList attribList = engine.filterAttributes(engine.currentFilter()); + if (!attribList.isEmpty()) { + QCLuceneStandardAnalyzer analyzer; + QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") + + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), + analyzer); + + if (!query) { + emit searchingFinished(0); + return; + } + booleanQuery.add(query, true, true, false); + } + + QCLuceneIndexSearcher indexSearcher(indexPath); + QCLuceneHits hits = indexSearcher.search(booleanQuery); + const QStringList namespaceList = engine.registeredDocumentations(); + + QSet<QString> pathSet; + QCLuceneDocument document; + for (qint32 i = 0; i < hits.length(); i++) { + document = hits.document(i); + const QString path = document.get(QLatin1String("path")); + if (!pathSet.contains(path) && namespaceList.contains( + document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) { + pathSet.insert(path); + hitList.append(qMakePair(path, document.get(QLatin1String("title")))); + } + document.clear(); + + mutex.lock(); + if (m_cancel) { + mutex.unlock(); + emit searchingFinished(0); + return; + } + mutex.unlock(); + } + + indexSearcher.close(); + int count = hitList.count(); + if (count > 0) + boostSearchHits(engine, hitList, queryList); + emit searchingFinished(hitList.count()); + +#if !defined(QT_NO_EXCEPTIONS) + } catch(...) { + hitList.clear(); + emit searchingFinished(0); + } +#endif + } +} + +bool QHelpSearchIndexReader::defaultQuery(const QString &term, + QCLuceneBooleanQuery &booleanQuery) +{ + QCLuceneStandardAnalyzer analyzer; + + const QLatin1String c("content"); + const QLatin1String t("titleTokenized"); + + QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer); + QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer); + if (query && query2) { + booleanQuery.add(query, true, false, false); + booleanQuery.add(query2, true, false, false); + return true; + } + + return false; +} + +bool QHelpSearchIndexReader::buildQuery(QCLuceneBooleanQuery &booleanQuery, + const QList<QHelpSearchQuery> &queryList) +{ + foreach (const QHelpSearchQuery query, queryList) { + switch (query.fieldName) { + case QHelpSearchQuery::FUZZY: { + const QLatin1String fuzzy("~"); + foreach (const QString term, query.wordList) { + if (term.isEmpty() || !defaultQuery(term.toLower() + fuzzy, booleanQuery)) + return false; + } + } break; + + case QHelpSearchQuery::WITHOUT: { + QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + + QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( + QLatin1String("content"), term.toLower())); + QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( + QLatin1String("titleTokenized"), term.toLower())); + + if (query && query2) { + booleanQuery.add(query, true, false, true); + booleanQuery.add(query2, true, false, true); + } else { + return false; + } + } + } break; + + case QHelpSearchQuery::PHRASE: { + const QString term = query.wordList.at(0).toLower(); + if (term.contains(QLatin1Char(' '))) { + QStringList termList = term.split(QLatin1String(" ")); + QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); + QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString t, termList) { + if (!stopWords.contains(t, Qt::CaseInsensitive)) + q->addTerm(QCLuceneTerm(QLatin1String("content"), t.toLower())); + } + booleanQuery.add(q, true, true, false); + } else { + QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( + QLatin1String("content"), term.toLower())); + QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( + QLatin1String("titleTokenized"), term.toLower())); + + if (query && query2) { + booleanQuery.add(query, true, true, false); + booleanQuery.add(query2, true, false, false); + } else { + return false; + } + } + } break; + + case QHelpSearchQuery::ALL: { + QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); + foreach (const QString term, query.wordList) { + if (stopWords.contains(term, Qt::CaseInsensitive)) + continue; + + QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( + QLatin1String("content"), term.toLower())); + + if (query) { + booleanQuery.add(query, true, true, false); + } else { + return false; + } + } + } break; + + case QHelpSearchQuery::DEFAULT: { + QCLuceneStandardAnalyzer analyzer; + foreach (const QString t, query.wordList) { + QCLuceneQuery *query = QCLuceneQueryParser::parse(t.toLower(), + QLatin1String("content"), analyzer); + + if (query) + booleanQuery.add(query, true, true, false); + } + } break; + + case QHelpSearchQuery::ATLEAST: { + foreach (const QString term, query.wordList) { + if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery)) + return false; + } + } + } + } + + return true; +} + +void QHelpSearchIndexReader::boostSearchHits(const QHelpEngineCore &engine, + QList<QHelpSearchEngine::SearchHit> &hitList, + const QList<QHelpSearchQuery> &queryList) +{ + foreach (const QHelpSearchQuery query, queryList) { + if (query.fieldName != QHelpSearchQuery::DEFAULT) + continue; + + QString joinedQuery = query.wordList.join(QLatin1String(" ")); + + QCLuceneStandardAnalyzer analyzer; + QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse( + joinedQuery, QLatin1String("content"), analyzer); + + if (parsedQuery) { + joinedQuery = parsedQuery->toString(); + delete parsedQuery; + } + + int length = QString(QLatin1String("content:")).length(); + int index = joinedQuery.indexOf(QLatin1String("content:")); + + QString term; + int nextIndex = 0; + QStringList searchTerms; + while (index != -1) { + nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1); + term = joinedQuery.mid(index + length, nextIndex - (length + index)) + .simplified(); + if (term.startsWith(QLatin1String("\"")) + && term.endsWith(QLatin1String("\""))) { + searchTerms.append(term.remove(QLatin1String("\""))); + } else { + searchTerms += term.split(QLatin1Char(' ')); + } + index = nextIndex; + } + searchTerms.removeDuplicates(); + + int count = qMin(75, hitList.count()); + QMap<int, QHelpSearchEngine::SearchHit> hitMap; + for (int i = 0; i < count; ++i) { + const QHelpSearchEngine::SearchHit &hit = hitList.at(i); + QString data = QString::fromUtf8(engine.fileData(hit.first)); + + int counter = 0; + foreach (const QString& term, searchTerms) + counter += data.count(term, Qt::CaseInsensitive); + hitMap.insertMulti(counter, hit); + } + + QList<QHelpSearchEngine::SearchHit> boostedList; + QMap<int, QHelpSearchEngine::SearchHit>::const_iterator i; + for (i = hitMap.constEnd(), --i; i != hitMap.constBegin(); --i) + boostedList.append(i.value()); + boostedList += hitList.mid(count - 1, hitList.count()); + + hitList = boostedList; + } +} + + } // namespace clucene + } // namespace fulltextsearch +} // namespace qt + +QT_END_NAMESPACE |