1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
** This file is part of the Qt Linguist of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL21$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see http://www.qt.io/terms-conditions. For further
** information use the contact form at http://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 or version 3 as published by the Free
** Software Foundation and appearing in the file LICENSE.LGPLv21 and
** LICENSE.LGPLv3 included in the packaging of this file. Please review the
** following information to ensure the GNU Lesser General Public License
** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** As a special exception, The Qt Company gives you certain additional
** rights. These rights are described in The Qt Company LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef SIMTEXTH_H
#define SIMTEXTH_H
// Similarity-score threshold. The documentation of getSimilarityScore() in
// this header states that Linguist considers a score over 190 a good match.
const int textSimilarityThreshold = 190;
#include <QString>
#include <QList>
QT_BEGIN_NAMESPACE
// Forward declaration: this header only refers to Translator through a
// pointer parameter (see similarTextHeuristicCandidates below).
class Translator;
/*
 * A pair of strings: a source text together with its target text.
 * Instances are compared member-wise by the operators that follow.
 */
struct Candidate
{
    QString source;
    QString target;

    // Leaves both strings default-constructed.
    Candidate() {}

    // Copies the given source and target strings into the new candidate.
    Candidate(const QString& source0, const QString &target0)
        : source(source0),
          target(target0)
    {
    }
};
/*
 * Two candidates are equal when both their target strings and their source
 * strings are equal. The target is compared first.
 */
inline bool operator==( const Candidate& c, const Candidate& d )
{
    if ( !( c.target == d.target ) )
        return false;
    return c.source == d.source;
}
/*
 * Negation of operator== for candidates.
 */
inline bool operator!=( const Candidate& c, const Candidate& d )
{
    const bool same = ( c == d );
    return !same;
}
// List of Candidate values; the return type of similarTextHeuristicCandidates().
typedef QList<Candidate> CandidateList;
/*
 * Fixed-size matrix used by the similarity matcher declared in this header.
 */
struct CoMatrix
{
    // Performs no initialisation: the storage below is left as-is.
    CoMatrix() {}

    // Builds the matrix from a string; defined outside this header.
    CoMatrix(const QString &str);

    /*
      A 20 * 20 matrix has 400 entries, which fit in 50 bytes. The
      storage is sized as 13 32-bit words (52 bytes) so that the same
      memory can be addressed either byte-wise (b) or word-wise (w);
      some operations work on whole words for efficiency.
    */
    union {
        quint8 b[52];
        quint32 w[13];
    };
};
/**
 * Scores many candidate strings against one fixed string.
 *
 * The CoMatrix for \a stringToMatch only has to be constructed once, when
 * the matcher is created; each candidate is then scored with a call to
 * getSimilarityScore(strCandidate). This makes the class the more efficient
 * choice when searching through a large array of candidate strings.
 *
 * \sa getSimilarityScore
 */
class StringSimilarityMatcher
{
public:
    StringSimilarityMatcher(const QString &stringToMatch);

    // Returns the similarity score of strCandidate; a higher score means
    // a closer match.
    int getSimilarityScore(const QString &strCandidate);

private:
    CoMatrix m_cm;  // matrix kept for the string being matched
    int m_length;   // NOTE(review): presumably the length of stringToMatch;
                    // set in the implementation file - confirm there
};
/**
* Checks how similar two strings are.
* The return value is the score, and a higher score is more similar
* than one with a low score.
* Linguist considers a score over 190 to be a good match.
* \sa StringSimilarityMatcher
*/
static inline int getSimilarityScore(const QString &str1, const QString &str2)
{
return StringSimilarityMatcher(str1).getSimilarityScore(str2);
}
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): judging by the names, this presumably returns at most
 * maxCandidates entries taken from tor whose text resembles \a text -
 * confirm against the implementation.
 */
CandidateList similarTextHeuristicCandidates( const Translator *tor,
const QString &text,
int maxCandidates );
QT_END_NAMESPACE
#endif
|