summaryrefslogtreecommitdiff
path: root/tests/functional-tests/310-fts-indexing.py
blob: 4673da60493331b90f74ec7812a7501a0d4de6eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
#!/usr/bin/python
#-*- coding: utf-8 -*-

# Copyright (C) 2010, Nokia (ivan.frade@nokia.com)
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA  02110-1301, USA.

#
# TODO:
#     These tests are for files... we need to write them for folders!
#
"""
Monitor a directory, copy/move/remove/update text files and check that
the text contents are updated accordingly in the indexes.
"""
import os
import shutil
import locale
import time

import unittest2 as ut
from common.utils.helpers import log
from common.utils.minertest import CommonTrackerMinerTest, MINER_TMP_DIR, uri, path, DEFAULT_TEXT
from common.utils import configuration as cfg

class CommonMinerFTS (CommonTrackerMinerTest):
    """
    Superclass to share methods. Shouldn't be run by itself.

    All the helpers work on one file inside the monitored directory
    (self.testfile): they write it, query the full-text index through
    self.tracker and check the results.
    """
    def prepare_directories (self):
        # Override content from the base class
        pass

    def setUp (self):
        """
        Make sure the test file does not exist, then run the base class setUp.
        """
        self.testfile = "test-monitored/miner-fts-test.txt"
        if os.path.exists (path (self.testfile)):
            os.remove (path (self.testfile))

        super(CommonMinerFTS, self).setUp()

    def set_text (self, text):
        """
        Overwrite the test file with 'text' and wait on the tracker helper.

        If the file already existed, the wait is for a change of its
        nie:plainTextContent property; otherwise it is for the insertion of
        a nfo:Document with that property. The graph updates tracking is
        reset afterwards.
        """
        # Must be checked before writing: opening with "w" creates the file.
        exists = os.path.exists(path(self.testfile))

        # The context manager closes the handle even if the write fails.
        with open (path (self.testfile), "w") as f:
            f.write (text)

        if exists:
            subject_id = self.tracker.get_resource_id(uri(self.testfile))
            self.tracker.await_property_changed(
                subject_id=subject_id, property_uri='nie:plainTextContent')
        else:
            self.tracker.await_resource_inserted(
                rdf_class='nfo:Document', url=uri(self.testfile),
                required_property='nie:plainTextContent')

        self.tracker.reset_graph_updates_tracking()

    def search_word (self, word):
        """
        Return list of URIs with the word in them

        NOTE: 'word' is pasted verbatim between single quotes in the
        query, so it must not contain a single quote itself.
        """
        log ("Search for: %s" % word)
        results = self.tracker.query ("""
                SELECT ?url WHERE {
                  ?u a nfo:TextDocument ;
                      nie:url ?url ;
                      fts:match '%s'.
                 }
                 """ % (word))
        # Each row has a single column (?url)
        return [r[0] for r in results]

    def basic_test (self, text, word):
        """
        Save the text on the testfile, search the word
        and assert the testfile is only result.

        Be careful with the default contents of the text files
        ( see common/utils/minertest.py DEFAULT_TEXT )
        """
        self.set_text (text)
        results = self.search_word (word)
        self.assertEqual (len (results), 1)
        self.assertIn ( uri (self.testfile), results)

    def _query_id (self, uri):
        """
        Return, as an int, the tracker:id of the resource whose nie:url
        is 'uri'. Fails the test unless exactly one resource matches.
        """
        # The 'uri' parameter hides the imported uri() helper inside this
        # method; here it is the already-built URL string.
        query = "SELECT tracker:id(?urn) WHERE { ?urn nie:url \"%s\". }" % uri
        result = self.tracker.query (query)
        # A real assertion (not a bare 'assert') so the check survives -O.
        self.assertEqual (len (result), 1)
        return int (result[0][0])


class MinerFTSBasicTest (CommonMinerFTS):
    """
    Tests different contents in a single file
    """

    def test_01_single_word (self):
        """
        A file with a single word is found searching for that word
        """
        TEXT = "automobile"
        self.basic_test (TEXT, TEXT)

    def test_02_multiple_words (self):
        """
        Different words of the same file all lead to the file
        """
        TEXT = "automobile with unlimited power"
        self.set_text (TEXT)

        results = self.search_word ("automobile")
        self.assertEqual (len (results), 1)
        self.assertIn (uri (self.testfile), results)

        results = self.search_word ("unlimited")
        self.assertEqual (len (results), 1)
        self.assertIn (uri (self.testfile), results)


    def test_03_long_word (self):
        """
        A word over the configured length limit gives no results
        """
        # TEXT is longer than the 20 characters specified in the fts configuration
        TEXT = "fsfsfsdfskfweeqrewqkmnbbvkdasdjefjewriqjfnc"
        self.set_text (TEXT)

        results = self.search_word (TEXT)
        self.assertEqual (len (results), 0)

    def test_04_non_existent_word (self):
        """
        Searching for a word that is not in the file gives no results
        """
        TEXT = "This a trick"
        self.set_text (TEXT)
        results = self.search_word ("trikc")
        self.assertEqual (len (results), 0)


    def test_05_word_in_multiple_files (self):
        """
        A word present in several files returns all of them
        """
        # Safeguard, in the case we modify the DEFAULT_TEXT later...
        # (a real assertion, so it is not stripped when running with -O)
        self.assertIn ("content", DEFAULT_TEXT)

        self.set_text (DEFAULT_TEXT)
        results = self.search_word ("content")
        self.assertEqual (len (results), 4)
        self.assertIn ( uri (self.testfile), results)
        self.assertIn ( uri ("test-monitored/file1.txt"), results)
        self.assertIn ( uri ("test-monitored/dir1/file2.txt"), results)
        self.assertIn ( uri ("test-monitored/dir1/dir2/file3.txt"), results)

    def test_06_word_multiple_times_in_file (self):
        """
        A word repeated in the file still returns the file only once
        """
        TEXT = "automobile is red. automobile is big. automobile is great!"
        self.basic_test (TEXT, "automobile")

    def test_07_sentence (self):
        """
        Searching for the whole sentence finds the file
        """
        TEXT = "plastic is fantastic"
        self.basic_test (TEXT, TEXT)

    def test_08_partial_sentence (self):
        """
        Searching for a part of the sentence finds the file
        """
        TEXT = "plastic is fantastic"
        self.basic_test (TEXT, "is fantastic")

    def test_09_strange_word (self):
        # FIXME Not sure what are we testing here
        TEXT = "'summer.time'"
        self.basic_test (TEXT, "summer.time")

    # Skip the test 'search for .'

    def test_10_mixed_letters_and_numbers (self):
        """
        A word mixing letters and digits is found
        """
        TEXT = "abc123"
        self.basic_test (TEXT, "abc123")

    def test_11_ignore_numbers (self):
        """
        Searching for a word made only of digits gives no results
        """
        TEXT = "palabra 123123"
        self.set_text (TEXT)
        results = self.search_word ("123123")
        self.assertEqual (len (results), 0)


class MinerFTSFileOperationsTest (CommonMinerFTS):
    """
    Move, update, delete the files and check the text indexes are updated accordingly.
    """

    def test_01_removal_of_file (self):
        """
        When removing the file, its text contents disappear from the index
        """
        TEXT = "automobile is red and big and whatnot"
        self.basic_test (TEXT, "automobile")

        # The id has to be fetched while the resource still exists
        resource_id = self._query_id (uri (self.testfile))
        os.remove ( path (self.testfile))
        self.tracker.await_resource_deleted (resource_id)

        results = self.search_word ("automobile")
        self.assertEqual (len (results), 0)

    def test_02_empty_the_file (self):
        """
        Emptying the file, the indexed words are also removed

        FIXME: this test currently fails!
        """
        TEXT = "automobile is red and big and whatnot"
        self.basic_test (TEXT, "automobile")

        self.set_text ("")
        results = self.search_word ("automobile")
        self.assertEqual (len (results), 0)

    def test_03_update_the_file (self):
        """
        Changing the contents of the file, updates the index

        FIXME: this test fails!
        """
        TEXT = "automobile is red and big and whatnot"
        self.basic_test (TEXT, "automobile")

        self.set_text ("airplane is blue and small and wonderful")

        results = self.search_word ("automobile")
        self.assertEqual (len (results), 0)

        results = self.search_word ("airplane")
        self.assertEqual (len (results), 1)

    # Skip the test_text_13... feel, feet, fee in three diff files and search feet

    def __recreate_file (self, filename, content):
        """
        Create 'filename' from scratch containing 'content', deleting
        any previous file with that name first.
        """
        if os.path.exists (filename):
            os.remove (filename)

        # The context manager closes the handle even if the write fails.
        with open (filename, "w") as f:
            f.write (content)


    def test_04_on_unmonitored_file (self):
        """
        Set text in an unmonitored file. There should be no results.
        """
        TEXT = "automobile is red"

        TEST_15_FILE = "test-no-monitored/fts-indexing-test-15.txt"
        self.__recreate_file (path (TEST_15_FILE), TEXT)

        try:
            results = self.search_word ("automobile")
            self.assertEqual (len (results), 0)
        finally:
            # Clean up even when the assertion fails, so the file does
            # not leak into later tests.
            os.remove (path (TEST_15_FILE))

    def test_05_move_file_unmonitored_monitored (self):
        """
        Copy file from unmonitored location to monitored location and index should be updated
        """

        TEXT = "airplane is beautiful"
        TEST_16_SOURCE = "test-no-monitored/fts-indexing-text-16.txt"
        TEST_16_DEST = "test-monitored/fts-indexing-text-16.txt"

        self.__recreate_file (path (TEST_16_SOURCE), TEXT)
        try:
            # the file is supposed to be ignored by tracker, so there is no notification..
            time.sleep (5)

            results = self.search_word ("airplane")
            self.assertEqual (len (results), 0)

            shutil.copyfile ( path (TEST_16_SOURCE), path (TEST_16_DEST))
            self.tracker.await_resource_inserted (rdf_class = 'nfo:Document',
                                                  url = uri(TEST_16_DEST),
                                                  required_property = 'nie:plainTextContent')

            results = self.search_word ("airplane")
            self.assertEqual (len (results), 1)
        finally:
            # Clean up even when a check fails. The destination may not
            # have been created yet, so only remove what is there.
            for leftover in (TEST_16_SOURCE, TEST_16_DEST):
                if os.path.exists (path (leftover)):
                    os.remove (path (leftover))

    # skip test for a file in a hidden directory

class MinerFTSStopwordsTest (CommonMinerFTS):
    """
    Search for stopwords in a file
    """

    def __get_some_stopwords (self):
        """
        Return up to six stopwords read from the stopwords file of the
        current locale's language.

        Only lines longer than four characters (newline included) are
        used. The test is skipped when the locale cannot be determined
        or there is no stopwords file for it.
        """
        langcode, encoding = locale.getdefaultlocale ()
        if langcode is None:
            # getdefaultlocale () returns (None, None) when the locale
            # cannot be determined; without this the "in" test below
            # would raise a TypeError.
            self.skipTest ("Unable to determine the current locale")

        # "en_GB" -> "en"
        if "_" in langcode:
            langcode = langcode.split ("_")[0]

        stopwordsfile = os.path.join (cfg.DATADIR, "tracker", "stop-words", "stopwords." + langcode)

        # skipTest raises, so nothing below runs when the file is missing
        if not os.path.exists (stopwordsfile):
            self.skipTest ("No stopwords for the current locale ('%s' doesn't exist)" % (stopwordsfile))

        stopwords = []
        with open (stopwordsfile, "r") as f:
            for line in f:
                if len (line) > 4:
                    # Strip only the newline, so a last line without
                    # one does not lose a real character.
                    stopwords.append (line.rstrip ("\n"))

                if len (stopwords) > 5:
                    break

        return stopwords

    def test_01_stopwords (self):
        """
        Stopwords in the file are not indexed, the normal words are
        """
        stopwords = self.__get_some_stopwords ()
        TEXT = " ".join (["this a completely normal text automobile"] + stopwords)

        self.set_text (TEXT)
        results = self.search_word ("automobile")
        self.assertEqual (len (results), 1)
        log ("Stopwords: %s" % stopwords)
        for stopword in stopwords:
            results = self.search_word (stopword)
            self.assertEqual (len (results), 0)

    ## FIXME add all the special character tests!
    ##  http://git.gnome.org/browse/tracker/commit/?id=81c0d3bd754a6b20ac72323481767dc5b4a6217b
    

# Script entry point: hand control to the unittest2 runner.
if __name__ == "__main__":
    ut.main()