apscheduler/jobstores/mongodb.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

from __future__ import absolute_import
import warnings

from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError
from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime
from apscheduler.job import Job

try:
    import cPickle as pickle
except ImportError:  # pragma: nocover
    import pickle

try:
    from bson.binary import Binary
    from pymongo.errors import DuplicateKeyError
    from pymongo import MongoClient, ASCENDING
except ImportError:  # pragma: nocover
    raise ImportError('MongoDBJobStore requires PyMongo installed')


class MongoDBJobStore(BaseJobStore):
    """
    Stores jobs in a MongoDB database. Any leftover keyword arguments are directly passed to
    pymongo's `MongoClient
    <http://api.mongodb.org/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient>`_.

    Plugin alias: ``mongodb``

    :param str database: database to store jobs in
    :param str collection: collection to store jobs in
    :param client: a :class:`~pymongo.mongo_client.MongoClient` instance to use instead of
        providing connection arguments
    :param int pickle_protocol: pickle protocol level to use (for serialization), defaults to the
        highest available
    """

    def __init__(self, database='apscheduler', collection='jobs', client=None,
                 pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args):
        super(MongoDBJobStore, self).__init__()
        self.pickle_protocol = pickle_protocol

        if not database:
            raise ValueError('The "database" parameter must not be empty')
        if not collection:
            raise ValueError('The "collection" parameter must not be empty')

        if client:
            self.client = maybe_ref(client)
        else:
            connect_args.setdefault('w', 1)
            self.client = MongoClient(**connect_args)

        self.collection = self.client[database][collection]

    def start(self, scheduler, alias):
        super(MongoDBJobStore, self).start(scheduler, alias)
        self.collection.create_index('next_run_time', sparse=True)

    @property
    def connection(self):
        warnings.warn('The "connection" member is deprecated -- use "client" instead',
                      DeprecationWarning)
        return self.client

    def lookup_job(self, job_id):
        document = self.collection.find_one(job_id, ['job_state'])
        return self._reconstitute_job(document['job_state']) if document else None

    def get_due_jobs(self, now):
        timestamp = datetime_to_utc_timestamp(now)
        return self._get_jobs({'next_run_time': {'$lte': timestamp}})

    def get_next_run_time(self):
        document = self.collection.find_one({'next_run_time': {'$ne': None}},
                                            projection=['next_run_time'],
                                            sort=[('next_run_time', ASCENDING)])
        return utc_timestamp_to_datetime(document['next_run_time']) if document else None

    def get_all_jobs(self):
        jobs = self._get_jobs({})
        self._fix_paused_jobs_sorting(jobs)
        return jobs

    def add_job(self, job):
        try:
            self.collection.insert_one({
                '_id': job.id,
                'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
                'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
            })
        except DuplicateKeyError:
            raise ConflictingIdError(job.id)

    def update_job(self, job):
        changes = {
            'next_run_time': datetime_to_utc_timestamp(job.next_run_time),
            'job_state': Binary(pickle.dumps(job.__getstate__(), self.pickle_protocol))
        }
        result = self.collection.update_one({'_id': job.id}, {'$set': changes})
        if result and result.matched_count == 0:
            raise JobLookupError(job.id)

    def remove_job(self, job_id):
        result = self.collection.delete_one({'_id': job_id})
        if result and result.deleted_count == 0:
            raise JobLookupError(job_id)

    def remove_all_jobs(self):
        self.collection.remove()

    def shutdown(self):
        self.client.close()

    def _reconstitute_job(self, job_state):
        job_state = pickle.loads(job_state)
        job = Job.__new__(Job)
        job.__setstate__(job_state)
        job._scheduler = self._scheduler
        job._jobstore_alias = self._alias
        return job

    def _get_jobs(self, conditions):
        jobs = []
        failed_job_ids = []
        for document in self.collection.find(conditions, ['_id', 'job_state'],
                                             sort=[('next_run_time', ASCENDING)]):
            try:
                jobs.append(self._reconstitute_job(document['job_state']))
            except BaseException:
                self._logger.exception('Unable to restore job "%s" -- removing it',
                                       document['_id'])
                failed_job_ids.append(document['_id'])

        # Remove all the jobs we failed to restore
        if failed_job_ids:
            self.collection.remove({'_id': {'$in': failed_job_ids}})

        return jobs

    def __repr__(self):
        return '<%s (client=%s)>' % (self.__class__.__name__, self.client)