summaryrefslogtreecommitdiff
path: root/glance_store/_drivers/http.py
blob: ef2ed4c73b63ef97b92c9a196ffa868f017311f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# Copyright 2010 OpenStack Foundation
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import logging

from oslo_utils import encodeutils
from six.moves import urllib

import requests

from glance_store import capabilities
import glance_store.driver
from glance_store import exceptions
from glance_store.i18n import _
import glance_store.location

LOG = logging.getLogger(__name__)


MAX_REDIRECTS = 5


class StoreLocation(glance_store.location.StoreLocation):

    """Class describing an HTTP(S) URI."""

    def process_specs(self):
        """Populate the URI components from the ``self.specs`` mapping.

        ``netloc`` is mandatory (a missing key raises ``KeyError``). The
        scheme falls back to ``'http'`` and the other components fall back
        to ``None`` when absent from the specs.
        """
        self.scheme = self.specs.get('scheme', 'http')
        self.netloc = self.specs['netloc']
        self.user = self.specs.get('user')
        self.password = self.specs.get('password')
        self.path = self.specs.get('path')

    def _get_credstring(self):
        """Return the ``user:password@`` URI prefix, or ``''`` if no user."""
        if self.user:
            return '%s:%s@' % (self.user, self.password)
        return ''

    def get_uri(self):
        """Rebuild the URI string from scheme, credentials, netloc, path."""
        return "%s://%s%s%s" % (
            self.scheme,
            self._get_credstring(),
            self.netloc,
            self.path)

    def parse_uri(self, uri):
        """
        Parse an HTTP(S) URI and store its scheme, credentials, netloc and
        path on this object.

        This method fixes an issue where credentials specified
        in the URL are interpreted differently in Python 2.6.1+ than prior
        versions of Python.

        :param uri: the URI string to parse
        :raises: `glance_store.exceptions.BadStoreUri` if the scheme is not
                 http or https, the credentials are not of the form
                 ``user:password``, no address is present, or the port is
                 not numeric
        """
        pieces = urllib.parse.urlparse(uri)
        # NOTE: validate explicitly rather than with ``assert``. Assertions
        # are removed when Python runs with -O, which would let any scheme
        # through, and an AssertionError is not the error type used for
        # the other malformed-URI cases below.
        if pieces.scheme not in ('https', 'http'):
            LOG.info(_("URI scheme must be http or https"))
            raise exceptions.BadStoreUri(uri=uri)
        self.scheme = pieces.scheme
        netloc = pieces.netloc
        path = pieces.path
        try:
            if '@' in netloc:
                creds, netloc = netloc.split('@')
            else:
                creds = None
        except ValueError:
            # Python 2.6.1 compat
            # see lp659445 and Python issue7904
            if '@' in path:
                creds, path = path.split('@')
            else:
                creds = None
        if creds:
            try:
                self.user, self.password = creds.split(':')
            except ValueError:
                reason = _("Credentials are not well-formatted.")
                LOG.info(reason)
                raise exceptions.BadStoreUri(message=reason)
        else:
            self.user = None
            # Keep the attribute set consistent with process_specs() so
            # ``password`` always exists after parsing.
            self.password = None

        if netloc == '':
            LOG.info(_("No address specified in HTTP URL"))
            raise exceptions.BadStoreUri(uri=uri)

        # IPv6 address has the following format [1223:0:0:..]:<some_port>
        # we need to be sure that we are validating port in both IPv4,IPv6
        delimiter = "]:" if netloc.count(":") > 1 else ":"
        port = netloc.partition(delimiter)[2]
        # if port is present in location then validate port format
        if port and not port.isdigit():
            raise exceptions.BadStoreUri(uri=uri)

        self.netloc = netloc
        self.path = path


def http_response_iterator(conn, response, size):
    """
    Yield the body of a file-like response in pieces of at most ``size``.

    Reading stops at the first empty read. ``conn`` is closed when the
    generator finishes, whether through exhaustion, an error raised while
    reading, or the generator being closed early.

    :param conn: HTTP(S) Connection
    :param response: urllib3.HTTPResponse object
    :param size: Chunk size to iterate with
    """
    try:
        while True:
            data = response.read(size)
            if not data:
                break
            yield data
    finally:
        conn.close()


class Store(glance_store.driver.Store):

    """An implementation of the HTTP(S) Backend Adapter"""

    _CAPABILITIES = (capabilities.BitMasks.READ_ACCESS |
                     capabilities.BitMasks.DRIVER_REUSABLE)

    @capabilities.check
    def get(self, location, offset=0, chunk_size=None, context=None):
        """
        Takes a `glance_store.location.Location` object that indicates
        where to find the image file, and returns a tuple of generator
        (for reading the image file) and image_size

        :param location: `glance_store.location.Location` object, supplied
                        from glance_store.location.get_location_from_uri()
        :param offset: not used by this method
        :param chunk_size: not used by this method; the response is read
                        in pieces of ``self.READ_CHUNKSIZE``
        :param context: not used by this method
        :raises: `glance_store.exceptions.RemoteServiceUnavailable` if the
                 connection to the remote server fails
        """
        try:
            conn, resp, content_length = self._query(location, 'GET')
        except requests.exceptions.ConnectionError:
            reason = _("Remote server where the image is present "
                       "is unavailable.")
            LOG.exception(reason)
            raise exceptions.RemoteServiceUnavailable(message=reason)

        iterator = http_response_iterator(conn, resp, self.READ_CHUNKSIZE)

        class ResponseIndexable(glance_store.Indexable):
            def another(self):
                try:
                    return next(self.wrapped)
                except StopIteration:
                    # Signal end-of-stream with empty bytes rather than an
                    # empty text string, so the marker has the same type
                    # as the chunks read from the raw response.
                    return b''

        return (ResponseIndexable(iterator, content_length), content_length)

    def get_schemes(self):
        """Return the URI schemes handled by this store."""
        return ('http', 'https')

    def get_size(self, location, context=None):
        """
        Takes a `glance_store.location.Location` object that indicates
        where to find the image file, and returns the size

        :param location: `glance_store.location.Location` object, supplied
                        from glance_store.location.get_location_from_uri()
        :param context: not used by this method
        :raises: `glance_store.exceptions.BadStoreUri` if the connection to
                 the remote server fails
        """
        conn = None
        try:
            conn, resp, size = self._query(location, 'HEAD')
        except requests.exceptions.ConnectionError as exc:
            err_msg = encodeutils.exception_to_unicode(exc)
            reason = _("The HTTP URL is invalid: %s") % err_msg
            LOG.info(reason)
            raise exceptions.BadStoreUri(message=reason)
        finally:
            # NOTE(sabari): Close the connection as the request was made with
            # stream=True
            if conn is not None:
                conn.close()
        return size

    def _query(self, location, verb):
        """
        Send ``verb`` to ``location``, following redirects manually.

        A response with a status code below 300 ends the loop. Any other
        response is validated by ``_check_store_uri``, closed, and followed
        to the URL in its ``Location`` header. The loop gives up after
        ``MAX_REDIRECTS`` redirects.

        :param location: `glance_store.location.Location` object
        :param verb: HTTP method name, e.g. ``'GET'`` or ``'HEAD'``
        :returns: tuple of (response, raw response, content length); the
                  content length is 0 when the header is absent
        :raises: `glance_store.exceptions.MaxRedirectsExceeded` if too many
                 redirects are followed
        :raises: `glance_store.exceptions.NotFound` on a 404 response
        :raises: `glance_store.exceptions.BadStoreUri` on other error
                 responses, on redirects with an unsupported status code,
                 or on a 3xx response carrying no ``Location`` header
        """
        redirects_followed = 0

        while redirects_followed < MAX_REDIRECTS:
            loc = location.store_location

            conn = self._get_response(loc, verb)

            # NOTE(sigmavirus24): If it was generally successful, break early
            if conn.status_code < 300:
                break

            try:
                self._check_store_uri(conn, loc)

                # A 3xx response without a Location header (e.g. 304) passes
                # the check above but gives us nowhere to go; report it as
                # a bad URI instead of failing with a bare KeyError.
                redirect_url = conn.headers.get('location')
                if not redirect_url:
                    reason = (_("The HTTP URL %(url)s returned a %(status)s "
                                "status code without a redirect location.") %
                              {'url': loc.path, 'status': conn.status_code})
                    LOG.info(reason)
                    raise exceptions.BadStoreUri(message=reason)
            finally:
                # NOTE(sigmavirus24): Close the response so we don't leak
                # sockets. Doing it in ``finally`` also covers the case
                # where validation raises.
                conn.close()

            redirects_followed += 1

            location = self._new_location(location, redirect_url)
        else:
            reason = (_("The HTTP URL exceeded %s maximum "
                        "redirects.") % MAX_REDIRECTS)
            LOG.debug(reason)
            raise exceptions.MaxRedirectsExceeded(message=reason)

        resp = conn.raw

        content_length = int(resp.getheader('content-length', 0))
        return (conn, resp, content_length)

    def _new_location(self, old_location, url):
        """
        Build a Location for ``url`` that reuses the store name, store
        location class, image id and store specs of ``old_location``.
        """
        store_name = old_location.store_name
        store_class = old_location.store_location.__class__
        image_id = old_location.image_id
        store_specs = old_location.store_specs
        return glance_store.location.Location(store_name,
                                              store_class,
                                              self.conf,
                                              uri=url,
                                              image_id=image_id,
                                              store_specs=store_specs)

    @staticmethod
    def _check_store_uri(conn, loc):
        """
        Validate a non-successful response before it is followed.

        :param conn: the response object to inspect
        :param loc: store location the request was sent to; its path is
                    used in error messages
        :raises: `glance_store.exceptions.NotFound` on a 404 response
        :raises: `glance_store.exceptions.BadStoreUri` on any other status
                 of 400 or above, or on a redirect whose status code is
                 neither 301 nor 302
        """
        # Check for bad status codes
        if conn.status_code >= 400:
            if conn.status_code == requests.codes.not_found:
                reason = _("HTTP datastore could not find image at URI.")
                LOG.debug(reason)
                raise exceptions.NotFound(message=reason)

            reason = (_("HTTP URL %(url)s returned a "
                        "%(status)s status code. \nThe response body:\n"
                        "%(body)s") %
                      {'url': loc.path, 'status': conn.status_code,
                       'body': conn.text})
            LOG.debug(reason)
            raise exceptions.BadStoreUri(message=reason)

        if conn.is_redirect and conn.status_code not in (301, 302):
            reason = (_("The HTTP URL %(url)s attempted to redirect "
                        "with an invalid %(status)s status code.") %
                      {'url': loc.path, 'status': conn.status_code})
            LOG.info(reason)
            raise exceptions.BadStoreUri(message=reason)

    def _get_response(self, location, verb):
        """
        Send ``verb`` to the URI of ``location`` and return the response.

        The ``requests`` session is created on first use and kept on the
        instance. The request is streamed and redirects are not followed
        automatically; ``_query`` handles them.
        """
        if not hasattr(self, 'session'):
            self.session = requests.Session()
        return self.session.request(verb, location.get_uri(), stream=True,
                                    allow_redirects=False)