summaryrefslogtreecommitdiff
path: root/bindings/python/libproxy.py
blob: 563c2de432bb4ebad32d16674ac71162c108f5b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
###############################################################################
# libproxy - A library for proxy configuration
# Copyright (C) 2006 Nathaniel McCallum <nathaniel@natemccallum.com>
# 
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# 
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
###############################################################################

"A library for proxy configuration and autodetection."

import ctypes
import ctypes.util
import platform

import sys

# Load C library
if platform.system() == "Windows":
    _libc = ctypes.cdll.msvcrt
else:
    # Look the library up once and reuse the result for the load call.
    _libc_path = ctypes.util.find_library("c")
    if not _libc_path:
        raise ImportError("Unable to import C Library!?!")
    _libc = ctypes.cdll.LoadLibrary(_libc_path)

# Without an explicit prototype ctypes passes Python integers as C 'int',
# which truncates addresses on 64-bit platforms.  Declaring the parameter as
# a void pointer makes free() receive the full address whether it is handed
# an integer, a c_void_p or a ctypes pointer instance.
_libc.free.argtypes = [ctypes.c_void_p]
_libc.free.restype = None


# Load libproxy
_libproxy_path = ctypes.util.find_library("proxy")
if not _libproxy_path:
    raise ImportError("Unable to import libproxy!?!?")


_libproxy = ctypes.cdll.LoadLibrary(_libproxy_path)
# ctypes assumes every foreign function returns a C 'int' unless told
# otherwise.  The factory constructor returns a pointer, so give it a pointer
# return type; the resulting pointer object is then passed back to libproxy
# at full width even by calls that declare no argtypes.
_libproxy.px_proxy_factory_new.restype = ctypes.POINTER(ctypes.c_void_p)
_libproxy.px_proxy_factory_get_proxies.restype = ctypes.POINTER(ctypes.c_void_p)
_libproxy.px_proxy_factory_free.argtypes = [ctypes.c_void_p]
_libproxy.px_proxy_factory_free.restype = None

class ProxyFactory(object):
    """A ProxyFactory object is used to provide potential proxies to use
    in order to reach a given URL (via 'getProxies(url)').

    This instance should be kept around as long as possible as it contains
    cached data to increase performance.  Memory usage should be minimal (cache
    is small) and the cache lifespan is handled automatically.

    Usage is pretty simple:
        pf = libproxy.ProxyFactory()
        for url in urls:
            proxies = pf.getProxies(url)
            for proxy in proxies:
                if proxy == "direct://":
                    # Fetch URL without using a proxy
                elif proxy.startswith("http://"):
                    # Fetch URL using an HTTP proxy
                elif proxy.startswith("socks://"):
                    # Fetch URL using a SOCKS proxy

                if fetchSucceeded:
                    break
    """

    class ProxyResolutionError(RuntimeError):
        """Exception raised when proxy cannot be resolved generally
           due to invalid URL"""

    def __init__(self):
        """Create the underlying libproxy factory and keep its handle."""
        self._pf = _libproxy.px_proxy_factory_new()

    def getProxies(self, url):
        """Given a URL, returns a list of proxies in priority order to be used
        to reach that URL.

        A list of proxy strings is returned.  If the first proxy fails, the
        second should be tried, etc...  libproxy is expected to return at
        least one entry on success.

        Raises TypeError if 'url' is not a str, and
        ProxyFactory.ProxyResolutionError if libproxy returns no result
        (NULL) for the URL.

        Regarding performance: this method always blocks and may be called
        in a separate thread (is thread-safe).  In most cases, the time
        required to complete this function call is simply the time required
        to read the configuration (e.g. from GConf, Kconfig, etc).

        In the case of PAC, if no valid PAC is found in the cache (i.e.
        configuration has changed, cache is invalid, etc), the PAC file is
        downloaded and inserted into the cache. This is the most expensive
        operation as the PAC is retrieved over the network. Once a PAC exists
        in the cache, it is merely a JavaScript invocation to evaluate the PAC.
        One should note that DNS can be called from within a PAC during
        JavaScript invocation.

        In the case of WPAD, WPAD is used to automatically locate a PAC on the
        network.  Currently, we only use DNS for this, but other methods may
        be implemented in the future.  Once the PAC is located, normal PAC
        performance (described above) applies.

        """
        if not isinstance(url, str):
            raise TypeError("url must be a string!")

        # libproxy takes a NUL-terminated byte string.  On Python 2 'str' is
        # already bytes; on Python 3 it is text, which ctypes would otherwise
        # pass as a wide-character string, so encode it first.
        if isinstance(url, bytes):
            url_bytes = url
        else:
            url_bytes = url.encode("utf-8")

        array = _libproxy.px_proxy_factory_get_proxies(self._pf, url_bytes)

        if not array:
            raise ProxyFactory.ProxyResolutionError(
                    "Can't resolve proxy for '%s'" % url)

        proxies = []
        try:
            i = 0
            # The result is a NULL-terminated array of C strings.
            while array[i]:
                # Indexing yields a bare integer address; wrap it so it is
                # passed to C as a full-width pointer, not as a C 'int'.
                entry = ctypes.c_void_p(array[i])
                # .value copies the C string, so freeing afterwards is safe.
                raw = ctypes.cast(entry, ctypes.c_char_p).value
                if not isinstance(raw, str):
                    # Python 3: turn bytes into text instead of "b'...'".
                    raw = raw.decode("utf-8", "replace")
                proxies.append(raw)
                _libc.free(entry)
                i += 1
        finally:
            # Always release the array itself, even if the loop failed.
            _libc.free(array)

        return proxies

    def __del__(self):
        """Release the underlying libproxy factory, if one was created."""
        # '_pf' is missing when __init__ failed or was bypassed, and module
        # globals may already be cleared during interpreter shutdown.
        pf = getattr(self, "_pf", None)
        if not pf or _libproxy is None:
            return
        # Drop the handle first so a second call cannot free it twice.
        self._pf = None
        _libproxy.px_proxy_factory_free(pf)