summary | refs | log | tree | commit | diff
path: root/Tools/freeze
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 1998-03-05 03:42:00 +0000
committer: Guido van Rossum <guido@python.org>, 1998-03-05 03:42:00 +0000
commit: 75dc4969ab202e8c3dda15bedacc880d589e1e44 (patch)
tree: 1dc5d67c772aba4027076eedeeddbe73c797257e /Tools/freeze
parent: 3455edcbc8fa586a6d3c70ea34e3c71b2763c98a (diff)
download: cpython-git-75dc4969ab202e8c3dda15bedacc880d589e1e44.tar.gz
Added support for packages.
We have a whole new module finder that uses the actual Python parser and scans the bytecode for IMPORT_NAME and IMPORT_FROM. This requires some support in import.c (that hasn't been checked in). New command line options for this: -d, -q, -m.
Diffstat (limited to 'Tools/freeze')
-rw-r--r-- Tools/freeze/findmodules.py 127
-rwxr-xr-x Tools/freeze/freeze.py 63
-rw-r--r-- Tools/freeze/makefreeze.py 74
-rw-r--r-- Tools/freeze/modulefinder.py 373
4 files changed, 438 insertions, 199 deletions
diff --git a/Tools/freeze/findmodules.py b/Tools/freeze/findmodules.py
deleted file mode 100644
index 9e02f2be36..0000000000
--- a/Tools/freeze/findmodules.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Determine the names and filenames of the modules imported by a
-# script, recursively. This is done by scanning for lines containing
-# import statements. (The scanning has only superficial knowledge of
-# Python syntax and no knowledge of semantics, so in theory the result
-# may be incorrect -- however this is quite unlikely if you don't
-# intentionally obscure your Python code.)
-
-import os
-import regex
-import string
-import sys
-
-
-# Top-level interface.
-# First argument is the main program (script).
-# Second optional argument is list of modules to be searched as well.
-
-def findmodules(scriptfile, modules = [], path = sys.path):
- todo = {}
- todo['__main__'] = scriptfile
- for name in modules:
- mod = os.path.basename(name)
- if mod[-3:] == '.py': mod = mod[:-3]
- elif mod[-4:] == '.pyc': mod = mod[:-4]
- todo[mod] = name
- done = closure(todo)
- return done
-
-
-# Compute the closure of scanfile() and findmodule().
-# Return a dictionary mapping module names to filenames.
-# Writes to stderr if a file can't be or read.
-
-def closure(todo):
- done = {}
- while todo:
- newtodo = {}
- for modname in todo.keys():
- if not done.has_key(modname):
- filename = todo[modname]
- if filename is None:
- filename = findmodule(modname)
- done[modname] = filename
- if filename in ('<builtin>', '<unknown>'):
- continue
- try:
- modules = scanfile(filename)
- except IOError, msg:
- sys.stderr.write("%s: %s\n" %
- (filename, str(msg)))
- continue
- for m in modules:
- if not done.has_key(m):
- newtodo[m] = None
- todo = newtodo
- return done
-
-
-# Scan a file looking for import statements.
-# Return list of module names.
-# Can raise IOError.
-
-importstr = '\(^\|:\)[ \t]*import[ \t]+\([a-zA-Z0-9_, \t]+\)'
-fromstr = '\(^\|:\)[ \t]*from[ \t]+\([a-zA-Z0-9_]+\)[ \t]+import[ \t]+'
-isimport = regex.compile(importstr)
-isfrom = regex.compile(fromstr)
-
-def scanfile(filename):
- allmodules = {}
- f = open(filename, 'r')
- try:
- while 1:
- line = f.readline()
- if not line: break # EOF
- while line[-2:] == '\\\n': # Continuation line
- line = line[:-2] + ' '
- line = line + f.readline()
- if isimport.search(line) >= 0:
- rawmodules = isimport.group(2)
- modules = string.splitfields(rawmodules, ',')
- for i in range(len(modules)):
- modules[i] = string.strip(modules[i])
- elif isfrom.search(line) >= 0:
- modules = [isfrom.group(2)]
- else:
- continue
- for mod in modules:
- allmodules[mod] = None
- finally:
- f.close()
- return allmodules.keys()
-
-
-# Find the file containing a module, given its name.
-# Return filename, or '<builtin>', or '<unknown>'.
-
-builtins = sys.builtin_module_names
-tails = ['.py', '.pyc']
-
-def findmodule(modname, path = sys.path):
- if modname in builtins: return '<builtin>'
- for dirname in path:
- for tail in tails:
- fullname = os.path.join(dirname, modname + tail)
- try:
- f = open(fullname, 'r')
- except IOError:
- continue
- f.close()
- return fullname
- return '<unknown>'
-
-
-# Test the above functions.
-
-def test():
- if not sys.argv[1:]:
- print 'usage: python findmodules.py scriptfile [morefiles ...]'
- sys.exit(2)
- done = findmodules(sys.argv[1], sys.argv[2:])
- items = done.items()
- items.sort()
- for mod, file in [('Module', 'File')] + items:
- print "%-15s %s" % (mod, file)
-
-if __name__ == '__main__':
- test()
diff --git a/Tools/freeze/freeze.py b/Tools/freeze/freeze.py
index 181e84598b..b6f2d4ada2 100755
--- a/Tools/freeze/freeze.py
+++ b/Tools/freeze/freeze.py
@@ -26,6 +26,12 @@ Options:
-o dir: Directory where the output files are created; default '.'.
+-m: Additional arguments are module names instead of filenames.
+
+-d: Debugging mode for the module finder.
+
+-q: Make the module finder totally quiet.
+
-h: Print this help message.
-w: Toggle Windows (NT or 95) behavior.
@@ -42,7 +48,8 @@ script.py: The Python script to be executed by the resulting binary.
module ...: Additional Python modules (referenced by pathname)
that will be included in the resulting binary. These
- may be .py or .pyc files.
+ may be .py or .pyc files. If -m is specified, these are
+ module names that are search in the path instead.
NOTES:
@@ -67,7 +74,7 @@ import addpack
# Import the freeze-private modules
import checkextensions
-import findmodules
+import modulefinder
import makeconfig
import makefreeze
import makemakefile
@@ -82,6 +89,8 @@ def main():
exec_prefix = None # settable with -P option
extensions = []
path = sys.path
+ modargs = 0
+ debug = 1
odir = ''
win = sys.platform[:3] == 'win'
@@ -97,7 +106,7 @@ def main():
# parse command line
try:
- opts, args = getopt.getopt(sys.argv[1:], 'he:o:p:P:s:w')
+ opts, args = getopt.getopt(sys.argv[1:], 'deh:mo:p:P:qs:w')
except getopt.error, msg:
usage('getopt error: ' + str(msg))
@@ -106,14 +115,20 @@ def main():
if o == '-h':
print __doc__
return
+ if o == '-d':
+ debug = debug + 1
if o == '-e':
extensions.append(a)
+ if o == '-m':
+ modargs = 1
if o == '-o':
odir = a
if o == '-p':
prefix = a
if o == '-P':
exec_prefix = a
+ if o == '-q':
+ debug = 0
if o == '-w':
win = not win
if o == '-s':
@@ -220,18 +235,30 @@ def main():
target = os.path.join(odir, target)
makefile = os.path.join(odir, makefile)
- for mod in implicits:
- modules.append(findmodules.findmodule(mod))
-
# Actual work starts here...
- dict = findmodules.findmodules(scriptfile, modules, path)
- names = dict.keys()
- names.sort()
- print "Modules being frozen:"
- for name in names:
- print '\t', name
-
+ # collect all modules of the program
+ mf = modulefinder.ModuleFinder(path, debug)
+ for mod in implicits:
+ mf.import_hook(mod)
+ for mod in modules:
+ if mod == '-m':
+ modargs = 1
+ continue
+ if modargs:
+ if mod[-2:] == '.*':
+ mf.import_hook(mod[:-2], None, ["*"])
+ else:
+ mf.import_hook(mod)
+ else:
+ mf.load_file(mod)
+ mf.run_script(scriptfile)
+ if debug > 0:
+ mf.report()
+ print
+ dict = mf.modules
+
+ # generate output for frozen modules
backup = frozen_c + '~'
try:
os.rename(frozen_c, backup)
@@ -239,7 +266,7 @@ def main():
backup = None
outfp = open(frozen_c, 'w')
try:
- makefreeze.makefreeze(outfp, dict)
+ makefreeze.makefreeze(outfp, dict, debug)
if win and subsystem == 'windows':
import winmakemakefile
outfp.write(winmakemakefile.WINMAINTEMPLATE)
@@ -251,6 +278,7 @@ def main():
frozen_c)
os.rename(backup, frozen_c)
+ # windows gets different treatment
if win:
# Taking a shortcut here...
import winmakemakefile
@@ -264,14 +292,17 @@ def main():
outfp.close()
return
+ # generate config.c and Makefile
builtins = []
unknown = []
mods = dict.keys()
mods.sort()
for mod in mods:
- if dict[mod] == '<builtin>':
+ if dict[mod].__code__:
+ continue
+ if not dict[mod].__file__:
builtins.append(mod)
- elif dict[mod] == '<unknown>':
+ else:
unknown.append(mod)
addfiles = []
diff --git a/Tools/freeze/makefreeze.py b/Tools/freeze/makefreeze.py
index 5c6f371af5..97315b3391 100644
--- a/Tools/freeze/makefreeze.py
+++ b/Tools/freeze/makefreeze.py
@@ -1,4 +1,5 @@
import marshal
+import string
# Write a file containing frozen code for the modules in the dictionary.
@@ -23,51 +24,31 @@ main(argc, argv)
"""
-def makefreeze(outfp, dict):
+def makefreeze(outfp, dict, debug=0):
done = []
mods = dict.keys()
mods.sort()
for mod in mods:
- modfn = dict[mod]
- try:
- str = makecode(modfn)
- except IOError, msg:
- sys.stderr.write("%s: %s\n" % (modfn, str(msg)))
- continue
- if str:
- done.append(mod, len(str))
- writecode(outfp, mod, str)
+ m = dict[mod]
+ mangled = string.join(string.split(mod, "."), "__")
+ if m.__code__:
+ if debug:
+ print "freezing", mod, "..."
+ str = marshal.dumps(m.__code__)
+ size = len(str)
+ if m.__path__:
+ # Indicate package by negative size
+ size = -size
+ done.append((mod, mangled, size))
+ writecode(outfp, mangled, str)
+ if debug:
+ print "generating table of frozen modules"
outfp.write(header)
- for mod, size in done:
- outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mod, size))
+ for mod, mangled, size in done:
+ outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
outfp.write(trailer)
-# Return code string for a given module -- either a .py or a .pyc
-# file. Return either a string or None (if it's not Python code).
-# May raise IOError.
-
-def makecode(filename):
- if filename[-3:] == '.py':
- f = open(filename, 'r')
- try:
- text = f.read()
- code = compile(text, filename, 'exec')
- finally:
- f.close()
- return marshal.dumps(code)
- if filename[-4:] == '.pyc':
- f = open(filename, 'rb')
- try:
- f.seek(8)
- str = f.read()
- finally:
- f.close()
- return str
- # Can't generate code for this extension
- return None
-
-
# Write a C initializer for a module containing the frozen python code.
# The array is called M_<mod>.
@@ -78,22 +59,3 @@ def writecode(outfp, mod, str):
for c in str[i:i+16]:
outfp.write('%d,' % ord(c))
outfp.write('\n};\n')
-
-
-# Test for the above functions.
-
-def test():
- import os
- import sys
- if not sys.argv[1:]:
- print 'usage: python freezepython.py file.py(c) ...'
- sys.exit(2)
- dict = {}
- for arg in sys.argv[1:]:
- base = os.path.basename(arg)
- mod, ext = os.path.splitext(base)
- dict[mod] = arg
- makefreeze(sys.stdout, dict)
-
-if __name__ == '__main__':
- test()
diff --git a/Tools/freeze/modulefinder.py b/Tools/freeze/modulefinder.py
new file mode 100644
index 0000000000..79665b7e75
--- /dev/null
+++ b/Tools/freeze/modulefinder.py
@@ -0,0 +1,373 @@
+"""Find modules used by a script, using introspection."""
+
+import dis
+import imp
+import marshal
+import os
+import re
+import string
+import sys
+
+
+IMPORT_NAME = dis.opname.index('IMPORT_NAME')
+IMPORT_FROM = dis.opname.index('IMPORT_FROM')
+
+
+class Module:
+
+ def __init__(self, name, file=None, path=None):
+ self.__name__ = name
+ self.__file__ = file
+ self.__path__ = path
+ self.__code__ = None
+
+ def __repr__(self):
+ s = "Module(%s" % `self.__name__`
+ if self.__file__ is not None:
+ s = s + ", %s" % `self.__file__`
+ if self.__path__ is not None:
+ s = s + ", %s" % `self.__path__`
+ s = s + ")"
+ return s
+
+
+class ModuleFinder:
+
+ def __init__(self, path=None, debug=0):
+ if path is None:
+ path = sys.path
+ self.path = path
+ self.modules = {}
+ self.badmodules = {}
+ self.debug = debug
+ self.indent = 0
+
+ def msg(self, level, str, *args):
+ if level <= self.debug:
+ for i in range(self.indent):
+ print " ",
+ print str,
+ for arg in args:
+ print repr(arg),
+ print
+
+ def msgin(self, *args):
+ level = args[0]
+ if level <= self.debug:
+ self.indent = self.indent + 1
+ apply(self.msg, args)
+
+ def msgout(self, *args):
+ level = args[0]
+ if level <= self.debug:
+ self.indent = self.indent - 1
+ apply(self.msg, args)
+
+ def run_script(self, pathname):
+ self.msg(2, "run_script", pathname)
+ fp = open(pathname)
+ stuff = ("", "r", imp.PY_SOURCE)
+ self.load_module('__main__', fp, pathname, stuff)
+
+ def load_file(self, pathname):
+ dir, name = os.path.split(pathname)
+ name, ext = os.path.splitext(name)
+ fp = open(pathname)
+ stuff = (ext, "r", imp.PY_SOURCE)
+ self.load_module(name, fp, pathname, stuff)
+
+ def import_hook(self, name, caller=None, fromlist=None):
+ self.msg(3, "import_hook", name, caller, fromlist)
+ parent = self.determine_parent(caller)
+ q, tail = self.find_head_package(parent, name)
+ m = self.load_tail(q, tail)
+ if not fromlist:
+ return q
+ if m.__path__:
+ self.ensure_fromlist(m, fromlist)
+
+ def determine_parent(self, caller):
+ self.msgin(4, "determine_parent", caller)
+ if not caller:
+ self.msgout(4, "determine_parent -> None")
+ return None
+ pname = caller.__name__
+ if caller.__path__:
+ parent = self.modules[pname]
+ assert caller is parent
+ self.msgout(4, "determine_parent ->", parent)
+ return parent
+ if '.' in pname:
+ i = string.rfind(pname, '.')
+ pname = pname[:i]
+ parent = self.modules[pname]
+ assert parent.__name__ == pname
+ self.msgout(4, "determine_parent ->", parent)
+ return parent
+ self.msgout(4, "determine_parent -> None")
+ return None
+
+ def find_head_package(self, parent, name):
+ self.msgin(4, "find_head_package", parent, name)
+ if '.' in name:
+ i = string.find(name, '.')
+ head = name[:i]
+ tail = name[i+1:]
+ else:
+ head = name
+ tail = ""
+ if parent:
+ qname = "%s.%s" % (parent.__name__, head)
+ else:
+ qname = head
+ q = self.import_module(head, qname, parent)
+ if q:
+ self.msgout(4, "find_head_package ->", (q, tail))
+ return q, tail
+ if parent:
+ qname = head
+ parent = None
+ q = self.import_module(head, qname, parent)
+ if q:
+ self.msgout(4, "find_head_package ->", (q, tail))
+ return q, tail
+ self.msgout(4, "raise ImportError: No module named", qname)
+ raise ImportError, "No module named " + qname
+
+ def load_tail(self, q, tail):
+ self.msgin(4, "load_tail", q, tail)
+ m = q
+ while tail:
+ i = string.find(tail, '.')
+ if i < 0: i = len(tail)
+ head, tail = tail[:i], tail[i+1:]
+ mname = "%s.%s" % (m.__name__, head)
+ m = self.import_module(head, mname, m)
+ if not m:
+ self.msgout(4, "raise ImportError: No module named", mname)
+ raise ImportError, "No module named " + mname
+ self.msgout(4, "load_tail ->", m)
+ return m
+
+ def ensure_fromlist(self, m, fromlist, recursive=0):
+ self.msg(4, "ensure_fromlist", m, fromlist, recursive)
+ for sub in fromlist:
+ if sub == "*":
+ if not recursive:
+ all = self.find_all_submodules(m)
+ if all:
+ self.ensure_fromlist(m, all, 1)
+ elif not hasattr(m, sub):
+ subname = "%s.%s" % (m.__name__, sub)
+ submod = self.import_module(sub, subname, m)
+ if not submod:
+ raise ImportError, "No module named " + subname
+
+ def find_all_submodules(self, m):
+ if not m.__path__:
+ return
+ modules = {}
+ suffixes = [".py", ".pyc", ".pyo"]
+ for dir in m.__path__:
+ try:
+ names = os.listdir(dir)
+ except os.error:
+ self.msg(2, "can't list directory", dir)
+ continue
+ for name in names:
+ mod = None
+ for suff in suffixes:
+ n = len(suff)
+ if name[-n:] == suff:
+ mod = name[:-n]
+ break
+ if mod and mod != "__init__":
+ modules[mod] = mod
+ return modules.keys()
+
+ def import_module(self, partname, fqname, parent):
+ self.msgin(3, "import_module", partname, fqname, parent)
+ try:
+ m = self.modules[fqname]
+ except KeyError:
+ pass
+ else:
+ self.msgout(3, "import_module ->", m)
+ return m
+ if self.badmodules.has_key(fqname):
+ self.msgout(3, "import_module -> None")
+ return None
+ try:
+ fp, pathname, stuff = self.find_module(partname,
+ parent and parent.__path__)
+ except ImportError:
+ self.msgout(3, "import_module ->", None)
+ return None
+ try:
+ m = self.load_module(fqname, fp, pathname, stuff)
+ finally:
+ if fp: fp.close()
+ if parent:
+ setattr(parent, partname, m)
+ self.msgout(3, "import_module ->", m)
+ return m
+
+ def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
+ self.msgin(2, "load_module", fqname, fp and "fp", pathname)
+ if type == imp.PKG_DIRECTORY:
+ m = self.load_package(fqname, pathname)
+ self.msgout(2, "load_module ->", m)
+ return m
+ if type == imp.PY_SOURCE:
+ co = compile(fp.read(), pathname, 'exec')
+ elif type == imp.PY_COMPILED:
+ if fp.read(4) != imp.get_magic():
+ self.msgout(2, "raise ImportError: Bad magic number", pathname)
+ raise ImportError, "Bad magic number in %s", pathname
+ fp.read(4)
+ co = marshal.load(fp)
+ else:
+ co = None
+ m = self.add_module(fqname)
+ if co:
+ m.__file__ = pathname
+ m.__code__ = co
+ code = co.co_code
+ n = len(code)
+ i = 0
+ lastname = None
+ while i < n:
+ c = code[i]
+ i = i+1
+ op = ord(c)
+ if op >= dis.HAVE_ARGUMENT:
+ oparg = ord(code[i]) + ord(code[i+1])*256
+ i = i+2
+ if op == IMPORT_NAME:
+ name = lastname = co.co_names[oparg]
+ if not self.badmodules.has_key(lastname):
+ try:
+ self.import_hook(name, m)
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ self.badmodules[name] = None
+ elif op == IMPORT_FROM:
+ name = co.co_names[oparg]
+ assert lastname is not None
+ if not self.badmodules.has_key(lastname):
+ try:
+ self.import_hook(lastname, m, [name])
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ fullname = lastname + "." + name
+ self.badmodules[fullname] = None
+ else:
+ lastname = None
+ self.msgout(2, "load_module ->", m)
+ return m
+
+ def load_package(self, fqname, pathname):
+ self.msgin(2, "load_package", fqname, pathname)
+ m = self.add_module(fqname)
+ m.__file__ = pathname
+ m.__path__ = [pathname]
+ fp, buf, stuff = self.find_module("__init__", m.__path__)
+ self.load_module(fqname, fp, buf, stuff)
+ self.msgout(2, "load_package ->", m)
+ return m
+
+ def add_module(self, fqname):
+ if self.modules.has_key(fqname):
+ return self.modules[fqname]
+ self.modules[fqname] = m = Module(fqname)
+ return m
+
+ def find_module(self, name, path):
+ if path is None:
+ if name in sys.builtin_module_names:
+ return (None, None, ("", "", imp.C_BUILTIN))
+ path = self.path
+ return imp.find_module(name, path)
+
+ def report(self):
+ print
+ print " %-25s %s" % ("Name", "File")
+ print " %-25s %s" % ("----", "----")
+ # Print modules found
+ keys = self.modules.keys()
+ keys.sort()
+ for key in keys:
+ m = self.modules[key]
+ if m.__path__:
+ print "P",
+ else:
+ print "m",
+ print "%-25s" % key, m.__file__ or ""
+
+ # Print missing modules
+ keys = self.badmodules.keys()
+ keys.sort()
+ for key in keys:
+ print "?", key
+
+
+def test():
+ # Parse command line
+ import getopt
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "dmp:q")
+ except getopt.error, msg:
+ print msg
+ return
+
+ # Process options
+ debug = 1
+ domods = 0
+ addpath = []
+ for o, a in opts:
+ if o == '-d':
+ debug = debug + 1
+ if o == '-m':
+ domods = 1
+ if o == '-p':
+ addpath = addpath + string.split(a, os.pathsep)
+ if o == '-q':
+ debug = 0
+
+ # Provide default arguments
+ if not args:
+ script = "hello.py"
+ else:
+ script = args[0]
+
+ # Set the path based on sys.path and the script directory
+ path = sys.path[:]
+ path[0] = os.path.dirname(script)
+ path = addpath + path
+ if debug > 1:
+ print "path:"
+ for item in path:
+ print " ", `item`
+
+ # Create the module finder and turn its crank
+ mf = ModuleFinder(path, debug)
+ for arg in args[1:]:
+ if arg == '-m':
+ domods = 1
+ continue
+ if domods:
+ if arg[-2:] == '.*':
+ mf.import_hook(arg[:-2], None, ["*"])
+ else:
+ mf.import_hook(arg)
+ else:
+ mf.load_file(arg)
+ mf.run_script(script)
+ mf.report()
+
+
+if __name__ == '__main__':
+ try:
+ test()
+ except KeyboardInterrupt:
+ print "\n[interrupt]"