diff options
author | Nicolas Chauvat <nicolas.chauvat@logilab.fr> | 2007-09-28 02:08:48 +0200 |
---|---|---|
committer | Nicolas Chauvat <nicolas.chauvat@logilab.fr> | 2007-09-28 02:08:48 +0200 |
commit | a7bbed8e220645e7713f770c1fd9d3da51f14376 (patch) | |
tree | 9455c007bdb848d857edd9bd070af243e6471ec4 /pdf_ext.py | |
parent | f3b59c81250f59adea07839aad09d3aa03d7e6ef (diff) | |
download | logilab-common-a7bbed8e220645e7713f770c1fd9d3da51f14376.tar.gz |
pdf_ext manipulates pdf and fills pdf forms via fdf files. pdftk recommended.
Diffstat (limited to 'pdf_ext.py')
-rw-r--r-- | pdf_ext.py | 97 |
1 files changed, 97 insertions, 0 deletions
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""Manipulate PDF forms through FDF files. pdftk recommended.

Copyright (c) 2003-2007 LOGILAB S.A. (Paris, FRANCE).
http://www.logilab.fr/ -- mailto:contact@logilab.fr

Notes regarding pdftk, PDF forms and FDF (Forms Data Format) files.

Field names can be extracted with::

    pdftk orig.pdf generate_fdf output truc.fdf

To merge an FDF file into a PDF::

    pdftk orig.pdf fill_form test.fdf output result.pdf [flatten]

Without ``flatten``, one could further edit the resulting form.
With ``flatten``, everything is turned into text.
"""

import os
import shutil
import subprocess
import sys
import tempfile

# Two-character marker written in front of every field name by output_field().
_BOM = "\xfe\xff"

HEAD = """%FDF-1.2
%\xE2\xE3\xCF\xD3
1 0 obj
<<
/FDF
<<
/Fields [
"""

TAIL = """]
>>
>>
endobj
trailer

<<
/Root 1 0 R
>>
%%EOF
"""


def _open_fdf(path, mode='r'):
    """Open the FDF file `path` so that one character maps to one byte.

    HEAD and output_field() contain characters in the \\x80-\\xff range that
    must reach the file as single bytes.  Python 2 `str` already is a byte
    string; on Python 3 the latin-1 codec provides the same 1:1 mapping.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, encoding='latin-1')
    return open(path, mode)


def _run_pdftk(*args):
    """Run ``pdftk`` with `args` and return its exit status.

    The arguments are passed as a list, without going through a shell, so
    file names containing spaces or shell metacharacters are safe.

    Raises OSError if the pdftk executable cannot be started.
    """
    return subprocess.call(['pdftk'] + list(args))


def output_field(f):
    """Return the field name `f` in the form written to the ``/T`` entry.

    The result is the ``\\xfe\\xff`` marker followed by every character of
    `f` preceded by a NUL character.  Note that this only yields two bytes
    per character when all characters of `f` are below ``\\u0100``.
    """
    return _BOM + "".join(["\x00" + c for c in f])


def extract_keys(lines):
    """Return the field names found in the ``/T`` lines of an FDF file.

    `lines` is any iterable of text lines.  For each line starting with
    ``/T`` the surrounding parentheses, the optional ``\\xfe\\xff`` marker and
    all NUL characters are removed.  Names written without the marker are
    returned unchanged (a fixed-offset slice would drop their first three
    characters), and a last line lacking its newline is handled as well.
    """
    keys = []
    for line in lines:
        if not line.startswith('/T'):
            continue
        name = line[2:].strip(' \t\r\n')
        if name.startswith('(') and name.endswith(')'):
            name = name[1:-1]
        if name.startswith(_BOM):
            name = name[len(_BOM):]
        keys.append(name.replace('\x00', ''))
    return keys


def write_field(out, key, value):
    """Write one FDF field dictionary to the file-like object `out`.

    `value` is written verbatim between parentheses, so the caller is
    responsible for any escaping it needs.  A false `value` (empty string,
    None, 0...) is written as ``/V /``.  `key` goes through output_field().
    """
    out.write("<<\n")
    if value:
        # The one-element tuple keeps the formatting correct even when
        # `value` itself is a tuple.
        out.write("/V (%s)\n" % (value,))
    else:
        out.write("/V /\n")
    out.write("/T (%s)\n" % output_field(key))
    out.write(">> \n")


def write_fields(out, fields):
    """Write a complete FDF document describing `fields` to `out`.

    `fields` is an iterable of ``(key, value, comment)`` triples; the comment
    is ignored.  Each field is written twice, once under `key` and once
    under `key` + "a".
    """
    out.write(HEAD)
    for key, value, _comment in fields:
        write_field(out, key, value)
        # Second entry named key + "a": carbon copy onto other pages.
        write_field(out, key + "a", value)
    out.write(TAIL)


def extract_keys_from_pdf(filename):
    """Return the names of the form fields of the PDF file `filename`.

    pdftk dumps the form as an FDF file into a private temporary directory
    (removed afterwards), which is then parsed with extract_keys().

    Raises OSError if pdftk cannot be started and IOError/OSError if pdftk
    did not produce the FDF file.
    """
    # what about using 'pdftk filename dump_data_fields' and parsing the output ?
    workdir = tempfile.mkdtemp(prefix='pdf_ext')
    try:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        _run_pdftk(filename, 'generate_fdf', 'output', fdf_path)
        with _open_fdf(fdf_path) as stream:
            return extract_keys(stream.readlines())
    finally:
        shutil.rmtree(workdir, ignore_errors=True)


def fill_pdf(infile, outfile, fields):
    """Fill the form of `infile` with `fields` and write it to `outfile`.

    `fields` is an iterable of ``(key, value, comment)`` triples, see
    write_fields().  The FDF file is written to a private temporary
    directory and closed before pdftk reads it.  The result is flattened.

    Returns the exit status of pdftk (0 on success).
    Raises OSError if pdftk cannot be started.
    """
    workdir = tempfile.mkdtemp(prefix='pdf_ext')
    try:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        with _open_fdf(fdf_path, 'w') as stream:
            write_fields(stream, fields)
        return _run_pdftk(infile, 'fill_form', fdf_path,
                          'output', outfile, 'flatten')
    finally:
        shutil.rmtree(workdir, ignore_errors=True)


def testfill_pdf(filename):
    """Fill every field of `filename` with its own name, as a visual check.

    The result is written next to `filename`, with ``_`` prepended to the
    base name (so ``dir/form.pdf`` gives ``dir/_form.pdf``).
    """
    keys = extract_keys_from_pdf(filename)
    fields = [(key, key, '') for key in keys]
    folder, basename = os.path.split(filename)
    fill_pdf(filename, os.path.join(folder, '_' + basename), fields)


# Despite its name this is a helper, not a unit test: keep pytest and nose
# from collecting it.
testfill_pdf.__test__ = False