summaryrefslogtreecommitdiff
path: root/pdf_ext.py
blob: c8ef64ed65fdd94b7bc2797d8e0d80bf7d7e5d04 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
""" Copyright (c) 2003-2007 LOGILAB S.A. (Paris, FRANCE).
 http://www.logilab.fr/ -- mailto:contact@logilab.fr

manipulate pdf and fdf files. pdftk recommended.

Notes regarding pdftk, PDF forms and FDF (Forms Data Format) files.
Field names can be extracted with:
    pdftk orig.pdf generate_fdf output truc.fdf
To merge an FDF file into a PDF form:
    pdftk orig.pdf fill_form test.fdf output result.pdf [flatten]
without flatten, one could further edit the resulting form.
with flatten, everything is turned into text.
"""

import os
import shutil
import subprocess
import tempfile

# Fixed preamble written before the field entries of a generated FDF file:
# the version header, a comment line of four high-bit bytes, then the opening
# of object 1, its /FDF dictionary and the /Fields array.
# The trailing spaces after "1 0 obj" and "/FDF" are part of the value.
HEAD = (
    "%FDF-1.2\n"
    "%\xE2\xE3\xCF\xD3\n"
    "1 0 obj \n"
    "<<\n"
    "/FDF \n"
    "<<\n"
    "/Fields [\n"
)

# Fixed epilogue written after the field entries of a generated FDF file:
# closes the /Fields array, the /FDF dictionary and object 1, then emits the
# trailer pointing at object 1 and the end-of-file marker.
# The trailing space after "endobj" is part of the value.
TAIL = (
    "]\n"
    ">>\n"
    ">>\n"
    "endobj \n"
    "trailer\n"
    "\n"
    "<<\n"
    "/Root 1 0 R\n"
    ">>\n"
    "%%EOF\n"
)

def output_field(f):
    """Return *f* prefixed with ``\\xfe\\xff`` and with ``\\x00`` before each character.

    For a string whose characters each fit in one byte this is the UTF-16BE
    layout preceded by its byte-order mark.  An empty *f* yields just the
    two-character prefix.
    """
    padded = "".join("\x00" + ch for ch in f)
    return "\xfe\xff" + padded

def extract_keys(lines):
    """Collect the field names found on the ``/T`` lines of an FDF file.

    *lines* is an iterable of text lines.  Every line starting with ``/T``
    contributes one name, in order of appearance; all other lines (including
    the ``/V`` value lines) are ignored.

    The name is what remains after dropping the first 7 and the last 2
    characters of the line and removing every ``\\x00`` character.
    """
    names = []
    for raw in lines:
        if not raw.startswith('/T'):
            continue
        # Fixed-width slice: skips the 7 leading characters and the 2 trailing ones.
        payload = raw[7:-2]
        names.append(payload.replace('\x00', ''))
    return names

def write_field(out, key, value):
    """Write one FDF field dictionary to *out*.

    *out* is any object with a ``write`` method.  The entry consists of a
    ``/V`` line holding *value* (or ``/V /`` when *value* is falsy) followed
    by a ``/T`` line holding *key* as encoded by ``output_field``.

    NOTE: *value* is interpolated verbatim; no escaping of parentheses or
    backslashes is performed here.
    """
    emit = out.write
    emit("<<\n")
    emit("/V (%s)\n" % value if value else "/V /\n")
    emit("/T (%s)\n" % output_field(key))
    emit(">> \n")

def write_fields(out, fields):
    """Write a complete FDF document for *fields* to *out*.

    *fields* is an iterable of ``(key, value, comment)`` triples; the comment
    element is not written.  The output is ``HEAD``, two entries per triple,
    then ``TAIL``.
    """
    out.write(HEAD)
    for name, content, _comment in fields:
        write_field(out, name, content)
        # Second entry under the name suffixed with "a": carbon copy of the
        # value for other pages.
        write_field(out, name + "a", content)
    out.write(TAIL)

def extract_keys_from_pdf(filename):
    """Return the form field names of the PDF file *filename*.

    Runs ``pdftk <filename> generate_fdf output <scratch file>`` and passes
    the lines of the generated FDF file to ``extract_keys``.

    pdftk is invoked with an argument list rather than through a shell, so
    *filename* may contain spaces or shell metacharacters.  The FDF file is
    created inside a private temporary directory that is removed afterwards,
    instead of a fixed, predictable path under /tmp.

    The exit status of pdftk is not checked; if it produced no output,
    opening the FDF file raises IOError.
    """
    # what about using 'pdftk filename dump_data_fields' and parsing the output ?
    workdir = tempfile.mkdtemp()
    try:
        # The output file does not exist yet, so pdftk never has to overwrite.
        fdf_path = os.path.join(workdir, 'fields.fdf')
        subprocess.call(['pdftk', filename, 'generate_fdf', 'output', fdf_path])
        fdf = open(fdf_path)
        # try/finally rather than 'with' to stay compatible with old interpreters
        try:
            lines = fdf.readlines()
        finally:
            fdf.close()
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
    return extract_keys(lines)


def fill_pdf(infile, outfile, fields):
    """Fill the form of the PDF *infile* with *fields* and write *outfile*.

    *fields* is an iterable of ``(key, value, comment)`` triples as accepted
    by ``write_fields``.  They are written to a scratch FDF file, then
    ``pdftk <infile> fill_form <fdf> output <outfile> flatten`` is run.

    pdftk is invoked with an argument list rather than through a shell, so
    the paths may contain spaces or shell metacharacters.  The FDF file lives
    in a private temporary directory that is removed afterwards, and it is
    explicitly closed before pdftk reads it.

    The exit status of pdftk is not checked.
    """
    workdir = tempfile.mkdtemp()
    try:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        fdf = open(fdf_path, 'w')
        # try/finally rather than 'with' to stay compatible with old interpreters
        try:
            write_fields(fdf, fields)
        finally:
            # Closing flushes the data to disk before pdftk is started.
            fdf.close()
        subprocess.call(['pdftk', infile, 'fill_form', fdf_path,
                         'output', outfile, 'flatten'])
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def testfill_pdf(infile, outfile):
    """Fill every form field of *infile* with its own name and write *outfile*.

    The field names come from ``extract_keys_from_pdf``; each one is used as
    both key and value, with an empty comment, and handed to ``fill_pdf``.
    """
    names = extract_keys_from_pdf(infile)
    fill_pdf(infile, outfile, [(name, name, '') for name in names])