summaryrefslogtreecommitdiff
path: root/src/tools/msvc/gendef.pl
blob: d6bed1ce1512e6ad063e69085c4c4b43cbab58f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

# Copyright (c) 2021-2022, PostgreSQL Global Development Group

use strict;
use warnings;
use List::Util qw(min);
use Getopt::Long;

# NOTE(review): @def does not appear to be referenced anywhere in this file
# (exported symbols are collected in the %def hash instead) -- confirm and
# consider removing.
my @def;

#
# Script that generates a .DEF file for all objects in a directory
#
# src/tools/msvc/gendef.pl
#

# Given a symbol file path (the output of "dumpbin /symbols"), loops over its
# contents and records every symbol of interest in the hash referenced by
# $def, as 'symbolname' -> symtype, where symtype is:
#
#     'func'   a CODE symbol, left undecorated in the .DEF
#     'data'   a DATA symbol, i.e. global var export
#
# Parameters:
#     $symfile   path of the symbol dump to read
#     $def       hash reference that is updated in place
#
# Returns nothing.  Dies if the symbol file cannot be opened.
#
sub extract_syms
{
	my ($symfile, $def) = @_;
	open(my $f, '<', $symfile)
	  or die "Could not open $symfile: $!\n";

	# Read into a lexical rather than $_ so the caller's $_ is left alone.
	while (my $line = <$f>)
	{

		# Expected symbol lines look like:
		#
		# 0   1        2      3            4            5 6
		# IDX SYMBOL   SECT   SYMTYPE      SYMSTATIC      SYMNAME
		# ------------------------------------------------------------------------
		# 02E 00000130 SECTA  notype       External     | _standbyState
		# 02F 00000009 SECT9  notype       Static       | _LocalRecoveryInProgress
		# 064 00000020 SECTC  notype ()    Static       | _XLogCheckBuffer
		# 065 00000000 UNDEF  notype ()    External     | _BufferGetTag
		#
		# See http://msdn.microsoft.com/en-us/library/b842y285.aspx
		#
		# We're not interested in the symbol index or offset.
		#
		# SECT[ION] is only examined to see whether the symbol is defined in a
		# COFF section of the local object file; if UNDEF, it's a symbol to be
		# resolved at link time from another object so we can't export it.
		#
		# SYMTYPE is always notype for C symbols as there's no typeinfo and no
		# way to get the symbol type from name (de)mangling. However, we care
		# if "notype" is suffixed by "()" or not. The presence of () means the
		# symbol is a function, the absence means it isn't.
		#
		# SYMSTATIC indicates whether it's a compilation-unit local "static"
		# symbol ("Static"), or whether it's available for use from other
		# compilation units ("External"). We export all symbols that aren't
		# static as part of the whole program DLL interface to produce UNIX-like
		# default linkage.
		#
		# SYMNAME is, obviously, the symbol name. The leading underscore
		# indicates that the _cdecl calling convention is used. See
		# http://www.unixwiz.net/techtips/win32-callconv.html
		# http://www.codeproject.com/Articles/1388/Calling-Conventions-Demystified
		#

		# Collapse the one- or two-word type into a single token so that the
		# whitespace split below always yields the same field positions.
		# "notype ()" must be rewritten first, otherwise the plain "notype"
		# rule would swallow it.
		$line =~ s/notype \(\)/func/g;
		$line =~ s/notype/data/g;

		my @pieces = split(' ', $line);

		# Skip file and section headers and other non-symbol entries
		next unless defined($pieces[0]) and $pieces[0] =~ /^[A-F0-9]{3,}$/;

		# Skip blank symbol names
		next unless $pieces[6];

		# Skip externs used from another compilation unit
		next if ($pieces[2] eq "UNDEF");

		# Skip static symbols
		next unless ($pieces[4] eq "External");

		# Skip some more MSVC-generated crud
		next if $pieces[6] =~ /^@/;
		next if $pieces[6] =~ /^\(/;

		# __real and __xmm are out-of-line floating point literals and
		# (for __xmm) their SIMD equivalents. They shouldn't be part
		# of the DLL interface.
		next if $pieces[6] =~ /^__real/;
		next if $pieces[6] =~ /^__xmm/;

		# __imp entries are imports from other DLLs, eg __imp__malloc .
		# (We should never have one of these that hasn't already been skipped
		# by the UNDEF test above, though).
		next if $pieces[6] =~ /^__imp/;

		# More under-documented internal crud
		next if $pieces[6] =~ /NULL_THUNK_DATA$/;
		next if $pieces[6] =~ /^__IMPORT_DESCRIPTOR/;
		next if $pieces[6] =~ /^__NULL_IMPORT/;

		# Skip string literals
		next if $pieces[6] =~ /^\?\?_C/;

		# We assume that if a symbol is defined as data, then as a function,
		# the linker will reject the binary anyway. So it's OK to just pick
		# whatever came last.
		$def->{ $pieces[6] } = $pieces[3];
	}
	close($f);
	return;
}

# Writes a module-definition (.DEF) file listing every symbol in $def.
#
# Parameters:
#     $deffile   path of the file to (over)write
#     $arch      target architecture; anything other than "x86_64" gets the
#                leading underscore stripped from each symbol name
#     $def       hash reference of 'symbolname' -> 'func' | 'data'
#
# Returns nothing.  Dies if the file cannot be opened or fully written.
sub writedef
{
	my ($deffile, $arch, $def) = @_;
	open(my $fh, '>', $deffile)
	  or die "Could not write to $deffile: $!\n";
	print $fh "EXPORTS\n";

	# Iterate in order of the raw (still decorated) names so the output is
	# stable from run to run.
	foreach my $sym (sort keys %{$def})
	{
		my $isdata = $def->{$sym} eq 'data';

		# Strip the leading underscore for win32, but not x64
		my $name = $sym;
		$name =~ s/^_//
		  unless ($arch eq "x86_64");

		# Emit just the name if it's a function symbol, or emit the name
		# decorated with the DATA option for variables.
		if ($isdata)
		{
			print $fh "  $name DATA\n";
		}
		else
		{
			print $fh "  $name\n";
		}
	}

	# Buffered write errors (e.g. disk full) only surface at close time, so
	# the result must be checked to avoid leaving a truncated .DEF behind
	# unnoticed.
	close($fh)
	  or die "Could not finish writing $deffile: $!\n";
	return;
}


# Prints the command-line synopsis and terminates the script via die.
#
# Parameters:
#     $msg   optional diagnostic describing what was wrong with the
#            invocation; when given it is printed on its own line ahead of
#            the synopsis instead of being silently dropped
#
# Never returns.
sub usage
{
	my ($msg) = @_;
	my $prefix = (defined($msg) && $msg ne '') ? "$msg\n" : '';

	die($prefix
		  . "Usage: gendef.pl --arch <arch> --deffile <deffile> --tempdir <tempdir> files-or-directories\n"
		  . "    arch: x86 | x86_64\n"
		  . "    deffile: path of the generated file\n"
		  . "    tempdir: directory for temporary files\n"
		  . "    files or directories: object files or directory containing object files\n"
	);
}

#
# Main program: parse options, collect the input object files, and
# regenerate the .DEF file unless it is already newer than all of them.
#

my $arch;
my $deffile;
my $tempdir = '.';

GetOptions(
	'arch:s'    => \$arch,
	'deffile:s' => \$deffile,
	'tempdir:s' => \$tempdir,) or usage();

# --arch must be present and name a known architecture.  Definedness is
# checked first so that a missing option produces the usage text rather than
# "uninitialized value" warnings.
usage("arch: " . (defined($arch) ? $arch : '(not specified)'))
  unless (defined($arch) && ($arch eq 'x86' || $arch eq 'x86_64'));

# Without an output path there is nothing useful to do.
usage("deffile: (not specified)")
  unless (defined($deffile) && $deffile ne '');

my @files;

# Each argument is either an object file or a directory whose *.obj files
# are to be processed.
foreach my $in (@ARGV)
{
	if (-d $in)
	{
		push @files, glob "$in/*.obj";
	}
	else
	{
		push @files, $in;
	}
}

# if the def file exists and is newer than all input object files, skip
# its creation
#
# -M yields the file age in days (smaller means newer) and undef for a file
# that does not exist.  An empty or partly missing input list never counts
# as "up to date", so we fall through and let dumpbin report the problem.
my @input_ages = map { -M $_ } @files;
my $inputs_ok = @input_ages && !(grep { !defined($_) } @input_ages);

if (   $inputs_ok
	&& -f $deffile
	&& (-M $deffile) < min(@input_ages))
{
	print "Not re-generating $deffile, file already exists.\n";
	exit(0);
}

print "Generating $deffile in tempdir $tempdir\n";

my %def = ();

my $symfile = "$tempdir/all.sym";
my $tmpfile = "$tempdir/tmp.sym";
unless (-d $tempdir)
{
	mkdir($tempdir)
	  or die "Could not create directory $tempdir: $!\n";
}

# NOTE(review): the command is passed to the shell as a single string, so
# paths containing spaces or shell metacharacters would need quoting.
my $cmd = "dumpbin /nologo /symbols /out:$tmpfile " . join(' ', @files);

system($cmd) && die "Could not call dumpbin";

# Only move the dump into place once dumpbin has finished.  A failed rename
# must be fatal, otherwise a stale all.sym from an earlier run could be
# parsed instead.
rename($tmpfile, $symfile)
  or die "Could not rename $tmpfile to $symfile: $!\n";
extract_syms($symfile, \%def);
print "\n";

writedef($deffile, $arch, \%def);

print "Generated " . scalar(keys(%def)) . " symbols\n";