summaryrefslogtreecommitdiff
path: root/src/bin/pg_dump/compress_io.c
blob: db19058354df647148351b6cde2b7d58f43821c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
/*-------------------------------------------------------------------------
 *
 * compress_io.c
 *	 Routines for archivers to write an uncompressed or compressed data
 *	 stream.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * This file includes two APIs for dealing with compressed data. The first
 * provides more flexibility, using callbacks to read/write data from the
 * underlying stream. The second API is a wrapper around fopen and
 * friends; it provides a similar interface but abstracts away the
 * possible compression. The second API is designed so that the resulting
 * files can easily be manipulated with an external compression utility
 * program.
 *
 * Compressor API
 * --------------
 *
 *	The interface for writing to an archive consists of three functions:
 *	AllocateCompressor, writeData, and EndCompressor. First you call
 *	AllocateCompressor, then write all the data by calling writeData as many
 *	times as needed, and finally EndCompressor. writeData will call the
 *	WriteFunc that was provided to AllocateCompressor for each chunk of
 *	compressed data.
 *
 *	The interface for reading an archive likewise consists of three functions:
 *	AllocateCompressor, readData, and EndCompressor. First you call
 *	AllocateCompressor, then read all the data by calling readData, which
 *	reads the whole compressed stream by repeatedly calling the given
 *	ReadFunc. ReadFunc returns the compressed data one chunk at a time. Then
 *	readData decompresses it and passes the decompressed data to ahwrite(),
 *	until ReadFunc returns 0 to signal EOF. The interface is the same for
 *	compressed and uncompressed streams.
 *
 * Compressed stream API
 * ----------------------
 *
 *	The compressed stream API provides a set of function pointers for
 *	opening, reading, writing, and finally closing files. The implemented
 *	function pointers are documented in the corresponding header file and are
 *	common to all streams. This allows the caller to use the same functions
 *	for both compressed and uncompressed streams.
 *
 *	The interface consists of three functions, InitCompressFileHandle,
 *	InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
 *	compression is known, then start by calling InitCompressFileHandle,
 *	otherwise discover it by using InitDiscoverCompressFileHandle. Then call
 *	the function pointers as required for the read/write operations. Finally
 *	call EndCompressFileHandle to end the stream.
 *
 *	InitDiscoverCompressFileHandle tries to infer the compression by the
 *	filename suffix. If the suffix is not yet known then it tries to simply
 *	open the file and if it fails, it tries to open the same file with
 *	compressed suffixes (.gz, .lz4 and .zst, in this order).
 *
 * IDENTIFICATION
 *	   src/bin/pg_dump/compress_io.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <sys/stat.h>
#include <unistd.h>

#include "compress_gzip.h"
#include "compress_io.h"
#include "compress_lz4.h"
#include "compress_none.h"
#include "compress_zstd.h"
#include "pg_backup_utils.h"

/*----------------------
 * Generic functions
 *----------------------
 */

/*
 * Report whether this build of pg_dump/pg_restore can handle the compression
 * algorithm named in 'compression_spec'.
 *
 * PG_COMPRESSION_NONE is always accepted.  gzip, LZ4 and zstd are accepted
 * only when the build defines HAVE_LIBZ, USE_LZ4 and USE_ZSTD respectively.
 *
 * Returns NULL if the algorithm is supported.  Otherwise returns a string
 * produced by psprintf() describing the problem, suitable for inclusion in
 * the caller's error message.
 */
char *
supports_compression(const pg_compress_specification compression_spec)
{
	const pg_compress_algorithm algorithm = compression_spec.algorithm;

	switch (algorithm)
	{
			/* every algorithm compiled into this build shares one exit */
		case PG_COMPRESSION_NONE:
#ifdef HAVE_LIBZ
		case PG_COMPRESSION_GZIP:
#endif
#ifdef USE_LZ4
		case PG_COMPRESSION_LZ4:
#endif
#ifdef USE_ZSTD
		case PG_COMPRESSION_ZSTD:
#endif
			return NULL;

		default:
			/* not compiled in (or not recognized): fall out to the message */
			break;
	}

	return psprintf("this build does not support compression with %s",
					get_compress_algorithm_name(algorithm));
}

/*----------------------
 * Compressor API
 *----------------------
 */

/*
 * Allocate a new compressor.
 *
 * A CompressorState is allocated with pg_malloc0(), the caller's 'readF' and
 * 'writeF' callbacks are stored in it, and the initializer matching
 * compression_spec.algorithm is run on it.  If the algorithm is none of the
 * four handled below, no initializer is run and the state is returned with
 * only the two callbacks filled in.
 *
 * The result is meant to be released with EndCompressor().
 */
CompressorState *
AllocateCompressor(const pg_compress_specification compression_spec,
				   ReadFunc readF, WriteFunc writeF)
{
	CompressorState *cs = pg_malloc0(sizeof(*cs));

	cs->readF = readF;
	cs->writeF = writeF;

	/* hand the state to the algorithm-specific initializer */
	switch (compression_spec.algorithm)
	{
		case PG_COMPRESSION_NONE:
			InitCompressorNone(cs, compression_spec);
			break;
		case PG_COMPRESSION_GZIP:
			InitCompressorGzip(cs, compression_spec);
			break;
		case PG_COMPRESSION_LZ4:
			InitCompressorLZ4(cs, compression_spec);
			break;
		case PG_COMPRESSION_ZSTD:
			InitCompressorZstd(cs, compression_spec);
			break;
		default:
			/* any other value: nothing to initialize */
			break;
	}

	return cs;
}

/*
 * Shut down a compressor and release it.
 *
 * The compressor's 'end' callback is invoked first (it is expected to
 * terminate the compression library context and flush its buffers), and
 * then the CompressorState itself is freed.  'cs' must not be used after
 * this call.
 */
void
EndCompressor(ArchiveHandle *AH, CompressorState *cs)
{
	/* run the algorithm-specific teardown while 'cs' is still allocated */
	(*cs->end) (AH, cs);

	pg_free(cs);
}

/*----------------------
 * Compressed stream API
 *----------------------
 */

/*
 * Private routines
 */
/*
 * Report whether 'filename' ends with 'suffix'.
 *
 * Returns 1 if the last strlen(suffix) bytes of 'filename' are identical to
 * 'suffix' (an empty suffix therefore always matches), and 0 otherwise.  The
 * comparison is byte-wise, hence case-sensitive.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	/* strlen() yields size_t; keep that type rather than narrowing to int */
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* a name shorter than the suffix cannot end with it */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}

/*
 * free() that leaves errno untouched: the value errno had on entry is put
 * back after the memory has been released.  Useful in several places below.
 */
static void
free_keep_errno(void *p)
{
	const int	errno_on_entry = errno;

	free(p);

	/* undo anything free() may have done to errno */
	errno = errno_on_entry;
}

/*
 * Public interface
 */

/*
 * Initialize a compress file handle for the specified compression algorithm.
 *
 * A CompressFileHandle is allocated with pg_malloc0() and passed to the
 * initializer matching compression_spec.algorithm.  If the algorithm is none
 * of the four handled below, no initializer is run and the handle is
 * returned exactly as allocated.
 */
CompressFileHandle *
InitCompressFileHandle(const pg_compress_specification compression_spec)
{
	CompressFileHandle *handle = pg_malloc0(sizeof(*handle));

	/* let the algorithm-specific code set up the handle */
	switch (compression_spec.algorithm)
	{
		case PG_COMPRESSION_NONE:
			InitCompressFileHandleNone(handle, compression_spec);
			break;
		case PG_COMPRESSION_GZIP:
			InitCompressFileHandleGzip(handle, compression_spec);
			break;
		case PG_COMPRESSION_LZ4:
			InitCompressFileHandleLZ4(handle, compression_spec);
			break;
		case PG_COMPRESSION_ZSTD:
			InitCompressFileHandleZstd(handle, compression_spec);
			break;
		default:
			/* any other value: nothing to initialize */
			break;
	}

	return handle;
}

/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * 'path' is the file name without the compression extension and 'ext' is the
 * extension to try, given without the leading dot (e.g. "gz").  'ext' is
 * only read, so it is declared const; callers pass string literals.
 *
 * The name that was tested, "<path>.<ext>", is returned through '*fname':
 * the buffer previously pointed to by '*fname' is freed and replaced by a
 * newly allocated one, whether or not the file exists.  The caller owns the
 * new buffer.
 *
 * Returns true if access() with F_OK succeeds on the constructed name, and
 * false otherwise.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	/* drop the previous candidate name without disturbing errno */
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);

	return (access(*fname, F_OK) == 0);
}

/*
 * Open a file for reading, inferring the compression method from its name.
 *
 * 'path' is the file to open.  'mode' must be equal to PG_BINARY_R; this is
 * checked by an assertion.
 *
 * If 'path' already ends with the suffix of a supported compression method,
 * currently ".gz", ".lz4" or ".zst", then that compression is used.
 * Otherwise the compression is inferred by probing the file system: first
 * 'path' as is (no compression), then 'path' with each known suffix
 * appended.  So if you pass "foo" as 'path', this will open either "foo" or
 * "foo.{gz,lz4,zst}", trying in that order.  If none of those exists, the
 * last name probed is passed to the open function with no compression
 * selected.
 *
 * On failure, return NULL with an error code in errno.  errno is reset to
 * zero just before the open attempt, so after a failure it holds whatever
 * the open function stored there (or zero if it stored nothing), never a
 * stale value left behind by the existence probes.
 */
CompressFileHandle *
InitDiscoverCompressFileHandle(const char *path, const char *mode)
{
	CompressFileHandle *CFH = NULL;
	struct stat st;
	char	   *fname;
	pg_compress_specification compression_spec = {0};

	compression_spec.algorithm = PG_COMPRESSION_NONE;

	Assert(strcmp(mode, PG_BINARY_R) == 0);

	/*
	 * Use pg_strdup() rather than plain strdup(): the result is handed
	 * straight to hasSuffix(), which would dereference a NULL returned on
	 * out-of-memory.
	 */
	fname = pg_strdup(path);

	if (hasSuffix(fname, ".gz"))
		compression_spec.algorithm = PG_COMPRESSION_GZIP;
	else if (hasSuffix(fname, ".lz4"))
		compression_spec.algorithm = PG_COMPRESSION_LZ4;
	else if (hasSuffix(fname, ".zst"))
		compression_spec.algorithm = PG_COMPRESSION_ZSTD;
	else
	{
		/*
		 * No recognizable suffix.  Each check_compressed_file() call
		 * replaces fname with the candidate it tested, so after the first
		 * successful probe fname names the file to open.
		 */
		if (stat(path, &st) == 0)
			compression_spec.algorithm = PG_COMPRESSION_NONE;
		else if (check_compressed_file(path, &fname, "gz"))
			compression_spec.algorithm = PG_COMPRESSION_GZIP;
		else if (check_compressed_file(path, &fname, "lz4"))
			compression_spec.algorithm = PG_COMPRESSION_LZ4;
		else if (check_compressed_file(path, &fname, "zst"))
			compression_spec.algorithm = PG_COMPRESSION_ZSTD;
	}

	CFH = InitCompressFileHandle(compression_spec);

	/* forget errno values set by the failed stat()/access() probes above */
	errno = 0;
	if (!CFH->open_func(fname, -1, mode, CFH))
	{
		free_keep_errno(CFH);
		CFH = NULL;
	}
	free_keep_errno(fname);

	return CFH;
}

/*
 * Close an open file handle and release its memory.
 *
 * If the handle has private data, its close_func is called and that result
 * is returned.  If it has none, nothing is closed and false is returned.
 * The handle itself is freed in either case and must not be used afterwards.
 *
 * On failure, returns false and sets errno appropriately.  errno is reset
 * to zero before closing, so after a false return it holds what close_func
 * stored there, or zero if nothing was stored (including the case where
 * there was nothing to close), never a stale value from an earlier call.
 */
bool
EndCompressFileHandle(CompressFileHandle *CFH)
{
	bool		ret = false;

	/* make sure a failure cannot be reported with a leftover errno */
	errno = 0;
	if (CFH->private_data)
		ret = CFH->close_func(CFH);

	/* release the handle while keeping close_func's errno for the caller */
	free_keep_errno(CFH);

	return ret;
}