diff options
author | Egmont Koblinger <egmont@gmail.com> | 2014-12-14 22:59:16 +0100 |
---|---|---|
committer | Egmont Koblinger <egmont@gmail.com> | 2014-12-14 23:13:51 +0100 |
commit | 7e63a5068f50567b102d0ca18bc5394389687286 (patch) | |
tree | 4bf367d7d7de93ff5a125aa2828e1c048f4f4bfa | |
parent | 4ab677d4d8d273a969093b29b375c8054416df2e (diff) | |
download | vte-7e63a5068f50567b102d0ca18bc5394389687286.tar.gz |
stream: Implement with one file descriptor
Instead of paging between two file descriptors, store the stream in one;
with a special "snake" mapping between logical and physical offsets.
Replace stdio caching with pwrite/pread of 64kB units, and manual buffering
on top of the snake layer.
https://bugzilla.gnome.org/show_bug.cgi?id=738601
-rw-r--r-- | src/Makefile.am | 21 | ||||
-rw-r--r-- | src/vtestream-base.h | 21 | ||||
-rw-r--r-- | src/vtestream-file.h | 1040 | ||||
-rw-r--r-- | src/vtestream.h | 3 |
4 files changed, 831 insertions, 254 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 1c533da5..28e0f56c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -320,8 +320,8 @@ TEST_SH = \ $(NULL) EXTRA_DIST += $(TEST_SH) -check_PROGRAMS = dumpkeys reflect-text-view reflect-vte mev table xticker vteconv -TESTS = table vteconv $(TEST_SH) +check_PROGRAMS = dumpkeys reflect-text-view reflect-vte mev table xticker vteconv vtestream-file +TESTS = table vteconv vtestream-file $(TEST_SH) reflect_text_view_CPPFLAGS = -DUSE_TEXT_VIEW $(AM_CPPFLAGS) reflect_text_view_CFLAGS = $(VTE_CFLAGS) $(AM_CFLAGS) @@ -392,6 +392,23 @@ table_LDADD = \ $(GLIB_LIBS) \ $(GOBJECT_LIBS) +vtestream_file_SOURCES = \ + vtestream-base.h \ + vtestream-file.h \ + vtestream.c \ + vtestream.h \ + vteutils.c \ + vteutils.h \ + $(NULL) +vtestream_file_CPPFLAGS = \ + -DVTESTREAM_MAIN \ + $(AM_CPPFLAGS) +vtestream_file_CFLAGS = \ + $(VTE_CFLAGS) \ + $(AM_CFLAGS) +vtestream_file_LDADD = \ + $(VTE_LIBS) + vteconv_SOURCES = buffer.h debug.c debug.h vteconv.c vteconv.h vteconv_CPPFLAGS = -DVTECONV_MAIN $(AM_CPPFLAGS) vteconv_CFLAGS = $(VTE_CFLAGS) $(AM_CFLAGS) diff --git a/src/vtestream-base.h b/src/vtestream-base.h index ebe3eec5..2d8d9060 100644 --- a/src/vtestream-base.h +++ b/src/vtestream-base.h @@ -33,10 +33,11 @@ typedef struct _VteStreamClass { GObjectClass parent_class; void (*reset) (VteStream *stream, gsize offset); - void (*append) (VteStream *stream, const char *data, gsize len); gboolean (*read) (VteStream *stream, gsize offset, char *data, gsize len); + void (*append) (VteStream *stream, const char *data, gsize len); void (*truncate) (VteStream *stream, gsize offset); void (*advance_tail) (VteStream *stream, gsize offset); + gsize (*tail) (VteStream *stream); gsize (*head) (VteStream *stream); } VteStreamClass; @@ -62,12 +63,6 @@ _vte_stream_reset (VteStream *stream, gsize offset) VTE_STREAM_GET_CLASS (stream)->reset (stream, offset); } -void -_vte_stream_append (VteStream *stream, const char *data, gsize len) -{ - VTE_STREAM_GET_CLASS (stream)->append (stream, data, len); -} - gboolean _vte_stream_read (VteStream *stream, gsize offset, char *data, gsize len) { @@ -75,6 +70,12 @@ _vte_stream_read (VteStream *stream, gsize offset, char *data, gsize len) } void +_vte_stream_append (VteStream *stream, const char *data, gsize len) +{ + VTE_STREAM_GET_CLASS (stream)->append (stream, data, len); +} + +void _vte_stream_truncate (VteStream *stream, gsize offset) { VTE_STREAM_GET_CLASS (stream)->truncate (stream, offset); @@ -87,6 +88,12 @@ _vte_stream_advance_tail (VteStream *stream, gsize offset) } gsize +_vte_stream_tail (VteStream *stream) +{ + return VTE_STREAM_GET_CLASS (stream)->tail (stream); +} + +gsize _vte_stream_head (VteStream *stream) { return VTE_STREAM_GET_CLASS (stream)->head (stream); diff --git a/src/vtestream-file.h b/src/vtestream-file.h index 5c709b41..f56cffb9 100644 --- a/src/vtestream-file.h +++ b/src/vtestream-file.h @@ -18,33 +18,66 @@ * * Red Hat Author(s): Behdad Esfahbod * Google Author(s): Behdad Esfahbod + * Independent Author(s): Egmont Koblinger */ -#include <errno.h> - -#include "vteutils.h" - - /* - * We provide two file implementations: one based on raw - * syscalls, and one based on stdio. The syscall one has - * got more than enough testing in the past. The stdio - * is newer, but has a default cache, which greatly reduces - * number of syscalls. + * VteFileStream is implemented as two layers above each other. + * + * o The bottom layer is VteSnake. It provides a mapping from logical offsets + * to physical file offsets, storing the stream in at most 3 continuous + * regions of the file. See below for details how this mapping is done. * - * Change the following conditional to 0 for syscalls, and - * to 1 for stdio. Please test both branches before - * making changes to _file_t semantics or implementation. + * It operates with a fixed block size (64kB at the moment), allows + * random-access-read of a single block, random-access-overwrite of a single + * block within the stream, write (append) a single block right after the + * current head, advancing the tail by arbitrary number of blocks, and + * resetting. + * + * (Random-access-overwrite within the existing area is a rare event, occurs + * only when the terminal window size changes. We use it to redo differently + * the latest appends. In the topmost layer it's achieved by truncating at + * the head of the stream, and then appending again. In this layer and also + * in the next one, offering random-access-overwrite instead of truncation + * makes the implementation a lot easier. It's also essential to maintain a + * unique encryption IV in a forthcoming version.) + * + * The name was chosen because VteFileStream's way of advancing the head and + * the tail is kinda like a snake, and the mapping to file offsets reminds + * me of the well-known game on old mobile phones. + * + * o The top layer is VteFileStream. It does buffering and caching. As opposed + * to the previous layers, this one provides methods on arbitrary amount of + * data. It doesn't offer random-access-writes, instead, it offers appending + * data, and truncating the head (undoing the latest appends). Write + * requests are batched up until there's a complete block to be written to + * disk. Read requests are answered by reading possibly more underlying + * blocks, and sped up by caching the result. + * + * Design discussions: https://bugzilla.gnome.org/show_bug.cgi?id=738601 */ -#if 0 +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "vteutils.h" +#ifndef VTESTREAM_MAIN +# define VTE_SNAKE_BLOCKSIZE 65536 +typedef guint32 _vte_block_datalength_t; +#else +/* Smaller sizes for unit testing */ +# define VTE_SNAKE_BLOCKSIZE 7 +typedef guint8 _vte_block_datalength_t; +#endif -/* - * File implementation using Unix syscalls. - */ +#define ALIGN_SNAKE(x) ((x) / VTE_SNAKE_BLOCKSIZE * VTE_SNAKE_BLOCKSIZE) +#define MOD_SNAKE(x) ((x) % VTE_SNAKE_BLOCKSIZE) -#include <unistd.h> +/******************************************************************************************/ #ifndef HAVE_PREAD #define pread _pread @@ -69,70 +102,74 @@ pwrite (int fd, char *data, gsize len, gsize offset) #endif -typedef struct -{ - int fd; -} _file_t; - static inline void -_file_init (_file_t *f) +_file_close (int fd) { - f->fd = -1; -} - -static inline void -_file_open (_file_t *f, int fd) -{ - f->fd = fd; -} + if (G_UNLIKELY (fd == -1)) + return; -static inline gboolean -_file_isopen (_file_t *f) -{ - return f->fd != -1; + close (fd); } -static inline void -_file_close (_file_t *f) -{ - if (G_UNLIKELY (!_file_isopen (f))) - return; - - close (f->fd); -} - - static gboolean -_file_try_truncate (_file_t *f, gsize offset) +_file_try_truncate (int fd, gsize offset) { int ret; - if (G_UNLIKELY (!_file_isopen (f))) - return TRUE; + if (G_UNLIKELY (fd == -1)) + return FALSE; do { - ret = ftruncate (f->fd, offset); + ret = ftruncate (fd, offset); } while (ret == -1 && errno == EINTR); return !ret; } static void -_file_reset (_file_t *f) +_file_reset (int fd) +{ + _file_try_truncate (fd, 0); +} + +static gboolean +_file_try_punch_hole (int fd, gsize offset, gsize len) { - _file_try_truncate (f, 0); +#ifndef VTESTREAM_MAIN +# ifdef FALLOC_FL_PUNCH_HOLE + if (G_UNLIKELY (fd == -1)) + return FALSE; + + /* Punching hole is slow for me (Linux 3.16, ext4), + * causing a ~10% overall performance regression. + * Disable it for now. */ + /* fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len); */ + + return TRUE; +# else + return FALSE; +# endif +#else /* VTESTREAM_MAIN */ + /* For unittesting, overwrite the part with dots. */ + char c = '.'; + while (len--) { + if (pwrite(fd, &c, 1, offset++) != 1) + return FALSE; + } + return TRUE; +#endif } static gsize -_file_read (_file_t *f, char *data, gsize len, gsize offset) +_file_read (int fd, char *data, gsize len, gsize offset) { gsize ret, total = 0; - if (G_UNLIKELY (!_file_isopen (f))) + if (G_UNLIKELY (fd == -1)) return 0; while (len) { - ret = pread (f->fd, data, len, offset); + ret = pread (fd, data, len, offset); if (G_UNLIKELY (ret == (gsize) -1)) { if (errno == EINTR) continue; @@ -150,29 +187,18 @@ _file_read (_file_t *f, char *data, gsize len, gsize offset) } static void -_file_write (_file_t *f, const char *data, gsize len, gsize offset) +_file_write (int fd, const char *data, gsize len, gsize offset) { gsize ret; - gboolean truncated = FALSE; - g_assert (_file_isopen (f) || !len); + if (G_UNLIKELY (fd == -1)) + return; while (len) { - ret = pwrite (f->fd, data, len, offset); + ret = pwrite (fd, data, len, offset); if (G_UNLIKELY (ret == (gsize) -1)) { if (errno == EINTR) continue; - else if (errno == EINVAL && !truncated) - { - /* Perhaps previous writes failed and now we are - * seeking past end of file. Try extending it - * and retry. This allows recovering from a - * "/tmp is full" error. - */ - _file_try_truncate (f, offset); - truncated = TRUE; - continue; - } else break; } @@ -184,162 +210,332 @@ _file_write (_file_t *f, const char *data, gsize len, gsize offset) } } - -#else - +/******************************************************************************************/ /* - * File implementation using stdio. + * VteSnake: + * + * The data structure implemented here remembers the last certain amount of + * data written, with the size dynamically changing. Basic operations include + * appending data (advancing the head), forgetting old data (advancing the + * tail), and random access read. In these cases the logical head and tail + * offsets can only increase. Rare special operations that don't influence the + * overall design are overwriting existing data, and resetting the stream. + * + * The mapping from logical to physical offsets can be in one of 4 states: + * 1. One continuous region; + * 2. Two continuous regions, the second one preceding the first; + * 3. Three continuous regions, the second one preceding the first, and the + * third being at the end; + * 4. Two continuous regions, the first one preceding the second. + * + * In the example below, each lowercase letter represents a 64kB block, and + * dots denote blocks whose contents are no longer important. + * + * Initially, data is simply written to the file, we're in state 1: + * (A) abcd + * + * Later the tail starts to advance too, but we're still in state 1: + * (B) ...de + * + * At some point, based on heuristics, we wrap to the beginning, into state 2: + * (C) f..de + * + * The trivial case: If tail reaches EOF before head would bite tail, we're + * back at state 1 and we can even truncate the file: + * (D) fg... + * (E) fg + * + * Let's write some more data and advance the tail to get back to a state + * equivalent to (C). + * (F) k..ij + * + * If head would bite tail, we continue appending at EOF, entering state 3: + * (G) klmijno + * + * As tail keeps advancing, we're still in state 3: + * (H) klm.jnop + * + * When tail finishes with the middle segment, we enter state 4: + * (I) klm..nop + * + * Further advancing tail keeps us in state 4: + * (J) ..m..nopqr + * + * When tail finishes with the first segment, we return to state 1: + * (K) .....nopqr + * + * Depending on the aforementioned heuristics, we might stay in state 1: + * (L) .....nopqrs + * + * But probably we soon return to state 2: + * (M) ......opqrs + * (N) tu....opqrs + * + * and so on... */ -#include <stdio.h> +typedef struct _VteSnake { + GObject parent; + int fd; + int state; + struct { + gsize st_tail; /* Stream's logical tail offset. */ + gsize st_head; /* Stream's logical head offset. */ + gsize fd_tail; /* FD's physical tail offset. */ + gsize fd_head; /* FD's physical head offset. One of these four is redundant, nevermind. */ + } segment[3]; /* At most 3 segments, [0] at the tail. */ + gsize tail, head; /* These are redundant too, for convenience. */ +} VteSnake; +#define VTE_SNAKE_SEGMENTS(s) ((s)->state == 4 ? 2 : (s)->state) + +typedef struct _VteSnakeClass { + GObjectClass parent_class; + + void (*reset) (VteSnake *snake, gsize offset); + void (*write) (VteSnake *snake, gsize offset, const char *data); + gboolean (*read) (VteSnake *snake, gsize offset, char *data); + void (*advance_tail) (VteSnake *snake, gsize offset); + gsize (*tail) (VteSnake *snake); + gsize (*head) (VteSnake *snake); +} VteSnakeClass; + +static GType _vte_snake_get_type (void); +#define VTE_TYPE_SNAKE _vte_snake_get_type () +#define VTE_SNAKE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), VTE_TYPE_SNAKE, VteSnakeClass)) + +G_DEFINE_TYPE (VteSnake, _vte_snake, G_TYPE_OBJECT) -/* Use unlocked versions if available. */ -#ifdef HAVE_FWRITE_UNLOCKED -# define fwrite fwrite_unlocked -# define fread fread_unlocked -# define fflush fflush_unlocked -#endif - -typedef struct +static void +_vte_snake_init (VteSnake *snake) { - FILE *fp; - gsize pos; -} _file_t; + snake->fd = -1; + snake->state = 1; +} -static inline void -_file_init (_file_t *f) +static void +_vte_snake_finalize (GObject *object) { - f->fp = NULL; - f->pos = -1; + VteSnake *snake = (VteSnake *) object; + + _file_close (snake->fd); + + G_OBJECT_CLASS (_vte_snake_parent_class)->finalize(object); } static inline void -_file_open (_file_t *f, int fd) +_vte_snake_ensure_file (VteSnake *snake) { - f->fp = fdopen (fd, "w+"); - f->pos = 0; + if (G_LIKELY (snake->fd != -1)) + return; + + snake->fd = _vte_mkstemp (); } -static inline gboolean -_file_isopen (_file_t *f) +static void +_vte_snake_reset (VteSnake *snake, gsize offset) { - return f->fp != NULL; + g_assert_cmpuint (offset % VTE_SNAKE_BLOCKSIZE, ==, 0); + + _file_reset (snake->fd); + + snake->segment[0].st_tail = snake->segment[0].st_head = snake->tail = snake->head = offset; + snake->segment[0].fd_tail = snake->segment[0].fd_head = 0; + snake->state = 1; } -static inline void -_file_close (_file_t *f) +/* + * Turn a logical offset into a physical one; only for offsets that are + * already within the streams, not for appending a new block. + */ +static gsize +_vte_snake_offset_map (VteSnake *snake, gsize offset) { - if (G_UNLIKELY (!_file_isopen (f))) - return; + int i; + int segments = VTE_SNAKE_SEGMENTS(snake); - fclose (f->fp); -} + g_assert_cmpuint (offset % VTE_SNAKE_BLOCKSIZE, ==, 0); + for (i = 0; i < segments; i++) { + if (offset >= snake->segment[i].st_tail && offset < snake->segment[i].st_head) + return offset - snake->segment[i].st_tail + snake->segment[i].fd_tail; + } + g_assert_not_reached(); +} +/* Place VTE_SNAKE_BLOCKSIZE bytes at data */ static gboolean -_file_try_truncate (_file_t *f, gsize offset) +_vte_snake_read (VteSnake *snake, gsize offset, char *data) { - if (G_UNLIKELY (!_file_isopen (f))) - return TRUE; - - /* We don't have to truncate. The bytestream works - * without a working truncate (hence the "try"). - * Here's a fallback implementation. Things should - * work with or without this. - */ - if (1) - { - int ret; + gsize fd_offset; - fflush (f->fp); + g_assert_cmpuint (offset % VTE_SNAKE_BLOCKSIZE, ==, 0); - do { - ret = ftruncate (fileno (f->fp), offset); - } while (ret == -1 && errno == EINTR); + if (G_UNLIKELY (offset < snake->tail || offset >= snake->head)) + return FALSE; - return !ret; - } + fd_offset = _vte_snake_offset_map(snake, offset); - return FALSE; + return (_file_read (snake->fd, data, VTE_SNAKE_BLOCKSIZE, fd_offset) == VTE_SNAKE_BLOCKSIZE); } +/* + * offset is either within the stream (overwrite data), or at its head (append data). + * data is at most VTE_SNAKE_BLOCKSIZE bytes large; if shorter then the remaining amount is skipped. + * When reading back, that skipped area will contain garbage (e.g. when the FS doesn't support + * punching holes), the caller needs to deal with it. + * + * When appending, the following state transfers can occur: + * 1->2, 2->3. + */ static void -_file_reset (_file_t *f) +_vte_snake_write (VteSnake *snake, gsize offset, const char *data) { - int fd; + gsize fd_offset; + + g_assert_cmpuint (offset, >=, snake->tail); + g_assert_cmpuint (offset, <=, snake->head); + g_assert_cmpuint (offset % VTE_SNAKE_BLOCKSIZE, ==, 0); + + if (G_LIKELY (offset == snake->head)) { + /* Appending a new block to the head. */ + _vte_snake_ensure_file (snake); + if (G_UNLIKELY (snake->state == 1 && 2 * snake->segment[0].fd_tail > snake->segment[0].fd_head)) { + /* State 1 -> 2 based on heuristics. The only crucial thing is that fd_tail needs to be greater than 0. + * Note: changing the heuristics might break the unit tests! */ + snake->segment[1].st_tail = snake->segment[0].st_head; + snake->segment[1].st_head = snake->segment[0].st_head + VTE_SNAKE_BLOCKSIZE; + snake->segment[1].fd_tail = fd_offset = 0; + snake->segment[1].fd_head = VTE_SNAKE_BLOCKSIZE; + snake->state = 2; + } else if (G_UNLIKELY (snake->state == 2 && snake->segment[1].fd_head == snake->segment[0].fd_tail)) { + /* State 2 -> 3 when head would bite the tail. */ + snake->segment[2].st_tail = snake->segment[1].st_head; + snake->segment[2].st_head = snake->segment[1].st_head + VTE_SNAKE_BLOCKSIZE; + snake->segment[2].fd_tail = fd_offset = snake->segment[0].fd_head; + snake->segment[2].fd_head = snake->segment[0].fd_head + VTE_SNAKE_BLOCKSIZE; + snake->state = 3; + } else { + /* No state change. */ + int last_segment = VTE_SNAKE_SEGMENTS(snake) - 1; + fd_offset = snake->segment[last_segment].fd_head; + snake->segment[last_segment].st_head += VTE_SNAKE_BLOCKSIZE; + snake->segment[last_segment].fd_head += VTE_SNAKE_BLOCKSIZE; + } + snake->head = offset + VTE_SNAKE_BLOCKSIZE; + } else { + /* Overwriting an existing block. The new block might be shorter than the old one, + * punch a hole to potentially free up disk space (and for easier unit testing). */ + fd_offset = _vte_snake_offset_map(snake, offset); + _file_try_punch_hole (snake->fd, fd_offset, VTE_SNAKE_BLOCKSIZE); + } + _file_write (snake->fd, data, VTE_SNAKE_BLOCKSIZE, fd_offset); +} - if (_file_try_truncate (f, 0)) - return; +/* + * When advancing the tail, the following state transfers can occur (even more + * of them if the amount discarded is large enough): + * 2->1, 3->4, 4->1. + */ +static void +_vte_snake_advance_tail (VteSnake *snake, gsize offset) +{ + g_assert_cmpuint (offset, >=, snake->tail); + g_assert_cmpuint (offset, <=, snake->head); + g_assert_cmpuint (offset % VTE_SNAKE_BLOCKSIZE, ==, 0); - if (G_UNLIKELY (!_file_isopen (f))) + if (G_UNLIKELY (offset == snake->head)) { + _vte_snake_reset (snake, offset); return; - - /* Reopen file to truncate it. */ - - fd = dup (fileno (f->fp)); - - _file_close (f); - _file_open (f, fd); + } + + while (offset > snake->segment[0].st_tail) { + if (offset < snake->segment[0].st_head) { + /* Drop some (but not all) bytes from the first segment. */ + _file_try_punch_hole (snake->fd, snake->segment[0].fd_tail, offset - snake->tail); + snake->segment[0].fd_tail += offset - snake->tail; + snake->segment[0].st_tail = snake->tail = offset; + return; + } else { + /* Drop the entire first segment. */ + switch (snake->state) { + case 1: + g_assert_not_reached(); + break; + case 2: + snake->segment[0] = snake->segment[1]; + _file_try_truncate (snake->fd, snake->segment[0].fd_head); + snake->state = 1; + break; + case 3: + _file_try_punch_hole (snake->fd, snake->segment[0].fd_tail, snake->segment[0].fd_head - snake->segment[0].fd_tail); + snake->segment[0] = snake->segment[1]; + snake->segment[1] = snake->segment[2]; + snake->state = 4; + break; + case 4: + _file_try_punch_hole (snake->fd, snake->segment[0].fd_tail, snake->segment[0].fd_head - snake->segment[0].fd_tail); + snake->segment[0] = snake->segment[1]; + snake->state = 1; + break; + default: + g_assert_not_reached(); + break; + } + } + } + g_assert_cmpuint (snake->segment[0].st_tail, ==, offset); + snake->tail = offset; } -static gboolean -_file_seek (_file_t *f, gsize offset) +static gsize +_vte_snake_tail (VteSnake *snake) { - if (f->pos != offset) - { - if (-1 == fseek (f->fp, offset, SEEK_SET)) - return FALSE; - f->pos = offset; - } - return TRUE; + return snake->tail; } static gsize -_file_read (_file_t *f, char *data, gsize len, gsize offset) +_vte_snake_head (VteSnake *snake) { - gsize ret; - - if (G_UNLIKELY (!_file_isopen (f))) - return 0; - - if (!_file_seek (f, offset)) - return 0; - - ret = fread (data, 1, len, f->fp); - f->pos += ret; - return ret; + return snake->head; } static void -_file_write (_file_t *f, const char *data, gsize len, gsize offset) +_vte_snake_class_init (VteSnakeClass *klass) { - gsize ret; + GObjectClass *gobject_class = G_OBJECT_CLASS (klass); - g_assert (_file_isopen (f) || !len); - - if (!_file_seek (f, offset)) - return; + gobject_class->finalize = _vte_snake_finalize; - ret = fwrite (data, 1, len, f->fp); - f->pos += ret; + klass->reset = _vte_snake_reset; + klass->read = _vte_snake_read; + klass->write = _vte_snake_write; + klass->advance_tail = _vte_snake_advance_tail; + klass->tail = _vte_snake_tail; + klass->head = _vte_snake_head; } - -#endif - +/******************************************************************************************/ /* - * VteFileStream: A file-based stream + * VteFileStream: Implement buffering/caching on top of VteSnake. */ typedef struct _VteFileStream { - VteStream parent; + GObject parent; + + VteSnake *snake; + + char *rbuf; + /* Offset of the cached record, always a multiple of block size. + * Use a value of 1 (or anything that's not a multiple of block size) + * to denote if no record is cached. */ + gsize rbuf_offset; + + char *wbuf; + gsize wbuf_len; - /* The first file/offset is for the write head, second is for last page */ - _file_t file[2]; - gsize offset[2]; - gsize head; + gsize head, tail; } VteFileStream; typedef VteStreamClass VteFileStreamClass; @@ -349,13 +545,6 @@ static GType _vte_file_stream_get_type (void); G_DEFINE_TYPE (VteFileStream, _vte_file_stream, VTE_TYPE_STREAM) -static void -_vte_file_stream_init (VteFileStream *stream) -{ - _file_init (&stream->file[0]); - _file_init (&stream->file[1]); -} - VteStream * _vte_file_stream_new (void) { @@ -363,79 +552,104 @@ _vte_file_stream_new (void) } static void -_vte_file_stream_finalize (GObject *object) +_vte_file_stream_init (VteFileStream *stream) { - VteFileStream *stream = (VteFileStream *) object; - - _file_close (&stream->file[0]); - _file_close (&stream->file[1]); + stream->snake = g_object_new (VTE_TYPE_SNAKE, NULL); + _vte_snake_init (stream->snake); - G_OBJECT_CLASS (_vte_file_stream_parent_class)->finalize(object); + stream->rbuf = g_malloc(VTE_SNAKE_BLOCKSIZE); + stream->wbuf = g_malloc(VTE_SNAKE_BLOCKSIZE); + stream->rbuf_offset = 1; /* Invalidate */ } -static inline void -_vte_file_stream_ensure_file0 (VteFileStream *stream) +static void +_vte_file_stream_finalize (GObject *object) { - int fd; + VteFileStream *stream = (VteFileStream *) object; - if (G_LIKELY (_file_isopen (&stream->file[0]))) - return; + g_free(stream->rbuf); + g_free(stream->wbuf); + g_object_unref (stream->snake); - fd = _vte_mkstemp (); - if (fd == -1) - return; - - _file_open (&stream->file[0], fd); + G_OBJECT_CLASS (_vte_file_stream_parent_class)->finalize(object); } static void _vte_file_stream_reset (VteStream *astream, gsize offset) { VteFileStream *stream = (VteFileStream *) astream; + gsize offset_aligned = ALIGN_SNAKE(offset); - _file_reset (&stream->file[0]); - _file_reset (&stream->file[1]); - - stream->head = stream->offset[0] = stream->offset[1] = offset; -} - -static void -_vte_file_stream_append (VteStream *astream, const char *data, gsize len) -{ - VteFileStream *stream = (VteFileStream *) astream; + _vte_snake_reset (stream->snake, offset_aligned); + stream->tail = stream->head = offset; - _vte_file_stream_ensure_file0 (stream); + /* When resetting at a non-aligned offset, initial bytes of the write buffer + * will eventually be written to disk, although doesn't contain useful information. + * Rather than leaving garbage there, fill it with zeros. + * For unit testing, fill it with dashes for convenience. */ +#ifndef VTESTREAM_MAIN + memset(stream->wbuf, 0, MOD_SNAKE(offset)); +#else + memset(stream->wbuf, '-', MOD_SNAKE(offset)); +#endif - _file_write (&stream->file[0], data, len, stream->head - stream->offset[0]); - stream->head += len; + stream->wbuf_len = MOD_SNAKE(offset); + stream->rbuf_offset = 1; /* Invalidate */ } static gboolean _vte_file_stream_read (VteStream *astream, gsize offset, char *data, gsize len) { VteFileStream *stream = (VteFileStream *) astream; - gsize l; - - if (G_UNLIKELY (offset < stream->offset[1])) - return FALSE; - - if (offset < stream->offset[0]) { - l = _file_read (&stream->file[1], data, len, offset - stream->offset[1]); - offset += l; data += l; len -= l; if (!len) return TRUE; - } - l = _file_read (&stream->file[0], data, len, offset - stream->offset[0]); - offset += l; data += l; len -= l; if (!len) return TRUE; - - return FALSE; + /* Out of bounds request. + * Note: It needs to detect when offset is extremely large + * (actually a negative value stored in unsigned gsize), + * and the read attempt wraps around to a sane offset: + * https://bugzilla.gnome.org/show_bug.cgi?id=740347#c3 + * FIXME this is ugly and shouldn't be necessary, should fix our callers. + */ + if (G_UNLIKELY (offset < stream->tail || offset + len > stream->head || offset + len < offset)) { + /* If completely out of bounds, the caller expects a FALSE. */ + if (G_LIKELY (offset + len <= stream->tail || offset >= stream->head)) + return FALSE; + /* Partial out of bounds requests never happen. */ + g_assert_not_reached(); + } + + while (len && offset < ALIGN_SNAKE(stream->head)) { + gsize l = MIN(VTE_SNAKE_BLOCKSIZE - MOD_SNAKE(offset), len); + gsize offset_aligned = ALIGN_SNAKE(offset); + if (offset_aligned != stream->rbuf_offset) { + if (G_UNLIKELY (!_vte_snake_read (stream->snake, offset_aligned, stream->rbuf))) + return FALSE; + stream->rbuf_offset = offset_aligned; + } + memcpy(data, stream->rbuf + MOD_SNAKE(offset), l); + offset += l; data += l; len -= l; + } + if (len) { + g_assert_cmpuint (MOD_SNAKE(offset) + len, <=, stream->wbuf_len); + memcpy(data, stream->wbuf + MOD_SNAKE(offset), len); + } + return TRUE; } static void -_vte_file_stream_swap_fds (VteFileStream *stream) +_vte_file_stream_append (VteStream *astream, const char *data, gsize len) { - _file_t f; + VteFileStream *stream = (VteFileStream *) astream; - f = stream->file[0]; stream->file[0] = stream->file[1]; stream->file[1] = f; + while (len) { + gsize l = MIN(VTE_SNAKE_BLOCKSIZE - stream->wbuf_len, len); + memcpy(stream->wbuf + stream->wbuf_len, data, l); + stream->wbuf_len += l; data += l; len -= l; + if (stream->wbuf_len == VTE_SNAKE_BLOCKSIZE) { + _vte_snake_write (stream->snake, ALIGN_SNAKE(stream->head), stream->wbuf); + stream->wbuf_len = 0; + } + stream->head += l; + } } static void @@ -443,19 +657,27 @@ _vte_file_stream_truncate (VteStream *astream, gsize offset) { VteFileStream *stream = (VteFileStream *) astream; - if (G_UNLIKELY (offset < stream->offset[1])) { - _file_reset (&stream->file[1]); - stream->offset[1] = offset; - } - - if (G_UNLIKELY (offset < stream->offset[0])) { - _file_reset (&stream->file[0]); - stream->offset[0] = stream->offset[1]; - _vte_file_stream_swap_fds (stream); - } else { - _file_try_truncate (&stream->file[0], offset - stream->offset[0]); - } - + g_assert_cmpuint (offset, >=, stream->tail); + g_assert_cmpuint (offset, <=, stream->head); + + if (offset < ALIGN_SNAKE(stream->head)) { + /* Truncating goes back to the part that we've written to the + * file. For simplicity (since this is a rare event, only + * happens when the window size changes) go for the simplest + * local hack here that allows to leave the rest of the code + * intact, that is, read back the new partial last block to + * the write cache. */ + gsize offset_aligned = ALIGN_SNAKE(offset); + if (G_UNLIKELY (!_vte_snake_read (stream->snake, offset_aligned, stream->wbuf))) { + /* what now? */ + memset(stream->wbuf, 0, VTE_SNAKE_BLOCKSIZE); + } + + if (stream->rbuf_offset >= offset_aligned) { + stream->rbuf_offset = 1; /* Invalidate */ + } + } + stream->wbuf_len = MOD_SNAKE(offset); stream->head = offset; } @@ -464,14 +686,21 @@ _vte_file_stream_advance_tail (VteStream *astream, gsize offset) { VteFileStream *stream = (VteFileStream *) astream; - g_assert(offset <= stream->head); + g_assert_cmpuint (offset, >=, stream->tail); + g_assert_cmpuint (offset, <=, stream->head); - if (offset >= stream->offset[0]) { - stream->offset[1] = stream->offset[0]; - stream->offset[0] = stream->head; - _vte_file_stream_swap_fds (stream); - _file_reset (&stream->file[0]); - } + if (ALIGN_SNAKE(offset) > ALIGN_SNAKE(stream->tail)) + _vte_snake_advance_tail (stream->snake, ALIGN_SNAKE(offset)); + + stream->tail = offset; +} + +static gsize +_vte_file_stream_tail (VteStream *astream) +{ + VteFileStream *stream = (VteFileStream *) astream; + + return stream->tail; } static gsize @@ -490,9 +719,332 @@ _vte_file_stream_class_init (VteFileStreamClass *klass) gobject_class->finalize = _vte_file_stream_finalize; klass->reset = _vte_file_stream_reset; - klass->append = _vte_file_stream_append; klass->read = _vte_file_stream_read; + klass->append = _vte_file_stream_append; klass->truncate = _vte_file_stream_truncate; klass->advance_tail = _vte_file_stream_advance_tail; + klass->tail = _vte_file_stream_tail; klass->head = _vte_file_stream_head; } + +/******************************************************************************************/ + +#ifdef VTESTREAM_MAIN + +/* Some helpers. Macros rather than functions to report useful line numbers on failure. */ + +/* Check for the file's exact contents */ +#define assert_file(__fd, __contents) do { \ + char __buf[100]; \ + ssize_t __filesize = pread(__fd, __buf, 100, 0); \ + g_assert_cmpuint (__filesize, ==, strlen(__contents)); \ + g_assert (memcmp(__buf, __contents, strlen(__contents)) == 0); \ +} while (0) + +/* Check for the snake's state, tail, head and contents */ +#define assert_snake(__snake, __state, __tail, __head, __contents) do { \ + char __buf[VTE_SNAKE_BLOCKSIZE]; \ + int __i; \ + g_assert_cmpuint (__snake->state, ==, __state); \ + g_assert_cmpuint (__snake->tail, ==, __tail); \ + g_assert_cmpuint (__snake->head, ==, __head); \ + g_assert_cmpuint (strlen(__contents), ==, __head - __tail); \ + for (__i = __tail; __i < __head; __i += VTE_SNAKE_BLOCKSIZE) { \ + g_assert (_vte_snake_read (__snake, __i, __buf)); \ + g_assert (memcmp(__buf, __contents + __i - __tail, VTE_SNAKE_BLOCKSIZE) == 0); \ + } \ +} while (0) + +/* Check for the stream's tail, head and contents */ +#define assert_stream(__astream, __tail, __head, __contents) do { \ + char __buf[100]; \ + g_assert_cmpuint (_vte_stream_tail (__astream), ==, __tail); \ + g_assert_cmpuint (_vte_stream_head (__astream), ==, __head); \ + g_assert_cmpuint (strlen(__contents), ==, __head - __tail); \ + g_assert (_vte_stream_read (__astream, __tail, __buf, __head - __tail)); \ + g_assert (memcmp(__buf, __contents, __head - __tail) == 0); \ +} while (0) + +static void +test_snake (void) +{ + VteSnake *snake = g_object_new (VTE_TYPE_SNAKE, NULL); + + /* Test overwriting data */ + _vte_snake_write (snake, 0, "Alpaca."); + assert_snake (snake, 1, 0, 7, "Alpaca."); + + _vte_snake_write (snake, 7, "Bobcat."); + assert_file (snake->fd, "Alpaca.Bobcat."); + assert_snake (snake, 1, 0, 14, "Alpaca.Bobcat."); + + _vte_snake_write (snake, 7, "Camel.."); + assert_file (snake->fd, "Alpaca.Camel.."); + assert_snake (snake, 1, 0, 14, "Alpaca.Camel.."); + + _vte_snake_write (snake, 0, "Duck..."); + assert_file (snake->fd, "Duck...Camel.."); + assert_snake (snake, 1, 0, 14, "Duck...Camel.."); + + _vte_snake_write (snake, 14, "......."); + assert_file (snake->fd, "Duck...Camel........."); + assert_snake (snake, 1, 0, 21, "Duck...Camel........."); + + _vte_snake_write (snake, 21, "Ferret."); + assert_file (snake->fd, "Duck...Camel.........Ferret."); + assert_snake (snake, 1, 0, 28, "Duck...Camel.........Ferret."); + + /* Reset */ + _vte_snake_reset (snake, 0); + assert_snake (snake, 1, 0, 0, ""); + + /* State 1 */ + _vte_snake_write (snake, 0, "Alpaca."); + _vte_snake_write (snake, 7, "Bobcat."); + assert_file (snake->fd, "Alpaca.Bobcat."); + assert_snake (snake, 1, 0, 14, "Alpaca.Bobcat."); + + /* Stay in state 1 */ + _vte_snake_advance_tail (snake, 7); + _vte_snake_write (snake, 14, "Camel.."); + assert_file (snake->fd, ".......Bobcat.Camel.."); + assert_snake (snake, 1, 7, 21, "Bobcat.Camel.."); + + /* State 1 -> 2 */ + _vte_snake_advance_tail (snake, 14); + _vte_snake_write (snake, 21, "Duck..."); + assert_file (snake->fd, "Duck..........Camel.."); + assert_snake (snake, 2, 14, 28, "Camel..Duck..."); + + /* Stay in state 2 */ + _vte_snake_write (snake, 28, "Eagle.."); + assert_file (snake->fd, "Duck...Eagle..Camel.."); + assert_snake (snake, 2, 14, 35, "Camel..Duck...Eagle.."); + + /* State 2 -> 3 */ + _vte_snake_write (snake, 35, "Ferret."); + assert_file (snake->fd, "Duck...Eagle..Camel..Ferret."); + assert_snake (snake, 3, 14, 42, "Camel..Duck...Eagle..Ferret."); + + /* State 3 -> 4 */ + _vte_snake_advance_tail (snake, 21); + assert_file (snake->fd, "Duck...Eagle.........Ferret."); + assert_snake (snake, 4, 21, 42, "Duck...Eagle..Ferret."); + + /* Stay in state 4 */ + _vte_snake_advance_tail (snake, 28); + assert_file (snake->fd, ".......Eagle.........Ferret."); + assert_snake (snake, 4, 28, 42, "Eagle..Ferret."); + + /* State 4 -> 1 */ + _vte_snake_advance_tail (snake, 35); + assert_file (snake->fd, ".....................Ferret."); + assert_snake (snake, 1, 35, 42, "Ferret."); + + /* State 1 -> 2 */ + _vte_snake_write (snake, 42, "Giraffe"); + assert_file (snake->fd, "Giraffe..............Ferret."); + assert_snake (snake, 2, 35, 49, "Ferret.Giraffe"); + + /* Reset, back to state 1 */ + _vte_snake_reset (snake, 175); + assert_snake (snake, 1, 175, 175, ""); + + /* Stay in state 1 */ + _vte_snake_write (snake, 175, "Zebra.."); + assert_file (snake->fd, "Zebra.."); + assert_snake (snake, 1, 175, 182, "Zebra.."); + + g_object_unref (snake); +} + +#define stream_append(as, str) _vte_stream_append((as), (str), strlen(str)) + +static void +test_stream (void) +{ + VteSnake *snake; + char buf[8]; + + VteStream *astream = _vte_file_stream_new(); + VteFileStream *stream = (VteFileStream *) astream; + _vte_file_stream_init (stream); + snake = stream->snake; + + /* Append */ + stream_append (astream, "axolot"); + g_assert (snake->fd == -1); + assert_stream (astream, 0, 6, "axolot"); + + stream_append (astream, "l"); + assert_file (snake->fd, "axolotl"); + assert_snake (snake, 1, 0, 7, "axolotl"); + assert_stream (astream, 0, 7, "axolotl"); + + stream_append (astream, "beeee"); + assert_file (snake->fd, "axolotl"); + assert_snake (snake, 1, 0, 7, "axolotl"); + assert_stream (astream, 0, 12, "axolotl" "beeee"); + + stream_append (astream, "es" "cat"); + assert_file (snake->fd, "axolotl" "beeeees"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeees"); + assert_stream (astream, 0, 17, "axolotl" "beeeees" "cat"); + + /* Truncate */ + _vte_stream_truncate (astream, 14); + assert_file (snake->fd, "axolotl" "beeeees"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeees"); + assert_stream (astream, 0, 14, "axolotl" "beeeees"); + + _vte_stream_truncate (astream, 10); + assert_file (snake->fd, "axolotl" "beeeees"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeees"); + assert_stream (astream, 0, 10, "axolotl" "bee"); + + /* Increase overwrite counter, overwrite with shorter block */ + stream_append (astream, "eeee" "cat"); + assert_file (snake->fd, "axolotl" "beeeeee"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeeee"); + assert_stream (astream, 0, 17, "axolotl" "beeeeee" "cat"); + + /* Test that the read cache is invalidated on truncate */ + _vte_stream_read (astream, 12, buf, 2); + g_assert_cmpuint (stream->rbuf_offset, ==, 7); + _vte_stream_truncate (astream, 13); + g_assert_cmpuint (stream->rbuf_offset, ==, 1); + stream_append (astream, "z" "cat"); + _vte_stream_read (astream, 12, buf, 2); + g_assert_cmpuint (stream->rbuf_offset, ==, 7); + buf[2] = '\0'; + g_assert_cmpstr (buf, ==, "ez"); + assert_file (snake->fd, "axolotl" "beeeeez"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeeez"); + assert_stream (astream, 0, 17, "axolotl" "beeeeez" "cat"); + + /* Truncate again */ + _vte_stream_truncate (astream, 10); + assert_file (snake->fd, "axolotl" "beeeeez"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeeez"); + assert_stream (astream, 0, 10, "axolotl" "bee"); + + /* Advance_tail */ + _vte_stream_advance_tail (astream, 6); + assert_file (snake->fd, "axolotl" "beeeeez"); + assert_snake (snake, 1, 0, 14, "axolotl" "beeeeez"); + assert_stream (astream, 6, 10, "l" "bee"); + + _vte_stream_advance_tail (astream, 7); + assert_file (snake->fd, "......." "beeeeez"); + assert_snake (snake, 1, 7, 14, "beeeeez"); + assert_stream (astream, 7, 10, "bee"); + + /* Tail and head within the same block in the stream, + * but not in underlying layers (due to a previous truncate). + * Nothing special. */ + _vte_stream_advance_tail (astream, 9); + assert_file (snake->fd, "......." "beeeeez"); + assert_snake (snake, 1, 7, 14, "beeeeez"); + assert_stream (astream, 9, 10, "e"); + + /* Tail reaches head. Still nothing special. */ + _vte_stream_advance_tail (astream, 10); + assert_file (snake->fd, "......." "beeeeez"); + assert_snake (snake, 1, 7, 14, "beeeeez"); + assert_stream (astream, 10, 10, ""); + + /* Snake state 2 */ + stream_append (astream, "eeee" "catfish"); + _vte_stream_advance_tail (astream, 15); + stream_append (astream, "dolphin" "echi"); + assert_file (snake->fd, "dolphin" "......." "catfish"); + assert_snake (snake, 2, 14, 28, "catfish" "dolphin"); + assert_stream (astream, 15, 32, "atfish" "dolphin" "echi"); + + /* Tail and head within the same block. + * The snake resets itself to state 1, ... + * (Note: despite advance_tail, "ec" is still there in the write buffer) */ + _vte_stream_advance_tail (astream, 30); + assert_snake (snake, 1, 28, 28, ""); + assert_stream (astream, 30, 32, "hi"); + + /* ... and the next write goes to beginning of the file */ + stream_append (astream, "dna"); + assert_file (snake->fd, "echidna"); + assert_snake (snake, 1, 28, 35, "echidna"); + assert_stream (astream, 30, 35, "hidna"); + + /* Test a bit what happens when "accidentally" writing aligned blocks. */ + _vte_stream_advance_tail (astream, 35); + stream_append (astream, "flicker" "grizzly"); + assert_file (snake->fd, "flicker" "grizzly"); + assert_snake (snake, 1, 35, 49, "flicker" "grizzly"); + assert_stream (astream, 35, 49, "flicker" "grizzly"); + + stream_append (astream, "hamster"); + assert_file (snake->fd, "flicker" "grizzly" "hamster"); + assert_snake (snake, 1, 35, 56, "flicker" "grizzly" "hamster"); + assert_stream (astream, 35, 56, "flicker" "grizzly" "hamster"); + + _vte_stream_advance_tail (astream, 49); + assert_file (snake->fd, "......." "......." "hamster"); + assert_snake (snake, 1, 49, 56, "hamster"); + assert_stream (astream, 49, 56, "hamster"); + + /* State 2 */ + stream_append (astream, "ibexxxx"); + assert_file (snake->fd, "ibexxxx" "......." "hamster"); + assert_snake (snake, 2, 49, 63, "hamster" "ibexxxx"); + assert_stream (astream, 49, 63, "hamster" "ibexxxx"); + + stream_append (astream, "jjjjjjj"); + assert_file (snake->fd, "ibexxxx" "jjjjjjj" "hamster"); + assert_snake (snake, 2, 49, 70, "hamster" "ibexxxx" "jjjjjjj"); + assert_stream (astream, 49, 70, "hamster" "ibexxxx" "jjjjjjj"); + + /* State 3 */ + stream_append (astream, "karakul"); + assert_file (snake->fd, "ibexxxx" "jjjjjjj" "hamster" "karakul"); + assert_snake (snake, 3, 49, 77, "hamster" "ibexxxx" "jjjjjjj" "karakul"); + assert_stream (astream, 49, 77, "hamster" "ibexxxx" "jjjjjjj" "karakul"); + + /* State 4 */ + _vte_stream_advance_tail (astream, 56); + assert_file (snake->fd, "ibexxxx" "jjjjjjj" "......." "karakul"); + assert_snake (snake, 4, 56, 77, "ibexxxx" "jjjjjjj" "karakul"); + assert_stream (astream, 56, 77, "ibexxxx" "jjjjjjj" "karakul"); + + stream_append (astream, "llllama"); + assert_file (snake->fd, "ibexxxx" "jjjjjjj" "......." "karakul" "llllama"); + assert_snake (snake, 4, 56, 84, "ibexxxx" "jjjjjjj" "karakul" "llllama"); + assert_stream (astream, 56, 84, "ibexxxx" "jjjjjjj" "karakul" "llllama"); + + /* Explicit reset to the middle of a poor meerkat */ + _vte_stream_reset (astream, 88); + stream_append (astream, "kat"); + /* Unused leading blocks are filled with dashes */ + assert_file (snake->fd, "----kat"); + assert_snake (snake, 1, 84, 91, "----kat"); + assert_stream (astream, 88, 91, "kat"); + + /* Explicit reset to a block boundary */ + _vte_stream_reset (astream, 175); + stream_append (astream, "zebraaa"); + assert_file (snake->fd, "zebraaa"); + assert_snake (snake, 1, 175, 182, "zebraaa"); + assert_stream (astream, 175, 182, "zebraaa"); + + g_object_unref (astream); +} + +int +main (int argc, char **argv) +{ + test_snake(); + test_stream(); + + printf("vtestream-file tests passed :)\n"); + return 0; +} + +#endif /* VTESTREAM_MAIN */ diff --git a/src/vtestream.h b/src/vtestream.h index 4cf68624..916332f2 100644 --- a/src/vtestream.h +++ b/src/vtestream.h @@ -29,10 +29,11 @@ G_BEGIN_DECLS typedef struct _VteStream VteStream; void _vte_stream_reset (VteStream *stream, gsize offset); -void _vte_stream_append (VteStream *stream, const char *data, gsize len); gboolean _vte_stream_read (VteStream *stream, gsize offset, char *data, gsize len); +void _vte_stream_append (VteStream *stream, const char *data, gsize len); void _vte_stream_truncate (VteStream *stream, gsize offset); void _vte_stream_advance_tail (VteStream *stream, gsize offset); +gsize _vte_stream_tail (VteStream *stream); gsize _vte_stream_head (VteStream *stream); /* Various streams */ |