diff options
author | Nathan Sidwell <nathan@acm.org> | 2020-12-14 08:10:27 -0800 |
---|---|---|
committer | Nathan Sidwell <nathan@acm.org> | 2020-12-15 07:09:59 -0800 |
commit | 362303298ac4c1f93bda87535df2b726481d54bb (patch) | |
tree | b728e42aa7e93c1fd673e75ee0071b86b8ae9c6c /libcody/buffer.cc | |
parent | c5271279d6e86df0d0203c11fc4c3e3c99a14bb7 (diff) | |
download | gcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.gz |
Add libcody
In order to separate compiler from build system, C++ Modules, as
implemented in GCC introduces a communication channel between those
two entities. This is implemented by libcody. It is anticipated that
other implementations will also implement this protocol, or use
libcody to provide it.
* Makefile.def: Add libcody.
* configure.ac: Add libcody.
* Makefile.in: Regenerated.
* configure: Regenerated.
gcc/
* Makefile.in (CODYINC, CODYLIB, CODYLIB_H): New. Use them.
libcody/
* configure.ac: New.
* CMakeLists.txt: New.
* CODING.md: New.
* CONTRIB.md: New.
* LICENSE: New.
* LICENSE.gcc: New.
* Makefile.in: New.
* Makesub.in: New.
* README.md: New.
* buffer.cc: New.
* build-aux/config.guess: New.
* build-aux/config.sub: New.
* build-aux/install-sh: New.
* client.cc: New.
* cmake/libcody-config-ix.cmake
* cody.hh: New.
* config.h.in: New.
* config.m4: New.
* configure: New.
* configure.ac: New.
* dox.cfg.in: New.
* fatal.cc: New.
* gdbinit.in: New.
* internal.hh: New.
* netclient.cc: New.
* netserver.cc: New.
* packet.cc: New.
* resolver.cc: New.
* server.cc: New.
* tests/01-serialize/connect.cc: New.
* tests/01-serialize/decoder.cc: New.
* tests/01-serialize/encoder.cc: New.
* tests/02-comms/client-1.cc: New.
* tests/02-comms/pivot-1.cc: New.
* tests/02-comms/server-1.cc: New.
* tests/Makesub.in: New.
* tests/jouster: New.
Diffstat (limited to 'libcody/buffer.cc')
-rw-r--r-- | libcody/buffer.cc | 387 |
1 files changed, 387 insertions, 0 deletions
diff --git a/libcody/buffer.cc b/libcody/buffer.cc new file mode 100644 index 00000000000..52df3176c9a --- /dev/null +++ b/libcody/buffer.cc @@ -0,0 +1,387 @@ +// CODYlib -*- mode:c++ -*- +// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org +// License: Apache v2.0 + +// Cody +#include "internal.hh" +// C++ +#include <algorithm> +// C +#include <cstring> +// OS +#include <unistd.h> +#include <cerrno> + +// MessageBuffer code + +// Lines consist of words and end with a NEWLINE (0xa) char +// Whitespace characters are TAB (0x9) and SPACE (0x20) +// Words consist of non-whitespace chars separated by whitespace. +// Multiple lines in one transaction are indicated by ending non-final +// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE +// Continuations with ; preceding it +// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting. +// Quoting with '...' +// Anything outside of [-+_/%.a-zA-Z0-9] needs quoting +// Anything outside of <= <space> or DEL or \' or \\ needs escaping. +// Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>? +// Spaces separate words, UTF8 encoding for non-ascii chars + +namespace Cody { +namespace Detail { + +static const char CONTINUE = S2C(u8";"); + +void MessageBuffer::BeginLine () +{ + if (!buffer.empty ()) + { + // Terminate the previous line with a continuation + buffer.reserve (buffer.size () + 3); + buffer.push_back (S2C(u8" ")); + buffer.push_back (CONTINUE); + buffer.push_back (S2C(u8"\n")); + } + lastBol = buffer.size (); +} + +// QUOTE means 'maybe quote', we search it for quote-needing chars + +void MessageBuffer::Append (char const *str, bool quote, size_t len) +{ + if (len == ~size_t (0)) + len = strlen (str); + + if (!len && !quote) + return; + + // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything + // that could remotely be shell-active. UTF8 encoding for non-ascii. + if (quote && len) + { + quote = false; + // Scan looking for quote-needing characters. We could just + // append until we find one, but that's probably confusing + for (size_t ix = len; ix--;) + { + unsigned char c = (unsigned char)str[ix]; + if (!((c >= S2C(u8"a") && c <= S2C(u8"z")) + || (c >= S2C(u8"A") && c <= S2C(u8"Z")) + || (c >= S2C(u8"0") && c <= S2C(u8"9")) + || c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_") + || c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8"."))) + { + quote = true; + break; + } + } + } + + // Maximal length of appended string + buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2); + + if (quote) + buffer.push_back (S2C(u8"'")); + + for (auto *end = str + len; str != end;) + { + auto *e = end; + + if (quote) + // Look for next escape-needing char. More relaxed than + // the earlier needs-quoting check. + for (e = str; e != end; ++e) + { + unsigned char c = (unsigned char)*e; + if (c < S2C(u8" ") || c == 0x7f + || c == S2C(u8"\\") || c == S2C(u8"'")) + break; + } + buffer.insert (buffer.end (), str, e); + str = e; + + if (str == end) + break; + + buffer.push_back (S2C(u8"\\")); + switch (unsigned char c = (unsigned char)*str++) + { + case S2C(u8"\t"): + c = S2C(u8"t"); + goto append; + + case S2C(u8"\n"): + c = S2C(u8"n"); + goto append; + + case S2C(u8"'"): + case S2C(u8"\\"): + append: + buffer.push_back (c); + break; + + default: + // Full-on escape. Use 2 lower-case hex chars + for (unsigned shift = 8; shift;) + { + shift -= 4; + + char nibble = (c >> shift) & 0xf; + nibble += S2C(u8"0"); + if (nibble > S2C(u8"9")) + nibble += S2C(u8"a") - (S2C(u8"9") + 1); + buffer.push_back (nibble); + } + } + } + + if (quote) + buffer.push_back (S2C(u8"'")); +} + +void MessageBuffer::Append (char c) +{ + buffer.push_back (c); +} + +void MessageBuffer::AppendInteger (unsigned u) +{ + std::string v (std::to_string (u)); + AppendWord (v); +} + +int MessageBuffer::Write (int fd) noexcept +{ + size_t limit = buffer.size () - lastBol; + ssize_t count = write (fd, &buffer.data ()[lastBol], limit); + + int err = 0; + if (count < 0) + err = errno; + else + { + lastBol += count; + if (size_t (count) != limit) + err = EAGAIN; + } + + if (err != EAGAIN && err != EINTR) + { + // Reset for next message + buffer.clear (); + lastBol = 0; + } + + return err; +} + +int MessageBuffer::Read (int fd) noexcept +{ + constexpr size_t blockSize = 200; + + size_t lwm = buffer.size (); + size_t hwm = buffer.capacity (); + if (hwm - lwm < blockSize / 2) + hwm += blockSize; + buffer.resize (hwm); + + auto iter = buffer.begin () + lwm; + ssize_t count = read (fd, &*iter, hwm - lwm); + buffer.resize (lwm + (count >= 0 ? count : 0)); + + if (count < 0) + return errno; + + if (!count) + // End of file + return -1; + + bool more = true; + for (;;) + { + auto newline = std::find (iter, buffer.end (), S2C(u8"\n")); + if (newline == buffer.end ()) + break; + more = newline != buffer.begin () && newline[-1] == CONTINUE; + iter = newline + 1; + + if (iter == buffer.end ()) + break; + + if (!more) + { + // There is no continuation, but there are chars after the + // newline. Truncate the buffer and return an error + buffer.resize (iter - buffer.begin ()); + return EINVAL; + } + } + + return more ? EAGAIN : 0; +} + +int MessageBuffer::Lex (std::vector<std::string> &result) +{ + result.clear (); + + int err = ENOENT; + if (IsAtEnd ()) + return ENOENT; + + Assert (buffer.back () == S2C(u8"\n")); + + auto iter = buffer.begin () + lastBol; + + for (std::string *word = nullptr;;) + { + char c = *iter; + + ++iter; + if (c == S2C(u8" ") || c == S2C(u8"\t")) + { + word = nullptr; + continue; + } + + if (c == S2C(u8"\n")) + break; + + if (c == CONTINUE) + { + // Line continuation + if (word || *iter != S2C(u8"\n")) + goto malformed; + ++iter; + break; + } + + if (c <= S2C(u8" ") || c >= 0x7f) + goto malformed; + + if (!word) + { + result.emplace_back (); + word = &result.back (); + } + + if (c == S2C(u8"'")) + { + // Quoted word + for (;;) + { + c = *iter; + + if (c == S2C(u8"\n")) + { + malformed:; + result.clear (); + iter = std::find (iter, buffer.end (), S2C(u8"\n")); + auto back = iter; + if (back[-1] == CONTINUE && back[-2] == S2C(u8" ")) + // Smells like a line continuation + back -= 2; + result.emplace_back (&buffer[lastBol], + back - buffer.begin () - lastBol); + ++iter; + lastBol = iter - buffer.begin (); + return EINVAL; + } + + if (c < S2C(u8" ") || c >= 0x7f) + goto malformed; + + ++iter; + if (c == S2C(u8"'")) + break; + + if (c == S2C(u8"\\")) + // escape + switch (c = *iter) + { + case S2C(u8"\\"): + case S2C(u8"'"): + ++iter; + break; + + case S2C(u8"n"): + c = S2C(u8"\n"); + ++iter; + break; + + case S2C(u8"_"): + // We used to escape SPACE as \_, so accept that + c = S2C(u8" "); + ++iter; + break; + + case S2C(u8"t"): + c = S2C(u8"\t"); + ++iter; + break; + + default: + { + unsigned v = 0; + for (unsigned nibble = 0; nibble != 2; nibble++) + { + c = *iter; + if (c < S2C(u8"0")) + { + if (!nibble) + goto malformed; + break; + } + else if (c <= S2C(u8"9")) + c -= S2C(u8"0"); + else if (c < S2C(u8"a")) + { + if (!nibble) + goto malformed; + break; + } + else if (c <= S2C(u8"f")) + c -= S2C(u8"a") - 10; + else + { + if (!nibble) + goto malformed; + break; + } + ++iter; + v = (v << 4) | c; + } + c = v; + } + } + word->push_back (c); + } + } + else + // Unquoted character + word->push_back (c); + } + lastBol = iter - buffer.begin (); + if (result.empty ()) + return ENOENT; + + return 0; +} + +void MessageBuffer::LexedLine (std::string &str) +{ + if (lastBol) + { + size_t pos = lastBol - 1; + for (; pos; pos--) + if (buffer[pos-1] == S2C(u8"\n")) + break; + + size_t end = lastBol - 1; + if (buffer[end-1] == CONTINUE && buffer[end-2] == S2C(u8" ")) + // Strip line continuation + end -= 2; + str.append (&buffer[pos], end - pos); + } +} +} // Detail +} // Cody |