summaryrefslogtreecommitdiff
path: root/libcody/buffer.cc
diff options
context:
space:
mode:
authorNathan Sidwell <nathan@acm.org>2020-12-14 08:10:27 -0800
committerNathan Sidwell <nathan@acm.org>2020-12-15 07:09:59 -0800
commit362303298ac4c1f93bda87535df2b726481d54bb (patch)
treeb728e42aa7e93c1fd673e75ee0071b86b8ae9c6c /libcody/buffer.cc
parentc5271279d6e86df0d0203c11fc4c3e3c99a14bb7 (diff)
downloadgcc-362303298ac4c1f93bda87535df2b726481d54bb.tar.gz
Add libcody
In order to separate compiler from build system, C++ Modules, as implemented in GCC, introduces a communication channel between those two entities. This is implemented by libcody. It is anticipated that other implementations will also implement this protocol, or use libcody to provide it.

* Makefile.def: Add libcody.
* configure.ac: Add libcody.
* Makefile.in: Regenerated.
* configure: Regenerated.

gcc/
* Makefile.in (CODYINC, CODYLIB, CODYLIB_H): New. Use them.

libcody/
* configure.ac: New.
* CMakeLists.txt: New.
* CODING.md: New.
* CONTRIB.md: New.
* LICENSE: New.
* LICENSE.gcc: New.
* Makefile.in: New.
* Makesub.in: New.
* README.md: New.
* buffer.cc: New.
* build-aux/config.guess: New.
* build-aux/config.sub: New.
* build-aux/install-sh: New.
* client.cc: New.
* cmake/libcody-config-ix.cmake
* cody.hh: New.
* config.h.in: New.
* config.m4: New.
* configure: New.
* configure.ac: New.
* dox.cfg.in: New.
* fatal.cc: New.
* gdbinit.in: New.
* internal.hh: New.
* netclient.cc: New.
* netserver.cc: New.
* packet.cc: New.
* resolver.cc: New.
* server.cc: New.
* tests/01-serialize/connect.cc: New.
* tests/01-serialize/decoder.cc: New.
* tests/01-serialize/encoder.cc: New.
* tests/02-comms/client-1.cc: New.
* tests/02-comms/pivot-1.cc: New.
* tests/02-comms/server-1.cc: New.
* tests/Makesub.in: New.
* tests/jouster: New.
Diffstat (limited to 'libcody/buffer.cc')
-rw-r--r--libcody/buffer.cc387
1 files changed, 387 insertions, 0 deletions
diff --git a/libcody/buffer.cc b/libcody/buffer.cc
new file mode 100644
index 00000000000..52df3176c9a
--- /dev/null
+++ b/libcody/buffer.cc
@@ -0,0 +1,387 @@
+// CODYlib -*- mode:c++ -*-
+// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
+// License: Apache v2.0
+
+// Cody
+#include "internal.hh"
+// C++
+#include <algorithm>
+// C
+#include <cstring>
+// OS
+#include <unistd.h>
+#include <cerrno>
+
+// MessageBuffer code
+
+// Lines consist of words and end with a NEWLINE (0xa) char
+// Whitespace characters are TAB (0x9) and SPACE (0x20)
+// Words consist of non-whitespace chars separated by whitespace.
+// Multiple lines in one transaction are indicated by ending non-final
+// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE
+// (i.e. a continued line has the ; word directly before its NEWLINE)
+// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
+// Quoting with '...'
+// Anything outside of [-+_/%.a-zA-Z0-9] needs quoting
+// Inside quotes, anything < <space>, or DEL, or \' or \\, needs escaping.
+// Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>?
+// Spaces separate words, UTF8 encoding for non-ascii chars
+
+namespace Cody {
+namespace Detail {
+
+// The continuation marker: a lone ";" word placed directly before a
+// newline indicates that another line of the same message follows.
+static constexpr char CONTINUE = S2C(u8";");
+
+// Start a new line of the message.  When the buffer already holds
+// text, that text is first closed off with the continuation sequence
+// SPACE, CONTINUE, NEWLINE.  lastBol is then set to the offset at
+// which the new line begins.
+void MessageBuffer::BeginLine ()
+{
+  if (!buffer.empty ())
+    {
+      // Mark the line written so far as continued.
+      char const tail[] = {S2C(u8" "), CONTINUE, S2C(u8"\n")};
+
+      buffer.reserve (buffer.size () + sizeof (tail));
+      buffer.insert (buffer.end (), tail, tail + sizeof (tail));
+    }
+
+  lastBol = buffer.size ();
+}
+
+// Append LEN bytes of STR to the buffer.  A LEN of ~size_t (0) means
+// STR is NUL-terminated and its length is taken with strlen.
+//
+// QUOTE means 'quote if needed': the text is wrapped in '...' only
+// when it is empty or contains a byte outside [-+_/%.a-zA-Z0-9].
+// Inside quotes, bytes below SPACE, DEL, backslash and the quote
+// character are written as backslash escapes (\t, \n, \', \\, and
+// otherwise backslash followed by two lower-case hex digits).
+// Without QUOTE the bytes are copied verbatim, and an empty string
+// appends nothing at all.
+//
+// NOTE(review): bytes >= 0x80 are emitted unescaped inside quotes,
+// whereas MessageBuffer::Lex in this file treats such bytes as
+// malformed -- confirm that this asymmetry is intended.
+void MessageBuffer::Append (char const *str, bool quote, size_t len)
+{
+  if (len == ~size_t (0))
+    len = strlen (str);
+
+  if (!len && !quote)
+    return;
+
+  char const *const end = str + len;
+
+  if (quote && len)
+    {
+      // Only quote when some byte could remotely be shell-active,
+      // i.e. lies outside [-+_A-Za-z0-9/%.].
+      auto const isPlain = [] (char ch)
+	{
+	  unsigned char const c = static_cast<unsigned char> (ch);
+
+	  return ((c >= S2C(u8"a") && c <= S2C(u8"z"))
+		  || (c >= S2C(u8"A") && c <= S2C(u8"Z"))
+		  || (c >= S2C(u8"0") && c <= S2C(u8"9"))
+		  || c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_")
+		  || c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8"."));
+	};
+
+      quote = !std::all_of (str, end, isPlain);
+    }
+
+  // Worst case every byte becomes a three-byte escape, plus the two
+  // surrounding quote characters.
+  buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2);
+
+  if (quote)
+    buffer.push_back (S2C(u8"'"));
+
+  // Escaping is more relaxed than the needs-quoting test above.
+  auto const needsEscape = [] (char ch)
+    {
+      unsigned char const c = static_cast<unsigned char> (ch);
+
+      return (c < S2C(u8" ") || c == 0x7f
+	      || c == S2C(u8"\\") || c == S2C(u8"'"));
+    };
+
+  while (str != end)
+    {
+      // Copy the longest run that needs no escaping.  Unquoted text
+      // is always a single run.
+      char const *const run = quote ? std::find_if (str, end, needsEscape) : end;
+
+      buffer.insert (buffer.end (), str, run);
+      str = run;
+      if (str == end)
+	break;
+
+      // STR now points at a byte that must be escaped.
+      buffer.push_back (S2C(u8"\\"));
+      unsigned char const c = static_cast<unsigned char> (*str++);
+
+      if (c == S2C(u8"\t"))
+	buffer.push_back (S2C(u8"t"));
+      else if (c == S2C(u8"\n"))
+	buffer.push_back (S2C(u8"n"));
+      else if (c == S2C(u8"'") || c == S2C(u8"\\"))
+	buffer.push_back (c);
+      else
+	{
+	  // Full-on escape: two lower-case hex digits, high nibble first.
+	  auto const hexDigit = [] (unsigned nibble)
+	    {
+	      return static_cast<char> (nibble < 10
+					? S2C(u8"0") + nibble
+					: S2C(u8"a") + (nibble - 10));
+	    };
+
+	  buffer.push_back (hexDigit ((c >> 4) & 0xf));
+	  buffer.push_back (hexDigit (c & 0xf));
+	}
+    }
+
+  if (quote)
+    buffer.push_back (S2C(u8"'"));
+}
+
+// Append the single character C to the buffer exactly as given; no
+// quoting or escaping is applied.
+void MessageBuffer::Append (char c)
+{
+  buffer.insert (buffer.end (), c);
+}
+
+// Append the decimal representation of U, passing it to AppendWord.
+void MessageBuffer::AppendInteger (unsigned u)
+{
+  auto digits = std::to_string (u);
+
+  AppendWord (digits);
+}
+
+// Write the unsent part of the buffer to FD with a single write call.
+// Here lastBol is used as the offset of the first byte still to be
+// written.
+//
+// Returns 0 once everything has been written, EAGAIN after a short
+// write, and otherwise the errno of the failed write.  Unless the
+// result is EAGAIN or EINTR, the buffer is emptied and lastBol reset
+// so that the next message starts afresh.
+int MessageBuffer::Write (int fd) noexcept
+{
+  size_t const pending = buffer.size () - lastBol;
+  ssize_t const written = write (fd, &buffer.data ()[lastBol], pending);
+
+  int err;
+  if (written < 0)
+    err = errno;
+  else
+    {
+      lastBol += written;
+      err = size_t (written) == pending ? 0 : EAGAIN;
+    }
+
+  bool const retryable = err == EAGAIN || err == EINTR;
+  if (!retryable)
+    {
+      // Finished, successfully or not: reset for the next message.
+      buffer.clear ();
+      lastBol = 0;
+    }
+
+  return err;
+}
+
+// Read more data from FD onto the end of the buffer with a single
+// read call, then examine the newly read bytes for line ends.
+//
+// Returns:
+//   errno   the read failed
+//   -1      end of file (read returned zero)
+//   EAGAIN  the message is not complete yet: either no newline has
+//           arrived, or the last line ends with CONTINUE
+//   EINVAL  bytes follow a newline that is not preceded by CONTINUE;
+//           the buffer is truncated to just after that newline
+//   0       the data ends with a newline not preceded by CONTINUE
+int MessageBuffer::Read (int fd) noexcept
+{
+  constexpr size_t blockSize = 200;
+
+  // Expose the spare capacity to read into, growing by a block when
+  // less than half a block is free.
+  size_t const used = buffer.size ();
+  size_t room = buffer.capacity ();
+  if (room - used < blockSize / 2)
+    room += blockSize;
+  buffer.resize (room);
+
+  auto scan = buffer.begin () + used;
+  ssize_t const got = read (fd, &*scan, room - used);
+
+  // Trim back to what is actually held.
+  buffer.resize (used + (got > 0 ? got : 0));
+
+  if (got < 0)
+    return errno;
+
+  if (got == 0)
+    // End of file
+    return -1;
+
+  bool continued = true;
+  auto const finish = buffer.end ();
+  for (auto nl = std::find (scan, finish, S2C(u8"\n"));
+       nl != finish;
+       nl = std::find (scan, finish, S2C(u8"\n")))
+    {
+      continued = nl != buffer.begin () && nl[-1] == CONTINUE;
+      scan = nl + 1;
+
+      if (scan == finish)
+	break;
+
+      if (!continued)
+	{
+	  // The message ended at this newline, yet more bytes follow.
+	  // Drop them and report the error.
+	  buffer.resize (scan - buffer.begin ());
+	  return EINVAL;
+	}
+    }
+
+  return continued ? EAGAIN : 0;
+}
+
+// Lex the line starting at lastBol into RESULT, one string per word,
+// undoing quoting and escapes.
+//
+// Words are separated by SPACE or TAB.  A word may contain quoted
+// sections '...'; inside them the escapes \\, \', \n, \t and \_ (an
+// older spelling of SPACE) are accepted, as is a backslash followed
+// by one or two lower-case hex digits.  A CONTINUE character is only
+// valid as a word of its own directly before the newline.
+//
+// Returns:
+//   0       success; RESULT holds the words
+//   ENOENT  IsAtEnd () was true, or the line held no words
+//   EINVAL  the line is malformed; RESULT holds one string with the
+//           raw text of the line, less any trailing " ;"
+// Except when IsAtEnd () was true, lastBol is advanced to just past
+// the newline that ends the line.
+int MessageBuffer::Lex (std::vector<std::string> &result)
+{
+  result.clear ();
+
+  if (IsAtEnd ())
+    return ENOENT;
+
+  Assert (buffer.back () == S2C(u8"\n"));
+
+  auto iter = buffer.begin () + lastBol;
+
+  // WORD is the word currently being built, or null between words.
+  for (std::string *word = nullptr;;)
+    {
+      char c = *iter;
+
+      ++iter;
+      if (c == S2C(u8" ") || c == S2C(u8"\t"))
+	{
+	  word = nullptr;
+	  continue;
+	}
+
+      if (c == S2C(u8"\n"))
+	break;
+
+      if (c == CONTINUE)
+	{
+	  // Line continuation: must stand alone, right before newline
+	  if (word || *iter != S2C(u8"\n"))
+	    goto malformed;
+	  ++iter;
+	  break;
+	}
+
+      if (c <= S2C(u8" ") || c >= 0x7f)
+	goto malformed;
+
+      if (!word)
+	{
+	  result.emplace_back ();
+	  word = &result.back ();
+	}
+
+      if (c != S2C(u8"'"))
+	{
+	  // Unquoted character
+	  word->push_back (c);
+	  continue;
+	}
+
+      // Quoted section, runs to the closing quote
+      for (;;)
+	{
+	  c = *iter;
+
+	  if (c == S2C(u8"\n"))
+	    // Unterminated quote
+	    goto malformed;
+
+	  if (c < S2C(u8" ") || c >= 0x7f)
+	    goto malformed;
+
+	  ++iter;
+	  if (c == S2C(u8"'"))
+	    break;
+
+	  if (c == S2C(u8"\\"))
+	    // escape
+	    switch (c = *iter)
+	      {
+	      case S2C(u8"\\"):
+	      case S2C(u8"'"):
+		++iter;
+		break;
+
+	      case S2C(u8"n"):
+		c = S2C(u8"\n");
+		++iter;
+		break;
+
+	      case S2C(u8"_"):
+		// We used to escape SPACE as \_, so accept that
+		c = S2C(u8" ");
+		++iter;
+		break;
+
+	      case S2C(u8"t"):
+		c = S2C(u8"\t");
+		++iter;
+		break;
+
+	      default:
+		{
+		  // One or two lower-case hex digits.  The first is
+		  // mandatory, the second optional.
+		  unsigned v = 0;
+		  for (unsigned nibble = 0; nibble != 2; nibble++)
+		    {
+		      c = *iter;
+		      if (c < S2C(u8"0"))
+			{
+			  if (!nibble)
+			    goto malformed;
+			  break;
+			}
+		      else if (c <= S2C(u8"9"))
+			c -= S2C(u8"0");
+		      else if (c < S2C(u8"a"))
+			{
+			  if (!nibble)
+			    goto malformed;
+			  break;
+			}
+		      else if (c <= S2C(u8"f"))
+			c -= S2C(u8"a") - 10;
+		      else
+			{
+			  if (!nibble)
+			    goto malformed;
+			  break;
+			}
+		      ++iter;
+		      v = (v << 4) | c;
+		    }
+		  c = v;
+		}
+	      }
+	  word->push_back (c);
+	}
+    }
+
+  lastBol = iter - buffer.begin ();
+  if (result.empty ())
+    return ENOENT;
+
+  return 0;
+
+ malformed:
+  // Give up on this line: return its raw text as the sole result and
+  // step over it, so that a following line can still be lexed.
+  result.clear ();
+  iter = std::find (iter, buffer.end (), S2C(u8"\n"));
+  auto back = iter;
+  if (back[-1] == CONTINUE && back[-2] == S2C(u8" "))
+    // Smells like a line continuation
+    back -= 2;
+  result.emplace_back (&buffer[lastBol],
+		       back - buffer.begin () - lastBol);
+  ++iter;
+  lastBol = iter - buffer.begin ();
+  return EINVAL;
+}
+
+// Append to STR the raw text of the line that ends just before
+// lastBol (the line most recently stepped over), without its newline
+// and without a trailing " ;" continuation.  Nothing is appended when
+// lastBol is zero.
+void MessageBuffer::LexedLine (std::string &str)
+{
+  if (!lastBol)
+    return;
+
+  // Offset of the newline that ends the line
+  size_t end = lastBol - 1;
+
+  // Walk back to the start of the line: either the start of the
+  // buffer or just after the previous newline.
+  size_t pos = end;
+  while (pos && buffer[pos - 1] != S2C(u8"\n"))
+    --pos;
+
+  // Only look for a continuation when the line is long enough to
+  // hold one; otherwise the indices below would wrap around and read
+  // outside the buffer.
+  if (end - pos >= 2
+      && buffer[end - 1] == CONTINUE && buffer[end - 2] == S2C(u8" "))
+    // Strip line continuation
+    end -= 2;
+
+  str.append (&buffer[pos], end - pos);
+}
+} // Detail
+} // Cody