From 804d65e5fd86057e78c0d0b5f6139172d20b8207 Mon Sep 17 00:00:00 2001 From: Vadim Bendebury Date: Mon, 24 Feb 2020 17:20:57 -0800 Subject: make: add preprocessor stage For the upcoming introduction of transitioning Cr50 console communications to packet mode, there is a need to be able to replace all print function invocations in the code with calls to packet sending function. This replacement is easiest to make in C preprocessor outputs, as there all macros are replaced with actual function invocations. This patch adds a configuration option CONFIG_EXTRACT_PRINTF_STRINGS, when enabled, building of the image object files starts happening in three steps instead of one, instead of .c => .o transition, the steps are .c => .E => .Ep => .o, where .E is the C preprocessor output, and .Ep is result of post processing by ./util/util_precompile.py. BUG=b:149964350 TEST=image layout does not change if CONFIG_EXTRACT_PRINTF_STRINGS is not defined. With the rest of the patches applied defining the above config option allows to build a Cr50 image supporting packet console communications mode. Signed-off-by: Vadim Bendebury Change-Id: I20b8ba7c5d13cb54ac6adbdbce856d92023ce997 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/2113122 Reviewed-by: Randall Spangler --- Makefile | 19 +++++ Makefile.rules | 14 +++- common/build.mk | 1 + docs/packetized-console.md | 180 +++++++++++++++++++++++++++++++++++++++++++++ include/config.h | 3 + 5 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 docs/packetized-console.md diff --git a/Makefile b/Makefile index 703c84e662..b2a68512be 100644 --- a/Makefile +++ b/Makefile @@ -339,7 +339,26 @@ ifeq ($(CONFIG_SHAREDLIB),y) ro-objs := $(filter-out %_sharedlib.o, $(ro-objs)) endif ro-deps := $(addsuffix .d, $(ro-objs)) +ifeq ($(CONFIG_EXTRACT_PRINTF_STRINGS),) rw-deps := $(addsuffix .d, $(rw-objs)) +else + +# See docs/packetized-console.md for details. 
+ +s-src = chip/g/ite_sync.S core/cortex-m/init.S core/cortex-m/ldivmod.S \ + core/cortex-m/switch.S core/cortex-m/uldivmod.S +s-objs := $(patsubst %.S,$(out)/RW/%.o,$(s-src)) + +rw-ep-objs := $(filter-out $(s-objs), $(rw-objs)) +rw-es := $(patsubst %.o,%.E,$(rw-ep-objs)) +rw-eps := $(patsubst %.o,%.Ep,$(rw-ep-objs)) +rw-deps := $(patsubst %.o,%.E.d,$(rw-objs)) + +$(rw-eps) $(out)/RW/str_blob: $(rw-es) + ${Q}util/util_precompile.py -o $(out)/RW/str_blob $(rw-es) + +$(rw-objs): $(out)/RW/str_blob $(rw-eps) +endif deps := $(ro-deps) $(rw-deps) $(deps-y) diff --git a/Makefile.rules b/Makefile.rules index 1f9b05b1af..ecbccceb80 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -83,6 +83,9 @@ cmd_run_fuzz = build/host/$*/$*.exe -seed=1 -runs=1 $(silent) \ cmd_exe = $(CC) $(ro-objs) $(HOST_TEST_LDFLAGS) $(LDFLAGS_EXTRA) -o $@ cmd_c_to_o = $(CC) $(C_WARN) $(CFLAGS) -MMD -MP -MF $@.d -c $< \ -MT $(@D)/$(@F) -o $(@D)/$(@F) +cmd_c_to_e = $(CC) $(C_WARN) $(CFLAGS) -MMD -MP -MF $@.d -E $< \ + -MT $(@D)/$(@F) -o $(@D)/$(@F) +cmd_ep_to_o = $(CC) $(C_WARN) $(CFLAGS) -x c -c $< -o $(@D)/$(@F) cmd_cxx_to_o = $(CXX) -std=c++11 $(CFLAGS) $(CXXFLAGS) -MMD -MP -MF $@.d -c $< \ -MT $(@D)/$(@F) -o $(@D)/$(@F) cmd_c_to_build = $(BUILDCC) $(BUILD_CFLAGS) \ @@ -482,9 +485,19 @@ $(out)/RO/%.o.cmd:%.c $(file > $@,$(subst .o.cmd,.o,$(cmd_c_to_o))) $(out)/RO/%.o:%.c $(call quiet,c_to_o,CC ) +ifeq ($(CONFIG_EXTRACT_PRINTF_STRINGS),) $(out)/RW/%.o:%.c $(call quiet,c_to_o,CC ) +$(rw-objs): | $(out)/ec_version.h +else +$(out)/RW/%.o:$(out)/RW/%.Ep + $(call quiet,ep_to_o,EO ) +$(out)/RW/%.E:%.c + $(call quiet,c_to_e,CE ) + +$(rw-es): | $(out)/ec_version.h +endif $(out)/RO/%.o:%.cc $(call quiet,cxx_to_o,CXX ) $(out)/RW/%.o:%.cc @@ -537,7 +550,6 @@ endif # that truly depend on ec_version.h will have that information encoded in their # .d file. 
$(ro-objs): | $(out)/ec_version.h -$(rw-objs): | $(out)/ec_version.h $(sharedlib-objs): | $(out)/ec_version.h $(out)/ec_version.h: diff --git a/common/build.mk b/common/build.mk index 0032ea069c..747b63053f 100644 --- a/common/build.mk +++ b/common/build.mk @@ -241,6 +241,7 @@ BLOB_FILE = rma_key_blob.p256.test endif $(out)/RW/common/rma_auth.o: $(out)/rma_key_from_blob.h +$(out)/RW/common/rma_auth.E: $(out)/rma_key_from_blob.h $(out)/rma_key_from_blob.h: board/$(BOARD)/$(BLOB_FILE) util/bin2h.sh $(Q)util/bin2h.sh RMA_KEY_BLOB $< $@ diff --git a/docs/packetized-console.md b/docs/packetized-console.md new file mode 100644 index 0000000000..8cbba5b725 --- /dev/null +++ b/docs/packetized-console.md @@ -0,0 +1,180 @@ +# Packetized console mode +[TOC] + +## Overview + +Some EC board images are getting very close to their flash space size limits. +A significant part of the image is often taken by the text strings printed out +as debug messages or console command output. + +Removing text strings from the image would allow to free up a lot of space +(approximately 10% in case of Cr50), on top of that moving console command +support code from the image would allow to free quite a bit more. + +One requirement for such a facility would be that removing text strings from +the image is possible without touching the source code, so that all existing +boards could be retrofitted with the new approach without actual code changes. + +## Post processing the source code + +The vast majority of printouts generated by the EC images come from +invocations to console output generating functions (aka `cprintf()`, +`cprints()` and `cputs()`). In many cases these functions are invoked through +macros. + +The proposed approach is to introduce additional processing steps where the +source code could be modified pre-compilation during build time. + +In the standard processing case `.o` files are generated directly from `.c` +files by the compiler. 
To support the packet mode console the source files' +processing has to be more involved. + +First the source code is taken through the C preprocessor. This expands all macro +invocations, and each input `.c` file gets a corresponding generated `.E` +file. + +Then invocations of the console output generating functions are substituted +such that the format strings can be replaced by their indices (see below). +This produces an additional set of intermediate files, each `.E` gets a +corresponding `.Ep` file. Then the set of `.Ep` files are taken through the +compilation step, resulting in a set of generated `.o` files. + +In more detail, the following steps are taken when generating the set of `.o` +files necessary to generate the executable image: + +- run `.c` sources through the C preprocessor, generating `.E` files instead of + `.o` files (i.e. use -E compiler command line option instead of -c) + +- once all sources are preprocessed, invoke the `util_precompile.py` script + to scan all `.E` files together. For each instance of `cprintf()`, + `cprints()`, and `cputs()` found in `.E` files do the following: + + - save the format string in a dictionary, unless the string is already + there. The keys in the dictionary are the format strings, the values are + the string indices, for each new string the value is the length of the + dictionary before the string was added. + + - replace the console output generating function names with `cmsgX`, where + `X` is the number of arguments, values from 0 to 8 are supported. + + - scan the format string for format characters (%), and based on number of + format characters pick the `X` value for `cmsgX` function name. + + - based on the format attributes analyze the parameters and make sure that + they can be typecasted to `uintptr_t`. This is the case for pretty much + any parameter other than 64 bit values, timestamps in particular.
In case + there are 64 bit parameters, augment the code by adding a block around the + invocation, declaring a 64 bit variable, assigning it the parameter value + and using the variable address as the appropriate `cmsgX` parameter. + + - While scanning the parameters, also prepare a 32 bit descriptor, with each + parameter's description packed into 4 bits, thus supporting up to 8 + parameters. + + - complete `cmsgX` call declaration giving it as the parameters the console + channel number (as is, retrieved from the source code), the index the text + string got when it was added to the dictionary, the 32 bit parameter + descriptor and the original parameters typecasted to `uintptr_t`, unless + processing of 64 bit value(s) took place, in which case pointer(s) to the + value(s) are used. + + - for each `.E` file generate the `.Ep` file with all console output + generating functions replaced with `cmsgX()` invocations. + +- once all `.E` files have been processed, the `util_precompile.py` script + converts the format string dictionary into a list of strings, each string + placed in the list at its index, so knowing the index the terminal program + can retrieve the format string. The list is serialized, compressed and saved + in a file. + +## Sending packets to the terminal + +Some code has to be added to process `cmsgX()` invocations. + +Each of these functions prepares an array of parameters and calls the common +function, which processes the format descriptor and the parameters, and +generates a packet sent over the console channel to the terminal. The packet +has the following structure, very similar to the one used in the Acropora +project: + +```c +struct console_packet { + /* Magic number = CONSOLE_PACKET_MAGIC */ + uint8_t magic; + /* + * Packet sequence number, incremented each time sender sends a packet. + * So receiver can detect small numbers of corrupt/dropped packets.
+ */ + uint8_t sequence : 4; + + /* Set if the sender had to discard packets due to buffer overflow. */ + uint8_t overflow : 1; + uint8_t dummy : 3; + + /* Channel; values from enum console_channel */ + uint8_t channel; + + /* Bottom 48 bits of system time; enough for 8 years @ 1 us */ + uint8_t timestamp[6]; + + /* + * Length of variable-length section in bytes, not including the + * packet end trailer. + */ + uint8_t data_len; + + /* Index of the format string in the string database. */ + uint16_t str_index; + + /* Header checksum */ + uint8_t crc; + + /* Fixed length header to here. + * + * Followed by variable-length data, if data_len > 0. + * + * params: 1-8 of objects matching the format of the string indexed by + * 'str_index' above. + * + * CONSOLE_PACKET_END, as a sanity-check that we haven't dropped + * anything. A checksum or CRC would be kinda expensive for debug + * data. Note that it is not present if data_len == 0. + */ +} +``` + +The data part of the packet is the concatenated values of the parameters +processed in accordance with the format: all integer values less than 64 bits +in size and pointers are transferred as 4 byte entities. In case the format +calls for a 64 bit value, the parameter is interpreted as the address of an 8 +byte value, which is retrieved from memory and added to the packet. In case +the format calls for a string, the string is included in the packet, along +with the trailing zero. + +Inclusion of `__func__` in the parameter list is a special case. The name of +the function is added to the string dictionary, and 3 bytes are sent in the +packet, 0xff and then the number `__func__` was assigned in the string +database. This allows the terminal to tell that not the actual string but the +index is sent as the parameter. + +## Terminal program + +A Python script (`acroterm.py`) was copied from the Acropora project and +modified to support the changed packet format.
+ +The script receives the name of the file containing the format strings blob as +one of the command line parameters. When starting, the script attaches to the TTY +device (be it UART or USB channel) and searches for the packet header +characters in the stream. Symbols received before a packet header is encountered +or between packets are sent to the console directly. + +When packets are received, header integrity is verified and the `str_index` +field from the header is used to retrieve the format string from the blob. The +string is scanned for the format characters, and the data section of the +packet is interpreted according to the format specification, recreated strings +are sent to the console. + +Data received out of packets is sent to the console directly and is displayed +using a different color. This, among other things, allows to display text +generated by early boot stages and in general support builds which do not yet +deploy packet mode. \ No newline at end of file diff --git a/include/config.h b/include/config.h index f810be8e90..5e556b7224 100644 --- a/include/config.h +++ b/include/config.h @@ -1411,6 +1411,9 @@ /* Enable verbose output to UART console and extra timestamp print precision. */ #define CONFIG_CONSOLE_VERBOSE +/* Trigger building the image with all format strings extracted. */ +#undef CONFIG_EXTRACT_PRINTF_STRINGS + /* * Enable EC-CR50 communication (a.k.a. EC-EFS2). This is for CR50 config only. */ -- cgit v1.2.1