From 0d79a2a8e2d91fc258ac795c19c13e3ab505a6c2 Mon Sep 17 00:00:00 2001
From: Ulf Samuelsson <ulf@emagii.com>
Date: Tue, 14 Feb 2023 10:13:28 +0000
Subject: ASCIZ Command for output section

Adds a new directive to the linker script syntax: ASCIZ.
This inserts a zero-terminated string into the output at the place where it is used.
---
 ld/NEWS                            |  3 ++
 ld/ld.texi                         | 13 ++++++
 ld/ldgram.y                        |  7 +++-
 ld/ldlang.c                        | 83 ++++++++++++++++++++++++++++++++++++++
 ld/ldlang.h                        |  4 +-
 ld/ldlex.l                         |  1 +
 ld/testsuite/ld-scripts/asciz.d    | 17 ++++++++
 ld/testsuite/ld-scripts/asciz.s    |  8 ++++
 ld/testsuite/ld-scripts/asciz.t    | 23 +++++++++++
 ld/testsuite/ld-scripts/script.exp |  1 +
 10 files changed, 157 insertions(+), 3 deletions(-)
 create mode 100644 ld/testsuite/ld-scripts/asciz.d
 create mode 100644 ld/testsuite/ld-scripts/asciz.s
 create mode 100644 ld/testsuite/ld-scripts/asciz.t

(limited to 'ld')
diff --git a/ld/NEWS b/ld/NEWS
index 9982ad0168d..4ce7e19d40b 100644
--- a/ld/NEWS
+++ b/ld/NEWS
@@ -1,5 +1,8 @@
 -*- text -*-
 
+* The linker script syntax has a new command for output sections: ASCIZ "string"
+  This will insert a zero-terminated string at the current location.
+
 Changes in 2.40:
 
 * The linker has a new command line option to suppress the generation of any
diff --git a/ld/ld.texi b/ld/ld.texi
index 36005dc2b0d..f576a8bae6c 100644
--- a/ld/ld.texi
+++ b/ld/ld.texi
@@ -5308,6 +5308,7 @@ C identifiers because they contain a @samp{.} character.
 @cindex data
 @cindex section data
 @cindex output section data
+@kindex ASCIZ ``@var{string}''
 @kindex BYTE(@var{expression})
 @kindex SHORT(@var{expression})
 @kindex LONG(@var{expression})
@@ -5344,6 +5345,18 @@ When the object file format does not have an explicit endianness, as is
 true of, for example, S-records, the value will be stored in the
 endianness of the first input object file.
 
+You can include a zero-terminated string in an output section by using
+@code{ASCIZ}.  The keyword is followed by a string, which is stored at
+the current value of the location counter with a zero byte added at
+the end.  If the string includes spaces it must be enclosed in double
+quotes.  The string may contain the escapes @samp{\n}, @samp{\r} and
+@samp{\t}, and octal escapes; hexadecimal escapes are not supported.
+
+For example, this string of 16 characters will create a 17 byte area:
+@smallexample
+  ASCIZ "This is 16 bytes"
+@end smallexample
+
 Note---these commands only work inside a section description and not
 between them, so the following will produce an error from the linker:
 @smallexample
diff --git a/ld/ldgram.y b/ld/ldgram.y
index fa5f01fef1d..8240cf97327 100644
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -125,7 +125,7 @@ static int error_index;
 %right UNARY
 %token END
 %left <token> '('
-%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE
+%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ
 %token SECTIONS PHDRS INSERT_K AFTER BEFORE
 %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END
 %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE
@@ -668,7 +668,10 @@ statement:
 		{
 		  lang_add_data ((int) $1, $3);
 		}
-
+	| ASCIZ NAME
+		{
+		  lang_add_string ($2);
+		}
 	| FILL '(' fill_exp ')'
 		{
 		  lang_add_fill ($3);
diff --git a/ld/ldlang.c b/ld/ldlang.c
index b5e0d026ae4..b20455c9373 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -8361,6 +8361,89 @@ lang_add_data (int type, union etree_union *exp)
   new_stmt->type = type;
 }
 
+/* Add the string S to the current output section as a sequence of
+   BYTE data statements, followed by a terminating zero byte.
+
+   The escape sequences \n, \r and \t are translated, as are octal
+   escapes of one to three digits.  If three octal digits would give
+   a value above 0xff, only the first two are used and the third is
+   treated as an ordinary character.  A backslash before any other
+   character is dropped and that character is emitted unchanged.  A
+   lone backslash at the end of S is emitted literally.  */
+
+void
+lang_add_string (const char *s)
+{
+  bool escape = false;
+
+  for (; *s != '\0'; s++)
+    {
+      /* Work on unsigned values so that bytes above 0x7f are not
+	 sign-extended when they are widened for exp_intop.  */
+      unsigned int c = (unsigned char) *s;
+
+      /* A backslash starts an escape; what to emit is decided once
+	 the following character is known.  */
+      if (!escape && c == '\\')
+	{
+	  escape = true;
+	  continue;
+	}
+
+      if (escape)
+	{
+	  /* The escape applies to this character only.  */
+	  escape = false;
+
+	  switch (c)
+	    {
+	    case 'n': c = '\n'; break;
+	    case 'r': c = '\r'; break;
+	    case 't': c = '\t'; break;
+
+	    case '0': case '1': case '2': case '3':
+	    case '4': case '5': case '6': case '7':
+	      {
+		/* Octal escape: C holds the first digit; take up to
+		   two more while the value still fits in a byte.  */
+		unsigned int digits;
+
+		c -= '0';
+		for (digits = 1;
+		     digits < 3 && s[1] >= '0' && s[1] <= '7';
+		     digits++)
+		  {
+		    unsigned int wider = (c << 3) + (s[1] - '0');
+
+		    /* Stop before a digit that would overflow a byte:
+		       \777 is treated as '\077' followed by '7'.  */
+		    if (wider > 0xff)
+		      break;
+		    c = wider;
+		    s++;
+		  }
+	      }
+	      break;
+
+	    default:
+	      /* Unknown escape: keep the character itself.  */
+	      break;
+	    }
+	}
+
+      /* Emit the (possibly translated) character.  */
+      lang_add_data (BYTE, exp_intop (c));
+    }
+
+  /* A lone backslash at the end of the string has nothing to escape;
+     emit it literally rather than silently dropping it.  */
+  if (escape)
+    lang_add_data (BYTE, exp_intop ('\\'));
+
+  /* Remember to terminate the string.  */
+  lang_add_data (BYTE, exp_intop (0));
+}
+
 /* Create a new reloc statement.  RELOC is the BFD relocation type to
    generate.  HOWTO is the corresponding howto structure (we could
    look this up, but the caller has already done so).  SECTION is the
diff --git a/ld/ldlang.h b/ld/ldlang.h
index 24c42f48218..32819066b8a 100644
--- a/ld/ldlang.h
+++ b/ld/ldlang.h
@@ -645,7 +645,9 @@ extern void push_stat_ptr
 extern void pop_stat_ptr
   (void);
 extern void lang_add_data
-  (int type, union etree_union *);
+  (int, union etree_union *);
+extern void lang_add_string
+  (const char *);
 extern void lang_add_reloc
   (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *,
    union etree_union *);
diff --git a/ld/ldlex.l b/ld/ldlex.l
index cf596530b20..32336cf0be2 100644
--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
 <WILD>"LONG"				{ RTOKEN(LONG); }
 <WILD>"SHORT"				{ RTOKEN(SHORT); }
 <WILD>"BYTE"				{ RTOKEN(BYTE); }
+<WILD>"ASCIZ"				{ RTOKEN(ASCIZ); }
 <SCRIPT>"NOFLOAT"			{ RTOKEN(NOFLOAT); }
 <SCRIPT,EXPRESSION>"NOCROSSREFS"	{ RTOKEN(NOCROSSREFS); }
 <SCRIPT,EXPRESSION>"NOCROSSREFS_TO"	{ RTOKEN(NOCROSSREFS_TO); }
diff --git a/ld/testsuite/ld-scripts/asciz.d b/ld/testsuite/ld-scripts/asciz.d
new file mode 100644
index 00000000000..615cf99732f
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.d
@@ -0,0 +1,17 @@
+#source: asciz.s
+#ld: -T asciz.t
+#objdump: -s -j .text
+#target: [is_elf_format]
+#skip: mips*-*-*
+#skip: tilegx*-*-* tilepro-*-*
+# COFF, PE and MIPS targets align code to a 16 byte boundary.
+# tilegx and tilepro align code to an 8 byte boundary.
+
+.*:     file format .*
+
+Contents of section .text:
+ .... 01010101 54686973 20697320 61207374  ....This is a st
+ .... 72696e67 00...... ........ ........  ring............
+ .... 54686973 20697320 616e6f74 68657220  This is another 
+ .... 0a737472 696e6753 00                 .stringS........
+#pass
diff --git a/ld/testsuite/ld-scripts/asciz.s b/ld/testsuite/ld-scripts/asciz.s
new file mode 100644
index 00000000000..5803bb4e8ed
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.s
@@ -0,0 +1,8 @@
+	.section .text
+	.long 0x01010101
+	
+	.section .data
+	.long 0x9abcdef0
+	
+	.section .bss
+	.long 0
diff --git a/ld/testsuite/ld-scripts/asciz.t b/ld/testsuite/ld-scripts/asciz.t
new file mode 100644
index 00000000000..ab66f9a5bfb
--- /dev/null
+++ b/ld/testsuite/ld-scripts/asciz.t
@@ -0,0 +1,23 @@
+MEMORY {
+  rom : ORIGIN = 0x00000, LENGTH = 0x10000
+  ram : ORIGIN = 0x10000, LENGTH = 0x10000
+}
+
+_start = 0x000000;
+SECTIONS
+{
+  . = 0x1000 + SIZEOF_HEADERS;
+  .text ALIGN (0x20) :
+    {
+      *(.text)
+      ASCIZ "This is a string"
+      . = ALIGN(0x20);
+      align_label = .;
+      ASCIZ "This is another \nstring\123"
+      unalign_label = .;
+    }
+  .data : AT (0x10000) { *(.data) } >ram /* NO default AT>rom */
+  . = ALIGN(0x20);
+  .bss : { *(.bss) } >ram /* NO default AT>rom */
+  /DISCARD/ : { *(*) }
+}
diff --git a/ld/testsuite/ld-scripts/script.exp b/ld/testsuite/ld-scripts/script.exp
index e0af28134a7..a574dde034c 100644
--- a/ld/testsuite/ld-scripts/script.exp
+++ b/ld/testsuite/ld-scripts/script.exp
@@ -227,6 +227,7 @@ foreach test_script $test_script_list {
     run_dump_test [string range $test_script 0 end-2]
 }
 
+run_dump_test "asciz"
 run_dump_test "align-with-input"
 run_dump_test "pr20302"
 run_dump_test "output-section-types"
-- 
cgit v1.2.1