Merge pull request #4303 from RedwanFox/fix_textfile_encoding_issue

set default encoding of written files to UTF-8, added ability to pass custom file encoding
author: William Deegan <bill@baddogconsulting.com> 2023-02-20 14:51:59 -0800
committer: GitHub <noreply@github.com> 2023-02-20 14:51:59 -0800
commit: 55b490b385948577a4a3a2338e48caa28780f6c7 (patch)
tree: f4de5bf39c9df5cd700dbeab67a21832cd7cee5b
parent: 4b97682fb76be5d42efc850564571a4c2c596d2a (diff)
parent: d65044711ecfee7c96fc673ec5e9fc57684ce8af (diff)
download: scons-git-55b490b385948577a4a3a2338e48caa28780f6c7.tar.gz
6 files changed, 36 insertions, 5 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 4a2cb0d8e..6052a21a3 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -46,6 +46,11 @@ RELEASE  VERSION/DATE TO BE FILLED IN LATER
     - Added JAVAPROCESSORPATH construction variable which populates -processorpath.
     - Updated JavaScanner to scan JAVAPROCESSORPATH.
 
+  From Nickolai Korshunov:
+    - Added FILE_ENCODING, to allow explicitly setting the text encoding for files
+      written by the Textfile() and Substfile() builders. If not specified, Textfile() and Substfile() builders
+      will write files as UTF-8. Fixed Issue #4302.
+
   From Dan Mezhiborsky:
     - Add newline to end of compilation db (compile_commands.json).
 
@@ -60,7 +65,6 @@ RELEASE  VERSION/DATE TO BE FILLED IN LATER
       Now configure checks now clear node info for non conftest nodes, so they will be re-evaluated for
       the real taskmaster run when the build commences.
 
-
   From Andrew Morrow
     - Avoid returning UniqueList for `children` and other `Executor` APIs. This type
       iterates more slowly than the builtin types. Also simplify uniquer_hashables to
diff --git a/RELEASE.txt b/RELEASE.txt
index 7a96cec03..91dbb862c 100644
--- a/RELEASE.txt
+++ b/RELEASE.txt
@@ -31,6 +31,9 @@ NEW FUNCTIONALITY
   not be called until all AddOption() calls are completed. Resolves Issue #4187
 - Added --experimental=tm_v2, which enables Andrew Morrow's NewParallel Job implementation.
   This should scale much better for highly parallel builds.  You can also enable this via SetOption().
+- Added FILE_ENCODING, to allow explicitly setting the text encoding for files
+  written by the Textfile() and Substfile() builders. If not specified, Textfile() and Substfile() builders
+  will write files as UTF-8.
 
 
 DEPRECATED FUNCTIONALITY
diff --git a/SCons/Tool/textfile.py b/SCons/Tool/textfile.py
index 7fdc8b7be..0ec31d8f7 100644
--- a/SCons/Tool/textfile.py
+++ b/SCons/Tool/textfile.py
@@ -117,9 +117,12 @@ def _action(target, source, env):
                 value = str(value)
             subs.append((k, value))
 
+    # Pull file encoding from the environment or default to UTF-8
+    file_encoding = env.get('FILE_ENCODING', 'utf-8')
+
     # write the file
     try:
-        target_file = open(target[0].get_path(), TEXTFILE_FILE_WRITE_MODE, newline='')
+        target_file = open(target[0].get_path(), TEXTFILE_FILE_WRITE_MODE, newline='', encoding=file_encoding)
     except (OSError, IOError) as e:
         raise SCons.Errors.UserError("Can't write target file %s [%s]" % (target[0],e))
 
@@ -186,6 +189,7 @@ def generate(env):
     env['BUILDERS']['Substfile'] = _subst_builder
     env['SUBSTFILEPREFIX'] = ''
     env['SUBSTFILESUFFIX'] = ''
+    env['FILE_ENCODING'] = env.get('FILE_ENCODING', 'utf-8')
 
 
 def exists(env):
diff --git a/SCons/Tool/textfile.xml b/SCons/Tool/textfile.xml
index f2e8bb89e..b019c687d 100644
--- a/SCons/Tool/textfile.xml
+++ b/SCons/Tool/textfile.xml
@@ -35,6 +35,7 @@ Set &consvars; for the &b-Textfile; and &b-Substfile; builders.
 <item>SUBSTFILESUFFIX</item>
 <item>TEXTFILEPREFIX</item>
 <item>TEXTFILESUFFIX</item>
+<item>FILE_ENCODING</item>
 </sets>
 <uses>
 <item>SUBST_DICT</item>
@@ -56,7 +57,7 @@ Nested lists of source strings
 are flattened.
 Source strings need not literally be Python strings:
 they can be Nodes or Python objects that convert cleanly
-to &f-link-Value; nodes
+to &f-link-Value; nodes.
 </para>
 
 <para>
@@ -64,6 +65,9 @@ The prefix and suffix specified by the &cv-link-TEXTFILEPREFIX;
 and &cv-link-TEXTFILESUFFIX; &consvars;
 (by default an empty string and <filename>.txt</filename>, respectively)
 are automatically added to the target if they are not already present.
+</para>
+<para>
+By default the target file encoding is "utf-8"; it can be changed by setting &cv-link-FILE_ENCODING;.
 Examples:
 </para>
 
@@ -126,6 +130,11 @@ are flattened. See also &b-link-Textfile;.
 </para>
 
 <para>
+By default the target file encoding is "utf-8"; it can be changed by setting &cv-link-FILE_ENCODING;.
+Examples:
+</para>
+
+<para>
 If a single source file name is specified and has a <filename>.in</filename> suffix,
 the suffix is stripped and the remainder of the name is used as the default target name.
 </para>
@@ -259,4 +268,13 @@ The suffix used for &b-link-Textfile; file names;
 </summary>
 </cvar>
 
+<cvar name="FILE_ENCODING">
+<summary>
+<para>
+File encoding used for files written by &b-link-Textfile; and &b-link-Substfile;.  Set to "utf-8" by default.
+<emphasis>Added in version 4.5.0.</emphasis>
+</para>
+</summary>
+</cvar>
+
 </sconsdoc>
diff --git a/test/textfile/fixture/SConstruct b/test/textfile/fixture/SConstruct
index 60e7225a0..b2466870f 100644
--- a/test/textfile/fixture/SConstruct
+++ b/test/textfile/fixture/SConstruct
@@ -2,7 +2,8 @@ DefaultEnvironment(tools=[])
 
 env = Environment(tools=['textfile'])
 data0 = ['Goethe', 'Schiller']
-data = ['lalala', 42, data0, 'tanteratei']
+data = ['lalala', 42, data0, 'tanteratei',
+        '×'] # <-- this is the Unicode character U+00D7 (\xd7, multiplication sign)
 
 env.Textfile('foo1', data)
 env.Textfile('foo2', data, LINESEPARATOR='|*')
diff --git a/test/textfile/textfile.py b/test/textfile/textfile.py
index a2d005cfb..f614dfc94 100644
--- a/test/textfile/textfile.py
+++ b/test/textfile/textfile.py
@@ -44,7 +44,8 @@ linesep = '\n'
 
 textparts = ['lalala', '42',
              'Goethe', 'Schiller',
-             'tanteratei']
+             'tanteratei',
+             '×']  # <-- this is the Unicode character U+00D7 (\xd7, multiplication sign)
 foo1Text = linesep.join(textparts)
 foo2Text = '|*'.join(textparts)
 foo1aText = foo1Text + linesep
author	William Deegan <bill@baddogconsulting.com>	2023-02-20 14:51:59 -0800
committer	GitHub <noreply@github.com>	2023-02-20 14:51:59 -0800
commit	55b490b385948577a4a3a2338e48caa28780f6c7 (patch)
tree	f4de5bf39c9df5cd700dbeab67a21832cd7cee5b
parent	4b97682fb76be5d42efc850564571a4c2c596d2a (diff)
parent	d65044711ecfee7c96fc673ec5e9fc57684ce8af (diff)
download	scons-git-55b490b385948577a4a3a2338e48caa28780f6c7.tar.gz