summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aarni Koskela <akx@iki.fi> 2022-10-31 13:05:00 +0200
committer Aarni Koskela <akx@iki.fi> 2022-11-01 10:48:43 +0200
commit 2e5708f8f231696afcbea3f5803ca43b1e33bf7f (patch)
tree 7e62cf0a4ec2ff2fc13661d1e5a7f105ffdfd25d
parent a946ae6bad2701a021969d82b550ba6be1f5c7d7 (diff)
download babel-2e5708f8f231696afcbea3f5803ca43b1e33bf7f.tar.gz
Use `ast` instead of `eval` for string extraction
This is safer (as we don't actually execute anything), and allows us to parse f-strings too. Closes #769 (supersedes it). Refs #715 (doesn't add an error yet, but doesn't crash on f-strings).
-rw-r--r-- babel/messages/extract.py 35
-rw-r--r-- tests/messages/test_extract.py 27
2 files changed, 53 insertions, 9 deletions
diff --git a/babel/messages/extract.py b/babel/messages/extract.py
index c95f1cb..74e57a1 100644
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -15,7 +15,7 @@
:copyright: (c) 2013-2022 by the Babel Team.
:license: BSD, see LICENSE for more details.
"""
-
+import ast
import os
from os.path import relpath
import sys
@@ -487,14 +487,9 @@ def extract_python(fileobj, keywords, comment_tags, options):
if nested:
funcname = value
elif tok == STRING:
- # Unwrap quotes in a safe manner, maintaining the string's
- # encoding
- # https://sourceforge.net/tracker/?func=detail&atid=355470&
- # aid=617979&group_id=5470
- code = compile('# coding=%s\n%s' % (str(encoding), value),
- '<string>', 'eval', future_flags)
- value = eval(code, {'__builtins__': {}}, {})
- buf.append(value)
+ val = _parse_python_string(value, encoding, future_flags)
+ if val is not None:
+ buf.append(val)
elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
@@ -516,6 +511,28 @@ def extract_python(fileobj, keywords, comment_tags, options):
funcname = value
+def _parse_python_string(value, encoding, future_flags):
+    """Unwrap the quotes of string token `value` safely, keeping its encoding.
+
+    The token is only parsed to an AST, never executed.  Returns the text, or
+    None for bytes literals and f-strings that contain replacement fields.
+    """
+    # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=617979&group_id=5470
+    code = compile(
+        f'# coding={str(encoding)}\n{value}',
+        '<string>',
+        'eval',
+        ast.PyCF_ONLY_AST | future_flags,
+    )
+    body = code.body if isinstance(code, ast.Expression) else None
+    nodes = body.values if isinstance(body, ast.JoinedStr) else [body]
+    # No ast.Str here: it is deprecated and removed in 3.14; only Python < 3.8 needs `.s`.
+    texts = [n.value if isinstance(n, ast.Constant) else getattr(n, 's', None) for n in nodes]
+    if all(isinstance(text, str) for text in texts):
+        return ''.join(texts)
+    return None  # TODO: we could raise an error or warning when not all nodes are constants
+
+
def extract_javascript(fileobj, keywords, comment_tags, options):
"""Extract messages from JavaScript source code.
diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py
index 47fe306..3873191 100644
--- a/tests/messages/test_extract.py
+++ b/tests/messages/test_extract.py
@@ -528,3 +528,30 @@ nbsp = _('\xa0')
messages = list(extract.extract('python', buf,
extract.DEFAULT_KEYWORDS, [], {}))
assert messages[0][1] == u'\xa0'
+
+    def test_f_strings(self):
+        """Only f-strings built entirely from constant parts are extracted."""
+        buf = BytesIO(br"""
+t1 = _('foobar')
+t2 = _(f'spameggs' f'feast') # should be extracted; constant parts only
+t2 = _(f'spameggs' 'kerroshampurilainen') # should be extracted (mixing f with no f)
+t3 = _(f'''whoa! a ''' # should be extracted (continues on following lines)
+f'flying shark'
+ '... hello'
+)
+t4 = _(f'spameggs {t1}') # should not be extracted
+""")
+        texts = [m[1] for m in extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})]
+        assert texts == [
+            u'foobar', u'spameggsfeast', u'spameggskerroshampurilainen',
+            u'whoa! a flying shark... hello',
+        ]
+
+    def test_f_strings_non_utf8(self):
+        """Non-ASCII bytes in a latin-1 declared source come out as the expected text."""
+        buf = BytesIO(b"""
+# -- coding: latin-1 --
+t2 = _(f'\xe5\xe4\xf6' f'\xc5\xc4\xd6')
+""")
+        texts = [m[1] for m in extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {})]
+        assert texts == [u'åäöÅÄÖ']