summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Johannes Wilm <johanneswilm@gmail.com>, 2023-01-06 21:18:35 +0100
committer: GitHub <noreply@github.com>, 2023-01-06 22:18:35 +0200
commit: d425f86a08d5f459d7380d7c196ecb33af564f5c (patch)
tree: 7b4258d4a2d24386aeddb05c70471f427fded2af
parent: 82c41ccda68da8322c96c1176936b07ad8af8f1b (diff)
download: babel-d425f86a08d5f459d7380d7c196ecb33af564f5c.tar.gz
Improved javascript template string expression extracting (#939)
Co-authored-by: Rik <gitaarik@posteo.net>
Co-authored-by: Aarni Koskela <akx@iki.fi>
-rw-r--r-- babel/messages/extract.py | 59
-rw-r--r-- babel/messages/jslexer.py | 4
-rw-r--r-- tests/messages/test_js_extract.py | 39
3 files changed, 94 insertions, 8 deletions
diff --git a/babel/messages/extract.py b/babel/messages/extract.py
index 4f0f649..c19dd5a 100644
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -16,9 +16,10 @@
:license: BSD, see LICENSE for more details.
"""
import ast
+import io
import os
-from os.path import relpath
import sys
+from os.path import relpath
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from babel.util import parse_encoding, parse_future_flags, pathmatch
@@ -532,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags):
return None
-def extract_javascript(fileobj, keywords, comment_tags, options):
+def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1):
"""Extract messages from JavaScript source code.
:param fileobj: the seekable, file-like object the messages should be
@@ -544,7 +545,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
- * `template_string` -- set to false to disable ES6 template string support.
+ * `template_string` -- if `True`, supports gettext(`key`)
+ * `parse_template_string` -- if `True` will parse the
+ contents of javascript
+ template strings.
+ :param lineno: line number offset (for parsing embedded fragments)
"""
from babel.messages.jslexer import Token, tokenize, unquote_string
funcname = message_lineno = None
@@ -556,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
last_token = None
call_stack = -1
dotted = any('.' in kw for kw in keywords)
-
for token in tokenize(
fileobj.read().decode(encoding),
jsx=options.get("jsx", True),
template_string=options.get("template_string", True),
- dotted=dotted
+ dotted=dotted,
+ lineno=lineno
):
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
funcname and # have a keyword...
@@ -573,7 +578,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
call_stack = 0
token = Token('operator', ')', token.lineno)
- if token.type == 'operator' and token.value == '(':
+ if options.get('parse_template_string') and not funcname and token.type == 'template_string':
+ for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno):
+ yield item
+
+ elif token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
@@ -665,3 +674,41 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
funcname = token.value
last_token = token
+
+
+def parse_template_string(template_string, keywords, comment_tags, options, lineno=1):
+ """Parse JavaScript template string.
+
+ :param template_string: the template string to be parsed
+ :param keywords: a list of keywords (i.e. function names) that should be
+ recognized as translation functions
+ :param comment_tags: a list of translator tags to search for and include
+ in the results
+ :param options: a dictionary of additional options (optional)
+ :param lineno: starting line number (optional)
+ """
+ from babel.messages.jslexer import line_re
+ prev_character = None
+ level = 0
+ inside_str = False
+ expression_contents = ''
+ for character in template_string[1:-1]:
+ if not inside_str and character in ('"', "'", '`'):
+ inside_str = character
+ elif inside_str == character and prev_character != r'\\':
+ inside_str = False
+ if level:
+ expression_contents += character
+ if not inside_str:
+ if character == '{' and prev_character == '$':
+ level += 1
+ elif level and character == '}':
+ level -= 1
+ if level == 0 and expression_contents:
+ expression_contents = expression_contents[0:-1]
+ fake_file_obj = io.BytesIO(expression_contents.encode())
+ for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno):
+ yield item
+ lineno += len(line_re.findall(expression_contents))
+ expression_contents = ''
+ prev_character = character
diff --git a/babel/messages/jslexer.py b/babel/messages/jslexer.py
index 1264b2d..886f69d 100644
--- a/babel/messages/jslexer.py
+++ b/babel/messages/jslexer.py
@@ -151,17 +151,17 @@ def unquote_string(string):
return u''.join(result)
-def tokenize(source, jsx=True, dotted=True, template_string=True):
+def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1):
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.
:param jsx: Enable (limited) JSX parsing.
:param dotted: Read dotted names as single name token.
:param template_string: Support ES6 template strings
+ :param lineno: starting line number (optional)
"""
may_divide = False
pos = 0
- lineno = 1
end = len(source)
rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
diff --git a/tests/messages/test_js_extract.py b/tests/messages/test_js_extract.py
index 72c5211..95985c0 100644
--- a/tests/messages/test_js_extract.py
+++ b/tests/messages/test_js_extract.py
@@ -150,3 +150,42 @@ def test_template_string_tag_usage():
)
assert messages == [(1, 'Tag template, wow', [], None)]
+
+
+def test_inside_template_string():
+ buf = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`")
+ messages = list(
+ extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True})
+ )
+
+ assert messages == [(1, 'Hello', [], None)]
+
+
+def test_inside_template_string_with_linebreaks():
+ buf = BytesIO(b"""\
+const userName = gettext('Username')
+const msg = `${
+gettext('Hello')
+} ${userName} ${
+gettext('Are you having a nice day?')
+}`
+const msg2 = `${
+gettext('Howdy')
+} ${userName} ${
+gettext('Are you doing ok?')
+}`
+""")
+ messages = list(
+ extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True})
+ )
+
+ assert messages == [(1, 'Username', [], None), (3, 'Hello', [], None), (5, 'Are you having a nice day?', [], None), (8, 'Howdy', [], None), (10, 'Are you doing ok?', [], None)]
+
+
+def test_inside_nested_template_string():
+ buf = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`")
+ messages = list(
+ extract.extract('javascript', buf, {"gettext": None}, [], {'parse_template_string': True})
+ )
+
+ assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]