summaryrefslogtreecommitdiff
path: root/lib/parsetools
diff options
context:
space:
mode:
authorMarko Minđek <marko.mindek@invariant.hr>2023-01-30 00:05:30 +0100
committerMarko Minđek <marko.mindek@invariant.hr>2023-02-21 13:39:54 +0100
commit23014493ea15eb7753777d2ca1f664096263a76b (patch)
treed9799cf47248bdc470bb2fb1af45e5180d2e4e98 /lib/parsetools
parentbc6f6bc98e60c4dc46b0372b615ec0433f1d8abd (diff)
downloaderlang-23014493ea15eb7753777d2ca1f664096263a76b.tar.gz
leex column number support
Added: Support for including column number in token, end and error locations. Variables in rules: TokenCol and TokenLoc. Leex options: error_location and tab_size. Fixed: Bug in testcase OTP14285.
Diffstat (limited to 'lib/parsetools')
-rw-r--r--lib/parsetools/doc/src/leex.xml53
-rw-r--r--lib/parsetools/include/leexinc.hrl377
-rw-r--r--lib/parsetools/src/leex.erl190
-rw-r--r--lib/parsetools/test/leex_SUITE.erl334
4 files changed, 592 insertions, 362 deletions
diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml
index d802e46b59..7714a7c27f 100644
--- a/lib/parsetools/doc/src/leex.xml
+++ b/lib/parsetools/doc/src/leex.xml
@@ -115,6 +115,22 @@
<p>Causes generated -file() attributes to only include
the basename of the file path.</p>
</item>
+ <tag><c>{error_location, line | column}</c></tag>
+ <item>
+ <p>If set to <c>column</c>, error location will be
+ <c>{Line,Column}</c> tuple instead of just <c>Line</c>. Also, <c>StartLoc</c>
+ and <c>EndLoc</c> in <c>string/2</c>, <c>token/3</c>, and <c>tokens/3</c>
+ functions will be <c>{Line,Column}</c> tuple instead of just <c>Line</c>.
+ Default is <c>line</c>. Note that you can use <c>TokenLoc</c> for token
+ location independently, even if the <c>error_location</c> is set to <c>line</c>.</p>
+        <p>A Unicode character is counted as occupying as many columns as
+        the number of bytes used to represent it.</p>
+ </item>
+ <tag><c>{tab_size, pos_integer()}</c></tag>
+ <item>
+ <p>Sets the width of <c>\t</c> character (only relevant if <c>error_location</c>
+ is set to <c>column</c>). Default is <c>8</c>.</p>
+ </item>
</taglist>
<p>Any of the Boolean options can be set to <c>true</c> by
stating the name of the option. For example, <c>verbose</c>
@@ -147,17 +163,18 @@
</fsdescription>
<func>
<name since="">Module:string(String) -> StringRet</name>
- <name since="">Module:string(String, StartLine) -> StringRet</name>
+ <name since="">Module:string(String, StartLoc) -> StringRet</name>
<fsummary>Generated by Leex</fsummary>
<type>
<v>String = string()</v>
- <v>StringRet = {ok,Tokens,EndLine} | ErrorInfo</v>
+ <v>StringRet = {ok,Tokens,EndLoc} | ErrorInfo</v>
<v>Tokens = [Token]</v>
- <v>EndLine = StartLine = erl_anno:line()</v>
+ <v>StartLoc = EndLoc = erl_anno:location()</v>
</type>
<desc>
<p>Scans <c>String</c> and returns all the tokens in it, or an
- error.</p>
+ error. <c>StartLoc</c> and <c>EndLoc</c> are either <c>erl_anno:line()</c>
+ or <c>erl_anno:location()</c>, depending on the <c>error_location</c> option.</p>
<note><p>It is an error if not all of the characters in
<c>String</c> are consumed.</p></note>
</desc>
@@ -166,7 +183,7 @@
<func>
<name since="">Module:token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars}
</name>
- <name since="">Module:token(Cont, Chars, StartLine) -> {more,Cont1}
+ <name since="">Module:token(Cont, Chars, StartLoc) -> {more,Cont1}
| {done,TokenRet,RestChars}
</name>
<fsummary>Generated by Leex</fsummary>
@@ -174,10 +191,10 @@
<v>Cont = [] | Cont1</v>
<v>Cont1 = tuple()</v>
<v>Chars = RestChars = string() | eof</v>
- <v>TokenRet = {ok, Token, EndLine}
- | {eof, EndLine}
+ <v>TokenRet = {ok, Token, EndLoc}
+ | {eof, EndLoc}
| ErrorInfo</v>
- <v>StartLine = EndLine = erl_anno:line()</v>
+ <v>StartLoc = EndLoc = erl_anno:location()</v>
</type>
<desc>
<p>This is a re-entrant call to try and scan one token from
@@ -193,7 +210,7 @@
but used through the i/o system where it can typically be
called in an application by:</p>
<code>
-io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
+io:request(InFile, {get_until,unicode,Prompt,Module,token,[Loc]})
-> TokenRet</code>
</desc>
</func>
@@ -201,7 +218,7 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
<func>
<name since="">Module:tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars}
</name>
- <name since="">Module:tokens(Cont, Chars, StartLine) ->
+ <name since="">Module:tokens(Cont, Chars, StartLoc) ->
{more,Cont1} | {done,TokensRet,RestChars}
</name>
<fsummary>Generated by Leex</fsummary>
@@ -209,11 +226,11 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
<v>Cont = [] | Cont1</v>
<v>Cont1 = tuple()</v>
<v>Chars = RestChars = string() | eof</v>
- <v>TokensRet = {ok, Tokens, EndLine}
- | {eof, EndLine}
+ <v>TokensRet = {ok, Tokens, EndLoc}
+ | {eof, EndLoc}
| ErrorInfo</v>
<v>Tokens = [Token]</v>
- <v>StartLine = EndLine = erl_anno:line()</v>
+ <v>StartLoc = EndLoc = erl_anno:location()</v>
</type>
<desc>
<p>This is a re-entrant call to try and scan tokens from
@@ -240,7 +257,7 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
but used through the i/o system where it can typically be
called in an application by:</p>
<code>
-io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Line]})
+io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Loc]})
-> TokensRet</code>
</desc>
</func>
@@ -320,6 +337,14 @@ NAME = VALUE</code>
<tag><c>TokenLine</c></tag>
<item><p>The line number where the token occurred.</p>
</item>
+ <tag><c>TokenCol</c></tag>
+ <item><p>The column number where the token occurred
+ (column of the first character included in the token).</p>
+ </item>
+      <tag><c>TokenLoc</c></tag>
+      <item><p>Token location. Expands to <c>{TokenLine,TokenCol}</c> (even
+        when <c>error_location</c> is set to <c>line</c>).</p>
+      </item>
</taglist>
<p>The code must return:</p>
diff --git a/lib/parsetools/include/leexinc.hrl b/lib/parsetools/include/leexinc.hrl
index 8dfc42f479..a06584ff79 100644
--- a/lib/parsetools/include/leexinc.hrl
+++ b/lib/parsetools/include/leexinc.hrl
@@ -16,261 +16,269 @@
format_error({illegal,S}) -> ["illegal characters ",io_lib:write_string(S)];
format_error({user,S}) -> S.
-string(String) -> string(String, 1).
-
-string(String, Line) -> string(String, Line, String, []).
-
-%% string(InChars, Line, TokenChars, Tokens) ->
-%% {ok,Tokens,Line} | {error,ErrorInfo,Line}.
-%% Note the line number going into yystate, L0, is line of token
-%% start while line number returned is line of token end. We want line
-%% of token start.
-
-string([], L, [], Ts) -> % No partial tokens!
- {ok,yyrev(Ts),L};
-string(Ics0, L0, Tcs, Ts) ->
- case yystate(yystate(), Ics0, L0, 0, reject, 0) of
- {A,Alen,Ics1,L1} -> % Accepting end state
- string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts);
- {A,Alen,Ics1,L1,_S1} -> % Accepting transition state
- string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts);
- {reject,_Alen,Tlen,_Ics1,L1,_S1} -> % After a non-accepting state
- {error,{L0,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},L1};
- {A,Alen,Tlen,_Ics1,L1,_S1} ->
+%% string(InChars) ->
+%% string(InChars, Loc) ->
+%% {ok,Tokens,EndLoc} | {error,ErrorInfo,EndLoc}.
+%% Loc is the starting location of the token, while EndLoc is the location
+%% of the first character that was not scanned. A location is either Line or
+%% {Line,Column}, depending on the "error_location" option.
+
+##str
+
+do_string([], L, C, [], Ts) -> % No partial tokens!
+ {ok,yyrev(Ts),{L,C}};
+do_string(Ics0, L0, C0, Tcs, Ts) ->
+ case yystate(yystate(), Ics0, L0, C0, 0, reject, 0) of
+ {A,Alen,Ics1,L1,_C1} -> % Accepting end state
+ C2 = adjust_col(Tcs, Alen, C0),
+ string_cont(Ics1, L1, C2, yyaction(A, Alen, Tcs, L0, C0), Ts);
+ {A,Alen,Ics1,L1,_C1,_S1} -> % Accepting transition state
+ C2 = adjust_col(Tcs, Alen, C0),
+ string_cont(Ics1, L1, C2, yyaction(A, Alen, Tcs, L0, C0), Ts);
+ {reject,_Alen,Tlen,_Ics1,_L1,_C1,_S1} -> % After a non-accepting state
+ {error,{{L0, C0} ,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},{L0, C0}};
+ {A,Alen,Tlen,_Ics1,L1, C1,_S1}->
Tcs1 = yysuf(Tcs, Alen),
L2 = adjust_line(Tlen, Alen, Tcs1, L1),
- string_cont(Tcs1, L2, yyaction(A, Alen, Tcs, L0), Ts)
+ C2 = adjust_col(Tcs, Alen, C1),
+ string_cont(Tcs1, L2, C2, yyaction(A, Alen, Tcs, L0,C0), Ts)
end.
-%% string_cont(RestChars, Line, Token, Tokens)
+%% string_cont(RestChars, Line, Col, Token, Tokens)
%% Test for and remove the end token wrapper. Push back characters
%% are prepended to RestChars.
--dialyzer({nowarn_function, string_cont/4}).
+-dialyzer({nowarn_function, string_cont/5}).
-string_cont(Rest, Line, {token,T}, Ts) ->
- string(Rest, Line, Rest, [T|Ts]);
-string_cont(Rest, Line, {token,T,Push}, Ts) ->
+string_cont(Rest, Line, Col, {token,T}, Ts) ->
+ do_string(Rest, Line, Col, Rest, [T|Ts]);
+string_cont(Rest, Line, Col, {token,T,Push}, Ts) ->
NewRest = Push ++ Rest,
- string(NewRest, Line, NewRest, [T|Ts]);
-string_cont(Rest, Line, {end_token,T}, Ts) ->
- string(Rest, Line, Rest, [T|Ts]);
-string_cont(Rest, Line, {end_token,T,Push}, Ts) ->
+ do_string(NewRest, Line, Col, NewRest, [T|Ts]);
+string_cont(Rest, Line, Col, {end_token,T}, Ts) ->
+ do_string(Rest, Line, Col, Rest, [T|Ts]);
+string_cont(Rest, Line, Col, {end_token,T,Push}, Ts) ->
NewRest = Push ++ Rest,
- string(NewRest, Line, NewRest, [T|Ts]);
-string_cont(Rest, Line, skip_token, Ts) ->
- string(Rest, Line, Rest, Ts);
-string_cont(Rest, Line, {skip_token,Push}, Ts) ->
+ do_string(NewRest, Line, Col, NewRest, [T|Ts]);
+string_cont(Rest, Line, Col, skip_token, Ts) ->
+ do_string(Rest, Line, Col, Rest, Ts);
+string_cont(Rest, Line, Col, {skip_token,Push}, Ts) ->
NewRest = Push ++ Rest,
- string(NewRest, Line, NewRest, Ts);
-string_cont(_Rest, Line, {error,S}, _Ts) ->
- {error,{Line,?MODULE,{user,S}},Line}.
+ do_string(NewRest, Line, Col, NewRest, Ts);
+string_cont(_Rest, Line, Col, {error,S}, _Ts) ->
+ {error,{{Line, Col},?MODULE,{user,S}},{Line,Col}}.
%% token(Continuation, Chars) ->
-%% token(Continuation, Chars, Line) ->
+%% token(Continuation, Chars, Loc) ->
%% {more,Continuation} | {done,ReturnVal,RestChars}.
%% Must be careful when re-entering to append the latest characters to the
%% after characters in an accept. The continuation is:
-%% {token,State,CurrLine,TokenChars,TokenLen,TokenLine,AccAction,AccLen}
+%% {token,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,AccAction,AccLen}
-token(Cont, Chars) -> token(Cont, Chars, 1).
+##tkn
-token([], Chars, Line) ->
- token(yystate(), Chars, Line, Chars, 0, Line, reject, 0);
-token({token,State,Line,Tcs,Tlen,Tline,Action,Alen}, Chars, _) ->
- token(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Action, Alen).
+do_token([], Chars, Line, Col) ->
+ token(yystate(), Chars, Line, Col, Chars, 0, Line, Col, reject, 0);
+do_token({token,State,Line,Col,Tcs,Tlen,Tline,Tcol,Action,Alen}, Chars, _, _) ->
+ token(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Action, Alen).
-%% token(State, InChars, Line, TokenChars, TokenLen, TokenLine,
+%% token(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol
%% AcceptAction, AcceptLen) ->
%% {more,Continuation} | {done,ReturnVal,RestChars}.
%% The argument order is chosen to be more efficient.
-token(S0, Ics0, L0, Tcs, Tlen0, Tline, A0, Alen0) ->
- case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+token(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of
%% Accepting end state, we have a token.
- {A1,Alen1,Ics1,L1} ->
- token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline));
+ {A1,Alen1,Ics1,L1,C1} ->
+ C2 = adjust_col(Tcs, Alen1, C1),
+ token_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol));
%% Accepting transition state, can take more chars.
- {A1,Alen1,[],L1,S1} -> % Need more chars to check
- {more,{token,S1,L1,Tcs,Alen1,Tline,A1,Alen1}};
- {A1,Alen1,Ics1,L1,_S1} -> % Take what we got
- token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline));
+ {A1,Alen1,[],L1,C1,S1} -> % Need more chars to check
+ {more,{token,S1,L1,C1,Tcs,Alen1,Tline,Tcol,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,C1,_S1} -> % Take what we got
+ C2 = adjust_col(Tcs, Alen1, C1),
+ token_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol));
%% After a non-accepting state, maybe reach accept state later.
- {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check
- {more,{token,S1,L1,Tcs,Tlen1,Tline,A1,Alen1}};
- {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match
+ {A1,Alen1,Tlen1,[],L1,C1,S1} -> % Need more chars to check
+ {more,{token,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,A1,Alen1}};
+ {reject,_Alen1,Tlen1,eof,L1,C1,_S1} -> % No token match
%% Check for partial token which is error.
- Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE,
+ Ret = if Tlen1 > 0 -> {error,{{Tline,Tcol},?MODULE,
%% Skip eof tail in Tcs.
- {illegal,yypre(Tcs, Tlen1)}},L1};
- true -> {eof,L1}
+ {illegal,yypre(Tcs, Tlen1)}},{L1,C1}};
+ true -> {eof,{L1,C1}}
end,
{done,Ret,eof};
- {reject,_Alen1,Tlen1,Ics1,L1,_S1} -> % No token match
- Error = {Tline,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
- {done,{error,Error,L1},Ics1};
- {A1,Alen1,Tlen1,_Ics1,L1,_S1} -> % Use last accept match
+ {reject,_Alen1,Tlen1,Ics1,_L1,_C1,_S1} -> % No token match
+ Error = {{Tline,Tcol},?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
+ {done,{error,Error,{Tline,Tcol}},Ics1};
+ {A1,Alen1,Tlen1,_Ics1,L1,_C1,_S1} -> % Use last accept match
Tcs1 = yysuf(Tcs, Alen1),
L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
- token_cont(Tcs1, L2, yyaction(A1, Alen1, Tcs, Tline))
+ C2 = C0 + Alen1,
+ token_cont(Tcs1, L2, C2, yyaction(A1, Alen1, Tcs, Tline, Tcol))
end.
-%% token_cont(RestChars, Line, Token)
+%% token_cont(RestChars, Line, Col, Token)
%% If we have a token or error then return done, else if we have a
%% skip_token then continue.
--dialyzer({nowarn_function, token_cont/3}).
+-dialyzer({nowarn_function, token_cont/4}).
-token_cont(Rest, Line, {token,T}) ->
- {done,{ok,T,Line},Rest};
-token_cont(Rest, Line, {token,T,Push}) ->
+token_cont(Rest, Line, Col, {token,T}) ->
+ {done,{ok,T,{Line,Col}},Rest};
+token_cont(Rest, Line, Col, {token,T,Push}) ->
NewRest = Push ++ Rest,
- {done,{ok,T,Line},NewRest};
-token_cont(Rest, Line, {end_token,T}) ->
- {done,{ok,T,Line},Rest};
-token_cont(Rest, Line, {end_token,T,Push}) ->
+ {done,{ok,T,{Line,Col}},NewRest};
+token_cont(Rest, Line, Col, {end_token,T}) ->
+ {done,{ok,T,{Line,Col}},Rest};
+token_cont(Rest, Line, Col, {end_token,T,Push}) ->
NewRest = Push ++ Rest,
- {done,{ok,T,Line},NewRest};
-token_cont(Rest, Line, skip_token) ->
- token(yystate(), Rest, Line, Rest, 0, Line, reject, 0);
-token_cont(Rest, Line, {skip_token,Push}) ->
+ {done,{ok,T,{Line,Col}},NewRest};
+token_cont(Rest, Line, Col, skip_token) ->
+ token(yystate(), Rest, Line, Col, Rest, 0, Line, Col, reject, 0);
+token_cont(Rest, Line, Col, {skip_token,Push}) ->
NewRest = Push ++ Rest,
- token(yystate(), NewRest, Line, NewRest, 0, Line, reject, 0);
-token_cont(Rest, Line, {error,S}) ->
- {done,{error,{Line,?MODULE,{user,S}},Line},Rest}.
+ token(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, reject, 0);
+token_cont(Rest, Line, Col, {error,S}) ->
+ {done,{error,{{Line, Col},?MODULE,{user,S}},{Line, Col}},Rest}.
-%% tokens(Continuation, Chars, Line) ->
+%% tokens(Continuation, Chars) ->
+%% tokens(Continuation, Chars, Loc) ->
%% {more,Continuation} | {done,ReturnVal,RestChars}.
%% Must be careful when re-entering to append the latest characters to the
%% after characters in an accept. The continuation is:
-%% {tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Tokens,AccAction,AccLen}
-%% {skip_tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Error,AccAction,AccLen}
+%% {tokens,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,Tokens,AccAction,AccLen}
+%% {skip_tokens,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,Error,AccAction,AccLen}
-tokens(Cont, Chars) -> tokens(Cont, Chars, 1).
+##tks
-tokens([], Chars, Line) ->
- tokens(yystate(), Chars, Line, Chars, 0, Line, [], reject, 0);
-tokens({tokens,State,Line,Tcs,Tlen,Tline,Ts,Action,Alen}, Chars, _) ->
- tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Ts, Action, Alen);
-tokens({skip_tokens,State,Line,Tcs,Tlen,Tline,Error,Action,Alen}, Chars, _) ->
- skip_tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Error, Action, Alen).
+do_tokens([], Chars, Line, Col) ->
+ tokens(yystate(), Chars, Line, Col, Chars, 0, Line, Col, [], reject, 0);
+do_tokens({tokens,State,Line,Col,Tcs,Tlen,Tline,Tcol,Ts,Action,Alen}, Chars, _,_) ->
+ tokens(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Ts, Action, Alen);
+do_tokens({skip_tokens,State,Line, Col, Tcs,Tlen,Tline,Tcol,Error,Action,Alen}, Chars, _,_) ->
+ skip_tokens(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Error, Action, Alen).
-%% tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Tokens,
+%% tokens(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol,Tokens,
%% AcceptAction, AcceptLen) ->
%% {more,Continuation} | {done,ReturnVal,RestChars}.
-tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Ts, A0, Alen0) ->
- case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+tokens(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, Ts, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of
%% Accepting end state, we have a token.
- {A1,Alen1,Ics1,L1} ->
- tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts);
+ {A1,Alen1,Ics1,L1,C1} ->
+ C2 = adjust_col(Tcs, Alen1, C1),
+ tokens_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline, Tcol), Ts);
%% Accepting transition state, can take more chars.
- {A1,Alen1,[],L1,S1} -> % Need more chars to check
- {more,{tokens,S1,L1,Tcs,Alen1,Tline,Ts,A1,Alen1}};
- {A1,Alen1,Ics1,L1,_S1} -> % Take what we got
- tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts);
+ {A1,Alen1,[],L1,C1,S1} -> % Need more chars to check
+ {more,{tokens,S1,L1,C1,Tcs,Alen1,Tline,Tcol,Ts,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,C1,_S1} -> % Take what we got
+ C2 = adjust_col(Tcs, Alen1, C1),
+ tokens_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol), Ts);
%% After a non-accepting state, maybe reach accept state later.
- {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check
- {more,{tokens,S1,L1,Tcs,Tlen1,Tline,Ts,A1,Alen1}};
- {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match
+ {A1,Alen1,Tlen1,[],L1,C1,S1} -> % Need more chars to check
+ {more,{tokens,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,Ts,A1,Alen1}};
+ {reject,_Alen1,Tlen1,eof,L1,C1,_S1} -> % No token match
%% Check for partial token which is error, no need to skip here.
- Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE,
+ Ret = if Tlen1 > 0 -> {error,{{Tline,Tcol},?MODULE,
%% Skip eof tail in Tcs.
- {illegal,yypre(Tcs, Tlen1)}},L1};
- Ts == [] -> {eof,L1};
- true -> {ok,yyrev(Ts),L1}
+ {illegal,yypre(Tcs, Tlen1)}},{L1,C1}};
+ Ts == [] -> {eof,{L1,C1}};
+ true -> {ok,yyrev(Ts),{L1,C1}}
end,
{done,Ret,eof};
- {reject,_Alen1,Tlen1,_Ics1,L1,_S1} ->
+ {reject,_Alen1,Tlen1,_Ics1,L1,C1,_S1} ->
%% Skip rest of tokens.
- Error = {L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
- skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error);
- {A1,Alen1,Tlen1,_Ics1,L1,_S1} ->
- Token = yyaction(A1, Alen1, Tcs, Tline),
+ Error = {{L1,C1},?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},
+ skip_tokens(yysuf(Tcs, Tlen1+1), L1, C1, Error);
+ {A1,Alen1,Tlen1,_Ics1,L1,_C1,_S1} ->
+ Token = yyaction(A1, Alen1, Tcs, Tline,Tcol),
Tcs1 = yysuf(Tcs, Alen1),
L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
- tokens_cont(Tcs1, L2, Token, Ts)
+ C2 = C0 + Alen1,
+ tokens_cont(Tcs1, L2, C2, Token, Ts)
end.
-%% tokens_cont(RestChars, Line, Token, Tokens)
+%% tokens_cont(RestChars, Line, Column, Token, Tokens)
%% If we have an end_token or error then return done, else if we have
%% a token then save it and continue, else if we have a skip_token
%% just continue.
--dialyzer({nowarn_function, tokens_cont/4}).
+-dialyzer({nowarn_function, tokens_cont/5}).
-tokens_cont(Rest, Line, {token,T}, Ts) ->
- tokens(yystate(), Rest, Line, Rest, 0, Line, [T|Ts], reject, 0);
-tokens_cont(Rest, Line, {token,T,Push}, Ts) ->
+tokens_cont(Rest, Line, Col, {token,T}, Ts) ->
+ tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, [T|Ts], reject, 0);
+tokens_cont(Rest, Line, Col, {token,T,Push}, Ts) ->
NewRest = Push ++ Rest,
- tokens(yystate(), NewRest, Line, NewRest, 0, Line, [T|Ts], reject, 0);
-tokens_cont(Rest, Line, {end_token,T}, Ts) ->
- {done,{ok,yyrev(Ts, [T]),Line},Rest};
-tokens_cont(Rest, Line, {end_token,T,Push}, Ts) ->
+ tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, [T|Ts], reject, 0);
+tokens_cont(Rest, Line, Col, {end_token,T}, Ts) ->
+ {done,{ok,yyrev(Ts, [T]),{Line,Col}},Rest};
+tokens_cont(Rest, Line, Col, {end_token,T,Push}, Ts) ->
NewRest = Push ++ Rest,
- {done,{ok,yyrev(Ts, [T]),Line},NewRest};
-tokens_cont(Rest, Line, skip_token, Ts) ->
- tokens(yystate(), Rest, Line, Rest, 0, Line, Ts, reject, 0);
-tokens_cont(Rest, Line, {skip_token,Push}, Ts) ->
+ {done,{ok,yyrev(Ts, [T]),{Line, Col}},NewRest};
+tokens_cont(Rest, Line, Col, skip_token, Ts) ->
+ tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Ts, reject, 0);
+tokens_cont(Rest, Line, Col, {skip_token,Push}, Ts) ->
NewRest = Push ++ Rest,
- tokens(yystate(), NewRest, Line, NewRest, 0, Line, Ts, reject, 0);
-tokens_cont(Rest, Line, {error,S}, _Ts) ->
- skip_tokens(Rest, Line, {Line,?MODULE,{user,S}}).
+ tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Ts, reject, 0);
+tokens_cont(Rest, Line, Col, {error,S}, _Ts) ->
+ skip_tokens(Rest, Line, Col, {{Line,Col},?MODULE,{user,S}}).
-%%skip_tokens(InChars, Line, Error) -> {done,{error,Error,Line},Ics}.
+%% skip_tokens(InChars, Line, Col, Error) -> {done,{error,Error,{Line,Col}},Ics}.
%% Skip tokens until an end token, junk everything and return the error.
-skip_tokens(Ics, Line, Error) ->
- skip_tokens(yystate(), Ics, Line, Ics, 0, Line, Error, reject, 0).
+skip_tokens(Ics, Line, Col, Error) ->
+ skip_tokens(yystate(), Ics, Line, Col, Ics, 0, Line, Col, Error, reject, 0).
-%% skip_tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Tokens,
+%% skip_tokens(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol, Tokens,
%% AcceptAction, AcceptLen) ->
%% {more,Continuation} | {done,ReturnVal,RestChars}.
-skip_tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Error, A0, Alen0) ->
- case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
- {A1,Alen1,Ics1,L1} -> % Accepting end state
- skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error);
- {A1,Alen1,[],L1,S1} -> % After an accepting state
- {more,{skip_tokens,S1,L1,Tcs,Alen1,Tline,Error,A1,Alen1}};
- {A1,Alen1,Ics1,L1,_S1} ->
- skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error);
- {A1,Alen1,Tlen1,[],L1,S1} -> % After a non-accepting state
- {more,{skip_tokens,S1,L1,Tcs,Tlen1,Tline,Error,A1,Alen1}};
- {reject,_Alen1,_Tlen1,eof,L1,_S1} ->
- {done,{error,Error,L1},eof};
- {reject,_Alen1,Tlen1,_Ics1,L1,_S1} ->
- skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error);
- {A1,Alen1,Tlen1,_Ics1,L1,_S1} ->
- Token = yyaction(A1, Alen1, Tcs, Tline),
+skip_tokens(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, Error, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of
+ {A1,Alen1,Ics1,L1, C1} -> % Accepting end state
+ skip_cont(Ics1, L1, C1, yyaction(A1, Alen1, Tcs, Tline, Tcol), Error);
+ {A1,Alen1,[],L1,C1, S1} -> % After an accepting state
+ {more,{skip_tokens,S1,L1,C1,Tcs,Alen1,Tline,Tcol,Error,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,C1,_S1} ->
+ skip_cont(Ics1, L1, C1, yyaction(A1, Alen1, Tcs, Tline, Tcol), Error);
+ {A1,Alen1,Tlen1,[],L1,C1,S1} -> % After a non-accepting state
+ {more,{skip_tokens,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,Error,A1,Alen1}};
+ {reject,_Alen1,_Tlen1,eof,L1,C1,_S1} ->
+ {done,{error,Error,{L1,C1}},eof};
+ {reject,_Alen1,Tlen1,_Ics1,L1,C1,_S1} ->
+ skip_tokens(yysuf(Tcs, Tlen1+1), L1, C1,Error);
+ {A1,Alen1,Tlen1,_Ics1,L1,C1,_S1} ->
+ Token = yyaction(A1, Alen1, Tcs, Tline, Tcol),
Tcs1 = yysuf(Tcs, Alen1),
L2 = adjust_line(Tlen1, Alen1, Tcs1, L1),
- skip_cont(Tcs1, L2, Token, Error)
+ skip_cont(Tcs1, L2, C1, Token, Error)
end.
-%% skip_cont(RestChars, Line, Token, Error)
+%% skip_cont(RestChars, Line, Col, Token, Error)
%% Skip tokens until we have an end_token or error then return done
%% with the original error.
--dialyzer({nowarn_function, skip_cont/4}).
+-dialyzer({nowarn_function, skip_cont/5}).
-skip_cont(Rest, Line, {token,_T}, Error) ->
- skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0);
-skip_cont(Rest, Line, {token,_T,Push}, Error) ->
+skip_cont(Rest, Line, Col, {token,_T}, Error) ->
+ skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0);
+skip_cont(Rest, Line, Col, {token,_T,Push}, Error) ->
NewRest = Push ++ Rest,
- skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0);
-skip_cont(Rest, Line, {end_token,_T}, Error) ->
- {done,{error,Error,Line},Rest};
-skip_cont(Rest, Line, {end_token,_T,Push}, Error) ->
+ skip_tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Error, reject, 0);
+skip_cont(Rest, Line, Col, {end_token,_T}, Error) ->
+ {done,{error,Error,{Line,Col}},Rest};
+skip_cont(Rest, Line, Col, {end_token,_T,Push}, Error) ->
NewRest = Push ++ Rest,
- {done,{error,Error,Line},NewRest};
-skip_cont(Rest, Line, skip_token, Error) ->
- skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0);
-skip_cont(Rest, Line, {skip_token,Push}, Error) ->
+ {done,{error,Error,{Line,Col}},NewRest};
+skip_cont(Rest, Line, Col, skip_token, Error) ->
+ skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0);
+skip_cont(Rest, Line, Col, {skip_token,Push}, Error) ->
NewRest = Push ++ Rest,
- skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0);
-skip_cont(Rest, Line, {error,_S}, Error) ->
- skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0).
+ skip_tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Error, reject, 0);
+skip_cont(Rest, Line, Col, {error,_S}, Error) ->
+ skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0).
-compile({nowarn_unused_function, [yyrev/1, yyrev/2, yypre/2, yysuf/2]}).
@@ -292,21 +300,44 @@ adjust_line(T, A, [$\n|Cs], L) ->
adjust_line(T, A, [_|Cs], L) ->
adjust_line(T-1, A, Cs, L).
+%% adjust_col(Chars, AcceptLength, Col) -> NewCol
+%% Handle newlines, tabs and unicode chars.
+adjust_col(_, 0, Col) ->
+ Col;
+adjust_col([$\n | R], L, _) ->
+ adjust_col(R, L-1, 1);
+adjust_col([$\t | R], L, Col) ->
+ adjust_col(R, L-1, tab_forward(Col)+1);
+adjust_col([C | R], L, Col) when C>=0 andalso C=< 16#7F ->
+ adjust_col(R, L-1, Col+1);
+adjust_col([C | R], L, Col) when C>= 16#80 andalso C=< 16#7FF ->
+ adjust_col(R, L-1, Col+2);
+adjust_col([C | R], L, Col) when C>= 16#800 andalso C=< 16#FFFF ->
+ adjust_col(R, L-1, Col+3);
+adjust_col([C | R], L, Col) when C>= 16#10000 andalso C=< 16#10FFFF ->
+ adjust_col(R, L-1, Col+4).
+
+tab_forward(C) ->
+ D = C rem tab_size(),
+ A = tab_size()-D,
+ C+A.
+
+##tab_size
+
%% yystate() -> InitialState.
-%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) ->
-%% {Action, AcceptLen, RestChars, Line} |
-%% {Action, AcceptLen, RestChars, Line, State} |
-%% {reject, AcceptLen, CurrTokLen, RestChars, Line, State} |
-%% {Action, AcceptLen, CurrTokLen, RestChars, Line, State}.
+%% yystate(State, InChars, Line, Col, CurrTokLen, AcceptAction, AcceptLen) ->
+%% {Action, AcceptLen, RestChars, Line, Col} |
+%% {Action, AcceptLen, RestChars, Line, Col, State} |
+%% {reject, AcceptLen, CurrTokLen, RestChars, Line, Col, State} |
+%% {Action, AcceptLen, CurrTokLen, RestChars, Line, Col, State}.
%% Generated state transition functions. The non-accepting end state
%% return signal either an unrecognised character or end of current
%% input.
##dfa
-%% yyaction(Action, TokenLength, TokenChars, TokenLine) ->
+%% yyaction(Action, TokenLength, TokenChars, TokenLine, TokenCol) ->
%% {token,Token} | {end_token, Token} | skip_token | {error,String}.
%% Generated action function.
##actions
-
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index b764678516..c5e61b8149 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -119,6 +119,8 @@ file(File) -> file(File, []).
| {'verbose', boolean()}
| {'warnings_as_errors', boolean()}
| {'deterministic', boolean()}
+ | {'error_location', line | column}
+ | {'tab_size', pos_integer()}
| 'dfa_graph'
| 'report_errors' | 'report_warnings' | 'report'
| 'return_errors' | 'return_warnings' | 'return'
@@ -281,6 +283,12 @@ check_options([{Option, Boolean} | Options], AllOptions, L)
false ->
badarg
end;
+check_options([{error_location, Loc}=O | Options], AllOptions, L)
+ when Loc =:= line; Loc =:= column ->
+ check_options(Options, AllOptions, [O | L]);
+check_options([{tab_size, S}=O | Options], AllOptions, L)
+ when is_integer(S) andalso S>0 ->
+ check_options(Options, AllOptions, [O | L]);
check_options([], _AllOptions, L) ->
L;
check_options(_Options, _, _L) ->
@@ -289,7 +297,7 @@ check_options(_Options, _, _L) ->
all_options() ->
[dfa_graph,includefile,report_errors,report_warnings,
return_errors,return_warnings,scannerfile,verbose,
- warnings_as_errors, deterministic].
+ warnings_as_errors,deterministic,error_location,tab_size].
default_option(dfa_graph) -> false;
default_option(includefile) -> [];
@@ -300,7 +308,9 @@ default_option(return_warnings) -> false;
default_option(scannerfile) -> [];
default_option(verbose) -> false;
default_option(warnings_as_errors) -> false;
-default_option(deterministic) -> false.
+default_option(deterministic) -> false;
+default_option(error_location) -> line;
+default_option(tab_size) -> 8.
atom_option(dfa_graph) -> {dfa_graph,true};
atom_option(report_errors) -> {report_errors,true};
@@ -596,7 +606,9 @@ parse_rule(S, Line, Atoks, Ms, N, St) ->
TokenChars = var_used('TokenChars', Atoks),
TokenLen = var_used('TokenLen', Atoks),
TokenLine = var_used('TokenLine', Atoks),
- {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
+ TokenCol = var_used('TokenCol', Atoks),
+ TokenLoc = var_used('TokenLoc', Atoks),
+ {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine,TokenCol,TokenLoc},St};
{error,E} ->
add_error({Line,leex,E}, St)
end.
@@ -1415,6 +1427,10 @@ out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) ->
case string:slice(Line, 0, 5) of
"##mod" -> out_module(Ofile, St);
"##cod" -> out_erlang_code(Ofile, St, Code, L);
+ "##str" -> out_string(Ofile, St#leex.opts);
+ "##tkn" -> out_token(Ofile, St#leex.opts);
+ "##tks" -> out_tokens(Ofile, St#leex.opts);
+ "##tab" -> out_tab_size(Ofile, St#leex.opts);
"##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L);
"##act" -> out_actions(Ofile, St#leex.xfile, Deterministic, Actions);
_ -> io:put_chars(Ofile, Line)
@@ -1440,6 +1456,92 @@ out_erlang_code(File, St, Code, L) ->
io:nl(File),
output_file_directive(File, St#leex.ifile, Deterministic, L).
+out_tab_size(File, Opts) ->
+ Size = proplists:get_value(tab_size, Opts),
+ io:fwrite(File, "tab_size() -> ~p.\n", [Size]).
+
+%% Generate the string/1,2 wrappers; when error_location is line, strip the column from returned locations
+out_string(File, Opts) ->
+ out_string_1(File, Opts),
+ out_string_2(File, Opts),
+ Vars = lists:join(", ",["Ics","L0","C0","Tcs","Ts"]),
+ out_head(File,string,Vars),
+ EL = proplists:get_value(error_location, Opts),
+ case EL of
+ column ->
+ io:fwrite(File," do_string(~s).\n",[Vars]);
+ line ->
+ io:fwrite(File," case do_string(~s) of\n",[Vars]),
+ io:fwrite(File," {ok, T, {L,_}} -> {ok, T, L};\n",[]),
+ io:fwrite(File," {error, {{EL,_},M,D}, {L,_}} ->\n",[]),
+ io:fwrite(File," EI = {EL,M,D},\n",[]),
+ io:fwrite(File," {error, EI, L}\n",[]),
+ io:fwrite(File," end.\n",[])
+ end.
+
+out_string_1(File, Opts) ->
+ out_head(File,string,"Ics"),
+ EL = proplists:get_value(error_location, Opts),
+ DefLoc = case EL of
+ column -> "{1,1}";
+ line -> "1"
+ end,
+ io:fwrite(File," string(~s).\n",["Ics,"++DefLoc]).
+
+out_string_2(File, Opts) ->
+ EL = proplists:get_value(error_location, Opts),
+ case EL of
+ column ->
+ out_head(File,string,"Ics,{L0,C0}"),
+ CallVars = lists:join(", ", ["Ics","L0","C0","Ics","[]"]),
+ io:fwrite(File," string(~s).\n",[CallVars]);
+ line ->
+ out_head(File,string,"Ics,L0"),
+ CallVars = lists:join(", ", ["Ics","L0","1","Ics","[]"]),
+ io:fwrite(File," string(~s).\n",[CallVars])
+ end.
+
+out_token(File, Opts) ->
+ out_tokens_wrapper(File, Opts, token).
+
+out_tokens(File, Opts) ->
+ out_tokens_wrapper(File, Opts, tokens).
+
+out_tokens_wrapper(File, Opts, Fun) ->
+ out_token_2(File, Opts, Fun),
+ EL = proplists:get_value(error_location, Opts),
+ case EL of
+ column ->
+ VarsCol = lists:join(", ",["Cont","Chars","{Line,Col}"]),
+ out_head(File, Fun, VarsCol),
+ io:fwrite(File," do_~s(~s).\n",[Fun,"Cont,Chars,Line,Col"]);
+ line ->
+ VarsCol = lists:join(", ",["Cont","Chars","Line"]),
+ out_head(File, Fun, VarsCol),
+ io:fwrite(File," case do_~s(~s) of\n",[Fun,"Cont,Chars,Line,1"]),
+ io:fwrite(File," {more, _} = C -> C;\n",[]),
+ io:fwrite(File," {done, Ret0, R} ->\n",[]),
+ io:fwrite(File," Ret1 = case Ret0 of\n",[]),
+ io:fwrite(File," {ok, T, {L,_}} -> {ok, T, L};\n",[]),
+ io:fwrite(File," {eof, {L,_}} -> {eof, L};\n",[]),
+ io:fwrite(File," {error, {{EL,_},M,D},{L,_}} -> {error, {EL,M,D},L}\n",[]),
+ io:fwrite(File," end,\n",[]),
+ io:fwrite(File," {done, Ret1, R}\n",[]),
+ io:fwrite(File," end.\n",[])
+ end.
+
+out_token_2(File, Opts, Fun) ->
+ out_head(File, Fun, "Cont,Chars"),
+ EL = proplists:get_value(error_location, Opts),
+ DefLoc = case EL of
+ column -> "{1,1}";
+ line -> "1"
+ end,
+ io:fwrite(File," ~s(~s).\n",[Fun,"Cont,Chars,"++DefLoc]).
+
+out_head(File, Fun, Vars) ->
+ io:fwrite(File, "~s(~s) -> \n",[Fun,Vars]).
+
file_copy(From, To) ->
case io:get_line(From, leex) of
eof -> ok;
@@ -1455,36 +1557,36 @@ out_dfa(File, St, DFA, Code, DF, L) ->
output_file_directive(File, St#leex.efile, Deterministic, L+(NCodeLines-1)+3),
io:fwrite(File, "yystate() -> ~w.~n~n", [DF]),
foreach(fun (S) -> out_trans(File, S) end, DFA),
- io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []),
- io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n", []).
+ io:fwrite(File, "yystate(S, Ics, Line, Col, Tlen, Action, Alen) ->~n", []),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,Col,S}.~n", []).
out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) ->
%% Accepting end state, guaranteed done.
- io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
- io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]);
+ io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, _, _) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line,Col};~n", [A]);
out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) ->
%% Accepting state, but there maybe more.
foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)),
- io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]),
- io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]);
+ io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, _, _) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line,Col,~w};~n", [A,N]);
out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) ->
%% Non-accepting transition state.
foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)),
- io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
- io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]).
+ io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, Action, Alen) ->~n", [N]),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,Col,~w};~n", [N]).
out_accept_tran(File, N, A, {{Cf,maxchar},S}) ->
out_accept_head_max(File, N, Cf),
- out_accept_body(File, S, "Line", A);
+ out_accept_body(File, S, "Line", "Col", A);
out_accept_tran(File, N, A, {{Cf,Cl},S}) ->
out_accept_head_range(File, N, Cf, Cl),
- out_accept_body(File, S, "Line", A);
+ out_accept_body(File, S, "Line", "Col", A);
out_accept_tran(File, N, A, {$\n,S}) ->
out_accept_head_1(File, N, $\n),
- out_accept_body(File, S, "Line+1", A);
+ out_accept_body(File, S, "Line+1", "1", A);
out_accept_tran(File, N, A, {C,S}) ->
out_accept_head_1(File, N, C),
- out_accept_body(File, S, "Line", A).
+ out_accept_body(File, S, "Line", "Col", A).
out_accept_head_1(File, State, Char) ->
out_head_1(File, State, Char, "_", "_").
@@ -1495,21 +1597,21 @@ out_accept_head_max(File, State, Min) ->
out_accept_head_range(File, State, Min, Max) ->
out_head_range(File, State, Min, Max, "_", "_").
-out_accept_body(File, Next, Line, Action) ->
- out_body(File, Next, Line, io_lib:write(Action), "Tlen").
+out_accept_body(File, Next, Line, Col, Action) ->
+ out_body(File, Next, Line, Col, io_lib:write(Action), "Tlen").
out_noaccept_tran(File, N, {{Cf,maxchar},S}) ->
out_noaccept_head_max(File, N, Cf),
- out_noaccept_body(File, S, "Line");
+ out_noaccept_body(File, S, "Line", "Col");
out_noaccept_tran(File, N, {{Cf,Cl},S}) ->
out_noaccept_head_range(File, N, Cf, Cl),
- out_noaccept_body(File, S, "Line");
+ out_noaccept_body(File, S, "Line", "Col");
out_noaccept_tran(File, N, {$\n,S}) ->
out_noaccept_head_1(File, N, $\n),
- out_noaccept_body(File, S, "Line+1");
+ out_noaccept_body(File, S, "Line+1", "1");
out_noaccept_tran(File, N, {C,S}) ->
out_noaccept_head_1(File, N, C),
- out_noaccept_body(File, S, "Line").
+ out_noaccept_body(File, S, "Line", "Col").
out_noaccept_head_1(File, State, Char) ->
out_head_1(File, State, Char, "Action", "Alen").
@@ -1520,24 +1622,27 @@ out_noaccept_head_max(File, State, Min) ->
out_noaccept_head_range(File, State, Min, Max) ->
out_head_range(File, State, Min, Max, "Action", "Alen").
-out_noaccept_body(File, Next, Line) ->
- out_body(File, Next, Line, "Action", "Alen").
+out_noaccept_body(File, Next, Line, Col) ->
+ out_body(File, Next, Line, Col, "Action", "Alen").
+out_head_1(File, State, Char = $\n, Action, Alen) ->
+ io:fwrite(File, "yystate(~w, [~w|Ics], Line, _, Tlen, ~s, ~s) ->\n",
+ [State,Char,Action,Alen]);
out_head_1(File, State, Char, Action, Alen) ->
- io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n",
+ io:fwrite(File, "yystate(~w, [~w|Ics], Line, Col, Tlen, ~s, ~s) ->\n",
[State,Char,Action,Alen]).
out_head_max(File, State, Min, Action, Alen) ->
- io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n",
+ io:fwrite(File, "yystate(~w, [C|Ics], Line, Col, Tlen, ~s, ~s) when C >= ~w ->\n",
[State,Action,Alen,Min]).
out_head_range(File, State, Min, Max, Action, Alen) ->
- io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n",
+ io:fwrite(File, "yystate(~w, [C|Ics], Line, Col, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n",
[State,Action,Alen,Min,Max]).
-out_body(File, Next, Line, Action, Alen) ->
- io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n",
- [Next,Line,Action,Alen]).
+out_body(File, Next, Line, Col, Action, Alen) ->
+ io:fwrite(File, " yystate(~w, Ics, ~s, ~s, Tlen+1, ~s, ~s);\n",
+ [Next,Line,Col,Action,Alen]).
%% pack_trans([{Crange,State}]) -> [{Crange,State}] when
%% Crange = {Char,Char} | Char.
@@ -1581,31 +1686,32 @@ pack_trans([], Pt) -> Pt.
out_actions(File, XrlFile, Deterministic, As) ->
As1 = prep_out_actions(As),
foreach(fun (A) -> out_action(File, A) end, As1),
- io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []),
+ io:fwrite(File, "yyaction(_, _, _, _, _) -> error.~n", []),
foreach(fun (A) -> out_action_code(File, XrlFile, Deterministic, A) end, As1).
prep_out_actions(As) ->
map(fun ({A,empty_action}) ->
{A,empty_action};
- ({A,Code,TokenChars,TokenLen,TokenLine}) ->
+ ({A,Code,TokenChars,TokenLen,TokenLine,TokenCol,TokenLoc}) ->
Vs = [{TokenChars,"TokenChars"},
{TokenLen,"TokenLen"},
- {TokenLine,"TokenLine"},
+ {TokenLine or TokenLoc,"TokenLine"},
+ {TokenCol or TokenLoc,"TokenCol"},
{TokenChars,"YYtcs"},
{TokenLen or TokenChars,"TokenLen"}],
Vars = [if F -> S; true -> "_" end || {F,S} <- Vs],
Name = list_to_atom(lists:concat([yyaction_,A])),
- [Chars,Len,Line,_,_] = Vars,
- Args = [V || V <- [Chars,Len,Line], V =/= "_"],
+ [Chars,Len,Line,Col,_,_] = Vars,
+ Args = [V || V <- [Chars,Len,Line,Col], V =/= "_"],
ArgsChars = lists:join(", ", Args),
- {A,Code,Vars,Name,Args,ArgsChars}
+ {A,Code,Vars,Name,Args,ArgsChars, TokenLoc}
end, As).
out_action(File, {A,empty_action}) ->
- io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]);
-out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) ->
- [_,_,Line,Tcs,Len] = Vars,
- io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]),
+ io:fwrite(File, "yyaction(~w, _, _, _, _) -> skip_token;~n", [A]);
+out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars,_TokenLoc}) ->
+ [_,_,Line,Col,Tcs,Len] = Vars,
+ io:fwrite(File, "yyaction(~w, ~s, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line,Col]),
if
Tcs =/= "_" ->
io:fwrite(File, " TokenChars = yypre(YYtcs, TokenLen),~n", []);
@@ -1615,13 +1721,17 @@ out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) ->
out_action_code(_File, _XrlFile, _Deterministic, {_A,empty_action}) ->
ok;
-out_action_code(File, XrlFile, Deterministic, {_A,Code,_Vars,Name,Args,ArgsChars}) ->
+out_action_code(File, XrlFile, Deterministic, {_A,Code,_Vars,Name,Args,ArgsChars, TokenLoc}) ->
%% Should set the file to the .erl file, but instead assumes that
%% ?LEEXINC is syntactically correct.
io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]),
L = erl_scan:line(hd(Code)),
output_file_directive(File, XrlFile, Deterministic, L-2),
io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]),
+ if
+ TokenLoc -> io:fwrite(File," TokenLoc={TokenLine,TokenCol},~n",[]);
+ true -> ok
+ end,
io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]).
%% pp_tokens(Tokens, Line, File) -> [char()].
diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl
index 8d7e44629c..92e74034b5 100644
--- a/lib/parsetools/test/leex_SUITE.erl
+++ b/lib/parsetools/test/leex_SUITE.erl
@@ -45,7 +45,7 @@
pt/1, man/1, ex/1, ex2/1, not_yet/1,
line_wrap/1,
otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1,
- otp_17023/1, compiler_warnings/1]).
+ otp_17023/1, compiler_warnings/1, column_support/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, test_server:minutes(1)).
@@ -66,7 +66,7 @@ all() ->
groups() ->
[{checks, [], [file, compile, syntax, deterministic]},
- {examples, [], [pt, man, ex, ex2, not_yet, unicode]},
+ {examples, [], [pt, man, ex, ex2, not_yet, unicode, column_support]},
{tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285, otp_17023,
compiler_warnings]},
{bugs, [], [line_wrap]}].
@@ -118,6 +118,17 @@ file(Config) when is_list(Config) ->
{'EXIT', {badarg, _}} =
(catch leex:file(Filename, includefile)),
+ {'EXIT', {badarg, _}} =
+ (catch leex:file(Filename, {tab_size,0})),
+ {'EXIT', {badarg, _}} =
+ (catch leex:file(Filename, {tab_size,"4"})),
+ {'EXIT', {badarg, _}} =
+ (catch leex:file(Filename, {tab_size,3.5})),
+ {'EXIT', {badarg, _}} =
+ (catch leex:file(Filename, {error_location,{line,column}})),
+ {'EXIT', {badarg, _}} =
+ (catch leex:file(Filename, {error_location,col})),
+
Mini = <<"Definitions.\n"
"D = [0-9]\n"
"Rules.\n"
@@ -417,17 +428,17 @@ pt(Config) when is_list(Config) ->
"L = [a-z]\n"
"Rules.\n"
- "{L}+ : {token,{word,TokenLine,TokenChars}}.\n"
+ "{L}+ : {token,{word,TokenLoc,TokenChars}}.\n"
"abc{D}+ : {skip_token,\"sture\" ++ string:substr(TokenChars, 4)}.\n"
- "{D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n"
+ "{D}+ : {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n"
"\\s : .\n"
"\\r\\n : {end_token,{crlf,TokenLine}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->
- {ok,[{word,1,\"sture\"},{integer,1,123}],1} =
- string(\"abc123\"), ok. ">>,
+ {ok,[{word,{1,7},\"sture\"},{integer,{1,12},123}],{1,15}} =
+ string(\"abc123\"), ok. ">>,
default,
ok}],
@@ -442,10 +453,10 @@ unicode(Config) when is_list(Config) ->
"Definitions.\n"
"RTLarrow = (←)\n"
"Rules.\n"
- "{RTLarrow} : {token,{\"←\",TokenLine}}.\n"
+ "{RTLarrow} : {token,{\"←\",TokenLoc}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
- "t() -> {ok, [{\"←\", 1}], 1} = string(\"←\"), ok.">>,
+ "t() -> {ok, [{\"←\", {1,1}}], {1,4}} = string(\"←\"), ok.">>,
default,
ok}],
@@ -460,34 +471,33 @@ man(Config) when is_list(Config) ->
<<"Definitions.\n"
"Rules.\n"
"[a-z][0-9a-zA-Z_]* :\n"
- " {token,{atom,TokenLine,list_to_atom(TokenChars)}}.\n"
+ " {token,{atom,TokenLoc,list_to_atom(TokenChars)}}.\n"
"[A-Z_][0-9a-zA-Z_]* :\n"
- " {token,{var,TokenLine,list_to_atom(TokenChars)}}.\n"
+ " {token,{var,TokenLoc,list_to_atom(TokenChars)}}.\n"
"(\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)? : \n"
- " {token,{float,TokenLine,list_to_float(TokenChars)}}.\n"
+ " {token,{float,TokenLoc,list_to_float(TokenChars)}}.\n"
"\\s : skip_token.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{float,1,3.14},{atom,1,atom},{var,1,'V314'}],1} =\n"
+ " {ok,[{float,{1,1},3.14},{atom,{1,5},atom},{var,{1,10},'V314'}],{1,14}} =\n"
" string(\"3.14atom V314\"),\n"
" ok.\n">>,
default,
ok},
-
- {man_2,
+ {man_2,
<<"Definitions.\n"
"D = [0-9]\n"
"Rules.\n"
"{D}+ :\n"
- " {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n"
+ " {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n"
"{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? :\n"
- " {token,{float,TokenLine,list_to_float(TokenChars)}}.\n"
+ " {token,{float,TokenLoc,list_to_float(TokenChars)}}.\n"
"\\s : skip_token.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{float,1,3.14},{integer,1,314}],1} = \n"
+ " {ok,[{float,{1,1},3.14},{integer,{1,6},314}],{1,9}} = \n"
" string(\"3.14 314\"),\n"
" ok.\n">>,
default,
@@ -505,13 +515,13 @@ ex(Config) when is_list(Config) ->
"D = [0-543-705-982]\n"
"Rules.\n"
"{D}+ :\n"
- " {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n"
+ " {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n"
"[^235]+ :\n"
- " {token,{list_to_atom(TokenChars),TokenLine}}.\n"
+ " {token,{list_to_atom(TokenChars),TokenLoc}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{integer,1,12},{' c\\na',1},{integer,2,34},{b789a,2}],2} =\n"
+ " {ok,[{integer,{1,1},12},{' c\\na',{1,3}},{integer,{2,2},34},{b789a,{2,4}}],{2,9}} =\n"
" string(\"12 c\\na34b789a\"),\n"
" ok.\n">>,
default,
@@ -528,7 +538,7 @@ ex(Config) when is_list(Config) ->
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[chars,zyx],1} = string(\"abcdef zyx123\"),\n"
+ " {ok,[chars,zyx],{1,14}} = string(\"abcdef zyx123\"),\n"
" ok.\n">>,
default,
ok},
@@ -541,7 +551,7 @@ ex(Config) when is_list(Config) ->
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[],1} = string(\"\"), ok.\n">>, % string("a") would loop...
+ " {ok,[],{1,1}} = string(\"\"), ok.\n">>, % string("a") would loop...
default,
ok},
@@ -574,12 +584,12 @@ ex(Config) when is_list(Config) ->
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{white,\"\\b\\f\"}],1} = string(\"\\b\\f\"),\n"
- " {ok,[{form,\"ff\\f\"}],1} = string(\"ff\\f\"),\n"
- " {ok,[{string,\"\\\"foo\\\"\"}],1} = string(\"\\\"foo\\\"\"),\n"
- " {ok,[{char,\"$.\"}],1} = string(\"$\\.\"),\n"
- " {ok,[{list,\"[a,b,c]\"}],1} = string(\"[a,b,c]\"),\n"
- " {ok,[{other,\"$^\\\\\"}],1} = string(\"$^\\\\\"),\n"
+ " {ok,[{white,\"\\b\\f\"}],{1,3}} = string(\"\\b\\f\"),\n"
+ " {ok,[{form,\"ff\\f\"}],{1,4}} = string(\"ff\\f\"),\n"
+ " {ok,[{string,\"\\\"foo\\\"\"}],{1,6}} = string(\"\\\"foo\\\"\"),\n"
+ " {ok,[{char,\"$.\"}],{1,3}} = string(\"$\\.\"),\n"
+ " {ok,[{list,\"[a,b,c]\"}],{1,8}} = string(\"[a,b,c]\"),\n"
+ " {ok,[{other,\"$^\\\\\"}],{1,4}} = string(\"$^\\\\\"),\n"
" ok.\n">>,
default,
ok},
@@ -607,7 +617,7 @@ ex(Config) when is_list(Config) ->
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{hex,[17,171,48,172]}],1} =\n"
+ " {ok,[{hex,[17,171,48,172]}],{1,7}} =\n"
" string(\"\\x{11}\\xab0\\xac\"),\n"
" ok.\n">>,
default,
@@ -637,47 +647,47 @@ WS = ([\\000-\\s]|%.*)
Rules.
{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? :
- {token,{float,TokenLine,list_to_float(TokenChars)}}.
-{D}+#{H}+ : base(TokenLine, TokenChars).
-{D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
+ {token,{float,TokenLoc,list_to_float(TokenChars)}}.
+{D}+#{H}+ : base(TokenLoc, TokenChars).
+{D}+ : {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.
{L}{A}* : Atom = list_to_atom(TokenChars),
{token,case reserved_word(Atom) of
- true -> {Atom,TokenLine};
- false -> {atom,TokenLine,Atom}
+ true -> {Atom,TokenLoc};
+ false -> {atom,TokenLoc,Atom}
end}.
'(\\\\\\^.|\\\\.|[^'])*' :
%% Strip quotes.
S = lists:sublist(TokenChars, 2, TokenLen - 2),
case catch list_to_atom(string_gen(S)) of
{'EXIT',_} -> {error,\"illegal atom \" ++ TokenChars};
- Atom -> {token,{atom,TokenLine,Atom}}
+ Atom -> {token,{atom,TokenLoc,Atom}}
end.
-({U}|_){A}* : {token,{var,TokenLine,list_to_atom(TokenChars)}}.
+({U}|_){A}* : {token,{var,TokenLoc,list_to_atom(TokenChars)}}.
\"(\\\\\\^.|\\\\.|[^\"])*\" :
%% Strip quotes.
S = lists:sublist(TokenChars, 2, TokenLen - 2),
- {token,{string,TokenLine,string_gen(S)}}.
+ {token,{string,TokenLoc,string_gen(S)}}.
\\$(\\\\{O}{O}{O}|\\\\\\^.|\\\\.|.) :
- {token,{char,TokenLine,cc_convert(TokenChars)}}.
--> : {token,{'->',TokenLine}}.
-:- : {token,{':-',TokenLine}}.
-\\|\\| : {token,{'||',TokenLine}}.
-<- : {token,{'<-',TokenLine}}.
-\\+\\+ : {token,{'++',TokenLine}}.
--- : {token,{'--',TokenLine}}.
-=/= : {token,{'=/=',TokenLine}}.
-== : {token,{'==',TokenLine}}.
-=:= : {token,{'=:=',TokenLine}}.
-/= : {token,{'/=',TokenLine}}.
->= : {token,{'>=',TokenLine}}.
-=< : {token,{'=<',TokenLine}}.
-<= : {token,{'<=',TokenLine}}.
-<< : {token,{'<<',TokenLine}}.
->> : {token,{'>>',TokenLine}}.
-:: : {token,{'::',TokenLine}}.
+ {token,{char,TokenLoc,cc_convert(TokenChars)}}.
+-> : {token,{'->',TokenLoc}}.
+:- : {token,{':-',TokenLoc}}.
+\\|\\| : {token,{'||',TokenLoc}}.
+<- : {token,{'<-',TokenLoc}}.
+\\+\\+ : {token,{'++',TokenLoc}}.
+-- : {token,{'--',TokenLoc}}.
+=/= : {token,{'=/=',TokenLoc}}.
+== : {token,{'==',TokenLoc}}.
+=:= : {token,{'=:=',TokenLoc}}.
+/= : {token,{'/=',TokenLoc}}.
+>= : {token,{'>=',TokenLoc}}.
+=< : {token,{'=<',TokenLoc}}.
+<= : {token,{'<=',TokenLoc}}.
+<< : {token,{'<<',TokenLoc}}.
+>> : {token,{'>>',TokenLoc}}.
+:: : {token,{'::',TokenLoc}}.
[]()[}{|!?/;:,.*+#<>=-] :
- {token,{list_to_atom(TokenChars),TokenLine}}.
-\\.{WS} : {end_token,{dot,TokenLine}}.
+ {token,{list_to_atom(TokenChars),TokenLoc}}.
+\\.{WS} : {end_token,{dot,TokenLoc}}.
{WS}+ : skip_token.
Erlang code.
@@ -775,7 +785,7 @@ escape_char(C) -> C.
XrlFile = filename:join(Dir, "erlang_scan.xrl"),
ok = file:write_file(XrlFile, Xrl),
ErlFile = filename:join(Dir, "erlang_scan.erl"),
- {ok, _} = leex:file(XrlFile, []),
+ {ok, _} = leex:file(XrlFile, [{error_location, column}]),
{ok, _} = compile:file(ErlFile, [{outdir,Dir}]),
code:purge(erlang_scan),
AbsFile = filename:rootname(ErlFile, ".erl"),
@@ -785,79 +795,79 @@ escape_char(C) -> C.
erlang_scan:tokens(Cont, Chars, Location)
end,
F1 = fun(Cont, Chars, Location) ->
- erlang_scan:token(Cont, Chars, Location)
- end,
+ erlang_scan:token(Cont, Chars, Location)
+ end,
fun() ->
S = "ab cd. ",
- {ok, Ts, 1} = scan_tokens_1(S, F, 1),
- {ok, Ts, 1} = scan_token_1(S, F1, 1),
- {ok, Ts, 1} = scan_tokens(S, F, 1),
- {ok, Ts, 1} = erlang_scan:string(S, 1)
+ {ok, Ts, {1,8}} = scan_tokens_1(S, F, {1,1}),
+ {ok, Ts, {1,8}} = scan_token_1(S, F1, {1,1}),
+ {ok, Ts, {1,8}} = scan_tokens(S, F, {1,1}),
+ {ok, Ts, {1,8}} = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "'ab\n cd'. ",
- {ok, Ts, 2} = scan_tokens_1(S, F, 1),
- {ok, Ts, 2} = scan_token_1(S, F1, 1),
- {ok, Ts, 2} = scan_tokens(S, F, 1),
- {ok, Ts, 2} = erlang_scan:string(S, 1)
+ {ok, Ts, {2,7}} = scan_tokens_1(S, F, {1,1}),
+ {ok, Ts, {2,7}} = scan_token_1(S, F1, {1,1}),
+ {ok, Ts, {2,7}} = scan_tokens(S, F, {1,1}),
+ {ok, Ts, {2,7}} = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "99. ",
- {ok, Ts, 1} = scan_tokens_1(S, F, 1),
- {ok, Ts, 1} = scan_token_1(S, F1, 1),
- {ok, Ts, 1} = scan_tokens(S, F, 1),
- {ok, Ts, 1} = erlang_scan:string(S, 1)
+ {ok, Ts, {1,5}} = scan_tokens_1(S, F, {1,1}),
+ {ok, Ts, {1,5}} = scan_token_1(S, F1, {1,1}),
+ {ok, Ts, {1,5}} = scan_tokens(S, F, {1,1}),
+ {ok, Ts, {1,5}} = erlang_scan:string(S, {1,1})
end(),
- {ok,[{integer,1,99},{dot,1}],1} = erlang_scan:string("99. "),
+ {ok,[{integer,{1,1},99},{dot,{1,3}}],{1,5}} = erlang_scan:string("99. "),
fun() ->
Atom = "'" ++ lists:duplicate(1000,$a) ++ "'",
S = Atom ++ ". ",
Reason = "illegal atom " ++ Atom,
- Err = {error,{1,erlang_scan,{user,Reason}},1},
- {done,Err,[]} = scan_tokens_1(S, F, 1),
- {done,Err,[]} = scan_token_1(S, F1, 1),
- {done,Err,[]} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1003},erlang_scan,{user,Reason}},{1,1003}},
+ {done,Err,[]} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[]} = scan_token_1(S, F1, {1,1}),
+ {done,Err,[]} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "\x{aaa}. ",
- Err = {error,{1,erlang_scan,{illegal,[2730]}},1},
- {done,Err,[]} = scan_tokens_1(S, F, 1),
- {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty
- {done,Err,[]} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}},
+ {done,Err,[]} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty
+ {done,Err,[]} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "\x{aaa} + 1. 34",
- Err = {error,{1,erlang_scan,{illegal,[2730]}},1},
- {done,Err,[]} = scan_tokens_1(S, F, 1),
- {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty
- {done,Err,"34"} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}},
+ {done,Err,[]} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty
+ {done,Err,"34"} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "\x{aaa} \x{bbb}. 34",
- Err = {error,{1,erlang_scan,{illegal,[2730]}},1},
- {done,Err,[]} = scan_tokens_1(S, F, 1),
- {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty
- {done,Err,"34"} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}},
+ {done,Err,[]} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty
+ {done,Err,"34"} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "\x{aaa} 18#34. 34",
- Err = {error,{1,erlang_scan,{illegal,[2730]}},1},
- {done,Err,[]} = scan_tokens_1(S, F, 1),
- {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty
- {done,Err,"34"} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}},
+ {done,Err,[]} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty
+ {done,Err,"34"} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
fun() ->
S = "\x{aaa}"++eof,
- Err = {error,{1,erlang_scan,{illegal,[2730]}},1},
- {done,Err,eof} = scan_tokens_1(S, F, 1),
- {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty
- {done,Err,eof} = scan_tokens(S, F, 1),
- Err = erlang_scan:string(S, 1)
+ Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}},
+ {done,Err,eof} = scan_tokens_1(S, F, {1,1}),
+ {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty
+ {done,Err,eof} = scan_tokens(S, F, {1,1}),
+ Err = erlang_scan:string(S, {1,1})
end(),
ok.
@@ -912,8 +922,8 @@ line_wrap(Config) when is_list(Config) ->
<<"
Definitions.
Rules.
-[a]+[\\n]*= : {token, {first, TokenLine}}.
-[a]+ : {token, {second, TokenLine}}.
+[a]+[\\n]*= : {token, {first, TokenLoc}}.
+[a]+ : {token, {second, TokenLoc}}.
[\\s\\r\\n\\t]+ : skip_token.
Erlang code.
">>,
@@ -928,20 +938,20 @@ Erlang code.
code:load_abs(AbsFile, test_line_wrap),
fun() ->
S = "aaa\naaa",
- {ok,[{second,1},{second,2}],2} = test_line_wrap:string(S)
+ {ok,[{second,{1,1}},{second,{2,1}}],2} = test_line_wrap:string(S)
end(),
fun() ->
S = "aaa\naaa",
- {ok,[{second,3},{second,4}],4} = test_line_wrap:string(S, 3)
+ {ok,[{second,{3,1}},{second,{4,1}}],4} = test_line_wrap:string(S, 3)
end(),
fun() ->
- {done,{ok,{second,1},1},"\na"} = test_line_wrap:token([], "a\na"),
+ {done,{ok,{second,{1,1}},1},"\na"} = test_line_wrap:token([], "a\na"),
{more,Cont1} = test_line_wrap:token([], "\na"),
- {done,{ok,{second,2},2},eof} = test_line_wrap:token(Cont1, eof)
+ {done,{ok,{second,{2,1}},2},eof} = test_line_wrap:token(Cont1, eof)
end(),
fun() ->
{more,Cont1} = test_line_wrap:tokens([], "a\na"),
- {done,{ok,[{second,1},{second,2}],2},eof} = test_line_wrap:tokens(Cont1, eof)
+ {done,{ok,[{second,{1,1}},{second,{2,1}}],2},eof} = test_line_wrap:tokens(Cont1, eof)
end(),
ok.
@@ -1044,7 +1054,7 @@ otp_10302(Config) when is_list(Config) ->
"-export([t/0]).\n"
"t() ->\n"
" %% Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"\n"
- " {ok, [R], 1} = string(\"tip\"),\n"
+ " {ok, [R], {1,4}} = string(\"tip\"),\n"
" {tip,foo,'Häpp',[1024,66],[246,114,110,95,1024]} = R,\n"
" Häpp = foo,\n"
" {tip, Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"} = R,\n"
@@ -1065,7 +1075,7 @@ otp_10302(Config) when is_list(Config) ->
"-export([t/0]).\n"
"t() ->\n"
" %% Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"\n"
- " {ok, [R], 1} = string(\"tip\"),\n"
+ " {ok, [R], {1,4}} = string(\"tip\"),\n"
" {tip,foo,'Häpp',[1024,66],[195,182,114,110,95,208,128]} = R,\n"
" Häpp = foo,\n"
" {tip, Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ѐ\"} = R,\n"
@@ -1139,23 +1149,23 @@ otp_13916(Config) when is_list(Config) ->
"Rules.\n"
"%% mark line break(s) and empty lines by token 'break'\n"
"%% in order to use as delimiters\n"
- "{B}({S}*{B})+ : {token, {break, TokenLine}}.\n"
- "{B} : {token, {break, TokenLine}}.\n"
- "{S}+ : {token, {blank, TokenLine, TokenChars}}.\n"
- "{W}+ : {token, {word, TokenLine, TokenChars}}.\n"
+ "{B}({S}*{B})+ : {token, {break, TokenLoc}}.\n"
+ "{B} : {token, {break, TokenLoc}}.\n"
+ "{S}+ : {token, {blank, TokenLoc, TokenChars}}.\n"
+ "{W}+ : {token, {word, TokenLoc, TokenChars}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,[{break,1},{blank,4,\" \"},{word,4,\"breaks\"}],4} =\n"
+ " {ok,[{break,{1,1}},{blank,{4,1},\" \"},{word,{4,3},\"breaks\"}],{4,9}} =\n"
" string(\"\\n\\n \\n breaks\"),\n"
- " {ok,[{break,1},{word,4,\"works\"}],4} =\n"
+ "{ok,[{break,{1,1}},{word,{4,1},\"works\"}],{4,6}} =\n"
" string(\"\\n\\n \\nworks\"),\n"
- " {ok,[{break,1},{word,4,\"L4\"},{break,4},\n"
- " {word,5,\"L5\"},{break,5},{word,7,\"L7\"}], 7} =\n"
+ " {ok,[{break,{1,1}},{word,{4,1},\"L4\"},{break,{4,3}},\n"
+ " {word,{5,1},\"L5\"},{break,{5,3}},{word,{7,1},\"L7\"}], {7,3}} =\n"
" string(\"\\n\\n \\nL4\\nL5\\n\\nL7\"),\n"
- " {ok,[{break,1},{blank,4,\" \"},{word,4,\"L4\"},\n"
- " {break,4},{blank,5,\" \"},{word,5,\"L5\"},\n"
- " {break,5},{blank,7,\" \"},{word,7,\"L7\"}], 7} =\n"
+ "{ok,[{break,{1,1}},{blank,{4,1},\" \"},{word,{4,2} ,\"L4\"},\n"
+ " {break,{4,4}},{blank,{5,1},\" \"},{word,{5,2},\"L5\"},\n"
+ " {break,{5,4}},{blank,{7,1},\" \"},{word,{7,2},\"L7\"}], {7,4}} =\n"
" string(\"\\n\\n \\n L4\\n L5\\n\\n L7\"),\n"
" ok.\n">>,
default,
@@ -1164,6 +1174,7 @@ otp_13916(Config) when is_list(Config) ->
ok.
otp_14285(Config) ->
+ %% \x{400} takes 2 bytes in UTF-8, so it advances the column count by 2
Ts = [{otp_14285_1,
<<"%% encoding: latin-1\n"
"Definitions.\n"
@@ -1173,11 +1184,11 @@ otp_14285(Config) ->
"U = [\\x{400}]\n"
"Rules.\n"
"{L}+ : {token,l}.\n"
- "{U}+ : {token,'\\x{400}'}.\n"
+ "{U}+ : {token,{TokenLine,'\\x{400}'}}.\n"
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>,
+ " {ok,[{1,'\\x{400}'}],{1,3}} = string(\"\\x{400}\"), ok.\n">>,
default,
ok},
{otp_14285_2,
@@ -1193,7 +1204,7 @@ otp_14285(Config) ->
"Erlang code.\n"
"-export([t/0]).\n"
"t() ->\n"
- " {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>,
+ " {ok,['\x{400}'],{1,3}} = string(\"\x{400}\"), ok.\n"/utf8>>,
default,
ok}],
run(Config, Ts),
@@ -1225,6 +1236,54 @@ otp_17023(Config) ->
end,
ok.
+%% Additional tests added with column support
+column_support(Config) ->
+ Ts = [{token_col_var,
+ <<"Definitions.\n"
+ "D = [0-9]\n"
+ "W = [\\s\\n]\n"
+ "Rules.\n"
+ "{W}+ :\n"
+ "skip_token.\n"
+ "{D}+ :\n"
+ "{token,{integer,{TokenLine,TokenCol},list_to_integer(TokenChars)}}.\n"
+ "{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? :\n"
+ "{token,{float,{TokenLine,TokenCol},list_to_float(TokenChars)}}.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ "{ok,[{float, {2,1}, 4.44},{integer, {3,3}, 5},{integer, {7,3}, 7}],{8,2}}"
+ "= string(\"\n4.44 \n 5 \n \n\n\n 7 \n \"), ok.\n">>,
+ default,
+ ok},
+ {tab,
+ <<"Definitions.\n"
+ "Rules.\n"
+ "[a]+[\\n]*= : {token, {first, TokenLoc}}.\n"
+ "[a]+ : {token, {second, TokenLoc}}.\n"
+ "[\\s\\r\\n\\t]+ : skip_token.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ "{ok,[{second,{1,27}},{second,{2,19}}],{2,25}} = string(\" \t \t\t a\\n \t \t aaa\t\"), ok.\n">>,
+ default,
+ ok},
+ {tab_custom_size,
+ <<"Definitions.\n"
+ "Rules.\n"
+ "[a]+[\\n]*= : {token, {first, TokenLoc}}.\n"
+ "[a]+ : {token, {second, TokenLoc}}.\n"
+ "[\\s\\r\\n\\t]+ : skip_token.\n"
+ "Erlang code.\n"
+ "-export([t/0]).\n"
+ "t() ->\n"
+ "{ok,[{second,{1,15}},{second,{2,9}}],{2,16}} = string(\" \t \t\t a\\n \t \t aaa\t\"), ok.\n">>,
+ default,
+ [{tab_size,3}],
+ ok}],
+ run(Config, Ts),
+ ok.
+
%% OTP-17499. GH-4918.
compiler_warnings(Config) ->
Xrl =
@@ -1256,18 +1315,23 @@ writable(Fname) ->
ok = file:write_file_info(Fname, Info#file_info{mode = Mode}).
run(Config, Tests) ->
- F = fun({N,P,Pre,E}) ->
- case catch run_test(Config, P, Pre) of
- E ->
- ok;
- Bad ->
- ct:fail("~nTest ~p failed. Expected~n ~p~n"
- "but got~n ~p~n", [N, E, Bad])
- end
+ F = fun F({N,P,Pre,E}) ->
+ F({N,P,Pre,[],E});
+ F({N,P,Pre,Opts,E}) ->
+ case catch run_test(Config,P,Pre,Opts) of
+ E ->
+ ok;
+ Bad ->
+ ct:fail("~nTest ~p failed. Expected~n ~p~n"
+ "but got~n ~p~n", [N, E, Bad])
+ end
end,
lists:foreach(F, Tests).
run_test(Config, Def, Pre) ->
+ run_test(Config, Def, Pre, []).
+
+run_test(Config, Def, Pre, LOpts0) ->
%% io:format("testing ~s~n", [binary_to_list(Def)]),
DefFile = 'leex_test.xrl',
Filename = 'leex_test.erl',
@@ -1276,14 +1340,14 @@ run_test(Config, Def, Pre) ->
ErlFile = filename:join(DataDir, Filename),
Opts = [return, warn_unused_vars,{outdir,DataDir}],
ok = file:write_file(XrlFile, Def),
- LOpts = [return, {report, false} |
+ LOpts = LOpts0 ++ [return, {report, false} |
case Pre of
default ->
[];
_ ->
[{includefile,Pre}]
end],
- XOpts = [verbose, dfa_graph], % just to get some code coverage...
+ XOpts = [verbose, dfa_graph, {error_location, column}], % just to get some code coverage...
LRet = leex:file(XrlFile, XOpts ++ LOpts),
case LRet of
{ok, _Outfile, _LWs} ->