diff options
author | Marko Minđek <marko.mindek@invariant.hr> | 2023-01-30 00:05:30 +0100 |
---|---|---|
committer | Marko Minđek <marko.mindek@invariant.hr> | 2023-02-21 13:39:54 +0100 |
commit | 23014493ea15eb7753777d2ca1f664096263a76b (patch) | |
tree | d9799cf47248bdc470bb2fb1af45e5180d2e4e98 /lib/parsetools | |
parent | bc6f6bc98e60c4dc46b0372b615ec0433f1d8abd (diff) | |
download | erlang-23014493ea15eb7753777d2ca1f664096263a76b.tar.gz |
leex column number support
Added:
Support for including column number in token, end and error locations.
Variables in Rules: TokenCol and TokenLoc
Leex options: error_location and tab_size
Fixed:
Bug in testcase OTP14285
Diffstat (limited to 'lib/parsetools')
-rw-r--r-- | lib/parsetools/doc/src/leex.xml | 53 | ||||
-rw-r--r-- | lib/parsetools/include/leexinc.hrl | 377 | ||||
-rw-r--r-- | lib/parsetools/src/leex.erl | 190 | ||||
-rw-r--r-- | lib/parsetools/test/leex_SUITE.erl | 334 |
4 files changed, 592 insertions, 362 deletions
diff --git a/lib/parsetools/doc/src/leex.xml b/lib/parsetools/doc/src/leex.xml index d802e46b59..7714a7c27f 100644 --- a/lib/parsetools/doc/src/leex.xml +++ b/lib/parsetools/doc/src/leex.xml @@ -115,6 +115,22 @@ <p>Causes generated -file() attributes to only include the basename of the file path.</p> </item> + <tag><c>{error_location, line | column}</c></tag> + <item> + <p>If set to <c>column</c>, error location will be + <c>{Line,Column}</c> tuple instead of just <c>Line</c>. Also, <c>StartLoc</c> + and <c>EndLoc</c> in <c>string/2</c>, <c>token/3</c>, and <c>tokens/3</c> + functions will be <c>{Line,Column}</c> tuple instead of just <c>Line</c>. + Default is <c>line</c>. Note that you can use <c>TokenLoc</c> for token + location independently, even if the <c>error_location</c> is set to <c>line</c>.</p> + <p>Unicode characters are counted as many columns as they use bytes + to represent.</p> + </item> + <tag><c>{tab_size, pos_integer()}</c></tag> + <item> + <p>Sets the width of <c>\t</c> character (only relevant if <c>error_location</c> + is set to <c>column</c>). Default is <c>8</c>.</p> + </item> </taglist> <p>Any of the Boolean options can be set to <c>true</c> by stating the name of the option. For example, <c>verbose</c> @@ -147,17 +163,18 @@ </fsdescription> <func> <name since="">Module:string(String) -> StringRet</name> - <name since="">Module:string(String, StartLine) -> StringRet</name> + <name since="">Module:string(String, StartLoc) -> StringRet</name> <fsummary>Generated by Leex</fsummary> <type> <v>String = string()</v> - <v>StringRet = {ok,Tokens,EndLine} | ErrorInfo</v> + <v>StringRet = {ok,Tokens,EndLoc} | ErrorInfo</v> <v>Tokens = [Token]</v> - <v>EndLine = StartLine = erl_anno:line()</v> + <v>StartLoc = EndLoc = erl_anno:location()</v> </type> <desc> <p>Scans <c>String</c> and returns all the tokens in it, or an - error.</p> + error. 
<c>StartLoc</c> and <c>EndLoc</c> are either <c>erl_anno:line()</c> + or <c>erl_anno:location()</c>, depending on the <c>error_location</c> option.</p> <note><p>It is an error if not all of the characters in <c>String</c> are consumed.</p></note> </desc> @@ -166,7 +183,7 @@ <func> <name since="">Module:token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars} </name> - <name since="">Module:token(Cont, Chars, StartLine) -> {more,Cont1} + <name since="">Module:token(Cont, Chars, StartLoc) -> {more,Cont1} | {done,TokenRet,RestChars} </name> <fsummary>Generated by Leex</fsummary> @@ -174,10 +191,10 @@ <v>Cont = [] | Cont1</v> <v>Cont1 = tuple()</v> <v>Chars = RestChars = string() | eof</v> - <v>TokenRet = {ok, Token, EndLine} - | {eof, EndLine} + <v>TokenRet = {ok, Token, EndLoc} + | {eof, EndLoc} | ErrorInfo</v> - <v>StartLine = EndLine = erl_anno:line()</v> + <v>StartLoc = EndLoc = erl_anno:location()</v> </type> <desc> <p>This is a re-entrant call to try and scan one token from @@ -193,7 +210,7 @@ but used through the i/o system where it can typically be called in an application by:</p> <code> -io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]}) +io:request(InFile, {get_until,unicode,Prompt,Module,token,[Loc]}) -> TokenRet</code> </desc> </func> @@ -201,7 +218,7 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]}) <func> <name since="">Module:tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars} </name> - <name since="">Module:tokens(Cont, Chars, StartLine) -> + <name since="">Module:tokens(Cont, Chars, StartLoc) -> {more,Cont1} | {done,TokensRet,RestChars} </name> <fsummary>Generated by Leex</fsummary> @@ -209,11 +226,11 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]}) <v>Cont = [] | Cont1</v> <v>Cont1 = tuple()</v> <v>Chars = RestChars = string() | eof</v> - <v>TokensRet = {ok, Tokens, EndLine} - | {eof, EndLine} + <v>TokensRet = {ok, Tokens, EndLoc} + | {eof, EndLoc} | ErrorInfo</v> <v>Tokens = 
[Token]</v> - <v>StartLine = EndLine = erl_anno:line()</v> + <v>StartLoc = EndLoc = erl_anno:location()</v> </type> <desc> <p>This is a re-entrant call to try and scan tokens from @@ -240,7 +257,7 @@ io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]}) but used through the i/o system where it can typically be called in an application by:</p> <code> -io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Line]}) +io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Loc]}) -> TokensRet</code> </desc> </func> @@ -320,6 +337,14 @@ NAME = VALUE</code> <tag><c>TokenLine</c></tag> <item><p>The line number where the token occurred.</p> </item> + <tag><c>TokenCol</c></tag> + <item><p>The column number where the token occurred + (column of the first character included in the token).</p> + </item> + <tag><c>TokenLoc</c></tag> + <item><p>Token location. Expands to <c>{TokenLine,TokenCol}</c> (even + when <c>error_location</c> is set to <c>line</c>).</p> + </item> </taglist> <p>The code must return:</p> diff --git a/lib/parsetools/include/leexinc.hrl b/lib/parsetools/include/leexinc.hrl index 8dfc42f479..a06584ff79 100644 --- a/lib/parsetools/include/leexinc.hrl +++ b/lib/parsetools/include/leexinc.hrl @@ -16,261 +16,269 @@ format_error({illegal,S}) -> ["illegal characters ",io_lib:write_string(S)]; format_error({user,S}) -> S. -string(String) -> string(String, 1). - -string(String, Line) -> string(String, Line, String, []). - -%% string(InChars, Line, TokenChars, Tokens) -> -%% {ok,Tokens,Line} | {error,ErrorInfo,Line}. -%% Note the line number going into yystate, L0, is line of token -%% start while line number returned is line of token end. We want line -%% of token start. - -string([], L, [], Ts) -> % No partial tokens! 
- {ok,yyrev(Ts),L}; -string(Ics0, L0, Tcs, Ts) -> - case yystate(yystate(), Ics0, L0, 0, reject, 0) of - {A,Alen,Ics1,L1} -> % Accepting end state - string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts); - {A,Alen,Ics1,L1,_S1} -> % Accepting transition state - string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L0), Ts); - {reject,_Alen,Tlen,_Ics1,L1,_S1} -> % After a non-accepting state - {error,{L0,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},L1}; - {A,Alen,Tlen,_Ics1,L1,_S1} -> +%% string(InChars) -> +%% string(InChars, Loc) -> +%% {ok,Tokens,EndLoc} | {error,ErrorInfo,EndLoc}. +%% Loc is the starting location of the token, while EndLoc is the first not scanned +%% location. Location is either Line or {Line,Column}, depending on the "error_location" option. + +##str + +do_string([], L, C, [], Ts) -> % No partial tokens! + {ok,yyrev(Ts),{L,C}}; +do_string(Ics0, L0, C0, Tcs, Ts) -> + case yystate(yystate(), Ics0, L0, C0, 0, reject, 0) of + {A,Alen,Ics1,L1,_C1} -> % Accepting end state + C2 = adjust_col(Tcs, Alen, C0), + string_cont(Ics1, L1, C2, yyaction(A, Alen, Tcs, L0, C0), Ts); + {A,Alen,Ics1,L1,_C1,_S1} -> % Accepting transition state + C2 = adjust_col(Tcs, Alen, C0), + string_cont(Ics1, L1, C2, yyaction(A, Alen, Tcs, L0, C0), Ts); + {reject,_Alen,Tlen,_Ics1,_L1,_C1,_S1} -> % After a non-accepting state + {error,{{L0, C0} ,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},{L0, C0}}; + {A,Alen,Tlen,_Ics1,L1, C1,_S1}-> Tcs1 = yysuf(Tcs, Alen), L2 = adjust_line(Tlen, Alen, Tcs1, L1), - string_cont(Tcs1, L2, yyaction(A, Alen, Tcs, L0), Ts) + C2 = adjust_col(Tcs, Alen, C1), + string_cont(Tcs1, L2, C2, yyaction(A, Alen, Tcs, L0,C0), Ts) end. -%% string_cont(RestChars, Line, Token, Tokens) +%% string_cont(RestChars, Line, Col, Token, Tokens) %% Test for and remove the end token wrapper. Push back characters %% are prepended to RestChars. --dialyzer({nowarn_function, string_cont/4}). +-dialyzer({nowarn_function, string_cont/5}). 
-string_cont(Rest, Line, {token,T}, Ts) -> - string(Rest, Line, Rest, [T|Ts]); -string_cont(Rest, Line, {token,T,Push}, Ts) -> +string_cont(Rest, Line, Col, {token,T}, Ts) -> + do_string(Rest, Line, Col, Rest, [T|Ts]); +string_cont(Rest, Line, Col, {token,T,Push}, Ts) -> NewRest = Push ++ Rest, - string(NewRest, Line, NewRest, [T|Ts]); -string_cont(Rest, Line, {end_token,T}, Ts) -> - string(Rest, Line, Rest, [T|Ts]); -string_cont(Rest, Line, {end_token,T,Push}, Ts) -> + do_string(NewRest, Line, Col, NewRest, [T|Ts]); +string_cont(Rest, Line, Col, {end_token,T}, Ts) -> + do_string(Rest, Line, Col, Rest, [T|Ts]); +string_cont(Rest, Line, Col, {end_token,T,Push}, Ts) -> NewRest = Push ++ Rest, - string(NewRest, Line, NewRest, [T|Ts]); -string_cont(Rest, Line, skip_token, Ts) -> - string(Rest, Line, Rest, Ts); -string_cont(Rest, Line, {skip_token,Push}, Ts) -> + do_string(NewRest, Line, Col, NewRest, [T|Ts]); +string_cont(Rest, Line, Col, skip_token, Ts) -> + do_string(Rest, Line, Col, Rest, Ts); +string_cont(Rest, Line, Col, {skip_token,Push}, Ts) -> NewRest = Push ++ Rest, - string(NewRest, Line, NewRest, Ts); -string_cont(_Rest, Line, {error,S}, _Ts) -> - {error,{Line,?MODULE,{user,S}},Line}. + do_string(NewRest, Line, Col, NewRest, Ts); +string_cont(_Rest, Line, Col, {error,S}, _Ts) -> + {error,{{Line, Col},?MODULE,{user,S}},{Line,Col}}. %% token(Continuation, Chars) -> -%% token(Continuation, Chars, Line) -> +%% token(Continuation, Chars, Loc) -> %% {more,Continuation} | {done,ReturnVal,RestChars}. %% Must be careful when re-entering to append the latest characters to the %% after characters in an accept. The continuation is: -%% {token,State,CurrLine,TokenChars,TokenLen,TokenLine,AccAction,AccLen} +%% {token,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,AccAction,AccLen} -token(Cont, Chars) -> token(Cont, Chars, 1). 
+##tkn -token([], Chars, Line) -> - token(yystate(), Chars, Line, Chars, 0, Line, reject, 0); -token({token,State,Line,Tcs,Tlen,Tline,Action,Alen}, Chars, _) -> - token(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Action, Alen). +do_token([], Chars, Line, Col) -> + token(yystate(), Chars, Line, Col, Chars, 0, Line, Col, reject, 0); +do_token({token,State,Line,Col,Tcs,Tlen,Tline,Tcol,Action,Alen}, Chars, _, _) -> + token(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Action, Alen). -%% token(State, InChars, Line, TokenChars, TokenLen, TokenLine, +%% token(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol %% AcceptAction, AcceptLen) -> %% {more,Continuation} | {done,ReturnVal,RestChars}. %% The argument order is chosen to be more efficient. -token(S0, Ics0, L0, Tcs, Tlen0, Tline, A0, Alen0) -> - case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of +token(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, A0, Alen0) -> + case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of %% Accepting end state, we have a token. - {A1,Alen1,Ics1,L1} -> - token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline)); + {A1,Alen1,Ics1,L1,C1} -> + C2 = adjust_col(Tcs, Alen1, C1), + token_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol)); %% Accepting transition state, can take more chars. - {A1,Alen1,[],L1,S1} -> % Need more chars to check - {more,{token,S1,L1,Tcs,Alen1,Tline,A1,Alen1}}; - {A1,Alen1,Ics1,L1,_S1} -> % Take what we got - token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline)); + {A1,Alen1,[],L1,C1,S1} -> % Need more chars to check + {more,{token,S1,L1,C1,Tcs,Alen1,Tline,Tcol,A1,Alen1}}; + {A1,Alen1,Ics1,L1,C1,_S1} -> % Take what we got + C2 = adjust_col(Tcs, Alen1, C1), + token_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol)); %% After a non-accepting state, maybe reach accept state later. 
- {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check - {more,{token,S1,L1,Tcs,Tlen1,Tline,A1,Alen1}}; - {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match + {A1,Alen1,Tlen1,[],L1,C1,S1} -> % Need more chars to check + {more,{token,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,A1,Alen1}}; + {reject,_Alen1,Tlen1,eof,L1,C1,_S1} -> % No token match %% Check for partial token which is error. - Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE, + Ret = if Tlen1 > 0 -> {error,{{Tline,Tcol},?MODULE, %% Skip eof tail in Tcs. - {illegal,yypre(Tcs, Tlen1)}},L1}; - true -> {eof,L1} + {illegal,yypre(Tcs, Tlen1)}},{L1,C1}}; + true -> {eof,{L1,C1}} end, {done,Ret,eof}; - {reject,_Alen1,Tlen1,Ics1,L1,_S1} -> % No token match - Error = {Tline,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, - {done,{error,Error,L1},Ics1}; - {A1,Alen1,Tlen1,_Ics1,L1,_S1} -> % Use last accept match + {reject,_Alen1,Tlen1,Ics1,_L1,_C1,_S1} -> % No token match + Error = {{Tline,Tcol},?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, + {done,{error,Error,{Tline,Tcol}},Ics1}; + {A1,Alen1,Tlen1,_Ics1,L1,_C1,_S1} -> % Use last accept match Tcs1 = yysuf(Tcs, Alen1), L2 = adjust_line(Tlen1, Alen1, Tcs1, L1), - token_cont(Tcs1, L2, yyaction(A1, Alen1, Tcs, Tline)) + C2 = C0 + Alen1, + token_cont(Tcs1, L2, C2, yyaction(A1, Alen1, Tcs, Tline, Tcol)) end. -%% token_cont(RestChars, Line, Token) +%% token_cont(RestChars, Line, Col, Token) %% If we have a token or error then return done, else if we have a %% skip_token then continue. --dialyzer({nowarn_function, token_cont/3}). +-dialyzer({nowarn_function, token_cont/4}). 
-token_cont(Rest, Line, {token,T}) -> - {done,{ok,T,Line},Rest}; -token_cont(Rest, Line, {token,T,Push}) -> +token_cont(Rest, Line, Col, {token,T}) -> + {done,{ok,T,{Line,Col}},Rest}; +token_cont(Rest, Line, Col, {token,T,Push}) -> NewRest = Push ++ Rest, - {done,{ok,T,Line},NewRest}; -token_cont(Rest, Line, {end_token,T}) -> - {done,{ok,T,Line},Rest}; -token_cont(Rest, Line, {end_token,T,Push}) -> + {done,{ok,T,{Line,Col}},NewRest}; +token_cont(Rest, Line, Col, {end_token,T}) -> + {done,{ok,T,{Line,Col}},Rest}; +token_cont(Rest, Line, Col, {end_token,T,Push}) -> NewRest = Push ++ Rest, - {done,{ok,T,Line},NewRest}; -token_cont(Rest, Line, skip_token) -> - token(yystate(), Rest, Line, Rest, 0, Line, reject, 0); -token_cont(Rest, Line, {skip_token,Push}) -> + {done,{ok,T,{Line,Col}},NewRest}; +token_cont(Rest, Line, Col, skip_token) -> + token(yystate(), Rest, Line, Col, Rest, 0, Line, Col, reject, 0); +token_cont(Rest, Line, Col, {skip_token,Push}) -> NewRest = Push ++ Rest, - token(yystate(), NewRest, Line, NewRest, 0, Line, reject, 0); -token_cont(Rest, Line, {error,S}) -> - {done,{error,{Line,?MODULE,{user,S}},Line},Rest}. + token(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, reject, 0); +token_cont(Rest, Line, Col, {error,S}) -> + {done,{error,{{Line, Col},?MODULE,{user,S}},{Line, Col}},Rest}. -%% tokens(Continuation, Chars, Line) -> +%% tokens(Continuation, Chars) -> +%% tokens(Continuation, Chars, Loc) -> %% {more,Continuation} | {done,ReturnVal,RestChars}. %% Must be careful when re-entering to append the latest characters to the %% after characters in an accept. 
The continuation is: -%% {tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Tokens,AccAction,AccLen} -%% {skip_tokens,State,CurrLine,TokenChars,TokenLen,TokenLine,Error,AccAction,AccLen} +%% {tokens,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,Tokens,AccAction,AccLen} +%% {skip_tokens,State,CurrLine,CurrCol,TokenChars,TokenLen,TokenLine,TokenCol,Error,AccAction,AccLen} -tokens(Cont, Chars) -> tokens(Cont, Chars, 1). +##tks -tokens([], Chars, Line) -> - tokens(yystate(), Chars, Line, Chars, 0, Line, [], reject, 0); -tokens({tokens,State,Line,Tcs,Tlen,Tline,Ts,Action,Alen}, Chars, _) -> - tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Ts, Action, Alen); -tokens({skip_tokens,State,Line,Tcs,Tlen,Tline,Error,Action,Alen}, Chars, _) -> - skip_tokens(State, Chars, Line, Tcs ++ Chars, Tlen, Tline, Error, Action, Alen). +do_tokens([], Chars, Line, Col) -> + tokens(yystate(), Chars, Line, Col, Chars, 0, Line, Col, [], reject, 0); +do_tokens({tokens,State,Line,Col,Tcs,Tlen,Tline,Tcol,Ts,Action,Alen}, Chars, _,_) -> + tokens(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Ts, Action, Alen); +do_tokens({skip_tokens,State,Line, Col, Tcs,Tlen,Tline,Tcol,Error,Action,Alen}, Chars, _,_) -> + skip_tokens(State, Chars, Line, Col, Tcs ++ Chars, Tlen, Tline, Tcol, Error, Action, Alen). -%% tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Tokens, +%% tokens(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol,Tokens, %% AcceptAction, AcceptLen) -> %% {more,Continuation} | {done,ReturnVal,RestChars}. -tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Ts, A0, Alen0) -> - case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of +tokens(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, Ts, A0, Alen0) -> + case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of %% Accepting end state, we have a token. 
- {A1,Alen1,Ics1,L1} -> - tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts); + {A1,Alen1,Ics1,L1,C1} -> + C2 = adjust_col(Tcs, Alen1, C1), + tokens_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline, Tcol), Ts); %% Accepting transition state, can take more chars. - {A1,Alen1,[],L1,S1} -> % Need more chars to check - {more,{tokens,S1,L1,Tcs,Alen1,Tline,Ts,A1,Alen1}}; - {A1,Alen1,Ics1,L1,_S1} -> % Take what we got - tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Ts); + {A1,Alen1,[],L1,C1,S1} -> % Need more chars to check + {more,{tokens,S1,L1,C1,Tcs,Alen1,Tline,Tcol,Ts,A1,Alen1}}; + {A1,Alen1,Ics1,L1,C1,_S1} -> % Take what we got + C2 = adjust_col(Tcs, Alen1, C1), + tokens_cont(Ics1, L1, C2, yyaction(A1, Alen1, Tcs, Tline,Tcol), Ts); %% After a non-accepting state, maybe reach accept state later. - {A1,Alen1,Tlen1,[],L1,S1} -> % Need more chars to check - {more,{tokens,S1,L1,Tcs,Tlen1,Tline,Ts,A1,Alen1}}; - {reject,_Alen1,Tlen1,eof,L1,_S1} -> % No token match + {A1,Alen1,Tlen1,[],L1,C1,S1} -> % Need more chars to check + {more,{tokens,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,Ts,A1,Alen1}}; + {reject,_Alen1,Tlen1,eof,L1,C1,_S1} -> % No token match %% Check for partial token which is error, no need to skip here. - Ret = if Tlen1 > 0 -> {error,{Tline,?MODULE, + Ret = if Tlen1 > 0 -> {error,{{Tline,Tcol},?MODULE, %% Skip eof tail in Tcs. - {illegal,yypre(Tcs, Tlen1)}},L1}; - Ts == [] -> {eof,L1}; - true -> {ok,yyrev(Ts),L1} + {illegal,yypre(Tcs, Tlen1)}},{L1,C1}}; + Ts == [] -> {eof,{L1,C1}}; + true -> {ok,yyrev(Ts),{L1,C1}} end, {done,Ret,eof}; - {reject,_Alen1,Tlen1,_Ics1,L1,_S1} -> + {reject,_Alen1,Tlen1,_Ics1,L1,C1,_S1} -> %% Skip rest of tokens. 
- Error = {L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, - skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error); - {A1,Alen1,Tlen1,_Ics1,L1,_S1} -> - Token = yyaction(A1, Alen1, Tcs, Tline), + Error = {{L1,C1},?MODULE,{illegal,yypre(Tcs, Tlen1+1)}}, + skip_tokens(yysuf(Tcs, Tlen1+1), L1, C1, Error); + {A1,Alen1,Tlen1,_Ics1,L1,_C1,_S1} -> + Token = yyaction(A1, Alen1, Tcs, Tline,Tcol), Tcs1 = yysuf(Tcs, Alen1), L2 = adjust_line(Tlen1, Alen1, Tcs1, L1), - tokens_cont(Tcs1, L2, Token, Ts) + C2 = C0 + Alen1, + tokens_cont(Tcs1, L2, C2, Token, Ts) end. -%% tokens_cont(RestChars, Line, Token, Tokens) +%% tokens_cont(RestChars, Line, Column, Token, Tokens) %% If we have an end_token or error then return done, else if we have %% a token then save it and continue, else if we have a skip_token %% just continue. --dialyzer({nowarn_function, tokens_cont/4}). +-dialyzer({nowarn_function, tokens_cont/5}). -tokens_cont(Rest, Line, {token,T}, Ts) -> - tokens(yystate(), Rest, Line, Rest, 0, Line, [T|Ts], reject, 0); -tokens_cont(Rest, Line, {token,T,Push}, Ts) -> +tokens_cont(Rest, Line, Col, {token,T}, Ts) -> + tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, [T|Ts], reject, 0); +tokens_cont(Rest, Line, Col, {token,T,Push}, Ts) -> NewRest = Push ++ Rest, - tokens(yystate(), NewRest, Line, NewRest, 0, Line, [T|Ts], reject, 0); -tokens_cont(Rest, Line, {end_token,T}, Ts) -> - {done,{ok,yyrev(Ts, [T]),Line},Rest}; -tokens_cont(Rest, Line, {end_token,T,Push}, Ts) -> + tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, [T|Ts], reject, 0); +tokens_cont(Rest, Line, Col, {end_token,T}, Ts) -> + {done,{ok,yyrev(Ts, [T]),{Line,Col}},Rest}; +tokens_cont(Rest, Line, Col, {end_token,T,Push}, Ts) -> NewRest = Push ++ Rest, - {done,{ok,yyrev(Ts, [T]),Line},NewRest}; -tokens_cont(Rest, Line, skip_token, Ts) -> - tokens(yystate(), Rest, Line, Rest, 0, Line, Ts, reject, 0); -tokens_cont(Rest, Line, {skip_token,Push}, Ts) -> + {done,{ok,yyrev(Ts, [T]),{Line, Col}},NewRest}; +tokens_cont(Rest, 
Line, Col, skip_token, Ts) -> + tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Ts, reject, 0); +tokens_cont(Rest, Line, Col, {skip_token,Push}, Ts) -> NewRest = Push ++ Rest, - tokens(yystate(), NewRest, Line, NewRest, 0, Line, Ts, reject, 0); -tokens_cont(Rest, Line, {error,S}, _Ts) -> - skip_tokens(Rest, Line, {Line,?MODULE,{user,S}}). + tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Ts, reject, 0); +tokens_cont(Rest, Line, Col, {error,S}, _Ts) -> + skip_tokens(Rest, Line, Col, {{Line,Col},?MODULE,{user,S}}). -%%skip_tokens(InChars, Line, Error) -> {done,{error,Error,Line},Ics}. +%% skip_tokens(InChars, Line, Col, Error) -> {done,{error,Error,{Line,Col}},Ics}. %% Skip tokens until an end token, junk everything and return the error. -skip_tokens(Ics, Line, Error) -> - skip_tokens(yystate(), Ics, Line, Ics, 0, Line, Error, reject, 0). +skip_tokens(Ics, Line, Col, Error) -> + skip_tokens(yystate(), Ics, Line, Col, Ics, 0, Line, Col, Error, reject, 0). -%% skip_tokens(State, InChars, Line, TokenChars, TokenLen, TokenLine, Tokens, +%% skip_tokens(State, InChars, Line, Col, TokenChars, TokenLen, TokenLine, TokenCol, Tokens, %% AcceptAction, AcceptLen) -> %% {more,Continuation} | {done,ReturnVal,RestChars}. 
-skip_tokens(S0, Ics0, L0, Tcs, Tlen0, Tline, Error, A0, Alen0) -> - case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of - {A1,Alen1,Ics1,L1} -> % Accepting end state - skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error); - {A1,Alen1,[],L1,S1} -> % After an accepting state - {more,{skip_tokens,S1,L1,Tcs,Alen1,Tline,Error,A1,Alen1}}; - {A1,Alen1,Ics1,L1,_S1} -> - skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, Tline), Error); - {A1,Alen1,Tlen1,[],L1,S1} -> % After a non-accepting state - {more,{skip_tokens,S1,L1,Tcs,Tlen1,Tline,Error,A1,Alen1}}; - {reject,_Alen1,_Tlen1,eof,L1,_S1} -> - {done,{error,Error,L1},eof}; - {reject,_Alen1,Tlen1,_Ics1,L1,_S1} -> - skip_tokens(yysuf(Tcs, Tlen1+1), L1, Error); - {A1,Alen1,Tlen1,_Ics1,L1,_S1} -> - Token = yyaction(A1, Alen1, Tcs, Tline), +skip_tokens(S0, Ics0, L0, C0, Tcs, Tlen0, Tline, Tcol, Error, A0, Alen0) -> + case yystate(S0, Ics0, L0, C0, Tlen0, A0, Alen0) of + {A1,Alen1,Ics1,L1, C1} -> % Accepting end state + skip_cont(Ics1, L1, C1, yyaction(A1, Alen1, Tcs, Tline, Tcol), Error); + {A1,Alen1,[],L1,C1, S1} -> % After an accepting state + {more,{skip_tokens,S1,L1,C1,Tcs,Alen1,Tline,Tcol,Error,A1,Alen1}}; + {A1,Alen1,Ics1,L1,C1,_S1} -> + skip_cont(Ics1, L1, C1, yyaction(A1, Alen1, Tcs, Tline, Tcol), Error); + {A1,Alen1,Tlen1,[],L1,C1,S1} -> % After a non-accepting state + {more,{skip_tokens,S1,L1,C1,Tcs,Tlen1,Tline,Tcol,Error,A1,Alen1}}; + {reject,_Alen1,_Tlen1,eof,L1,C1,_S1} -> + {done,{error,Error,{L1,C1}},eof}; + {reject,_Alen1,Tlen1,_Ics1,L1,C1,_S1} -> + skip_tokens(yysuf(Tcs, Tlen1+1), L1, C1,Error); + {A1,Alen1,Tlen1,_Ics1,L1,C1,_S1} -> + Token = yyaction(A1, Alen1, Tcs, Tline, Tcol), Tcs1 = yysuf(Tcs, Alen1), L2 = adjust_line(Tlen1, Alen1, Tcs1, L1), - skip_cont(Tcs1, L2, Token, Error) + skip_cont(Tcs1, L2, C1, Token, Error) end. -%% skip_cont(RestChars, Line, Token, Error) +%% skip_cont(RestChars, Line, Col, Token, Error) %% Skip tokens until we have an end_token or error then return done %% with the original error. 
--dialyzer({nowarn_function, skip_cont/4}). +-dialyzer({nowarn_function, skip_cont/5}). -skip_cont(Rest, Line, {token,_T}, Error) -> - skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0); -skip_cont(Rest, Line, {token,_T,Push}, Error) -> +skip_cont(Rest, Line, Col, {token,_T}, Error) -> + skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0); +skip_cont(Rest, Line, Col, {token,_T,Push}, Error) -> NewRest = Push ++ Rest, - skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0); -skip_cont(Rest, Line, {end_token,_T}, Error) -> - {done,{error,Error,Line},Rest}; -skip_cont(Rest, Line, {end_token,_T,Push}, Error) -> + skip_tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Error, reject, 0); +skip_cont(Rest, Line, Col, {end_token,_T}, Error) -> + {done,{error,Error,{Line,Col}},Rest}; +skip_cont(Rest, Line, Col, {end_token,_T,Push}, Error) -> NewRest = Push ++ Rest, - {done,{error,Error,Line},NewRest}; -skip_cont(Rest, Line, skip_token, Error) -> - skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0); -skip_cont(Rest, Line, {skip_token,Push}, Error) -> + {done,{error,Error,{Line,Col}},NewRest}; +skip_cont(Rest, Line, Col, skip_token, Error) -> + skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0); +skip_cont(Rest, Line, Col, {skip_token,Push}, Error) -> NewRest = Push ++ Rest, - skip_tokens(yystate(), NewRest, Line, NewRest, 0, Line, Error, reject, 0); -skip_cont(Rest, Line, {error,_S}, Error) -> - skip_tokens(yystate(), Rest, Line, Rest, 0, Line, Error, reject, 0). + skip_tokens(yystate(), NewRest, Line, Col, NewRest, 0, Line, Col, Error, reject, 0); +skip_cont(Rest, Line, Col, {error,_S}, Error) -> + skip_tokens(yystate(), Rest, Line, Col, Rest, 0, Line, Col, Error, reject, 0). -compile({nowarn_unused_function, [yyrev/1, yyrev/2, yypre/2, yysuf/2]}). 
@@ -292,21 +300,44 @@ adjust_line(T, A, [$\n|Cs], L) -> adjust_line(T, A, [_|Cs], L) -> adjust_line(T-1, A, Cs, L). +%% adjust_col(Chars, AcceptLength, Col) -> NewCol +%% Handle newlines, tabs and unicode chars. +adjust_col(_, 0, Col) -> + Col; +adjust_col([$\n | R], L, _) -> + adjust_col(R, L-1, 1); +adjust_col([$\t | R], L, Col) -> + adjust_col(R, L-1, tab_forward(Col)+1); +adjust_col([C | R], L, Col) when C>=0 andalso C=< 16#7F -> + adjust_col(R, L-1, Col+1); +adjust_col([C | R], L, Col) when C>= 16#80 andalso C=< 16#7FF -> + adjust_col(R, L-1, Col+2); +adjust_col([C | R], L, Col) when C>= 16#800 andalso C=< 16#FFFF -> + adjust_col(R, L-1, Col+3); +adjust_col([C | R], L, Col) when C>= 16#10000 andalso C=< 16#10FFFF -> + adjust_col(R, L-1, Col+4). + +tab_forward(C) -> + D = C rem tab_size(), + A = tab_size()-D, + C+A. + +##tab_size + %% yystate() -> InitialState. -%% yystate(State, InChars, Line, CurrTokLen, AcceptAction, AcceptLen) -> -%% {Action, AcceptLen, RestChars, Line} | -%% {Action, AcceptLen, RestChars, Line, State} | -%% {reject, AcceptLen, CurrTokLen, RestChars, Line, State} | -%% {Action, AcceptLen, CurrTokLen, RestChars, Line, State}. +%% yystate(State, InChars, Line, Col, CurrTokLen, AcceptAction, AcceptLen) -> +%% {Action, AcceptLen, RestChars, Line, Col} | +%% {Action, AcceptLen, RestChars, Line, Col, State} | +%% {reject, AcceptLen, CurrTokLen, RestChars, Line, Col, State} | +%% {Action, AcceptLen, CurrTokLen, RestChars, Line, Col, State}. %% Generated state transition functions. The non-accepting end state %% return signal either an unrecognised character or end of current %% input. ##dfa -%% yyaction(Action, TokenLength, TokenChars, TokenLine) -> +%% yyaction(Action, TokenLength, TokenChars, TokenLine, TokenCol) -> %% {token,Token} | {end_token, Token} | skip_token | {error,String}. %% Generated action function. 
##actions - diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index b764678516..c5e61b8149 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -119,6 +119,8 @@ file(File) -> file(File, []). | {'verbose', boolean()} | {'warnings_as_errors', boolean()} | {'deterministic', boolean()} + | {'error_location', line | column} + | {'tab_size', pos_integer()} | 'dfa_graph' | 'report_errors' | 'report_warnings' | 'report' | 'return_errors' | 'return_warnings' | 'return' @@ -281,6 +283,12 @@ check_options([{Option, Boolean} | Options], AllOptions, L) false -> badarg end; +check_options([{error_location, Loc}=O | Options], AllOptions, L) + when Loc =:= line; Loc =:= column -> + check_options(Options, AllOptions, [O | L]); +check_options([{tab_size, S}=O | Options], AllOptions, L) + when is_integer(S) andalso S>0 -> + check_options(Options, AllOptions, [O | L]); check_options([], _AllOptions, L) -> L; check_options(_Options, _, _L) -> @@ -289,7 +297,7 @@ check_options(_Options, _, _L) -> all_options() -> [dfa_graph,includefile,report_errors,report_warnings, return_errors,return_warnings,scannerfile,verbose, - warnings_as_errors, deterministic]. + warnings_as_errors,deterministic,error_location,tab_size]. default_option(dfa_graph) -> false; default_option(includefile) -> []; @@ -300,7 +308,9 @@ default_option(return_warnings) -> false; default_option(scannerfile) -> []; default_option(verbose) -> false; default_option(warnings_as_errors) -> false; -default_option(deterministic) -> false. +default_option(deterministic) -> false; +default_option(error_location) -> line; +default_option(tab_size) -> 8. 
atom_option(dfa_graph) -> {dfa_graph,true}; atom_option(report_errors) -> {report_errors,true}; @@ -596,7 +606,9 @@ parse_rule(S, Line, Atoks, Ms, N, St) -> TokenChars = var_used('TokenChars', Atoks), TokenLen = var_used('TokenLen', Atoks), TokenLine = var_used('TokenLine', Atoks), - {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St}; + TokenCol = var_used('TokenCol', Atoks), + TokenLoc = var_used('TokenLoc', Atoks), + {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine,TokenCol,TokenLoc},St}; {error,E} -> add_error({Line,leex,E}, St) end. @@ -1415,6 +1427,10 @@ out_file(Ifile, Ofile, St, DFA, DF, Actions, Code, L) -> case string:slice(Line, 0, 5) of "##mod" -> out_module(Ofile, St); "##cod" -> out_erlang_code(Ofile, St, Code, L); + "##str" -> out_string(Ofile, St#leex.opts); + "##tkn" -> out_token(Ofile, St#leex.opts); + "##tks" -> out_tokens(Ofile, St#leex.opts); + "##tab" -> out_tab_size(Ofile, St#leex.opts); "##dfa" -> out_dfa(Ofile, St, DFA, Code, DF, L); "##act" -> out_actions(Ofile, St#leex.xfile, Deterministic, Actions); _ -> io:put_chars(Ofile, Line) @@ -1440,6 +1456,92 @@ out_erlang_code(File, St, Code, L) -> io:nl(File), output_file_directive(File, St#leex.ifile, Deterministic, L). +out_tab_size(File, Opts) -> + Size = proplists:get_value(tab_size, Opts), + io:fwrite(File, "tab_size() -> ~p.\n", [Size]). + +%% Exclude column number if needed +out_string(File, Opts) -> + out_string_1(File, Opts), + out_string_2(File, Opts), + Vars = lists:join(", ",["Ics","L0","C0","Tcs","Ts"]), + out_head(File,string,Vars), + EL = proplists:get_value(error_location, Opts), + case EL of + column -> + io:fwrite(File," do_string(~s).\n",[Vars]); + line -> + io:fwrite(File," case do_string(~s) of\n",[Vars]), + io:fwrite(File," {ok, T, {L,_}} -> {ok, T, L};\n",[]), + io:fwrite(File," {error, {{EL,_},M,D}, {L,_}} ->\n",[]), + io:fwrite(File," EI = {EL,M,D},\n",[]), + io:fwrite(File," {error, EI, L}\n",[]), + io:fwrite(File," end.\n",[]) + end. 
+ +out_string_1(File, Opts) -> + out_head(File,string,"Ics"), + EL = proplists:get_value(error_location, Opts), + DefLoc = case EL of + column -> "{1,1}"; + line -> "1" + end, + io:fwrite(File," string(~s).\n",["Ics,"++DefLoc]). + +out_string_2(File, Opts) -> + EL = proplists:get_value(error_location, Opts), + case EL of + column -> + out_head(File,string,"Ics,{L0,C0}"), + CallVars = lists:join(", ", ["Ics","L0","C0","Ics","[]"]), + io:fwrite(File," string(~s).\n",[CallVars]); + line -> + out_head(File,string,"Ics,L0"), + CallVars = lists:join(", ", ["Ics","L0","1","Ics","[]"]), + io:fwrite(File," string(~s).\n",[CallVars]) + end. + +out_token(File, Opts) -> + out_tokens_wrapper(File, Opts, token). + +out_tokens(File, Opts) -> + out_tokens_wrapper(File, Opts, tokens). + +out_tokens_wrapper(File, Opts, Fun) -> + out_token_2(File, Opts, Fun), + EL = proplists:get_value(error_location, Opts), + case EL of + column -> + VarsCol = lists:join(", ",["Cont","Chars","{Line,Col}"]), + out_head(File, Fun, VarsCol), + io:fwrite(File," do_~s(~s).\n",[Fun,"Cont,Chars,Line,Col"]); + line -> + VarsCol = lists:join(", ",["Cont","Chars","Line"]), + out_head(File, Fun, VarsCol), + io:fwrite(File," case do_~s(~s) of\n",[Fun,"Cont,Chars,Line,1"]), + io:fwrite(File," {more, _} = C -> C;\n",[]), + io:fwrite(File," {done, Ret0, R} ->\n",[]), + io:fwrite(File," Ret1 = case Ret0 of\n",[]), + io:fwrite(File," {ok, T, {L,_}} -> {ok, T, L};\n",[]), + io:fwrite(File," {eof, {L,_}} -> {eof, L};\n",[]), + io:fwrite(File," {error, {{EL,_},M,D},{L,_}} -> {error, {EL,M,D},L}\n",[]), + io:fwrite(File," end,\n",[]), + io:fwrite(File," {done, Ret1, R}\n",[]), + io:fwrite(File," end.\n",[]) + end. + +out_token_2(File, Opts, Fun) -> + out_head(File, Fun, "Cont,Chars"), + EL = proplists:get_value(error_location, Opts), + DefLoc = case EL of + column -> "{1,1}"; + line -> "1" + end, + io:fwrite(File," ~s(~s).\n",[Fun,"Cont,Chars,"++DefLoc]). 
+ +out_head(File, Fun, Vars) -> + io:fwrite(File, "~s(~s) -> \n",[Fun,Vars]). + file_copy(From, To) -> case io:get_line(From, leex) of eof -> ok; @@ -1455,36 +1557,36 @@ out_dfa(File, St, DFA, Code, DF, L) -> output_file_directive(File, St#leex.efile, Deterministic, L+(NCodeLines-1)+3), io:fwrite(File, "yystate() -> ~w.~n~n", [DF]), foreach(fun (S) -> out_trans(File, S) end, DFA), - io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []), - io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n", []). + io:fwrite(File, "yystate(S, Ics, Line, Col, Tlen, Action, Alen) ->~n", []), + io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,Col,S}.~n", []). out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) -> %% Accepting end state, guaranteed done. - io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), - io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]); + io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, _, _) ->~n", [N]), + io:fwrite(File, " {~w,Tlen,Ics,Line,Col};~n", [A]); out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) -> %% Accepting state, but there maybe more. foreach(fun (T) -> out_accept_tran(File, N, A, T) end, pack_trans(Tr)), - io:fwrite(File, "yystate(~w, Ics, Line, Tlen, _, _) ->~n", [N]), - io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]); + io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, _, _) ->~n", [N]), + io:fwrite(File, " {~w,Tlen,Ics,Line,Col,~w};~n", [A,N]); out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) -> %% Non-accepting transition state. foreach(fun (T) -> out_noaccept_tran(File, N, T) end, pack_trans(Tr)), - io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]), - io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]). + io:fwrite(File, "yystate(~w, Ics, Line, Col, Tlen, Action, Alen) ->~n", [N]), + io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,Col,~w};~n", [N]). 
out_accept_tran(File, N, A, {{Cf,maxchar},S}) -> out_accept_head_max(File, N, Cf), - out_accept_body(File, S, "Line", A); + out_accept_body(File, S, "Line", "Col", A); out_accept_tran(File, N, A, {{Cf,Cl},S}) -> out_accept_head_range(File, N, Cf, Cl), - out_accept_body(File, S, "Line", A); + out_accept_body(File, S, "Line", "Col", A); out_accept_tran(File, N, A, {$\n,S}) -> out_accept_head_1(File, N, $\n), - out_accept_body(File, S, "Line+1", A); + out_accept_body(File, S, "Line+1", "1", A); out_accept_tran(File, N, A, {C,S}) -> out_accept_head_1(File, N, C), - out_accept_body(File, S, "Line", A). + out_accept_body(File, S, "Line", "Col", A). out_accept_head_1(File, State, Char) -> out_head_1(File, State, Char, "_", "_"). @@ -1495,21 +1597,21 @@ out_accept_head_max(File, State, Min) -> out_accept_head_range(File, State, Min, Max) -> out_head_range(File, State, Min, Max, "_", "_"). -out_accept_body(File, Next, Line, Action) -> - out_body(File, Next, Line, io_lib:write(Action), "Tlen"). +out_accept_body(File, Next, Line, Col, Action) -> + out_body(File, Next, Line, Col, io_lib:write(Action), "Tlen"). out_noaccept_tran(File, N, {{Cf,maxchar},S}) -> out_noaccept_head_max(File, N, Cf), - out_noaccept_body(File, S, "Line"); + out_noaccept_body(File, S, "Line", "Col"); out_noaccept_tran(File, N, {{Cf,Cl},S}) -> out_noaccept_head_range(File, N, Cf, Cl), - out_noaccept_body(File, S, "Line"); + out_noaccept_body(File, S, "Line", "Col"); out_noaccept_tran(File, N, {$\n,S}) -> out_noaccept_head_1(File, N, $\n), - out_noaccept_body(File, S, "Line+1"); + out_noaccept_body(File, S, "Line+1", "1"); out_noaccept_tran(File, N, {C,S}) -> out_noaccept_head_1(File, N, C), - out_noaccept_body(File, S, "Line"). + out_noaccept_body(File, S, "Line", "Col"). out_noaccept_head_1(File, State, Char) -> out_head_1(File, State, Char, "Action", "Alen"). 
@@ -1520,24 +1622,27 @@ out_noaccept_head_max(File, State, Min) -> out_noaccept_head_range(File, State, Min, Max) -> out_head_range(File, State, Min, Max, "Action", "Alen"). -out_noaccept_body(File, Next, Line) -> - out_body(File, Next, Line, "Action", "Alen"). +out_noaccept_body(File, Next, Line, Col) -> + out_body(File, Next, Line, Col, "Action", "Alen"). +out_head_1(File, State, Char = $\n, Action, Alen) -> + io:fwrite(File, "yystate(~w, [~w|Ics], Line, _, Tlen, ~s, ~s) ->\n", + [State,Char,Action,Alen]); out_head_1(File, State, Char, Action, Alen) -> - io:fwrite(File, "yystate(~w, [~w|Ics], Line, Tlen, ~s, ~s) ->\n", + io:fwrite(File, "yystate(~w, [~w|Ics], Line, Col, Tlen, ~s, ~s) ->\n", [State,Char,Action,Alen]). out_head_max(File, State, Min, Action, Alen) -> - io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w ->\n", + io:fwrite(File, "yystate(~w, [C|Ics], Line, Col, Tlen, ~s, ~s) when C >= ~w ->\n", [State,Action,Alen,Min]). out_head_range(File, State, Min, Max, Action, Alen) -> - io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n", + io:fwrite(File, "yystate(~w, [C|Ics], Line, Col, Tlen, ~s, ~s) when C >= ~w, C =< ~w ->\n", [State,Action,Alen,Min,Max]). -out_body(File, Next, Line, Action, Alen) -> - io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~s, ~s);\n", - [Next,Line,Action,Alen]). +out_body(File, Next, Line, Col, Action, Alen) -> + io:fwrite(File, " yystate(~w, Ics, ~s, ~s, Tlen+1, ~s, ~s);\n", + [Next,Line,Col,Action,Alen]). %% pack_trans([{Crange,State}]) -> [{Crange,State}] when %% Crange = {Char,Char} | Char. @@ -1581,31 +1686,32 @@ pack_trans([], Pt) -> Pt. out_actions(File, XrlFile, Deterministic, As) -> As1 = prep_out_actions(As), foreach(fun (A) -> out_action(File, A) end, As1), - io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []), + io:fwrite(File, "yyaction(_, _, _, _, _) -> error.~n", []), foreach(fun (A) -> out_action_code(File, XrlFile, Deterministic, A) end, As1). 
prep_out_actions(As) -> map(fun ({A,empty_action}) -> {A,empty_action}; - ({A,Code,TokenChars,TokenLen,TokenLine}) -> + ({A,Code,TokenChars,TokenLen,TokenLine,TokenCol,TokenLoc}) -> Vs = [{TokenChars,"TokenChars"}, {TokenLen,"TokenLen"}, - {TokenLine,"TokenLine"}, + {TokenLine or TokenLoc,"TokenLine"}, + {TokenCol or TokenLoc,"TokenCol"}, {TokenChars,"YYtcs"}, {TokenLen or TokenChars,"TokenLen"}], Vars = [if F -> S; true -> "_" end || {F,S} <- Vs], Name = list_to_atom(lists:concat([yyaction_,A])), - [Chars,Len,Line,_,_] = Vars, - Args = [V || V <- [Chars,Len,Line], V =/= "_"], + [Chars,Len,Line,Col,_,_] = Vars, + Args = [V || V <- [Chars,Len,Line,Col], V =/= "_"], ArgsChars = lists:join(", ", Args), - {A,Code,Vars,Name,Args,ArgsChars} + {A,Code,Vars,Name,Args,ArgsChars, TokenLoc} end, As). out_action(File, {A,empty_action}) -> - io:fwrite(File, "yyaction(~w, _, _, _) -> skip_token;~n", [A]); -out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) -> - [_,_,Line,Tcs,Len] = Vars, - io:fwrite(File, "yyaction(~w, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line]), + io:fwrite(File, "yyaction(~w, _, _, _, _) -> skip_token;~n", [A]); +out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars,_TokenLoc}) -> + [_,_,Line,Col,Tcs,Len] = Vars, + io:fwrite(File, "yyaction(~w, ~s, ~s, ~s, ~s) ->~n", [A,Len,Tcs,Line,Col]), if Tcs =/= "_" -> io:fwrite(File, " TokenChars = yypre(YYtcs, TokenLen),~n", []); @@ -1615,13 +1721,17 @@ out_action(File, {A,_Code,Vars,Name,_Args,ArgsChars}) -> out_action_code(_File, _XrlFile, _Deterministic, {_A,empty_action}) -> ok; -out_action_code(File, XrlFile, Deterministic, {_A,Code,_Vars,Name,Args,ArgsChars}) -> +out_action_code(File, XrlFile, Deterministic, {_A,Code,_Vars,Name,Args,ArgsChars, TokenLoc}) -> %% Should set the file to the .erl file, but instead assumes that %% ?LEEXINC is syntactically correct. 
io:fwrite(File, "\n-compile({inline,~w/~w}).\n", [Name, length(Args)]), L = erl_scan:line(hd(Code)), output_file_directive(File, XrlFile, Deterministic, L-2), io:fwrite(File, "~s(~s) ->~n", [Name, ArgsChars]), + if + TokenLoc -> io:fwrite(File," TokenLoc={TokenLine,TokenCol},~n",[]); + true -> ok + end, io:fwrite(File, " ~ts\n", [pp_tokens(Code, L, File)]). %% pp_tokens(Tokens, Line, File) -> [char()]. diff --git a/lib/parsetools/test/leex_SUITE.erl b/lib/parsetools/test/leex_SUITE.erl index 8d7e44629c..92e74034b5 100644 --- a/lib/parsetools/test/leex_SUITE.erl +++ b/lib/parsetools/test/leex_SUITE.erl @@ -45,7 +45,7 @@ pt/1, man/1, ex/1, ex2/1, not_yet/1, line_wrap/1, otp_10302/1, otp_11286/1, unicode/1, otp_13916/1, otp_14285/1, - otp_17023/1, compiler_warnings/1]). + otp_17023/1, compiler_warnings/1, column_support/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, test_server:minutes(1)). @@ -66,7 +66,7 @@ all() -> groups() -> [{checks, [], [file, compile, syntax, deterministic]}, - {examples, [], [pt, man, ex, ex2, not_yet, unicode]}, + {examples, [], [pt, man, ex, ex2, not_yet, unicode, column_support]}, {tickets, [], [otp_10302, otp_11286, otp_13916, otp_14285, otp_17023, compiler_warnings]}, {bugs, [], [line_wrap]}]. 
@@ -118,6 +118,17 @@ file(Config) when is_list(Config) -> {'EXIT', {badarg, _}} = (catch leex:file(Filename, includefile)), + {'EXIT', {badarg, _}} = + (catch leex:file(Filename, {tab_size,0})), + {'EXIT', {badarg, _}} = + (catch leex:file(Filename, {tab_size,"4"})), + {'EXIT', {badarg, _}} = + (catch leex:file(Filename, {tab_size,3.5})), + {'EXIT', {badarg, _}} = + (catch leex:file(Filename, {error_location,{line,column}})), + {'EXIT', {badarg, _}} = + (catch leex:file(Filename, {error_location,col})), + Mini = <<"Definitions.\n" "D = [0-9]\n" "Rules.\n" @@ -417,17 +428,17 @@ pt(Config) when is_list(Config) -> "L = [a-z]\n" "Rules.\n" - "{L}+ : {token,{word,TokenLine,TokenChars}}.\n" + "{L}+ : {token,{word,TokenLoc,TokenChars}}.\n" "abc{D}+ : {skip_token,\"sture\" ++ string:substr(TokenChars, 4)}.\n" - "{D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n" + "{D}+ : {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n" "\\s : .\n" "\\r\\n : {end_token,{crlf,TokenLine}}.\n" "Erlang code.\n" "-export([t/0]).\n" "t() -> - {ok,[{word,1,\"sture\"},{integer,1,123}],1} = - string(\"abc123\"), ok. ">>, + {ok,[{word,{1,7},\"sture\"},{integer,{1,12},123}],{1,15}} = + string(\"abc123\"), ok. 
">>, default, ok}], @@ -442,10 +453,10 @@ unicode(Config) when is_list(Config) -> "Definitions.\n" "RTLarrow = (â)\n" "Rules.\n" - "{RTLarrow} : {token,{\"â\",TokenLine}}.\n" + "{RTLarrow} : {token,{\"â\",TokenLoc}}.\n" "Erlang code.\n" "-export([t/0]).\n" - "t() -> {ok, [{\"â\", 1}], 1} = string(\"â\"), ok.">>, + "t() -> {ok, [{\"â\", {1,1}}], {1,4}} = string(\"â\"), ok.">>, default, ok}], @@ -460,34 +471,33 @@ man(Config) when is_list(Config) -> <<"Definitions.\n" "Rules.\n" "[a-z][0-9a-zA-Z_]* :\n" - " {token,{atom,TokenLine,list_to_atom(TokenChars)}}.\n" + " {token,{atom,TokenLoc,list_to_atom(TokenChars)}}.\n" "[A-Z_][0-9a-zA-Z_]* :\n" - " {token,{var,TokenLine,list_to_atom(TokenChars)}}.\n" + " {token,{var,TokenLoc,list_to_atom(TokenChars)}}.\n" "(\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)? : \n" - " {token,{float,TokenLine,list_to_float(TokenChars)}}.\n" + " {token,{float,TokenLoc,list_to_float(TokenChars)}}.\n" "\\s : skip_token.\n" "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{float,1,3.14},{atom,1,atom},{var,1,'V314'}],1} =\n" + " {ok,[{float,{1,1},3.14},{atom,{1,5},atom},{var,{1,10},'V314'}],{1,14}} =\n" " string(\"3.14atom V314\"),\n" " ok.\n">>, default, ok}, - - {man_2, + {man_2, <<"Definitions.\n" "D = [0-9]\n" "Rules.\n" "{D}+ :\n" - " {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n" + " {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n" "{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? 
:\n" - " {token,{float,TokenLine,list_to_float(TokenChars)}}.\n" + " {token,{float,TokenLoc,list_to_float(TokenChars)}}.\n" "\\s : skip_token.\n" "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{float,1,3.14},{integer,1,314}],1} = \n" + " {ok,[{float,{1,1},3.14},{integer,{1,6},314}],{1,9}} = \n" " string(\"3.14 314\"),\n" " ok.\n">>, default, @@ -505,13 +515,13 @@ ex(Config) when is_list(Config) -> "D = [0-543-705-982]\n" "Rules.\n" "{D}+ :\n" - " {token,{integer,TokenLine,list_to_integer(TokenChars)}}.\n" + " {token,{integer,TokenLoc,list_to_integer(TokenChars)}}.\n" "[^235]+ :\n" - " {token,{list_to_atom(TokenChars),TokenLine}}.\n" + " {token,{list_to_atom(TokenChars),TokenLoc}}.\n" "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{integer,1,12},{' c\\na',1},{integer,2,34},{b789a,2}],2} =\n" + " {ok,[{integer,{1,1},12},{' c\\na',{1,3}},{integer,{2,2},34},{b789a,{2,4}}],{2,9}} =\n" " string(\"12 c\\na34b789a\"),\n" " ok.\n">>, default, @@ -528,7 +538,7 @@ ex(Config) when is_list(Config) -> "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[chars,zyx],1} = string(\"abcdef zyx123\"),\n" + " {ok,[chars,zyx],{1,14}} = string(\"abcdef zyx123\"),\n" " ok.\n">>, default, ok}, @@ -541,7 +551,7 @@ ex(Config) when is_list(Config) -> "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[],1} = string(\"\"), ok.\n">>, % string("a") would loop... + " {ok,[],{1,1}} = string(\"\"), ok.\n">>, % string("a") would loop... 
default, ok}, @@ -574,12 +584,12 @@ ex(Config) when is_list(Config) -> "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{white,\"\\b\\f\"}],1} = string(\"\\b\\f\"),\n" - " {ok,[{form,\"ff\\f\"}],1} = string(\"ff\\f\"),\n" - " {ok,[{string,\"\\\"foo\\\"\"}],1} = string(\"\\\"foo\\\"\"),\n" - " {ok,[{char,\"$.\"}],1} = string(\"$\\.\"),\n" - " {ok,[{list,\"[a,b,c]\"}],1} = string(\"[a,b,c]\"),\n" - " {ok,[{other,\"$^\\\\\"}],1} = string(\"$^\\\\\"),\n" + " {ok,[{white,\"\\b\\f\"}],{1,3}} = string(\"\\b\\f\"),\n" + " {ok,[{form,\"ff\\f\"}],{1,4}} = string(\"ff\\f\"),\n" + " {ok,[{string,\"\\\"foo\\\"\"}],{1,6}} = string(\"\\\"foo\\\"\"),\n" + " {ok,[{char,\"$.\"}],{1,3}} = string(\"$\\.\"),\n" + " {ok,[{list,\"[a,b,c]\"}],{1,8}} = string(\"[a,b,c]\"),\n" + " {ok,[{other,\"$^\\\\\"}],{1,4}} = string(\"$^\\\\\"),\n" " ok.\n">>, default, ok}, @@ -607,7 +617,7 @@ ex(Config) when is_list(Config) -> "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{hex,[17,171,48,172]}],1} =\n" + " {ok,[{hex,[17,171,48,172]}],{1,7}} =\n" " string(\"\\x{11}\\xab0\\xac\"),\n" " ok.\n">>, default, @@ -637,47 +647,47 @@ WS = ([\\000-\\s]|%.*) Rules. {D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? : - {token,{float,TokenLine,list_to_float(TokenChars)}}. -{D}+#{H}+ : base(TokenLine, TokenChars). -{D}+ : {token,{integer,TokenLine,list_to_integer(TokenChars)}}. + {token,{float,TokenLoc,list_to_float(TokenChars)}}. +{D}+#{H}+ : base(TokenLoc, TokenChars). +{D}+ : {token,{integer,TokenLoc,list_to_integer(TokenChars)}}. {L}{A}* : Atom = list_to_atom(TokenChars), {token,case reserved_word(Atom) of - true -> {Atom,TokenLine}; - false -> {atom,TokenLine,Atom} + true -> {Atom,TokenLoc}; + false -> {atom,TokenLoc,Atom} end}. '(\\\\\\^.|\\\\.|[^'])*' : %% Strip quotes. S = lists:sublist(TokenChars, 2, TokenLen - 2), case catch list_to_atom(string_gen(S)) of {'EXIT',_} -> {error,\"illegal atom \" ++ TokenChars}; - Atom -> {token,{atom,TokenLine,Atom}} + Atom -> {token,{atom,TokenLoc,Atom}} end. 
-({U}|_){A}* : {token,{var,TokenLine,list_to_atom(TokenChars)}}. +({U}|_){A}* : {token,{var,TokenLoc,list_to_atom(TokenChars)}}. \"(\\\\\\^.|\\\\.|[^\"])*\" : %% Strip quotes. S = lists:sublist(TokenChars, 2, TokenLen - 2), - {token,{string,TokenLine,string_gen(S)}}. + {token,{string,TokenLoc,string_gen(S)}}. \\$(\\\\{O}{O}{O}|\\\\\\^.|\\\\.|.) : - {token,{char,TokenLine,cc_convert(TokenChars)}}. --> : {token,{'->',TokenLine}}. -:- : {token,{':-',TokenLine}}. -\\|\\| : {token,{'||',TokenLine}}. -<- : {token,{'<-',TokenLine}}. -\\+\\+ : {token,{'++',TokenLine}}. --- : {token,{'--',TokenLine}}. -=/= : {token,{'=/=',TokenLine}}. -== : {token,{'==',TokenLine}}. -=:= : {token,{'=:=',TokenLine}}. -/= : {token,{'/=',TokenLine}}. ->= : {token,{'>=',TokenLine}}. -=< : {token,{'=<',TokenLine}}. -<= : {token,{'<=',TokenLine}}. -<< : {token,{'<<',TokenLine}}. ->> : {token,{'>>',TokenLine}}. -:: : {token,{'::',TokenLine}}. + {token,{char,TokenLoc,cc_convert(TokenChars)}}. +-> : {token,{'->',TokenLoc}}. +:- : {token,{':-',TokenLoc}}. +\\|\\| : {token,{'||',TokenLoc}}. +<- : {token,{'<-',TokenLoc}}. +\\+\\+ : {token,{'++',TokenLoc}}. +-- : {token,{'--',TokenLoc}}. +=/= : {token,{'=/=',TokenLoc}}. +== : {token,{'==',TokenLoc}}. +=:= : {token,{'=:=',TokenLoc}}. +/= : {token,{'/=',TokenLoc}}. +>= : {token,{'>=',TokenLoc}}. +=< : {token,{'=<',TokenLoc}}. +<= : {token,{'<=',TokenLoc}}. +<< : {token,{'<<',TokenLoc}}. +>> : {token,{'>>',TokenLoc}}. +:: : {token,{'::',TokenLoc}}. []()[}{|!?/;:,.*+#<>=-] : - {token,{list_to_atom(TokenChars),TokenLine}}. -\\.{WS} : {end_token,{dot,TokenLine}}. + {token,{list_to_atom(TokenChars),TokenLoc}}. +\\.{WS} : {end_token,{dot,TokenLoc}}. {WS}+ : skip_token. Erlang code. @@ -775,7 +785,7 @@ escape_char(C) -> C. 
XrlFile = filename:join(Dir, "erlang_scan.xrl"), ok = file:write_file(XrlFile, Xrl), ErlFile = filename:join(Dir, "erlang_scan.erl"), - {ok, _} = leex:file(XrlFile, []), + {ok, _} = leex:file(XrlFile, [{error_location, column}]), {ok, _} = compile:file(ErlFile, [{outdir,Dir}]), code:purge(erlang_scan), AbsFile = filename:rootname(ErlFile, ".erl"), @@ -785,79 +795,79 @@ escape_char(C) -> C. erlang_scan:tokens(Cont, Chars, Location) end, F1 = fun(Cont, Chars, Location) -> - erlang_scan:token(Cont, Chars, Location) - end, + erlang_scan:token(Cont, Chars, Location) + end, fun() -> S = "ab cd. ", - {ok, Ts, 1} = scan_tokens_1(S, F, 1), - {ok, Ts, 1} = scan_token_1(S, F1, 1), - {ok, Ts, 1} = scan_tokens(S, F, 1), - {ok, Ts, 1} = erlang_scan:string(S, 1) + {ok, Ts, {1,8}} = scan_tokens_1(S, F, {1,1}), + {ok, Ts, {1,8}} = scan_token_1(S, F1, {1,1}), + {ok, Ts, {1,8}} = scan_tokens(S, F, {1,1}), + {ok, Ts, {1,8}} = erlang_scan:string(S, {1,1}) end(), fun() -> S = "'ab\n cd'. ", - {ok, Ts, 2} = scan_tokens_1(S, F, 1), - {ok, Ts, 2} = scan_token_1(S, F1, 1), - {ok, Ts, 2} = scan_tokens(S, F, 1), - {ok, Ts, 2} = erlang_scan:string(S, 1) + {ok, Ts, {2,7}} = scan_tokens_1(S, F, {1,1}), + {ok, Ts, {2,7}} = scan_token_1(S, F1, {1,1}), + {ok, Ts, {2,7}} = scan_tokens(S, F, {1,1}), + {ok, Ts, {2,7}} = erlang_scan:string(S, {1,1}) end(), fun() -> S = "99. ", - {ok, Ts, 1} = scan_tokens_1(S, F, 1), - {ok, Ts, 1} = scan_token_1(S, F1, 1), - {ok, Ts, 1} = scan_tokens(S, F, 1), - {ok, Ts, 1} = erlang_scan:string(S, 1) + {ok, Ts, {1,5}} = scan_tokens_1(S, F, {1,1}), + {ok, Ts, {1,5}} = scan_token_1(S, F1, {1,1}), + {ok, Ts, {1,5}} = scan_tokens(S, F, {1,1}), + {ok, Ts, {1,5}} = erlang_scan:string(S, {1,1}) end(), - {ok,[{integer,1,99},{dot,1}],1} = erlang_scan:string("99. "), + {ok,[{integer,{1,1},99},{dot,{1,3}}],{1,5}} = erlang_scan:string("99. "), fun() -> Atom = "'" ++ lists:duplicate(1000,$a) ++ "'", S = Atom ++ ". 
", Reason = "illegal atom " ++ Atom, - Err = {error,{1,erlang_scan,{user,Reason}},1}, - {done,Err,[]} = scan_tokens_1(S, F, 1), - {done,Err,[]} = scan_token_1(S, F1, 1), - {done,Err,[]} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1003},erlang_scan,{user,Reason}},{1,1003}}, + {done,Err,[]} = scan_tokens_1(S, F, {1,1}), + {done,Err,[]} = scan_token_1(S, F1, {1,1}), + {done,Err,[]} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), fun() -> S = "\x{aaa}. ", - Err = {error,{1,erlang_scan,{illegal,[2730]}},1}, - {done,Err,[]} = scan_tokens_1(S, F, 1), - {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty - {done,Err,[]} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}}, + {done,Err,[]} = scan_tokens_1(S, F, {1,1}), + {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty + {done,Err,[]} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), fun() -> S = "\x{aaa} + 1. 34", - Err = {error,{1,erlang_scan,{illegal,[2730]}},1}, - {done,Err,[]} = scan_tokens_1(S, F, 1), - {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty - {done,Err,"34"} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}}, + {done,Err,[]} = scan_tokens_1(S, F, {1,1}), + {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty + {done,Err,"34"} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), fun() -> S = "\x{aaa} \x{bbb}. 
34", - Err = {error,{1,erlang_scan,{illegal,[2730]}},1}, - {done,Err,[]} = scan_tokens_1(S, F, 1), - {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty - {done,Err,"34"} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}}, + {done,Err,[]} = scan_tokens_1(S, F, {1,1}), + {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty + {done,Err,"34"} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), fun() -> S = "\x{aaa} 18#34. 34", - Err = {error,{1,erlang_scan,{illegal,[2730]}},1}, - {done,Err,[]} = scan_tokens_1(S, F, 1), - {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty - {done,Err,"34"} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}}, + {done,Err,[]} = scan_tokens_1(S, F, {1,1}), + {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty + {done,Err,"34"} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), fun() -> S = "\x{aaa}"++eof, - Err = {error,{1,erlang_scan,{illegal,[2730]}},1}, - {done,Err,eof} = scan_tokens_1(S, F, 1), - {done,Err,[_]} = scan_token_1(S, F1, 1), % Note: Rest non-empty - {done,Err,eof} = scan_tokens(S, F, 1), - Err = erlang_scan:string(S, 1) + Err = {error,{{1,1},erlang_scan,{illegal,[2730]}},{1,1}}, + {done,Err,eof} = scan_tokens_1(S, F, {1,1}), + {done,Err,[_]} = scan_token_1(S, F1, {1,1}), % Note: Rest non-empty + {done,Err,eof} = scan_tokens(S, F, {1,1}), + Err = erlang_scan:string(S, {1,1}) end(), ok. @@ -912,8 +922,8 @@ line_wrap(Config) when is_list(Config) -> <<" Definitions. Rules. -[a]+[\\n]*= : {token, {first, TokenLine}}. -[a]+ : {token, {second, TokenLine}}. +[a]+[\\n]*= : {token, {first, TokenLoc}}. +[a]+ : {token, {second, TokenLoc}}. [\\s\\r\\n\\t]+ : skip_token. Erlang code. ">>, @@ -928,20 +938,20 @@ Erlang code. 
code:load_abs(AbsFile, test_line_wrap), fun() -> S = "aaa\naaa", - {ok,[{second,1},{second,2}],2} = test_line_wrap:string(S) + {ok,[{second,{1,1}},{second,{2,1}}],2} = test_line_wrap:string(S) end(), fun() -> S = "aaa\naaa", - {ok,[{second,3},{second,4}],4} = test_line_wrap:string(S, 3) + {ok,[{second,{3,1}},{second,{4,1}}],4} = test_line_wrap:string(S, 3) end(), fun() -> - {done,{ok,{second,1},1},"\na"} = test_line_wrap:token([], "a\na"), + {done,{ok,{second,{1,1}},1},"\na"} = test_line_wrap:token([], "a\na"), {more,Cont1} = test_line_wrap:token([], "\na"), - {done,{ok,{second,2},2},eof} = test_line_wrap:token(Cont1, eof) + {done,{ok,{second,{2,1}},2},eof} = test_line_wrap:token(Cont1, eof) end(), fun() -> {more,Cont1} = test_line_wrap:tokens([], "a\na"), - {done,{ok,[{second,1},{second,2}],2},eof} = test_line_wrap:tokens(Cont1, eof) + {done,{ok,[{second,{1,1}},{second,{2,1}}],2},eof} = test_line_wrap:tokens(Cont1, eof) end(), ok. @@ -1044,7 +1054,7 @@ otp_10302(Config) when is_list(Config) -> "-export([t/0]).\n" "t() ->\n" " %% Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ð\"\n" - " {ok, [R], 1} = string(\"tip\"),\n" + " {ok, [R], {1,4}} = string(\"tip\"),\n" " {tip,foo,'Häpp',[1024,66],[246,114,110,95,1024]} = R,\n" " Häpp = foo,\n" " {tip, Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ð\"} = R,\n" @@ -1065,7 +1075,7 @@ otp_10302(Config) when is_list(Config) -> "-export([t/0]).\n" "t() ->\n" " %% Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ð\"\n" - " {ok, [R], 1} = string(\"tip\"),\n" + " {ok, [R], {1,4}} = string(\"tip\"),\n" " {tip,foo,'Häpp',[1024,66],[195,182,114,110,95,208,128]} = R,\n" " Häpp = foo,\n" " {tip, Häpp, 'Häpp',\"\\x{400}B\",\"örn_Ð\"} = R,\n" @@ -1139,23 +1149,23 @@ otp_13916(Config) when is_list(Config) -> "Rules.\n" "%% mark line break(s) and empty lines by token 'break'\n" "%% in order to use as delimiters\n" - "{B}({S}*{B})+ : {token, {break, TokenLine}}.\n" - "{B} : {token, {break, TokenLine}}.\n" - "{S}+ : {token, {blank, TokenLine, TokenChars}}.\n" - "{W}+ : {token, 
{word, TokenLine, TokenChars}}.\n" + "{B}({S}*{B})+ : {token, {break, TokenLoc}}.\n" + "{B} : {token, {break, TokenLoc}}.\n" + "{S}+ : {token, {blank, TokenLoc, TokenChars}}.\n" + "{W}+ : {token, {word, TokenLoc, TokenChars}}.\n" "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,[{break,1},{blank,4,\" \"},{word,4,\"breaks\"}],4} =\n" + " {ok,[{break,{1,1}},{blank,{4,1},\" \"},{word,{4,3},\"breaks\"}],{4,9}} =\n" " string(\"\\n\\n \\n breaks\"),\n" - " {ok,[{break,1},{word,4,\"works\"}],4} =\n" + "{ok,[{break,{1,1}},{word,{4,1},\"works\"}],{4,6}} =\n" " string(\"\\n\\n \\nworks\"),\n" - " {ok,[{break,1},{word,4,\"L4\"},{break,4},\n" - " {word,5,\"L5\"},{break,5},{word,7,\"L7\"}], 7} =\n" + " {ok,[{break,{1,1}},{word,{4,1},\"L4\"},{break,{4,3}},\n" + " {word,{5,1},\"L5\"},{break,{5,3}},{word,{7,1},\"L7\"}], {7,3}} =\n" " string(\"\\n\\n \\nL4\\nL5\\n\\nL7\"),\n" - " {ok,[{break,1},{blank,4,\" \"},{word,4,\"L4\"},\n" - " {break,4},{blank,5,\" \"},{word,5,\"L5\"},\n" - " {break,5},{blank,7,\" \"},{word,7,\"L7\"}], 7} =\n" + "{ok,[{break,{1,1}},{blank,{4,1},\" \"},{word,{4,2} ,\"L4\"},\n" + " {break,{4,4}},{blank,{5,1},\" \"},{word,{5,2},\"L5\"},\n" + " {break,{5,4}},{blank,{7,1},\" \"},{word,{7,2},\"L7\"}], {7,4}} =\n" " string(\"\\n\\n \\n L4\\n L5\\n\\n L7\"),\n" " ok.\n">>, default, @@ -1164,6 +1174,7 @@ otp_13916(Config) when is_list(Config) -> ok. 
otp_14285(Config) -> + %% x{400} takes 2 bytes to represent Ts = [{otp_14285_1, <<"%% encoding: latin-1\n" "Definitions.\n" @@ -1173,11 +1184,11 @@ otp_14285(Config) -> "U = [\\x{400}]\n" "Rules.\n" "{L}+ : {token,l}.\n" - "{U}+ : {token,'\\x{400}'}.\n" + "{U}+ : {token,{TokenLine,'\\x{400}'}}.\n" "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,['\\x{400}'],1} = string(\"\\x{400}\"), ok.\n">>, + " {ok,[{1,'\\x{400}'}],{1,3}} = string(\"\\x{400}\"), ok.\n">>, default, ok}, {otp_14285_2, @@ -1193,7 +1204,7 @@ otp_14285(Config) -> "Erlang code.\n" "-export([t/0]).\n" "t() ->\n" - " {ok,['\x{400}'],1} = string(\"\x{400}\"), ok.\n">>, + " {ok,['\x{400}'],{1,3}} = string(\"\x{400}\"), ok.\n"/utf8>>, default, ok}], run(Config, Ts), @@ -1225,6 +1236,54 @@ otp_17023(Config) -> end, ok. +%% Additional tests added with column support +column_support(Config) -> + Ts = [{token_col_var, + <<"Definitions.\n" + "D = [0-9]\n" + "W = [\\s\\n]\n" + "Rules.\n" + "{W}+ :\n" + "skip_token.\n" + "{D}+ :\n" + "{token,{integer,{TokenLine,TokenCol},list_to_integer(TokenChars)}}.\n" + "{D}+\\.{D}+((E|e)(\\+|\\-)?{D}+)? 
:\n" + "{token,{float,{TokenLine,TokenCol},list_to_float(TokenChars)}}.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + "{ok,[{float, {2,1}, 4.44},{integer, {3,3}, 5},{integer, {7,3}, 7}],{8,2}}" + "= string(\"\n4.44 \n 5 \n \n\n\n 7 \n \"), ok.\n">>, + default, + ok}, + {tab, + <<"Definitions.\n" + "Rules.\n" + "[a]+[\\n]*= : {token, {first, TokenLoc}}.\n" + "[a]+ : {token, {second, TokenLoc}}.\n" + "[\\s\\r\\n\\t]+ : skip_token.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + "{ok,[{second,{1,27}},{second,{2,19}}],{2,25}} = string(\" \t \t\t a\\n \t \t aaa\t\"), ok.\n">>, + default, + ok}, + {tab_custom_size, + <<"Definitions.\n" + "Rules.\n" + "[a]+[\\n]*= : {token, {first, TokenLoc}}.\n" + "[a]+ : {token, {second, TokenLoc}}.\n" + "[\\s\\r\\n\\t]+ : skip_token.\n" + "Erlang code.\n" + "-export([t/0]).\n" + "t() ->\n" + "{ok,[{second,{1,15}},{second,{2,9}}],{2,16}} = string(\" \t \t\t a\\n \t \t aaa\t\"), ok.\n">>, + default, + [{tab_size,3}], + ok}], + run(Config, Ts), + ok. + %% OTP-17499. GH-4918. compiler_warnings(Config) -> Xrl = @@ -1256,18 +1315,23 @@ writable(Fname) -> ok = file:write_file_info(Fname, Info#file_info{mode = Mode}). run(Config, Tests) -> - F = fun({N,P,Pre,E}) -> - case catch run_test(Config, P, Pre) of - E -> - ok; - Bad -> - ct:fail("~nTest ~p failed. Expected~n ~p~n" - "but got~n ~p~n", [N, E, Bad]) - end + F = fun F({N,P,Pre,E}) -> + F({N,P,Pre,[],E}); + F({N,P,Pre,Opts,E}) -> + case catch run_test(Config,P,Pre,Opts) of + E -> + ok; + Bad -> + ct:fail("~nTest ~p failed. Expected~n ~p~n" + "but got~n ~p~n", [N, E, Bad]) + end end, lists:foreach(F, Tests). run_test(Config, Def, Pre) -> + run_test(Config, Def, Pre, []). 
+ +run_test(Config, Def, Pre, LOpts0) -> %% io:format("testing ~s~n", [binary_to_list(Def)]), DefFile = 'leex_test.xrl', Filename = 'leex_test.erl', @@ -1276,14 +1340,14 @@ run_test(Config, Def, Pre) -> ErlFile = filename:join(DataDir, Filename), Opts = [return, warn_unused_vars,{outdir,DataDir}], ok = file:write_file(XrlFile, Def), - LOpts = [return, {report, false} | + LOpts = LOpts0 ++ [return, {report, false} | case Pre of default -> []; _ -> [{includefile,Pre}] end], - XOpts = [verbose, dfa_graph], % just to get some code coverage... + XOpts = [verbose, dfa_graph, {error_location, column}], % just to get some code coverage... LRet = leex:file(XrlFile, XOpts ++ LOpts), case LRet of {ok, _Outfile, _LWs} -> |