%%-*-erlang-*- %% %CopyrightBegin% %% %% Copyright Ericsson AB 2008-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %%---------------------------------------------------------------------- %% Start of common source %%---------------------------------------------------------------------- %-compile(export_all). %%---------------------------------------------------------------------- %% Include files %%---------------------------------------------------------------------- -include("xmerl_sax_parser.hrl"). %%---------------------------------------------------------------------- %% External exports %%---------------------------------------------------------------------- -export([parse/2, parse_dtd/2, is_name_char/1, is_name_start/1]). %%---------------------------------------------------------------------- %% Internal exports %%---------------------------------------------------------------------- -export([ cf/3, cf/4, cf/5 ]). %%---------------------------------------------------------------------- %% Records %%---------------------------------------------------------------------- %%---------------------------------------------------------------------- %% Macros %%---------------------------------------------------------------------- -define(HTTP_DEF_PORT, 80). 
%%======================================================================
%% External functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: parse(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = the value produced by handle_end_document/1 for
%%                    the parse outcome; {ok, EventState, Rest} when
%%                    the document was parsed successfully
%%           EventState = term()
%% Description: Parsing XML from input stream. The startDocument event
%%              is sent first; every outcome, including thrown terms
%%              and other exceptions, is routed through
%%              handle_end_document/1.
%%----------------------------------------------------------------------
parse(Xml, State) ->
    try
        Started = event_callback(startDocument, State),
        %% Each parse starts with an empty reference table.
        Seeded = Started#xmerl_sax_parser_state{ref_table = maps:new()},
        handle_end_document(parse_document(Xml, Seeded))
    catch
        throw:Thrown ->
            handle_end_document(Thrown);
        _:Failure ->
            %% Only the state passed in by the caller is available here.
            handle_end_document({other, Failure, State})
    end.
%%----------------------------------------------------------------------
%% Function: parse_dtd(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = the value produced by handle_end_document/1 for
%%                    the parse outcome; {ok, EventState, Rest} when
%%                    the DTD was parsed successfully
%%           EventState = term()
%% Description: Parsing XML DTD from input stream. The input is parsed
%%              with parse_external_entity_1/3; every outcome,
%%              including thrown terms and other exceptions, is routed
%%              through handle_end_document/1.
%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
    try
        Started = event_callback(startDocument, State),
        %% Each parse starts with an empty reference table.
        Seeded = Started#xmerl_sax_parser_state{ref_table = maps:new()},
        handle_end_document(parse_external_entity_1(Xml, Seeded, []))
    catch
        throw:Thrown ->
            handle_end_document(Thrown);
        _:Failure ->
            %% Only the state passed in by the caller is available here.
            handle_end_document({other, Failure, State})
    end.
%%======================================================================
%% Internal functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: handle_end_document(ParserResult) -> Result
%% Input:    ParserResult = {ok, Rest, State} |
%%                          {endDocument, Rest, State} |
%%                          {fatal_error, {State, Reason}} |
%%                          {event_receiver_error, State, {Tag, Reason}} |
%%                          {Rest, State} |
%%                          {other, Error, State}
%% Output:   Result = {ok, EventState, Rest} |
%%                    the value of format_error/3 |
%%                    {fatal_error, term()}
%%           EventState = term()
%% Description: Ends the parsing and formats output. The endDocument
%%              event is always sent. An event_receiver_error thrown
%%              by that callback is reported through format_error/3;
%%              any other exception gives {fatal_error, Exception}.
%%----------------------------------------------------------------------
handle_end_document({ok, Rest, State}) ->
    %% ok case from parse
    try
        State1 = event_callback(endDocument, State),
        case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of
            true ->
                {ok, State1#xmerl_sax_parser_state.event_state, Rest};
            false ->
                format_error(fatal_error, State1, "Input found after legal document")
        end
    catch
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({endDocument, Rest, State}) ->
    %% ok case from parse and parse_dtd
    try
        State1 = event_callback(endDocument, State),
        {ok, State1#xmerl_sax_parser_state.event_state, Rest}
    catch
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({fatal_error, {State, Reason}}) ->
    try
        State1 = event_callback(endDocument, State),
        format_error(fatal_error, State1, Reason)
    catch
        %% Fresh variable names: reusing Reason here would only match a
        %% callback error carrying exactly the same reason as the
        %% original fatal error.
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({event_receiver_error, State, {Tag, Reason}}) ->
    try
        State1 = event_callback(endDocument, State),
        format_error(Tag, State1, Reason)
    catch
        %% Fresh variable names: reusing Tag/Reason here would only
        %% match a callback error identical to the one being reported.
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) ->
    %% ok case from parse_dtd
    try
        State1 = event_callback(endDocument, State),
        {ok, State1#xmerl_sax_parser_state.event_state, Rest}
    catch
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({other, Error, State}) ->
    try
        _State1 = event_callback(endDocument, State),
        {fatal_error, Error}
    catch
        throw:{event_receiver_error, State2, {CbTag, CbReason}} ->
            format_error(CbTag, State2, CbReason);
        _:Other ->
            {fatal_error, Other}
    end.

%%----------------------------------------------------------------------
%% Function: parse_document(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, Rest, State}
%% Description: Parsing an XML document
%%  [1] document ::= prolog element Misc*
%%----------------------------------------------------------------------
parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) ->
    {Rest1, State1} = parse_byte_order_mark(Rest, State),
    %% Eod = true: empty input at this point ends the document.
    {Rest2, State2} = parse_misc(Rest1, State1, true),
    {ok, Rest2, State2}.

?PARSE_BYTE_ORDER_MARK(Bytes, State).

%%----------------------------------------------------------------------
%% Function: parse_xml_decl(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parsing the xml directive in the prolog.
%%  [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%  [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
%%----------------------------------------------------------------------
%% NOTE(review): the "<?...>"-shaped string literals and the clause heads
%% following them were missing from this copy of the file (they look
%% like markup that was stripped). They are restored here from the
%% surviving clause bodies and the grammar above - confirm against the
%% upstream source.
parse_xml_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<") = Bytes, State) ->
    cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?") = Bytes, State) ->
    cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?x") = Bytes, State) ->
    cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xm") = Bytes, State) ->
    cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xml") = Bytes, State) ->
    cf(Bytes, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING_REST("<?xml", Rest1), State) ->
    parse_xml_decl_rest(Rest1, State);
?PARSE_XML_DECL(Bytes, State).

%% Decides what follows "<?xml": whitespace means a real XML
%% declaration (version info is parsed, then the prolog); anything else
%% is handed to parse_prolog/2 with the "<?xml" prefix put back.
parse_xml_decl_rest(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_xml_decl_rest/2);
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
    if
        ?is_whitespace(C) ->
            {_XmlAttributes, Rest1, State1} = parse_version_info(Rest, State, []),
            parse_prolog(Rest1, State1);
        true ->
            parse_prolog(?STRING_REST("<?xml", Bytes), State)
    end;
parse_xml_decl_rest(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined).

%%----------------------------------------------------------------------
%% Function: parse_text_decl(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parsing the text declaration in an external parsed entity.
%%              Input that does not start with "<?xml" is returned
%%              unchanged.
%%  [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
%%----------------------------------------------------------------------
parse_text_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_text_decl/2);
parse_text_decl(?STRING("<") = Bytes, State) ->
    cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING("<?") = Bytes, State) ->
    cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING("<?x") = Bytes, State) ->
    cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING("<?xm") = Bytes, State) ->
    cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING("<?xml") = Bytes, State) ->
    cf(Bytes, State, fun parse_text_decl/2);
parse_text_decl(?STRING_REST("<?xml", Rest1), State) ->
    parse_text_decl_1(Rest1, State);
parse_text_decl(Bytes, State) ->
    {Bytes, State}.
%% Handles the part of a text declaration that follows "<?xml": an
%% optional version attribute, after which parsing continues with the
%% encoding attribute in parse_text_decl_2/2. Input that is empty or
%% consists of a lone "?", "v" or "e" is passed to cf/3 together with
%% the function to resume in.
parse_text_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_text_decl_1/2);
parse_text_decl_1(?STRING("?") = Partial, State) ->
    cf(Partial, State, fun parse_text_decl_1/2);
parse_text_decl_1(?STRING("v") = Partial, State) ->
    cf(Partial, State, fun parse_text_decl_1/2);
parse_text_decl_1(?STRING("e") = Partial, State) ->
    cf(Partial, State, fun parse_text_decl_2/2);
parse_text_decl_1(?STRING_REST("?>", _), State) ->
    %% The declaration ended before any encoding attribute was seen.
    ?fatal_error(State, "expecting attribute encoding");
parse_text_decl_1(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_, Trimmed, SkipState} = whitespace(Input, State, []),
    parse_text_decl_1(Trimmed, SkipState);
parse_text_decl_1(?STRING_REST("v", Tail), State) ->
    case parse_name(Tail, State, [$v]) of
        {"version", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            %% The version value itself is not used.
            {_, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_text_decl_2(AfterValue, ValueState);
        {_, _, NameState} ->
            ?fatal_error(NameState, "expecting attribute version")
    end;
parse_text_decl_1(?STRING_REST("e", _) = Input, State) ->
    parse_text_decl_2(Input, State);
parse_text_decl_1(?STRING_UNBOUND_REST(_, _), State) ->
    ?fatal_error(State, "expecting attribute encoding or version");
parse_text_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_text_decl_1/2],
                             "expecting attribute encoding or version").
%% Parses the encoding attribute of a text declaration and continues
%% with the closing "?>" in parse_text_decl_3/2.
parse_text_decl_2(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_text_decl_2/2);
parse_text_decl_2(?STRING("e") = Partial, State) ->
    cf(Partial, State, fun parse_text_decl_2/2);
parse_text_decl_2(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_, Trimmed, SkipState} = whitespace(Input, State, []),
    parse_text_decl_2(Trimmed, SkipState);
parse_text_decl_2(?STRING_REST("e", Tail), State) ->
    case parse_name(Tail, State, [$e]) of
        {"encoding", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            %% The encoding value itself is not used.
            {_, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_text_decl_3(AfterValue, ValueState);
        {_, _, NameState} ->
            ?fatal_error(NameState, "expecting attribute encoding")
    end;
parse_text_decl_2(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_text_decl_2/2],
                             "expecting attribute encoding").

%% Consumes optional whitespace and the "?>" that closes a text
%% declaration; returns the input after it.
parse_text_decl_3(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_text_decl_3/2);
parse_text_decl_3(?STRING("?") = Partial, State) ->
    cf(Partial, State, fun parse_text_decl_3/2);
parse_text_decl_3(?STRING_REST("?>", Tail), State) ->
    {Tail, State};
parse_text_decl_3(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_, Trimmed, SkipState} = whitespace(Input, State, []),
    parse_text_decl_3(Trimmed, SkipState);
parse_text_decl_3(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_text_decl_3/2],
                             "expecting ?>").

%%----------------------------------------------------------------------
%% Function: parse_prolog(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parsing XML prolog
%%  [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%----------------------------------------------------------------------
%% NOTE(review): the heads of the "<?" and "<!" clauses were missing
%% from this copy of the file (they look like markup that was
%% stripped). They are restored here from the surviving clause bodies -
%% confirm against the upstream source.
parse_prolog(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog/2);
parse_prolog(?STRING("<") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog/2);
parse_prolog(?STRING_REST("<?", Rest), State) ->
    %% Processing instruction; both result shapes of parse_pi/2
    %% continue with the prolog.
    case parse_pi(Rest, State) of
        {Rest1, State1} ->
            parse_prolog(Rest1, State1);
        {endDocument, Rest1, State1} ->
            parse_prolog(Rest1, State1)
    end;
parse_prolog(?STRING_REST("<!", Rest), State) ->
    %% Comment or DOCTYPE declaration.
    parse_prolog_1(Rest, State);
parse_prolog(?STRING_REST("<", Rest), State) ->
    %% Any other "<" starts the root element.
    parse_stag(Rest, State);
parse_prolog(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} = whitespace(Rest, State, []),
    parse_prolog(Rest1, State1);
parse_prolog(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog/2],
                             "expecting < or whitespace").

%% Handles what follows "<!" in the prolog: either "DOCTYPE" or a
%% comment opening "--". Partial keywords are passed to cf/3 with this
%% function as the one to resume in.
parse_prolog_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("D") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DO") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOC") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCT") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTY") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTYP") = Bytes, State) ->
    cf(Bytes, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", Rest), State) ->
    {Rest1, State1} = parse_doctype(Rest, State),
    ok = check_ref_cycle(State1),
    State2 = event_callback(endDTD, State1),
    parse_prolog(Rest1, State2);
parse_prolog_1(?STRING("-"), State) ->
    cf(?STRING("-"), State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("--", Rest), State) ->
    {Rest1, State1} = parse_comment(Rest, State, []),
    parse_prolog(Rest1, State1);
parse_prolog_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog_1/2],
                             "expecting comment or DOCTYPE").
%%----------------------------------------------------------------------
%% Function: parse_version_info(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parsing the version number in the XML directive.
%%              Any version of the form 1.N (N being one or more
%%              digits) is accepted and recorded as "1.0"; everything
%%              else is a fatal error.
%%  [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
%%  [26] VersionNum ::= '1.' [0-9]+
%%----------------------------------------------------------------------
parse_version_info(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_version_info/3);
parse_version_info(?STRING_UNBOUND_REST(C, _) = Rest, State, Acc) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} = whitespace(Rest, State, []),
    parse_version_info(Rest1, State1, Acc);
parse_version_info(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    case is_name_start(C) of
        true ->
            case parse_name(Rest, State, [C]) of
                {"version", Rest1, State1} ->
                    {Rest2, State2} = parse_eq(Rest1, State1),
                    case parse_att_value(Rest2, State2) of
                        {"1." ++ SubVersion, Rest3, State3} ->
                            %% any 1.N version is valid but will be handled as 1.0
                            IsDigit = fun(D) -> D >= $0 andalso D =< $9 end,
                            %% lists:all/2 is true for [], so the empty
                            %% sub-version ("1.") is rejected explicitly.
                            case SubVersion =/= [] andalso lists:all(IsDigit, SubVersion) of
                                true ->
                                    parse_xml_decl_rest(Rest3, State3, [{"version", "1.0"} | Acc]);
                                false ->
                                    ?fatal_error(State3, "unsupported version: 1." ++ SubVersion)
                            end;
                        {Version, _Rest3, State3} ->
                            ?fatal_error(State3, "unsupported version: " ++ Version)
                    end;
                {_, _, State1} ->
                    ?fatal_error(State1, "expecting attribute version")
            end;
        false ->
            ?fatal_error(State, "expecting attribute version")
    end;
parse_version_info(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_version_info/3], undefined).
%%----------------------------------------------------------------------
%% Function: parse_xml_decl_rest(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Checks if there is more to parse in the XML directive.
%%              "?>" ends the directive and returns the collected
%%              attributes in the order they were found; whitespace
%%              leads on to the encoding attribute.
%%----------------------------------------------------------------------
parse_xml_decl_rest(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING("?") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING_REST("?>", Tail), State, Acc) ->
    Attributes = lists:reverse(Acc),
    {Attributes, Tail, State};
parse_xml_decl_rest(?STRING_UNBOUND_REST(Ch, _) = Input, State, Acc) when ?is_whitespace(Ch) ->
    {_, Trimmed, SkipState} = whitespace(Input, State, []),
    parse_xml_decl_encoding(Trimmed, SkipState, Acc);
parse_xml_decl_rest(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_rest/3],
                             "expecting encoding, standalone, whitespace or ?>").

%%----------------------------------------------------------------------
%% Function: parse_xml_decl_encoding(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parse the encoding attribute in the XML directive.
%%  [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
%%  [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
%%----------------------------------------------------------------------
parse_xml_decl_encoding(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding/3);
parse_xml_decl_encoding(?STRING_REST("e", Tail), State, Acc) ->
    case parse_name(Tail, State, [$e]) of
        {"encoding", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Encoding, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_xml_decl_encoding_1(AfterValue, ValueState,
                                      [{"encoding", Encoding} | Acc]);
        {Other, _, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ Other ++ " not allowed in xml declaration")
    end;
parse_xml_decl_encoding(?STRING_UNBOUND_REST(_, _) = Input, State, Acc) ->
    %% Not an attribute starting with "e": try standalone instead.
    parse_xml_decl_standalone(Input, State, Acc);
parse_xml_decl_encoding(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_encoding/3],
                             undefined).

%% Decides what follows the encoding attribute: whitespace leads on to
%% the standalone attribute, anything else to the end-of-directive check.
parse_xml_decl_encoding_1(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding_1/3);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(Ch, _) = Input, State, Acc) when ?is_whitespace(Ch) ->
    {_, Trimmed, SkipState} = whitespace(Input, State, []),
    parse_xml_decl_standalone(Trimmed, SkipState, Acc);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(_, _) = Input, State, Acc) ->
    parse_xml_decl_rest(Input, State, Acc);
parse_xml_decl_encoding_1(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_encoding_1/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_xml_decl_standalone(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parse the standalone attribute in the XML directive.
%%  [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
%%                  ('"' ('yes' | 'no') '"'))
%%----------------------------------------------------------------------
parse_xml_decl_standalone(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_standalone/3);
parse_xml_decl_standalone(?STRING_REST("s", Rest), State, Acc) ->
    case parse_name(Rest, State, [$s]) of
        {"standalone", Rest1, State1} ->
            {Rest2, State2} = parse_eq(Rest1, State1),
            {Standalone, Rest3, State3} = parse_att_value(Rest2, State2),
            %% Map the two legal values to atoms directly instead of
            %% calling list_to_atom/1 on text taken from the document.
            %% Assumes ?fatal_error does not return (handle_end_document/1
            %% handles a thrown {fatal_error, {State, Reason}}) - confirm
            %% against the macro definition.
            StandaloneAtom =
                case Standalone of
                    "yes" -> yes;
                    "no" -> no;
                    _ ->
                        ?fatal_error(State3, "Wrong value of attribute standalone in xml declaration, must be yes or no")
                end,
            {_WS, Rest4, State4} = whitespace(Rest3, State3, []),
            parse_xml_decl_rest(Rest4,
                                State4#xmerl_sax_parser_state{standalone = StandaloneAtom},
                                [{"standalone", Standalone} | Acc]);
        {Name, _Rest1, State1} ->
            ?fatal_error(State1, "Attribute " ++ Name ++ " not allowed in xml declaration")
    end;
parse_xml_decl_standalone(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    parse_xml_decl_rest(Bytes, State, Acc);
parse_xml_decl_standalone(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_standalone/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_pi(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State} | {endDocument, Rest, State}
%% Description: Parse processing instructions.
%%              A target equal to "xml" (case-insensitive) returns
%%              {endDocument, Rest, State} with the input and state
%%              unchanged when check_if_new_doc_allowed/2 permits it,
%%              and is a fatal error otherwise. For any other target a
%%              processingInstruction event is sent.
%%  [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
%%  [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
%%----------------------------------------------------------------------
parse_pi(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi/2);
parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
    case is_name_start(C) of
        true ->
            {PiTarget, Rest1, State1} = parse_name(Rest, State, [C]),
            case string:to_lower(PiTarget) of
                "xml" ->
                    case check_if_new_doc_allowed(State#xmerl_sax_parser_state.input_type,
                                                  State#xmerl_sax_parser_state.end_tags) of
                        true ->
                            {endDocument, Bytes, State};
                        false ->
                            %% NOTE(review): this message looks as if a
                            %% leading token was lost; it is kept exactly
                            %% as found.
                            ?fatal_error(State1, " not first in document")
                    end;
                _ ->
                    {PiData, Rest2, State2} = parse_pi_1(Rest1, State1),
                    State3 = event_callback({processingInstruction, PiTarget, PiData}, State2),
                    {Rest2, State3}
            end;
        false ->
            ?fatal_error(State, "expecting name")
    end;
parse_pi(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).

%% True only for stream input with an empty end-tag list.
check_if_new_doc_allowed(stream, []) ->
    true;
check_if_new_doc_allowed(_, _) ->
    false.

%% True when the input left after the document is acceptable: nothing
%% (empty list or empty binary) for file input, anything for stream
%% input.
check_if_rest_ok(file, []) ->
    true;
check_if_rest_ok(file, <<>>) ->
    true;
check_if_rest_ok(stream, _) ->
    true;
check_if_rest_ok(_, _) ->
    false.

%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {PiData, Rest, State}
%% Description: Parse processing instructions.
%%----------------------------------------------------------------------
%% Handles what follows the target of a processing instruction: either
%% whitespace and then the data, or "?>" directly (empty data).
parse_pi_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi_1/2);
parse_pi_1(?STRING("?") = Rest, State) ->
    cf(Rest, State, fun parse_pi_1/2);
parse_pi_1(?STRING_UNBOUND_REST(C, _) = Rest, State) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} = whitespace(Rest, State, []),
    parse_pi_data(Rest1, State1, []);
parse_pi_1(?STRING_REST("?>", Rest), State) ->
    {[], Rest, State};
parse_pi_1(Bytes, State) ->
    %% The function to resume in must be parse_pi_1/2: parse_pi/2
    %% returns a differently shaped tuple than callers of this function
    %% match on.
    unicode_incomplete_check([Bytes, State, fun parse_pi_1/2],
                             "expecting whitespace or '?>'").

%%----------------------------------------------------------------------
%% Function: parse_name(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Name, Rest, State}
%%           Name = string()
%% Description: Parse a name. Next character is put in the accumulator
%%              if it's a valid name character.
%%  [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
parse_name(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_name/3);
parse_name(?STRING_UNBOUND_REST(C, Rest) = Bytes, State, Acc) ->
    case is_name_char(C) of
        true ->
            parse_name(Rest, State, [C | Acc]);
        false ->
            %% Acc is built in reverse; the first non-name character
            %% stays in the returned input.
            {lists:reverse(Acc), Bytes, State}
    end;
parse_name(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_name/3], undefined).

%%----------------------------------------------------------------------
%% Function: parse_ns_name(Rest, State, Prefix, Name) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Prefix = string()
%%           Name = string()
%% Output:   Result = {{Prefix, Name}, Rest, State}
%%           Name = string()
%% Description: Parse a namespace name. Next character is put in the
%%              accumulator if it's a valid name character.
%%              The difference between this function and parse_name/3 is
%%              that a colon is interpreted as a separator between the
%%              namespace prefix and the name.
%%----------------------------------------------------------------------
parse_ns_name(?STRING_EMPTY, State, Prefix, Name) ->
    cf(?STRING_EMPTY, State, Prefix, Name, fun parse_ns_name/4);
parse_ns_name(?STRING_UNBOUND_REST($:, Tail), State, [], Name) ->
    %% First colon while no prefix is set: what has been collected so
    %% far becomes the prefix and the local name starts over.
    NewPrefix = lists:reverse(Name),
    parse_ns_name(Tail, State, NewPrefix, []);
parse_ns_name(?STRING_UNBOUND_REST(Ch, Tail) = Input, State, Prefix, Name) ->
    case is_name_char(Ch) of
        true ->
            parse_ns_name(Tail, State, Prefix, [Ch | Name]);
        false ->
            LocalName = lists:reverse(Name),
            {{Prefix, LocalName}, Input, State}
    end;
parse_ns_name(Input, State, Prefix, Name) ->
    unicode_incomplete_check([Input, State, Prefix, Name, fun parse_ns_name/4],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_pi_data(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {PiData, Rest, State}
%%           PiData = string()
%% Description: Parse the data part of the processing instruction.
%%              If next character is valid it's put in the accumulator.
%%----------------------------------------------------------------------
%% Line ends ("\n", "\r\n" and a "\r" followed by something else) are
%% stored as ?lf and bump the line counter. A lone "?" or "\r" at the
%% end of the input is passed to cf/4, since the next character decides
%% how it is to be read.
parse_pi_data(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("?") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING_REST("?>", Tail), State, Acc) ->
    PiData = lists:reverse(Acc),
    {PiData, Tail, State};
parse_pi_data(?STRING_REST("\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_pi_data(Tail, NextLine, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_pi_data(Tail, NextLine, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_pi_data(Tail, NextLine, [?lf | Acc]);
parse_pi_data(?STRING_UNBOUND_REST(Ch, Tail), State, Acc) when ?is_char(Ch) ->
    parse_pi_data(Tail, State, [Ch | Acc]);
parse_pi_data(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_pi_data/3],
                             "not an character").

%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Start the parsing of a CDATA block.
%%  [18] CDSect ::= CDStart CData CDEnd
%%  [19] CDStart ::= '<![CDATA['
%%  [20] CData ::= (Char* - (Char* ']]>' Char*))
%%  [21] CDEnd ::= ']]>'
%%----------------------------------------------------------------------
%% Matches the "[CDATA[" opener. Every proper prefix of it is passed to
%% cf/3 with this function as the one to resume in.
parse_cdata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_cdata/2);
parse_cdata(?STRING("[") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[C") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CD") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDAT") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDATA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING_REST("[CDATA[", Tail), State) ->
    Opened = event_callback(startCDATA, State),
    parse_cdata(Tail, Opened, []);
parse_cdata(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_cdata/2],
                             "expecting comment or CDATA").

%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Parse a CDATA block.
%%----------------------------------------------------------------------
%% Collects characters up to "]]>", then sends the characters and
%% endCDATA events and continues with parse_content/4. Line ends are
%% stored as ?lf and bump the line counter.
parse_cdata(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING_REST("]]>", Tail), State, Acc) ->
    Text = lists:reverse(Acc),
    AfterText = event_callback({characters, Text}, State),
    Closed = event_callback(endCDATA, AfterText),
    parse_content(Tail, Closed, [], true);
parse_cdata(?STRING_REST("\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_cdata(Tail, NextLine, [?lf | Acc]);
parse_cdata(?STRING_REST("\r\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_cdata(Tail, NextLine, [?lf | Acc]);
parse_cdata(?STRING_REST("\r", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    NextLine = State#xmerl_sax_parser_state{line_no = Line + 1},
    parse_cdata(Tail, NextLine, [?lf | Acc]);
parse_cdata(?STRING_UNBOUND_REST(Ch, Tail), State, Acc) when ?is_char(Ch) ->
    parse_cdata(Tail, State, [Ch | Acc]);
parse_cdata(?STRING_UNBOUND_REST(Ch, _), State, _) ->
    ?fatal_error(State, "CDATA contains bad character value: " ++ [Ch]);
parse_cdata(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_cdata/3], undefined).

%%----------------------------------------------------------------------
%% Function: parse_comment(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Parse a comment.
%%  [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
%%----------------------------------------------------------------------
%% Collects characters up to "-->" and then sends a comment event. A
%% "--" that is not followed by ">" is a fatal error. Line ends are
%% stored as ?lf and bump the line counter.
parse_comment(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
parse_comment(?STRING("\r") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("-") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("--") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
    State1 = event_callback({comment, lists:reverse(Acc)}, State),
    {Rest, State1};
parse_comment(?STRING_REST("--", _), State, _) ->
    ?fatal_error(State, "comment contains '--'");
parse_comment(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    if
        ?is_char(C) ->
            parse_comment(Rest, State, [C|Acc]);
        true ->
            %% C is a single code point; wrap it in a list so the
            %% message stays a proper string.
            ?fatal_error(State, "Bad character in comment: " ++ [C])
    end;
parse_comment(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_comment/3], undefined).

%%----------------------------------------------------------------------
%% Function: parse_misc(Rest, State, Eod) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Eod = true | false
%% Output:   Result = {Rest, State}
%% Description: Parse a misc clause, could be a comment, a processing
%%              instruction or whitespace. If the input stream is empty
%%              (Eod parameter true) then we return current state and quit.
%% [27] Misc ::= Comment | PI | S %%---------------------------------------------------------------------- parse_misc(?STRING_EMPTY, State, true) -> {?STRING_EMPTY, State}; parse_misc(?STRING_EMPTY, State, Eod) -> cf(?STRING_EMPTY, State, Eod, fun parse_misc/3); parse_misc(?STRING("<") = Rest, State, Eod) -> cf(Rest, State, Eod, fun parse_misc/3); parse_misc(?STRING_REST(" case parse_pi(Rest, State) of {Rest1, State1} -> parse_misc(Rest1, State1, Eod); {endDocument, _Rest1, State1} -> IValue = ?TO_INPUT_FORMAT(" cf(Rest, State, Eod, fun parse_misc/3); parse_misc(?STRING(" cf(Rest, State, Eod, fun parse_misc/3); parse_misc(?STRING_REST("