diff options
Diffstat (limited to 'test/tags4.lm')
-rw-r--r-- | test/tags4.lm | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/test/tags4.lm b/test/tags4.lm new file mode 100644 index 0000000..f710378 --- /dev/null +++ b/test/tags4.lm @@ -0,0 +1,350 @@ +##### LM ##### +# +# +# This is somewhat broken. missing_close_id is cuasing close ids to be parseed +# when they shouldn't. Maybe remove it. +# +# + +context tags + # + # Regular Definitions + # + rl def_name_char /[\-A-Za-z0-9._:?]/ + rl def_name /[A-Za-z_:] def_name_char*/ + rl def_system_literal /'"' [^"]* '"' | "'" [^']* "'"/ + + # + # Scanner for tag names. + # + lex + ignore /space+/ + token tag_id /def_name/ + end + + # + # Scanner for attributes names + # + lex + ignore /space+/ + token attr_name /def_name_char+/ + literal `= + end + + literal `> `/> + + # Scanner for attribute values. + lex + ignore /space+/ + token dquote_val /'"' ([^"] | '\\' any)* '"'/ + token squote_val /"'" ([^'] | '\\' any)* "'"/ + token unq_val /[^ \t\r\n<>"'] [^ \t\r\n<>]*/ + end + + # + # Tokens + # + + lex + ignore /space+/ + + literal `< `</ `<!DOCTYPE + token close_tag + /'</' [\t ]* [a-zA-Z]+ '>'/ + + token doc_data /[^<]+/ + token comment /'<!--' any* :>> '-->'/ + end + + # + # Tags + # + + bool inTagStack( id: str ) + { + LocalTagStack: tag_stack = TagStack + for Tag: tag_id in LocalTagStack { + if id == Tag.data + return true + } + return false + } + + # This scanner is just for the id in close tags. The id needs to be looked up + # in the tag stack so we can determine if it is a stray. + lex + # Ignore whitespace. + ignore /space+/ + + token stray_close_id // + token missing_close_id // + + token close_id /def_name/ + { + # If it is in the tag stack then it is a close_id. If not then it's a + # stray_close_id. + send_id: int = typeid<stray_close_id> + + if ( inTagStack( match_text ) ) { + print( 'CLOSE \'' match_text '\' IN TAG STACK\n' ) + + # The tag is in the stack, send missing close tags until we get to it. + match TagStack [Top:tag_id Rest:tag_stack] + TagStack = Rest + while ( Top.data != match_text ) { + print( 'SENDING missing close\n' ) + input.push( make_token( typeid<missing_close_id> '' ) ) + match TagStack [Top2:tag_id Rest2:tag_stack] + Top = Top2 + TagStack = Rest2 + } + + print( 'SENDING close\n' ) + input.push( make_token( typeid<close_id> input.pull( match_length ) ) ) + } + else { + print( 'CLOSE \'' match_text '\' NOT IN TAG STACK\n' ) + # The tag is not in the tag stack so send the id as a stray close. + input.push( make_token( typeid<stray_close> input.pull( match_length ) ) ) + } + } + end + + # + # Tag Stack + # + + def tag_stack + [tag_id tag_stack] + | [] + + TagStack: tag_stack + + # + # Document Type + # + # This scanner handles inside DOCTYPE tags (except keywords). + lex + ignore /space+/ + token dt_name /def_name/ + token dt_literal /def_system_literal/ + token dt_bl /"[" [^\]]* "]"/ + token dt_close /'>'/ + end + + # Using a separate scanner for the keywords in DOCTYPE prevents them from + # covering dt_name + lex + ignore /space+/ + literal `SYSTEM `PUBLIC + end + + def DOCTYPE [`<!DOCTYPE dt_name external_id dt_bl? dt_close] + + def external_id + [`SYSTEM dt_literal?] + | [`PUBLIC dt_literal dt_literal?] + + # + # Tags, with optionanal close. + # + + def tag + [open_tag item* close_tag] + + def unclosed_tag + [open_tag item* missing_close_id] + + def open_tag + [`< tag_id attr* `>] + { + TagStack = construct tag_stack + [r2 TagStack] + } + + # + # Empty tags + # + def empty_tag + [`< tag_id attr* `/>] + + # + # Stray close tags + # + def stray_close + [close_tag] + + + # + # Attributes + # + + def attr + [attr_name eql_attr_val?] + + def eql_attr_val [`= attr_val] + + def attr_val + [squote_val] + | [dquote_val] + | [unq_val] + | [] + + # + # Items + # + + def item + [DOCTYPE] + | [tag] + | [unclosed_tag] + | [empty_tag] + | [stray_close] + | [doc_data] + | [comment] + + + token trailing /any*/ + + def start + [item* trailing] + + # + # END GRAMMAR + # + + int addDefaultAltTags( Start: ref<start> ) + { + for T: open_tag in Start { + require T + ["<img" AttrList: attr* '>'] + + haveAlt: bool = false + for A: attr in T { + if match A ["alt=" attr_val] + haveAlt = true + } + + if !haveAlt { + for AL: attr* in T { + if match AL [] { + AL = construct attr* + [" alt=\"default alt\""] + break + } + } + } + } + } + + int printLinks( Start: start ) + { + for A:tag in Start { + require A + ["<a" AttrList: attr* ">" I: item* "</a>"] + + for Attr: attr in AttrList { + if match Attr ["href = " AttrVal: attr_val] + print( 'link: ' I '\ntarget: ' AttrVal '\n\n' ) + } + } + } + + + bool should_close( TI: tag_id ) + { + return true + } + + bool should_flatten( TI: tag_id ) + { + return true + } +end # tags + +# Finds unclosed tags and puts the content after the tag. Afterwards +# all unclosed tags will be empty 'inside'. +#int flatten( Start: ref<start> ) +#{ +# for TL: item* in Start { +# require TL +# [OT: open_tag Inside: item* Trailing: item*] +# +# match OT +# ['<' TagId: tag_id attr* '>'] +# +# if should_flatten( TagId ) +# { +# require Inside +# [item item*] +# +# # Put Trailing at the end of inside. +# for END: item* in Inside { +# if match END [] { +# END = Trailing +# break +# } +# } +# +# str empty = '' +# missing_close_id Missing = construct missing_close_id [empty] +# opt_close_tag EmptyCloseTag = +# construct opt_close_tag [Missing] +# +# # Close the tag and put inside after it. +# TL = construct item* +# [OT EmptyCloseTag Inside] +# } +# } +#} +# +#int close( Start: ref<start> ) +#{ +# for TL: item in Start { +# require TL +# [OpenTag: open_tag Inside: item*] +# +# match OpenTag +# ['<' TagId: tag_id attr* '>'] +# +# if should_close( TagId ) +# { +# close_id CloseId = construct close_id +# [TagId.data] +# +# opt_close_tag CloseTag = +# construct opt_close_tag ['</' CloseId '>'] +# +# # Close the tag and put inside after it. +# TL = construct item +# [OpenTag Inside CloseTag] +# } +# } +#} + +cons Tags: tags[] +Tags.TagStack = construct tags::tag_stack [] +parse HTML: tags::start(Tags)[ stdin ] +print( HTML ) + +#print_xml( HTML ) +#for C: close_tag in HTML +# print( C '\n' ) +##### IN ##### +<t1> + + <t2> + <a href="foo">&FOO</a> + <t3> + </t3> + +</t1> +##### EXP ##### +<t1> + + <t2> + <a href="foo">&FOO</a> + <t3> + </t3> + +</t1> |