summaryrefslogtreecommitdiff
path: root/test/tags4.lm
diff options
context:
space:
mode:
Diffstat (limited to 'test/tags4.lm')
-rw-r--r--test/tags4.lm350
1 files changed, 350 insertions, 0 deletions
diff --git a/test/tags4.lm b/test/tags4.lm
new file mode 100644
index 0000000..f710378
--- /dev/null
+++ b/test/tags4.lm
@@ -0,0 +1,350 @@
+##### LM #####
+#
+#
+# This is somewhat broken. missing_close_id is cuasing close ids to be parseed
+# when they shouldn't. Maybe remove it.
+#
+#
+
+context tags
+ #
+ # Regular Definitions
+ #
+ rl def_name_char /[\-A-Za-z0-9._:?]/
+ rl def_name /[A-Za-z_:] def_name_char*/
+ rl def_system_literal /'"' [^"]* '"' | "'" [^']* "'"/
+
+ #
+ # Scanner for tag names.
+ #
+ lex
+ ignore /space+/
+ token tag_id /def_name/
+ end
+
+ #
+ # Scanner for attributes names
+ #
+ lex
+ ignore /space+/
+ token attr_name /def_name_char+/
+ literal `=
+ end
+
+ literal `> `/>
+
+ # Scanner for attribute values.
+ lex
+ ignore /space+/
+ token dquote_val /'"' ([^"] | '\\' any)* '"'/
+ token squote_val /"'" ([^'] | '\\' any)* "'"/
+ token unq_val /[^ \t\r\n<>"'] [^ \t\r\n<>]*/
+ end
+
+ #
+ # Tokens
+ #
+
+ lex
+ ignore /space+/
+
+ literal `< `</ `<!DOCTYPE
+ token close_tag
+ /'</' [\t ]* [a-zA-Z]+ '>'/
+
+ token doc_data /[^<]+/
+ token comment /'<!--' any* :>> '-->'/
+ end
+
+ #
+ # Tags
+ #
+
+ bool inTagStack( id: str )
+ {
+ LocalTagStack: tag_stack = TagStack
+ for Tag: tag_id in LocalTagStack {
+ if id == Tag.data
+ return true
+ }
+ return false
+ }
+
+ # This scanner is just for the id in close tags. The id needs to be looked up
+ # in the tag stack so we can determine if it is a stray.
+ lex
+ # Ignore whitespace.
+ ignore /space+/
+
+ token stray_close_id //
+ token missing_close_id //
+
+ token close_id /def_name/
+ {
+ # If it is in the tag stack then it is a close_id. If not then it's a
+ # stray_close_id.
+ send_id: int = typeid<stray_close_id>
+
+ if ( inTagStack( match_text ) ) {
+ print( 'CLOSE \'' match_text '\' IN TAG STACK\n' )
+
+ # The tag is in the stack, send missing close tags until we get to it.
+ match TagStack [Top:tag_id Rest:tag_stack]
+ TagStack = Rest
+ while ( Top.data != match_text ) {
+ print( 'SENDING missing close\n' )
+ input.push( make_token( typeid<missing_close_id> '' ) )
+ match TagStack [Top2:tag_id Rest2:tag_stack]
+ Top = Top2
+ TagStack = Rest2
+ }
+
+ print( 'SENDING close\n' )
+ input.push( make_token( typeid<close_id> input.pull( match_length ) ) )
+ }
+ else {
+ print( 'CLOSE \'' match_text '\' NOT IN TAG STACK\n' )
+ # The tag is not in the tag stack so send the id as a stray close.
+ input.push( make_token( typeid<stray_close> input.pull( match_length ) ) )
+ }
+ }
+ end
+
+ #
+ # Tag Stack
+ #
+
+ def tag_stack
+ [tag_id tag_stack]
+ | []
+
+ TagStack: tag_stack
+
+ #
+ # Document Type
+ #
+ # This scanner handles inside DOCTYPE tags (except keywords).
+ lex
+ ignore /space+/
+ token dt_name /def_name/
+ token dt_literal /def_system_literal/
+ token dt_bl /"[" [^\]]* "]"/
+ token dt_close /'>'/
+ end
+
+ # Using a separate scanner for the keywords in DOCTYPE prevents them from
+ # covering dt_name
+ lex
+ ignore /space+/
+ literal `SYSTEM `PUBLIC
+ end
+
+ def DOCTYPE [`<!DOCTYPE dt_name external_id dt_bl? dt_close]
+
+ def external_id
+ [`SYSTEM dt_literal?]
+ | [`PUBLIC dt_literal dt_literal?]
+
+ #
+ # Tags, with optionanal close.
+ #
+
+ def tag
+ [open_tag item* close_tag]
+
+ def unclosed_tag
+ [open_tag item* missing_close_id]
+
+ def open_tag
+ [`< tag_id attr* `>]
+ {
+ TagStack = construct tag_stack
+ [r2 TagStack]
+ }
+
+ #
+ # Empty tags
+ #
+ def empty_tag
+ [`< tag_id attr* `/>]
+
+ #
+ # Stray close tags
+ #
+ def stray_close
+ [close_tag]
+
+
+ #
+ # Attributes
+ #
+
+ def attr
+ [attr_name eql_attr_val?]
+
+ def eql_attr_val [`= attr_val]
+
+ def attr_val
+ [squote_val]
+ | [dquote_val]
+ | [unq_val]
+ | []
+
+ #
+ # Items
+ #
+
+ def item
+ [DOCTYPE]
+ | [tag]
+ | [unclosed_tag]
+ | [empty_tag]
+ | [stray_close]
+ | [doc_data]
+ | [comment]
+
+
+ token trailing /any*/
+
+ def start
+ [item* trailing]
+
+ #
+ # END GRAMMAR
+ #
+
+ int addDefaultAltTags( Start: ref<start> )
+ {
+ for T: open_tag in Start {
+ require T
+ ["<img" AttrList: attr* '>']
+
+ haveAlt: bool = false
+ for A: attr in T {
+ if match A ["alt=" attr_val]
+ haveAlt = true
+ }
+
+ if !haveAlt {
+ for AL: attr* in T {
+ if match AL [] {
+ AL = construct attr*
+ [" alt=\"default alt\""]
+ break
+ }
+ }
+ }
+ }
+ }
+
+ int printLinks( Start: start )
+ {
+ for A:tag in Start {
+ require A
+ ["<a" AttrList: attr* ">" I: item* "</a>"]
+
+ for Attr: attr in AttrList {
+ if match Attr ["href = " AttrVal: attr_val]
+ print( 'link: ' I '\ntarget: ' AttrVal '\n\n' )
+ }
+ }
+ }
+
+
+ bool should_close( TI: tag_id )
+ {
+ return true
+ }
+
+ bool should_flatten( TI: tag_id )
+ {
+ return true
+ }
+end # tags
+
+# Finds unclosed tags and puts the content after the tag. Afterwards
+# all unclosed tags will be empty 'inside'.
+#int flatten( Start: ref<start> )
+#{
+# for TL: item* in Start {
+# require TL
+# [OT: open_tag Inside: item* Trailing: item*]
+#
+# match OT
+# ['<' TagId: tag_id attr* '>']
+#
+# if should_flatten( TagId )
+# {
+# require Inside
+# [item item*]
+#
+# # Put Trailing at the end of inside.
+# for END: item* in Inside {
+# if match END [] {
+# END = Trailing
+# break
+# }
+# }
+#
+# str empty = ''
+# missing_close_id Missing = construct missing_close_id [empty]
+# opt_close_tag EmptyCloseTag =
+# construct opt_close_tag [Missing]
+#
+# # Close the tag and put inside after it.
+# TL = construct item*
+# [OT EmptyCloseTag Inside]
+# }
+# }
+#}
+#
+#int close( Start: ref<start> )
+#{
+# for TL: item in Start {
+# require TL
+# [OpenTag: open_tag Inside: item*]
+#
+# match OpenTag
+# ['<' TagId: tag_id attr* '>']
+#
+# if should_close( TagId )
+# {
+# close_id CloseId = construct close_id
+# [TagId.data]
+#
+# opt_close_tag CloseTag =
+# construct opt_close_tag ['</' CloseId '>']
+#
+# # Close the tag and put inside after it.
+# TL = construct item
+# [OpenTag Inside CloseTag]
+# }
+# }
+#}
+
+cons Tags: tags[]
+Tags.TagStack = construct tags::tag_stack []
+parse HTML: tags::start(Tags)[ stdin ]
+print( HTML )
+
+#print_xml( HTML )
+#for C: close_tag in HTML
+# print( C '\n' )
+##### IN #####
+<t1>
+
+ <t2>
+ <a href="foo">&FOO</a>
+ <t3>
+ </t3>
+
+</t1>
+##### EXP #####
+<t1>
+
+ <t2>
+ <a href="foo">&FOO</a>
+ <t3>
+ </t3>
+
+</t1>