"""Pygments lexer for an XML variant with a few extra annotation markers.

On top of ordinary XML the lexer recognizes:

* ``??name`` as a tag name and ``"??value"`` as an attribute value -- the
  text after the ``??`` marker is emitted as a ``Comment`` token and the
  marker itself is dropped (the ``italic_*`` names suggest these are meant to
  be rendered in italics; that depends on the style in use),
* ``...`` optionally followed by word characters -- emitted as ``Comment``,
* ``[]`` after a tag name and ``*`` after an attribute name -- emitted as
  ``Operator``.
"""
from pygments.lexer import RegexLexer
from pygments.token import Text, Comment, Operator, Name, String
from pygments.lexers import _lexer_cache
from pygments.lexers._mapping import LEXERS


def italic_attr(lexer, m):
    """Emit a ``"??value"`` attribute value without its ``??`` marker.

    Yields the opening quote (``String``), the text between ``"??`` and the
    closing quote (``Comment``), and the closing quote (``String``).
    """
    value = m.group()[3:-1]  # drop the leading '"??' and the trailing '"'
    yield m.start(), String, '"'
    yield m.start() + 2, Comment, value
    yield m.end() - 1, String, '"'


def italic_generic(lexer, m):
    """Emit the whole match as a single ``Comment`` token."""
    yield m.start(), Comment, m.group()


def italic_tag(lexer, m):
    """Emit an opening ``<??name`` tag as ``<`` plus the name as ``Comment``.

    The ``??`` marker is not emitted.  If the matched text ends with ``>``,
    that character is emitted as a separate ``Name.Tag`` token.
    """
    matched = m.group()
    # The rule allows whitespace between '<' and '??', so locate the marker
    # instead of assuming a fixed offset.  Fall back to the historical
    # fixed-offset slice when the match contains no marker at all.
    _, marker, name = matched.partition("??")
    if not marker:
        name = matched[3:]

    yield m.start(), Name.Tag, "<"
    if name.endswith(">"):
        yield m.start() + 1, Comment, name[:-1]
        yield m.end() - 1, Name.Tag, ">"
    else:
        yield m.start() + 1, Comment, name


def italic_tag_close(lexer, m):
    """Emit a closing ``</??name>`` tag as ``</``, the name, and ``>``.

    The name is emitted as ``Comment`` and the ``??`` marker is dropped,
    mirroring :func:`italic_tag`.  Previously only a single empty token was
    emitted, so the whole closing tag vanished from the highlighted output.
    """
    # Strip the final '>' first, then take what follows the marker; the rule
    # permits whitespace before '>', which is not part of the name.
    name = m.group()[:-1].partition("??")[2].rstrip()
    yield m.start(), Name.Tag, "</"
    yield m.start() + 2, Comment, name
    yield m.end() - 1, Name.Tag, ">"


def repeat_tag_close(lexer, m):
    """Emit a closing ``</name[]>`` tag with ``[]`` as an ``Operator`` token.

    The text before and after the first ``[]`` is emitted as ``Name.Tag``.
    """
    before, _, after = m.group().partition("[]")
    bracket_pos = m.start() + len(before)
    yield m.start(), Name.Tag, before
    yield bracket_pos, Operator, "[]"
    yield bracket_pos + 2, Name.Tag, after


def italic_attr_name(lexer, m):
    """Emit a ``name*=`` attribute name with ``*`` as an ``Operator`` token.

    The text before and after the first ``*`` is emitted as
    ``Name.Attribute``.
    """
    name, _, after = m.group().partition("*")
    star_pos = m.start() + len(name)
    yield m.start(), Name.Attribute, name
    yield star_pos, Operator, "*"
    yield star_pos + 1, Name.Attribute, after


class CustomXMLLexer(RegexLexer):
    """Regex-based lexer for XML plus the annotation markers described above."""

    name = "customxml"
    aliases = ["cxml"]

    tokens = {
        'root': [
            # '...' (optionally followed by a word), with leading whitespace.
            (r'\s*\.\.\.\w*', Comment),
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            # XML comment.  An empty pattern here would match zero-width at
            # every position without changing state, so the lexer would never
            # advance.  [\s\S] lets the comment span lines without re.DOTALL.
            (r'<!--[\s\S]*?-->', Comment.Multiline),
            (r'<\?.*?\?>', Comment.Preproc),
            # Other '<!...>' declarations such as DOCTYPE.
            (r'<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
            # '??'-marked tag names, opening and closing.
            (r'<\s*\?\?[\w:.-]+', italic_tag, 'tag'),
            (r'<\s*/\s*\?\?[\w:.-]+\s*>', italic_tag_close),
            # Closing tag carrying a '[]' suffix.
            (r'<\s*/\s*[\w:.-]+\[\]\s*>', repeat_tag_close),
        ],
        'tag': [
            (r'\*', Operator),
            (r'\[\]', Operator),
            (r'\s+', Text),
            (r'\.\.\.\w*', italic_generic),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            # Attribute name carrying a '*' suffix.
            (r'[\w.:-]+\*\s*=', italic_attr_name, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Text),
            # Must precede the plain double-quoted rule, which would also match.
            (r'"\?\?[^"]*?"', italic_attr, "#pop"),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """Report no confidence that *text* is written in this dialect.

        Returns 0.0, the score Pygments derives from a ``None`` result, but
        without writing debug output to stdout while lexers are being guessed.
        """
        return 0.0


# Register the lexer directly in Pygments' private lookup tables: the class is
# cached under its class name, and the mapping entry points at that same key
# with the "cxml" alias.  NOTE(review): presumably this makes the "cxml" alias
# resolvable without installing a plugin; these are private Pygments
# internals, so confirm against the Pygments version in use.
_lexer_cache[CustomXMLLexer.__name__] = CustomXMLLexer
LEXERS["CustomXMLLexer"] = ("xml_lexer", "CustomXMLLexer", ("cxml", ), (), ())

__all__ = ("CustomXMLLexer", )