diff options
Diffstat (limited to 'tdemarkdown/md4c/test/pathological_tests.py')
-rwxr-xr-x | tdemarkdown/md4c/test/pathological_tests.py | 128 |
1 file changed, 128 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Run pathological-input tests against a Markdown converter.

Each case feeds a large or adversarial input to the converter wrapped by
``CMark`` and checks that the produced output is matched (via
``regex.search``) by an expected pattern.  The time taken by each
conversion is printed next to passing cases.

The process exits with status 0 when every case passed and 1 otherwise.
"""

import re
import argparse
import sys
import platform
from cmark import CMark
from timeit import default_timer as timer

# Maps a human-readable description to a pair ``(input, regex)``; the regex
# must be found somewhere in the converter output for the case to pass.
pathological = {
    # note - some pythons have limit of 65535 for {num-matches} in re.
    "U+0000":
        ("abc\u0000de\u0000",
         re.compile("abc\ufffd?de\ufffd?")),
    "U+FEFF (Unicode BOM)":
        ("\ufefffoo",
         re.compile("<p>foo</p>")),
    "nested strong emph":
        (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
         re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
    "many emph closers with no openers":
        (("a_ " * 65000),
         re.compile("(a[_] ){64999}a_")),
    "many emph openers with no closers":
        (("_a " * 65000),
         re.compile("(_a ){64999}_a")),
    "many 3-emph openers with no closers":
        (("a***" * 65000),
         re.compile("(a<em><strong>a</strong></em>){32500}")),
    "many link closers with no openers":
        (("a]" * 65000),
         re.compile(r"(a\]){65000}")),
    "many link openers with no closers":
        (("[a" * 65000),
         re.compile(r"(\[a){65000}")),
    "mismatched openers and closers":
        (("*a_ " * 50000),
         re.compile("([*]a[_] ){49999}[*]a_")),
    "openers and closers multiple of 3":
        (("a**b" + ("c* " * 50000)),
         re.compile("a[*][*]b(c[*] ){49999}c[*]")),
    "link openers and emph closers":
        (("[ a_" * 50000),
         re.compile(r"(\[ a_){50000}")),
    "hard link/emph case":
        ("**x [a*b**c*](d)",
         re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
    "nested brackets":
        (("[" * 50000) + "a" + ("]" * 50000),
         re.compile(r"\[{50000}a\]{50000}")),
    "nested block quotes":
        ((("> " * 50000) + "a"),
         re.compile("(<blockquote>\r?\n){50000}")),
    "backticks":
        ("".join("e" + "`" * x for x in range(1, 1000)),
         re.compile("^<p>[e`]*</p>\r?\n$")),
    "many links":
        ("[t](/u) " * 50000,
         re.compile("(<a href=\"/u\">t</a> ?){50000}")),
    "many references":
        ("".join("[" + str(x) + "]: u\n" for x in range(1, 20000 * 16))
         + "[0] " * 20000,
         re.compile(r"(\[0\] ){19999}")),
    # NOTE(review): the indent unit below is a single space as seen in this
    # copy of the file; runs of whitespace may have been collapsed in
    # transit -- confirm against the upstream test before relying on it.
    "deeply nested lists":
        ("".join(" " * x + "* a\n" for x in range(0, 1000)),
         re.compile("<ul>\r?\n(<li>a<ul>\r?\n){999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){999}")),
    "many html openers and closers":
        (("<>" * 50000),
         re.compile("(<>){50000}")),
    "many html proc. inst. openers":
        (("x" + "<?" * 50000),
         re.compile("x(<\\?){50000}")),
    "many html CDATA openers":
        (("x" + "<![CDATA[" * 50000),
         re.compile("x(<!\\[CDATA\\[){50000}")),
    "many backticks and escapes":
        (("\\``" * 50000),
         re.compile("(``){50000}")),
    "many broken link titles":
        (("[ (](" * 50000),
         re.compile(r"(\[ \(\]\(){50000}")),
    "broken thematic break":
        (("* " * 50000 + "a"),
         re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
    "nested invalid link references":
        (("[" * 50000 + "]" * 50000 + "\n\n[a]: /b"),
         re.compile(r"\[{50000}\]{50000}")),
}

# Not referenced by the test loop; kept so the module-level name still
# exists.  The previous pattern '/s+/' was a Perl-style literal that matched
# the characters "/s.../" rather than whitespace.
whitespace_re = re.compile(r'\s+')


def run_tests(converter):
    """Run every case in ``pathological`` and print a per-case report.

    ``converter`` must provide ``to_html(text)`` returning a 3-item sequence
    ``(rc, actual, err)``: a non-zero ``rc`` counts the case as errored (and
    ``err`` is printed); otherwise ``actual`` is searched with the case's
    regex to decide between passed and failed.

    Returns True when no case failed or errored.
    """
    passed = 0
    errored = 0
    failed = 0

    for description, (inp, regex) in pathological.items():
        # Only the conversion itself is timed, not the regex check.
        start = timer()
        rc, actual, err = converter.to_html(inp)
        end = timer()
        if rc != 0:
            errored += 1
            # Use a str.format placeholder for rc; a '%d' here would be
            # printed literally and the return code silently dropped.
            print('{:35} [ERRORED (return code {})]'.format(description, rc))
            print(err)
        elif regex.search(actual):
            print('{:35} [PASSED] {:.3f} secs'.format(description, end - start))
            passed += 1
        else:
            print('{:35} [FAILED]'.format(description))
            print(repr(actual))
            failed += 1

    print("%d passed, %d failed, %d errored" % (passed, failed, errored))
    return failed == 0 and errored == 0


def main(argv=None):
    """Parse command-line options, run the tests and exit with 0 or 1.

    ``argv`` defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Run cmark tests.')
    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
                        help='program to test')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                        default=None, help='directory containing dynamic library')
    args = parser.parse_args(sys.argv[1:] if argv is None else argv)

    # Built here rather than at module level so that importing this file
    # does not depend on ``args`` having been parsed.
    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    # sys.exit is always available; the bare exit() helper comes from the
    # site module and is not guaranteed in every environment.
    sys.exit(0 if run_tests(cmark) else 1)


if __name__ == "__main__":
    main()