1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import argparse
import sys
import platform
from cmark import CMark
from timeit import default_timer as timer
def pathological_cases():
    """Build the table of pathological inputs.

    Returns a dict mapping a human-readable description to a pair
    ``(markdown_input, compiled_regex)``.  A case passes when the regex is
    found (``regex.search``) in the HTML produced for the input.

    The table is built on demand because several inputs are megabytes long.
    """
    return {
        # note - some pythons have limit of 65535 for {num-matches} in re.
        "U+0000":
            ("abc\u0000de\u0000",
             re.compile("abc\ufffd?de\ufffd?")),
        "U+FEFF (Unicode BOM)":
            ("\ufefffoo",
             re.compile("<p>foo</p>")),
        "nested strong emph":
            (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
             re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
        "many emph closers with no openers":
            ("a_ " * 65000,
             re.compile("(a[_] ){64999}a_")),
        "many emph openers with no closers":
            ("_a " * 65000,
             re.compile("(_a ){64999}_a")),
        "many 3-emph openers with no closers":
            ("a***" * 65000,
             re.compile("(a<em><strong>a</strong></em>){32500}")),
        "many link closers with no openers":
            ("a]" * 65000,
             re.compile(r"(a\]){65000}")),
        "many link openers with no closers":
            ("[a" * 65000,
             re.compile(r"(\[a){65000}")),
        "mismatched openers and closers":
            ("*a_ " * 50000,
             re.compile("([*]a[_] ){49999}[*]a_")),
        "openers and closers multiple of 3":
            ("a**b" + ("c* " * 50000),
             re.compile("a[*][*]b(c[*] ){49999}c[*]")),
        "link openers and emph closers":
            ("[ a_" * 50000,
             re.compile(r"(\[ a_){50000}")),
        "hard link/emph case":
            ("**x [a*b**c*](d)",
             re.compile(r'\*\*x <a href="d">a<em>b\*\*c</em></a>')),
        "nested brackets":
            (("[" * 50000) + "a" + ("]" * 50000),
             re.compile(r"\[{50000}a\]{50000}")),
        "nested block quotes":
            (("> " * 50000) + "a",
             re.compile(r"(<blockquote>\r?\n){50000}")),
        "backticks":
            ("".join("e" + "`" * count for count in range(1, 1000)),
             re.compile(r"^<p>[e`]*</p>\r?\n$")),
        "many links":
            ("[t](/u) " * 50000,
             re.compile('(<a href="/u">t</a> ?){50000}')),
        "many references":
            ("".join("[" + str(label) + "]: u\n"
                     for label in range(1, 20000 * 16)) + "[0] " * 20000,
             re.compile(r"(\[0\] ){19999}")),
        # Each level is indented two more spaces than the previous one so
        # that every item nests inside its predecessor, which is what the
        # expected 999-deep HTML requires.
        # NOTE(review): the text under review showed a single space here,
        # presumably collapsed whitespace; one space per level would not
        # nest under the CommonMark list rules - confirm against upstream.
        "deeply nested lists":
            ("".join("  " * depth + "* a\n" for depth in range(1000)),
             re.compile(r"<ul>\r?\n(<li>a<ul>\r?\n){999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){999}")),
        "many html openers and closers":
            ("<>" * 50000,
             re.compile("(<>){50000}")),
        "many html proc. inst. openers":
            ("x" + "<?" * 50000,
             re.compile(r"x(<\?){50000}")),
        "many html CDATA openers":
            ("x" + "<![CDATA[" * 50000,
             re.compile(r"x(<!\[CDATA\[){50000}")),
        "many backticks and escapes":
            ("\\``" * 50000,
             re.compile("(``){50000}")),
        "many broken link titles":
            ("[ (](" * 50000,
             re.compile(r"(\[ \(\]\(){50000}")),
        "broken thematic break":
            ("* " * 50000 + "a",
             re.compile(r"<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
        "nested invalid link references":
            ("[" * 50000 + "]" * 50000 + "\n\n[a]: /b",
             re.compile(r"\[{50000}\]{50000}")),
    }


def errored_line(description, rc):
    """Return the status line printed when the converter reports a failure.

    ``description`` is padded to 35 columns like the other status lines and
    ``rc`` is the non-zero return code to show.
    """
    # The template must use str.format placeholders throughout; a printf
    # style "%d" here would be printed literally and hide the return code.
    return '{:35} [ERRORED (return code {})]'.format(description, rc)


def run_cases(cmark, cases):
    """Run every case through ``cmark.to_html`` and print one line per case.

    ``cmark`` is any object whose ``to_html(text)`` returns a three-element
    sequence ``(rc, actual, err)``; ``cases`` is a mapping shaped like the
    result of ``pathological_cases()``.

    A non-zero ``rc`` counts as errored (``err`` is printed); otherwise the
    case passes if its regex is found in ``actual`` and fails if not (the
    ``repr`` of ``actual`` is printed).

    Returns the tuple ``(passed, failed, errored)``.
    """
    passed = failed = errored = 0
    for description, (inp, regex) in cases.items():
        start = timer()
        rc, actual, err = cmark.to_html(inp)
        elapsed = timer() - start
        if rc != 0:
            errored += 1
            print(errored_line(description, rc))
            print(err)
        elif regex.search(actual):
            print('{:35} [PASSED] {:.3f} secs'.format(description, elapsed))
            passed += 1
        else:
            print('{:35} [FAILED]'.format(description))
            print(repr(actual))
            failed += 1
    return passed, failed, errored


def main(argv=None):
    """Parse the command line, run all pathological cases, return exit status.

    ``argv`` defaults to ``sys.argv[1:]``.  Recognised options are
    ``-p/--program`` (program to test) and ``--library-dir`` (directory
    containing the dynamic library); both are handed to ``CMark``.

    Returns 0 when no case failed or errored, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description='Run cmark tests.')
    parser.add_argument('-p', '--program', dest='program', nargs='?',
                        default=None, help='program to test')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
                        default=None,
                        help='directory containing dynamic library')
    args = parser.parse_args(sys.argv[1:] if argv is None else argv)

    cmark = CMark(prog=args.program, library_dir=args.library_dir)
    passed, failed, errored = run_cases(cmark, pathological_cases())

    print("%d passed, %d failed, %d errored" % (passed, failed, errored))
    return 0 if (failed == 0 and errored == 0) else 1


if __name__ == "__main__":
    sys.exit(main())
|