summaryrefslogtreecommitdiffstats
path: root/tdemarkdown/md4c/scripts/build_folding_map.py
diff options
context:
space:
mode:
authorMavridis Philippe <mavridisf@gmail.com>2022-02-05 17:44:26 +0000
committerSlávek Banko <slavek.banko@axis.cz>2022-04-17 18:35:01 +0200
commit95279fbf6dfeb43d80590740a9259d7caa614177 (patch)
tree8c3ff4de77102f1e55357dd81d650e9f22be69a0 /tdemarkdown/md4c/scripts/build_folding_map.py
parenta291f3a0a3fab073c009f77a36745c5c7bd48e9b (diff)
downloadtdelibs-95279fbf6dfeb43d80590740a9259d7caa614177.tar.gz
tdelibs-95279fbf6dfeb43d80590740a9259d7caa614177.zip
Add tdemarkdown part - embeddable lightweight markdown viewing component.
TDEMarkdown is based on the md4c library and using TDEHTML for rendering its output. For enhanced safety, on HTML widget is turned off everything we don't need for viewing. It integrates nicely into Konqueror and supports both Commonmark and GitHub markdown syntaxes. Signed-off-by: Mavridis Philippe <mavridisf@gmail.com> Prepare to merge tdemarkdown into tdelibs. Signed-off-by: Slávek Banko <slavek.banko@axis.cz>
Diffstat (limited to 'tdemarkdown/md4c/scripts/build_folding_map.py')
-rw-r--r--tdemarkdown/md4c/scripts/build_folding_map.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/tdemarkdown/md4c/scripts/build_folding_map.py b/tdemarkdown/md4c/scripts/build_folding_map.py
new file mode 100644
index 000000000..b401775f5
--- /dev/null
+++ b/tdemarkdown/md4c/scripts/build_folding_map.py
@@ -0,0 +1,120 @@
#!/usr/bin/env python3

import os
import sys
import textwrap


# Directory containing this script; the Unicode data file is looked up in the
# "unicode" subdirectory next to it.
self_path = os.path.dirname(os.path.realpath(__file__))

# Folding statuses that are kept; every other status is skipped.
status_list = ["C", "F"]

# folding_list[n-1] maps a codepoint to its folding when that folding consists
# of exactly n codepoints (n = 1, 2 or 3).
folding_list = [dict(), dict(), dict()]

# Filter the foldings for "full" folding.
#
# The file is opened with an explicit encoding so that parsing does not depend
# on the locale, and inside "with" so that the handle is closed even when a
# malformed line raises.
with open(os.path.join(self_path, "unicode", "CaseFolding.txt"), "r",
          encoding="utf-8") as f:
    for line in f:
        # Drop the trailing comment (if any) and the surrounding whitespace.
        comment_off = line.find("#")
        if comment_off >= 0:
            line = line[:comment_off]
        line = line.strip()
        if not line:
            continue

        # Expected shape: "<codepoint>; <status>; <mapping>; <rest>".
        raw_codepoint, status, raw_mapping, ignored_tail = line.split(";", 3)
        if status.strip() not in status_list:
            continue
        codepoint = int(raw_codepoint.strip(), 16)
        # split() without an argument tolerates repeated spaces between the
        # mapped codepoints.
        mapping = [int(it, 16) for it in raw_mapping.split()]
        mapping_len = len(mapping)

        # A real exception rather than an assert: asserts are stripped under
        # "python -O", which would silently drop the entry.
        if not 1 <= mapping_len <= len(folding_list):
            raise ValueError(
                "unsupported folding length {} for codepoint 0x{:04x}".format(
                    mapping_len, codepoint))
        folding_list[mapping_len - 1][codepoint] = mapping
+
+
def is_range_compatible(folding, codepoint_list, index0, index):
    """Tell whether the range starting at index0 can be extended up to index.

    Assuming codepoint_list[index0 ... index-1] already forms a range (as
    defined below), check that the entry at ``index`` is compatible with it,
    i.e. that the range can be extended without breaking its properties.

    Two kinds of ranges are recognized:

    (1) a consecutive sequence of codepoints which is mapped to another
        consecutive sequence of codepoints (of the same length);

    (2) a sequence of codepoints with step 2 where each codepoint CP is
        mapped to CP+1
        (e.g. 0x1234 -> 0x1235; 0x1236 -> 0x1237; 0x1238 -> 0x1239; ...).

    When codepoints are mapped to multiple codepoints, only the first mapped
    codepoint takes part in the checks above; all the remaining ones have to
    be shared by every mapping covered by the range.

    Args:
        folding: Dict mapping a codepoint to its (non-empty) list of mapped
            codepoints.
        codepoint_list: Sequence of the keys of ``folding``, in the order in
            which ranges are being built.
        index0: Index into codepoint_list of the first codepoint of the range.
        index: Index of the candidate codepoint; must be greater than index0.

    Returns:
        True if the candidate fits the range, False otherwise.
    """
    N = index - index0
    codepoint0 = codepoint_list[index0]
    codepoint1 = codepoint_list[index0 + 1]
    codepointN = codepoint_list[index]
    mapping0 = folding[codepoint0]
    mapping1 = folding[codepoint1]
    mappingN = folding[codepointN]

    # The distance between the first two codepoints fixes the range type:
    # 1 for type (1), 2 for type (2). The candidate has to sit exactly N
    # such steps after the start of the range.
    step = codepoint1 - codepoint0
    if codepointN - codepoint0 != step * N:
        return False

    # Everything but the first mapped codepoint must be shared.
    if mapping1[1:] != mapping0[1:] or mappingN[1:] != mapping0[1:]:
        return False

    # Check the range type (1):
    if step == 1:
        return (mapping1[0] - mapping0[0] == 1
                and mappingN[0] - mapping0[0] == N)

    # Check the range type (2):
    if step == 2:
        return (mapping0[0] - codepoint0 == 1
                and mapping1[0] - codepoint1 == 1
                and mappingN[0] - codepointN == 1)

    return False
+
+
def mapping_str(list, mapping):
    """Format mapped codepoints as comma-separated hexadecimal literals.

    Args:
        list: Unused. The parameter (and its builtin-shadowing name) is kept
            only so that existing callers keep working.
        mapping: Iterable of integer codepoints.

    Returns:
        The codepoints rendered as "0x" followed by at least four lowercase
        hex digits, joined by "," without spaces; an empty string for an
        empty mapping.
    """
    return ",".join("0x{:04x}".format(x) for x in mapping)
+
# For every supported folding length (1, 2 and 3 mapped codepoints) write two
# C arrays to stdout:
#
#   FOLD_MAP_<n>       the source codepoints, as R(first,last) for a detected
#                      range or S(codepoint) for a standalone codepoint;
#   FOLD_MAP_<n>_DATA  the mapped codepoints: the mappings of the first and
#                      the last codepoint for each R record, and the single
#                      mapping for each S record.
for mapping_len in range(1, 4):
    folding = folding_list[mapping_len - 1]

    # Range detection compares neighbouring entries of this list, so the
    # codepoints have to be visited in ascending order. Sort explicitly
    # instead of relying on the dict preserving the order of the input file.
    codepoint_list = sorted(folding)

    index0 = 0
    count = len(codepoint_list)

    records = []
    data_records = []

    while index0 < count:
        # Grow the half-open span [index0, index1) for as long as the next
        # codepoint still fits the range started at index0.
        index1 = index0 + 1
        while index1 < count and is_range_compatible(
                folding, codepoint_list, index0, index1):
            index1 += 1

        if index1 - index0 > 2:
            # Range of codepoints (only spans of three or more entries are
            # emitted as a range).
            first = codepoint_list[index0]
            last = codepoint_list[index1 - 1]
            records.append("R(0x{:04x},0x{:04x})".format(first, last))
            data_records.append(mapping_str(data_records, folding[first]))
            data_records.append(mapping_str(data_records, folding[last]))
            index0 = index1
        else:
            # Single codepoint. Advance by one only, so that the following
            # codepoint gets its own chance to start a range.
            single = codepoint_list[index0]
            records.append("S(0x{:04x})".format(single))
            data_records.append(mapping_str(data_records, folding[single]))
            index0 += 1

    # NOTE(review): the indent strings below are reproduced as they appear in
    # the source at hand; whitespace there may have been collapsed - confirm
    # the intended indentation width.
    sys.stdout.write("static const unsigned FOLD_MAP_{}[] = {{\n".format(mapping_len))
    sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
                     initial_indent=" ", subsequent_indent=" ")))
    sys.stdout.write("\n};\n")

    sys.stdout.write("static const unsigned FOLD_MAP_{}_DATA[] = {{\n".format(mapping_len))
    sys.stdout.write("\n".join(textwrap.wrap(", ".join(data_records), 110,
                     initial_indent=" ", subsequent_indent=" ")))
    sys.stdout.write("\n};\n")
+
+
+