Diffstat (limited to 'debian/uncrustify-trinity/uncrustify-trinity-0.74.0/scripts/tokenizer.py')
-rwxr-xr-x | debian/uncrustify-trinity/uncrustify-trinity-0.74.0/scripts/tokenizer.py | 316 |
1 files changed, 316 insertions, 0 deletions
diff --git a/debian/uncrustify-trinity/uncrustify-trinity-0.74.0/scripts/tokenizer.py b/debian/uncrustify-trinity/uncrustify-trinity-0.74.0/scripts/tokenizer.py
new file mode 100755
index 00000000..0bc33bac
--- /dev/null
+++ b/debian/uncrustify-trinity/uncrustify-trinity-0.74.0/scripts/tokenizer.py
@@ -0,0 +1,316 @@
+#! /usr/bin/env python
+# tokenizer.py
+#
+# Parses a C/C++/C#/D/Java/Pawn/whatever file into an array of
+# tuples (string, type)
+#

+# punctuator lookup table
+punc_table = [
+    [ '!', 25, 26, '!'    ],  #  0: '!'
+    [ '#', 24, 35, '#'    ],  #  1: '#'
+    [ '$', 23,  0, '$'    ],  #  2: '$'
+    [ '%', 22, 36, '%'    ],  #  3: '%'
+    [ '&', 21, 41, '&'    ],  #  4: '&'
+    [ '(', 20,  0, '('    ],  #  5: '('
+    [ ')', 19,  0, ')'    ],  #  6: ')'
+    [ '*', 18, 43, '*'    ],  #  7: '*'
+    [ '+', 17, 44, '+'    ],  #  8: '+'
+    [ ',', 16,  0, ','    ],  #  9: ','
+    [ '-', 15, 46, '-'    ],  # 10: '-'
+    [ '.', 14, 50, '.'    ],  # 11: '.'
+    [ '/', 13, 53, '/'    ],  # 12: '/'
+    [ ':', 12, 54, ':'    ],  # 13: ':'
+    [ ';', 11,  0, ';'    ],  # 14: ';'
+    [ '<', 10, 56, '<'    ],  # 15: '<'
+    [ '=',  9, 63, '='    ],  # 16: '='
+    [ '>',  8, 65, '>'    ],  # 17: '>'
+    [ '?',  7,  0, '?'    ],  # 18: '?'
+    [ '[',  6, 70, '['    ],  # 19: '['
+    [ ']',  5,  0, ']'    ],  # 20: ']'
+    [ '^',  4, 71, '^'    ],  # 21: '^'
+    [ '{',  3,  0, '{'    ],  # 22: '{'
+    [ '|',  2, 72, '|'    ],  # 23: '|'
+    [ '}',  1,  0, '}'    ],  # 24: '}'
+    [ '~',  0, 74, '~'    ],  # 25: '~'
+    [ '<',  3, 30, '!<'   ],  # 26: '!<'
+    [ '=',  2, 33, '!='   ],  # 27: '!='
+    [ '>',  1, 34, '!>'   ],  # 28: '!>'
+    [ '~',  0,  0, '!~'   ],  # 29: '!~'
+    [ '=',  1,  0, '!<='  ],  # 30: '!<='
+    [ '>',  0, 32, '!<>'  ],  # 31: '!<>'
+    [ '=',  0,  0, '!<>=' ],  # 32: '!<>='
+    [ '=',  0,  0, '!=='  ],  # 33: '!=='
+    [ '=',  0,  0, '!>='  ],  # 34: '!>='
+    [ '#',  0,  0, '##'   ],  # 35: '##'
+    [ ':',  2, 39, '%:'   ],  # 36: '%:'
+    [ '=',  1,  0, '%='   ],  # 37: '%='
+    [ '>',  0,  0, '%>'   ],  # 38: '%>'
+    [ '%',  0, 40, None   ],  # 39: '%:%'
+    [ ':',  0,  0, '%:%:' ],  # 40: '%:%:'
+    [ '&',  1,  0, '&&'   ],  # 41: '&&'
+    [ '=',  0,  0, '&='   ],  # 42: '&='
+    [ '=',  0,  0, '*='   ],  # 43: '*='
+    [ '+',  1,  0, '++'   ],  # 44: '++'
+    [ '=',  0,  0, '+='   ],  # 45: '+='
+    [ '-',  2,  0, '--'   ],  # 46: '--'
+    [ '=',  1,  0, '-='   ],  # 47: '-='
+    [ '>',  0, 49, '->'   ],  # 48: '->'
+    [ '*',  0,  0, '->*'  ],  # 49: '->*'
+    [ '*',  1,  0, '.*'   ],  # 50: '.*'
+    [ '.',  0, 52, '..'   ],  # 51: '..'
+    [ '.',  0,  0, '...'  ],  # 52: '...'
+    [ '=',  0,  0, '/='   ],  # 53: '/='
+    [ ':',  1,  0, '::'   ],  # 54: '::'
+    [ '>',  0,  0, ':>'   ],  # 55: ':>'
+    [ '%',  4,  0, '<%'   ],  # 56: '<%'
+    [ ':',  3,  0, '<:'   ],  # 57: '<:'
+    [ '<',  2, 61, '<<'   ],  # 58: '<<'
+    [ '=',  1,  0, '<='   ],  # 59: '<='
+    [ '>',  0, 62, '<>'   ],  # 60: '<>'
+    [ '=',  0,  0, '<<='  ],  # 61: '<<='
+    [ '=',  0,  0, '<>='  ],  # 62: '<>='
+    [ '=',  0, 64, '=='   ],  # 63: '=='
+    [ '=',  0,  0, '==='  ],  # 64: '==='
+    [ '=',  1,  0, '>='   ],  # 65: '>='
+    [ '>',  0, 67, '>>'   ],  # 66: '>>'
+    [ '=',  1,  0, '>>='  ],  # 67: '>>='
+    [ '>',  0, 69, '>>>'  ],  # 68: '>>>'
+    [ '=',  0,  0, '>>>=' ],  # 69: '>>>='
+    [ ']',  0,  0, '[]'   ],  # 70: '[]'
+    [ '=',  0,  0, '^='   ],  # 71: '^='
+    [ '=',  1,  0, '|='   ],  # 72: '|='
+    [ '|',  0,  0, '||'   ],  # 73: '||'
+    [ '=',  1,  0, '~='   ],  # 74: '~='
+    [ '~',  0,  0, '~~'   ],  # 75: '~~'
+]
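+
+# How to read the table above: each row is one state in a punctuator
+# trie, laid out as [ ch, remaining, next_idx, token ]:
+#   ch        - the character this row matches
+#   remaining - rows left to try in this group when ch does not match (0 = last)
+#   next_idx  - row to jump to after a match (0 = the token cannot grow further)
+#   token     - the punctuator recognized if the walk stops here (None = no
+#               token ends in this state)
+# For example, '->*' is matched via row 10 ('-'), then rows 46-48 until
+# '>' matches (giving '->'), then row 49 ('*', giving '->*').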
+
+
+#
+# Token types:
+#  0 = newline
+#  1 = punctuator
+#  2 = integer
+#  3 = float
+#  4 = string
+#  5 = identifier
+#
+class Tokenizer:
+    def __init__(self):
+        self.tokens = []
+        self.text = ''
+        self.text_idx = 0
+
+    def tokenize_text(self, in_text):
+        self.tokens = []
+        self.text = in_text
+        self.text_idx = 0
+
+        print(in_text)
+        try:
+            while self.text_idx < len(self.text):
+                if self.parse_whitespace():
+                    continue
+                elif self.text[self.text_idx] == '\\' and self.text[self.text_idx + 1] == '\n':
+                    self.text_idx += 2
+                    continue
+                elif self.parse_comment():
+                    continue
+                elif self.parse_number():
+                    continue
+                elif self.parse_identifier():
+                    continue
+                elif self.parse_string():
+                    continue
+                elif self.parse_punctuator():
+                    continue
+                else:
+                    print("confused: %s" % self.text[self.text_idx:])
+                    break
+        except:
+            print("bombed")
+            raise
+
+    def parse_whitespace(self):
+        start_idx = self.text_idx
+        hit_newline = False
+        while self.text_idx < len(self.text):
+            if self.text[self.text_idx] in '\n\r':
+                hit_newline = True
+            elif not self.text[self.text_idx] in ' \t':
+                break
+            self.text_idx += 1
+
+        if hit_newline:
+            self.tokens.append(('\n', 0))
+        return start_idx != self.text_idx
+
+    def parse_comment(self):
+        if not self.text[self.text_idx] == '/' or not self.text[self.text_idx + 1] in '/*':
+            return False
+        if self.text[self.text_idx + 1] == '/':
+            while self.text_idx < len(self.text):
+                if self.text[self.text_idx] in '\n\r':
+                    break
+                self.text_idx += 1
+        else:
+            while self.text_idx < len(self.text) - 1:
+                if self.text[self.text_idx] == '*' and self.text[self.text_idx + 1] == '/':
+                    self.text_idx += 2
+                    break
+                self.text_idx += 1
+        return True
+
+    def parse_identifier(self):
+        if not self.text[self.text_idx].upper() in '@_ABCDEFGHIJKLMNOPQRSTUVWXYZ':
+            return False
+        start_idx = self.text_idx
+        while self.text_idx < len(self.text) and \
+              self.text[self.text_idx].upper() in '@_ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890':
+            self.text_idx += 1
+        self.tokens.append((self.text[start_idx : self.text_idx], 5))
+        return True
+
+    def parse_string(self):
+        starter = 0
+        start_ch = self.text[self.text_idx]
+        if start_ch == 'L':
+            starter = 1
+            start_ch = self.text[self.text_idx + 1]
+        if not start_ch in '"\'':
+            return False
+        start_idx = self.text_idx
+        self.text_idx += starter + 1
+        escaped = False
+        while self.text_idx < len(self.text):
+            if escaped:
+                escaped = False
+            else:
+                if self.text[self.text_idx] == '\\':
+                    escaped = True
+                elif self.text[self.text_idx] == start_ch:
+                    self.text_idx += 1
+                    break
+            self.text_idx += 1
+
+        self.tokens.append((self.text[start_idx : self.text_idx], 4))
+        return True
+
+    # Checks for punctuators
+    # Returns whether a punctuator was consumed (True or False)
+    def parse_punctuator(self):
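+        # Walk the punc_table trie: on a match, remember the longest
+        # complete token seen so far (pte[3]) and jump to the continuation
+        # group (pte[2]); on a mismatch, fall through to the next row
+        # until this group is exhausted (pte[1] == 0).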
+        tab_idx = 0
+        saved_punc = None
+        while 1:
+            pte = punc_table[tab_idx]
+            if pte[0] == self.text[self.text_idx]:
+                if pte[3] is not None:
+                    saved_punc = pte[3]
+                self.text_idx += 1
+                tab_idx = pte[2]
+                if tab_idx == 0:
+                    break
+            elif pte[1] == 0:
+                break
+            else:
+                tab_idx += 1
+        if saved_punc is not None:
+            self.tokens.append((saved_punc, 1))
+            return True
+        return False
+
+    def parse_number(self):
+        # A number must start with a digit or a dot followed by a digit
+        ch = self.text[self.text_idx]
+        if not ch.isdigit() and (ch != '.' or not self.text[self.text_idx + 1].isdigit()):
+            return False
+        token_type = 2  # integer
+        if ch == '.':
+            token_type = 3  # float
+        did_hex = False
+        start_idx = self.text_idx
+
+        # Check for Hex, Octal, or Binary
+        # Note that only D and Pawn support binary, but who cares?
+        #
+        if ch == '0':
+            self.text_idx += 1
+            ch = self.text[self.text_idx].upper()
+            if ch == 'X':                      # hex
+                did_hex = True
+                self.text_idx += 1
+                while self.text[self.text_idx] in '_0123456789abcdefABCDEF':
+                    self.text_idx += 1
+            elif ch == 'B':                    # binary
+                self.text_idx += 1
+                while self.text[self.text_idx] in '_01':
+                    self.text_idx += 1
+            elif ch >= '0' and ch <= '7':      # octal (but allow decimal)
+                self.text_idx += 1
+                while self.text[self.text_idx] in '_0123456789':
+                    self.text_idx += 1
+            else:
+                # either just 0 or 0.1 or 0UL, etc
+                pass
+        else:
+            # Regular int or float
+            while self.text[self.text_idx] in '_0123456789':
+                self.text_idx += 1
+
+        # Check if we stopped on a decimal point
+        if self.text[self.text_idx] == '.':
+            self.text_idx += 1
+            token_type = 3  # float
+            if did_hex:
+                while self.text[self.text_idx] in '_0123456789abcdefABCDEF':
+                    self.text_idx += 1
+            else:
+                while self.text[self.text_idx] in '_0123456789':
+                    self.text_idx += 1
+
+        # Check exponent
+        # Valid exponents per language (not that it matters):
+        # C/C++/D/Java: eEpP
+        # C#/Pawn:      eE
+        if self.text[self.text_idx] in 'eEpP':
+            token_type = 3  # float
+            self.text_idx += 1
+            if self.text[self.text_idx] in '+-':
+                self.text_idx += 1
+            while self.text[self.text_idx] in '_0123456789':
+                self.text_idx += 1
+
+        # Check the suffixes
+        # Valid suffixes per language (not that it matters):
+        #        Integer Float
+        # C/C++: uUlL    lLfF
+        # C#:    uUlL    fFdDMm
+        # D:     uUL     ifFL
+        # Java:  lL      fFdD
+        # Pawn:  (none)  (none)
+        #
+        # Note that i, f, d, and m only appear in floats.
+        while 1:
+            if self.text[self.text_idx] in 'iIfFdDmM':
+                token_type = 3  # float
+            elif not self.text[self.text_idx] in 'lLuU':
+                break
+            self.text_idx += 1
+
+        self.tokens.append((self.text[start_idx : self.text_idx], token_type))
+        return True
+
+
+text = """
+1.23+4-3*16%2 *sin(1.e-3 + .5p32) "hello" and "hello\\"there"
+123 // some comment
+a = b + c;
+#define abc \\
+  5
+d = 5 /* hello */ + 3;
+"""
+
+t = Tokenizer()
+t.tokenize_text(text)
+print(t.tokens)
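+
+# A quick sanity check of the (string, type) scheme on a simpler input.
+# The expected list is a sketch worked out by tracing the code above,
+# not output captured from a run:
+t2 = Tokenizer()
+t2.tokenize_text("a = b + 1;")
+assert t2.tokens == [('a', 5), ('=', 1), ('b', 5), ('+', 1), ('1', 2), (';', 1)]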