1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
import re
class DLexToken:
    """
    A single token handed back by DLexer.GetToken.

    DLexToken contains:
    'id'         - the ID of the token (match with the value DLexer.AddToken
                   returns, or with DLexer.GetErrorTokenID for unmatched input).
    'val'        - the token's string.
    'lineNumber' - the line the token was encountered on.

    The lexer fills these in by plain attribute assignment after creating an
    empty instance.
    """

    # Class-level defaults keep every documented attribute readable even on a
    # token whose creator did not fill all of them in (reading a missing one
    # would otherwise raise AttributeError).
    id = None
    val = None
    lineNumber = None

    def __repr__(self):
        """Return a debugging representation that shows all three fields."""
        return "DLexToken(id=%r, val=%r, lineNumber=%r)" % (
            self.id, self.val, self.lineNumber)
class DLexState:
    """
    Plain attribute bag holding a snapshot of a DLexer's read position.

    DLexer.BackupState stores 'lineNumber', 'currentCharacter' and 'fileLen'
    on an instance; DLexer.RestoreState reads the same three attributes back.
    """
class DLexer:
    """
    DLexer is a simple regular-expression based lexer. Here is how to use it.

    1. Call AddToken to add the regular expressions that it will parse. Add
       them in order of precedence. Store the value returned from AddToken so
       you can compare it to the token ID returned by GetToken to determine
       what kind of token was found.
    2. Call BeginRead or BeginReadFile to set up the input.
    3. Repeatedly call GetToken.
       If it returns None, the whole input has been consumed.
       If it returns a value, it is a DLexToken with 'id', 'val' and
       'lineNumber' set. When input remains but no registered expression
       matches it, the token's id equals GetErrorTokenID() and its val is
       all of the remaining input.

    NOTE: the line counter only advances while whitespace is being skipped;
    line breaks consumed inside a matched token are not counted.
    """

    def __init__(self, bSkipWhitespace=1):
        """
        Create a lexer with no token expressions and empty input.

        bSkipWhitespace - when true, GetToken skips blanks and line breaks
                          (counting lines) before trying to match a token.
        """
        self.__tokens = []
        self.__curTokenID = 0
        self.__notnewline = re.compile('[^\\r\\n]*')
        self.__bSkipWhitespace = bSkipWhitespace
        # Only consulted when bSkipWhitespace is true, but cheap to build
        # unconditionally.
        self.__whitespace = re.compile('[ \\t\\f\\v]+')
        # CRLF is matched as ONE line break so Windows line endings are not
        # counted twice; a lone CR or LF still counts as one.
        self.__newline = re.compile('\\r\\n?|\\n')
        # Start with empty input so the accessors are usable (and GetToken
        # returns None) before BeginRead/BeginReadFile has been called.
        self.BeginRead('')

    def GetErrorTokenID(self):
        """Return the token ID (-1) GetToken uses for unmatched input."""
        return -1

    def AddToken(self, expr, flags=0):
        """
        Register a token expression and return its ID.

        expr  - regular expression source, compiled with re.compile.
        flags - optional re flags passed to re.compile.

        IDs are handed out sequentially starting at 0. Expressions are tried
        in the order they were added, so add them in order of precedence.
        """
        tokenID = self.__curTokenID
        self.__tokens.append([tokenID, re.compile(expr, flags)])
        self.__curTokenID += 1
        return tokenID

    # Store and restore the state.
    def BackupState(self):
        """
        Return a DLexState snapshot of the current read position.

        The snapshot holds the line number, character offset and input length.
        It does not hold the input string itself, so it is only meaningful
        while the same input is loaded.
        """
        ret = DLexState()
        ret.lineNumber = self.__lineNumber
        ret.currentCharacter = self.__currentCharacter
        ret.fileLen = self.__fileLen
        return ret

    def RestoreState(self, state):
        """Rewind (or advance) to a position saved by BackupState."""
        self.__lineNumber = state.lineNumber
        self.__currentCharacter = state.currentCharacter
        self.__fileLen = state.fileLen

    def BeginRead(self, str):
        """Load the string 'str' as the input and reset to line 1, offset 0."""
        self.__curString = str
        self.__lineNumber = 1
        self.__currentCharacter = 0
        self.__fileLen = len(str)

    def BeginReadFile(self, fileName):
        """Read the whole file 'fileName' (text mode) and load it as input."""
        handle = open(fileName, 'r')
        try:
            # Close the handle even if the read fails.
            contents = handle.read()
        finally:
            handle.close()
        self.BeginRead(contents)

    def GetToken(self):
        """
        Return the next token, or None when the input is exhausted.

        The registered expressions are tried in order at the current position
        and the first one that matches wins. If input remains but nothing
        matches, a diagnostic is printed and an error token (id equal to
        GetErrorTokenID()) holding the rest of the input is returned; the
        lexer is then positioned at the end of the input.
        """
        # Skip whitespace.
        self.__SkipWhitespace()

        text = self.__curString
        pos = self.__currentCharacter

        # Now return the first token that we have a match for.
        for tokenID, pattern in self.__tokens:
            m = pattern.match(text, pos)
            if m:
                ret = DLexToken()
                ret.id = tokenID
                ret.val = m.group()
                ret.lineNumber = self.__lineNumber
                self.__currentCharacter = m.end()
                return ret

        if pos < self.__fileLen:
            # Parenthesised single-argument form prints the same text whether
            # 'print' is a statement or a function.
            print("NO MATCH FOR '%s'" % text[pos:pos + 35])
            ret = DLexToken()
            ret.id = self.GetErrorTokenID()
            ret.val = text[pos:]
            # Error tokens carry their line too, so callers can report where
            # lexing failed.
            ret.lineNumber = self.__lineNumber
            self.__currentCharacter = self.__fileLen
            return ret

        return None

    def GetLineNumber(self):
        """Return the current line number (the first line is 1)."""
        return self.__lineNumber

    def GetPercentComplete(self):
        """
        Return how much of the input has been consumed, as an integer 0-100.

        Empty input is reported as 100 (nothing is left to read).
        """
        if not self.__fileLen:
            return 100
        # Floor division keeps the result an integer under true division.
        return (self.__currentCharacter * 100) // self.__fileLen

    def GetLineContents(self):
        """
        Return the text from the current position up to, but not including,
        the next line break (or the end of the input).
        """
        m = self.__notnewline.match(self.__curString, self.__currentCharacter)
        if m:
            return m.group()
        return ""

    def __SkipWhitespace(self):
        """
        Advance past blanks and line breaks at the current position, bumping
        the line counter once per line break. Does nothing when the lexer was
        created with bSkipWhitespace false.
        """
        if not self.__bSkipWhitespace:
            return
        text = self.__curString
        while 1:
            m = self.__whitespace.match(text, self.__currentCharacter)
            if m:
                self.__currentCharacter = m.end()
                continue
            m = self.__newline.match(text, self.__currentCharacter)
            if m:
                self.__currentCharacter = m.end()
                self.__lineNumber += 1
                continue
            break
|