# devtools/dlexer.py

import re


class DLexToken:
	"""
	A single token handed back by DLexer.GetToken.

	The class declares no attributes of its own; the lexer assigns them on
	each instance after constructing it:
		'id'         - the token's ID (compare with the value DLexer.AddToken
		               returned, or with DLexer.GetErrorTokenID()).
		'val'        - the text of the token.
		'lineNumber' - the line the token was encountered on.
	"""


class DLexState:
	"""
	Plain record used by DLexer.BackupState / DLexer.RestoreState.

	It declares no attributes of its own; BackupState stores 'lineNumber',
	'currentCharacter' and 'fileLen' on the instance and RestoreState reads
	them back.
	"""


class DLexer:
	"""
	DLexer is a simple regular-expression based lexer. Here is how to use it.

	1. Call AddToken to add the regular expressions that it will parse. Add them in
	   order of precedence. Store the value returned from AddToken so you can compare
	   it to the token ID returned by GetToken to determine what kind of token was found.

	2. Call BeginRead or BeginReadFile to set up the input.

	3. Repeatedly call GetToken.
	       If it returns None, then the end of the input has been reached.
	       If it returns a value, then it is a DLexToken with 'id', 'val' and
	       'lineNumber' set. When none of the registered expressions matches the
	       remaining input, the token's 'id' equals GetErrorTokenID() and its 'val'
	       holds the rest of the input.
	"""

	def __init__( self, bSkipWhitespace=1 ):
		"""
		Create a lexer with no token expressions and empty input.

		bSkipWhitespace - when true (the default), GetToken skips spaces, tabs,
		                  form feeds, vertical tabs and line breaks before matching.
		"""
		self.__tokens = []
		self.__curTokenID = 0
		self.__notnewline = re.compile( r'[^\r\n]*' )

		self.__bSkipWhitespace = bSkipWhitespace
		self.__whitespace = re.compile( r'[ \t\f\v]+' )
		# A "\r\n" pair is one line break, so it must be matched as a unit;
		# otherwise Windows line endings would advance the line counter twice.
		self.__newline = re.compile( r'\r\n?|\n' )

		# Start out with empty input so that calling GetToken, GetLineNumber or
		# BackupState before BeginRead does not fail with an AttributeError.
		self.__curString = ''
		self.__lineNumber = 1
		self.__currentCharacter = 0
		self.__fileLen = 0


	def GetErrorTokenID( self ):
		"""Return the ID (-1) carried by the token GetToken produces on unmatched input."""
		return -1


	def AddToken( self, expr, flags=0 ):
		"""
		Register a token expression and return its ID.

		expr  - regular expression source, compiled with re.compile.
		flags - optional 're' module flags passed to re.compile.

		IDs are handed out sequentially starting at 0. Expressions are tried in
		the order they were added, so add them in order of precedence.

		Note: an expression that can match the empty string makes GetToken return
		an empty token without advancing through the input.
		"""
		tokenID = self.__curTokenID
		self.__tokens.append( [tokenID, re.compile( expr, flags )] )
		self.__curTokenID += 1
		return tokenID


	# Store and restore the state.
	def BackupState( self ):
		"""
		Return a DLexState holding the current line number, read position and
		input length. The input string itself is not part of the snapshot.
		"""
		ret = DLexState()
		ret.lineNumber = self.__lineNumber
		ret.currentCharacter = self.__currentCharacter
		ret.fileLen = self.__fileLen
		return ret

	def RestoreState( self, state ):
		"""Restore the line number, read position and input length saved by BackupState."""
		self.__lineNumber = state.lineNumber
		self.__currentCharacter = state.currentCharacter
		self.__fileLen = state.fileLen


	def BeginRead( self, str ):
		"""Start lexing the given string from its first character, at line 1."""
		self.__curString = str
		self.__lineNumber = 1
		self.__currentCharacter = 0
		self.__fileLen = len( str )


	def BeginReadFile( self, fileName ):
		"""
		Read the whole file in text mode and start lexing its contents.

		Errors from open() or read() propagate to the caller.
		"""
		# 'with' closes the file even when read() raises.
		with open( fileName, 'r' ) as inFile:
			contents = inFile.read()
		self.BeginRead( contents )


	def GetToken( self ):
		"""
		Return the next token, or None at the end of the input.

		The first registered expression that matches at the current position
		wins. If nothing matches and input remains, a message is printed and an
		error token (id == GetErrorTokenID()) holding all the remaining input is
		returned; the call after that returns None.
		"""
		# Skip whitespace.
		self.__SkipWhitespace()

		# Now return the first token that we have a match for.
		for tokenID, regex in self.__tokens:
			m = regex.match( self.__curString, self.__currentCharacter )
			if m:
				ret = DLexToken()
				ret.id = tokenID
				ret.val = m.group()
				ret.lineNumber = self.__lineNumber
				self.__currentCharacter = m.end()
				# Tokens may span lines (e.g. multi-line comments); count the
				# line breaks they contain so later line numbers stay correct.
				self.__lineNumber += len( self.__newline.findall( ret.val ) )
				return ret

		if self.__currentCharacter < self.__fileLen:
			# Parenthesised form prints the same text on Python 2 and Python 3.
			print( "NO MATCH FOR '%s'" % self.__curString[ self.__currentCharacter : self.__currentCharacter+35 ] )
			ret = DLexToken()
			ret.id = self.GetErrorTokenID()
			ret.val = self.__curString[ self.__currentCharacter : ]
			ret.lineNumber = self.__lineNumber
			# Consume everything so the next call reports the end of the input.
			self.__currentCharacter = self.__fileLen
			return ret

		return None


	def GetLineNumber( self ):
		"""Return the current line number (1-based)."""
		return self.__lineNumber


	def GetPercentComplete( self ):
		"""
		Return how much of the input has been consumed, as an integer percentage.

		Empty input counts as 100 percent complete.
		"""
		if self.__fileLen == 0:
			return 100
		# Floor division keeps the result an integer on Python 3 as well.
		return ( self.__currentCharacter * 100 ) // self.__fileLen


	def GetLineContents( self ):
		"""Return the text from the current read position up to the next line break."""
		# The pattern can match the empty string, so match() never returns None.
		return self.__notnewline.match( self.__curString, self.__currentCharacter ).group()


	def __SkipWhitespace( self ):
		"""
		Advance past whitespace and line breaks, counting each line break.

		Does nothing when the lexer was created with bSkipWhitespace false.
		"""
		if not self.__bSkipWhitespace:
			return

		while 1:
			blank = self.__whitespace.match( self.__curString, self.__currentCharacter )
			if blank:
				self.__currentCharacter = blank.end()
				continue

			lineBreak = self.__newline.match( self.__curString, self.__currentCharacter )
			if lineBreak:
				self.__currentCharacter = lineBreak.end()
				self.__lineNumber += 1
				continue

			break