comparison diff-colorize.py @ 27:5f17911c4fe6

Expanded the comment above token_exp in tokenize() to acknowledge that the same sub-expression ([0-9]+) covers both decimal and octal (beginning with 0) number literals.
author Peter Hosey <hg@boredzo.org>
date Sat, 08 Jan 2011 01:27:44 -0800
parents 3b33b1c48880
children 58221373fc6f
comparison
equal deleted inserted replaced
26:3b33b1c48880 27:5f17911c4fe6
172 def common_and_distinct_substrings(a, b): 172 def common_and_distinct_substrings(a, b):
173 "Takes two strings, a and b, tokenizes them, and returns a linked list whose nodes contain runs of either common or unique tokens." 173 "Takes two strings, a and b, tokenizes them, and returns a linked list whose nodes contain runs of either common or unique tokens."
174 def tokenize(a): 174 def tokenize(a):
175 "Each token is an identifier, a number, or a single character." 175 "Each token is an identifier, a number, or a single character."
176 import re 176 import re
177 # Word in identifier, word in macro name (MACRO_NAME), binary number, hex number, decimal number, operator, other punctuation. 177 # Word in identifier, word in macro name (MACRO_NAME), binary number, hex number, decimal or octal number, operator, other punctuation.
178 token_exp = re.compile('[_A-Z]*[_a-z0-9]+:?|_??[A-Z0-9]+:?|0b[01]+|0[xX][0-9A-Fa-f]+|[0-9]+|[-+*|&^/%\[\]<=>,]|[()\\\\;`{}]') 178 token_exp = re.compile('[_A-Z]*[_a-z0-9]+:?|_??[A-Z0-9]+:?|0b[01]+|0[xX][0-9A-Fa-f]+|[0-9]+|[-+*|&^/%\[\]<=>,]|[()\\\\;`{}]')
179 start = 0 179 start = 0
180 for match in token_exp.finditer(a): 180 for match in token_exp.finditer(a):
181 for ch in a[start:match.start()]: 181 for ch in a[start:match.start()]:
182 yield ch 182 yield ch