comparison diff-colorize.py @ 28:58221373fc6f draft default tip

python3: port to Python 3. Pleasantly few changes required.
author Augie Fackler <raf@durin42.com>
date Thu, 14 Jul 2022 10:20:03 -0400
parents 5f17911c4fe6
children
comparison
equal deleted inserted replaced
27:5f17911c4fe6 28:58221373fc6f
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import sys 3 import sys
4 import os 4 import os
5 import fileinput 5 import fileinput
6 import functools
6 7
7 has_256_color = (os.environ.get('TERM', None) == 'xterm-256color') 8 has_256_color = (os.environ.get('TERM', None) == 'xterm-256color')
8 9
9 index_color = int(os.environ.get('DIFF_INDEX_COLOR', 10 index_color = int(os.environ.get('DIFF_INDEX_COLOR',
10 32 if has_256_color else 36)) 11 32 if has_256_color else 36))
46 def interleave(*sequences): 47 def interleave(*sequences):
47 "Generator that yields one object from each sequence in turn." 48 "Generator that yields one object from each sequence in turn."
48 49
49 def zip_pad(*iterables, **kw): 50 def zip_pad(*iterables, **kw):
50 "Downloaded from http://code.activestate.com/recipes/497007/" 51 "Downloaded from http://code.activestate.com/recipes/497007/"
51 from itertools import izip, chain 52 from itertools import chain
52 if kw: 53 if kw:
53 assert len(kw) == 1 54 assert len(kw) == 1
54 pad = kw["pad"] 55 pad = kw["pad"]
55 else: 56 else:
56 pad = None 57 pad = None
60 return 61 return
61 done[0] -= 1 62 done[0] -= 1
62 while 1: 63 while 1:
63 yield pad 64 yield pad
64 iterables = [chain(seq, pad_iter()) for seq in iterables] 65 iterables = [chain(seq, pad_iter()) for seq in iterables]
65 return izip(*iterables) 66 return zip(*iterables)
66 67
67 for objects in zip_pad(*sequences): 68 for objects in zip_pad(*sequences):
68 for obj in objects: 69 for obj in objects:
69 if obj is not None: 70 if obj is not None:
70 yield obj 71 yield obj
71 72
73 @functools.total_ordering
72 class Substring(object): 74 class Substring(object):
73 def __init__(self, a, a_start, a_stop, b, b_start, b_stop): 75 def __init__(self, a, a_start, a_stop, b, b_start, b_stop):
74 self.a = a 76 self.a = a
75 self.a_start = a_start 77 self.a_start = a_start
76 self.a_stop = a_stop 78 self.a_stop = a_stop
91 def after_b_substring(self): 93 def after_b_substring(self):
92 return self.b[self.b_stop:] 94 return self.b[self.b_stop:]
93 95
94 def __hash__(self): 96 def __hash__(self):
95 return hash(self.substring()) 97 return hash(self.substring())
96 def __cmp__(self, other): 98 def __lt__(self, other):
97 return cmp(self.a_start, other.a_start) 99 return self.a_start < other.a_start
98 def __eq__(self, other): 100 def __eq__(self, other):
99 return self.substring() == other.substring() 101 return self.substring() == other.substring()
100 def __str__(self): 102 def __str__(self):
101 return self.substring() 103 return self.substring()
102 def __repr__(self): 104 def __repr__(self):
112 114
113 Clarified and slightly modified (to use a special Substring object) from http://en.wikibooks.org/w/index.php?title=Algorithm_implementation/Strings/Longest_common_substring&oldid=1419225#Python 115 Clarified and slightly modified (to use a special Substring object) from http://en.wikibooks.org/w/index.php?title=Algorithm_implementation/Strings/Longest_common_substring&oldid=1419225#Python
114 """ 116 """
115 a_len = len(a) 117 a_len = len(a)
116 b_len = len(b) 118 b_len = len(b)
117 lengths = [[0] * (b_len + 1) for i in xrange(a_len + 1)] 119 lengths = [[0] * (b_len + 1) for i in range(a_len + 1)]
118 substrings = set() 120 substrings = set()
119 greatest_length = current_run_length = 0 121 greatest_length = current_run_length = 0
120 for a_idx in xrange(a_len): 122 for a_idx in range(a_len):
121 for b_idx in xrange(b_len): 123 for b_idx in range(b_len):
122 if a[a_idx] == b[b_idx]: 124 if a[a_idx] == b[b_idx]:
123 current_run_length = lengths[a_idx][b_idx] + 1 125 current_run_length = lengths[a_idx][b_idx] + 1
124 lengths[a_idx+1][b_idx+1] = current_run_length 126 lengths[a_idx+1][b_idx+1] = current_run_length
125 if current_run_length > greatest_length: 127 if current_run_length > greatest_length:
126 greatest_length = current_run_length 128 greatest_length = current_run_length
138 140
139 def common_subsequence(a, b): 141 def common_subsequence(a, b):
140 "Returns all common substrings between a and b, which can be any finite indexable sliceable sequences, as Substring objects. Determines this by recursively calling itself on slices of a and b before and after each longest common substring." 142 "Returns all common substrings between a and b, which can be any finite indexable sliceable sequences, as Substring objects. Determines this by recursively calling itself on slices of a and b before and after each longest common substring."
141 # Inspired by http://en.wikibooks.org/w/index.php?title=Algorithm_Implementation/Strings/Longest_common_subsequence&oldid=1912924#Python 143 # Inspired by http://en.wikibooks.org/w/index.php?title=Algorithm_Implementation/Strings/Longest_common_subsequence&oldid=1912924#Python
142 def LCS_length_matrix(a, b): 144 def LCS_length_matrix(a, b):
143 matrix = [[0] * (len(b) + 1) for i in xrange(len(a) + 1)] 145 matrix = [[0] * (len(b) + 1) for i in range(len(a) + 1)]
144 for i, a_ch in enumerate(a): 146 for i, a_ch in enumerate(a):
145 for j, b_ch in enumerate(b): 147 for j, b_ch in enumerate(b):
146 if a_ch == b_ch: 148 if a_ch == b_ch:
147 matrix[i + 1][j + 1] = matrix[i][j] + 1 149 matrix[i + 1][j + 1] = matrix[i][j] + 1
148 else: 150 else: