123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595 |
- #!/usr/bin/env python
-
- """ icdiff.py
-
- Author: Jeff Kaufman, derived from difflib.HtmlDiff
-
- License: This code is usable under the same open terms as the rest of
- python. See: http://www.python.org/psf/license/
-
- """
-
- import os
- import sys
- import errno
- import difflib
- import optparse
- import re
- import filecmp
- import unicodedata
-
# ANSI SGR foreground color numbers for the colors this tool can emit.
_SGR_FOREGROUND = (
    ("red", 31),
    ("green", 32),
    ("yellow", 33),
    ("blue", 34),
    ("magenta", 35),
    ("cyan", 36),
)

# Escape sequences keyed by color name: "<name>" is the normal-weight
# variant (attribute 0), "<name>_bold" the bold one (attribute 1), and
# "none" resets all attributes.
color_codes = {"none": '\033[m'}
color_codes.update(
    (name, '\033[0;%dm' % sgr) for name, sgr in _SGR_FOREGROUND)
color_codes.update(
    (name + "_bold", '\033[1;%dm' % sgr) for name, sgr in _SGR_FOREGROUND)
-
class ConsoleDiff(object):
    """Console colored side by side comparison with change highlights.

    Based on difflib.HtmlDiff

    This class can be used to create a text-mode table showing a side
    by side, line by line comparison of text with inter-line and
    intra-line change highlights in ansi color escape sequences as
    read by xterm.  The table can be generated in either full or
    contextual difference mode.

    To generate the table, call make_table.

    Usage is almost the same as HtmlDiff except only make_table is
    implemented and the file can be invoked on the command line.
    Run::

      python icdiff.py --help

    for command line usage information.

    """

    def __init__(self, tabsize=8, wrapcolumn=None, linejunk=None,
                 charjunk=difflib.IS_CHARACTER_JUNK, cols=80,
                 line_numbers=False,
                 show_all_spaces=False,
                 highlight=False,
                 no_bold=False):
        """ConsoleDiff instance initializer

        Arguments:
        tabsize -- tab stop spacing, defaults to 8.
        wrapcolumn -- column number where lines are broken and wrapped.
            Defaults to None, in which case it is derived from cols: half
            the screen minus 2 columns, or minus 10 columns when line
            numbers are shown.
        linejunk, charjunk -- keyword arguments passed into ndiff() (used by
            ConsoleDiff() to generate the side by side differences).  See
            ndiff() documentation for argument default values and
            descriptions.
        cols -- total width of the output table in columns.
        line_numbers -- prefix every line with its line number.
        show_all_spaces -- render every changed whitespace character in
            reverse video instead of only whitespace-only changes.
        highlight -- mark changes with reverse video rather than with a
            foreground color.
        no_bold -- use the normal-weight colors instead of the bold ones.
        """
        self._tabsize = tabsize
        self.line_numbers = line_numbers
        self.cols = cols
        self.show_all_spaces = show_all_spaces
        self.highlight = highlight
        self.no_bold = no_bold

        if wrapcolumn is None:
            if not line_numbers:
                wrapcolumn = self.cols // 2 - 2
            else:
                wrapcolumn = self.cols // 2 - 10

        self._wrapcolumn = wrapcolumn
        self._linejunk = linejunk
        self._charjunk = charjunk

    def _tab_newline_replace(self, fromlines, tolines):
        """Returns from/to line lists with tabs expanded and newlines removed.

        Instead of tab characters being replaced by the number of spaces
        needed to fill in to the next tab stop, this function will fill
        the space with tab characters.  This is done so that the difference
        algorithms can identify changes in a file when tabs are replaced by
        spaces and vice versa.  At the end of the table generation, the tab
        characters will be replaced with a space.
        """
        def expand_tabs(line):
            # hide real spaces
            line = line.replace(' ', '\0')
            # expand tabs into spaces
            line = line.expandtabs(self._tabsize)
            # replace spaces from expanded tabs back into tab characters
            # (we'll replace them with markup after we do differencing)
            line = line.replace(' ', '\t')
            return line.replace('\0', ' ').rstrip('\n')

        fromlines = [expand_tabs(line) for line in fromlines]
        tolines = [expand_tabs(line) for line in tolines]
        return fromlines, tolines

    def _display_len(self, s):
        """Returns the number of terminal columns needed to display s.

        East Asian wide ('W') and full-width ('F') characters occupy two
        columns; every other character is counted as one.
        """
        def width(c):
            if (isinstance(c, type(u"")) and
                    unicodedata.east_asian_width(c) in ('W', 'F')):
                return 2
            return 1

        return sum(width(c) for c in s)

    def _split_line(self, data_list, line_num, text):
        """Builds list of text lines by splitting text lines at wrap point

        This function will determine if the input text line needs to be
        wrapped (split) into separate lines.  If so, the first wrap point
        will be determined and the first line appended to the output
        text line list.  This function is used recursively to handle
        the second part of the split line to further split it.

        data_list -- output list; (line_num, text) tuples are appended
        line_num -- line number of the text, '>' for a continuation line,
            or a false value for blank lines / context separators
        text -- line text, possibly containing change markers (a NUL
            followed by '+', '-' or '^' opens a change, chr(1) closes it)
        """
        # if blank line or context separator, just add it to the output list
        if not line_num:
            data_list.append((line_num, text))
            return

        # if line text doesn't need wrapping, just add it to the output list
        # (each marked change contributes three non-printing characters)
        visible = self._display_len(text) - text.count('\0') * 3
        if visible <= self._wrapcolumn:
            data_list.append((line_num, text))
            return

        # scan text looking for the wrap point, keeping track if the wrap
        # point is inside markers.  i indexes characters, n counts display
        # columns; the two differ as soon as markers or wide characters
        # are involved, so i must be bounded by len(text), not by width.
        i = 0
        n = 0
        mark = ''
        length = len(text)
        while n < self._wrapcolumn and i < length:
            if text[i] == '\0':
                i += 1
                mark = text[i]
                i += 1
            elif text[i] == '\1':
                i += 1
                mark = ''
            else:
                # measure the character being consumed, then step past it
                n += self._display_len(text[i])
                i += 1

        # a trailing wide character can carry the scan to the end of the
        # text; nothing is left to wrap, so avoid an empty continuation line
        if i >= length:
            data_list.append((line_num, text))
            return

        # wrap point is inside text, break it up into separate lines
        line1 = text[:i]
        line2 = text[i:]

        # if wrap point is inside markers, place end marker at end of first
        # line and start marker at beginning of second line because each
        # line will have its own markup around it.
        if mark:
            line1 = line1 + '\1'
            line2 = '\0' + mark + line2

        # tack on first line onto the output list
        data_list.append((line_num, line1))

        # use this routine again to wrap the remaining text
        self._split_line(data_list, '>', line2)

    def _line_wrapper(self, diffs):
        """Returns iterator that splits (wraps) mdiff text lines"""
        filler = ('', ' ')

        # pull from/to data and flags from mdiff iterator
        for fromdata, todata, flag in diffs:
            # check for context separators and pass them through
            if flag is None:
                yield fromdata, todata, flag
                continue
            (fromline, fromtext), (toline, totext) = fromdata, todata
            # for each from/to line split it at the wrap column to form
            # list of text lines.
            fromlist, tolist = [], []
            self._split_line(fromlist, fromline, fromtext)
            self._split_line(tolist, toline, totext)
            # yield from/to line in pairs inserting blank lines as
            # necessary when one side has more wrapped lines
            for row in range(max(len(fromlist), len(tolist))):
                left = fromlist[row] if row < len(fromlist) else filler
                right = tolist[row] if row < len(tolist) else filler
                yield left, right, flag

    def _collect_lines(self, diffs):
        """Collects mdiff output into separate lists

        Before storing the mdiff from/to data into a list, it is converted
        into a single line of text (see _format_line).  Context separators
        are stored as None in both lists.
        """
        fromlist, tolist, flaglist = [], [], []
        # pull from/to data and flags from mdiff style iterator
        for fromdata, todata, flag in diffs:
            if fromdata is None or todata is None:
                # context separators carry no line data
                fromlist.append(None)
                tolist.append(None)
            else:
                fromlist.append(self._format_line(0, flag, *fromdata))
                tolist.append(self._format_line(1, flag, *todata))
            flaglist.append(flag)
        return fromlist, tolist, flaglist

    def _format_line(self, side, flag, linenum, text):
        """Returns the console text for one "from" / "to" line

        side -- 0 or 1 indicating "from" or "to" text (unused)
        flag -- indicates if difference on line (unused)
        linenum -- line number (used for line number column)
        text -- line text to be formatted

        Trailing whitespace is stripped.  When line numbers are enabled the
        text is prefixed with the number right-aligned in 8 columns.
        """
        try:
            lid = '%d' % linenum
        except TypeError:
            # handle blank lines where linenum is '>' or ''
            lid = ''

        text = text.rstrip()

        if not self.line_numbers:
            return text
        return '%s %s' % (self._rpad(lid, 8), text)

    def _real_len(self, s):
        """Returns the displayed width of s, ignoring markup.

        Change markers and ANSI color escape sequences (ESC [ ... m) take
        no room on screen and are not counted; tabs count as one column.
        """
        visible = s
        for marker in ('\0+', '\0-', '\0^', '\1'):
            visible = visible.replace(marker, "")
        visible = visible.replace('\t', ' ')

        length = 0
        in_esc = False
        prev = ' '
        for c in visible:
            if in_esc:
                if c == "m":
                    in_esc = False
            elif c == "[" and prev == "\033":
                in_esc = True
                length -= 1  # we counted prev when we shouldn't have
            else:
                length += self._display_len(c)
            prev = c

        return length

    def _rpad(self, s, field_width):
        """Returns s right-aligned in field_width (padding on the left)."""
        return self._pad(s, field_width) + s

    def _pad(self, s, field_width):
        """Returns the spaces needed to widen s to field_width columns."""
        return " " * (field_width - self._real_len(s))

    def _lpad(self, s, field_width):
        """Returns s left-aligned in field_width (padding on the right)."""
        return s + self._pad(s, field_width)

    def _convert_flags(self, fromlist, tolist, flaglist, context, numlines):
        """Makes list of "next" links

        Inherited from difflib.HtmlDiff.  make_table only uses the returned
        fromlist, tolist and flaglist (which are replaced by a one line
        placeholder when there is nothing to show); the link and anchor
        lists are returned as well but make_table ignores them.
        """
        # all anchor names will be generated using the unique "to" prefix
        toprefix = ''

        # process change flags, generating middle column of next anchors/links
        next_id = [''] * len(flaglist)
        next_href = [''] * len(flaglist)
        num_chg, in_change = 0, False
        last = 0
        for i, flag in enumerate(flaglist):
            if not flag:
                in_change = False
                continue
            if in_change:
                continue
            in_change = True
            last = i
            # at the beginning of a change, drop an anchor a few lines
            # (the context lines) before the change for the previous
            # link
            anchor = max(0, i - numlines)
            next_id[anchor] = ' id="difflib_chg_%s_%d"' % (toprefix, num_chg)
            # at the beginning of a change, drop a link to the next
            # change
            num_chg += 1
            next_href[last] = '<a href="#difflib_chg_%s_%d">n</a>' % (
                toprefix, num_chg)

        # check for cases where there is no content to avoid exceptions
        if not flaglist:
            flaglist = [False]
            next_id = ['']
            next_href = ['']
            last = 0
            if context:
                fromlist = ['No Differences Found']
                tolist = fromlist
            else:
                fromlist = tolist = ['Empty File']
        # if not a change on first line, drop a link
        if not flaglist[0]:
            next_href[0] = '<a href="#difflib_chg_%s_0">f</a>' % toprefix
        # redo the last link to link to the top
        next_href[last] = '<a href="#difflib_chg_%s_top">t</a>' % (toprefix)

        return fromlist, tolist, flaglist, next_href, next_id

    def make_table(self, fromlines, tolines, fromdesc='', todesc='', context=False,
                   numlines=5):
        """Returns table of side by side comparison with change highlights

        Arguments:
        fromlines -- list of "from" lines
        tolines -- list of "to" lines
        fromdesc -- "from" file column header string
        todesc -- "to" file column header string
        context -- set to True for contextual differences (defaults to False
            which shows full differences).
        numlines -- number of context lines.  When context is set True,
            controls number of lines displayed before and after the change.
            When context is False it has no effect on the output.

        Returns the whole table as a single newline-joined string with
        ANSI color escape sequences already applied.
        """
        # change tabs to spaces before it gets more difficult after we insert
        # markup
        fromlines, tolines = self._tab_newline_replace(fromlines, tolines)

        # create diffs iterator which generates side by side from/to data
        context_lines = numlines if context else None
        diffs = difflib._mdiff(fromlines, tolines, context_lines,
                               linejunk=self._linejunk,
                               charjunk=self._charjunk)

        # set up iterator to wrap lines that exceed desired width
        if self._wrapcolumn:
            diffs = self._line_wrapper(diffs)

        # collect up from/to lines and flags into lists (also format the lines)
        fromlist, tolist, flaglist = self._collect_lines(diffs)

        # substitute a placeholder row when there is nothing to show
        fromlist, tolist, flaglist, next_href, next_id = self._convert_flags(
            fromlist, tolist, flaglist, context, numlines)

        rows = []

        if fromdesc or todesc:
            rows.append((simple_colorize(fromdesc, "blue"),
                         simple_colorize(todesc, "blue")))

        for i, (flag, left, right) in enumerate(
                zip(flaglist, fromlist, tolist)):
            if flag is None:
                # mdiff yields None on separator lines; skip the bogus ones
                # generated for the first line
                if i > 0:
                    rows.append((simple_colorize('---', "blue"),
                                 simple_colorize('---', "blue")))
            else:
                rows.append((left, right))

        # each side gets half of the screen; the join adds the gutter
        half = self.cols // 2 - 1
        table_lines = [" ".join(self._lpad(side, half) for side in sides)
                       for sides in rows]

        return self.colorize("\n".join(table_lines))

    def colorize(self, s):
        """Returns s with change markers replaced by ANSI color escapes.

        Additions, deletions and changes are drawn in green, red and yellow
        (bold unless no_bold is set).  With highlight set the colors are
        applied as reverse video.  Otherwise changed whitespace, which a
        foreground color cannot show, is drawn in reverse video: only
        whitespace-only changes by default, every changed whitespace
        character when show_all_spaces is set.
        """
        def background(color):
            # swap the normal (0) or bold (1) attribute for reverse video (7)
            # so the non-bold palette is handled as well as the bold one
            return color.replace("\033[1;", "\033[7;").replace(
                "\033[0;", "\033[7;")

        if self.no_bold:
            C_ADD = color_codes["green"]
            C_SUB = color_codes["red"]
            C_CHG = color_codes["yellow"]
        else:
            C_ADD = color_codes["green_bold"]
            C_SUB = color_codes["red_bold"]
            C_CHG = color_codes["yellow_bold"]

        if self.highlight:
            C_ADD, C_SUB, C_CHG = background(C_ADD), background(C_SUB), background(C_CHG)

        C_NONE = color_codes["none"]
        colors = (C_ADD, C_SUB, C_CHG, C_NONE)

        s = s.replace('\0+', C_ADD).replace('\0-', C_SUB).replace('\0^', C_CHG).replace('\1', C_NONE).replace('\t', ' ')

        if self.highlight:
            # reverse video already makes whitespace changes visible
            return s

        if not self.show_all_spaces:
            # A colored run consisting entirely of whitespace would be
            # invisible in a foreground color, so switch it to reverse video.
            return re.sub("\033\\[[01];3([123])m(\\s+)(\033\\[)", "\033[7;3\\1m\\2\\3", s)

        # show_all_spaces: walk the text tracking the active color and wrap
        # every whitespace character inside a colored run in reverse video.
        out = []
        in_color = False
        for c in s:
            # an "m" just written may have completed an escape sequence;
            # the color codes are at most 7 characters long
            if len(out) > 6 and out[-1] == "m":
                tail = "".join(out[-7:])
                for color in colors:
                    if tail.endswith(color):
                        in_color = color
                if tail.endswith(C_NONE):
                    in_color = False

            if in_color and c.isspace():
                out.extend([C_NONE, background(in_color), c, C_NONE, in_color])
            else:
                out.append(c)

        return "".join(out)
-
def simple_colorize(s, chosen_color):
    """Return s wrapped in the escape code for chosen_color and a reset.

    chosen_color must be a key of color_codes; s is rendered with "%s".
    """
    start_code = color_codes[chosen_color]
    reset_code = color_codes["none"]
    return start_code + ("%s" % (s,)) + reset_code
-
def start():
    """Parse the command line and run the requested comparison.

    Reads the options and the two paths from sys.argv, works out the
    output width (from --cols, else from the terminal size, else 80) and
    hands over to diff_recursively or diff_files.

    --version prints the version and exits; any number of positional
    arguments other than two prints the help text and exits.  The numeric
    options (--cols, --head, --numlines) are declared as integers, so a
    malformed value is rejected by the option parser with a usage error
    instead of surfacing later as a ValueError traceback.
    """
    # If you change any of these, also update README.
    parser = optparse.OptionParser(usage="usage: %prog [options] left_file right_file",
                                   description="Show differences between files in a two column view.")
    parser.add_option("--cols", default=None, type="int",
                      help="specify the width of the screen. Autodetection is Linux only")
    parser.add_option("--head", default=0, type="int",
                      help="consider only the first N lines of each file")
    parser.add_option("--highlight", default=False,
                      action="store_true",
                      help="color by changing the background color instead of the foreground color.  Very fast, ugly, displays all changes")
    parser.add_option("--line-numbers", default=False,
                      action="store_true",
                      help="generate output with line numbers")
    parser.add_option("--no-bold", default=False,
                      action="store_true",
                      help="use non-bold colors; recommended for with solarized")
    parser.add_option("--no-headers", default=False,
                      action="store_true",
                      help="don't label the left and right sides with their file names")
    parser.add_option("--numlines", default=5, type="int",
                      help="how many lines of context to print; can't be combined with --whole-file")
    parser.add_option("--recursive", default=False,
                      action="store_true",
                      help="recursively compare subdirectories")
    parser.add_option("--show-all-spaces", default=False,
                      action="store_true",
                      help="color all non-matching whitespace including that which is not needed for drawing the eye to changes.  Slow, ugly, displays all changes")
    parser.add_option("--version", default=False,
                      action="store_true",
                      help="print version and exit")
    parser.add_option("--whole-file", default=False,
                      action="store_true",
                      help="show the whole file instead of just changed lines and context")

    (options, args) = parser.parse_args()

    if options.version:
        print("icdiff version 1.2.1")
        sys.exit()

    if len(args) != 2:
        parser.print_help()
        sys.exit()

    a, b = args

    # no usable width given (missing or 0): ask the terminal, else use 80
    if not options.cols:
        def ioctl_GWINSZ(fd):
            # best effort: any failure (no tty, no fcntl on this platform)
            # just means this descriptor cannot tell us the window size
            try:
                import fcntl
                import termios
                import struct
                cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
            except Exception:
                return None
            return cr
        # try stdin, stdout and stderr in turn
        cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
        if cr:
            # cr is (rows, columns)
            options.cols = cr[1]
        else:
            options.cols = 80

    if options.recursive:
        diff_recursively(options, a, b)
    else:
        diff_files(options, a, b)
-
def diff_recursively(options, a, b):
    """Compare a and b, descending into directories.

    options -- parsed command line options, passed through to diff_files
    a, b -- paths of the left and right file or directory

    Two files are diffed when their contents differ.  Two directories are
    walked in sorted order: names present on one side only are reported,
    names present on both sides are compared recursively.  A file on one
    side and a directory on the other is reported as a mismatch.  Any
    other combination (for example a missing path) produces no output.
    """
    def print_meta(s):
        print(simple_colorize(s, "magenta"))

    if os.path.isfile(a) and os.path.isfile(b):
        # shallow=False compares contents; the default only compares the
        # os.stat() signature, so files of equal size and mtime but
        # different content would be skipped
        if not filecmp.cmp(a, b, shallow=False):
            diff_files(options, a, b)

    elif os.path.isdir(a) and os.path.isdir(b):
        a_contents = set(os.listdir(a))
        b_contents = set(os.listdir(b))

        for child in sorted(a_contents | b_contents):
            if child not in b_contents:
                print_meta("Only in %s: %s" % (a, child))
            elif child not in a_contents:
                print_meta("Only in %s: %s" % (b, child))
            else:
                diff_recursively(options,
                                 os.path.join(a, child),
                                 os.path.join(b, child))

    elif os.path.isdir(a) and os.path.isfile(b):
        print_meta("File %s is a directory while %s is a file" % (a, b))

    elif os.path.isfile(a) and os.path.isdir(b):
        print_meta("File %s is a file while %s is a directory" % (a, b))
-
def diff_files(options, a, b):
    """Print the side by side diff of files a and b to stdout.

    options -- parsed command line options; reads no_headers, head, cols,
        show_all_spaces, highlight, no_bold, line_numbers, whole_file and
        numlines
    a, b -- paths of the left and right file

    Exits with status 1 after writing a message to stderr when either
    path is a directory.
    """
    if options.no_headers:
        headers = None, None
    else:
        headers = a, b

    head = int(options.head)

    for x in [a, b]:
        if os.path.isdir(x):
            sys.stderr.write("error: %s is a directory; did you mean to pass --recursive?\n" % x)
            sys.exit(1)

    # Text mode already translates every newline convention on Python 3;
    # the old "U" flag is rejected with ValueError since Python 3.11.
    # The with-blocks make sure both files are closed again.
    with open(a) as left:
        lines_a = left.readlines()
    with open(b) as right:
        lines_b = right.readlines()

    if head != 0:
        lines_a = lines_a[:head]
        lines_b = lines_b[:head]

    print(ConsoleDiff(cols=int(options.cols),
                      show_all_spaces=options.show_all_spaces,
                      highlight=options.highlight,
                      no_bold=options.no_bold,
                      line_numbers=options.line_numbers).make_table(
        lines_a, lines_b, headers[0], headers[1], context=(not options.whole_file), numlines=int(options.numlines)))
    sys.stdout.flush()
-
if __name__ == "__main__":
    try:
        start()
    except KeyboardInterrupt:
        # interrupted by the user: leave without a traceback
        pass
    except IOError as err:
        # a closed pipe on the output side (EPIPE) is not an error worth
        # reporting; anything else is
        if err.errno != errno.EPIPE:
            raise
|