2

I need to display per-line character differences in a Unix unified-diff-like style. Is there a way to do that using difflib?

I can get a unified diff and a per-line character diff separately, using difflib.unified_diff and difflib.Differ() (ndiff) respectively, but how can I combine them?

This is what I am looking for:

#
# This is difflib.unified
#
>>> print ''.join(difflib.unified_diff('one\ntwo\nthree\n'.splitlines(1), 'ore\ntree\nemu\n'.splitlines(1), 'old', 'new'))
--- old
+++ new
@@ -1,3 +1,3 @@
-one
-two
-three
+ore
+tree
+emu

>>> 

#
# This is difflib.Differ
#
>>> print ''.join(difflib.ndiff('one\ntwo\nthree\n'.splitlines(1), 'ore\ntree\nemu\n'.splitlines(1))),
- one
?  ^
+ ore
?  ^
- two
- three
?  -
+ tree
+ emu
>>> 


#
# I want the merge of above two, something like this...
#
>>> print ''.join(unified_with_ndiff('one\ntwo\nthree\n'.splitlines(1), 'ore\ntree\nemu\n'.splitlines(1))),
--- old
+++ new
@@ -1,3 +1,3 @@
- one
?  ^
+ ore
?  ^
- two
- three
?  -
+ tree
+ emu

>>> 

1 Answer 1

3

Found the answer on my own after digging into the source code of difflib.

'''
# mydifflib.py
@author: Amit Barik
@summary: Overrides difflib.Differ to present the user with unified format (for Python 2.7).

It basically merges difflib.unified_diff() and difflib.Differ.compare().
'''

from difflib import SequenceMatcher
from difflib import Differ

class UnifiedDiffer(Differ):
    """Differ that groups its ndiff-style output into unified-diff hunks.

    The hunk structure (file header, ``@@`` range lines, ``n`` lines of
    context) follows ``difflib.unified_diff``; the lines inside each hunk
    are produced by the inherited ``Differ`` helpers, so changed lines
    carry a two-character prefix (``'- '``, ``'+ '``, ``'  '``) and are
    followed by ``'? '`` guide lines marking intraline differences.
    """

    def unified_diff(self, a, b, fromfile='', tofile='', fromfiledate='',
                     tofiledate='', n=3, lineterm='\n'):
        r"""
        Compare two sequences of lines; generate the delta as unified-style
        hunks whose bodies use ndiff-style lines.

        Each sequence must contain individual single-line strings ending with
        newlines. Such sequences can be obtained from the `readlines()` method
        of file-like objects.  The delta generated also consists of newline-
        terminated strings, ready to be printed as-is via the writelines()
        method of a file-like object.

        Parameters:
            a, b          -- the "from" and "to" sequences of lines.
            fromfile      -- name shown on the '---' header line.
            tofile        -- name shown on the '+++' header line.
            fromfiledate  -- optional date appended (tab separated) to the
                             '---' line.
            tofiledate    -- optional date appended (tab separated) to the
                             '+++' line.
            n             -- number of context lines around each change.
            lineterm      -- terminator appended to the header and '@@'
                             lines (the content lines keep their own).

        Nothing at all is generated when the sequences are equal.

        Raises ValueError if the matcher reports an unknown opcode tag.

        Example:

        >>> import sys
        >>> old = 'context1\none\ntwo\nthree\ncontext2\n'.splitlines(True)
        >>> new = 'context1\nore\ntree\nemu\ncontext2\n'.splitlines(True)
        >>> sys.stdout.writelines(
        ...     UnifiedDiffer().unified_diff(old, new, 'old.txt', 'new.txt', n=1))
        --- old.txt
        +++ new.txt
        @@ -1,5 +1,5 @@
          context1
        - one
        ?  ^
        + ore
        ?  ^
        - two
        - three
        ?  -
        + tree
        + emu
          context2
        """
        started = False
        # Use the linejunk filter given to the constructor, exactly as
        # Differ.compare() does (it defaults to None).
        matcher = SequenceMatcher(self.linejunk, a, b)
        for group in matcher.get_grouped_opcodes(n):
            if not started:
                # The file header is emitted lazily so that identical
                # inputs produce no output at all.
                fromdate = '\t%s' % fromfiledate if fromfiledate else ''
                todate = '\t%s' % tofiledate if tofiledate else ''
                yield '--- %s%s%s' % (fromfile, fromdate, lineterm)
                yield '+++ %s%s%s' % (tofile, todate, lineterm)
                started = True

            # A hunk spans from the start of its first opcode to the end
            # of its last one, on both sides.
            first, last = group[0], group[-1]
            yield '@@ -%s +%s @@%s' % (
                self._format_range(first[1], last[2]),
                self._format_range(first[3], last[4]),
                lineterm)

            for tag, alo, ahi, blo, bhi in group:
                if tag == 'replace':
                    # Emits '-'/'+' pairs plus '?' intraline guide lines.
                    lines = self._fancy_replace(a, alo, ahi, b, blo, bhi)
                elif tag == 'equal':
                    # With n == 0 the context ranges are empty, so this
                    # simply yields nothing.
                    lines = self._dump(' ', a, alo, ahi)
                elif tag == 'delete':
                    lines = self._dump('-', a, alo, ahi)
                elif tag == 'insert':
                    lines = self._dump('+', b, blo, bhi)
                else:
                    raise ValueError('unknown tag %r' % (tag,))

                for line in lines:
                    yield line

    @staticmethod
    def _format_range(start, stop):
        """Return the 'first,length' text for the 0-based range [start, stop).

        Line numbers are 1-based, except that an empty range is reported at
        the line just before it (so an empty file side reads '0,0').
        """
        length = stop - start
        first = start + 1 if length else start
        return '%d,%d' % (first, length)


def main():
    """Print a sample diff of two small texts as a manual smoke test."""
    a = 'context1\none\ntwo\nthree\ncontext2\n'.splitlines(True)
    b = 'context1\nore\ntree\nemu\ncontext2\n'.splitlines(True)
    x = UnifiedDiffer().unified_diff(a, b, 'old.txt', 'new.txt', 'old-date', 'new-date', n=1)
    # Parenthesised single-argument print behaves the same on Python 2.7
    # (print statement) and Python 3 (print function).
    print(''.join(x))


if __name__ == '__main__':
    main()
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.