Package lxml :: Package html :: Module _diffcommand
[hide private]
[frames | no frames]

Source Code for Module lxml.html._diffcommand

 1  import optparse 
 2  import sys 
 3  import re 
 4  import os 
 5  from lxml.html.diff import htmldiff 
 6   
 7  description = """\ 
 8  """ 
 9   
10  parser = optparse.OptionParser( 
11      usage="%prog [OPTIONS] FILE1 FILE2\n" 
12      "%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...", 
13      description=description, 
14      ) 
15   
16  parser.add_option( 
17      '-o', '--output', 
18      metavar="FILE", 
19      dest="output", 
20      default="-", 
21      help="File to write the difference to", 
22      ) 
23   
24  parser.add_option( 
25      '-a', '--annotation', 
26      action="store_true", 
27      dest="annotation", 
28      help="Do an annotation") 
29   
def main(args=None):
    """Run the HTML diff command line tool.

    ``args`` is the list of command line arguments; when it is ``None``,
    ``sys.argv[1:]`` is used.  The arguments are parsed with the module-level
    ``parser``.

    If the annotation option is set, the call is delegated to ``annotate()``
    and its result is returned.  Otherwise exactly two input files are
    required; if a different number is given, an error message and the help
    text are printed and the process exits with status 1.

    The body of the first file is diffed against the body of the second file
    with ``htmldiff``.  The result is wrapped in the markup that surrounds
    the body of the *second* file, then written to stdout (output ``-``) or
    to the file named by the output option.
    """
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        # Single-argument call form: prints the same text whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print('Error: you must give two files')
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    # Only the body of the first document takes part in the diff; its
    # surrounding markup is discarded.
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        # Make sure terminal output ends with a newline.
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        # The context manager closes the file even if write() raises.
        with open(options.output, 'wb') as f:
            f.write(result)
55
def read_file(filename):
    """Return the contents of ``filename``.

    The special name ``-`` reads everything from standard input.  Any other
    name is opened in binary mode and read completely.

    Raises ``OSError`` if ``filename`` is not ``-`` and does not exist.
    """
    if filename == '-':
        return sys.stdin.read()
    if not os.path.exists(filename):
        raise OSError(
            "Input file %s does not exist" % filename)
    # The context manager guarantees the handle is released even when
    # read() fails part-way.
    with open(filename, 'rb') as f:
        return f.read()
# Opening and closing <body> tags, matched case-insensitively; re.S lets the
# tag (with any attributes) span several lines.
body_start_re = re.compile(
    r"<body.*?>", re.I|re.S)
body_end_re = re.compile(
    r"</body.*?>", re.I|re.S)


def split_body(html):
    """Split an HTML document around its ``<body>`` element.

    Returns a 3-tuple ``(pre, body, post)``:

    * ``pre``  -- everything up to and including the opening ``<body ...>``
      tag, or ``''`` when there is no such tag;
    * ``body`` -- the content between the tags;
    * ``post`` -- everything from the closing ``</body>`` tag onwards, or
      ``''`` when there is no such tag.

    ``pre + body + post`` always equals the input.
    """
    # Default both wrappers to empty so documents (or fragments) lacking a
    # <body> or </body> tag do not hit an unbound local at the return.
    pre = post = ''
    match = body_start_re.search(html)
    if match:
        pre = html[:match.end()]
        html = html[match.end():]
    # The closing tag is searched only in what follows the opening tag.
    match = body_end_re.search(html)
    if match:
        post = html[match.start():]
        html = html[:match.start()]
    return pre, html, post
83
def annotate(options, args):
    """Placeholder for annotate mode.

    Both arguments are currently unused: the function prints
    "Not yet implemented" and exits the process with status 1.
    """
    # Single-argument call form: prints the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("Not yet implemented")
    sys.exit(1)
87