zulip/tools/lister.py

114 lines
4.4 KiB
Python
Executable File

#!/usr/bin/env python
from __future__ import print_function
from __future__ import absolute_import
import os
import sys
import subprocess
import re
from collections import defaultdict
import argparse
from six.moves import filter
def get_ftype(fpath, use_shebang):
ext = os.path.splitext(fpath)[1]
if ext:
return ext[1:]
elif use_shebang:
# opening a file may throw an OSError
with open(fpath) as f:
first_line = f.readline()
if re.search(r'^#!.*\bpython', first_line):
return 'py'
elif re.search(r'^#!.*sh', first_line):
return 'sh'
elif re.search(r'^#!.*\bperl', first_line):
return 'pl'
elif re.search(r'^#!.*\bnode', first_line):
return 'js'
elif re.search(r'^#!.*\bruby', first_line):
return 'rb'
elif re.search(r'^#!', first_line):
print('Error: Unknown shebang in file "%s":\n%s' % (fpath, first_line), file=sys.stderr)
return ''
else:
return ''
else:
return ''
def list_files(targets=[], ftypes=[], use_shebang=True, modified_only=False,
exclude=[], group_by_ftype=False):
"""
List files tracked by git.
Returns a list of files which are either in targets or in directories in targets.
If targets is [], list of all tracked files in current directory is returned.
Other arguments:
ftypes - List of file types on which to filter the search.
If ftypes is [], all files are included.
use_shebang - Determine file type of extensionless files from their shebang.
modified_only - Only include files which have been modified.
exclude - List of paths to be excluded.
group_by_ftype - If True, returns a dict of lists keyed by file type.
If False, returns a flat list of files.
"""
ftypes = [x.strip('.') for x in ftypes]
ftypes_set = set(ftypes)
cmdline = ['git', 'ls-files'] + targets
if modified_only:
cmdline.append('-m')
files_gen = (x.strip() for x in subprocess.check_output(cmdline, universal_newlines=True).split('\n'))
# throw away empty lines and non-files (like symlinks)
files = list(filter(os.path.isfile, files_gen))
result = defaultdict(list) if group_by_ftype else []
for fpath in files:
# this will take a long time if exclude is very large
in_exclude = False
for expath in exclude:
expath = expath.rstrip('/')
if fpath == expath or fpath.startswith(expath + '/'):
in_exclude = True
if in_exclude:
continue
if ftypes or group_by_ftype:
try:
filetype = get_ftype(fpath, use_shebang)
except (OSError, UnicodeDecodeError) as e:
etype = e.__class__.__name__
print('Error: %s while determining type of file "%s":' % (etype, fpath), file=sys.stderr)
print(e, file=sys.stderr)
filetype = ''
if ftypes and filetype not in ftypes_set:
continue
if group_by_ftype:
result[filetype].append(fpath)
else:
result.append(fpath)
return result
if __name__=="__main__":
parser = argparse.ArgumentParser(description="List files tracked by git and optionally filter by type")
parser.add_argument('targets', nargs='*',
help='''files and directories to include in the result.
If this is not specified, the current directory is used''')
parser.add_argument('-m', '--modified', action='store_true', default=False, help='list only modified files')
parser.add_argument('-f', '--ftypes', nargs='+',
help="list of file types to filter on. All files are included if this option is absent")
parser.add_argument('--ext-only', dest='extonly', action='store_true', default=False, help='only use extension to determine file type')
parser.add_argument('--exclude', nargs='+', help='list of files and directories to exclude from listing')
args = parser.parse_args()
args.targets = args.targets or []
args.ftypes = args.ftypes or []
args.exclude = args.exclude or []
listing = list_files(targets=args.targets, ftypes=args.ftypes, use_shebang=not args.extonly,
modified_only=args.modified, exclude=args.exclude)
for l in listing:
print(l)