2016-03-22 20:00:23 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
from __future__ import print_function
|
|
|
|
|
2016-04-03 14:34:22 +02:00
|
|
|
from __future__ import absolute_import
|
2016-03-22 20:00:23 +01:00
|
|
|
import os
|
2016-03-22 21:13:21 +01:00
|
|
|
import sys
|
2016-03-22 20:00:23 +01:00
|
|
|
import subprocess
|
2016-03-22 21:13:21 +01:00
|
|
|
import re
|
2016-03-22 20:50:20 +01:00
|
|
|
from collections import defaultdict
|
2016-03-24 17:48:00 +01:00
|
|
|
import argparse
|
2016-04-03 14:34:22 +02:00
|
|
|
from six.moves import filter
|
2016-03-22 20:00:23 +01:00
|
|
|
|
2016-03-22 21:13:21 +01:00
|
|
|
def get_ftype(fpath, use_shebang):
    """
    Determine the file type of fpath.

    If fpath has an extension, the extension (without the leading dot) is
    returned and the file is never opened.

    Otherwise, if use_shebang is true, the first line of the file is read and
    the type is derived from the interpreter named in its shebang:
    'py' (python), 'sh' (sh, bash, zsh, ...), 'pl' (perl), 'js' (node) or
    'rb' (ruby).

    Returns '' when the type cannot be determined: there is no extension and
    use_shebang is false, the file has no shebang, or the shebang is not
    recognized (the latter is also reported on stderr).

    Errors raised while opening or reading the file (e.g. OSError,
    UnicodeDecodeError) are not handled here and propagate to the caller.
    """
    ext = os.path.splitext(fpath)[1]
    if ext:
        return ext[1:]
    if not use_shebang:
        return ''

    # opening a file may throw an OSError
    with open(fpath) as f:
        first_line = f.readline()

    if not first_line.startswith('#!'):
        return ''

    # Checked in order; the first matching pattern wins.
    shebang_patterns = (
        (r'^#!.*\bpython', 'py'),
        # "sh" must END the interpreter word (optionally followed by a version
        # suffix such as "bash-5.1") and that word must be followed by
        # whitespace or the end of the line.  A bare r'^#!.*sh' would also
        # match paths that merely contain "sh", e.g. "/usr/share/bin/node".
        (r'^#!(?:.*[/\s])?\w*sh[-.\d]*(?:\s|$)', 'sh'),
        (r'^#!.*\bperl', 'pl'),
        (r'^#!.*\bnode', 'js'),
        (r'^#!.*\bruby', 'rb'),
    )
    for pattern, ftype in shebang_patterns:
        if re.search(pattern, first_line):
            return ftype

    print('Error: Unknown shebang in file "%s":\n%s' % (fpath, first_line), file=sys.stderr)
    return ''
|
|
|
|
|
2016-03-22 21:13:21 +01:00
|
|
|
def list_files(targets=None, ftypes=None, use_shebang=True, modified_only=False,
               exclude=None, group_by_ftype=False):
    """
    List files tracked by git.
    Returns a list of files which are either in targets or in directories in targets.
    If targets is empty (or None), list of all tracked files in current directory is returned.

    Other arguments:
    ftypes - List of file types on which to filter the search.
        If ftypes is empty (or None), all files are included.
    use_shebang - Determine file type of extensionless files from their shebang.
    modified_only - Only include files which have been modified.
    exclude - List of paths to be excluded.
    group_by_ftype - If True, returns a dict of lists keyed by file type.
        If False, returns a flat list of files.

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    Files whose type cannot be determined because they cannot be read are
    reported on stderr and treated as having the empty type ''.
    """
    ftypes_set = {x.strip('.') for x in ftypes or ()}
    # Normalize the exclude paths once instead of once per listed file.
    excluded = [x.rstrip('/') for x in exclude or ()]

    # -z: NUL-terminated output with paths printed verbatim. Without it git
    #     quotes unusual/non-ASCII paths, which would then fail the isfile()
    #     check below and silently vanish from the result.
    # --: everything after it is a path, even if it starts with '-'.
    cmdline = ['git', 'ls-files', '-z']
    if modified_only:
        cmdline.append('-m')
    cmdline.append('--')
    cmdline.extend(targets or ())

    output = subprocess.check_output(cmdline, universal_newlines=True)
    # Throw away the empty trailing entry and anything that is not an existing
    # regular file (directories, deleted files, broken symlinks). Note that
    # os.path.isfile follows symlinks, so links to regular files are kept.
    files = [path for path in output.split('\0') if os.path.isfile(path)]

    result = defaultdict(list) if group_by_ftype else []

    for fpath in files:
        # this will take a long time if exclude is very large
        if any(fpath == expath or fpath.startswith(expath + '/') for expath in excluded):
            continue

        # The file type is only needed for filtering or grouping.
        if ftypes_set or group_by_ftype:
            try:
                filetype = get_ftype(fpath, use_shebang)
            except (OSError, UnicodeDecodeError) as e:
                etype = e.__class__.__name__
                print('Error: %s while determining type of file "%s":' % (etype, fpath), file=sys.stderr)
                print(e, file=sys.stderr)
                filetype = ''
            if ftypes_set and filetype not in ftypes_set:
                continue

        if group_by_ftype:
            result[filetype].append(fpath)
        else:
            result.append(fpath)

    return result
|
2016-03-24 17:48:00 +01:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line front end: parse the options, delegate to list_files()
    # and print one path per line.
    cli = argparse.ArgumentParser(description="List files tracked by git and optionally filter by type")
    cli.add_argument('targets', nargs='*',
                     help='''files and directories to include in the result.
                     If this is not specified, the current directory is used''')
    cli.add_argument('-m', '--modified', action='store_true', default=False, help='list only modified files')
    cli.add_argument('-f', '--ftypes', nargs='+',
                     help="list of file types to filter on. All files are included if this option is absent")
    cli.add_argument('--ext-only', dest='extonly', action='store_true', default=False, help='only use extension to determine file type')
    cli.add_argument('--exclude', nargs='+', help='list of files and directories to exclude from listing')
    opts = cli.parse_args()

    # Options that were not given are None; list_files() is handed empty
    # lists instead.
    selected = list_files(
        targets=opts.targets or [],
        ftypes=opts.ftypes or [],
        use_shebang=not opts.extonly,
        modified_only=opts.modified,
        exclude=opts.exclude or [],
    )
    for path in selected:
        print(path)
|