2016-03-22 20:00:23 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
from __future__ import print_function
|
2016-04-03 14:34:22 +02:00
|
|
|
from __future__ import absolute_import
|
2016-05-24 15:26:38 +02:00
|
|
|
|
2016-03-22 20:00:23 +01:00
|
|
|
import os
|
2016-05-24 15:26:38 +02:00
|
|
|
from os.path import abspath
|
2016-03-22 21:13:21 +01:00
|
|
|
import sys
|
2016-03-22 20:00:23 +01:00
|
|
|
import subprocess
|
2016-03-22 21:13:21 +01:00
|
|
|
import re
|
2016-03-22 20:50:20 +01:00
|
|
|
from collections import defaultdict
|
2016-03-24 17:48:00 +01:00
|
|
|
import argparse
|
2016-04-03 14:34:22 +02:00
|
|
|
from six.moves import filter
|
2016-05-01 05:42:26 +02:00
|
|
|
from typing import Union, List, Dict
|
2016-03-22 20:00:23 +01:00
|
|
|
|
2016-03-22 21:13:21 +01:00
|
|
|
def get_ftype(fpath, use_shebang):
    # type: (str, bool) -> str
    """
    Return the file type of fpath as a short extension-like string.

    If fpath has an extension, that extension (without the leading dot) is
    returned and the file is never opened.  Otherwise, when use_shebang is
    true, the first line of the file is read and matched against a list of
    known interpreters.  An empty string is returned when the type cannot
    be determined; an unrecognised shebang additionally prints an error to
    stderr.

    Opening or reading the file may raise (e.g. OSError or
    UnicodeDecodeError); such exceptions are left to the caller.
    """
    suffix = os.path.splitext(fpath)[1]
    if suffix:
        return suffix[1:]
    if not use_shebang:
        return ''

    # opening a file may throw an OSError
    with open(fpath) as handle:
        shebang = handle.readline()

    if not re.search(r'^#!', shebang):
        # Extensionless file without a shebang line: type unknown.
        return ''

    # Order matters: the first matching pattern wins, and the 'sh' pattern
    # is deliberately loose so that bash, zsh, dash, ... are all covered.
    interpreter_patterns = (
        (r'^#!.*\bpython', 'py'),
        (r'^#!.*sh', 'sh'),
        (r'^#!.*\bperl', 'pl'),
        (r'^#!.*\bnode', 'js'),
        (r'^#!.*\bruby', 'rb'),
    )
    for pattern, ftype in interpreter_patterns:
        if re.search(pattern, shebang):
            return ftype

    print('Error: Unknown shebang in file "%s":\n%s' % (fpath, shebang), file=sys.stderr)
    return ''
|
|
|
|
|
2016-03-22 21:13:21 +01:00
|
|
|
def list_files(targets=[], ftypes=[], use_shebang=True, modified_only=False,
               exclude=[], group_by_ftype=False, extless_only=False):
    # type: (List[str], List[str], bool, bool, List[str], bool, bool) -> Union[Dict[str, List[str]], List[str]]
    """
    List files tracked by git.

    Returns a list of files which are either in targets or in directories in targets.
    If targets is [], list of all tracked files in current directory is returned.

    Other arguments:
    ftypes - List of file types on which to filter the search.
        If ftypes is [], all files are included.
    use_shebang - Determine file type of extensionless files from their shebang.
    modified_only - Only include files which have been modified.
    exclude - List of paths to be excluded, relative to repository root.
    group_by_ftype - If True, returns a dict of lists keyed by file type.
        If False, returns a flat list of files.
    extless_only - Only include extensionless files in output.

    Paths in the result are exactly as printed by `git ls-files`.  A file
    whose type cannot be determined because it cannot be read is reported
    on stderr and treated as having the empty file type ''.

    Raises subprocess.CalledProcessError if a git command fails (for
    example when run outside a git repository).
    """
    # Note: the list defaults in the signature are only ever read, never
    # mutated, so sharing them between calls is harmless.
    ftypes = [x.strip('.') for x in ftypes]
    ftypes_set = set(ftypes)

    # Really this is all bytes -- it's a file path -- but we get paths in
    # sys.argv as str, so that battle is already lost.  Settle for hoping
    # everything is UTF-8.
    repository_root = subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).strip().decode('utf-8')
    exclude_abspaths = [os.path.join(repository_root, fpath).rstrip('/') for fpath in exclude]

    # -z makes git print NUL-terminated, unquoted paths.  Without it, paths
    # containing non-ASCII or other "unusual" characters are C-quoted and
    # would silently fail the isfile check below.
    cmdline = ['git', 'ls-files', '-z']
    if modified_only:
        cmdline.append('-m')
    # '--' stops option parsing so a target starting with '-' is treated as
    # a path rather than as an option to git.
    cmdline.append('--')
    cmdline.extend(targets)

    output = subprocess.check_output(cmdline, universal_newlines=True)
    # Throw away the empty trailing entry and anything that is not an
    # existing file on disk.  os.path.isfile follows symlinks, so only
    # links that do not resolve to a regular file are dropped.
    files = [fpath for fpath in output.split('\0') if os.path.isfile(fpath)]

    result_dict = defaultdict(list)  # type: Dict[str, List[str]]
    result_list = []  # type: List[str]

    for fpath in files:
        ext = os.path.splitext(fpath)[1]
        if extless_only and ext:
            continue

        # this will take a long time if exclude is very large
        absfpath = abspath(fpath)
        if any(absfpath == expath or absfpath.startswith(expath + '/')
               for expath in exclude_abspaths):
            continue

        # The file type is only needed for filtering or grouping; skip the
        # (potentially file-opening) lookup otherwise.
        if ftypes or group_by_ftype:
            try:
                filetype = get_ftype(fpath, use_shebang)
            except (IOError, OSError, UnicodeDecodeError) as e:
                # IOError is listed explicitly because on Python 2 open()
                # raises IOError, which is not a subclass of OSError there.
                etype = e.__class__.__name__
                print('Error: %s while determining type of file "%s":' % (etype, fpath), file=sys.stderr)
                print(e, file=sys.stderr)
                filetype = ''
            if ftypes and filetype not in ftypes_set:
                continue

        if group_by_ftype:
            result_dict[filetype].append(fpath)
        else:
            result_list.append(fpath)

    if group_by_ftype:
        return result_dict
    else:
        return result_list
|
2016-03-24 17:48:00 +01:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line front end: parse the options, delegate to list_files()
    # and print one matching path per line.
    cli = argparse.ArgumentParser(description="List files tracked by git and optionally filter by type")
    cli.add_argument(
        'targets',
        nargs='*',
        default=[],
        help='''files and directories to include in the result.
                        If this is not specified, the current directory is used''')
    cli.add_argument(
        '-m', '--modified',
        action='store_true',
        default=False,
        help='list only modified files')
    cli.add_argument(
        '-f', '--ftypes',
        nargs='+',
        default=[],
        help="list of file types to filter on. All files are included if this option is absent")
    cli.add_argument(
        '--ext-only',
        dest='extonly',
        action='store_true',
        default=False,
        help='only use extension to determine file type')
    cli.add_argument(
        '--exclude',
        nargs='+',
        default=[],
        help='list of files and directories to exclude from results, relative to repo root')
    cli.add_argument(
        '--extless-only',
        dest='extless_only',
        action='store_true',
        default=False,
        help='only include extensionless files in output')
    options = cli.parse_args()

    # --ext-only disables shebang sniffing, hence the negation.
    matches = list_files(
        targets=options.targets,
        ftypes=options.ftypes,
        use_shebang=not options.extonly,
        modified_only=options.modified,
        exclude=options.exclude,
        extless_only=options.extless_only)
    for path in matches:
        print(path)
|