-
Notifications
You must be signed in to change notification settings - Fork 0
/
search
executable file
·517 lines (459 loc) · 22.4 KB
/
search
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
#!/usr/bin/env python
# Copyright (c) 2017 Jonathan Simmonds
#
# Licensed under the MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
"""
'search' is a general purpose, extensible search utility, written in Python.
This file is the entry point and CLI driver for 'search'. It handles the loading
and initialising of all available search modules, parsing of the command line
and directing of search requests to the correct module. It ensures all search
modules have a common interface and help text.
Maintained at https://github.com/jonsim/search
"""
import argparse
import os
import re
import sys
# Load all modules (this makes use of search_modules' initialisation routine).
from search_modules import *
# Version of the search driver itself; it is printed by the --version action
# ahead of the list of loaded modules.
__version__ = '1.0'
# List of SearchModules, as constructed by the search_modules package
# initialisation routine. The wildcard import above is expected to place
# '_search_modules' in this file's global namespace; if it did not, this lookup
# raises a KeyError as soon as the file is loaded.
SEARCH_MODULES = globals()['_search_modules']
# String name of the default search module to use if none is specified on the
# command line. This must match the 'name' of one of the loaded modules (this
# is checked by validate_default_module()).
DEFAULT_MODULE = 'files'
class SearchArgument(object):
    """An encapsulation of an argument to add to an argparse.ArgumentParser.

    This forms a reproducible way of passing the same argument to multiple
    parsers and storing the state of the argument without resorting to walking
    internal, non-API attributes on the actual argparse.ArgumentParser.

    Attributes:
        No public attributes.
    """

    def __init__(self, *args, **kwargs):
        """Initialises the SearchArgument.

        Args:
            All arguments (positional and keyword) will be passed through
            directly to the argparse.ArgumentParser.add_argument(...) method
            when added to the parser.
        """
        self._args = args
        self._kwargs = kwargs
        # 'help' is optional for add_argument(), so do not insist on it here;
        # None is the value argparse itself uses when no help is supplied.
        self._original_help = kwargs.get('help')

    def add_to_parser(self, parser, hidden=False):
        """Adds this argument to a given parser.

        Args:
            parser: The ArgumentParser to add the argument to (or any object
                exposing an add_argument method, such as a subparser).
            hidden: A boolean, False to add the help text for this argument to
                the parser, True to omit adding it. Defaults to False.

        Returns:
            argparse.Action object representing the added argument's Action.
            This can mostly be ignored, but is returned for compatibility.
        """
        # Work on a copy so the stored keyword arguments are never altered by
        # the choice of visibility made for one particular parser.
        kwargs = dict(self._kwargs)
        kwargs['help'] = argparse.SUPPRESS if hidden else self._original_help
        return parser.add_argument(*self._args, **kwargs)

    def get_opts(self):
        """Retrieves a list of all options and aliases for this argument.

        Returns:
            List of strings representing the options which can select this
            argument.
        """
        return list(self._args)
class SearchParser(argparse.ArgumentParser):
    """Custom argparse.ArgumentParser for parsing the search command line.

    This is for the most part a thin wrapper around argparse.ArgumentParser, but
    it provides explicit support for the way in which the driver structures its
    subparsers.

    The search driver would like most of the options and parsing magic to be on
    the base parser, with subparsers being permitted to add their own specific
    options if necessary. This means it provides a common interface, and modules
    which need to can contribute specific additional functionality.

    Sadly argparse.ArgumentParser has a different model of subparsers, where
    each subparser is a completely separate command and has its own help text
    and completely discrete argument sets. It does not well support having
    common argument sets. It can produce passable help formatting, but it cannot
    actually parse such setups.

    The workaround for this is to encapsulate the desired driver-centric setup
    in this parser and then actually pass all options separately onto each of
    the subparsers when they are added. The common options are then never added
    onto the base parser, except in the case of formatting help text (and even
    in this case some reformatting is required to make it readable).

    Attributes:
        global_args: List of SearchArguments representing the global
            arguments, i.e. those which apply directly to the driver itself.
        common_args: List of SearchArguments representing the common
            optional arguments, i.e. those which are applicable to all
            subparsers.
        positional_args: List of SearchArguments representing the common
            positional arguments, i.e. those which are applicable to all
            subparsers.
        subparsers: List of subparser objects contributed to the parser.
            These are for the most part similar to argparse.ArgumentParsers, in
            that they can have SearchArguments added to them.
        global_args_group: Argument group object representing the argument
            group to add global_args to. This does not impact the actual parsing
            but is used when formatting the help text.
        common_args_group: Argument group object representing the argument
            group to add common_args to. This does not impact the actual parsing
            but is used when formatting the help text.
        subparsers_handle: Special handle object (argparse._SubParsersAction)
            which can be used to add subparsers to this parser.
    """

    def __init__(self, *args, **kwargs):
        """Initialises this ArgumentParser.

        Args:
            All arguments (positional and keyword) will be passed through
            directly to argparse.ArgumentParser(...) constructor.
        """
        self._super = super(SearchParser, self)
        self._super.__init__(*args, **kwargs)
        self.global_args = []
        self.common_args = []
        self.positional_args = []
        self.subparsers = []
        # Help text produced by the first format_help() call. Building it
        # modifies the parser, so it must only ever be built once.
        self._formatted_help = None
        # The order in which the groups are created is the order in which they
        # appear in the help text.
        self.global_args_group = self._super.add_argument_group('global arguments')
        self.subparsers_handle = self._super.add_subparsers(
            title='search modules',
            help='Select which search module to use. Defaults to %s.' % (DEFAULT_MODULE),
            # Subparsers are plain parsers; without this argparse would build
            # them from this class.
            parser_class=argparse.ArgumentParser)
        self.common_args_group = self._super.add_argument_group('common arguments')

    def add_global_arg(self, *args, **kwargs):
        """Creates a new global argument and adds it to the internal list.

        This will also add the argument to the base parser.

        Args:
            All arguments (positional and keyword) will be passed through
            directly to the SearchArgument constructor.
        """
        arg = SearchArgument(*args, **kwargs)
        arg.add_to_parser(self.global_args_group)
        self.global_args.append(arg)

    def add_common_arg(self, *args, **kwargs):
        """Creates a new common optional argument and adds it to the list.

        Args:
            All arguments (positional and keyword) will be passed through
            directly to the SearchArgument constructor.
        """
        arg = SearchArgument(*args, **kwargs)
        self.common_args.append(arg)

    def add_positional_arg(self, *args, **kwargs):
        """Creates a new common positional argument and adds it to the list.

        Args:
            All arguments (positional and keyword) will be passed through
            directly to the SearchArgument constructor.
        """
        arg = SearchArgument(*args, **kwargs)
        self.positional_args.append(arg)

    def add_module(self, module):
        """Adds a SearchModule to this parser.

        This will apply any global/common arguments specified by this parser.

        Args:
            module: SearchModule object to add.
        """
        subparser = module.create_subparser(self.subparsers_handle)
        # Give the subparser every global, common and positional argument so it
        # can parse them. They are hidden from the subparser's own help text
        # because the base parser's help already describes them.
        for arg in self.global_args + self.common_args + self.positional_args:
            arg.add_to_parser(subparser, hidden=True)
        self.subparsers.append(subparser)

    @staticmethod
    def _split_bracketed_expression(expression):
        """Splits an expression on whitespace, keeping bracketed portions
        together.

        Args:
            expression: String single-line expression to split.

        Returns:
            List of strings, representing the split expression. Each top-level
            bracketed group is a single entry (including its brackets and
            anything nested inside it); all other text is split on whitespace.

        Raises:
            ValueError: If a closing bracket has no matching opening bracket or
                closes a bracket of a different kind.
        """
        openers = {']': '[', ')': '(', '}': '{'}
        parts = []
        open_stack = []
        segment_start = 0
        for i, char in enumerate(expression):
            if char in '[({':
                if not open_stack:
                    # A new top-level group begins: flush the plain text which
                    # preceded it.
                    parts.extend(expression[segment_start:i].split())
                    segment_start = i
                open_stack.append(char)
            elif char in '])}':
                if not open_stack:
                    raise ValueError('Unmatched closing brace found')
                if open_stack.pop() != openers[char]:
                    raise ValueError('Mismatched closing brace found')
                if not open_stack:
                    # The top-level group is complete; keep it in one piece.
                    parts.append(expression[segment_start:i+1])
                    segment_start = i+1
        # Add any trailing part.
        tail = expression[segment_start:]
        if open_stack:
            # An unterminated group is kept whole rather than losing its
            # opening bracket.
            parts.append(tail.strip())
        else:
            parts.extend(tail.split())
        return parts

    @staticmethod
    def _reflow_usage(usage):
        """Wraps a formatted, single line usage text over multiple lines,
        splitting it appropriately (if necessary).

        Args:
            usage: String single-line of usage text.

        Returns:
            String of appropriately wrapped usage text, starting with 'usage:'.
        """
        # Retrieve the maximum width to print to.
        try:
            max_width = int(os.environ['COLUMNS']) - 2
        except (KeyError, ValueError):
            max_width = 80 - 2
        # Split usage string into parts.
        parts = SearchParser._split_bracketed_expression(usage)
        # Recombine the parts into strings of at most max_width.
        prefix = 'usage:'
        indent = ' ' * (len(prefix) + 5)
        lines = [prefix]
        for part in parts:
            if len(part) + len(lines[-1]) < max_width:
                lines[-1] += ' ' + part
            else:
                lines.append(indent + part)
        return '\n'.join(lines)

    @staticmethod
    def _combine_usage(base_pre, base_post, modules):
        """Builds a usage string for the search driver and its subparsers
        from the default usage strings for the parsers.

        Args:
            base_pre: String of formatted usage text for the 'standard'
                base parser before any help-formatting-only arguments are
                added.
            base_post: String of formatted usage text for the modified
                base parser with the help-formatting-only common optional
                and positional arguments added to it.
            modules: List of strings of usage text for each of the
                search modules.

        Returns:
            String of combined, wrapped usage text.

        Raises:
            ValueError: If either usage text does not have the expected shape.
        """
        # argparse may wrap long usage text over several indented lines.
        # Collapse all whitespace so both texts can be compared literally.
        base_pre = ' '.join(base_pre.split())
        base_post = ' '.join(base_post.split())
        # Extract the program, global options and subparser options from the
        # standard base parser.
        match = re.match(r'^(\S+)(\s+.+)?\s+(\{[^}]*\} \.\.\.)$', base_pre)
        if not match:
            raise ValueError('Failed to parse initial usage information')
        usage_base_prog = match.group(1)
        # Optional groups which took no part in the match are None.
        usage_base_glob = (match.group(2) or '').strip()
        usage_base_subp = match.group(3)
        # Extract the common optional and positional arguments from the
        # modified base parser. The global options are only part of the
        # pattern if there are any.
        leading = [re.escape(usage_base_prog)]
        if usage_base_glob:
            leading.append(re.escape(usage_base_glob))
        match = re.match(r'^%s\s+(\S.+\s+)?%s(\s+.+)?$' % (
            r'\s+'.join(leading), re.escape(usage_base_subp)), base_post)
        if not match:
            raise ValueError('Failed to parse rebuilt usage information')
        usage_base_comn = (match.group(1) or '').strip()
        usage_base_posi = (match.group(2) or '').strip()
        # Form a combined modules option from the individual modules.
        usage_modules_combined = '[%s]' % (' | '.join(modules))
        # Take the extracted parts and recombine them into the final usage.
        parts = [p for p in [usage_base_prog, usage_base_glob,
                             usage_modules_combined, usage_base_comn,
                             usage_base_posi] if p]
        # Reflow the result to ensure it is properly wrapped.
        return SearchParser._reflow_usage(' '.join(parts))

    def format_help(self):
        """Overrides the argparse.ArgumentParser.format_help() method.

        This will rebuild the options on the parser to expose appropriate common
        arguments etc, even though argparse does not have support for parsing
        such common arguments. It is assumed once this method is called any
        parsing has been done and thus it is okay to modify the parser.

        The text is built on the first call only; later calls return the same
        text, as the common arguments cannot be added to the parser twice.

        Returns:
            String of the complete help text, led by the combined usage text.
        """
        if self._formatted_help is not None:
            return self._formatted_help
        usage_prefix_len = len('usage: ')
        # Generate the base parser's usage text before rebuilding the parser.
        usage_base_pre = self._super.format_usage().strip()[usage_prefix_len:]
        # Read the usage text for each of the module parsers.
        usage_modules = [m.subparser.format_usage().strip()[usage_prefix_len:]
                         for m in SEARCH_MODULES]
        # Rebuild the base parser to add common options.
        for arg in self.common_args:
            arg.add_to_parser(self.common_args_group)
        for arg in self.positional_args:
            arg.add_to_parser(self._super)
        # Regenerate the base parser's usage text.
        usage_base_post = self._super.format_usage().strip()[usage_prefix_len:]
        # Use all these pieces to format the proper usage text.
        formatted_usage = SearchParser._combine_usage(
            usage_base_pre, usage_base_post, usage_modules)
        # As we have formatted our own usage, suppress usage generation from the
        # help formatter and add the formatted help to the returned text.
        self.usage = argparse.SUPPRESS
        self._formatted_help = '%s\n\n%s' % (formatted_usage,
                                             self._super.format_help())
        return self._formatted_help
def get_commandline(parser, argv=None):
    """Retrieves the program's command line, re-ordered appropriately.

    argparse can be fussy about which order the options come in when dealing
    with subparsers (particularly when it comes to attributing options to either
    the base parser or the subparsers). Help it out by re-ordering the command
    line to ensure the options come in the right order.

    Args:
        parser: SearchParser to re-order the arguments for.
        argv: Optional list of strings to use as the command line (excluding
            the program name). Defaults to None, which uses sys.argv[1:].

    Returns:
        List of strings representing the (correctly ordered) command line.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    # Is the command line empty? If so, bail and let argparse handle it.
    if not args:
        return []
    # Are any of the global options in the command line? If so drop everything
    # else.
    for global_arg in parser.global_args:
        for opt in global_arg.get_opts():
            if opt in args:
                return [opt]
    # Is the command line too short to contain a module name? If so add the
    # default module to the beginning.
    if len(args) == 1:
        return [DEFAULT_MODULE] + args
    # Find the position of the first module name in the command line.
    module_names = [m.name for m in SEARCH_MODULES]
    module_i = next((i for i, arg in enumerate(args) if arg in module_names), -1)
    # If no module name was found add the default module to the beginning.
    if module_i == -1:
        return [DEFAULT_MODULE] + args
    # Otherwise ensure the module name leads the command line. When it was
    # already in first place this leaves the order unchanged.
    return [args[module_i]] + args[:module_i] + args[module_i+1:]
def validate_search_modules():
    """Check that at least one search module has been loaded.

    Raises:
        Exception: If SEARCH_MODULES is empty.
    """
    # Nothing to complain about so long as something was loaded.
    if SEARCH_MODULES:
        return
    raise Exception('Failed to locate any search modules.')
def validate_default_module():
    """Check that the default module is one of the loaded search modules.

    Raises:
        Exception: If no loaded module is named DEFAULT_MODULE.
    """
    # This check isn't strictly necessary, but failing here gives a clearer
    # error message than failing later on.
    loaded_names = [module.name for module in SEARCH_MODULES]
    if DEFAULT_MODULE in loaded_names:
        return
    raise Exception('Failed to locate default module: %s' % (DEFAULT_MODULE))
def main():
    """Main method.

    Validates the loaded search modules, builds the base parser with its
    global, common and positional arguments, registers every module as a sub
    command, parses the (re-ordered) command line and finally invokes the
    'search' callback found on the parsed arguments.
    """
    class HelpAction(argparse.Action):
        """Custom argparse.Action for printing the help text.

        The default argparse help action will only print the help from the
        base parser (or the parser which is handling the help command). For the
        search driver it makes more sense that _all_ parsers return the same
        help text: the base parser and recursively all its subparsers.
        """
        def __call__(self, parser, namespace, values, option_string=None):
            # Print the base parser's help text.
            base_parser.print_help()
            # Print the help text of each available module.
            for module in SEARCH_MODULES:
                # Get the help text, less the usage information at the front.
                help_full = module.subparser.format_help()
                help_usage = module.subparser.format_usage()
                help_text = help_full[len(help_usage):].strip()
                # If there is no help text there must be no arguments.
                if not help_text:
                    help_text = 'no additional arguments'
                # Indent the output.
                help_text = '\n'.join([' ' + s for s in help_text.split('\n')])
                # Single-argument print calls behave identically whether print
                # is a statement (Python 2) or a function (Python 3).
                print('\n%s module:' % (module.name))
                print(help_text)
            parser.exit()

    class VersionAction(argparse.Action):
        """Custom argparse.Action for printing the version text.

        The default argparse version action will only print a single line of
        version text. For the search driver it makes sense to print the version
        information of all loaded modules as well as that of the main driver.
        """
        def __call__(self, parser, namespace, values, option_string=None):
            parser.exit(message=self.version_string())

        def version_string(self):
            """Generates the version string for the driver.

            Returns:
                String multi-line version output.
            """
            return '%s %s\n' \
                   'Copyright (c) 2017 Jonathan Simmonds\n\n' \
                   'Loaded modules:\n' \
                   '%s\n' % (os.path.basename(sys.argv[0]), __version__,
                             '\n'.join([' ' + str(m) for m in SEARCH_MODULES]))

    # Start by asserting the modules were loaded correctly.
    validate_search_modules()
    validate_default_module()

    # Build the base parser. The stock help option is disabled as it is
    # replaced by the custom HelpAction below.
    base_parser = SearchParser(
        add_help=False,
        description='A module-based, recursive file searching utility.')

    # Add global arguments.
    base_parser.add_global_arg(
        '-h', '--help',
        nargs=0, action=HelpAction,
        help='Show this help message and exit.')
    base_parser.add_global_arg(
        '--version',
        nargs=0, action=VersionAction,
        help='Show the version number of the program and its installed modules '
             'and exit.')

    # Add common arguments.
    base_parser.add_common_arg(
        '-i', '--ignore-case',
        dest='ignore_case', action='store_true',
        help='Enable case-insensitive searching.')
    base_parser.add_common_arg(
        '-v', '--verbose',
        dest='verbose', action='store_true',
        help='Enable verbose, full replication of the result column, even if '
             'it means taking multiple lines per match (by default the result '
             'will be condensed to keep one line per match if possible).')

    # Add common positional arguments.
    base_parser.add_positional_arg(
        'paths',
        metavar='path', type=str, default=['.'], nargs='*',
        help='Optional path(s) to perform the search in or on. If omitted the '
             'current working directory is used.')
    base_parser.add_positional_arg(
        'regex',
        type=str,
        help='Perl-style regular expression to search for. It is recommended '
             'to pass this in single quotes to prevent shell expansion or '
             'interpretation of the regex characters.')

    # Add sub commands for all loaded modules.
    for module in SEARCH_MODULES:
        base_parser.add_module(module)

    # Retrieve (and potentially reorder) the command line.
    args = get_commandline(base_parser)
    # Actually parse the arguments.
    args = base_parser.parse_args(args)
    # Invoke the parsed module search callback.
    args.search(args)
# Entry point: only run the driver when this file is executed as a script, so
# that importing it does not start a search.
if __name__ == '__main__':
    main()