-
Notifications
You must be signed in to change notification settings - Fork 761
/
collectors.py
1337 lines (1111 loc) · 48.7 KB
/
collectors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
"""Flows for handling the collection for artifacts."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import logging
from typing import Optional
from typing import Sequence
from typing import Text
from grr_response_core import config
from grr_response_core.lib import artifact_utils
from grr_response_core.lib import parsers
from grr_response_core.lib import rdfvalue
# For file collection artifacts. pylint: disable=unused-import
from grr_response_core.lib.parsers import registry_init
# pylint: enable=unused-import
from grr_response_core.lib.parsers import windows_persistence
from grr_response_core.lib.rdfvalues import artifacts as rdf_artifacts
from grr_response_core.lib.rdfvalues import client as rdf_client
from grr_response_core.lib.rdfvalues import client_action as rdf_client_action
from grr_response_core.lib.rdfvalues import client_fs as rdf_client_fs
from grr_response_core.lib.rdfvalues import file_finder as rdf_file_finder
from grr_response_core.lib.rdfvalues import paths as rdf_paths
from grr_response_core.lib.rdfvalues import structs as rdf_structs
from grr_response_core.lib.util import collection
from grr_response_core.lib.util import compatibility
from grr_response_proto import flows_pb2
from grr_response_server import artifact
from grr_response_server import artifact_registry
from grr_response_server import data_store
from grr_response_server import flow_base
from grr_response_server import server_stubs
from grr_response_server.flows.general import artifact_fallbacks
from grr_response_server.flows.general import file_finder
from grr_response_server.flows.general import filesystem
from grr_response_server.flows.general import transfer
_MAX_DEBUG_RESPONSES_STRING_LENGTH = 100000
def _ReadClientKnowledgeBase(client_id, allow_uninitialized=False):
  """Loads the knowledge base for a client from its latest snapshot."""
  snapshot = data_store.REL_DB.ReadClientSnapshot(client_id)
  return artifact.GetKnowledgeBase(
      snapshot, allow_uninitialized=allow_uninitialized)
def _GetPathType(args: rdf_artifacts.ArtifactCollectorFlowArgs,
                 client_os: str) -> rdf_paths.PathSpec.PathType:
  """Chooses the pathspec type used for file access for this collection.

  Raw filesystem access is requested by either of the two flags; on Windows
  the concrete raw pathtype comes from server configuration, elsewhere TSK
  is used. Without raw access, plain OS access is used.
  """
  wants_raw_access = args.use_tsk or args.use_raw_filesystem_access
  if not wants_raw_access:
    return rdf_paths.PathSpec.PathType.OS
  if client_os == "Windows":
    return config.CONFIG["Server.raw_filesystem_access_pathtype"]
  return rdf_paths.PathSpec.PathType.TSK
class ArtifactCollectorFlow(flow_base.FlowBase):
"""Flow that takes a list of artifacts and collects them.
This flow is the core of the Artifact implementation for GRR. Artifacts are
defined using a standardized data format that includes what to collect and
how to process the things collected. This flow takes that data driven format
and makes it useful.
The core functionality of Artifacts is split into ArtifactSources and
Processors.
An Artifact defines a set of ArtifactSources that are used to retrieve data
from the client. These can specify collection of files, registry keys, command
output and others. The first part of this flow "Collect" handles running those
collections by issuing GRR flows and client actions.
The results of those are then collected and GRR searches for Processors that
know how to process the output of the ArtifactSources. The Processors all
inherit from the Parser class, and each Parser specifies which Artifacts it
knows how to process.
So this flow hands off the collected rdfvalue results to the Processors which
then return modified or different rdfvalues. These final results are then
either:
1. Sent to the calling flow.
2. Written to a collection.
"""
# Category under which this flow is listed.
category = "/Collectors/"
args_type = rdf_artifacts.ArtifactCollectorFlowArgs
# Per-artifact progress; exposed via GetProgress().
progress_type = rdf_artifacts.ArtifactCollectorFlowProgress
behaviours = flow_base.BEHAVIOUR_BASIC
def Start(self):
"""For each artifact, create subflows for each collector."""
# Bookkeeping used for the final status report and fallback handling.
self.state.artifacts_failed = []
self.state.artifacts_skipped_due_to_condition = []
self.state.called_fallbacks = set()
self.state.failed_count = 0
# The caller may supply a knowledge base; otherwise one is fetched or
# built below before collection starts.
self.state.knowledge_base = self.args.knowledge_base
self.state.response_count = 0
self.state.progress = rdf_artifacts.ArtifactCollectorFlowProgress()
# The two raw-filesystem flags are mutually exclusive.
if self.args.use_tsk and self.args.use_raw_filesystem_access:
raise ValueError(
"Only one of use_tsk and use_raw_filesystem_access can be set.")
if (self.args.dependencies ==
rdf_artifacts.ArtifactCollectorFlowArgs.Dependency.FETCH_NOW):
# String due to dependency loop with discover.py.
self.CallFlow(
"Interrogate", next_state=compatibility.GetName(self.StartCollection))
return
elif (self.args.dependencies == rdf_artifacts.ArtifactCollectorFlowArgs
.Dependency.USE_CACHED) and (not self.state.knowledge_base):
# If not provided, get a knowledge base from the client.
try:
self.state.knowledge_base = _ReadClientKnowledgeBase(self.client_id)
except artifact_utils.KnowledgeBaseUninitializedError:
# If no-one has ever initialized the knowledge base, we should do so
# now.
if not self._AreArtifactsKnowledgeBaseArtifacts():
# String due to dependency loop with discover.py.
self.CallFlow(
"Interrogate",
next_state=compatibility.GetName(self.StartCollection))
return
# In all other cases start the collection state.
self.CallState(next_state=compatibility.GetName(self.StartCollection))
def _GetArtifactFromName(self, name):
"""Gets an artifact from the registry, refreshing the registry if needed."""
try:
return artifact_registry.REGISTRY.GetArtifact(name)
except rdf_artifacts.ArtifactNotRegisteredError:
# If we don't have an artifact, things shouldn't have passed validation
# so we assume it's a new one in the datastore.
artifact_registry.REGISTRY.ReloadDatastoreArtifacts()
# Only one retry: a second miss propagates ArtifactNotRegisteredError.
return artifact_registry.REGISTRY.GetArtifact(name)
def StartCollection(self, responses):
  """Validates and collects each requested artifact once the KB is ready."""
  if not responses.success:
    raise artifact_utils.KnowledgeBaseUninitializedError(
        "Attempt to initialize Knowledge Base failed.")
  # Fall back to whatever (possibly partial) knowledge base is stored for
  # this client when none was supplied or fetched earlier.
  if not self.state.knowledge_base:
    self.state.knowledge_base = _ReadClientKnowledgeBase(
        self.client_id, allow_uninitialized=True)
  for name in self.args.artifact_list:
    artifact_obj = self._GetArtifactFromName(name)
    # Cheap insurance against badly written artifacts; could be removed if
    # it ever proves expensive — artifact tests should catch these anyway.
    artifact_registry.Validate(artifact_obj)
    self.Collect(artifact_obj)
def Collect(self, artifact_obj):
"""Collect the raw data from the client for this artifact."""
artifact_name = artifact_obj.name
# Ensure attempted artifacts are shown in progress, even with 0 results.
self._GetOrInsertArtifactProgress(artifact_name)
test_conditions = list(artifact_obj.conditions)
# NOTE(review): ConvertSupportedOSToConditions is defined elsewhere in this
# module; presumably it turns supported_os into a condition — confirm.
os_conditions = ConvertSupportedOSToConditions(artifact_obj)
if os_conditions:
test_conditions.append(os_conditions)
# Check each of the conditions match our target.
for condition in test_conditions:
if not artifact_utils.CheckCondition(condition,
self.state.knowledge_base):
logging.debug("Artifact %s condition %s failed on %s", artifact_name,
condition, self.client_id)
self.state.artifacts_skipped_due_to_condition.append(
(artifact_name, condition))
return
# Call the source defined action for each source.
for source in artifact_obj.sources:
# Check conditions on the source.
source_conditions_met = True
test_conditions = list(source.conditions)
os_conditions = ConvertSupportedOSToConditions(source)
if os_conditions:
test_conditions.append(os_conditions)
for condition in test_conditions:
if not artifact_utils.CheckCondition(condition,
self.state.knowledge_base):
source_conditions_met = False
if source_conditions_met:
# Dispatch on the source type to the matching collection helper.
type_name = source.type
source_type = rdf_artifacts.ArtifactSource.SourceType
self.current_artifact_name = artifact_name
if type_name == source_type.COMMAND:
self.RunCommand(source)
elif type_name == source_type.DIRECTORY:
self.Glob(source, _GetPathType(self.args, self.client_os))
elif type_name == source_type.FILE:
self.GetFiles(source, _GetPathType(self.args, self.client_os),
self.args.max_file_size)
elif type_name == source_type.GREP:
self.Grep(source, _GetPathType(self.args, self.client_os))
elif type_name == source_type.PATH:
# TODO(user): GRR currently ignores PATH types, they are currently
# only useful to plaso during bootstrapping when the registry is
# unavailable. The intention is to remove this type in favor of a
# default fallback mechanism.
pass
elif type_name == source_type.REGISTRY_KEY:
self.GetRegistryKey(source)
elif type_name == source_type.REGISTRY_VALUE:
self.GetRegistryValue(source)
elif type_name == source_type.WMI:
self.WMIQuery(source)
elif type_name == source_type.REKALL_PLUGIN:
raise NotImplementedError(
"Running Rekall artifacts is not supported anymore.")
elif type_name == source_type.ARTIFACT_GROUP:
self.CollectArtifacts(source)
elif type_name == source_type.ARTIFACT_FILES:
self.CollectArtifactFiles(source)
elif type_name == source_type.GRR_CLIENT_ACTION:
self.RunGrrClientAction(source)
else:
raise RuntimeError("Invalid type %s in %s" %
(type_name, artifact_name))
else:
logging.debug(
"Artifact %s no sources run due to all sources "
"having failing conditions on %s", artifact_name, self.client_id)
def _AreArtifactsKnowledgeBaseArtifacts(self):
  """Checks whether every requested artifact is a knowledge-base artifact.

  Returns:
    True iff every name in `args.artifact_list` appears in the
    `Artifacts.knowledge_base` configuration list.
  """
  knowledgebase_list = config.CONFIG["Artifacts.knowledge_base"]
  # all() short-circuits on the first non-KB artifact, exactly like the
  # original manual early-return loop.
  return all(
      name in knowledgebase_list for name in self.args.artifact_list)
def GetFiles(self, source, path_type, max_size):
  """Downloads the files named by a FILE source via the FileFinder flow."""
  expanded_paths = []
  for raw_path in source.attributes["paths"]:
    try:
      results = artifact_utils.InterpolateKbAttributes(
          raw_path, self.state.knowledge_base)
    except artifact_utils.KbInterpolationMissingAttributesError as error:
      logging.error(str(error))
      if not self.args.ignore_interpolation_errors:
        raise
      # Best-effort mode: a path that cannot be interpolated is dropped.
      results = []
    expanded_paths.extend(results)
  self.CallFlow(
      file_finder.FileFinder.__name__,
      paths=expanded_paths,
      pathtype=path_type,
      action=rdf_file_finder.FileFinderAction.Download(max_size=max_size),
      request_data={
          "artifact_name": self.current_artifact_name,
          "source": source.ToPrimitiveDict()
      },
      next_state=compatibility.GetName(self.ProcessFileFinderResults))
def ProcessFileFinderResults(self, responses):
  """Forwards successful FileFinder stat entries to ProcessCollected."""
  if not responses.success:
    self.Log("Failed to fetch files %s" %
             responses.request_data["artifact_name"])
    return
  stat_entries = [result.stat_entry for result in responses]
  self.CallStateInline(
      next_state=compatibility.GetName(self.ProcessCollected),
      request_data=responses.request_data,
      messages=stat_entries)
def Glob(self, source, pathtype):
  """Glob paths, return StatEntry objects."""
  interpolated_paths = self.InterpolateList(
      source.attributes.get("paths", []))
  request_data = {
      "artifact_name": self.current_artifact_name,
      "source": source.ToPrimitiveDict()
  }
  self.CallFlow(
      filesystem.Glob.__name__,
      paths=interpolated_paths,
      pathtype=pathtype,
      request_data=request_data,
      next_state=compatibility.GetName(self.ProcessCollected))
def _CombineRegex(self, regex_list):
  """ORs several byte regexes into a single pattern.

  Args:
    regex_list: A non-empty list of byte-string regexes.

  Returns:
    The sole element unchanged when there is exactly one; otherwise each
    regex wrapped in a group and joined with `|`.
  """
  if len(regex_list) == 1:
    return regex_list[0]
  # join() builds the combined pattern in one pass instead of repeated
  # bytes concatenation (which is quadratic).
  return b"|".join(b"(%s)" % regex for regex in regex_list)
def Grep(self, source, pathtype):
"""Grep files in paths for any matches to content_regex_list.
Args:
source: artifact source
pathtype: pathspec path type When multiple regexes are supplied, combine
them into a single regex as an OR match so that we check all regexes at
once.
"""
path_list = self.InterpolateList(source.attributes.get("paths", []))
# `content_regex_list` elements should be binary strings, but forcing
# artifact creators to use verbose YAML syntax for binary literals would
# be cruel. Therefore, we allow both kind of strings and we convert to bytes
# if required.
content_regex_list = []
for content_regex in source.attributes.get("content_regex_list", []):
if isinstance(content_regex, Text):
content_regex = content_regex.encode("utf-8")
content_regex_list.append(content_regex)
# Regexes may themselves contain knowledge-base patterns to expand.
content_regex_list = self.InterpolateList(content_regex_list)
regex_condition = rdf_file_finder.FileFinderContentsRegexMatchCondition(
regex=self._CombineRegex(content_regex_list),
bytes_before=0,
bytes_after=0,
mode="ALL_HITS")
file_finder_condition = rdf_file_finder.FileFinderCondition(
condition_type=(
rdf_file_finder.FileFinderCondition.Type.CONTENTS_REGEX_MATCH),
contents_regex_match=regex_condition)
# Default FileFinderAction (no download) — only match info is returned.
self.CallFlow(
file_finder.FileFinder.__name__,
paths=path_list,
conditions=[file_finder_condition],
action=rdf_file_finder.FileFinderAction(),
pathtype=pathtype,
request_data={
"artifact_name": self.current_artifact_name,
"source": source.ToPrimitiveDict()
},
next_state=compatibility.GetName(self.ProcessCollected))
def GetRegistryKey(self, source):
  """Globs the keys of a REGISTRY_KEY source in the registry namespace."""
  key_paths = self.InterpolateList(source.attributes.get("keys", []))
  self.CallFlow(
      filesystem.Glob.__name__,
      paths=key_paths,
      pathtype=rdf_paths.PathSpec.PathType.REGISTRY,
      request_data={
          "artifact_name": self.current_artifact_name,
          "source": source.ToPrimitiveDict()
      },
      next_state=compatibility.GetName(self.ProcessCollected))
def GetRegistryValue(self, source):
"""Retrieve directly specified registry values, returning Stat objects."""
new_paths = set()
has_glob = False
for kvdict in source.attributes["key_value_pairs"]:
# A `*` or grouping pattern anywhere forces glob-based collection for
# the whole source (see the has_glob branch below).
if "*" in kvdict["key"] or rdf_paths.GROUPING_PATTERN.search(
kvdict["key"]):
has_glob = True
if kvdict["value"]:
# This currently only supports key value pairs specified using forward
# slash.
path = "\\".join((kvdict["key"], kvdict["value"]))
else:
# If value is not set, we want to get the default value. In
# GRR this is done by specifying the key only, so this is what
# we do here.
path = kvdict["key"]
try:
expanded_paths = artifact_utils.InterpolateKbAttributes(
path, self.state.knowledge_base)
except artifact_utils.KbInterpolationMissingAttributesError as error:
logging.error(str(error))
if not self.args.ignore_interpolation_errors:
raise
else:
expanded_paths = []
new_paths.update(expanded_paths)
if has_glob:
self.CallFlow(
filesystem.Glob.__name__,
paths=new_paths,
pathtype=rdf_paths.PathSpec.PathType.REGISTRY,
request_data={
"artifact_name": self.current_artifact_name,
"source": source.ToPrimitiveDict()
},
next_state=compatibility.GetName(self.ProcessCollected))
else:
# We call statfile directly for keys that don't include globs because it
# is faster and some artifacts rely on getting an IOError to trigger
# fallback processing.
for new_path in new_paths:
pathspec = rdf_paths.PathSpec(
path=new_path, pathtype=rdf_paths.PathSpec.PathType.REGISTRY)
# TODO(hanuszczak): Support for old clients ends on 2021-01-01.
# This conditional should be removed after that date.
if not self.client_version or self.client_version >= 3221:
stub = server_stubs.GetFileStat
request = rdf_client_action.GetFileStatRequest(pathspec=pathspec)
else:
stub = server_stubs.StatFile
request = rdf_client_action.ListDirRequest(pathspec=pathspec)
self.CallClient(
stub,
request,
request_data={
"artifact_name": self.current_artifact_name,
"source": source.ToPrimitiveDict()
},
next_state=compatibility.GetName(
self.ProcessCollectedRegistryStatEntry))
def _StartSubArtifactCollector(self, artifact_list, source, next_state):
  """Spawns a child ArtifactCollectorFlow inheriting this flow's options."""
  wants_raw_access = (
      self.args.use_raw_filesystem_access or self.args.use_tsk)
  self.CallFlow(
      ArtifactCollectorFlow.__name__,
      artifact_list=artifact_list,
      use_raw_filesystem_access=wants_raw_access,
      apply_parsers=self.args.apply_parsers,
      max_file_size=self.args.max_file_size,
      ignore_interpolation_errors=self.args.ignore_interpolation_errors,
      dependencies=self.args.dependencies,
      knowledge_base=self.args.knowledge_base,
      request_data={
          "artifact_name": self.current_artifact_name,
          "source": source.ToPrimitiveDict()
      },
      next_state=next_state)
def CollectArtifacts(self, source):
  """Collects an ARTIFACT_GROUP source via a child collector flow."""
  group_members = source.attributes["names"]
  self._StartSubArtifactCollector(
      artifact_list=group_members,
      source=source,
      next_state=compatibility.GetName(self.ProcessCollected))
def CollectArtifactFiles(self, source):
  """Collect files from artifact pathspecs."""
  referenced_artifacts = source.attributes["artifact_list"]
  self._StartSubArtifactCollector(
      artifact_list=referenced_artifacts,
      source=source,
      next_state=compatibility.GetName(self.ProcessCollectedArtifactFiles))
def RunCommand(self, source):
  """Executes a COMMAND source on the client."""
  attributes = source.attributes
  request_data = {
      "artifact_name": self.current_artifact_name,
      "source": source.ToPrimitiveDict()
  }
  self.CallClient(
      server_stubs.ExecuteCommand,
      cmd=attributes["cmd"],
      args=attributes.get("args", []),
      request_data=request_data,
      next_state=compatibility.GetName(self.ProcessCollected))
def WMIQuery(self, source):
  """Run a Windows WMI Query."""
  base_object = source.attributes.get("base_object")
  # A single query template may expand to several concrete queries, one
  # client request is issued per expansion.
  for expanded_query in self._Interpolate(source.attributes["query"]):
    self.CallClient(
        server_stubs.WmiQuery,
        query=expanded_query,
        base_object=base_object,
        request_data={
            "artifact_name": self.current_artifact_name,
            "source": source.ToPrimitiveDict()
        },
        next_state=compatibility.GetName(self.ProcessCollected))
def _GetSingleExpansion(self, value):
  """Interpolates `value`, requiring exactly one expansion result.

  Args:
    value: A string (or bytes) pattern to interpolate.

  Returns:
    The single interpolation result.

  Raises:
    ValueError: If interpolation yields zero or more than one result.
  """
  results = list(self._Interpolate(value))
  if len(results) > 1:
    raise ValueError("Interpolation generated multiple results, use a"
                     " list for multi-value expansions. %s yielded: %s" %
                     (value, results))
  if not results:
    # Previously an empty expansion fell through to an opaque IndexError
    # on results[0]; fail with context instead.
    raise ValueError("Interpolation of %s generated no results." % value)
  return results[0]
def InterpolateDict(self, input_dict):
  """Interpolate all items from a dict.

  Args:
    input_dict: dict to interpolate

  Returns:
    original dict with all string values interpolated
  """
  interpolated = {}
  for key, value in input_dict.items():
    if isinstance(value, (Text, bytes)):
      # Scalar strings must expand to exactly one value.
      interpolated[key] = self._GetSingleExpansion(value)
    elif isinstance(value, list):
      interpolated[key] = self.InterpolateList(value)
    else:
      # Non-string, non-list values pass through untouched.
      interpolated[key] = value
  return interpolated
def InterpolateList(self, input_list):
"""Interpolate all items from a given source array.
Args:
input_list: list of values to interpolate
Returns:
original list of values extended with strings interpolated
"""
new_args = []
for value in input_list:
if isinstance(value, Text) or isinstance(value, bytes):
results = self._Interpolate(value)
# If the current knowledge base yields nothing, optionally retry
# against historical client snapshots until one produces results.
if not results and self.args.old_client_snapshot_fallback:
client_id = self.client_id
snapshots = data_store.REL_DB.ReadClientSnapshotHistory(client_id)
for snapshot in snapshots:
results = self._Interpolate(value, snapshot.knowledge_base)
if results:
break
new_args.extend(results)
else:
# NOTE(review): non-string values are extend()ed, not appended —
# this flattens iterables and raises TypeError for non-iterables;
# confirm this is intentional.
new_args.extend(value)
return new_args
def _Interpolate(
    self,
    pattern: Text,
    knowledgebase: Optional[rdf_client.KnowledgeBase] = None
) -> Sequence[Text]:
  """Performs a knowledgebase interpolation.

  Args:
    pattern: A pattern to interpolate.
    knowledgebase: Knowledgebase to use for interpolation. If no knowledgebase
      is provided, the one provided as a flow argument is used.

  Returns:
    A list of possible interpolation results.
  """
  kb = self.state.knowledge_base if knowledgebase is None else knowledgebase
  try:
    return artifact_utils.InterpolateKbAttributes(pattern, kb)
  except artifact_utils.KbInterpolationMissingAttributesError as error:
    # With snapshot fallback enabled the caller retries against older
    # snapshots, so the empty result is the signal it expects.
    if self.args.old_client_snapshot_fallback:
      return []
    if self.args.ignore_interpolation_errors:
      logging.error(str(error))
      return []
    raise
def RunGrrClientAction(self, source):
  """Call a GRR Client Action.

  Args:
    source: A GRR_CLIENT_ACTION artifact source naming the client action and
      optionally providing `action_args` to interpolate and pass through.

  Raises:
    RuntimeError: If the named client action is not registered.
  """
  # Retrieve the correct rdfvalue to use for this client action.
  action_name = source.attributes["client_action"]
  try:
    action_stub = server_stubs.ClientActionStub.classes[action_name]
  except KeyError as error:
    # Chain the KeyError so tracebacks show the original failed lookup
    # instead of "During handling of the above exception..." noise.
    raise RuntimeError("Client action %s not found." % action_name) from error
  self.CallClient(
      action_stub,
      request_data={
          "artifact_name": self.current_artifact_name,
          "source": source.ToPrimitiveDict()
      },
      next_state=compatibility.GetName(self.ProcessCollected),
      **self.InterpolateDict(source.attributes.get("action_args", {})))
def CallFallback(self, artifact_name, request_data):
"""Starts the registered fallback flow for an artifact, at most once.
Args:
artifact_name: Name of the artifact whose collection failed.
request_data: Request data forwarded to the fallback flow.
Returns:
True if a fallback flow was launched, False otherwise.
"""
if artifact_name not in artifact_fallbacks.FALLBACK_REGISTRY:
return False
fallback_flow = artifact_fallbacks.FALLBACK_REGISTRY[artifact_name]
if artifact_name in self.state.called_fallbacks:
self.Log("Already called fallback class %s for artifact: %s",
fallback_flow, artifact_name)
return False
self.Log("Calling fallback class %s for artifact: %s", fallback_flow,
artifact_name)
self.CallFlow(
fallback_flow,
request_data=request_data.ToDict(),
artifact_name=artifact_name,
next_state=compatibility.GetName(self.ProcessCollected))
# Make sure we only try this once
self.state.called_fallbacks.add(artifact_name)
return True
def ProcessCollected(self, responses):
"""Each individual collector will call back into here.
Args:
responses: Responses from the collection.
Raises:
artifact_utils.ArtifactDefinitionError: On bad definition.
artifact_utils.ArtifactProcessingError: On failure to process.
"""
flow_name = self.__class__.__name__
artifact_name = str(responses.request_data["artifact_name"])
source = responses.request_data.GetItem("source", None)
if not responses.success:
self.Log("Artifact %s data collection failed. Status: %s.", artifact_name,
responses.status)
# A registered fallback flow may still produce results; the artifact is
# only counted as failed when no fallback could be started.
if not self.CallFallback(artifact_name, responses.request_data):
self.state.failed_count += 1
self.state.artifacts_failed.append(artifact_name)
return
self.Log(
"Artifact data collection %s completed successfully in flow %s "
"with %d responses", artifact_name, flow_name, len(responses))
# Now process the responses.
self._ParseResponses(list(responses), artifact_name, source)
def ProcessCollectedRegistryStatEntry(self, responses):
"""Create AFF4 objects for registry statentries.
We need to do this explicitly because we call StatFile client action
directly for performance reasons rather than using one of the flows that do
this step automatically.
Args:
responses: Response objects from the artifact source.
"""
if not responses.success:
# NOTE(review): the failure path forwards `responses=` while the success
# path passes request_data/messages — confirm CallStateInline accepts
# both shapes.
self.CallStateInline(
next_state=compatibility.GetName(self.ProcessCollected),
responses=responses)
return
stat_entries = list(map(rdf_client_fs.StatEntry, responses))
# Persist the stat entries so the registry data shows up in the VFS.
filesystem.WriteStatEntries(stat_entries, client_id=self.client_id)
self.CallStateInline(
next_state=compatibility.GetName(self.ProcessCollected),
request_data=responses.request_data,
messages=stat_entries)
def ProcessCollectedArtifactFiles(self, responses):
"""Schedule files for download based on pathspec attribute.
Args:
responses: Response objects from the artifact source.
Raises:
RuntimeError: if pathspec value is not a PathSpec instance and not
a str.
"""
self.download_list = []
source = responses.request_data.GetItem("source")
pathspec_attribute = source["attributes"].get("pathspec_attribute", None)
for response in responses:
# Prefer an explicitly configured attribute name, if the source has one.
if pathspec_attribute:
if response.HasField(pathspec_attribute):
pathspec = response.Get(pathspec_attribute)
else:
self.Log("Missing pathspec field %s: %s", pathspec_attribute,
response)
continue
else:
pathspec = response
# Check the default .pathspec attribute.
if not isinstance(pathspec, rdf_paths.PathSpec):
try:
pathspec = response.pathspec
except AttributeError:
pass
# Plain string paths are wrapped in a PathSpec, with the pathtype
# derived from the raw-filesystem flags.
if isinstance(pathspec, Text):
pathspec = rdf_paths.PathSpec(path=pathspec)
if self.args.use_raw_filesystem_access or self.args.use_tsk:
pathspec.pathtype = rdf_paths.PathSpec.PathType.TSK
else:
pathspec.pathtype = rdf_paths.PathSpec.PathType.OS
if isinstance(pathspec, rdf_paths.PathSpec):
if not pathspec.path:
self.Log("Skipping empty pathspec.")
continue
self.download_list.append(pathspec)
else:
raise RuntimeError(
"Response must be a string path, a pathspec, or have "
"pathspec_attribute set. Got: %s" % pathspec)
if self.download_list:
request_data = responses.request_data.ToDict()
self.CallFlow(
transfer.MultiGetFile.__name__,
pathspecs=self.download_list,
request_data=request_data,
next_state=compatibility.GetName(self.ProcessCollected))
else:
self.Log("No files to download")
def _GetArtifactReturnTypes(self, source):
  """Get a list of types we expect to handle from our responses."""
  if not source:
    # No source metadata means no type restriction.
    return None
  return source["returned_types"]
def _ParseResponses(self, responses, artifact_name, source):
"""Create a result parser sending different arguments for diff parsers.
Args:
responses: A list of responses.
artifact_name: Name of the artifact that generated the responses.
source: The source responsible for producing the responses.
"""
artifact_return_types = self._GetArtifactReturnTypes(source)
if self.args.apply_parsers:
parser_factory = parsers.ArtifactParserFactory(artifact_name)
results = artifact.ApplyParsersToResponses(parser_factory, responses,
self)
else:
results = responses
# Increment artifact result count in flow progress.
progress = self._GetOrInsertArtifactProgress(artifact_name)
progress.num_results += len(results)
for result in results:
result_type = result.__class__.__name__
if result_type == "Anomaly":
# Anomalies are always replied to, regardless of declared return types.
self.SendReply(result)
elif (not artifact_return_types or result_type in artifact_return_types):
# Only results matching the declared return types (if any) are counted
# and tagged with the producing artifact's name.
self.state.response_count += 1
self.SendReply(result, tag="artifact:%s" % artifact_name)
def GetProgress(self) -> rdf_artifacts.ArtifactCollectorFlowProgress:
"""Returns the per-artifact progress kept in the flow state."""
return self.state.progress
def _GetOrInsertArtifactProgress(self,
                                 name: str) -> rdf_artifacts.ArtifactProgress:
  """Returns the progress entry for `name`, creating it when absent."""
  for entry in self.state.progress.artifacts:
    if entry.name == name:
      return entry
  # First time this artifact is seen: register an empty progress entry.
  entry = rdf_artifacts.ArtifactProgress(name=name)
  self.state.progress.artifacts.append(entry)
  return entry
def End(self, responses):
  """Finalizes the flow, erroring out on zero results when so configured."""
  del responses  # Unused.
  got_no_results = self.state.response_count == 0
  # If we got no responses, and user asked for it, we error out.
  if self.args.error_on_no_results and got_no_results:
    raise artifact_utils.ArtifactProcessingError(
        "Artifact collector returned 0 responses.")
class ArtifactFilesDownloaderFlowArgs(rdf_structs.RDFProtoStruct):
"""Arguments proto wrapper for ArtifactFilesDownloaderFlow."""
protobuf = flows_pb2.ArtifactFilesDownloaderFlowArgs
rdf_deps = [
rdf_artifacts.ArtifactName,
rdfvalue.ByteSize,
]
class ArtifactFilesDownloaderResult(rdf_structs.RDFProtoStruct):
"""Result pairing an original collector result with its downloaded file."""
protobuf = flows_pb2.ArtifactFilesDownloaderResult
rdf_deps = [
rdf_paths.PathSpec,
rdf_client_fs.StatEntry,
]
def GetOriginalResultType(self):
"""Returns the RDFValue class named by original_result_type.
Implicitly returns None when the field is unset or the class name is not
registered in rdfvalue.RDFValue.classes.
"""
if self.HasField("original_result_type"):
return rdfvalue.RDFValue.classes.get(self.original_result_type)
class ArtifactFilesDownloaderFlow(transfer.MultiGetFileLogic,
flow_base.FlowBase):
"""Flow that downloads files referenced by collected artifacts."""
# Category under which this flow is listed.
category = "/Collectors/"
args_type = ArtifactFilesDownloaderFlowArgs
result_types = (ArtifactFilesDownloaderResult,)
def _FindMatchingPathspecs(self, response):
"""Extracts downloadable pathspecs from a single collector result."""
# If we're dealing with plain file StatEntry, just
# return it's pathspec - there's nothing to parse
# and guess.
if (isinstance(response, rdf_client_fs.StatEntry) and
response.pathspec.pathtype in [
rdf_paths.PathSpec.PathType.TSK,
rdf_paths.PathSpec.PathType.OS,
rdf_paths.PathSpec.PathType.NTFS,
]):
return [response.pathspec]
knowledge_base = _ReadClientKnowledgeBase(self.client_id)
if self.args.use_raw_filesystem_access or self.args.use_tsk:
path_type = rdf_paths.PathSpec.PathType.TSK
else:
path_type = rdf_paths.PathSpec.PathType.OS
# For everything else, let the Windows persistence parser guess which
# file paths the response refers to.
p = windows_persistence.WindowsPersistenceMechanismsParser()
parsed_items = p.ParseResponse(knowledge_base, response)
parsed_pathspecs = [item.pathspec for item in parsed_items]
# Force all parsed pathspecs onto the access method chosen above.
for pathspec in parsed_pathspecs:
pathspec.pathtype = path_type
return parsed_pathspecs
def Start(self):
"""Schedules a child ArtifactCollectorFlow whose results are downloaded."""
super().Start()
# The two raw-filesystem flags are mutually exclusive.
if self.args.use_tsk and self.args.use_raw_filesystem_access:
raise ValueError(
"Only one of use_tsk and use_raw_filesystem_access can be set.")
self.state.file_size = self.args.max_file_size
self.state.results_to_download = []
self.CallFlow(
ArtifactCollectorFlow.__name__,
next_state=compatibility.GetName(self._DownloadFiles),
artifact_list=self.args.artifact_list,
use_raw_filesystem_access=(self.args.use_tsk or
self.args.use_raw_filesystem_access),
max_file_size=self.args.max_file_size)
def _DownloadFiles(self, responses):
"""Groups collector results by pathspec and schedules their download."""
if not responses.success:
self.Log("Failed to run ArtifactCollectorFlow: %s", responses.status)
return
results_with_pathspecs = []
results_without_pathspecs = []
for response in responses:
pathspecs = self._FindMatchingPathspecs(response)
if pathspecs:
for pathspec in pathspecs:
result = ArtifactFilesDownloaderResult(
original_result_type=response.__class__.__name__,
original_result=response,
found_pathspec=pathspec)
results_with_pathspecs.append(result)
else:
result = ArtifactFilesDownloaderResult(
original_result_type=response.__class__.__name__,
original_result=response)
results_without_pathspecs.append(result)
# Group by collapsed path so each file is fetched only once; every result
# sharing the pathspec is carried along in request_data.
grouped_results = collection.Group(
results_with_pathspecs, lambda x: x.found_pathspec.CollapsePath())
for _, group in grouped_results.items():
self.StartFileFetch(
group[0].found_pathspec, request_data=dict(results=group))
# Results with no file to download are replied to immediately.
for result in results_without_pathspecs:
self.SendReply(result)
def ReceiveFetchedFile(self,
stat_entry,
file_hash,
request_data=None,
is_duplicate=False):
"""See MultiGetFileLogic."""
del is_duplicate # Unused.
if not request_data:
raise RuntimeError("Expected non-empty request_data")
# Attach the fetched file's stat entry to every result that referenced
# this pathspec, then emit them.
for result in request_data["results"]:
result.downloaded_file = stat_entry
self.SendReply(result)
def FileFetchFailed(self, pathspec, request_data=None, status=None):
"""See MultiGetFileLogic."""
if not request_data:
raise RuntimeError("Expected non-empty request_data")
# On failure the results are still replied to, just without a
# downloaded_file attached.
for result in request_data["results"]:
self.SendReply(result)
class ClientArtifactCollector(flow_base.FlowBase):
"""A client side artifact collector."""
# Category under which this flow is listed.
category = "/Collectors/"
args_type = rdf_artifacts.ArtifactCollectorFlowArgs
behaviours = flow_base.BEHAVIOUR_ADVANCED
def Start(self):
"""Issue the artifact collection request."""
super().Start()
self.state.knowledge_base = self.args.knowledge_base
self.state.response_count = 0
if not self.args.recollect_knowledge_base:
dependency = rdf_artifacts.ArtifactCollectorFlowArgs.Dependency
if self.args.dependencies == dependency.FETCH_NOW:
# String due to dependency loop with discover.py.
self.CallFlow(
"Interrogate",
next_state=compatibility.GetName(self.StartCollection))
return
if (self.args.dependencies == dependency.USE_CACHED and
not self.state.knowledge_base):
# If not provided, get a knowledge base from the client.
try:
self.state.knowledge_base = _ReadClientKnowledgeBase(self.client_id)
except artifact_utils.KnowledgeBaseUninitializedError:
# If no-one has ever initialized the knowledge base, we should do so
# now.
if not self._AreArtifactsKnowledgeBaseArtifacts():
# String due to dependency loop with discover.py
self.CallFlow(
"Interrogate",
next_state=compatibility.GetName(self.StartCollection))
return
# In all other cases start the collection state.
self.CallStateInline(next_state=compatibility.GetName(self.StartCollection))
def StartCollection(self, responses):
"""Start collecting."""
if not responses.success:
raise artifact_utils.KnowledgeBaseUninitializedError(
"Attempt to initialize Knowledge Base failed.")
# TODO(hanuszczak): Flow arguments also appear to have some knowledgebase.
# Do we use it in any way?
if not self.state.knowledge_base:
self.state.knowledge_base = _ReadClientKnowledgeBase(
self.client_id, allow_uninitialized=True)
# NOTE(review): GetArtifactCollectorArgs is defined elsewhere in this
# module; presumably it bundles args + KB for the client action — confirm.
request = GetArtifactCollectorArgs(self.args, self.state.knowledge_base)
self.CollectArtifacts(request)
def _AreArtifactsKnowledgeBaseArtifacts(self):
  """Checks whether every requested artifact is a knowledge-base artifact.

  NOTE: duplicated in ArtifactCollectorFlow; kept identical.

  Returns:
    True iff every name in `args.artifact_list` appears in the
    `Artifacts.knowledge_base` configuration list.
  """
  knowledgebase_list = config.CONFIG["Artifacts.knowledge_base"]
  # all() short-circuits on the first non-KB artifact, exactly like the
  # original manual early-return loop.
  return all(
      name in knowledgebase_list for name in self.args.artifact_list)
def CollectArtifacts(self, client_artifact_collector_args):
  """Start the client side artifact collection."""
  next_state = compatibility.GetName(self.ProcessCollected)
  self.CallClient(
      server_stubs.ArtifactCollector,
      request=client_artifact_collector_args,
      next_state=next_state)
def ProcessCollected(self, responses):
flow_name = self.__class__.__name__
if responses.success:
self.Log(
"Artifact data collection completed successfully in flow %s "
"with %d responses", flow_name, len(responses))
else:
self.Log("Artifact data collection failed. Status: %s.", responses.status)
return
for response in responses:
# The ClientArtifactCollector returns a `ClientArtifactCollectorResult`
# (an rdf object that contains a knowledge base and the list of collected
# artifacts, each collected artifact has a name and a list of responses).