Skip to content

Commit

Permalink
Added YAML-based filter file support #1537 (#2519)
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz authored May 14, 2019
1 parent e17642f commit 7c24390
Show file tree
Hide file tree
Showing 13 changed files with 739 additions and 172 deletions.
5 changes: 5 additions & 0 deletions config/end-to-end.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ case=image_export
filter_file=data/filter_windows.txt
source=test_data/image.qcow2

[image_export_with_yaml_filter_file]
case=image_export
filter_file=data/filter_windows.yaml
source=test_data/image.qcow2

[multi_extract_and_output1]
case=multi_extract_and_output
output_file=multi_extract_and_output1.log
Expand Down
119 changes: 119 additions & 0 deletions data/filter_windows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Filter file for log2timeline for triaging Windows systems.
#
# This file can be used by image_export or log2timeline to selectively export
# a few key files of a Windows system. This file will collect:
# * The MFT file, LogFile and the UsnJrnl
# * Contents of the Recycle Bin/Recycler.
# * Windows Registry files, e.g. SYSTEM and NTUSER.DAT.
# * Shortcut (LNK) files from recent files.
# * Jump list files, automatic and custom destination.
# * Windows Event Log files.
# * Prefetch files.
# * SetupAPI file.
# * Application Compatibility files, the RecentFileCache and AMCache files.
# * Windows At job files.
# * Browser history: IE, Firefox and Chrome.
# * Browser cookie files: IE.
# * Flash cookies, or LSO/SOL files from the Flash player.
#
description: File system metadata files.
type: include
path_separator: '\'
paths:
- '\\[$]Extend\\[$]UsnJrnl'
- '\\[$]LogFile'
- '\\[$]MFT'
---
description: Recycle Bin and Recycler.
type: include
path_separator: '\'
paths:
- '\\[$]Recycle.Bin'
- '\\[$]Recycle.Bin\\.+'
- '\\[$]Recycle.Bin\\.+\\.+'
- '\\RECYCLER'
- '\\RECYCLER\\.+'
- '\\RECYCLER\\.+\\.+'
---
description: Windows Registry files.
type: include
path_separator: '\'
paths:
- '\\(Users|Documents And Settings)\\.+\\NTUSER[.]DAT'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\Usrclass[.]dat'
- '\\Documents And Settings\\.+\\Local Settings\\Application Data\\Microsoft\\Windows\\Usrclass[.]dat'
- '%SystemRoot%\\System32\\config\\(SAM|SOFTWARE|SECURITY|SYSTEM)'
---
description: Recent activity files.
type: include
path_separator: '\'
paths:
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\.+[.]lnk'
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Office\\Recent\\.+[.]lnk'
- '\\Documents And Settings\\.+\\Recent\\.+[.]lnk'
---
description: Jump List files.
type: include
path_separator: '\'
paths:
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\Automaticdestinations\\.+[.]automaticDestinations-ms'
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\Customdestinations\\.+[.]customDestinations-ms'
---
description: Windows Event Log files.
type: include
path_separator: '\'
paths:
- '%SystemRoot%\\System32\\winevt\\Logs\\.+[.]evtx'
- '%SystemRoot%\\System32\\config\\.+[.]evt'
---
description: Various log files.
type: include
path_separator: '\'
paths:
- '%SystemRoot%\\inf\\setupapi[.].+[.]log'
- '%SystemRoot%\\setupapi[.]log'
- '%SystemRoot%\\System32\\LogFiles\\.+\\.+[.]txt'
---
description: Windows execution artifact files.
type: include
path_separator: '\'
paths:
- '%SystemRoot%\\Tasks\\.+[.]job'
- '%SystemRoot%\\Appcompat\\Programs\\Recentfilecache[.]bcf'
- '%SystemRoot%\\Appcompat\\Programs\\AMcache[.]hve'
---
description: Prefetch files.
type: include
path_separator: '\'
paths:
- '%SystemRoot%\\Prefetch\\.+[.]pf'
---
description: Browser history artifact files.
type: include
path_separator: '\'
paths:
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\History\\History.IE5\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\History\\History.IE5\\MSHist.+\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\History\\Low\\History.IE5\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\History\\Low\\History.IE5\\MSHist.+\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.IE5\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Low\\Content.IE5\\index[.]dat'
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\index[.]dat'
- '\\Users\\.+\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\Low\\index[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\.+\\.+[.]dat'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\Immersive\\.+\\.+[.]dat'
- '\\Users\\.+\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\.+\\.+[.]sqlite'
- '\\Users\\.+\\AppData\\Local\\Microsoft\\Windows\\WebCache\\.+[.]dat'
- '\\Users\\.+\\AppData\\Local\\Google\\Chrome\\User Data\\.+\\History'
- '\\Users\\.+\\AppData\\Local\\Google\\Chrome\\User Data\\.+\\Current Session'
- '\\Users\\.+\\AppData\\Local\\Google\\Chrome\\User Data\\.+\\Last Session'
- '\\Users\\.+\\AppData\\Local\\Google\\Chrome\\User Data\\.+\\Current Tabs'
- '\\Users\\.+\\AppData\\Local\\Google\\Chrome\\User Data\\.+\\Last Tabs'
- '\\Users\\.+\\AppData\\Roaming\\Macromedia\\FlashPlayer\\#SharedObjects\\.+\\.+\\.+[.]sol'
- '\\Documents And Settings\\.+\\Local Settings\\History\\History.IE5\\index[.]dat'
- '\\Documents And Settings\\.+\\Local Settings\\History\\History.IE5\\MSHist.+\\index[.]dat'
- '\\Documents And Settings\\.+\\Local Settings\\Temporary Internet Files\\Content.IE5\\index[.]dat'
- '\\Documents And Settings\\.+\\Cookies\\index[.]dat'
- '\\Documents And Settings\\.+\\Application Data\\Mozilla\\Firefox\\Profiles\\.+\\.+[.]sqlite'
- '\\Documents And Settings\\.+\\Local Settings\\Application Data\\Google\\Chrome\\User Data\\.+\\History'
- '\\Documents And Settings\\.+\\Local Settings\\Application Data\\Google\\Chrome\\.+'
27 changes: 20 additions & 7 deletions plaso/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
from plaso.engine import filter_file
from plaso.engine import knowledge_base
from plaso.engine import logger
from plaso.engine import path_filters
from plaso.engine import processing_status
from plaso.engine import profilers
from plaso.engine import yaml_filter_file
from plaso.lib import definitions
from plaso.lib import errors
from plaso.preprocessors import manager as preprocess_manager
Expand All @@ -47,6 +49,7 @@ def __init__(self):
self._guppy_memory_profiler = None
self._memory_profiler = None
self._name = 'Main'
self._path_filters_helper = None
self._processing_status = processing_status.ProcessingStatus()
self._processing_profiler = None
self._serializers_profiler = None
Expand Down Expand Up @@ -335,7 +338,6 @@ def BuildFilterFindSpecs(
environment_variables=environment_variables)

find_specs = self._artifacts_filter_helper.file_system_find_specs

if not find_specs:
raise errors.InvalidFilter(
'No valid file system find specifications were built from '
Expand All @@ -346,14 +348,25 @@ def BuildFilterFindSpecs(
'building find specification based on filter file: {0:s}'.format(
filter_file_path))

filter_file_object = filter_file.FilterFile(filter_file_path)
find_specs = filter_file_object.BuildFindSpecs(
environment_variables=environment_variables)
filter_file_path_lower = filter_file_path.lower()
if (filter_file_path_lower.endswith('.yaml') or
filter_file_path_lower.endswith('.yml')):
filter_file_object = yaml_filter_file.YAMLFilterFile()
else:
filter_file_object = filter_file.FilterFile()

filter_file_path_filters = filter_file_object.ReadFromFile(
filter_file_path)

self._path_filters_helper = path_filters.PathFiltersHelper()
self._path_filters_helper.BuildFindSpecs(
filter_file_path_filters, environment_variables=environment_variables)

find_specs = self._path_filters_helper.file_system_find_specs
if not find_specs:
raise errors.InvalidFilter(
raise errors.InvalidFilter((
'No valid file system find specifications were built from filter '
'file.')
'file: {0:s}.').format(filter_file_path))

return find_specs

Expand All @@ -370,7 +383,7 @@ def BuildArtifactsRegistry(
artifacts.ArtifactDefinitionsRegistry: artifact definitions registry.
Raises:
RuntimeError: if no valid FindSpecs are built.
BadConfigOption: if no valid FindSpecs are built.
"""
if artifact_definitions_path and not os.path.isdir(
artifact_definitions_path):
Expand Down
90 changes: 21 additions & 69 deletions plaso/engine/filter_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@

from __future__ import unicode_literals

from dfvfs.helpers import file_system_searcher
import io

from plaso.engine import logger
from plaso.lib import py2to3
from plaso.engine import path_filters


class FilterFile(object):
Expand All @@ -26,79 +25,32 @@ class FilterFile(object):
"{{123-AF25-E523}}" will be replaced with "{123-AF25-E523}" at runtime.
"""

def __init__(self, path):
"""Initializes a filter file.
def _ReadFromFileObject(self, file_object):
"""Reads the path filters from the filter file-like object.
Args:
path (str): path to a file that contains one or more path filters.
file_object (file): filter file-like object.
Yields:
PathFilter: path filter.
"""
super(FilterFile, self).__init__()
self._path = path
paths = []
for line in file_object:
line = line.strip()
if line and not line.startswith('#'):
paths.append(line)

# TODO: split read and validation from BuildFindSpecs, raise instead of log
# TODO: determine how to apply the path filters for exclusion.
yield path_filters.PathFilter(
path_filters.PathFilter.FILTER_TYPE_INCLUDE, paths=paths)

def BuildFindSpecs(self, environment_variables=None):
"""Build find specification from a filter file.
def ReadFromFile(self, path):
"""Reads the path filters from the filter file.
Args:
environment_variables (Optional[list[EnvironmentVariableArtifact]]):
environment variables.
path (str): path to a filter file.
Returns:
list[dfvfs.FindSpec]: find specification.
list[PathFilter]: path filters.
"""
path_attributes = {}
if environment_variables:
for environment_variable in environment_variables:
attribute_name = environment_variable.name.lower()
attribute_value = environment_variable.value
if not isinstance(attribute_value, py2to3.STRING_TYPES):
continue

# Remove the drive letter.
if len(attribute_value) > 2 and attribute_value[1] == ':':
_, _, attribute_value = attribute_value.rpartition(':')

if attribute_value.startswith('\\'):
attribute_value = attribute_value.replace('\\', '/')

path_attributes[attribute_name] = attribute_value

find_specs = []
with open(self._path, 'r') as file_object:
for line in file_object:
line = line.strip()
if line.startswith('#'):
continue

if path_attributes:
try:
line = line.format(**path_attributes)
except KeyError as exception:
logger.error((
'Unable to expand path filter: {0:s} with error: '
'{1!s}').format(line, exception))
continue

if not line.startswith('/'):
logger.warning((
'The path filter must be defined as an absolute path: '
'{0:s}').format(line))
continue

# Convert the path filters into a list of path segments and strip
# the root path segment.
path_segments = line.split('/')
path_segments.pop(0)

if not path_segments[-1]:
logger.warning(
'Empty last path segment in path filter: {0:s}'.format(line))
continue

find_spec = file_system_searcher.FindSpec(
location_regex=path_segments, case_sensitive=False)
find_specs.append(find_spec)

return find_specs
with io.open(path, 'r', encoding='utf-8') as file_object:
return list(self._ReadFromFileObject(file_object))
Loading

0 comments on commit 7c24390

Please sign in to comment.