From 53c7ea11f88ba76dfb6567ccd2f7931388e3d876 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 08:49:10 -0400 Subject: [PATCH 01/14] added jar plugin Signed-off-by: Sastry Duri --- .../systems/jar_container_crawler.plugin | 8 ++ .../plugins/systems/jar_container_crawler.py | 54 +++++++++++ .../plugins/systems/jar_host_crawler.plugin | 8 ++ crawler/plugins/systems/jar_host_crawler.py | 26 ++++++ crawler/utils/features.py | 6 ++ crawler/utils/jar_utils.py | 93 +++++++++++++++++++ tests/unit/test_jar_utils.py | 50 ++++++++++ 7 files changed, 245 insertions(+) create mode 100644 crawler/plugins/systems/jar_container_crawler.plugin create mode 100644 crawler/plugins/systems/jar_container_crawler.py create mode 100644 crawler/plugins/systems/jar_host_crawler.plugin create mode 100644 crawler/plugins/systems/jar_host_crawler.py create mode 100644 crawler/utils/jar_utils.py create mode 100644 tests/unit/test_jar_utils.py diff --git a/crawler/plugins/systems/jar_container_crawler.plugin b/crawler/plugins/systems/jar_container_crawler.plugin new file mode 100644 index 00000000..3247d56e --- /dev/null +++ b/crawler/plugins/systems/jar_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = jar_container +Module = jar_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Produces md5 hashes for jar files and class files in jars diff --git a/crawler/plugins/systems/jar_container_crawler.py b/crawler/plugins/systems/jar_container_crawler.py new file mode 100644 index 00000000..6d886fe3 --- /dev/null +++ b/crawler/plugins/systems/jar_container_crawler.py @@ -0,0 +1,54 @@ +import logging + +import utils.dockerutils +import utils.misc +from icrawl_plugin import IContainerCrawler +from utils.jar_utils import crawl_jar_files +from utils.namespace import run_as_another_namespace + +logger = logging.getLogger('crawlutils') + + +class JarContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'jar' + + def crawl( + self, + container_id=None, + avoid_setns=False, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = inspect['State'] + pid = str(state['Pid']) + logger.debug('Crawling jars for container %s' % container_id) + + if avoid_setns: + rootfs_dir = utils.dockerutils.get_docker_container_rootfs_path( + container_id) + exclude_dirs = [utils.misc.join_abs_paths(rootfs_dir, d) + for d in exclude_dirs] + return crawl_files( + root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir), + exclude_dirs=exclude_dirs, + root_dir_alias=root_dir) + else: # in all other cases, including wrong mode set + return run_as_another_namespace(pid, + ['mnt'], + crawl_jar_files, + root_dir, + exclude_dirs, + None) diff --git a/crawler/plugins/systems/jar_host_crawler.plugin b/crawler/plugins/systems/jar_host_crawler.plugin new file mode 100644 index 00000000..f91d443b --- /dev/null +++ b/crawler/plugins/systems/jar_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = jar_host +Module = jar_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Produces md5 hashes for jar files and class files in jars diff --git a/crawler/plugins/systems/jar_host_crawler.py b/crawler/plugins/systems/jar_host_crawler.py new file mode 100644 index 00000000..24ba6ed0 --- /dev/null +++ b/crawler/plugins/systems/jar_host_crawler.py @@ -0,0 +1,26 @@ +from icrawl_plugin import IHostCrawler +from utils.jar_utils import crawl_jar_files + + +class JarHostCrawler(IHostCrawler): + + def get_feature(self): + return 'jar' + + def crawl( + self, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + return crawl_jar_files(root_dir=root_dir, + exclude_dirs=exclude_dirs) diff --git a/crawler/utils/features.py b/crawler/utils/features.py index 09c96ea7..8e72475b 100644 --- a/crawler/utils/features.py +++ b/crawler/utils/features.py @@ -121,3 +121,9 @@ 'cpu_khz', 'cpu_cache_size_kb', 'cpu_num_cores']) +JarFeature = namedtuple('JarFeature', [ + 'name', + 'path', + 'jarhash', + 'hashes', +]) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py new file mode 100644 index 00000000..e09b6763 --- /dev/null +++ b/crawler/utils/jar_utils.py @@ -0,0 +1,93 @@ +import fnmatch +import logging +import os +import re +import hashlib +import zipfile + +from utils.features import JarFeature + +logger = logging.getLogger('crawlutils') + + +def crawl_jar_files( + root_dir='/', + exclude_dirs=[], + root_dir_alias=None, + accessed_since=0): + + if not os.path.isdir(root_dir): + return + + saved_args = locals() + logger.debug('crawl_jar_files: %s' % (saved_args)) + + assert os.path.isdir(root_dir) + if root_dir_alias is None: + root_dir_alias = root_dir + exclude_dirs = [os.path.join(root_dir, d) for d in + exclude_dirs] + exclude_regex = r'|'.join([fnmatch.translate(d) + for d in exclude_dirs]) or r'$.' + + # walk the directory hierarchy starting at 'root_dir' in BFS + # order + + for (root_dirpath, dirs, files) in os.walk(root_dir): + dirs[:] = [os.path.join(root_dirpath, d) for d in + dirs] + dirs[:] = [d for d in dirs + if not re.match(exclude_regex, d)] + files = [os.path.join(root_dirpath, f) for f in + files] + files = [f for f in files + if not re.match(exclude_regex, f)] + + for fpath in files: + if not fpath.endswith('.jar'): + continue + feature = _crawl_jar_file(root_dir, fpath, + root_dir_alias) + if feature: + yield (feature.path, feature, 'jar') + + +# crawl a single file +def _crawl_jar_file( + root_dir, + fpath, + root_dir_alias, +): + if not fpath.endswith('.jar'): + return + + hashes = [] + with zipfile.ZipFile(fpath, 'r') as zf: + for info in zf.infolist(): + if not info.filename.endswith('.class'): + continue + data = zf.read(info.filename) + md = hashlib.md5() + md.update(data) + hashes.append(md.hexdigest()) + + # compute hash of jar file + with open(fpath, 'rb') as jarin: + md = hashlib.md5() + md.update(jarin.read()) + jarhash = md.hexdigest() + # This replaces `//a/b/c` with `//a/b/c` + frelpath = os.path.join(root_dir_alias, + os.path.relpath(fpath, root_dir)) + + # This converts something like `/.` to `/` + + frelpath = os.path.normpath(frelpath) + + (_, fname) = os.path.split(frelpath) + return JarFeature( + os.path.basename(fpath), + fpath, + jarhash, + hashes + ) diff --git a/tests/unit/test_jar_utils.py b/tests/unit/test_jar_utils.py new file mode 100644 index 00000000..50da4551 --- /dev/null +++ b/tests/unit/test_jar_utils.py @@ -0,0 +1,50 @@ +import unittest + +import os +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +from utils.features import JarFeature + +# +# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html +# + +class JarUtilsTests(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_get_dpkg_packages(self): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + os.umask(saved_umask) + os.rmdir(tmpdir) From 393885eaae21218fcbb34e61106c1794933a8542 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 11:40:09 -0400 Subject: [PATCH 02/14] added jar plugin test, fixed name in jar util test case Signed-off-by: Sastry Duri --- tests/unit/test_jar_plugin.py | 56 +++++++++++++++++++++++++++++++++++ tests/unit/test_jar_utils.py | 2 +- 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_jar_plugin.py diff --git a/tests/unit/test_jar_plugin.py b/tests/unit/test_jar_plugin.py new file mode 100644 index 00000000..6d1d8da8 --- /dev/null +++ b/tests/unit/test_jar_plugin.py @@ -0,0 +1,56 @@ +import unittest + +import os +import sys +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +from utils.features import JarFeature + +# +# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html +# + +sys.path.append('tests/unit/') +from plugins.systems.jar_host_crawler import JarHostCrawler + + +class GPUPluginTests(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_jar_host_crawler_plugin(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarHostCrawler() + jars = list(fc.crawl(root_dir=tmpdir)) + #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + os.umask(saved_umask) diff --git a/tests/unit/test_jar_utils.py b/tests/unit/test_jar_utils.py index 50da4551..5b37be7a 100644 --- a/tests/unit/test_jar_utils.py +++ b/tests/unit/test_jar_utils.py @@ -19,7 +19,7 @@ def setUp(self): def tearDown(self): pass - def test_get_dpkg_packages(self): + def test_get_jar_features(self): tmpdir = tempfile.mkdtemp() jar_file_name = 'myfile.jar' From bc3fef7e367fab26b65bebcc78ccef6d9116efc5 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 13:27:08 -0400 Subject: [PATCH 03/14] fixed whitespace Signed-off-by: Sastry Duri --- crawler/plugins/systems/jar_host_crawler.py | 2 +- crawler/utils/jar_utils.py | 19 +++++++++---------- tests/unit/test_jar_plugin.py | 2 +- tests/unit/test_jar_utils.py | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/crawler/plugins/systems/jar_host_crawler.py b/crawler/plugins/systems/jar_host_crawler.py index 24ba6ed0..0b9b9e19 100644 --- a/crawler/plugins/systems/jar_host_crawler.py +++ b/crawler/plugins/systems/jar_host_crawler.py @@ -23,4 +23,4 @@ def crawl( '/usr/share/mime'], **kwargs): return crawl_jar_files(root_dir=root_dir, - exclude_dirs=exclude_dirs) + exclude_dirs=exclude_dirs) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py index e09b6763..a5e93181 100644 --- a/crawler/utils/jar_utils.py +++ b/crawler/utils/jar_utils.py @@ -46,8 +46,7 @@ def crawl_jar_files( for fpath in files: if not fpath.endswith('.jar'): continue - feature = _crawl_jar_file(root_dir, fpath, - root_dir_alias) + feature = _crawl_jar_file(root_dir, fpath,root_dir_alias) if feature: yield (feature.path, feature, 'jar') @@ -63,13 +62,13 @@ def _crawl_jar_file( hashes = [] with zipfile.ZipFile(fpath, 'r') as zf: - for info in zf.infolist(): - if not info.filename.endswith('.class'): - continue - data = zf.read(info.filename) - md = hashlib.md5() - md.update(data) - hashes.append(md.hexdigest()) + for info in zf.infolist(): + if not info.filename.endswith('.class'): + continue + data = zf.read(info.filename) + md = hashlib.md5() + md.update(data) + hashes.append(md.hexdigest()) # compute hash of jar file with open(fpath, 'rb') as jarin: @@ -90,4 +89,4 @@ def _crawl_jar_file( fpath, jarhash, hashes - ) + ) diff --git a/tests/unit/test_jar_plugin.py b/tests/unit/test_jar_plugin.py index 6d1d8da8..024c6176 100644 --- a/tests/unit/test_jar_plugin.py +++ b/tests/unit/test_jar_plugin.py @@ -46,7 +46,7 @@ def test_jar_host_crawler_plugin(self, *args): assert 'myfile.jar' == jar_feature.name assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash assert ['ddc6eff37020aa858e26b1ba8a49ee0e', - 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes assert 'jar' == jars[0][2] except IOError as e: diff --git a/tests/unit/test_jar_utils.py b/tests/unit/test_jar_utils.py index 5b37be7a..db121962 100644 --- a/tests/unit/test_jar_utils.py +++ b/tests/unit/test_jar_utils.py @@ -39,7 +39,7 @@ def test_get_jar_features(self): assert 'myfile.jar' == jar_feature.name assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash assert ['ddc6eff37020aa858e26b1ba8a49ee0e', - 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes assert 'jar' == jars[0][2] except IOError as e: From 2a06638f1213486172830fc4375e9f5350467a0a Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 13:33:03 -0400 Subject: [PATCH 04/14] fixed whitespace Signed-off-by: Sastry Duri --- crawler/utils/jar_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py index a5e93181..74fec3bf 100644 --- a/crawler/utils/jar_utils.py +++ b/crawler/utils/jar_utils.py @@ -84,9 +84,10 @@ def _crawl_jar_file( frelpath = os.path.normpath(frelpath) (_, fname) = os.path.split(frelpath) - return JarFeature( - os.path.basename(fpath), - fpath, - jarhash, - hashes + return + JarFeature( + os.path.basename(fpath), + fpath, + jarhash, + hashes ) From 9ef0a030e68afa6fc746091e586efaae9bc04389 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 13:44:35 -0400 Subject: [PATCH 05/14] fixed whitespace issues Signed-off-by: Sastry Duri --- crawler/utils/jar_utils.py | 13 ++++++------- tests/functional/test_functional_dockerevents.py | 2 +- tests/functional/test_logs_in_volumes1.py | 2 +- tests/unit/test_app_apache.py | 2 +- tests/unit/test_app_db2.py | 2 +- tests/unit/test_app_liberty.py | 2 +- tests/unit/test_app_nginx.py | 2 +- tests/unit/test_app_redis.py | 2 +- tests/unit/test_app_tomcat.py | 2 +- tests/unit/test_emitter.py | 4 ++-- 10 files changed, 16 insertions(+), 17 deletions(-) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py index 74fec3bf..07eb0198 100644 --- a/crawler/utils/jar_utils.py +++ b/crawler/utils/jar_utils.py @@ -84,10 +84,9 @@ def _crawl_jar_file( frelpath = os.path.normpath(frelpath) (_, fname) = os.path.split(frelpath) - return - JarFeature( - os.path.basename(fpath), - fpath, - jarhash, - hashes - ) + return JarFeature( + os.path.basename(fpath), + fpath, + jarhash, + hashes + ) diff --git a/tests/functional/test_functional_dockerevents.py b/tests/functional/test_functional_dockerevents.py index d0116da4..aa320c8a 100644 --- a/tests/functional/test_functional_dockerevents.py +++ b/tests/functional/test_functional_dockerevents.py @@ -200,7 +200,7 @@ def testCrawlContainer2(self): time.sleep(30) subprocess.call(['/bin/chmod', '-R', '777', self.tempd]) - + files = os.listdir(self.tempd + '/out') docker_server_version = self.docker.version()['Version'] if VERSION_SPEC.match(semantic_version.Version(_fix_version(docker_server_version))): diff --git a/tests/functional/test_logs_in_volumes1.py b/tests/functional/test_logs_in_volumes1.py index 486d49c3..0fbf13b1 100644 --- a/tests/functional/test_logs_in_volumes1.py +++ b/tests/functional/test_logs_in_volumes1.py @@ -67,7 +67,7 @@ def test_get_logfiles_list(self, *args): "docker_image_registry": "image_registry", "owner_namespace": "owner_namespace", "NetworkSettings": {}} - + plugins_manager.runtime_env = None self.docker_container = \ dockercontainer.DockerContainer(inspect['Id'], inspect) diff --git a/tests/unit/test_app_apache.py b/tests/unit/test_app_apache.py index d1a16c25..7f1d55b2 100644 --- a/tests/unit/test_app_apache.py +++ b/tests/unit/test_app_apache.py @@ -86,7 +86,7 @@ def __init__( ): ports = "[ {\"containerPort\" : \"80\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedApacheContainer2(object): diff --git a/tests/unit/test_app_db2.py b/tests/unit/test_app_db2.py index 71afe85b..7e8d8dfe 100644 --- a/tests/unit/test_app_db2.py +++ b/tests/unit/test_app_db2.py @@ -19,7 +19,7 @@ class MockedDB2Container1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"50000\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedDB2Container2(object): diff --git a/tests/unit/test_app_liberty.py b/tests/unit/test_app_liberty.py index ba7291d4..a2d04575 100644 --- a/tests/unit/test_app_liberty.py +++ b/tests/unit/test_app_liberty.py @@ -23,7 +23,7 @@ class MockedLibertyContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"9443\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedLibertyContainer2(object): diff --git a/tests/unit/test_app_nginx.py b/tests/unit/test_app_nginx.py index 197f3fd1..5ef176ff 100644 --- a/tests/unit/test_app_nginx.py +++ b/tests/unit/test_app_nginx.py @@ -47,7 +47,7 @@ class MockedNginxContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"80\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedNginxContainer2(object): diff --git a/tests/unit/test_app_redis.py b/tests/unit/test_app_redis.py index 12a6187e..aaca14f9 100644 --- a/tests/unit/test_app_redis.py +++ b/tests/unit/test_app_redis.py @@ -133,7 +133,7 @@ class MockedRedisContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"6379\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedRedisContainer2(object): diff --git a/tests/unit/test_app_tomcat.py b/tests/unit/test_app_tomcat.py index 2338e70b..65ae9aa7 100644 --- a/tests/unit/test_app_tomcat.py +++ b/tests/unit/test_app_tomcat.py @@ -77,7 +77,7 @@ class MockedTomcatContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"8080\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedTomcatContainer2(object): diff --git a/tests/unit/test_emitter.py b/tests/unit/test_emitter.py index 539ed65e..c87a024a 100644 --- a/tests/unit/test_emitter.py +++ b/tests/unit/test_emitter.py @@ -53,10 +53,10 @@ def mocked_get_sas_token(): return ('sas-token', 'cloudoe', 'access-group') class RandomKafkaException(Exception): - pass + pass def raise_value_error(*args, **kwargs): - raise ValueError() + raise ValueError() def mock_call_with_retries(function, max_retries=10, exception_type=Exception, From f24f91e314f08c3579700cbd55efcb0725f05c80 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 24 Oct 2017 13:55:48 -0400 Subject: [PATCH 06/14] fixed whitespace issues Signed-off-by: Sastry Duri --- crawler/plugins/systems/jar_container_crawler.py | 2 +- crawler/utils/jar_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crawler/plugins/systems/jar_container_crawler.py b/crawler/plugins/systems/jar_container_crawler.py index 6d886fe3..4a7638ea 100644 --- a/crawler/plugins/systems/jar_container_crawler.py +++ b/crawler/plugins/systems/jar_container_crawler.py @@ -41,7 +41,7 @@ def crawl( container_id) exclude_dirs = [utils.misc.join_abs_paths(rootfs_dir, d) for d in exclude_dirs] - return crawl_files( + return crawl_jar_files( root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir), exclude_dirs=exclude_dirs, root_dir_alias=root_dir) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py index 07eb0198..8203ee9b 100644 --- a/crawler/utils/jar_utils.py +++ b/crawler/utils/jar_utils.py @@ -46,7 +46,7 @@ def crawl_jar_files( for fpath in files: if not fpath.endswith('.jar'): continue - feature = _crawl_jar_file(root_dir, fpath,root_dir_alias) + feature = _crawl_jar_file(root_dir, fpath, root_dir_alias) if feature: yield (feature.path, feature, 'jar') From 0bea3e94d39a9ae09dde92f9ecf42b6cad8cf7a0 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Mon, 30 Oct 2017 16:19:39 -0400 Subject: [PATCH 07/14] fixed test case name, added container test case Signed-off-by: Sastry Duri --- tests/unit/test_jar_plugin.py | 2 +- tests/unit/test_plugins.py | 43 ++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_jar_plugin.py b/tests/unit/test_jar_plugin.py index 024c6176..19c6adc8 100644 --- a/tests/unit/test_jar_plugin.py +++ b/tests/unit/test_jar_plugin.py @@ -16,7 +16,7 @@ from plugins.systems.jar_host_crawler import JarHostCrawler -class GPUPluginTests(unittest.TestCase): +class JarHashesPluginTests(unittest.TestCase): def setUp(self): pass diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 4d191c00..84a1abcb 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -2,6 +2,14 @@ import unittest from collections import namedtuple +import os +import sys +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +sys.path.append('tests/unit/') + import mock from plugins.systems.config_container_crawler import ConfigContainerCrawler from plugins.systems.config_host_crawler import ConfigHostCrawler @@ -20,6 +28,8 @@ from plugins.systems.interface_container_crawler import InterfaceContainerCrawler from plugins.systems.interface_host_crawler import InterfaceHostCrawler from plugins.systems.interface_vm_crawler import InterfaceVmCrawler +from plugins.systems.jar_container_crawler import JarContainerCrawler +from plugins.systems.jar_host_crawler import JarHostCrawler from plugins.systems.load_container_crawler import LoadContainerCrawler from plugins.systems.load_host_crawler import LoadHostCrawler from plugins.systems.memory_container_crawler import MemoryContainerCrawler @@ -48,7 +58,8 @@ CpuFeature, InterfaceFeature, LoadFeature, - DockerPSFeature) + DockerPSFeature, + JarFeature) # for OUTVM psvmi @@ -605,6 +616,36 @@ def test_file_container_crawler(self, *args): assert args[2].call_count == 2 # isdir args[2].assert_called_with('/') + def test_jar_container_crawler_plugin(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarHostCrawler() + jars = list(fc.crawl(root_dir=tmpdir)) + #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + @mock.patch( ("plugins.systems.file_container_crawler." "utils.dockerutils.exec_dockerinspect"), From d59f7dd3755b6fabb6d29c2c646e19c7605d921a Mon Sep 17 00:00:00 2001 From: Sahil Suneja Date: Sun, 6 Aug 2017 16:19:49 -0400 Subject: [PATCH 08/14] adding support for getting mmaped files in process feature with get_mmap_files option in crawler.conf Signed-off-by: Sahil Suneja --- crawler/crawler.conf | 1 + .../systems/process_container_crawler.py | 17 ++++++++++++++++- crawler/utils/features.py | 1 + tests/functional/test_functional_plugins.py | 5 +++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/crawler/crawler.conf b/crawler/crawler.conf index 88b042c6..046fc28c 100644 --- a/crawler/crawler.conf +++ b/crawler/crawler.conf @@ -7,6 +7,7 @@ target = CONTAINER [[ process_container ]] + get_mmap_files = False [[ os_vm ]] diff --git a/crawler/plugins/systems/process_container_crawler.py b/crawler/plugins/systems/process_container_crawler.py index 28332d29..88723631 100644 --- a/crawler/plugins/systems/process_container_crawler.py +++ b/crawler/plugins/systems/process_container_crawler.py @@ -1,5 +1,5 @@ import logging - +import os import psutil import utils.dockerutils @@ -24,6 +24,8 @@ def crawl(self, container_id, avoid_setns=False, **kwargs): if avoid_setns: raise NotImplementedError() + self.get_mmap_files = kwargs.get('get_mmap_files', 'False') + return run_as_another_namespace(pid, ALL_NAMESPACES, self._crawl_in_system) @@ -39,6 +41,15 @@ def _crawl_in_system(self): continue yield self._crawl_single_process(p) + def _get_mmap_files(self, p): + mmapfiles = [] + if self.get_mmap_files == 'True': + for mmap in p.memory_maps(): + mmap_path = getattr(mmap, 'path') + if os.path.isabs(mmap_path): + mmapfiles.append(mmap_path) + return mmapfiles + def _crawl_single_process(self, p): """Returns a ProcessFeature""" create_time = ( @@ -85,6 +96,9 @@ def _crawl_single_process(self, p): for f in p.get_open_files(): openfiles.append(f.path) openfiles.sort() + + mmapfiles = self._get_mmap_files(p) + feature_key = '{0}/{1}'.format(name, pid) return (feature_key, ProcessFeature( str(' '.join(cmdline)), @@ -92,6 +106,7 @@ def _crawl_single_process(self, p): cwd, name, openfiles, + mmapfiles, pid, ppid, num_threads, diff --git a/crawler/utils/features.py b/crawler/utils/features.py index 8e72475b..681f8872 100644 --- a/crawler/utils/features.py +++ b/crawler/utils/features.py @@ -45,6 +45,7 @@ 'cwd', 'pname', 'openfiles', + 'mmapfiles', 'pid', 'ppid', 'threads', diff --git a/tests/functional/test_functional_plugins.py b/tests/functional/test_functional_plugins.py index 9b3cfaeb..8fd9fc69 100644 --- a/tests/functional/test_functional_plugins.py +++ b/tests/functional/test_functional_plugins.py @@ -70,6 +70,11 @@ def test_crawl_outcontainer_processes(self): # sleep + crawler assert len(list(fc.crawl(self.container['Id']))) == 2 + def test_crawl_outcontainer_processes_mmapfiles(self): + fc = ProcessContainerCrawler() + output = "%s" % list(fc.crawl(self.container['Id'], get_mmap_files='True')) + assert '/bin/busybox' in output + def test_crawl_outcontainer_mem(self): fc = MemoryContainerCrawler() output = "%s" % list(fc.crawl(self.container['Id'])) From b44043a28aca6efa65d50b6f323c6fe01c6a9b42 Mon Sep 17 00:00:00 2001 From: Sahil Suneja Date: Sun, 6 Aug 2017 16:42:38 -0400 Subject: [PATCH 09/14] fix vm and host crawler with new mmapfiles process feature field Signed-off-by: Sahil Suneja --- crawler/plugins/systems/process_host_crawler.py | 4 ++++ crawler/plugins/systems/process_vm_crawler.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/crawler/plugins/systems/process_host_crawler.py b/crawler/plugins/systems/process_host_crawler.py index 27714b99..642f8c4e 100644 --- a/crawler/plugins/systems/process_host_crawler.py +++ b/crawler/plugins/systems/process_host_crawler.py @@ -73,6 +73,9 @@ def _crawl_single_process(self, p): for f in p.get_open_files(): openfiles.append(f.path) openfiles.sort() + + mmapfiles = [] + feature_key = '{0}/{1}'.format(name, pid) return (feature_key, ProcessFeature( str(' '.join(cmdline)), @@ -80,6 +83,7 @@ def _crawl_single_process(self, p): cwd, name, openfiles, + mmapfiles, pid, ppid, num_threads, diff --git a/crawler/plugins/systems/process_vm_crawler.py b/crawler/plugins/systems/process_vm_crawler.py index 8ee595b7..7adbd589 100644 --- a/crawler/plugins/systems/process_vm_crawler.py +++ b/crawler/plugins/systems/process_vm_crawler.py @@ -84,6 +84,9 @@ def _crawl_single_process(self, p): for f in p.get_open_files(): openfiles.append(f.path) openfiles.sort() + + mmapfiles = [] + feature_key = '{0}/{1}'.format(name, pid) return (feature_key, ProcessFeature( str(' '.join(cmdline)), @@ -91,6 +94,7 @@ def _crawl_single_process(self, p): cwd, name, openfiles, + mmapfiles, pid, ppid, num_threads, From ea7f39e7b2c61deb0048fb75c54760dc551709ed Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Tue, 31 Oct 2017 14:26:46 -0400 Subject: [PATCH 10/14] fixed jst host crawler usage in test plugins Signed-off-by: Sastry Duri --- tests/unit/test_plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 84a1abcb..b4a58373 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -630,7 +630,7 @@ def test_jar_container_crawler_plugin(self, *args): myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") - fc = JarHostCrawler() + fc = JarContainerCrawler() jars = list(fc.crawl(root_dir=tmpdir)) #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) print jars From 8c283b9f6ffb4c0cc19182d7b4c7e97ec1824601 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Wed, 1 Nov 2017 14:40:35 -0400 Subject: [PATCH 11/14] updated test case Signed-off-by: Sastry Duri --- tests/unit/test_plugins.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index b4a58373..5f45382b 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -616,6 +616,20 @@ def test_file_container_crawler(self, *args): assert args[2].call_count == 2 # isdir args[2].assert_called_with('/') + @mock.patch( + ("plugins.systems.file_container_crawler." + "utils.dockerutils.exec_dockerinspect"), + side_effect=lambda long_id: {'State': {'Pid': 123}}) + @mock.patch( + ("plugins.systems.file_container_crawler." + "run_as_another_namespace"), + side_effect=mocked_run_as_another_namespace) + @mock.patch('utils.file_utils.os.path.isdir', + side_effect=lambda p: True) + @mock.patch('utils.file_utils.os.walk', + side_effect=mocked_os_walk) + @mock.patch('utils.file_utils.os.lstat', + side_effect=mocked_os_lstat) def test_jar_container_crawler_plugin(self, *args): tmpdir = tempfile.mkdtemp() jar_file_name = 'myfile.jar' From a05c09c79459e7ed8ad086a0ee7d215e4965610e Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Wed, 1 Nov 2017 15:09:44 -0400 Subject: [PATCH 12/14] test case fix Signed-off-by: Sastry Duri --- tests/unit/test_plugins.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 5f45382b..88accd84 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -617,19 +617,13 @@ def test_file_container_crawler(self, *args): args[2].assert_called_with('/') @mock.patch( - ("plugins.systems.file_container_crawler." + ("plugins.systems.jar_container_crawler." "utils.dockerutils.exec_dockerinspect"), side_effect=lambda long_id: {'State': {'Pid': 123}}) @mock.patch( - ("plugins.systems.file_container_crawler." + ("plugins.systems.jar_container_crawler." "run_as_another_namespace"), side_effect=mocked_run_as_another_namespace) - @mock.patch('utils.file_utils.os.path.isdir', - side_effect=lambda p: True) - @mock.patch('utils.file_utils.os.walk', - side_effect=mocked_os_walk) - @mock.patch('utils.file_utils.os.lstat', - side_effect=mocked_os_lstat) def test_jar_container_crawler_plugin(self, *args): tmpdir = tempfile.mkdtemp() jar_file_name = 'myfile.jar' From c12e200c8e0084c5ceff09c57db73ec1699ee01c Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Wed, 1 Nov 2017 15:37:34 -0400 Subject: [PATCH 13/14] added a new test case Signed-off-by: Sastry Duri --- tests/unit/test_plugins.py | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 88accd84..67d2dd8b 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -654,6 +654,45 @@ def test_jar_container_crawler_plugin(self, *args): finally: os.remove(path) + + @mock.patch( + ("plugins.systems.jar_container_crawler." + "utils.dockerutils.exec_dockerinspect"), + side_effect=lambda long_id: {'State': {'Pid': 123}}) + @mock.patch( + ("plugins.systems.jar_container_crawler.utils.dockerutils." + "get_docker_container_rootfs_path"), + side_effect=lambda long_id: '/1/2/3') + def test_jar_container_crawler_avoidsetns(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarContainerCrawler() + jars = list(fc.crawl(root_dir=tmpdir)) + #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + @mock.patch( ("plugins.systems.file_container_crawler." "utils.dockerutils.exec_dockerinspect"), From c9b3f88f46681f3b6c745201b7623e20d8b9da95 Mon Sep 17 00:00:00 2001 From: Sastry Duri Date: Wed, 1 Nov 2017 16:46:07 -0400 Subject: [PATCH 14/14] updated test case Signed-off-by: Sastry Duri --- tests/unit/test_plugins.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 67d2dd8b..224552e9 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -662,7 +662,7 @@ def test_jar_container_crawler_plugin(self, *args): @mock.patch( ("plugins.systems.jar_container_crawler.utils.dockerutils." "get_docker_container_rootfs_path"), - side_effect=lambda long_id: '/1/2/3') + side_effect=lambda long_id: '/tmp') def test_jar_container_crawler_avoidsetns(self, *args): tmpdir = tempfile.mkdtemp() jar_file_name = 'myfile.jar' @@ -678,8 +678,7 @@ def test_jar_container_crawler_avoidsetns(self, *args): myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") fc = JarContainerCrawler() - jars = list(fc.crawl(root_dir=tmpdir)) - #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + jars = list(fc.crawl(root_dir=os.path.basename(tmpdir), avoid_setns=True)) print jars jar_feature = jars[0][1] assert 'myfile.jar' == jar_feature.name