diff --git a/crawler/plugins/systems/jar_container_crawler.plugin b/crawler/plugins/systems/jar_container_crawler.plugin new file mode 100644 index 00000000..3247d56e --- /dev/null +++ b/crawler/plugins/systems/jar_container_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = jar_container +Module = jar_container_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Produces md5 hashes for jar files and class files in jars diff --git a/crawler/plugins/systems/jar_container_crawler.py b/crawler/plugins/systems/jar_container_crawler.py new file mode 100644 index 00000000..4a7638ea --- /dev/null +++ b/crawler/plugins/systems/jar_container_crawler.py @@ -0,0 +1,54 @@ +import logging + +import utils.dockerutils +import utils.misc +from icrawl_plugin import IContainerCrawler +from utils.jar_utils import crawl_jar_files +from utils.namespace import run_as_another_namespace + +logger = logging.getLogger('crawlutils') + + +class JarContainerCrawler(IContainerCrawler): + + def get_feature(self): + return 'jar' + + def crawl( + self, + container_id=None, + avoid_setns=False, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + inspect = utils.dockerutils.exec_dockerinspect(container_id) + state = inspect['State'] + pid = str(state['Pid']) + logger.debug('Crawling jars for container %s' % container_id) + + if avoid_setns: + rootfs_dir = utils.dockerutils.get_docker_container_rootfs_path( + container_id) + exclude_dirs = [utils.misc.join_abs_paths(rootfs_dir, d) + for d in exclude_dirs] + return crawl_jar_files( + root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir), + exclude_dirs=exclude_dirs, + root_dir_alias=root_dir) + else: # in all other cases, including wrong mode set + return run_as_another_namespace(pid, + ['mnt'], + crawl_jar_files, + root_dir, + exclude_dirs, + None) diff --git a/crawler/plugins/systems/jar_host_crawler.plugin b/crawler/plugins/systems/jar_host_crawler.plugin new file mode 100644 index 00000000..f91d443b --- /dev/null +++ b/crawler/plugins/systems/jar_host_crawler.plugin @@ -0,0 +1,8 @@ +[Core] +Name = jar_host +Module = jar_host_crawler + +[Documentation] +Author = IBM +Version = 0.1 +Description = Produces md5 hashes for jar files and class files in jars diff --git a/crawler/plugins/systems/jar_host_crawler.py b/crawler/plugins/systems/jar_host_crawler.py new file mode 100644 index 00000000..0b9b9e19 --- /dev/null +++ b/crawler/plugins/systems/jar_host_crawler.py @@ -0,0 +1,26 @@ +from icrawl_plugin import IHostCrawler +from utils.jar_utils import crawl_jar_files + + +class JarHostCrawler(IHostCrawler): + + def get_feature(self): + return 'jar' + + def crawl( + self, + root_dir='/', + exclude_dirs=[ + '/boot', + '/dev', + '/proc', + '/sys', + '/mnt', + '/tmp', + '/var/cache', + '/usr/share/man', + '/usr/share/doc', + '/usr/share/mime'], + **kwargs): + return crawl_jar_files(root_dir=root_dir, + exclude_dirs=exclude_dirs) diff --git a/crawler/utils/features.py b/crawler/utils/features.py index cf167307..681f8872 100644 --- a/crawler/utils/features.py +++ b/crawler/utils/features.py @@ -122,3 +122,9 @@ 'cpu_khz', 'cpu_cache_size_kb', 'cpu_num_cores']) +JarFeature = namedtuple('JarFeature', [ + 'name', + 'path', + 'jarhash', + 'hashes', +]) diff --git a/crawler/utils/jar_utils.py b/crawler/utils/jar_utils.py new file mode 100644 index 00000000..8203ee9b --- /dev/null +++ b/crawler/utils/jar_utils.py @@ -0,0 +1,92 @@ +import fnmatch +import logging +import os +import re +import hashlib +import zipfile + +from utils.features import JarFeature + +logger = logging.getLogger('crawlutils') + + +def crawl_jar_files( + root_dir='/', + exclude_dirs=[], + root_dir_alias=None, + accessed_since=0): + + if not os.path.isdir(root_dir): + return + + saved_args = locals() + logger.debug('crawl_jar_files: %s' % (saved_args)) + + assert os.path.isdir(root_dir) + if root_dir_alias is None: + root_dir_alias = root_dir + exclude_dirs = [os.path.join(root_dir, d) for d in + exclude_dirs] + exclude_regex = r'|'.join([fnmatch.translate(d) + for d in exclude_dirs]) or r'$.' + + # walk the directory hierarchy starting at 'root_dir' in BFS + # order + + for (root_dirpath, dirs, files) in os.walk(root_dir): + dirs[:] = [os.path.join(root_dirpath, d) for d in + dirs] + dirs[:] = [d for d in dirs + if not re.match(exclude_regex, d)] + files = [os.path.join(root_dirpath, f) for f in + files] + files = [f for f in files + if not re.match(exclude_regex, f)] + + for fpath in files: + if not fpath.endswith('.jar'): + continue + feature = _crawl_jar_file(root_dir, fpath, root_dir_alias) + if feature: + yield (feature.path, feature, 'jar') + + +# crawl a single file +def _crawl_jar_file( + root_dir, + fpath, + root_dir_alias, +): + if not fpath.endswith('.jar'): + return + + hashes = [] + with zipfile.ZipFile(fpath, 'r') as zf: + for info in zf.infolist(): + if not info.filename.endswith('.class'): + continue + data = zf.read(info.filename) + md = hashlib.md5() + md.update(data) + hashes.append(md.hexdigest()) + + # compute hash of jar file + with open(fpath, 'rb') as jarin: + md = hashlib.md5() + md.update(jarin.read()) + jarhash = md.hexdigest() + # This replaces `//a/b/c` with `//a/b/c` + frelpath = os.path.join(root_dir_alias, + os.path.relpath(fpath, root_dir)) + + # This converts something like `/.` to `/` + + frelpath = os.path.normpath(frelpath) + + (_, fname) = os.path.split(frelpath) + return JarFeature( + os.path.basename(fpath), + fpath, + jarhash, + hashes + ) diff --git a/tests/functional/test_functional_dockerevents.py b/tests/functional/test_functional_dockerevents.py index d0116da4..aa320c8a 100644 --- a/tests/functional/test_functional_dockerevents.py +++ b/tests/functional/test_functional_dockerevents.py @@ -200,7 +200,7 @@ def testCrawlContainer2(self): time.sleep(30) subprocess.call(['/bin/chmod', '-R', '777', self.tempd]) - + files = os.listdir(self.tempd + '/out') docker_server_version = self.docker.version()['Version'] if VERSION_SPEC.match(semantic_version.Version(_fix_version(docker_server_version))): diff --git a/tests/functional/test_logs_in_volumes1.py b/tests/functional/test_logs_in_volumes1.py index 486d49c3..0fbf13b1 100644 --- a/tests/functional/test_logs_in_volumes1.py +++ b/tests/functional/test_logs_in_volumes1.py @@ -67,7 +67,7 @@ def test_get_logfiles_list(self, *args): "docker_image_registry": "image_registry", "owner_namespace": "owner_namespace", "NetworkSettings": {}} - + plugins_manager.runtime_env = None self.docker_container = \ dockercontainer.DockerContainer(inspect['Id'], inspect) diff --git a/tests/unit/test_app_apache.py b/tests/unit/test_app_apache.py index d1a16c25..7f1d55b2 100644 --- a/tests/unit/test_app_apache.py +++ b/tests/unit/test_app_apache.py @@ -86,7 +86,7 @@ def __init__( ): ports = "[ {\"containerPort\" : \"80\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedApacheContainer2(object): diff --git a/tests/unit/test_app_db2.py b/tests/unit/test_app_db2.py index 71afe85b..7e8d8dfe 100644 --- a/tests/unit/test_app_db2.py +++ b/tests/unit/test_app_db2.py @@ -19,7 +19,7 @@ class MockedDB2Container1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"50000\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedDB2Container2(object): diff --git a/tests/unit/test_app_liberty.py b/tests/unit/test_app_liberty.py index ba7291d4..a2d04575 100644 --- a/tests/unit/test_app_liberty.py +++ b/tests/unit/test_app_liberty.py @@ -23,7 +23,7 @@ class MockedLibertyContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"9443\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedLibertyContainer2(object): diff --git a/tests/unit/test_app_nginx.py b/tests/unit/test_app_nginx.py index 197f3fd1..5ef176ff 100644 --- a/tests/unit/test_app_nginx.py +++ b/tests/unit/test_app_nginx.py @@ -47,7 +47,7 @@ class MockedNginxContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"80\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedNginxContainer2(object): diff --git a/tests/unit/test_app_redis.py b/tests/unit/test_app_redis.py index 12a6187e..aaca14f9 100644 --- a/tests/unit/test_app_redis.py +++ b/tests/unit/test_app_redis.py @@ -133,7 +133,7 @@ class MockedRedisContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"6379\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedRedisContainer2(object): diff --git a/tests/unit/test_app_tomcat.py b/tests/unit/test_app_tomcat.py index 2338e70b..65ae9aa7 100644 --- a/tests/unit/test_app_tomcat.py +++ b/tests/unit/test_app_tomcat.py @@ -77,7 +77,7 @@ class MockedTomcatContainer1(object): def __init__(self, container_id): ports = "[ {\"containerPort\" : \"8080\"} ]" self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels": - {"annotation.io.kubernetes.container.ports": ports}}} + {"annotation.io.kubernetes.container.ports": ports}}} class MockedTomcatContainer2(object): diff --git a/tests/unit/test_emitter.py b/tests/unit/test_emitter.py index 539ed65e..c87a024a 100644 --- a/tests/unit/test_emitter.py +++ b/tests/unit/test_emitter.py @@ -53,10 +53,10 @@ def mocked_get_sas_token(): return ('sas-token', 'cloudoe', 'access-group') class RandomKafkaException(Exception): - pass + pass def raise_value_error(*args, **kwargs): - raise ValueError() + raise ValueError() def mock_call_with_retries(function, max_retries=10, exception_type=Exception, diff --git a/tests/unit/test_jar_plugin.py b/tests/unit/test_jar_plugin.py new file mode 100644 index 00000000..19c6adc8 --- /dev/null +++ b/tests/unit/test_jar_plugin.py @@ -0,0 +1,56 @@ +import unittest + +import os +import sys +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +from utils.features import JarFeature + +# +# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html +# + +sys.path.append('tests/unit/') +from plugins.systems.jar_host_crawler import JarHostCrawler + + +class JarHashesPluginTests(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_jar_host_crawler_plugin(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarHostCrawler() + jars = list(fc.crawl(root_dir=tmpdir)) + #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + os.umask(saved_umask) diff --git a/tests/unit/test_jar_utils.py b/tests/unit/test_jar_utils.py new file mode 100644 index 00000000..db121962 --- /dev/null +++ b/tests/unit/test_jar_utils.py @@ -0,0 +1,50 @@ +import unittest + +import os +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +from utils.features import JarFeature + +# +# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html +# + +class JarUtilsTests(unittest.TestCase): + + def setUp(self): + pass + + def tearDown(self): + pass + + def test_get_jar_features(self): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + os.umask(saved_umask) + os.rmdir(tmpdir) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 4d191c00..224552e9 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -2,6 +2,14 @@ import unittest from collections import namedtuple +import os +import sys +import tempfile +from zipfile import ZipFile, ZipInfo + +from utils import jar_utils +sys.path.append('tests/unit/') + import mock from plugins.systems.config_container_crawler import ConfigContainerCrawler from plugins.systems.config_host_crawler import ConfigHostCrawler @@ -20,6 +28,8 @@ from plugins.systems.interface_container_crawler import InterfaceContainerCrawler from plugins.systems.interface_host_crawler import InterfaceHostCrawler from plugins.systems.interface_vm_crawler import InterfaceVmCrawler +from plugins.systems.jar_container_crawler import JarContainerCrawler +from plugins.systems.jar_host_crawler import JarHostCrawler from plugins.systems.load_container_crawler import LoadContainerCrawler from plugins.systems.load_host_crawler import LoadHostCrawler from plugins.systems.memory_container_crawler import MemoryContainerCrawler @@ -48,7 +58,8 @@ CpuFeature, InterfaceFeature, LoadFeature, - DockerPSFeature) + DockerPSFeature, + JarFeature) # for OUTVM psvmi @@ -605,6 +616,82 @@ def test_file_container_crawler(self, *args): assert args[2].call_count == 2 # isdir args[2].assert_called_with('/') + @mock.patch( + ("plugins.systems.jar_container_crawler." + "utils.dockerutils.exec_dockerinspect"), + side_effect=lambda long_id: {'State': {'Pid': 123}}) + @mock.patch( + ("plugins.systems.jar_container_crawler." + "run_as_another_namespace"), + side_effect=mocked_run_as_another_namespace) + def test_jar_container_crawler_plugin(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarContainerCrawler() + jars = list(fc.crawl(root_dir=tmpdir)) + #jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + + + @mock.patch( + ("plugins.systems.jar_container_crawler." + "utils.dockerutils.exec_dockerinspect"), + side_effect=lambda long_id: {'State': {'Pid': 123}}) + @mock.patch( + ("plugins.systems.jar_container_crawler.utils.dockerutils." + "get_docker_container_rootfs_path"), + side_effect=lambda long_id: '/tmp') + def test_jar_container_crawler_avoidsetns(self, *args): + tmpdir = tempfile.mkdtemp() + jar_file_name = 'myfile.jar' + + # Ensure the file is read/write by the creator only + saved_umask = os.umask(0077) + + path = os.path.join(tmpdir, jar_file_name) + try: + with ZipFile(path, "w") as myjar: + myjar.writestr(ZipInfo('first.class',(1980,1,1,1,1,1)), "first secrets!") + myjar.writestr(ZipInfo('second.class',(1980,1,1,1,1,1)), "second secrets!") + myjar.writestr(ZipInfo('second.txt',(1980,1,1,1,1,1)), "second secrets!") + + fc = JarContainerCrawler() + jars = list(fc.crawl(root_dir=os.path.basename(tmpdir), avoid_setns=True)) + print jars + jar_feature = jars[0][1] + assert 'myfile.jar' == jar_feature.name + assert '48ac85a26ffa7ff5cefdd5c73a9fb888' == jar_feature.jarhash + assert ['ddc6eff37020aa858e26b1ba8a49ee0e', + 'cbe2a13eb99c1c8ac5f30d0a04f8c492'] == jar_feature.hashes + assert 'jar' == jars[0][2] + + except IOError as e: + print 'IOError' + finally: + os.remove(path) + @mock.patch( ("plugins.systems.file_container_crawler." "utils.dockerutils.exec_dockerinspect"),