Skip to content

Commit

Permalink
Javahashes (#341)
Browse files Browse the repository at this point in the history
* added jar plugin

Signed-off-by: Sastry Duri <[email protected]>
  • Loading branch information
sastryduri authored and sahilsuneja1 committed Nov 7, 2017
1 parent 7f10a45 commit 824a898
Show file tree
Hide file tree
Showing 18 changed files with 398 additions and 11 deletions.
8 changes: 8 additions & 0 deletions crawler/plugins/systems/jar_container_crawler.plugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Core]
Name = jar_container
Module = jar_container_crawler

[Documentation]
Author = IBM
Version = 0.1
Description = Produces md5 hashes for jar files and class files in jars
54 changes: 54 additions & 0 deletions crawler/plugins/systems/jar_container_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging

import utils.dockerutils
import utils.misc
from icrawl_plugin import IContainerCrawler
from utils.jar_utils import crawl_jar_files
from utils.namespace import run_as_another_namespace

logger = logging.getLogger('crawlutils')


class JarContainerCrawler(IContainerCrawler):
    """Container crawler plugin that reports jar files as 'jar' features.

    The actual work is delegated to ``crawl_jar_files``; this class only
    decides where that function runs (against the container rootfs path
    on the host, or through ``run_as_another_namespace``).
    """

    # Directories skipped when the caller does not supply ``exclude_dirs``.
    # Kept as an immutable tuple so the default can never be shared and
    # mutated across calls.
    _DEFAULT_EXCLUDE_DIRS = (
        '/boot',
        '/dev',
        '/proc',
        '/sys',
        '/mnt',
        '/tmp',
        '/var/cache',
        '/usr/share/man',
        '/usr/share/doc',
        '/usr/share/mime',
    )

    def get_feature(self):
        """Return the feature type name produced by this plugin."""
        return 'jar'

    def crawl(
            self,
            container_id=None,
            avoid_setns=False,
            root_dir='/',
            exclude_dirs=None,
            **kwargs):
        """Crawl the jar files of a container.

        Args:
            container_id: Id handed to ``exec_dockerinspect`` and
                ``get_docker_container_rootfs_path``.
            avoid_setns: When true, crawl the container's rootfs path
                directly instead of going through
                ``run_as_another_namespace``.
            root_dir: Directory (as seen from inside the container) at
                which the crawl starts.
            exclude_dirs: Paths to skip. ``None`` selects the built-in
                default list; an empty list excludes nothing.
            **kwargs: Accepted and ignored.

        Returns:
            With ``avoid_setns`` the generator returned by
            ``crawl_jar_files``; otherwise whatever
            ``run_as_another_namespace`` returns for that function.
        """
        if exclude_dirs is None:
            exclude_dirs = list(self._DEFAULT_EXCLUDE_DIRS)

        container_info = utils.dockerutils.exec_dockerinspect(container_id)
        pid = str(container_info['State']['Pid'])
        logger.debug('Crawling jars for container %s', container_id)

        if avoid_setns:
            rootfs_dir = utils.dockerutils.get_docker_container_rootfs_path(
                container_id)
            # Re-root both the start directory and the exclusions under the
            # container rootfs, while reporting paths relative to root_dir.
            rooted_excludes = [utils.misc.join_abs_paths(rootfs_dir, d)
                               for d in exclude_dirs]
            return crawl_jar_files(
                root_dir=utils.misc.join_abs_paths(rootfs_dir, root_dir),
                exclude_dirs=rooted_excludes,
                root_dir_alias=root_dir)

        # In all other cases, including wrong mode set.
        # NOTE(review): presumably this runs the crawl inside the
        # container's mount namespace -- confirm against utils.namespace.
        return run_as_another_namespace(pid,
                                        ['mnt'],
                                        crawl_jar_files,
                                        root_dir,
                                        exclude_dirs,
                                        None)
8 changes: 8 additions & 0 deletions crawler/plugins/systems/jar_host_crawler.plugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Core]
Name = jar_host
Module = jar_host_crawler

[Documentation]
Author = IBM
Version = 0.1
Description = Produces md5 hashes for jar files and class files in jars
26 changes: 26 additions & 0 deletions crawler/plugins/systems/jar_host_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from icrawl_plugin import IHostCrawler
from utils.jar_utils import crawl_jar_files


class JarHostCrawler(IHostCrawler):
    """Host crawler plugin that reports jar files as 'jar' features."""

    # Directories skipped when the caller does not supply ``exclude_dirs``.
    # Kept as an immutable tuple so the default can never be shared and
    # mutated across calls.
    _DEFAULT_EXCLUDE_DIRS = (
        '/boot',
        '/dev',
        '/proc',
        '/sys',
        '/mnt',
        '/tmp',
        '/var/cache',
        '/usr/share/man',
        '/usr/share/doc',
        '/usr/share/mime',
    )

    def get_feature(self):
        """Return the feature type name produced by this plugin."""
        return 'jar'

    def crawl(
            self,
            root_dir='/',
            exclude_dirs=None,
            **kwargs):
        """Crawl the jar files found under ``root_dir`` on the host.

        Args:
            root_dir: Directory at which the crawl starts.
            exclude_dirs: Paths to skip. ``None`` selects the built-in
                default list; an empty list excludes nothing.
            **kwargs: Accepted and ignored.

        Returns:
            The generator returned by ``crawl_jar_files``.
        """
        if exclude_dirs is None:
            exclude_dirs = list(self._DEFAULT_EXCLUDE_DIRS)
        return crawl_jar_files(root_dir=root_dir,
                               exclude_dirs=exclude_dirs)
6 changes: 6 additions & 0 deletions crawler/utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,9 @@
'cpu_khz',
'cpu_cache_size_kb',
'cpu_num_cores'])
# Record describing one crawled jar file:
#   name    - file name of the jar
#   path    - path of the jar file
#   jarhash - hash of the jar file itself
#   hashes  - hashes of the entries inside the jar
JarFeature = namedtuple(
    'JarFeature',
    ['name', 'path', 'jarhash', 'hashes'],
)
92 changes: 92 additions & 0 deletions crawler/utils/jar_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import fnmatch
import logging
import os
import re
import hashlib
import zipfile

from utils.features import JarFeature

logger = logging.getLogger('crawlutils')


def crawl_jar_files(
        root_dir='/',
        exclude_dirs=None,
        root_dir_alias=None,
        accessed_since=0):
    """Yield a feature for every ``.jar`` file found under ``root_dir``.

    Args:
        root_dir: Directory to walk. Nothing is yielded when it is not an
            existing directory.
        exclude_dirs: fnmatch-style patterns of paths to skip. Each one is
            joined onto ``root_dir`` (absolute patterns are therefore used
            as given). ``None`` or an empty sequence excludes nothing.
        root_dir_alias: Forwarded to ``_crawl_jar_file``; defaults to
            ``root_dir`` when ``None``.
        accessed_since: Not used by this function; kept so existing
            callers that pass it keep working.

    Yields:
        ``(feature.path, feature, 'jar')`` tuples, one per jar for which
        ``_crawl_jar_file`` returned a feature.
    """
    if not os.path.isdir(root_dir):
        return

    saved_args = locals()
    logger.debug('crawl_jar_files: %s', saved_args)

    if root_dir_alias is None:
        root_dir_alias = root_dir

    exclude_patterns = [os.path.join(root_dir, d)
                        for d in (exclude_dirs or ())]
    # With no exclusions fall back to '$.', a pattern that cannot match.
    # Compiled once here instead of being re-resolved for every entry.
    exclude_regex = re.compile(
        r'|'.join([fnmatch.translate(p) for p in exclude_patterns]) or r'$.')

    # Walk the hierarchy top-down starting at 'root_dir'.
    for (root_dirpath, dirs, files) in os.walk(root_dir):
        # Prune excluded directories in place so os.walk does not descend
        # into them. The entries must stay bare names: os.walk joins each
        # one onto the current directory itself, so storing joined paths
        # would break the descent whenever root_dir is relative.
        dirs[:] = [
            d for d in dirs
            if not exclude_regex.match(os.path.join(root_dirpath, d))
        ]

        for fname in files:
            fpath = os.path.join(root_dirpath, fname)
            if exclude_regex.match(fpath) or not fpath.endswith('.jar'):
                continue
            feature = _crawl_jar_file(root_dir, fpath, root_dir_alias)
            if feature:
                yield (feature.path, feature, 'jar')


# crawl a single file
def _crawl_jar_file(
        root_dir,
        fpath,
        root_dir_alias,
):
    """Build the ``JarFeature`` for a single jar file.

    Args:
        root_dir: Directory the crawl started from; ``fpath`` is made
            relative to it when computing the reported path.
        fpath: Path of the jar file to read.
        root_dir_alias: Prefix that replaces ``root_dir`` in the reported
            path. ``None`` means "same as ``root_dir``".

    Returns:
        A ``JarFeature`` holding the jar's file name, its path re-rooted
        under ``root_dir_alias``, the MD5 of the jar file and the list of
        MD5 digests of its ``.class`` entries (in archive order).
        ``None`` when ``fpath`` does not end in ``.jar`` or the file
        cannot be read as a zip archive.
    """
    if not fpath.endswith('.jar'):
        return None

    if root_dir_alias is None:
        root_dir_alias = root_dir

    chunk_size = 1024 * 1024
    try:
        hashes = []
        with zipfile.ZipFile(fpath, 'r') as zf:
            for info in zf.infolist():
                if not info.filename.endswith('.class'):
                    continue
                hashes.append(hashlib.md5(zf.read(info.filename)).hexdigest())

        # Hash the jar itself in chunks so a large archive is never held
        # in memory in one piece.
        jar_md5 = hashlib.md5()
        with open(fpath, 'rb') as jarin:
            for chunk in iter(lambda: jarin.read(chunk_size), b''):
                jar_md5.update(chunk)
        jarhash = jar_md5.hexdigest()
    except (zipfile.BadZipfile, IOError, OSError) as exc:
        # One corrupt or unreadable jar must not abort the whole crawl;
        # the caller skips falsy results.
        logger.warning('Skipping unreadable jar %s: %s', fpath, exc)
        return None

    # This replaces `/<root_dir>/a/b/c` with `/<root_dir_alias>/a/b/c`;
    # normpath then converts something like `/.` to `/`.
    frelpath = os.path.normpath(
        os.path.join(root_dir_alias, os.path.relpath(fpath, root_dir)))

    return JarFeature(
        os.path.basename(fpath),
        frelpath,
        jarhash,
        hashes
    )
2 changes: 1 addition & 1 deletion tests/functional/test_functional_dockerevents.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def testCrawlContainer2(self):
time.sleep(30)

subprocess.call(['/bin/chmod', '-R', '777', self.tempd])

files = os.listdir(self.tempd + '/out')
docker_server_version = self.docker.version()['Version']
if VERSION_SPEC.match(semantic_version.Version(_fix_version(docker_server_version))):
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_logs_in_volumes1.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_get_logfiles_list(self, *args):
"docker_image_registry": "image_registry",
"owner_namespace": "owner_namespace",
"NetworkSettings": {}}

plugins_manager.runtime_env = None
self.docker_container = \
dockercontainer.DockerContainer(inspect['Id'], inspect)
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_apache.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
):
ports = "[ {\"containerPort\" : \"80\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedApacheContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_db2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class MockedDB2Container1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"50000\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedDB2Container2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_liberty.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class MockedLibertyContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"9443\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedLibertyContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_nginx.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class MockedNginxContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"80\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedNginxContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class MockedRedisContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"6379\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedRedisContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_tomcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class MockedTomcatContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"8080\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedTomcatContainer2(object):
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ def mocked_get_sas_token():
return ('sas-token', 'cloudoe', 'access-group')

class RandomKafkaException(Exception):
pass
pass

def raise_value_error(*args, **kwargs):
raise ValueError()
raise ValueError()

def mock_call_with_retries(function, max_retries=10,
exception_type=Exception,
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_jar_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest

import os
import sys
import tempfile
from zipfile import ZipFile, ZipInfo

from utils import jar_utils
from utils.features import JarFeature

#
# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html
#

sys.path.append('tests/unit/')
from plugins.systems.jar_host_crawler import JarHostCrawler


class JarHashesPluginTests(unittest.TestCase):
    """Runs JarHostCrawler against a jar built in a temporary directory."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_jar_host_crawler_plugin(self, *args):
        """The crawler reports the jar's name, its hash and class hashes."""
        tmpdir = tempfile.mkdtemp()
        jar_file_name = 'myfile.jar'

        # Ensure the file is read/write by the creator only
        saved_umask = os.umask(0o077)

        path = os.path.join(tmpdir, jar_file_name)
        try:
            # Fixed timestamps keep the archive bytes, and therefore the
            # expected jar hash, reproducible.
            with ZipFile(path, "w") as myjar:
                myjar.writestr(ZipInfo('first.class', (1980, 1, 1, 1, 1, 1)),
                               "first secrets!")
                myjar.writestr(ZipInfo('second.class', (1980, 1, 1, 1, 1, 1)),
                               "second secrets!")
                myjar.writestr(ZipInfo('second.txt', (1980, 1, 1, 1, 1, 1)),
                               "second secrets!")

            fc = JarHostCrawler()
            jars = list(fc.crawl(root_dir=tmpdir))
            self.assertEqual(1, len(jars))

            jar_feature = jars[0][1]
            self.assertEqual('myfile.jar', jar_feature.name)
            self.assertEqual('48ac85a26ffa7ff5cefdd5c73a9fb888',
                             jar_feature.jarhash)
            self.assertEqual(['ddc6eff37020aa858e26b1ba8a49ee0e',
                              'cbe2a13eb99c1c8ac5f30d0a04f8c492'],
                             jar_feature.hashes)
            self.assertEqual('jar', jars[0][2])
        finally:
            # I/O errors are deliberately not caught above: swallowing them
            # would let the test pass without checking anything.
            os.umask(saved_umask)
            if os.path.exists(path):
                os.remove(path)
            os.rmdir(tmpdir)
50 changes: 50 additions & 0 deletions tests/unit/test_jar_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest

import os
import tempfile
from zipfile import ZipFile, ZipInfo

from utils import jar_utils
from utils.features import JarFeature

#
# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html
#

class JarUtilsTests(unittest.TestCase):
    """Runs jar_utils.crawl_jar_files against a jar built in a temp dir."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_get_jar_features(self):
        """crawl_jar_files reports the jar's name, hash and class hashes."""
        tmpdir = tempfile.mkdtemp()
        jar_file_name = 'myfile.jar'

        # Ensure the file is read/write by the creator only
        saved_umask = os.umask(0o077)

        path = os.path.join(tmpdir, jar_file_name)
        try:
            # Fixed timestamps keep the archive bytes, and therefore the
            # expected jar hash, reproducible.
            with ZipFile(path, "w") as myjar:
                myjar.writestr(ZipInfo('first.class', (1980, 1, 1, 1, 1, 1)),
                               "first secrets!")
                myjar.writestr(ZipInfo('second.class', (1980, 1, 1, 1, 1, 1)),
                               "second secrets!")
                myjar.writestr(ZipInfo('second.txt', (1980, 1, 1, 1, 1, 1)),
                               "second secrets!")

            jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir))
            self.assertEqual(1, len(jars))

            jar_feature = jars[0][1]
            self.assertEqual('myfile.jar', jar_feature.name)
            self.assertEqual('48ac85a26ffa7ff5cefdd5c73a9fb888',
                             jar_feature.jarhash)
            self.assertEqual(['ddc6eff37020aa858e26b1ba8a49ee0e',
                              'cbe2a13eb99c1c8ac5f30d0a04f8c492'],
                             jar_feature.hashes)
            self.assertEqual('jar', jars[0][2])
        finally:
            # I/O errors are deliberately not caught above: swallowing them
            # would let the test pass without checking anything.
            os.umask(saved_umask)
            if os.path.exists(path):
                os.remove(path)
            os.rmdir(tmpdir)
Loading

0 comments on commit 824a898

Please sign in to comment.