Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Javahashes #341

Merged
merged 19 commits into from
Nov 7, 2017
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions crawler/plugins/systems/jar_container_crawler.plugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Core]
Name = jar_container
Module = jar_container_crawler

[Documentation]
Author = IBM
Version = 0.1
Description = Produces md5 hashes for jar files and class files in jars
54 changes: 54 additions & 0 deletions crawler/plugins/systems/jar_container_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging

import utils.dockerutils
import utils.misc
from icrawl_plugin import IContainerCrawler
from utils.jar_utils import crawl_jar_files
from utils.namespace import run_as_another_namespace

# Module-level logger, registered under the 'crawlutils' logger name.
logger = logging.getLogger('crawlutils')


class JarContainerCrawler(IContainerCrawler):
    """Container crawler plugin that reports the 'jar' feature.

    The actual work is done by ``crawl_jar_files``; this class only decides
    how that function gets to see the container's filesystem.
    """

    def get_feature(self):
        """Return the name of the feature produced by this plugin."""
        feature_name = 'jar'
        return feature_name

    def crawl(
            self,
            container_id=None,
            avoid_setns=False,
            root_dir='/',
            # NOTE: this default list is only read or rebound below, never
            # mutated in place.
            exclude_dirs=[
                '/boot',
                '/dev',
                '/proc',
                '/sys',
                '/mnt',
                '/tmp',
                '/var/cache',
                '/usr/share/man',
                '/usr/share/doc',
                '/usr/share/mime'],
            **kwargs):
        """Crawl the jar files of the container ``container_id``.

        :param container_id: id handed to the docker inspect helper.
        :param avoid_setns: when true, crawl the container's rootfs path
            directly and report paths relative to ``root_dir``; otherwise
            run ``crawl_jar_files`` through ``run_as_another_namespace``
            with the 'mnt' namespace of the container's pid.
        :param root_dir: directory, as seen by the container, to start from.
        :param exclude_dirs: directories, as seen by the container, to skip.
        :param kwargs: accepted and ignored.
        :return: whatever ``crawl_jar_files`` / ``run_as_another_namespace``
            returns.
        """
        container_state = utils.dockerutils.exec_dockerinspect(
            container_id)['State']
        container_pid = str(container_state['Pid'])
        logger.debug('Crawling jars for container %s' % container_id)

        if not avoid_setns:
            # in all other cases, including wrong mode set
            return run_as_another_namespace(
                container_pid,
                ['mnt'],
                crawl_jar_files,
                root_dir,
                exclude_dirs,
                None)

        # Translate container paths into paths under the container rootfs.
        rootfs = utils.dockerutils.get_docker_container_rootfs_path(
            container_id)
        rootfs_excludes = [
            utils.misc.join_abs_paths(rootfs, excluded)
            for excluded in exclude_dirs
        ]
        rootfs_start = utils.misc.join_abs_paths(rootfs, root_dir)
        return crawl_jar_files(
            root_dir=rootfs_start,
            exclude_dirs=rootfs_excludes,
            root_dir_alias=root_dir)
8 changes: 8 additions & 0 deletions crawler/plugins/systems/jar_host_crawler.plugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Core]
Name = jar_host
Module = jar_host_crawler

[Documentation]
Author = IBM
Version = 0.1
Description = Produces md5 hashes for jar files and class files in jars
26 changes: 26 additions & 0 deletions crawler/plugins/systems/jar_host_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from icrawl_plugin import IHostCrawler
from utils.jar_utils import crawl_jar_files


class JarHostCrawler(IHostCrawler):
    """Host crawler plugin that reports the 'jar' feature."""

    def get_feature(self):
        """Return the name of the feature produced by this plugin."""
        feature_name = 'jar'
        return feature_name

    def crawl(
            self,
            root_dir='/',
            # NOTE: this default list is passed through untouched.
            exclude_dirs=[
                '/boot',
                '/dev',
                '/proc',
                '/sys',
                '/mnt',
                '/tmp',
                '/var/cache',
                '/usr/share/man',
                '/usr/share/doc',
                '/usr/share/mime'],
            **kwargs):
        """Crawl jar files below ``root_dir`` on the host.

        :param root_dir: directory to start from.
        :param exclude_dirs: directories to skip.
        :param kwargs: accepted and ignored.
        :return: the result of ``crawl_jar_files`` for these arguments.
        """
        crawl_args = {
            'root_dir': root_dir,
            'exclude_dirs': exclude_dirs,
        }
        return crawl_jar_files(**crawl_args)
6 changes: 6 additions & 0 deletions crawler/utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,9 @@
'cpu_khz',
'cpu_cache_size_kb',
'cpu_num_cores'])
# Record describing one jar file: its name, its path, the hash of the jar
# itself ('jarhash') and a collection of per-entry hashes ('hashes').
JarFeature = namedtuple(
    'JarFeature',
    ['name', 'path', 'jarhash', 'hashes'],
)
92 changes: 92 additions & 0 deletions crawler/utils/jar_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import fnmatch
import logging
import os
import re
import hashlib
import zipfile

from utils.features import JarFeature

# Module-level logger, registered under the 'crawlutils' logger name.
logger = logging.getLogger('crawlutils')


def crawl_jar_files(
        root_dir='/',
        exclude_dirs=None,
        root_dir_alias=None,
        accessed_since=0):
    """Walk ``root_dir`` and yield one feature tuple per jar file found.

    :param root_dir: directory to start from. Nothing is yielded when it is
        not an existing directory.
    :param exclude_dirs: iterable of fnmatch-style patterns; each is joined
        onto ``root_dir`` and any directory or file whose full path matches
        is skipped. ``None`` (the default) means no exclusions.
    :param root_dir_alias: passed through to ``_crawl_jar_file``; defaults
        to ``root_dir``.
    :param accessed_since: accepted for interface compatibility; not used.
    :return: generator of ``(feature.path, feature, 'jar')`` tuples.

    Jars that cannot be opened or parsed are logged and skipped so that one
    bad file does not abort the whole crawl.
    """
    if exclude_dirs is None:
        exclude_dirs = []

    if not os.path.isdir(root_dir):
        return

    saved_args = locals()
    logger.debug('crawl_jar_files: %s' % (saved_args))

    if root_dir_alias is None:
        root_dir_alias = root_dir
    exclude_dirs = [os.path.join(root_dir, d) for d in exclude_dirs]
    # r'$.' can never match, so an empty exclusion list excludes nothing.
    exclude_regex = r'|'.join(
        [fnmatch.translate(d) for d in exclude_dirs]) or r'$.'
    # Compile once instead of re-parsing the pattern for every entry.
    is_excluded = re.compile(exclude_regex).match

    # os.walk is top-down by default, which allows pruning 'dirs' in place.
    for (dirpath, dirs, files) in os.walk(root_dir):
        # Keep bare names in 'dirs': os.walk joins them onto 'dirpath'
        # itself, so storing joined paths breaks descent for relative roots.
        dirs[:] = [name for name in dirs
                   if not is_excluded(os.path.join(dirpath, name))]

        for name in files:
            fpath = os.path.join(dirpath, name)
            if not fpath.endswith('.jar') or is_excluded(fpath):
                continue
            try:
                feature = _crawl_jar_file(root_dir, fpath, root_dir_alias)
            except (zipfile.BadZipfile, IOError, OSError) as exc:
                logger.warning('Skipping unreadable jar %s: %s', fpath, exc)
                continue
            if feature:
                yield (feature.path, feature, 'jar')


# crawl a single file
def _crawl_jar_file(
        root_dir,
        fpath,
        root_dir_alias,
):
    """Build a ``JarFeature`` for the jar file at ``fpath``.

    :param root_dir: directory the crawl started from; ``fpath`` is made
        relative to it.
    :param fpath: path of the file to inspect.
    :param root_dir_alias: prefix that replaces ``root_dir`` in the path
        reported on the feature.
    :return: ``None`` when ``fpath`` does not end in '.jar'; otherwise a
        ``JarFeature`` holding the file name, the aliased path, the md5 of
        the jar file and the md5 of every '.class' entry in archive order.
    :raises zipfile.BadZipfile: if the file is not a valid zip archive.
    :raises IOError: if the file cannot be read.
    """
    if not fpath.endswith('.jar'):
        return None

    # Read by ZipInfo rather than by name so that archives containing
    # duplicate entry names still hash each entry individually.
    with zipfile.ZipFile(fpath, 'r') as zf:
        hashes = [hashlib.md5(zf.read(info)).hexdigest()
                  for info in zf.infolist()
                  if info.filename.endswith('.class')]

    # compute hash of jar file, in chunks so large jars are not held in
    # memory all at once
    md = hashlib.md5()
    with open(fpath, 'rb') as jarin:
        for chunk in iter(lambda: jarin.read(65536), b''):
            md.update(chunk)
    jarhash = md.hexdigest()

    # This replaces `/<root_dir>/a/b/c` with `/<root_dir_alias>/a/b/c`;
    # normpath then converts something like `/.` to `/`.
    frelpath = os.path.normpath(
        os.path.join(root_dir_alias, os.path.relpath(fpath, root_dir)))

    return JarFeature(
        os.path.basename(fpath),
        frelpath,
        jarhash,
        hashes
    )
2 changes: 1 addition & 1 deletion tests/functional/test_functional_dockerevents.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def testCrawlContainer2(self):
time.sleep(30)

subprocess.call(['/bin/chmod', '-R', '777', self.tempd])

files = os.listdir(self.tempd + '/out')
docker_server_version = self.docker.version()['Version']
if VERSION_SPEC.match(semantic_version.Version(_fix_version(docker_server_version))):
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_logs_in_volumes1.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_get_logfiles_list(self, *args):
"docker_image_registry": "image_registry",
"owner_namespace": "owner_namespace",
"NetworkSettings": {}}

plugins_manager.runtime_env = None
self.docker_container = \
dockercontainer.DockerContainer(inspect['Id'], inspect)
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_apache.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
):
ports = "[ {\"containerPort\" : \"80\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedApacheContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_db2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class MockedDB2Container1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"50000\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedDB2Container2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_liberty.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class MockedLibertyContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"9443\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedLibertyContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_nginx.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class MockedNginxContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"80\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedNginxContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class MockedRedisContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"6379\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedRedisContainer2(object):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_app_tomcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class MockedTomcatContainer1(object):
def __init__(self, container_id):
ports = "[ {\"containerPort\" : \"8080\"} ]"
self.inspect = {"State": {"Pid": 1234}, "Config": {"Labels":
{"annotation.io.kubernetes.container.ports": ports}}}
{"annotation.io.kubernetes.container.ports": ports}}}


class MockedTomcatContainer2(object):
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ def mocked_get_sas_token():
return ('sas-token', 'cloudoe', 'access-group')

class RandomKafkaException(Exception):
pass
pass

def raise_value_error(*args, **kwargs):
raise ValueError()
raise ValueError()

def mock_call_with_retries(function, max_retries=10,
exception_type=Exception,
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_jar_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import unittest

import os
import sys
import tempfile
from zipfile import ZipFile, ZipInfo

from utils import jar_utils
from utils.features import JarFeature

#
# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html
#

sys.path.append('tests/unit/')
from plugins.systems.jar_host_crawler import JarHostCrawler
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also test JarContainerCrawler class with a dummy container?



class GPUPluginTests(unittest.TestCase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename this :)


def setUp(self):
    """Nothing to prepare before a test."""
    return None

def tearDown(self):
    """Nothing to clean up after a test."""
    return None

def test_jar_host_crawler_plugin(self, *args):
    """JarHostCrawler reports name, jar hash and class hashes of a jar.

    Builds a small jar in a private temp directory, crawls that directory
    and checks the single feature that comes back. I/O errors are allowed
    to propagate so that a broken fixture fails the test instead of letting
    it pass without running any assertion.
    """
    tmpdir = tempfile.mkdtemp()
    jar_file_name = 'myfile.jar'
    path = os.path.join(tmpdir, jar_file_name)
    # Fixed timestamp so the archive bytes do not depend on the clock.
    fixed_date = (1980, 1, 1, 1, 1, 1)

    # Ensure the file is read/write by the creator only
    saved_umask = os.umask(0o077)
    try:
        with ZipFile(path, "w") as myjar:
            myjar.writestr(ZipInfo('first.class', fixed_date),
                           "first secrets!")
            myjar.writestr(ZipInfo('second.class', fixed_date),
                           "second secrets!")
            myjar.writestr(ZipInfo('second.txt', fixed_date),
                           "second secrets!")

        fc = JarHostCrawler()
        jars = list(fc.crawl(root_dir=tmpdir))
        self.assertEqual(1, len(jars))

        jar_feature = jars[0][1]
        self.assertEqual('myfile.jar', jar_feature.name)
        self.assertEqual('48ac85a26ffa7ff5cefdd5c73a9fb888',
                         jar_feature.jarhash)
        self.assertEqual(['ddc6eff37020aa858e26b1ba8a49ee0e',
                          'cbe2a13eb99c1c8ac5f30d0a04f8c492'],
                         jar_feature.hashes)
        self.assertEqual('jar', jars[0][2])
    finally:
        os.umask(saved_umask)
        # The jar may not exist if creating it failed; do not let cleanup
        # hide the original error.
        if os.path.exists(path):
            os.remove(path)
        os.rmdir(tmpdir)
50 changes: 50 additions & 0 deletions tests/unit/test_jar_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest

import os
import tempfile
from zipfile import ZipFile, ZipInfo

from utils import jar_utils
from utils.features import JarFeature

#
# https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html
#

class JarUtilsTests(unittest.TestCase):
    """Unit tests for ``utils.jar_utils.crawl_jar_files``."""

    def test_get_jar_features(self):
        """crawl_jar_files reports name, jar hash and class hashes of a jar.

        I/O errors are allowed to propagate so that a broken fixture fails
        the test instead of letting it pass without running any assertion.
        """
        tmpdir = tempfile.mkdtemp()
        jar_file_name = 'myfile.jar'
        path = os.path.join(tmpdir, jar_file_name)
        # Fixed timestamp so the archive bytes do not depend on the clock.
        fixed_date = (1980, 1, 1, 1, 1, 1)

        # Ensure the file is read/write by the creator only
        saved_umask = os.umask(0o077)
        try:
            with ZipFile(path, "w") as myjar:
                myjar.writestr(ZipInfo('first.class', fixed_date),
                               "first secrets!")
                myjar.writestr(ZipInfo('second.class', fixed_date),
                               "second secrets!")
                myjar.writestr(ZipInfo('second.txt', fixed_date),
                               "second secrets!")

            jars = list(jar_utils.crawl_jar_files(root_dir=tmpdir))
            self.assertEqual(1, len(jars))

            jar_feature = jars[0][1]
            self.assertEqual('myfile.jar', jar_feature.name)
            self.assertEqual('48ac85a26ffa7ff5cefdd5c73a9fb888',
                             jar_feature.jarhash)
            self.assertEqual(['ddc6eff37020aa858e26b1ba8a49ee0e',
                              'cbe2a13eb99c1c8ac5f30d0a04f8c492'],
                             jar_feature.hashes)
            self.assertEqual('jar', jars[0][2])
        finally:
            os.umask(saved_umask)
            # The jar may not exist if creating it failed; do not let
            # cleanup hide the original error.
            if os.path.exists(path):
                os.remove(path)
            os.rmdir(tmpdir)