From 2c596afe953526f467c0b16921f1eafd918a21e7 Mon Sep 17 00:00:00 2001
From: Michiya Takahashi
Date: Tue, 27 Jan 2015 16:39:00 +0900
Subject: [PATCH] add zip_compression option for the blob file handler

---
 README.rst                        |  9 +++-
 azure_storage_logging/handlers.py | 32 ++++++++---
 setup.py                          |  2 +-
 tests/tests.py                    | 90 ++++++++++++++++++++++++++-----
 4 files changed, 110 insertions(+), 23 deletions(-)

diff --git a/README.rst b/README.rst
index 73ec5ce..f7295c0 100644
--- a/README.rst
+++ b/README.rst
@@ -13,7 +13,7 @@ the standard Python logging APIs to Microsoft Azure Storage.
 Dependencies
 ------------
 
-* azure
+* azure 0.9 or newer
 
 Installation
 ------------
@@ -198,7 +198,7 @@ The **BlobStorageTimedRotatingFileHandler** class is a subclass of
 of log files and storing the outdated log files to the specified container of
 Azure blob storage at certain timed intervals.
 
-* *class* azure_storage_logging.handlers.BlobStorageTimedRotatingFileHandler(*filename, when='h', interval=1, encoding=None, delay=False, utc=False, account_name=None, account_key=None, protocol='https', container='logs'*)
+* *class* azure_storage_logging.handlers.BlobStorageTimedRotatingFileHandler(*filename, when='h', interval=1, encoding=None, delay=False, utc=False, account_name=None, account_key=None, protocol='https', container='logs', zip_compression=False*)
 
   Returns a new instance of the **BlobStorageTimedRotatingFileHandler**
   class. The instance is initialized with the name and the key of your
@@ -221,6 +221,10 @@ Azure blob storage at certain timed intervals.
   Azure Storage and your application, ``http`` and ``https`` are
   supported.
 
+  The *zip_compression* specifies whether to compress every
+  outdated log file in zip format before putting it in
+  the container.
+
   The only two formatters ``%(hostname)s`` and ``%(process)d`` are
   acceptable as a part of the *filename* or the *container*. You can
   save log files in a blob container dedicated to each host or process by
@@ -281,6 +285,7 @@ three different types of storage from the logger:
             'when': 'D',
             'interval': 1,
             'container': 'logs-%(hostname)s',
+            'zip_compression': False,
         },
         'queue': {
             'account_name': 'mystorageaccountname',
diff --git a/azure_storage_logging/handlers.py b/azure_storage_logging/handlers.py
index b876c75..d8072a4 100644
--- a/azure_storage_logging/handlers.py
+++ b/azure_storage_logging/handlers.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2014 Michiya Takahashi
+# Copyright 2013-2015 Michiya Takahashi
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,6 +18,8 @@
 from base64 import b64encode
 from logging.handlers import TimedRotatingFileHandler
 from socket import gethostname
+from tempfile import mkstemp
+from zipfile import ZIP_DEFLATED, ZipFile
 
 from azure.storage.blobservice import BlobService
 from azure.storage.queueservice import QueueService
@@ -54,6 +56,7 @@ def __init__(self,
                  account_key=None,
                  protocol='https',
                  container='logs',
+                 zip_compression=False,
                  ):
         hostname = gethostname()
         self.meta = {'hostname': hostname, 'process': os.getpid()}
@@ -70,6 +73,7 @@ def __init__(self,
         self.meta['hostname'] = hostname.replace('_', '-')
         container = container % self.meta
         self.container = container.lower()
+        self.zip_compression = zip_compression
         self.meta['hostname'] = hostname
 
     def _put_log(self, dirName, fileName):
@@ -79,13 +83,25 @@ def _put_log(self, dirName, fileName):
         if not self.container_created:
             self.service.create_container(self.container)
             self.container_created = True
-        with open(os.path.join(dirName, fileName), mode='rb') as f:
-            self.service.put_blob(self.container,
-                                  fileName,
-                                  f.read(),
-                                  'BlockBlob',
-                                  x_ms_blob_content_type='text/plain',
-                                  )
+        fd, tmpfile_path = None, ''
+        try:
+            file_path = os.path.join(dirName, fileName)
+            if self.zip_compression:
+                suffix, content_type = '.zip', 'application/zip'
+                fd, tmpfile_path = mkstemp(suffix=suffix)
+                with os.fdopen(fd, 'wb') as f:
+                    with ZipFile(f, 'w', ZIP_DEFLATED) as z:
+                        z.write(file_path, arcname=fileName)
+                file_path = tmpfile_path
+            else:
+                suffix, content_type = '', 'text/plain'
+            self.service.put_block_blob_from_path(self.container,
+                                                  fileName + suffix,
+                                                  file_path,
+                                                  x_ms_blob_content_type=content_type)
+        finally:
+            if self.zip_compression and fd:
+                os.remove(tmpfile_path)
 
     def emit(self, record):
         """
diff --git a/setup.py b/setup.py
index 4c3ecfd..3316d6b 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
     license='Apache License 2.0',
     packages=['azure_storage_logging'],
     install_requires=[
-        'azure',
+        'azure>=0.9',
     ],
     classifiers=CLASSIFIERS,
     keywords='azure logging',
diff --git a/tests/tests.py b/tests/tests.py
index a7f8b75..f922d9e 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -4,6 +4,7 @@
 import sys
 import time
 import unittest
+import zipfile
 
 from base64 import b64encode
 from datetime import datetime
@@ -56,18 +57,31 @@
     },
     'handlers': {
         # BlobStorageTimedFileRotatingHandlerTest
-        'file': {
+        'rotation': {
             'account_name': ACCOUNT_NAME,
             'account_key': ACCOUNT_KEY,
             'protocol': 'https',
             'level': 'DEBUG',
             'class': 'azure_storage_logging.handlers.BlobStorageTimedRotatingFileHandler',
             'formatter': 'verbose',
-            'filename': os.path.join(_LOGFILE_TMPDIR, 'test.log'),
+            'filename': os.path.join(_LOGFILE_TMPDIR, 'rotation.log'),
             'when': 'S',
-            'interval': 10,
+            'interval': 10,  # assumes that the test begins within the interval time
             'container': 'logs-%s' % gethostname(),
         },
+        'zip_compression': {
+            'account_name': ACCOUNT_NAME,
+            'account_key': ACCOUNT_KEY,
+            'protocol': 'https',
+            'level': 'DEBUG',
+            'class': 'azure_storage_logging.handlers.BlobStorageTimedRotatingFileHandler',
+            'formatter': 'verbose',
+            'filename': os.path.join(_LOGFILE_TMPDIR, 'zip_compression.log'),
+            'when': 'S',
+            'interval': 30,  # assumes that the test begins within the interval time
+            'container': 'logs-%s' % gethostname(),
+            'zip_compression': True,
+        },
         # QueueStorageHandlerTest
         'queue': {
             'account_name': ACCOUNT_NAME,
@@ -161,8 +175,12 @@
     },
     'loggers': {
         # BlobStorageTimedFileRotatingHandlerTest
-        'file': {
-            'handlers': ['file'],
+        'rotation': {
+            'handlers': ['rotation'],
+            'level': 'DEBUG',
+        },
+        'zip_compression': {
+            'handlers': ['zip_compression'],
             'level': 'DEBUG',
         },
         # QueueStorageHandlerTest
@@ -240,10 +258,10 @@ def _get_container_name(self, handler_name):
         container = container.replace('_', '-').lower()
         return container
 
-    def _get_interval_in_second(self):
+    def _get_interval_in_second(self, handler_name):
         options = {'S': 1, 'M': 60, 'H': 3600, 'D': 86400 }
-        seconds = options[_get_handler_config_value('file', 'when')]
-        return int(_get_handler_config_value('file', 'interval')) * seconds
+        seconds = options[_get_handler_config_value(handler_name, 'when')]
+        return int(_get_handler_config_value(handler_name, 'interval')) * seconds
 
     def setUp(self):
         self.service = BlobService(ACCOUNT_NAME, ACCOUNT_KEY)
@@ -259,7 +277,7 @@ def setUp(self):
 
     def test_rotation(self):
         # get the logger for the test
-        logger_name = 'file'
+        logger_name = 'rotation'
         logger = logging.getLogger(logger_name)
         handler_name = _get_handler_name(logger_name)
 
@@ -268,7 +286,7 @@ def setUp(self):
         logger.info(log_text_1)
 
         # perform logging again after the interval
-        time.sleep(self._get_interval_in_second()+5)
+        time.sleep(self._get_interval_in_second(handler_name)+5)
         log_text_2 = 'this will be the first line in the new log file.'
         logger.info(log_text_2)
 
@@ -279,11 +297,59 @@ def setUp(self):
         blobs = iter(self.service.list_blobs(container, prefix=basename))
         blob = next(blobs)
         self.assertTrue(blob.name.startswith(basename))
-        #blob_text = self.service.get_blob_to_text(container, blob.name)
+        self.assertEqual(blob.properties.content_type, 'text/plain')
         blob_text = self.service.get_blob(container, blob.name)
         self.assertRegex(blob_text.decode('utf-8'), log_text_1)
 
-        # confirm that there's no more message in the queue
+        # confirm that there's no more blob in the container
         with self.assertRaises(StopIteration):
             next(blobs)
+
+        # confirm that the current log file has correct logs
+        with open(filename, 'r') as f:
+            self.assertRegex(f.readline(), log_text_2)
+
+    def test_zip_compression(self):
+        # get the logger for the test
+        logger_name = 'zip_compression'
+        logger = logging.getLogger(logger_name)
+        handler_name = _get_handler_name(logger_name)
+
+        # perform logging
+        log_text_1 = 'this will be the last line in the compressed log file.'
+        logger.info(log_text_1)
+
+        # perform logging again after the interval
+        time.sleep(self._get_interval_in_second(handler_name)+5)
+        log_text_2 = 'this will be the first line in the new log file.'
+        logger.info(log_text_2)
+
+        # confirm that the outdated log file is saved in the container
+        container = self._get_container_name(handler_name)
+        filename = _get_handler_config_value(handler_name, 'filename')
+        basename = os.path.basename(filename)
+        blobs = iter(self.service.list_blobs(container, prefix=basename))
+        blob = next(blobs)
+        self.assertTrue(blob.name.startswith(basename))
+        self.assertTrue(blob.name.endswith('.zip'))
+        self.assertEqual(blob.properties.content_type, 'application/zip')
+
+        # confirm that the blob is a zip file
+        zipfile_path = os.path.join(_LOGFILE_TMPDIR, blob.name)
+        self.service.get_blob_to_path(container, blob.name, zipfile_path)
+        self.assertTrue(zipfile.is_zipfile(zipfile_path))
+
+        # confirm that the zip file only has the rotated log file
+        extract_dir = mkdtemp(dir=_LOGFILE_TMPDIR)
+        with zipfile.ZipFile(zipfile_path, 'r') as z:
+            files = z.namelist()
+            self.assertEqual(len(files), 1)
+            self.assertEqual(files[0], blob.name.rpartition('.zip')[0])
+            z.extractall(path=extract_dir)
+        with open(os.path.join(extract_dir, files[0]), 'r') as f:
+            self.assertRegex(f.readline(), log_text_1)
+
+        # confirm that there's no more blob in the container
+        with self.assertRaises(StopIteration):
+            next(blobs)
 
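
Note (not part of the patch): a minimal usage sketch of the new option, mirroring the dictConfig example in the README hunk above. The account name, account key, logger name, and file name are placeholders, and the sketch assumes azure-storage-logging with this patch applied.

import logging
import logging.config

LOGGING = {
    'version': 1,
    'handlers': {
        'rotation': {
            'class': 'azure_storage_logging.handlers.BlobStorageTimedRotatingFileHandler',
            'level': 'INFO',
            'filename': 'example.log',
            'when': 'D',
            'interval': 1,
            'account_name': 'mystorageaccountname',  # placeholder
            'account_key': 'mystorageaccountkey',    # placeholder
            'container': 'logs-%(hostname)s',
            'zip_compression': True,  # rotated log files are zipped before upload
        },
    },
    'loggers': {
        'example': {
            'handlers': ['rotation'],
            'level': 'INFO',
        },
    },
}

logging.config.dictConfig(LOGGING)
logging.getLogger('example').info('uploaded to blob storage at rotation time')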
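The compress-and-upload step that the patched _put_log performs when zip_compression is enabled can be summarized by the sketch below. This is an illustration rather than the library's code: upload() is a hypothetical stand-in for the blob service call, whereas the patch itself calls BlobService.put_block_blob_from_path, which is why setup.py raises the dependency floor to azure>=0.9.

import os
from tempfile import mkstemp
from zipfile import ZIP_DEFLATED, ZipFile


def compress_and_upload(dir_name, file_name, upload):
    """Pack dir_name/file_name into a temporary zip and hand it to upload()."""
    source_path = os.path.join(dir_name, file_name)
    fd, tmp_path = mkstemp(suffix='.zip')
    try:
        # write a one-entry archive; arcname keeps only the base file name
        with os.fdopen(fd, 'wb') as tmp:
            with ZipFile(tmp, 'w', ZIP_DEFLATED) as archive:
                archive.write(source_path, arcname=file_name)
        # the blob is named "<rotated file name>.zip" and typed as a zip
        upload(tmp_path, file_name + '.zip', 'application/zip')
    finally:
        os.remove(tmp_path)

With zip_compression left at its default (False), the handler keeps the old behaviour and uploads the rotated file as text/plain.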