Skip to content

Commit

Permalink
Added additional tests and various other tweaks.
Browse files Browse the repository at this point in the history
  • Loading branch information
justinlittman committed Mar 20, 2019
1 parent 01a02e1 commit 02661bb
Show file tree
Hide file tree
Showing 19 changed files with 564 additions and 136 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
name: run pylint
command: |
. venv/bin/activate
pylint *.py twarccloud
pylint *.py twarccloud tests
- store_artifacts:
path: test-reports
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pip install pylint
then:
```
python -m unittest discover
pylint *.py twarccloud
pylint *.py twarccloud tests
```

Expand Down
75 changes: 75 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import unittest
import logging
from twarccloud.collection_config import CollectionConfig


class TestCase(unittest.TestCase):
    """Common base class for the test suite that switches on verbose logging.

    The logging calls live directly in the class body, so they run once, at
    the moment this module is imported, rather than per test.
    """
    # Configure the root logger for DEBUG output.
    logging.basicConfig(level=logging.DEBUG)
    # Set the 'twarc-cloud' logger to DEBUG explicitly as well.
    logging.getLogger('twarc-cloud').setLevel(logging.DEBUG)


def extract_dict(changeset):
    """Drop the 'change_timestamp' entry from a changeset and hand it back.

    The changeset is modified in place; the returned object is the very same
    one that was passed in, which lets callers compare the remaining content
    without the timestamp.
    """
    del changeset['change_timestamp']
    return changeset


def timeline_config():
    """Build a CollectionConfig fixture for a 'user_timeline' collection.

    The fixture has id 'foo', a set of API keys and three users; only user
    '12' carries a 'since_id'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    users = {
        '481186914': {'screen_name': 'justin_littman'},
        '6253282': {'screen_name': 'twitterapi'},
        '12': {'screen_name': 'jack', 'since_id': '12345'},
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'user_timeline',
        'keys': keys,
        'users': users,
    })


def filter_config():
    """Build a CollectionConfig fixture for a 'filter' collection.

    The fixture has id 'foo', a set of API keys and a single 'track' filter
    of 'foo,#bar'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'filter',
        'keys': keys,
        'filter': {'track': 'foo,#bar'},
    })


def search_config():
    """Build a CollectionConfig fixture for a 'search' collection.

    The fixture has id 'foo', a set of API keys and the search query 'foo'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'search',
        'keys': keys,
        'search': {'query': 'foo'},
    })
103 changes: 103 additions & 0 deletions tests/test_tweet_harvester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from tempfile import mkdtemp
import shutil
import json
import os
import socket
from contextlib import closing
from unittest.mock import patch, MagicMock
from threading import Timer
import requests
from tweet_harvester import TweetHarvester
from twarccloud.harvester.twarc_thread import TwarcThread
from twarccloud.filepaths_helper import get_collection_config_filepath, get_harvest_file, get_changesets_path
from tests import TestCase, timeline_config


class TestTweetHarvester(TestCase):
    """Tests for TweetHarvester, run against a temporary collections directory.

    TwarcThread is patched in every test, so the harvester is handed a mock
    in place of a real thread.
    """

    def setUp(self):
        """Create a temporary collections directory holding one collection config."""
        self.collection_id = 'test_id'
        self.collections_path = mkdtemp()
        self.collection_config_filepath = get_collection_config_filepath(self.collection_id,
                                                                         collections_path=self.collections_path)
        self.write_collection_config()

    def tearDown(self):
        """Remove the temporary collections directory."""
        shutil.rmtree(self.collections_path, ignore_errors=True)

    @patch('tweet_harvester.TwarcThread')
    def test_harvest(self, mock_twarc_thread_class):
        """A harvest with shutdown=True writes its config, applies the changeset and sets both events."""
        mock_twarc_thread_class.return_value = MagicMock(TwarcThread, exception=None)

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=True, port=self.find_free_port())
        # Make a change to changeset
        harvester.changeset.update_user('screen_name', 'real_justin_littman', '481186914')
        harvester.harvest()

        # Test collection config written to harvest
        harvest_collection_config_filepath = get_harvest_file(self.collection_id, harvester.harvest_timestamp,
                                                              'collection.json', collections_path=self.collections_path)
        self.assertTrue(os.path.exists(harvest_collection_config_filepath))
        harvest_collection_config = self.load_collection_config(harvest_collection_config_filepath)
        # The secrets must not appear in the copy stored with the harvest.
        self.assertNotIn('consumer_secret', harvest_collection_config['keys'])
        self.assertNotIn('access_token_secret', harvest_collection_config['keys'])

        # Test changeset
        collection_config = self.load_collection_config(self.collection_config_filepath)
        self.assertEqual('real_justin_littman', collection_config['users']['481186914']['screen_name'])
        self.assertEqual(1, len(
            os.listdir(get_changesets_path(self.collection_id, collections_path=self.collections_path))))

        # Test events
        self.assertTrue(harvester.stopped_event.is_set())
        self.assertTrue(harvester.shutdown_event.is_set())

    @patch('tweet_harvester.TwarcThread')
    def test_harvest_exception(self, mock_twarc_thread_class):
        """An exception recorded on the twarc thread makes harvest() raise."""
        mock_twarc_thread_class.return_value = MagicMock(TwarcThread, exception=Exception('Darn'))

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=True, port=self.find_free_port())
        with self.assertRaises(Exception):
            harvester.harvest()

    @patch('tweet_harvester.TwarcThread')
    def test_harvest_without_shutdown(self, mock_twarc_thread_class):
        """With shutdown=False the harvester keeps running until /shutdown is requested."""
        mock_twarc_thread_class.return_value = MagicMock(TwarcThread, exception=None)

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=False,
                                   port=self.find_free_port())

        # An assertion that fails inside a Timer thread is raised in that thread and never
        # reaches the test runner. The timer therefore only records what it saw and the
        # assertion itself is made on the main thread once harvest() has returned.
        shutdown_states = []

        def record_shutdown_state():
            shutdown_states.append(harvester.shutdown_event.is_set())

        def request_shutdown():
            requests.get('http://localhost:{}/shutdown'.format(harvester.port))

        check_timer = Timer(.5, record_shutdown_state)
        shutdown_timer = Timer(1, request_shutdown)
        check_timer.start()
        shutdown_timer.start()
        try:
            harvester.harvest()
        finally:
            # Do not leave pending timers behind if harvest() returns early or raises.
            check_timer.cancel()
            shutdown_timer.cancel()
            check_timer.join()

        # The check must have run, and shutdown must not have been set at that point.
        self.assertEqual([False], shutdown_states)

        # Test events
        self.assertTrue(harvester.stopped_event.is_set())
        self.assertTrue(harvester.shutdown_event.is_set())

    def write_collection_config(self):
        """Write the timeline fixture config as JSON to the collection config path."""
        os.makedirs(os.path.dirname(self.collection_config_filepath))
        with open(self.collection_config_filepath, 'w') as file:
            json.dump(timeline_config(), file)

    @staticmethod
    def load_collection_config(filepath):
        """Return the parsed JSON content of the file at filepath."""
        with open(filepath) as file:
            return json.load(file)

    @staticmethod
    def find_free_port():
        """Bind a throwaway socket to port 0 and return the port number it was given.

        The socket is closed before the number is returned.
        """
        # pylint: disable=no-member
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            sock.bind(('', 0))
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            return sock.getsockname()[1]
68 changes: 0 additions & 68 deletions tests/twarccloud/__init__.py
Original file line number Diff line number Diff line change
@@ -1,68 +0,0 @@
from twarccloud.collection_config import CollectionConfig


def extract_dict(changeset):
    """Drop the 'change_timestamp' entry from a changeset and hand it back.

    The changeset is modified in place; the returned object is the very same
    one that was passed in.
    """
    del changeset['change_timestamp']
    return changeset


def timeline_config():
    """Build a CollectionConfig fixture for a 'user_timeline' collection.

    The fixture has id 'foo', a set of API keys and three users; only user
    '12' carries a 'since_id'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    users = {
        '481186914': {'screen_name': 'justin_littman'},
        '6253282': {'screen_name': 'twitterapi'},
        '12': {'screen_name': 'jack', 'since_id': '12345'},
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'user_timeline',
        'keys': keys,
        'users': users,
    })


def filter_config():
    """Build a CollectionConfig fixture for a 'filter' collection.

    The fixture has id 'foo', a set of API keys and a single 'track' filter
    of 'foo,#bar'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'filter',
        'keys': keys,
        'filter': {'track': 'foo,#bar'},
    })


def search_config():
    """Build a CollectionConfig fixture for a 'search' collection.

    The fixture has id 'foo', a set of API keys and the search query 'foo'.
    """
    # NOTE(review): these look like live-format Twitter credentials -- confirm they are dummy values.
    keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'search',
        'keys': keys,
        'search': {'query': 'foo'},
    })
Empty file.
50 changes: 50 additions & 0 deletions tests/twarccloud/harvester/test_collection_lock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from tempfile import mkdtemp
import shutil
from datetime import datetime
from queue import Queue
import os
from twarccloud.harvester.collection_lock import CollectionLock, AddFile, DeleteFile, is_locked, assert_locked, \
LockedException
from twarccloud.filepaths_helper import get_lock_file, get_last_harvest_file
from tests import TestCase


class TestCollectionLock(TestCase):
    """Tests for CollectionLock and the is_locked / assert_locked helpers."""

    # Seconds to wait for an expected queue entry. Without a limit, a missing
    # entry would block the whole test run instead of failing this test.
    QUEUE_TIMEOUT = 5

    def setUp(self):
        """Create a temporary collections directory, a file queue and the expected file paths."""
        self.collections_path = mkdtemp()
        self.timestamp = datetime.utcnow()
        self.file_queue = Queue()
        self.collection_id = 'test_id'
        self.lock_file = get_lock_file(self.collection_id, collections_path=self.collections_path)
        self.last_harvest_file = get_last_harvest_file(self.collection_id, collections_path=self.collections_path)

    def tearDown(self):
        """Remove the temporary collections directory."""
        shutil.rmtree(self.collections_path, ignore_errors=True)

    def test_lock(self):
        """Holding the lock creates and queues the lock file; releasing it queues the follow-up files."""
        with CollectionLock(self.collections_path, self.collection_id, self.file_queue,
                            harvest_timestamp=self.timestamp):
            self.assertTrue(os.path.exists(self.lock_file))
            self.assertQueuedFile(self.lock_file)

        # After release: a last-harvest file exists and was queued as an add ...
        self.assertTrue(os.path.exists(self.last_harvest_file))
        self.assertQueuedFile(self.last_harvest_file)
        # ... and the lock file is gone and was queued as a delete.
        self.assertFalse(os.path.exists(self.lock_file))
        self.assertQueuedFile(self.lock_file, is_add=False)

    def test_is_locked(self):
        """is_locked is True only while the lock is held; assert_locked raises only while it is held."""
        self.assertFalse(is_locked(self.lock_file))
        with CollectionLock(self.collections_path, self.collection_id, self.file_queue,
                            harvest_timestamp=self.timestamp):
            self.assertTrue(is_locked(self.lock_file))
            with self.assertRaises(LockedException):
                assert_locked(self.lock_file)
        self.assertFalse(is_locked(self.lock_file))
        # Must not raise once the lock has been released.
        assert_locked(self.lock_file)

    # pylint: disable=invalid-name
    def assertQueuedFile(self, filepath, is_add=True):
        """Assert that the next queue entry is an AddFile/DeleteFile for filepath.

        :param filepath: path the queued entry is expected to carry.
        :param is_add: expect an AddFile when True, a DeleteFile otherwise.
        :raises queue.Empty: if nothing is queued within QUEUE_TIMEOUT seconds.
        """
        queued_file = self.file_queue.get(timeout=self.QUEUE_TIMEOUT)
        self.file_queue.task_done()
        expected_type = AddFile if is_add else DeleteFile
        self.assertIsInstance(queued_file, expected_type)
        self.assertEqual(filepath, queued_file.filepath)
65 changes: 65 additions & 0 deletions tests/twarccloud/harvester/test_file_mover_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from unittest.mock import patch, MagicMock
from tempfile import mkdtemp
from queue import Queue
import os
import shutil
from twarccloud.filepaths_helper import get_collection_file
from twarccloud.harvester.file_mover_thread import S3FileMoverThread, AddFile, DeleteFile
from tests import TestCase


class TestS3FileMoverThread(TestCase):
    """Tests for S3FileMoverThread, with the AWS client replaced by a mock where a bucket is used."""

    def setUp(self):
        """Create a temporary collections directory, a file queue and the test file path."""
        self.collections_path = mkdtemp()
        self.file_queue = Queue()
        self.collection_id = 'test_id'
        self.filepath = get_collection_file(self.collection_id, 'test.txt', collections_path=self.collections_path)
        self.bucket = 'test_bucket'

    def tearDown(self):
        """Remove the temporary collections directory so test runs do not pile up temp dirs."""
        shutil.rmtree(self.collections_path, ignore_errors=True)

    def _write_test_file(self):
        """Create the test file, including its parent directories, with the content 'test'."""
        os.makedirs(os.path.dirname(self.filepath))
        with open(self.filepath, 'w') as file:
            file.write('test')

    def test_no_bucket(self):
        """Without a bucket the queue is still emptied and the local file is left alone."""
        with S3FileMoverThread(self.file_queue, self.collections_path, None):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, True))
        self.assertTrue(self.file_queue.empty())
        self.assertTrue(os.path.exists(self.filepath))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_move(self, mock_aws_client_factory):
        """AddFile(..., True) uploads the file and removes the local copy."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, True))
        self.assertTrue(self.file_queue.empty())
        # File was deleted.
        self.assertFalse(os.path.exists(self.filepath))
        mock_aws_client.upload_file.assert_called_once_with(self.filepath, self.bucket,
                                                            get_collection_file(self.collection_id, 'test.txt'))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_move_without_delete(self, mock_aws_client_factory):
        """AddFile(..., False) uploads the file and keeps the local copy."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, False))
        self.assertTrue(self.file_queue.empty())
        # File was not deleted.
        self.assertTrue(os.path.exists(self.filepath))
        mock_aws_client.upload_file.assert_called_once_with(self.filepath, self.bucket,
                                                            get_collection_file(self.collection_id, 'test.txt'))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_delete(self, mock_aws_client_factory):
        """DeleteFile results in one delete_object call for the collection-relative key."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self.file_queue.put(DeleteFile(self.filepath))
        self.assertTrue(self.file_queue.empty())
        mock_aws_client.delete_object.assert_called_once_with(Bucket=self.bucket,
                                                              Key=get_collection_file(self.collection_id, 'test.txt'))
Loading

0 comments on commit 02661bb

Please sign in to comment.