-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added additional tests and various other tweaks.
- Loading branch information
1 parent
01a02e1
commit 02661bb
Showing
19 changed files
with
564 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import unittest | ||
import logging | ||
from twarccloud.collection_config import CollectionConfig | ||
|
||
|
||
class TestCase(unittest.TestCase):
    """Common base class for the test suite.

    Evaluating this class body (which happens when the module is imported)
    configures logging at DEBUG level, both through ``logging.basicConfig``
    and on the ``twarc-cloud`` logger.
    """

    # Class-body statements: executed once at class definition time,
    # not once per test.
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('twarc-cloud').setLevel(logging.DEBUG)
|
||
|
||
def extract_dict(changeset):
    """Remove the ``change_timestamp`` entry from *changeset* and return it.

    The mapping is modified in place and the very same object is returned,
    so the caller's reference loses the key as well.

    Raises:
        KeyError: if *changeset* has no ``change_timestamp`` entry.
    """
    del changeset['change_timestamp']
    return changeset
|
||
|
||
def timeline_config():
    """Return a ``CollectionConfig`` fixture for a ``user_timeline`` collection.

    The config has id ``foo``, a set of API keys and three users keyed by
    user id; only user ``12`` carries a ``since_id``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    users = {
        '481186914': {'screen_name': 'justin_littman'},
        '6253282': {'screen_name': 'twitterapi'},
        '12': {'screen_name': 'jack', 'since_id': '12345'},
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'user_timeline',
        'keys': api_keys,
        'users': users,
    })
|
||
|
||
def filter_config():
    """Return a ``CollectionConfig`` fixture for a ``filter`` collection.

    The config has id ``foo``, a set of API keys and a filter whose
    ``track`` value is ``foo,#bar``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    track_filter = {'track': 'foo,#bar'}
    return CollectionConfig({
        'id': 'foo',
        'type': 'filter',
        'keys': api_keys,
        'filter': track_filter,
    })
|
||
|
||
def search_config():
    """Return a ``CollectionConfig`` fixture for a ``search`` collection.

    The config has id ``foo``, a set of API keys and a search whose
    ``query`` value is ``foo``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    search = {'query': 'foo'}
    return CollectionConfig({
        'id': 'foo',
        'type': 'search',
        'keys': api_keys,
        'search': search,
    })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
from tempfile import mkdtemp | ||
import shutil | ||
import json | ||
import os | ||
import socket | ||
from contextlib import closing | ||
from unittest.mock import patch, MagicMock | ||
from threading import Timer | ||
import requests | ||
from tweet_harvester import TweetHarvester | ||
from twarccloud.harvester.twarc_thread import TwarcThread | ||
from twarccloud.filepaths_helper import get_collection_config_filepath, get_harvest_file, get_changesets_path | ||
from tests import TestCase, timeline_config | ||
|
||
|
||
class TestTweetHarvester(TestCase):
    """Tests for ``TweetHarvester`` with ``TwarcThread`` replaced by a mock.

    Every test runs against a temporary collections directory that is seeded
    with the timeline collection config in ``setUp`` and removed again in
    ``tearDown``.
    """

    def setUp(self):
        self.collection_id = 'test_id'
        self.collections_path = mkdtemp()
        self.collection_config_filepath = get_collection_config_filepath(self.collection_id,
                                                                         collections_path=self.collections_path)
        self.write_collection_config()

    def tearDown(self):
        shutil.rmtree(self.collections_path, ignore_errors=True)

    @patch('tweet_harvester.TwarcThread')
    def test_harvest(self, mock_twarc_thread_class):
        """A harvest with ``shutdown=True`` writes its outputs and sets both events."""
        mock_twarc_thread = MagicMock(TwarcThread, exception=None)
        mock_twarc_thread_class.return_value = mock_twarc_thread

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=True,
                                   port=self.find_free_port())
        # Make a change to changeset
        harvester.changeset.update_user('screen_name', 'real_justin_littman', '481186914')
        harvester.harvest()

        # Test collection config written to harvest
        harvest_collection_config_filepath = get_harvest_file(self.collection_id, harvester.harvest_timestamp,
                                                              'collection.json',
                                                              collections_path=self.collections_path)
        self.assertTrue(os.path.exists(harvest_collection_config_filepath))
        harvest_collection_config = self.load_collection_config(harvest_collection_config_filepath)
        # The secrets must not be present in the copy stored with the harvest.
        self.assertNotIn('consumer_secret', harvest_collection_config['keys'])
        self.assertNotIn('access_token_secret', harvest_collection_config['keys'])

        # Test changeset
        collection_config = self.load_collection_config(self.collection_config_filepath)
        self.assertEqual('real_justin_littman', collection_config['users']['481186914']['screen_name'])
        self.assertEqual(1, len(
            os.listdir(get_changesets_path(self.collection_id, collections_path=self.collections_path))))

        # Test events
        self.assertTrue(harvester.stopped_event.is_set())
        self.assertTrue(harvester.shutdown_event.is_set())

    @patch('tweet_harvester.TwarcThread')
    def test_harvest_exception(self, mock_twarc_thread_class):
        """``harvest()`` raises when the (mocked) twarc thread carries an exception."""
        mock_twarc_thread = MagicMock(TwarcThread, exception=Exception('Darn'))
        mock_twarc_thread_class.return_value = mock_twarc_thread

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=True,
                                   port=self.find_free_port())
        with self.assertRaises(Exception):
            harvester.harvest()

    @patch('tweet_harvester.TwarcThread')
    def test_harvest_without_shutdown(self, mock_twarc_thread_class):
        """With ``shutdown=False`` the shutdown event is set only after ``/shutdown`` is requested."""
        mock_twarc_thread = MagicMock(TwarcThread, exception=None)
        mock_twarc_thread_class.return_value = mock_twarc_thread

        harvester = TweetHarvester(self.collection_id, self.collections_path, shutdown=False,
                                   port=self.find_free_port())

        # The shutdown flag is sampled on a timer thread but asserted on the
        # main thread below: an assertion that fails inside a Timer thread
        # only prints a traceback and would never fail the test.
        observed_shutdown_states = []

        def record_shutdown_state():
            observed_shutdown_states.append(harvester.shutdown_event.is_set())

        def request_shutdown():
            requests.get('http://localhost:{}/shutdown'.format(harvester.port))

        check_timer = Timer(.5, record_shutdown_state)
        shutdown_timer = Timer(1, request_shutdown)
        check_timer.start()
        shutdown_timer.start()
        try:
            harvester.harvest()
        finally:
            # Do not leave timers pending if harvest() returned early or raised.
            check_timer.cancel()
            shutdown_timer.cancel()
            check_timer.join()

        # Sampled before the shutdown request was sent, so it must have been unset.
        self.assertEqual([False], observed_shutdown_states)

        # Test events
        self.assertTrue(harvester.stopped_event.is_set())
        self.assertTrue(harvester.shutdown_event.is_set())

    def write_collection_config(self):
        """Write the timeline config fixture as JSON to the collection config path."""
        os.makedirs(os.path.dirname(self.collection_config_filepath))
        with open(self.collection_config_filepath, 'w') as file:
            json.dump(timeline_config(), file)

    @staticmethod
    def load_collection_config(filepath):
        """Return the parsed JSON content of *filepath*."""
        with open(filepath) as file:
            return json.load(file)

    @staticmethod
    def find_free_port():
        """Return a TCP port number that was unused at the time of the call.

        Binding to port 0 lets the operating system pick the port; the probe
        socket is closed again before the number is returned.
        """
        # pylint: disable=no-member
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            sock.bind(('', 0))
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            return sock.getsockname()[1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +0,0 @@ | ||
from twarccloud.collection_config import CollectionConfig | ||
|
||
|
||
def extract_dict(changeset):
    """Remove the ``change_timestamp`` entry from *changeset* and return it.

    The mapping is modified in place and the very same object is returned,
    so the caller's reference loses the key as well.

    Raises:
        KeyError: if *changeset* has no ``change_timestamp`` entry.
    """
    del changeset['change_timestamp']
    return changeset
|
||
|
||
def timeline_config():
    """Return a ``CollectionConfig`` fixture for a ``user_timeline`` collection.

    The config has id ``foo``, a set of API keys and three users keyed by
    user id; only user ``12`` carries a ``since_id``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    users = {
        '481186914': {'screen_name': 'justin_littman'},
        '6253282': {'screen_name': 'twitterapi'},
        '12': {'screen_name': 'jack', 'since_id': '12345'},
    }
    return CollectionConfig({
        'id': 'foo',
        'type': 'user_timeline',
        'keys': api_keys,
        'users': users,
    })
|
||
|
||
def filter_config():
    """Return a ``CollectionConfig`` fixture for a ``filter`` collection.

    The config has id ``foo``, a set of API keys and a filter whose
    ``track`` value is ``foo,#bar``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    track_filter = {'track': 'foo,#bar'}
    return CollectionConfig({
        'id': 'foo',
        'type': 'filter',
        'keys': api_keys,
        'filter': track_filter,
    })
|
||
|
||
def search_config():
    """Return a ``CollectionConfig`` fixture for a ``search`` collection.

    The config has id ``foo``, a set of API keys and a search whose
    ``query`` value is ``foo``.
    """
    # NOTE(review): credential-shaped literals are committed here - confirm
    # they are dummy values and not live API credentials.
    api_keys = {
        'consumer_key': 'mBbq9ruEckInQHUir8Kn0',
        'consumer_secret': 'Pf28yReBUD90pLVOsb4r5ZnKCQ6xlOomBAjD5npFEQ6Rm',
        'access_token': '481186914-5yIyfryJqcHV29YVL37BOzjseYuRzCLmwO6',
        'access_token_secret': 'S51yY5Hjffts4WMKMgvGendxbZVsZO014Z38Tfvc',
    }
    search = {'query': 'foo'}
    return CollectionConfig({
        'id': 'foo',
        'type': 'search',
        'keys': api_keys,
        'search': search,
    })
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from tempfile import mkdtemp | ||
import shutil | ||
from datetime import datetime | ||
from queue import Queue | ||
import os | ||
from twarccloud.harvester.collection_lock import CollectionLock, AddFile, DeleteFile, is_locked, assert_locked, \ | ||
LockedException | ||
from twarccloud.filepaths_helper import get_lock_file, get_last_harvest_file | ||
from tests import TestCase | ||
|
||
|
||
class TestCollectionLock(TestCase):
    """Tests for ``CollectionLock`` and the ``is_locked`` / ``assert_locked`` helpers.

    Each test uses a temporary collections directory (removed in
    ``tearDown``) and a fresh queue that is passed to the lock and then
    inspected for the ``AddFile`` / ``DeleteFile`` entries it receives.
    """

    def setUp(self):
        self.collections_path = mkdtemp()
        self.timestamp = datetime.utcnow()
        self.file_queue = Queue()
        self.collection_id = 'test_id'
        self.lock_file = get_lock_file(self.collection_id, collections_path=self.collections_path)
        self.last_harvest_file = get_last_harvest_file(self.collection_id, collections_path=self.collections_path)

    def tearDown(self):
        shutil.rmtree(self.collections_path, ignore_errors=True)

    def test_lock(self):
        """The lock file exists while held; afterwards it is gone and the last-harvest file exists."""
        with CollectionLock(self.collections_path, self.collection_id, self.file_queue,
                            harvest_timestamp=self.timestamp):
            self.assertTrue(os.path.exists(self.lock_file))
            self.assertQueuedFile(self.lock_file)

        self.assertTrue(os.path.exists(self.last_harvest_file))
        self.assertQueuedFile(self.last_harvest_file)
        self.assertFalse(os.path.exists(self.lock_file))
        self.assertQueuedFile(self.lock_file, is_add=False)

    def test_is_locked(self):
        """``is_locked`` is true and ``assert_locked`` raises only while the lock is held."""
        self.assertFalse(is_locked(self.lock_file))
        with CollectionLock(self.collections_path, self.collection_id, self.file_queue,
                            harvest_timestamp=self.timestamp):
            self.assertTrue(is_locked(self.lock_file))
            with self.assertRaises(LockedException):
                assert_locked(self.lock_file)
        self.assertFalse(is_locked(self.lock_file))
        # Must not raise once the lock has been released.
        assert_locked(self.lock_file)

    # pylint: disable=invalid-name
    def assertQueuedFile(self, filepath, is_add=True):
        """Assert that the next queue entry is an add/delete for *filepath*.

        Args:
            filepath: path the queued entry is expected to carry.
            is_add: expect an ``AddFile`` when true, a ``DeleteFile`` otherwise.

        Raises:
            queue.Empty: if no entry arrives within the timeout.
        """
        # Bounded wait: a missing queue entry should fail the test rather
        # than block the whole test run forever.
        queued_file = self.file_queue.get(timeout=5)
        self.file_queue.task_done()
        self.assertIsInstance(queued_file, AddFile if is_add else DeleteFile)
        self.assertEqual(filepath, queued_file.filepath)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from unittest.mock import patch, MagicMock | ||
from tempfile import mkdtemp | ||
from queue import Queue | ||
import os | ||
from twarccloud.filepaths_helper import get_collection_file | ||
from twarccloud.harvester.file_mover_thread import S3FileMoverThread, AddFile, DeleteFile | ||
from tests import TestCase | ||
|
||
|
||
class TestS3FileMoverThread(TestCase):
    """Tests for ``S3FileMoverThread`` with the AWS client factory mocked out.

    Each test gets a temporary collections directory (removed again when the
    test finishes) and a fresh queue that is handed to the thread.
    """

    def setUp(self):
        # Imported locally because this module does not import shutil at the top.
        import shutil

        self.collections_path = mkdtemp()
        # Remove the temporary directory once the test is over, pass or fail.
        self.addCleanup(shutil.rmtree, self.collections_path, ignore_errors=True)
        self.file_queue = Queue()
        self.collection_id = 'test_id'
        self.filepath = get_collection_file(self.collection_id, 'test.txt', collections_path=self.collections_path)
        self.bucket = 'test_bucket'

    def _write_test_file(self):
        """Create ``self.filepath`` (and its parent directory) containing ``test``."""
        os.makedirs(os.path.dirname(self.filepath))
        with open(self.filepath, 'w') as file:
            file.write('test')

    def test_no_bucket(self):
        """Without a bucket the queued file is left in place."""
        with S3FileMoverThread(self.file_queue, self.collections_path, None):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, True))
        self.assertTrue(self.file_queue.empty())
        self.assertTrue(os.path.exists(self.filepath))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_move(self, mock_aws_client_factory):
        """An ``AddFile`` with delete requested is uploaded and then removed locally."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, True))
        self.assertTrue(self.file_queue.empty())
        # File was deleted.
        self.assertFalse(os.path.exists(self.filepath))
        mock_aws_client.upload_file.assert_called_once_with(self.filepath, self.bucket,
                                                            get_collection_file(self.collection_id, 'test.txt'))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_move_without_delete(self, mock_aws_client_factory):
        """An ``AddFile`` without delete requested is uploaded and kept locally."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self._write_test_file()
            self.file_queue.put(AddFile(self.filepath, False))
        self.assertTrue(self.file_queue.empty())
        # File was not deleted.
        self.assertTrue(os.path.exists(self.filepath))
        mock_aws_client.upload_file.assert_called_once_with(self.filepath, self.bucket,
                                                            get_collection_file(self.collection_id, 'test.txt'))

    @patch('twarccloud.harvester.file_mover_thread.aws_client')
    def test_delete(self, mock_aws_client_factory):
        """A ``DeleteFile`` results in a ``delete_object`` call for the collection-relative key."""
        mock_aws_client = MagicMock()
        mock_aws_client_factory.return_value = mock_aws_client
        with S3FileMoverThread(self.file_queue, self.collections_path, self.bucket):
            self.file_queue.put(DeleteFile(self.filepath))
        self.assertTrue(self.file_queue.empty())
        mock_aws_client.delete_object.assert_called_once_with(Bucket=self.bucket,
                                                              Key=get_collection_file(self.collection_id,
                                                                                      'test.txt'))
Oops, something went wrong.