diff --git a/docs/administration.md b/docs/administration.md index 04b1f0b..a7cc691 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -4,7 +4,12 @@ To prevent multiple harvests being performed concurrently for a collection, a lock file (`lock.json`) is written to a collection's base directory during a harvest. Harvesters check to see if the lock file is present before beginning. -If a harvest fails, it is possible that the lock file is not removed. To force it to be removed, you can delete `lock.json` +If a harvest raises a `LockedException` this indicates that a harvest is currently in progress or a previous harvest +exited uncleanly. + +If a collection is locked because multiple harvests are attempting to run concurrently then adjust the schedule. + +If a collection is locked because a previous harvest exited uncleanly, then force it to be unlocked. To unlock, delete `lock.json` or execute `tweet_harvester`'s `aws unlock` command. For example: $ python3 tweet_harvester.py aws unlock twarc_cloud test_collection diff --git a/twarccloud/harvester/collection_lock.py b/twarccloud/harvester/collection_lock.py index 169fd6d..648626a 100644 --- a/twarccloud/harvester/collection_lock.py +++ b/twarccloud/harvester/collection_lock.py @@ -49,7 +49,9 @@ def __exit__(self, *args): class LockedException(Exception): - pass + def __init__(self): + Exception.__init__(self, 'Collection is locked. This is because a harvest is currently running or a harvest ' \ + 'terminated uncleanly.') # Returns True if lock file exists at the provided filepath.