Add backfill study function #14

Open · wants to merge 2 commits into main

77 changes: 75 additions & 2 deletions mano/sync/__init__.py
@@ -26,8 +26,81 @@

spinner = itertools.cycle(['-', '/', '|', '\\'])

def backfill_study(
        Keyring, study_id, output_dir, user_ids=None,
        start_date=BACKFILL_START_DATE, data_streams=None, lock=None,
        passphrase=None, backfill_window=BACKFILL_WINDOW
):
    '''Backfill data for all users in a study.

    This function backfills data for multiple users at a time. If no user
    IDs are given, it attempts to download data for every user on the
    server. With the defaults for start_date and backfill_window, this
    function runs very slowly. You can speed it up by setting start_date
    close to the earliest day you expect data for, by increasing
    backfill_window, and by downloading only the data streams you need.

:param Keyring: Keyring dictionary
:type Keyring: dict
:param study_id: Study ID
:type study_id: str
:param output_dir: Directory to write raw data to
:type output_dir: str
:param user_ids: Subject IDs
:type user_ids: list
:param start_date: Earliest day to check for data on the server; YYYY-MM-DD
:type start_date: str
:param data_streams: Data streams to download
:type data_streams: list
:param lock: Data streams to lock, if desired
:type lock: list
:param passphrase: Passphrase to use to lock any locked data streams
:type passphrase: str
:param backfill_window: Number of days to attempt to download at once
:type backfill_window: int
    :returns: None; raw data is written to output_dir
    :rtype: None'''
    if user_ids is None:
        logger.info('Obtaining list of users...')
        num_tries = 0
        # Retry a few times in case a transient network error interrupts us
        while num_tries < 5:
            try:
                user_ids = list(mano.users(Keyring, study_id))
                break
            except KeyboardInterrupt:
                logger.info('Someone closed the program')
                sys.exit()
            except requests.exceptions.ChunkedEncodingError:
                num_tries += 1
                logger.warning('Network failed while obtaining list of users, '
                               'try %s of 5', num_tries)
        if user_ids is None:  # every attempt failed
            logger.error('Unable to obtain user IDs from server')
            return
    for user_id in user_ids:
        download_success = False
        num_tries = 0
        while not download_success:
            try:
                backfill(Keyring, study_id, user_id, output_dir, start_date,
                         data_streams, lock, passphrase, backfill_window)
                download_success = True
            except requests.exceptions.ChunkedEncodingError:
                num_tries += 1
                logger.warning('Network failed in download of %s, try %s of 5',
                               user_id, num_tries)
                if num_tries > 5:
                    logger.warning('Too many failures; skipping user %s',
                                   user_id)
                    download_success = True
            except KeyboardInterrupt:
                logger.info('Someone closed the program')
                sys.exit()


def backfill(Keyring, study_id, user_id, output_dir, start_date=BACKFILL_START_DATE,
             data_streams=None, lock=None, passphrase=None,
             backfill_window=BACKFILL_WINDOW):
'''
Backfill a user (participant)
'''
@@ -58,7 +131,7 @@ def backfill(Keyring, study_id, user_id, output_dir, start_date=BACKFILL_START_D
timestamp = start_date
logger.debug('no backfill timestamp found, using: %s', timestamp)
# get download window and next resume point
    start, stop, resume = _window(timestamp, backfill_window)
logger.info('processing window is [%s, %s]', start, stop)
# download window of data
archive = download(
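
For orientation, a rough sketch of how a window helper like _window could derive
the [start, stop] range and the resume point, assuming YYYY-MM-DD timestamps;
this is illustrative only, and the real implementation in mano/sync/__init__.py
may differ:

    import datetime

    def _window_sketch(timestamp, window_days):
        '''Illustrative only: compute a [start, stop] download window and the
        resume point for the next call from a YYYY-MM-DD timestamp.'''
        fmt = '%Y-%m-%d'
        start = datetime.datetime.strptime(timestamp, fmt)
        stop = start + datetime.timedelta(days=window_days)
        resume = stop  # the next call picks up where this window ended
        return start.strftime(fmt), stop.strftime(fmt), resume.strftime(fmt)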