-
Notifications
You must be signed in to change notification settings - Fork 12
/
kmeans_dynamo_trigger.py
59 lines (49 loc) · 1.72 KB
/
kmeans_dynamo_trigger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import boto3
import json
from storage import DynamoTable
from storage.dynamo import dynamo_operator
def handler(event, context):
    """Reset the parameter tables and launch the k-means worker Lambdas.

    The handler builds a fixed job configuration (dataset, hyper-parameters
    and training settings), calls ``clear(key_col)`` on both the temporary
    and the merged DynamoDB parameter tables, and then invokes the
    ``lambda_core`` function once per worker with ``InvocationType='Event'``,
    so it does not wait for the workers' results.

    Args:
        event: Lambda event object; not read by this handler.
        context: Lambda context object; not read by this handler.

    Returns:
        None.
    """
    function_name = "lambda_core"

    # dataset setting
    dataset_name = 'higgs'
    data_bucket = "higgs-10"
    dataset_type = "dense_libsvm"
    n_features = 30
    tmp_table_name = "tmp-params"
    merged_table_name = "merged-params"
    key_col = "key"

    # hyper-parameters
    n_clusters = 10
    n_epochs = 10
    threshold = 0.0001

    # training setting
    sync_mode = "reduce"    # reduce or reduce_scatter
    n_workers = 10

    # clear dynamodb tables
    dynamo_client = dynamo_operator.get_client()
    tmp_tb = DynamoTable(dynamo_client, tmp_table_name)
    # Fix: the merged table must be opened by its own name; it was previously
    # opened with tmp_table_name, so "merged-params" was never cleared.
    merged_tb = DynamoTable(dynamo_client, merged_table_name)
    tmp_tb.clear(key_col)
    merged_tb.clear(key_col)

    # lambda payload shared by every worker
    base_payload = {
        'dataset': dataset_name,
        'data_bucket': data_bucket,
        'dataset_type': dataset_type,
        'n_features': n_features,
        'tmp_table_name': tmp_table_name,
        'merged_table_name': merged_table_name,
        'key_col': key_col,
        'n_clusters': n_clusters,
        'n_epochs': n_epochs,
        'threshold': threshold,
        'sync_mode': sync_mode,
        'n_workers': n_workers,
    }

    # invoke functions
    lambda_client = boto3.client('lambda')
    for worker_index in range(n_workers):
        # Build a fresh dict per worker so no state is shared between
        # iterations; the worker-specific keys are appended last.
        payload = dict(base_payload)
        payload['worker_index'] = worker_index
        payload['file'] = '{}_{}'.format(worker_index, n_workers)
        lambda_client.invoke(FunctionName=function_name,
                             InvocationType='Event',
                             Payload=json.dumps(payload))