From 3789317bc1063c3e76bcf6203cc0a666ebb89116 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Sun, 2 Dec 2018 17:05:55 +0800 Subject: [PATCH 01/22] Improve model knob API with changes to model examples (lacking worker-side changes) --- examples/models/image_classification/SkDt.py | 30 ++-- examples/models/image_classification/SkSvm.py | 46 ++---- .../image_classification/TfFeedForward.py | 95 +++++------- .../models/image_classification/TfVgg16.py | 50 +++---- examples/models/pos_tagging/BigramHmm.py | 12 +- examples/models/pos_tagging/PyBiLstm.py | 72 +++------ rafiki/advisor/btb_gp_advisor.py | 58 ++++--- rafiki/model/__init__.py | 1 + rafiki/model/knob.py | 118 +++++++++++++++ rafiki/model/model.py | 141 +++++++++--------- 10 files changed, 324 insertions(+), 299 deletions(-) create mode 100644 rafiki/model/knob.py diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index eee91a4e..4f6d53ff 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -5,34 +5,28 @@ import base64 import numpy as np -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.config import APP_MODE +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ + IntegerKnob, CategoricalKnob from rafiki.constants import TaskType, ModelDependency class SkDt(BaseModel): ''' Implements a decision tree classifier on Scikit-Learn for simple image classification ''' - - def get_knob_config(self): + @staticmethod + def get_knob_config(): return { - 'knobs': { - 'max_depth': { - 'type': 'int', - 'range': [2, 8] - }, - 'criterion': { - 'type': 'string', - 'values': ['gini', 'entropy'] - }, - } + 'max_depth': IntegerKnob(2, 16 if APP_MODE != 'DEV' else 8), + 'criterion': CategoricalKnob(['gini', 'entropy']) } - def init(self, knobs): - self._max_depth = knobs.get('max_depth') - self._criterion = knobs.get('criterion') + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs self._clf = self._build_classifier( - self._max_depth, - self._criterion + self._knobs.get('max_depth'), + self._knobs.get('criterion') ) def train(self, dataset_uri): diff --git a/examples/models/image_classification/SkSvm.py b/examples/models/image_classification/SkSvm.py index 2a7162dc..39b86915 100644 --- a/examples/models/image_classification/SkSvm.py +++ b/examples/models/image_classification/SkSvm.py @@ -5,46 +5,32 @@ import base64 import numpy as np -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.config import APP_MODE +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ + IntegerKnob, CategoricalKnob, FloatKnob from rafiki.constants import TaskType, ModelDependency class SkSvm(BaseModel): ''' Implements a SVM on Scikit-Learn for simple image classification ''' - - def get_knob_config(self): + @staticmethod + def get_knob_config(): return { - 'knobs': { - 'max_iter': { - 'type': 'int', - 'range': [10, 10] - }, - 'kernel': { - 'type': 'string', - 'values': ['rbf', 'linear'] - }, - 'gamma': { - 'type': 'string', - 'values': ['scale', 'auto'] - }, - 'C': { - 'type': 'float_exp', - 'range': [1e-2, 1e2] - } - } + 'max_iter': IntegerKnob(10, 40 if APP_MODE != 'DEV' else 10), + 'kernel': CategoricalKnob(['rbf', 'linear']), + 'gamma': CategoricalKnob(['scale', 'auto']), + 'C': FloatKnob(1e-2, 1e2, is_exp=True) } - def init(self, knobs): - self._max_iter = knobs.get('max_iter') - 
self._kernel = knobs.get('kernel') - self._gamma = knobs.get('gamma') - self._C = knobs.get('C') + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs self._clf = self._build_classifier( - self._max_iter, - self._kernel, - self._gamma, - self._C + knobs.get('max_iter'), + knobs.get('kernel'), + knobs.get('gamma') , + knobs.get('C') ) def train(self, dataset_uri): diff --git a/examples/models/image_classification/TfFeedForward.py b/examples/models/image_classification/TfFeedForward.py index a94bae34..b907c66b 100644 --- a/examples/models/image_classification/TfFeedForward.py +++ b/examples/models/image_classification/TfFeedForward.py @@ -8,7 +8,8 @@ import base64 from rafiki.config import APP_MODE -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ + IntegerKnob, CategoricalKnob, FloatKnob from rafiki.constants import TaskType, ModelDependency class TfFeedForward(BaseModel): @@ -16,53 +17,20 @@ class TfFeedForward(BaseModel): Implements a fully-connected feed-forward neural network with variable hidden layers on Tensorflow for simple image classification ''' - - def get_knob_config(self): - epochs_range = [3, 100] - hidden_layer_count_range = [1, 8] - - if APP_MODE == 'DEV': - print('WARNING: In DEV mode, `epochs` is set to 3 and `hidden_layer_count` is set to 2.') - epochs_range = [3, 3] - hidden_layer_count_range = [2, 2] - + @staticmethod + def get_knob_config(): return { - 'knobs': { - 'epochs': { - 'type': 'int', - 'range': epochs_range - }, - 'hidden_layer_count': { - 'type': 'int', - 'range': hidden_layer_count_range - }, - 'hidden_layer_units': { - 'type': 'int', - 'range': [2, 128] - }, - 'learning_rate': { - 'type': 'float_exp', - 'range': [1e-5, 1e-1] - }, - 'batch_size': { - 'type': 'int_cat', - 'values': [16, 32, 64, 128] - }, - 'image_size': { - 'type': 'int_cat', - 'values': [8, 16, 32] - } - } + 'epochs': IntegerKnob(3, 10 if APP_MODE != 'DEV' else 3), + 'hidden_layer_count': IntegerKnob(1, 8 if APP_MODE != 'DEV' else 2), + 'hidden_layer_units': IntegerKnob(2, 128), + 'learning_rate': FloatKnob(1e-5, 1e-1, is_exp=True), + 'batch_size': CategoricalKnob([16, 32, 64, 128]), + 'image_size': CategoricalKnob([8, 16, 32]), } - def init(self, knobs): - self._batch_size = knobs.get('batch_size') - self._hidden_layer_units = knobs.get('hidden_layer_units') - self._hidden_layer_count = knobs.get('hidden_layer_count') - self._learning_rate = knobs.get('learning_rate') - self._epochs = knobs.get('epochs') - self._image_size = knobs.get('image_size') - + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs self._graph = tf.Graph() config = tf.ConfigProto() config.gpu_options.allow_growth = True @@ -70,7 +38,11 @@ def init(self, knobs): self._define_plots() def train(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[self._image_size, self._image_size]) + im_sz = self._knobs.get('image_size') + bs = self._knobs.get('batch_size') + ep = self._knobs.get('epochs') + + dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) num_classes = dataset.classes (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) @@ -85,8 +57,8 @@ def train(self, dataset_uri): images, classes, verbose=0, - epochs=self._epochs, - batch_size=self._batch_size, + epochs=ep, + batch_size=bs, callbacks=[ 
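                # Invoked by Keras at the end of every epoch; `_on_train_epoch_end` below
                # logs each epoch's training loss against the plots defined in `_define_plots`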
tf.keras.callbacks.LambdaCallback(on_epoch_end=self._on_train_epoch_end) ] @@ -98,7 +70,9 @@ def train(self, dataset_uri): self.utils.log('Train accuracy: {}'.format(accuracy)) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[self._image_size, self._image_size]) + im_sz = self._knobs.get('image_size') + + dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) classes = np.asarray(classes) @@ -111,7 +85,9 @@ def evaluate(self, dataset_uri): return accuracy def predict(self, queries): - X = self.utils.resize_as_images(queries, image_size=[self._image_size, self._image_size]) + im_sz = self._knobs.get('image_size') + + X = self.utils.resize_as_images(queries, image_size=[im_sz, im_sz]) with self._graph.as_default(): with self._sess.as_default(): probs = self._model.predict(X) @@ -166,20 +142,17 @@ def _define_plots(self): self.utils.define_plot('Loss Over Time', ['loss']) def _build_model(self, num_classes): - hidden_layer_units = self._hidden_layer_units - hidden_layer_count = self._hidden_layer_count - learning_rate = self._learning_rate - image_size = self._image_size + units = self._knobs.get('hidden_layer_units') + layers = self._knobs.get('hidden_layer_count') + lr = self._knobs.get('learning_rate') + im_sz = self._knobs.get('image_size') model = keras.Sequential() - model.add(keras.layers.Flatten(input_shape=(image_size, image_size,))) + model.add(keras.layers.Flatten(input_shape=(im_sz, im_sz,))) model.add(keras.layers.BatchNormalization()) - for _ in range(hidden_layer_count): - model.add(keras.layers.Dense( - hidden_layer_units, - activation=tf.nn.relu - )) + for _ in range(layers): + model.add(keras.layers.Dense(units, activation=tf.nn.relu)) model.add(keras.layers.Dense( num_classes, @@ -187,7 +160,7 @@ def _build_model(self, num_classes): )) model.compile( - optimizer=keras.optimizers.Adam(lr=learning_rate), + optimizer=keras.optimizers.Adam(lr=lr), loss='sparse_categorical_crossentropy', metrics=['accuracy'] ) diff --git a/examples/models/image_classification/TfVgg16.py b/examples/models/image_classification/TfVgg16.py index b783e35f..9a88bad1 100644 --- a/examples/models/image_classification/TfVgg16.py +++ b/examples/models/image_classification/TfVgg16.py @@ -8,7 +8,8 @@ import abc from urllib.parse import urlparse, parse_qs -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ + IntegerKnob, FloatKnob, CategoricalKnob from rafiki.constants import TaskType, ModelDependency from rafiki.config import APP_MODE @@ -16,42 +17,26 @@ class TfVgg16(BaseModel): ''' Implements VGG16 on Tensorflow for simple image classification ''' - - def get_knob_config(self): - epochs_range = [1, 20] - - if APP_MODE == 'DEV': - print('WARNING: In DEV mode, `epochs` is set to 1.') - epochs_range = [1, 1] - + @staticmethod + def get_knob_config(): return { - 'knobs': { - 'epochs': { - 'type': 'int', - 'range': epochs_range - }, - 'learning_rate': { - 'type': 'float_exp', - 'range': [1e-5, 1e-1] - }, - 'batch_size': { - 'type': 'int_cat', - 'values': [16, 32, 64, 128] - } - } + 'epochs': IntegerKnob(1, 1 if APP_MODE != 'DEV' else 10), + 'learning_rate': FloatKnob(1e-5, 1e-1, is_exp=True), + 'batch_size': CategoricalKnob([16, 32, 64, 128]), } - def init(self, knobs): - self._batch_size = 
knobs.get('batch_size') - self._epochs = knobs.get('epochs') - self._learning_rate = knobs.get('learning_rate') - + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs self._graph = tf.Graph() config = tf.ConfigProto() config.gpu_options.allow_growth = True self._sess = tf.Session(graph=self._graph, config=config) def train(self, dataset_uri): + ep = self._knobs.get('epochs') + bs = self._knobs.get('batch_size') + dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[48, 48]) num_classes = dataset.classes (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) @@ -65,8 +50,8 @@ def train(self, dataset_uri): self._model.fit( images, classes, - epochs=self._epochs, - batch_size=self._batch_size + epochs=ep, + batch_size=bs ) def evaluate(self, dataset_uri): @@ -129,7 +114,8 @@ def load_parameters(self, params): self._model = keras.models.load_model(tmp.name) def _build_model(self, num_classes): - learning_rate = self._learning_rate + lr = self._knobs.get('learning_rate') + model = keras.applications.VGG16( include_top=True, input_shape=(48, 48, 3), @@ -138,7 +124,7 @@ def _build_model(self, num_classes): ) model.compile( - optimizer=keras.optimizers.Adam(lr=learning_rate), + optimizer=keras.optimizers.Adam(lr=lr), loss='sparse_categorical_crossentropy', metrics=['accuracy'] ) diff --git a/examples/models/pos_tagging/BigramHmm.py b/examples/models/pos_tagging/BigramHmm.py index 219f3a07..b27efc12 100644 --- a/examples/models/pos_tagging/BigramHmm.py +++ b/examples/models/pos_tagging/BigramHmm.py @@ -18,14 +18,12 @@ class BigramHmm(BaseModel): ''' Implements Bigram Hidden Markov Model (HMM) for POS tagging ''' + @staticmethod + def get_knob_config(): + return {} - def get_knob_config(self): - return { - 'knobs': {} - } - - def init(self, knobs): - pass + def __init__(self, **knobs): + super().__init__(**knobs) def train(self, dataset_uri): dataset = self.utils.load_dataset_of_corpus(dataset_uri) diff --git a/examples/models/pos_tagging/PyBiLstm.py b/examples/models/pos_tagging/PyBiLstm.py index c906021e..603fe9eb 100644 --- a/examples/models/pos_tagging/PyBiLstm.py +++ b/examples/models/pos_tagging/PyBiLstm.py @@ -12,7 +12,8 @@ import torch.optim as optim from torch.utils.data.dataset import Dataset -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ + IntegerKnob, FloatKnob, CategoricalKnob from rafiki.constants import TaskType, ModelDependency from rafiki.config import APP_MODE @@ -20,50 +21,20 @@ class PyBiLstm(BaseModel): ''' Implements a Bidrectional LSTM model in Pytorch for POS tagging ''' - - def get_knob_config(self): - epochs_range = [10, 50] - - if APP_MODE == 'DEV': - print('WARNING: In DEV mode, `epochs` is set to 10.') - epochs_range = [10, 10] - + @staticmethod + def get_knob_config(): return { - 'knobs': { - 'epochs': { - 'type': 'int', - 'range': epochs_range - }, - 'batch_size': { - 'type': 'int_cat', - 'values': [16, 32, 64, 128] - }, - 'word_embed_dims': { - 'type': 'int', - 'range': [16, 128] - }, - 'learning_rate': { - 'type': 'float_exp', - 'range': [1e-2, 1e-1] - }, - 'word_rnn_hidden_size': { - 'type': 'int', - 'range': [16, 128] - }, - 'word_dropout': { - 'type': 'float_exp', - 'range': [1e-3, 2e-1] - } - } + 'epochs': IntegerKnob(10, 50 if APP_MODE != 'DEV' else 10), + 'word_embed_dims': IntegerKnob(16, 128), + 'word_rnn_hidden_size': IntegerKnob(16, 128), + 'word_dropout': 
FloatKnob(1e-3, 2e-1, is_exp=True), + 'learning_rate': FloatKnob(1e-2, 1e-1, is_exp=True), + 'batch_size': CategoricalKnob([16, 32, 64, 128]), } - def init(self, knobs): - self._epochs = knobs.get('epochs') - self._word_embed_dims = knobs.get('word_embed_dims') - self._word_rnn_hidden_size = knobs.get('word_rnn_hidden_size') - self._word_dropout = knobs.get('word_dropout') - self._batch_size = knobs.get('batch_size') - self._learning_rate = knobs.get('learning_rate') + def __init__(self, **knobs): + super().__init__(**knobs) + self._knobs = knobs self._define_plots() def train(self, dataset_uri): @@ -160,7 +131,7 @@ def _prepare_batch(self, dataset, lo, hi, Tensor, has_tags=True): return (words_tsr, tags_tsr) def _predict(self, dataset): - N = self._batch_size + N = self._knobs.get('batch_size') net = self._net B = math.ceil(len(dataset) / N) # No. of batches word_count = len(self._word_dict) @@ -196,8 +167,8 @@ def _predict(self, dataset): return sents_pred_tags def _train(self, dataset): - N = self._batch_size - epochs = self._epochs + N = self._knobs.get('batch_size') + ep = self._knobs.get('epochs') null_tag = self._tag_count # Tag to ignore (from padding of sentences during batching) B = math.ceil(len(dataset) / N) # No. of batches @@ -211,7 +182,7 @@ def _train(self, dataset): loss_func = nn.CrossEntropyLoss(ignore_index=null_tag) - for epoch in range(epochs): + for epoch in range(ep): total_loss = 0 for i in range(B): # Extract batch from dataset @@ -256,10 +227,15 @@ def _compute_accuracy(self, dataset, sents_tags): return correct / total def _create_model(self): + word_embed_dims = self._knobs.get('word_embed_dims') + word_rnn_hidden_size = self._knobs.get('word_rnn_hidden_size') + word_dropout = self._knobs.get('word_dropout') + lr = self._knobs.get('learning_rate') + word_count = len(self._word_dict) net = PyNet(word_count + 1, self._tag_count + 1, \ - self._word_embed_dims, self._word_rnn_hidden_size, self._word_dropout) - optimizer = optim.Adam(net.parameters(), lr=self._learning_rate) + word_embed_dims, word_rnn_hidden_size, word_dropout) + optimizer = optim.Adam(net.parameters(), lr=lr) return (net, optimizer) class PyNet(nn.Module): diff --git a/rafiki/advisor/btb_gp_advisor.py b/rafiki/advisor/btb_gp_advisor.py index 95acc53b..ee02305a 100644 --- a/rafiki/advisor/btb_gp_advisor.py +++ b/rafiki/advisor/btb_gp_advisor.py @@ -1,6 +1,7 @@ from btb.tuning import GP from btb import HyperParameter, ParamTypes +from rafiki.model import BaseKnob, FloatKnob, IntegerKnob, CategoricalKnob from .advisor import BaseAdvisor class BtbGpAdvisor(BaseAdvisor): @@ -9,8 +10,7 @@ class BtbGpAdvisor(BaseAdvisor): ''' def __init__(self, knob_config): # TODO: Support conditional knobs - knobs = knob_config['knobs'] - tunables = self._get_tunables(knobs) + tunables = self._get_tunables(knob_config) # TODO: Allow configuration of tuner self._tuner = GP(tunables=tunables) @@ -22,37 +22,31 @@ def propose(self): def feedback(self, knobs, score): self._tuner.add(knobs, score) - def _get_tunables(self, knobs): + def _get_tunables(self, knob_config): tunables = [ - _knob_to_tunable(name, knob_config) - for (name, knob_config) - in knobs.items() + (name, _knob_to_tunable(x)) + for (name, x) + in knob_config.items() ] return tunables -_KNOB_TYPE_TO_TUNABLE_TYPE = { - 'int': ParamTypes.INT, - 'int_exp': ParamTypes.INT_EXP, - 'int_cat': ParamTypes.INT_CAT, - 'float': ParamTypes.FLOAT, - 'float_exp': ParamTypes.FLOAT_EXP, - 'float_cat': ParamTypes.FLOAT_CAT, - 'string': ParamTypes.STRING, - 'bool': 
ParamTypes.BOOL -} - -_KNOB_CONFIG_TO_TUNABLE_RANGE = { - ParamTypes.INT: (lambda x: x['range']), - ParamTypes.INT_EXP: (lambda x: x['range']), - ParamTypes.INT_CAT: (lambda x: x['values']), - ParamTypes.FLOAT: (lambda x: x['range']), - ParamTypes.FLOAT_EXP: (lambda x: x['range']), - ParamTypes.FLOAT_CAT: (lambda x: x['values']), - ParamTypes.STRING: (lambda x: x['values']), - ParamTypes.BOOL: (lambda x: x['values']) -} - -def _knob_to_tunable(name, knob_config): - tunable_type = _KNOB_TYPE_TO_TUNABLE_TYPE[knob_config['type']] - tunable_range = _KNOB_CONFIG_TO_TUNABLE_RANGE[tunable_type](knob_config) - return (name, HyperParameter(tunable_type, tunable_range)) \ No newline at end of file +def _knob_to_tunable(knob): + if isinstance(knob, CategoricalKnob): + if knob.value_type is int: + return HyperParameter(ParamTypes.INT_CAT, knob.values) + elif knob.value_type is float: + return HyperParameter(ParamTypes.FLOAT_CAT, knob.values) + elif knob.value_type is str: + return HyperParameter(ParamTypes.STRING, knob.values) + elif knob.value_type is bool: + return HyperParameter(ParamTypes.BOOL, knob.values) + elif isinstance(knob, IntegerKnob): + if knob.is_exp: + return HyperParameter(ParamTypes.INT_EXP, [knob.value_min, knob.value_max]) + else: + return HyperParameter(ParamTypes.INT, [knob.value_min, knob.value_max]) + elif isinstance(knob, FloatKnob): + if knob.is_exp: + return HyperParameter(ParamTypes.FLOAT_EXP, [knob.value_min, knob.value_max]) + else: + return HyperParameter(ParamTypes.FLOAT, [knob.value_min, knob.value_max]) \ No newline at end of file diff --git a/rafiki/model/__init__.py b/rafiki/model/__init__.py index 8dc86e14..d7af85de 100644 --- a/rafiki/model/__init__.py +++ b/rafiki/model/__init__.py @@ -2,3 +2,4 @@ parse_model_install_command, InvalidModelClassException, InvalidModelParamsException, \ ModelUtils from .log import ModelLogUtilsLogger +from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob \ No newline at end of file diff --git a/rafiki/model/knob.py b/rafiki/model/knob.py new file mode 100644 index 00000000..91b76920 --- /dev/null +++ b/rafiki/model/knob.py @@ -0,0 +1,118 @@ +import abc + +# TODO: Add documentation for each knob + +class BaseKnob(abc.ABC): + # TODO: Support conditional and validation logic + pass + +class CategoricalKnob(BaseKnob): + ''' + Knob representing a categorical value of type `int`, `float`, `bool` or `str`. + A generated value of this knob must be an element of `values`. + ''' + + def __init__(self, values): + self._values = values + (self._value_type) = self._validate_values(values) + + @property + def value_type(self): + return self._value_type + + @property + def values(self): + return self._values + + @staticmethod + def _validate_values(values): + if len(values) == 0: + raise ValueError('Length of `values` should at least 1') + + if isinstance(values[0], int): + value_type = int + elif isinstance(values[0], float): + value_type = float + elif isinstance(values[0], bool): + value_type = bool + elif isinstance(values[0], str): + value_type = str + else: + raise TypeError('Only the following types for `values` are supported: `int`, `float`, `bool`, `str`') + + if any([not isinstance(x, value_type) for x in values]): + raise TypeError('`values` should have elements of the same type') + + return (value_type) + +class IntegerKnob(BaseKnob): + ''' + Knob representing any `int` value within a specific interval (`value_min`, `value_max`). + `is_exp` specifies whether the knob value should be scaled exponentially. 
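+    E.g. `IntegerKnob(2, 128)` generates `int` values from 2 to 128.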
+ ''' + + def __init__(self, value_min, value_max, is_exp=False): + self._validate_values(value_min, value_max) + self._value_min = value_min + self._value_max = value_max + self._is_exp = is_exp + + @property + def value_min(self): + return self._value_min + + @property + def value_max(self): + return self._value_max + + @property + def is_exp(self): + return self._is_exp + + @staticmethod + def _validate_values(value_min, value_max): + if not isinstance(value_min, int): + raise ValueError('`value_min` should be an `int`') + + if not isinstance(value_max, int): + raise ValueError('`value_max` should be an `int`') + + if value_min > value_max: + raise ValueError('`value_max` should be at least `value_min`') + + +class FloatKnob(BaseKnob): + ''' + Knob representing any `float` value within a specific interval (`value_min`, `value_max`). + `is_exp` specifies whether the knob value should be scaled exponentially. + ''' + + def __init__(self, value_min, value_max, is_exp=False): + self._validate_values(value_min, value_max) + self._value_min = value_min + self._value_max = value_max + self._is_exp = is_exp + + @property + def value_min(self): + return self._value_min + + @property + def value_max(self): + return self._value_max + + @property + def is_exp(self): + return self._is_exp + + @staticmethod + def _validate_values(value_min, value_max): + if not isinstance(value_min, float) and not isinstance(value_min, int): + raise ValueError('`value_min` should be a `float` or `int`') + + if not isinstance(value_max, float) and not isinstance(value_max, int): + raise ValueError('`value_max` should be a `float` or `int`') + + if value_min > value_max: + raise ValueError('`value_max` should be at least `value_min`') + \ No newline at end of file diff --git a/rafiki/model/model.py b/rafiki/model/model.py index b30b6286..9e12620c 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -5,6 +5,7 @@ import pickle import uuid from importlib import import_module +import inspect from rafiki.advisor import Advisor, AdvisorType from rafiki.predictor import ensemble_predictions @@ -12,6 +13,7 @@ from .dataset import ModelDatasetUtils from .log import ModelLogUtils +from .knob import BaseKnob class InvalidModelClassException(Exception): pass class InvalidModelParamsException(Exception): pass @@ -24,57 +26,33 @@ def __init__(self): class BaseModel(abc.ABC): ''' Rafiki's base model class that Rafiki models should extend. - Rafiki models should implement all abstract methods according to their associated tasks' specifications. + Rafiki models should implement all abstract methods according to their associated tasks' specifications, + including the static method `get_knob_config()`. ''' - def __init__(self): + def __init__(self, **knobs): + ''' + Initialize a model instance with generated knob values. + These knob values will be chosen by Rafiki based on the model's knob config. + Call `super().__init__(**knobs)` as the first line of the model's `__init__` method, + followed by the model's initialization logic. + + :param knobs: Dictionary of knob values for this model instance + :type knobs: dict[str, any] + ''' self.utils = ModelUtils() - super().__init__() - @abc.abstractmethod - def get_knob_config(self): + @staticmethod + def get_knob_config(): ''' - Return a dictionary defining this model's knob configuration + Return a dictionary defining this model class' knob configuration (i.e. list of knob names, their data types and their ranges). 
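+
+        For example, it could return (mirroring the knobs of the `TfFeedForward` example model):
+
+        ::
+
+            {
+                'hidden_layer_units': IntegerKnob(2, 128),
+                'learning_rate': FloatKnob(1e-5, 1e-1, is_exp=True),
+                'batch_size': CategoricalKnob([16, 32, 64, 128])
+            }
+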
:returns: Dictionary defining this model's knob configuration - :rtype: - :: - - { - 'knobs': { - 'hidden_layer_units': { - 'type': 'int', - 'range': [2, 128] - }, - 'epochs': { - 'type': 'int', - 'range': [1, 100] - }, - 'learning_rate': { - 'type': 'float_exp', - 'range': [1e-5, 1e-1] - }, - 'batch_size': { - 'type': 'int_cat', - 'values': [1, 2, 4, 8, 16, 32, 64, 128] - } - } - } - + :rtype: dict[str, rafiki.model.BaseKnob] ''' raise NotImplementedError() - def init(self, knobs): - ''' - Initialize the model with a dictionary of knob values. - These knob values will be chosen by Rafiki based on the model's knob config. - - :param knobs: Dictionary of knob values for this model instance - :type knobs: dict[str, any] - ''' - pass - @abc.abstractmethod def train(self, dataset_uri): ''' @@ -164,7 +142,9 @@ def test_model_class(model_file_path, model_class, task, dependencies, \ :returns: The trained model ''' try: - print('Testing model installation...') + _print_header('Installing & checking model dependencies...') + _check_dependencies(dependencies) + # Test installation if not isinstance(dependencies, dict): raise Exception('`dependencies` should be a dict[str, str]') @@ -173,30 +153,24 @@ def test_model_class(model_file_path, model_class, task, dependencies, \ exit_code = os.system(install_command) if exit_code != 0: raise Exception('Error in installing model dependencies') - print('Testing loading of model...') + _print_header('Checking loading of model & model definition...') f = open(model_file_path, 'rb') model_file_bytes = f.read() - py_model_class = load_model_class(model_file_bytes, model_class) - model_inst = py_model_class() - if not isinstance(model_inst, BaseModel): - raise Exception('Model should extend `rafiki.model.BaseModel`') - - knob_config = model_inst.get_knob_config() - if not isinstance(knob_config, dict): - raise Exception('`get_knob_config()` should return a dict[str, any]') + py_model_class = load_model_class(model_file_bytes, model_class, temp_mod_name='your-model-file-temp') + _check_model_class(py_model_class) - if 'knobs' not in knob_config: - raise Exception('`knob_config` should have a \'knobs\' key') + _print_header('Checking model knob configuration...') + knob_config = py_model_class.get_knob_config() + _check_knob_config(knob_config) - print('Checking model dependencies & methods...') - _check_dependencies(py_model_class, dependencies) - _check_methods(py_model_class) - - print('Testing training & evaluation of model...') + _print_header('Checking model initialization...') advisor = Advisor(knob_config, advisor_type=AdvisorType.BTB_GP) if knobs is None: knobs = advisor.propose() print('Using knobs: {}'.format(knobs)) - model_inst.init(knobs) + model_inst = py_model_class(**knobs) + _check_model_inst(model_inst) + + _print_header('Checking training & evaluation of model...') model_inst.train(train_dataset_uri) score = model_inst.evaluate(test_dataset_uri) @@ -205,7 +179,7 @@ def test_model_class(model_file_path, model_class, task, dependencies, \ print('Score: {}'.format(score)) - print('Testing dumping of parameters of model...') + _print_header('Checking dumping of parameters of model...') parameters = model_inst.dump_parameters() if not isinstance(parameters, dict): @@ -218,13 +192,12 @@ def test_model_class(model_file_path, model_class, task, dependencies, \ traceback.print_stack() raise Exception('`parameters` should be serializable by `pickle`') - print('Testing loading of parameters of model...') + _print_header('Checking loading of parameters of 
model...') model_inst.destroy() - model_inst = py_model_class() - model_inst.init(knobs) + model_inst = py_model_class(**knobs) model_inst.load_parameters(parameters) - print('Testing predictions with model...') + _print_header('Checking predictions with model...') print('Using queries: {}'.format(queries)) predictions = model_inst.predict(queries) @@ -239,15 +212,18 @@ def test_model_class(model_file_path, model_class, task, dependencies, \ predictions = ensemble_predictions([predictions], task) print('Predictions: {}'.format(predictions)) - print('The model definition is valid!') + + _info('The model definition is valid!') return model_inst except Exception as e: raise InvalidModelClassException(e) -def load_model_class(model_file_bytes, model_class): - temp_mod_name = str(uuid.uuid4()) +def load_model_class(model_file_bytes, model_class, temp_mod_name=None): + if temp_mod_name is None: + temp_mod_name = str(uuid.uuid4()) + temp_model_file_name ='{}.py'.format(temp_mod_name) # Temporarily save the model file to disk @@ -290,7 +266,7 @@ def parse_model_install_command(dependencies, enable_gpu=False): return ' '.join(commands) -def _check_dependencies(py_model_class, dependencies): +def _check_dependencies(dependencies): for (dep, ver) in dependencies.items(): # Warn that TF models need to cater for GPU sharing if dep == ModelDependency.TENSORFLOW: @@ -302,15 +278,38 @@ def _check_dependencies(py_model_class, dependencies): elif dep == ModelDependency.KERAS: _warn('Keras models can enable GPU usage with by adding a `tensorflow` dependency.') -def _check_methods(py_model_class): - model_inst = py_model_class() - if getattr(model_inst, 'get_predict_label_mapping', None) is not None: +def _check_model_class(py_model_class): + if not issubclass(py_model_class, BaseModel): + raise Exception('Model should extend `rafiki.model.BaseModel`') + + if inspect.isfunction(getattr(py_model_class, 'get_predict_label_mapping', None)): _warn('`get_predict_label_mapping` has been deprecated') + + if inspect.isfunction(getattr(py_model_class, 'init', None)): + _warn('`init` has been deprecated - use `__init__` for your model\'s initialization logic instead') + + if inspect.isfunction(getattr(py_model_class, 'get_knob_config', None)) and \ + not isinstance(py_model_class.__dict__.get('get_knob_config', None), staticmethod): + _warn('`get_knob_config` has been changed to a `@staticmethod`') + +def _check_model_inst(model_inst): + if getattr(model_inst, 'utils', None) is None: + raise Exception('`super().__init__(**knobs)` should be called as the first line of the model\'s `__init__` method.') + +def _check_knob_config(knob_config): + if not isinstance(knob_config, dict) or \ + any([(not isinstance(name, str) or not isinstance(knob, BaseKnob)) for (name, knob) in knob_config.items()]): + raise Exception('Static method `get_knob_config()` should return a dict[str, BaseKnob]') def _info(msg): msg_color = '\033[94m' end_color = '\033[0m' - print('{}INFO: {}{}'.format(msg_color, msg, end_color)) + print('{}{}{}'.format(msg_color, msg, end_color)) + +def _print_header(msg): + print('-' * (len(msg) + 4)) + print('| {} |'.format(msg)) + print('-' * (len(msg) + 4)) def _warn(msg): msg_color = '\033[93m' From 7c4094297a865ce588033d9e4d4d4e824e3f0fd0 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Mon, 3 Dec 2018 12:38:15 +0800 Subject: [PATCH 02/22] Make worker-side changes for changes to knobs API --- dockerfiles/advisor.Dockerfile | 2 ++ rafiki/model/knob.py | 4 ++-- rafiki/worker/inference.py | 3 +-- 
rafiki/worker/train.py | 9 +++------ 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dockerfiles/advisor.Dockerfile b/dockerfiles/advisor.Dockerfile index 5b17bb9a..e9ceda37 100644 --- a/dockerfiles/advisor.Dockerfile +++ b/dockerfiles/advisor.Dockerfile @@ -21,6 +21,8 @@ ENV PYTHONPATH $DOCKER_WORKDIR_PATH # Install python dependencies COPY rafiki/utils/requirements.txt utils/requirements.txt RUN pip install -r utils/requirements.txt +RUN pip install -r model/requirements.txt +COPY rafiki/container/requirements.txt container/requirements.txt COPY rafiki/advisor/requirements.txt advisor/requirements.txt RUN pip install -r advisor/requirements.txt diff --git a/rafiki/model/knob.py b/rafiki/model/knob.py index 91b76920..c7325bff 100644 --- a/rafiki/model/knob.py +++ b/rafiki/model/knob.py @@ -47,7 +47,7 @@ def _validate_values(values): class IntegerKnob(BaseKnob): ''' - Knob representing any `int` value within a specific interval (`value_min`, `value_max`). + Knob representing any `int` value within a specific interval [`value_min`, `value_max`]. `is_exp` specifies whether the knob value should be scaled exponentially. ''' @@ -83,7 +83,7 @@ def _validate_values(value_min, value_max): class FloatKnob(BaseKnob): ''' - Knob representing any `float` value within a specific interval (`value_min`, `value_max`). + Knob representing any `float` value within a specific interval [`value_min`, `value_max`]. `is_exp` specifies whether the knob value should be scaled exponentially. ''' diff --git a/rafiki/worker/inference.py b/rafiki/worker/inference.py index ffa884e7..332e37ac 100644 --- a/rafiki/worker/inference.py +++ b/rafiki/worker/inference.py @@ -80,8 +80,7 @@ def _load_model(self, trial_id): # Load model based on trial clazz = load_model_class(model.model_file_bytes, model.model_class) - model_inst = clazz() - model_inst.init(trial.knobs) + model_inst = clazz(**trial.knobs) # Unpickle model parameters and load it parameters = pickle.loads(trial.parameters) diff --git a/rafiki/worker/train.py b/rafiki/worker/train.py index b22c7851..8ef2a4c5 100644 --- a/rafiki/worker/train.py +++ b/rafiki/worker/train.py @@ -133,15 +133,13 @@ def stop(self): def _train_and_evaluate_model(self, clazz, knobs, train_dataset_uri, test_dataset_uri): - model_inst = clazz() + # Initialize model + model_inst = clazz(**knobs) # Insert model training logger model_logger = TrainModelLogUtilsLogger() model_inst.utils.set_logger(model_logger) - # Initialize model - model_inst.init(knobs) - # Train model model_inst.train(train_dataset_uri) @@ -189,8 +187,7 @@ def _stop_worker(self): def _create_advisor(self, clazz): # Retrieve knob config for model of worker - model_inst = clazz() - knob_config = model_inst.get_knob_config() + knob_config = clazz.get_knob_config() # Create advisor associated with worker res = self._client.create_advisor(knob_config, advisor_id=self._service_id) From ad4b25cd83d799ea91f0f650146f2117d6d01012 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Mon, 3 Dec 2018 12:38:37 +0800 Subject: [PATCH 03/22] Increment version no --- .env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.sh b/.env.sh index 4a14a676..15497c8f 100644 --- a/.env.sh +++ b/.env.sh @@ -1,6 +1,6 @@ # Core configuration for Rafiki export DOCKER_NETWORK=rafiki -export RAFIKI_VERSION=0.0.7 +export RAFIKI_VERSION=0.0.8 export RAFIKI_IP_ADDRESS=127.0.0.1 export ADMIN_EXT_PORT=3000 export ADMIN_WEB_EXT_PORT=3001 From f819e88327ea4d1f7bd44432ef34c1130c37047d Mon Sep 17 00:00:00 2001 From: Ngin Yun 
Chuan Date: Mon, 3 Dec 2018 14:13:36 +0800 Subject: [PATCH 04/22] Improve docs on new `__init__` of BaseModel --- .../src/user/client-create-models.include.rst | 5 +-- docs/src/user/creating-models.rst | 7 +++-- examples/models/image_classification/SkDt.py | 4 +-- examples/models/image_classification/SkSvm.py | 2 +- rafiki/model/model.py | 31 ++++++++++++------- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/src/user/client-create-models.include.rst b/docs/src/user/client-create-models.include.rst index cda9fd06..53b4472c 100644 --- a/docs/src/user/client-create-models.include.rst +++ b/docs/src/user/client-create-models.include.rst @@ -1,6 +1,7 @@ -To create a model, you will need to submit a model class that extends :class:`rafiki.model.BaseModel` in a single Python file, -where the model's implementation conforms to a specific task (see :ref:`tasks`). +To create a model, you will need to submit a model class that conforms to the specification +by :class:`rafiki.model.BaseModel`, written in a `single` Python file. +The model's implementation should conform to a specific task (see :ref:`tasks`). Refer to the parameters of :meth:`rafiki.client.Client.create_model` for configuring how your model runs on Rafiki, and refer to :ref:`creating-models` to understand more about how to write & test models for Rafiki. diff --git a/docs/src/user/creating-models.rst b/docs/src/user/creating-models.rst index 894e5203..2aca6cb1 100644 --- a/docs/src/user/creating-models.rst +++ b/docs/src/user/creating-models.rst @@ -6,9 +6,10 @@ Creating Models .. contents:: Table of Contents - -To create a model on Rafiki, use the :meth:`rafiki.client.Client.create_model` method. - +To create a model, you will need to submit a model class that conforms to the specification +by :class:`rafiki.model.BaseModel`, written in a `single` Python file. +The model's implementation should conform to a specific task (see :ref:`tasks`). +To submit the model to Rafiki, use the :meth:`rafiki.client.Client.create_model` method. Model Environment -------------------------------------------------------------------- diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index 4f6d53ff..984922d0 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -25,8 +25,8 @@ def __init__(self, **knobs): super().__init__(**knobs) self._knobs = knobs self._clf = self._build_classifier( - self._knobs.get('max_depth'), - self._knobs.get('criterion') + knobs.get('max_depth'), + knobs.get('criterion') ) def train(self, dataset_uri): diff --git a/examples/models/image_classification/SkSvm.py b/examples/models/image_classification/SkSvm.py index 39b86915..35b4b761 100644 --- a/examples/models/image_classification/SkSvm.py +++ b/examples/models/image_classification/SkSvm.py @@ -29,7 +29,7 @@ def __init__(self, **knobs): self._clf = self._build_classifier( knobs.get('max_iter'), knobs.get('kernel'), - knobs.get('gamma') , + knobs.get('gamma'), knobs.get('C') ) diff --git a/rafiki/model/model.py b/rafiki/model/model.py index 9e12620c..7610bf31 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -27,19 +27,28 @@ class BaseModel(abc.ABC): ''' Rafiki's base model class that Rafiki models should extend. Rafiki models should implement all abstract methods according to their associated tasks' specifications, - including the static method `get_knob_config()`. - ''' + together with the static method ``get_knob_config()``. 
-
+    In the model's ``__init__`` method, call ``super().__init__(**knobs)`` as the first line,
+    followed by the model's initialization logic. The model should initialize itself with ``knobs``,
+    a set of generated knob values for the instance, and possibly save the knobs' values as
+    attribute(s) of the model instance. These knob values will be chosen by Rafiki based on the model's knob config.
+
+    For example:
+
+    ::
+
+        def __init__(self, **knobs):
+            super().__init__(**knobs)
+            self.__dict__.update(knobs)
+            ...
+            self._build_model(self.knob1, self.knob2)
+
+
+    :param knobs: Dictionary of knob values for this model instance
+    :type knobs: dict[str, any]
+    '''
+    def __init__(self, **knobs):
         self.utils = ModelUtils()

From f8a16ddf31a7180f83c35bad8fee5cd174c427c0 Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Mon, 3 Dec 2018 16:23:41 +0800
Subject: [PATCH 05/22] Finish worker-side changes for model knob API update,
 with serialization & deserialization of knob classes

---
 dockerfiles/advisor.Dockerfile |  2 +-
 rafiki/advisor/app.py          |  8 ++++
 rafiki/client/client.py        |  6 +--
 rafiki/model/__init__.py       |  3 +-
 rafiki/model/knob.py           | 69 ++++++++++++++++++++++++++++------
 rafiki/model/model.py          |  6 ++-
 rafiki/worker/train.py         |  5 ++-
 scripts/start_worker.py        |  3 +-
 8 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/dockerfiles/advisor.Dockerfile b/dockerfiles/advisor.Dockerfile
index e9ceda37..dea42998 100644
--- a/dockerfiles/advisor.Dockerfile
+++ b/dockerfiles/advisor.Dockerfile
@@ -21,8 +21,8 @@ ENV PYTHONPATH $DOCKER_WORKDIR_PATH
 # Install python dependencies
 COPY rafiki/utils/requirements.txt utils/requirements.txt
 RUN pip install -r utils/requirements.txt
+COPY rafiki/model/requirements.txt model/requirements.txt
 RUN pip install -r model/requirements.txt
-COPY rafiki/container/requirements.txt container/requirements.txt
 COPY rafiki/advisor/requirements.txt advisor/requirements.txt
 RUN pip install -r advisor/requirements.txt

diff --git a/rafiki/advisor/app.py b/rafiki/advisor/app.py
index 915e08f8..72253281 100644
--- a/rafiki/advisor/app.py
+++ b/rafiki/advisor/app.py
@@ -1,7 +1,9 @@
 from flask import Flask, request, jsonify
 import os
 import traceback
+import json

+from rafiki.model import deserialize_knob_config
 from rafiki.constants import UserType
 from rafiki.config import SUPERADMIN_EMAIL, SUPERADMIN_PASSWORD
 from rafiki.utils.auth import generate_token, decode_token, UnauthorizedException, auth
@@ -40,6 +42,12 @@ def generate_user_token():
 @auth([UserType.ADMIN, UserType.APP_DEVELOPER])
 def create_advisor(auth):
     params = get_request_params()
+
+    # Deserialize knob config
+    if 'knob_config_str' in params:
+        params['knob_config'] = deserialize_knob_config(params['knob_config_str'])
+        del params['knob_config_str']
+
     return jsonify(service.create_advisor(**params))

 @app.route('/advisors/<advisor_id>/propose', methods=['POST'])
diff --git a/rafiki/client/client.py b/rafiki/client/client.py
index 8a609562..0d210f5f 100644
--- a/rafiki/client/client.py
+++ b/rafiki/client/client.py
@@ -365,19 +365,19 @@ def stop_inference_job(self, app, app_version=-1):
     # Advisors
     ####################################

-    def create_advisor(self, knob_config, advisor_id=None):
+    def create_advisor(self, knob_config_str, advisor_id=None):
         '''
         Creates a Rafiki advisor. If `advisor_id` is passed, it will create an advisor
         of that ID, or do nothing if an advisor of that ID has already been created.

-        :param knob_config: Knob configuration for advisor session
-        :type knob_config: dict[str, any]
+        :param knob_config_str: Serialized knob configuration for advisor session
+        :type knob_config_str: str
         :param str advisor_id: ID of advisor to create
         '''
         data = self._post('/advisors', target='advisor', json={
             'advisor_id': advisor_id,
-            'knob_config': knob_config
+            'knob_config_str': knob_config_str
         })

         return data
diff --git a/rafiki/model/__init__.py b/rafiki/model/__init__.py
index d7af85de..68f59c24 100644
--- a/rafiki/model/__init__.py
+++ b/rafiki/model/__init__.py
@@ -2,4 +2,5 @@ parse_model_install_command, InvalidModelClassException, InvalidModelParamsException, \
     ModelUtils
 from .log import ModelLogUtilsLogger
-from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob
\ No newline at end of file
+from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob, \
+    serialize_knob_config, deserialize_knob_config
\ No newline at end of file
diff --git a/rafiki/model/knob.py b/rafiki/model/knob.py
index c7325bff..7ab10d07 100644
--- a/rafiki/model/knob.py
+++ b/rafiki/model/knob.py
@@ -1,18 +1,46 @@
 import abc
-
-# TODO: Add documentation for each knob
+import json

 class BaseKnob(abc.ABC):
+    '''
+    The base class for a knob type.
+    '''
+
     # TODO: Support conditional and validation logic
-    pass
+
+    def __init__(self, knob_args={}):
+        self._knob_args = knob_args
+
+    def to_json(self):
+        return json.dumps({
+            'type': self.__class__.__name__,
+            'args': self._knob_args
+        })
+
+    @classmethod
+    def from_json(cls, json_str):
+        json_dict = json.loads(json_str)
+
+        if 'type' not in json_dict or 'args' not in json_dict:
+            raise ValueError('Invalid JSON representation of knob: {}.'.format(json_str))
+
+        knob_type = json_dict['type']
+        knob_args = json_dict['args']
+        knob_classes = [CategoricalKnob, IntegerKnob, FloatKnob]
+        for clazz in knob_classes:
+            if clazz.__name__ == knob_type:
+                return clazz(**knob_args)
+
+        raise ValueError('Invalid knob type: {}'.format(knob_type))

 class CategoricalKnob(BaseKnob):
     '''
-    Knob representing a categorical value of type `int`, `float`, `bool` or `str`.
-    A generated value of this knob must be an element of `values`.
+    Knob type representing a categorical value of type ``int``, ``float``, ``bool`` or ``str``.
+    A generated value of this knob would be an element of ``values``.
     '''
-
     def __init__(self, values):
+        knob_args = { 'values': values }
+        super().__init__(knob_args)
         self._values = values
         (self._value_type) = self._validate_values(values)

 class IntegerKnob(BaseKnob):
     '''
-    Knob representing any `int` value within a specific interval [`value_min`, `value_max`].
-    `is_exp` specifies whether the knob value should be scaled exponentially.
+    Knob type representing `any` ``int`` value within a specific interval [``value_min``, ``value_max``].
+    ``is_exp`` specifies whether the knob value should be scaled exponentially.
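+    E.g. ``IntegerKnob(1, 32, is_exp=True)`` would have its value tuned on an exponential scale over [1, 32].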
''' def __init__(self, value_min, value_max, is_exp=False): + knob_args = { 'value_min': value_min, 'value_max': value_max, 'is_exp': is_exp } + super().__init__(knob_args) self._validate_values(value_min, value_max) self._value_min = value_min self._value_max = value_max @@ -83,11 +113,13 @@ def _validate_values(value_min, value_max): class FloatKnob(BaseKnob): ''' - Knob representing any `float` value within a specific interval [`value_min`, `value_max`]. - `is_exp` specifies whether the knob value should be scaled exponentially. + Knob type representing `any` ``float`` value within a specific interval [``value_min``, ``value_max``]. + ``is_exp`` specifies whether the knob value should be scaled exponentially. ''' def __init__(self, value_min, value_max, is_exp=False): + knob_args = { 'value_min': value_min, 'value_max': value_max, 'is_exp': is_exp } + super().__init__(knob_args) self._validate_values(value_min, value_max) self._value_min = value_min self._value_max = value_max @@ -115,4 +147,19 @@ def _validate_values(value_min, value_max): if value_min > value_max: raise ValueError('`value_max` should be at least `value_min`') - \ No newline at end of file + + +def deserialize_knob_config(knob_config_str): + knob_config = { + name: BaseKnob.from_json(knob_str) + for (name, knob_str) in json.loads(knob_config_str).items() + } + return knob_config + +def serialize_knob_config(knob_config): + knob_config_str = json.dumps({ + name: knob.to_json() + for (name, knob) in knob_config.items() + }) + return knob_config_str + \ No newline at end of file diff --git a/rafiki/model/model.py b/rafiki/model/model.py index 7610bf31..e46cb818 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -13,7 +13,7 @@ from .dataset import ModelDatasetUtils from .log import ModelLogUtils -from .knob import BaseKnob +from .knob import BaseKnob, serialize_knob_config, deserialize_knob_config class InvalidModelClassException(Exception): pass class InvalidModelParamsException(Exception): pass @@ -310,6 +310,10 @@ def _check_knob_config(knob_config): any([(not isinstance(name, str) or not isinstance(knob, BaseKnob)) for (name, knob) in knob_config.items()]): raise Exception('Static method `get_knob_config()` should return a dict[str, BaseKnob]') + # Try serializing and deserialize knob config + knob_config_str = serialize_knob_config(knob_config) + knob_config = deserialize_knob_config(knob_config_str) + def _info(msg): msg_color = '\033[94m' end_color = '\033[0m' diff --git a/rafiki/worker/train.py b/rafiki/worker/train.py index 8ef2a4c5..b287af6b 100644 --- a/rafiki/worker/train.py +++ b/rafiki/worker/train.py @@ -7,7 +7,7 @@ from rafiki.config import SUPERADMIN_EMAIL, SUPERADMIN_PASSWORD from rafiki.constants import TrainJobStatus, TrialStatus, BudgetType -from rafiki.model import load_model_class +from rafiki.model import load_model_class, serialize_knob_config from rafiki.utils.log import JobLogger from rafiki.model import ModelLogUtilsLogger from rafiki.db import Database @@ -188,9 +188,10 @@ def _stop_worker(self): def _create_advisor(self, clazz): # Retrieve knob config for model of worker knob_config = clazz.get_knob_config() + knob_config_str = serialize_knob_config(knob_config) # Create advisor associated with worker - res = self._client.create_advisor(knob_config, advisor_id=self._service_id) + res = self._client.create_advisor(knob_config_str, advisor_id=self._service_id) advisor_id = res['id'] return advisor_id diff --git a/scripts/start_worker.py b/scripts/start_worker.py index 
25863251..c6560ef6 100644 --- a/scripts/start_worker.py +++ b/scripts/start_worker.py @@ -12,7 +12,8 @@ def start_service(service_id, service_type): install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') exit_code = os.system(install_command) if exit_code != 0: - raise Exception('Install command gave non-zero exit code: {}'.format(install_command)) + # TODO: Fix failing install command for `pip install torch==0.4.1;`` + raise Exception('Install command gave non-zero exit code: "{}"'.format(install_command)) if service_type == ServiceType.TRAIN: from rafiki.worker import TrainWorker From 92f46e19ebbbacd95c36cf472cdd43f15a210c17 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Mon, 3 Dec 2018 16:24:07 +0800 Subject: [PATCH 06/22] Improve docs for model developers, describing new knob classes --- docs/src/python/rafiki.model.rst | 27 ++++++++++++++++++++++++ docs/src/user/creating-models.rst | 35 ++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/docs/src/python/rafiki.model.rst b/docs/src/python/rafiki.model.rst index 30cc9da2..cd5ce407 100644 --- a/docs/src/python/rafiki.model.rst +++ b/docs/src/python/rafiki.model.rst @@ -1,9 +1,36 @@ rafiki.model ==================================================================== +.. contents:: Table of Contents + +Core Classes +-------------------------------------------------------------------- + .. autoclass:: rafiki.model.BaseModel :members: +.. autoclass:: rafiki.model.BaseKnob + :members: + + +.. _`knob-types`: + +Knob Classes +-------------------------------------------------------------------- + +.. autoclass:: rafiki.model.CategoricalKnob + :members: + +.. autoclass:: rafiki.model.IntegerKnob + :members: + +.. autoclass:: rafiki.model.FloatKnob + :members: + + +Utility Classes & Methods +-------------------------------------------------------------------- + .. automethod:: rafiki.model.test_model_class .. autoclass:: rafiki.model.log.ModelLogUtils diff --git a/docs/src/user/creating-models.rst b/docs/src/user/creating-models.rst index 2aca6cb1..6594b760 100644 --- a/docs/src/user/creating-models.rst +++ b/docs/src/user/creating-models.rst @@ -11,6 +11,28 @@ by :class:`rafiki.model.BaseModel`, written in a `single` Python file. The model's implementation should conform to a specific task (see :ref:`tasks`). To submit the model to Rafiki, use the :meth:`rafiki.client.Client.create_model` method. +Implementing Models +-------------------------------------------------------------------- + +Details on how to implement a model are located in the documentation of :class:`rafiki.model.BaseModel`. + +In defining the hyperparameters (knobs) of a model, refer to the documentation at :ref:`knob-types` for the full list of knob types. + +After implementing your model, it is highly recommended to use :meth:`rafiki.model.test_model_class` +to test your model. This method simulates a full train-inference flow on your model, ensuring that +it is likely to work on Rafiki. + + +Logging & Dataset Loading in Models +-------------------------------------------------------------------- + +:class:`rafiki.model.BaseModel` has a property ``utils`` that subclasses the model utility classes +:class:`rafiki.model.log.ModelLogUtils` and :class:`rafiki.model.dataset.ModelDatasetUtils`. They +help with model logging & dataset loading respectively. + +Refer to the sample usage in the implementation of `./examples/models/image_classification/TfSingleHiddenLayer.py `_. 
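+
+For instance, logging and dataset loading in a model's ``train`` method might look like this (a condensed sketch adapted from the `SkDt` example, not a full model):
+
+::
+
+    def train(self, dataset_uri):
+        dataset = self.utils.load_dataset_of_image_files(dataset_uri)
+        ...
+        self.utils.log('Train accuracy: {}'.format(accuracy))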
+ + Model Environment -------------------------------------------------------------------- @@ -33,14 +55,13 @@ Models should run at least run on CPU-only machines and optionally leverage on a Refer to the parameters of :meth:`rafiki.client.Client.create_model` for configuring how your model runs on Rafiki. -Testing Models +Sample Models -------------------------------------------------------------------- To illustrate how to write models on Rafiki, we have written the following: - Sample pre-processing logic to convert common dataset formats to Rafiki's own dataset formats in `./examples/datasets/ `_ - Sample models in `./examples/models/ `_ - - A method :meth:`rafiki.model.test_model_class` that simulates a full train-inference flow on any Rafiki model To start testing your model, first install the Python dependencies at ``rafiki/model/requirements.txt``: @@ -94,13 +115,3 @@ Example: Testing Models for ``POS_TAGGING`` python examples/models/pos_tagging/BigramHmm.py python examples/models/pos_tagging/PyBiLstm.py - - -Model Logging & Dataset Loading --------------------------------------------------------------------- - -:class:`rafiki.model.BaseModel` has a property ``utils`` that subclasses the model utility classes -:class:`rafiki.model.log.ModelLogUtils` and :class:`rafiki.model.dataset.ModelDatasetUtils`. They -help with model logging & dataset loading respectively. - -Refer to the sample usage in the implementation of `./examples/models/image_classification/TfSingleHiddenLayer.py `_. \ No newline at end of file From 42713b69ca2ace1a999721523f546926c195e8e1 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 6 Dec 2018 20:03:39 +0800 Subject: [PATCH 07/22] Rework model logging implementation (stream logs into DB) & slightly tweak model logging API --- examples/models/image_classification/SkDt.py | 2 +- .../image_classification/TfFeedForward.py | 22 ++- examples/models/pos_tagging/BigramHmm.py | 2 +- examples/models/pos_tagging/PyBiLstm.py | 22 ++- rafiki/admin/admin.py | 9 +- rafiki/db/database.py | 29 +-- rafiki/db/schema.py | 14 +- rafiki/model/__init__.py | 2 +- rafiki/model/log.py | 170 +++++++++++++----- rafiki/model/model.py | 6 +- rafiki/worker/train.py | 76 ++++---- 11 files changed, 225 insertions(+), 129 deletions(-) diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index 984922d0..5263e921 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -39,7 +39,7 @@ def train(self, dataset_uri): # Compute train accuracy preds = self._clf.predict(X) accuracy = sum(y == preds) / len(y) - self.utils.log('Train accuracy: {}'.format(accuracy)) + self.logger.log('Train accuracy: {}'.format(accuracy)) def evaluate(self, dataset_uri): dataset = self.utils.load_dataset_of_image_files(dataset_uri) diff --git a/examples/models/image_classification/TfFeedForward.py b/examples/models/image_classification/TfFeedForward.py index b907c66b..238fdbd3 100644 --- a/examples/models/image_classification/TfFeedForward.py +++ b/examples/models/image_classification/TfFeedForward.py @@ -35,21 +35,24 @@ def __init__(self, **knobs): config = tf.ConfigProto() config.gpu_options.allow_growth = True self._sess = tf.Session(graph=self._graph, config=config) - self._define_plots() def train(self, dataset_uri): im_sz = self._knobs.get('image_size') bs = self._knobs.get('batch_size') ep = self._knobs.get('epochs') + self.logger.log('Available devices: {}'.format(str(device_lib.list_local_devices()))) + 
+ # Define 2 plots: Loss against time, loss against epochs + self.logger.define_loss_plot() + self.logger.define_plot('Loss Over Time', ['loss']) + dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) num_classes = dataset.classes (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) classes = np.asarray(classes) - self.utils.log('Available devices: {}'.format(str(device_lib.list_local_devices()))) - with self._graph.as_default(): self._model = self._build_model(num_classes) with self._sess.as_default(): @@ -66,8 +69,8 @@ def train(self, dataset_uri): # Compute train accuracy (loss, accuracy) = self._model.evaluate(images, classes) - self.utils.log('Train loss: {}'.format(loss)) - self.utils.log('Train accuracy: {}'.format(accuracy)) + self.logger.log('Train loss: {}'.format(loss)) + self.logger.log('Train accuracy: {}'.format(accuracy)) def evaluate(self, dataset_uri): im_sz = self._knobs.get('image_size') @@ -80,7 +83,7 @@ def evaluate(self, dataset_uri): with self._graph.as_default(): with self._sess.as_default(): (loss, accuracy) = self._model.evaluate(images, classes) - self.utils.log('Test loss: {}'.format(loss)) + self.logger.log('Test loss: {}'.format(loss)) return accuracy @@ -134,12 +137,7 @@ def load_parameters(self, params): def _on_train_epoch_end(self, epoch, logs): loss = logs['loss'] - self.utils.log_loss_metric(loss, epoch) - - def _define_plots(self): - # Define 2 plots: Loss against time, loss against epochs - self.utils.define_loss_plot() - self.utils.define_plot('Loss Over Time', ['loss']) + self.logger.log_loss(loss, epoch) def _build_model(self, num_classes): units = self._knobs.get('hidden_layer_units') diff --git a/examples/models/pos_tagging/BigramHmm.py b/examples/models/pos_tagging/BigramHmm.py index b27efc12..15b2cfc0 100644 --- a/examples/models/pos_tagging/BigramHmm.py +++ b/examples/models/pos_tagging/BigramHmm.py @@ -30,7 +30,7 @@ def train(self, dataset_uri): (sents_tokens, sents_tags) = zip(*[zip(*sent) for sent in dataset]) self._num_tags = dataset.tag_num_classes[0] (self._trans_probs, self._emiss_probs) = self._compute_probs(self._num_tags, sents_tokens, sents_tags) - self.utils.log('No. of tags: {}'.format(self._num_tags)) + self.logger.log('No. of tags: {}'.format(self._num_tags)) def evaluate(self, dataset_uri): dataset = self.utils.load_dataset_of_corpus(dataset_uri) diff --git a/examples/models/pos_tagging/PyBiLstm.py b/examples/models/pos_tagging/PyBiLstm.py index 603fe9eb..cedc6997 100644 --- a/examples/models/pos_tagging/PyBiLstm.py +++ b/examples/models/pos_tagging/PyBiLstm.py @@ -35,21 +35,20 @@ def get_knob_config(): def __init__(self, **knobs): super().__init__(**knobs) self._knobs = knobs - self._define_plots() def train(self, dataset_uri): dataset = self.utils.load_dataset_of_corpus(dataset_uri) self._word_dict = self._extract_word_dict(dataset) self._tag_count = dataset.tag_num_classes[0] - self.utils.log('No. of unique words: {}'.format(len(self._word_dict))) - self.utils.log('No. of tags: {}'.format(self._tag_count)) + self.logger.log('No. of unique words: {}'.format(len(self._word_dict))) + self.logger.log('No. 
of tags: {}'.format(self._tag_count)) (self._net, self._optimizer) = self._train(dataset) sents_tags = self._predict(dataset) acc = self._compute_accuracy(dataset, sents_tags) - self.utils.log('Train accuracy: {}'.format(acc)) + self.logger.log('Train accuracy: {}'.format(acc)) def evaluate(self, dataset_uri): dataset = self.utils.load_dataset_of_corpus(dataset_uri) @@ -139,7 +138,7 @@ def _predict(self, dataset): Tensor = torch.LongTensor if torch.cuda.is_available(): - self.utils.log('Using CUDA...') + self.logger.log('Using CUDA...') net = net.cuda() Tensor = torch.cuda.LongTensor @@ -172,11 +171,15 @@ def _train(self, dataset): null_tag = self._tag_count # Tag to ignore (from padding of sentences during batching) B = math.ceil(len(dataset) / N) # No. of batches + # Define 2 plots: Loss against time, loss against epochs + self.logger.define_loss_plot() + self.logger.define_plot('Loss Over Time', ['loss']) + (net, optimizer) = self._create_model() Tensor = torch.LongTensor if torch.cuda.is_available(): - self.utils.log('Using CUDA...') + self.logger.log('Using CUDA...') net = net.cuda() Tensor = torch.cuda.LongTensor @@ -206,15 +209,10 @@ def _train(self, dataset): total_loss += loss.item() - self.utils.log_loss_metric(loss=(total_loss / B), epoch=epoch) + self.logger.log_loss(loss=(total_loss / B), epoch=epoch) return (net, optimizer) - def _define_plots(self): - # Define 2 plots: Loss against time, loss against epochs - self.utils.define_loss_plot() - self.utils.define_plot('Loss Over Time', ['loss']) - def _compute_accuracy(self, dataset, sents_tags): total = 0 correct = 0 diff --git a/rafiki/admin/admin.py b/rafiki/admin/admin.py index 0ee523cd..a9da05b4 100644 --- a/rafiki/admin/admin.py +++ b/rafiki/admin/admin.py @@ -7,8 +7,8 @@ from rafiki.db import Database from rafiki.constants import ServiceStatus, UserType, ServiceType, TrainJobStatus from rafiki.config import MIN_SERVICE_PORT, MAX_SERVICE_PORT, SUPERADMIN_EMAIL, SUPERADMIN_PASSWORD +from rafiki.model import ModelLogger from rafiki.container import DockerSwarmContainerManager -from rafiki.utils.log import JobLogger from .services_manager import ServicesManager @@ -256,10 +256,9 @@ def get_trial_logs(self, trial_id): if trial is None: raise InvalidTrialException() - job_logger = JobLogger() - job_logger.import_logs(trial.logs) - (plots, metrics, messages) = job_logger.read_logs() - job_logger.destroy() + trial_logs = self._db.get_trial_logs(trial_id) + log_lines = [x.line for x in trial_logs] + (messages, metrics, plots) = ModelLogger.parse_logs(log_lines) return { 'plots': plots, diff --git a/rafiki/db/database.py b/rafiki/db/database.py index 4d3b8103..5ec9c58a 100644 --- a/rafiki/db/database.py +++ b/rafiki/db/database.py @@ -7,7 +7,8 @@ TrialStatus, ServiceStatus, InferenceJobStatus from .schema import Base, TrainJob, TrainJobWorker, \ - InferenceJob, Trial, Model, User, Service, InferenceJobWorker + InferenceJob, Trial, Model, User, Service, InferenceJobWorker, \ + TrialLog class Database(object): def __init__(self, @@ -199,11 +200,6 @@ def get_inference_jobs_of_app(self, app): return inference_jobs - def get_workers_of_inference_job(self, inference_job_id): - workers = self._session.query(InferenceJobWorker) \ - .filter(InferenceJobWorker.inference_job_id == inference_job_id).all() - return workers - #################################### # Inference Job Workers #################################### @@ -335,10 +331,17 @@ def get_trial(self, id): return trial + def get_trial_logs(self, id): + trial_logs = 
self._session.query(TrialLog) \ + .filter(TrialLog.trial_id == id) \ + .all() + + return trial_logs + def get_best_trials_of_train_job(self, train_job_id, max_count=3): trials = self._session.query(Trial) \ .filter(Trial.train_job_id == train_job_id) \ - .filter(Trial.status == TrainJobStatus.COMPLETED) \ + .filter(Trial.status == TrialStatus.COMPLETED) \ .order_by(Trial.score.desc()) \ .limit(max_count).all() @@ -354,8 +357,7 @@ def get_trials_of_app(self, app): def get_trials_of_train_job(self, train_job_id): trials = self._session.query(Trial) \ - .join(TrainJob, Trial.train_job_id == TrainJob.id) \ - .filter(TrainJob.id == train_job_id) \ + .filter(Trial.train_job_id == train_job_id) \ .order_by(Trial.datetime_started.desc()).all() return trials @@ -366,15 +368,19 @@ def mark_trial_as_errored(self, trial): self._session.add(trial) return trial - def mark_trial_as_complete(self, trial, score, parameters, logs): + def mark_trial_as_complete(self, trial, score, parameters): trial.status = TrialStatus.COMPLETED trial.score = score trial.datetime_stopped = datetime.datetime.utcnow() trial.parameters = parameters - trial.logs = logs self._session.add(trial) return trial + def add_trial_log(self, trial, line, level): + trial_log = TrialLog(trial_id=trial.id, line=line, level=level) + self._session.add(trial_log) + return trial_log + def mark_trial_as_terminated(self, trial): trial.status = TrialStatus.TERMINATED trial.datetime_stopped = datetime.datetime.utcnow() @@ -418,5 +424,4 @@ def _make_connection_url(self, host, port, db, user, password): def _define_tables(self): Base.metadata.create_all(bind=self._engine) - diff --git a/rafiki/db/schema.py b/rafiki/db/schema.py index 2fe4fe3e..a8150222 100644 --- a/rafiki/db/schema.py +++ b/rafiki/db/schema.py @@ -64,7 +64,6 @@ class Service(Base): container_service_name = Column(String) container_service_id = Column(String) - class TrainJob(Base): __tablename__ = 'train_job' @@ -87,21 +86,28 @@ class TrainJobWorker(Base): train_job_id = Column(String, ForeignKey('train_job.id')) model_id = Column(String, ForeignKey('model.id'), nullable=False) - class Trial(Base): __tablename__ = 'trial' id = Column(String, primary_key=True, default=generate_uuid) knobs = Column(JSON, nullable=False) datetime_started = Column(DateTime, nullable=False, default=generate_datetime) - train_job_id = Column(String, ForeignKey('train_job.id'), nullable=False) + train_job_id = Column(String, ForeignKey('train_job.id'), nullable=False, index=True) model_id = Column(String, ForeignKey('model.id'), nullable=False) status = Column(String, nullable=False, default=TrialStatus.RUNNING) score = Column(Float, default=0) parameters = Column(Binary, default=None) - logs = Column(Binary, default=None) datetime_stopped = Column(DateTime, default=None) +class TrialLog(Base): + __tablename__ = 'trial_log' + + id = Column(String, primary_key=True, default=generate_uuid) + datetime = Column(DateTime, default=generate_datetime) + trial_id = Column(String, ForeignKey('trial.id'), nullable=False, index=True) + line = Column(String, nullable=False) + level = Column(String) + class User(Base): __tablename__ = 'user' diff --git a/rafiki/model/__init__.py b/rafiki/model/__init__.py index 68f59c24..79a2df30 100644 --- a/rafiki/model/__init__.py +++ b/rafiki/model/__init__.py @@ -1,6 +1,6 @@ from .model import BaseModel, test_model_class, load_model_class, \ parse_model_install_command, InvalidModelClassException, InvalidModelParamsException, \ ModelUtils -from .log import ModelLogUtilsLogger +from 
.log import LogType, ModelLogger from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob, \ serialize_knob_config, deserialize_knob_config \ No newline at end of file diff --git a/rafiki/model/log.py b/rafiki/model/log.py index 7912d0a7..6adcd66d 100644 --- a/rafiki/model/log.py +++ b/rafiki/model/log.py @@ -1,73 +1,161 @@ import os import traceback import datetime +import json +import logging -class DuplicatePlotException(Exception): pass +MODEL_LOG_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S' -class ModelLogUtils(): - ''' - Collection of utility methods for logging and plotting of messages & metrics during training. - ''' - def __init__(self): - # Add logging to stdout for local debugging - self._logger = ModelLogUtilsLogger() +class LogType(): + PLOT = 'PLOT' + METRICS = 'METRICS' + MESSAGE = 'MESSAGE' - def set_logger(self, logger): - if not isinstance(logger, ModelLogUtilsLogger): - raise Exception('`logger` should subclass `ModelLogUtilsLogger`') - +class ModelLogger(): + def __init__(self): + # By default, set a logging handler to print to stdout (for debugging) + logger = logging.getLogger(__name__) + logger.setLevel(level=logging.INFO) + logger.addHandler(ModelLoggerDebugHandler()) self._logger = logger - def log(self, message): - ''' - Logs a message for analysis of model training. - ''' - self._logger.log(message) - def define_loss_plot(self): ''' Convenience method for defining a plot of ``loss`` against ``epoch``. - To be used with ``log_loss_metric()``. + To be used with :meth:`rafiki.model.ModelLogger.log_loss`. ''' self.define_plot('Loss Over Epochs', ['loss'], x_axis='epoch') - def log_loss_metric(self, loss, epoch): + def log_loss(self, loss, epoch): ''' Convenience method for logging `loss` against `epoch`. - To be used with ``define_loss_plot()``. + To be used with :meth:`rafiki.model.ModelLogger.define_loss_plot`. ''' - self.log_metrics(loss=loss, epoch=epoch) + self.log(loss=loss, epoch=epoch) def define_plot(self, title, metrics, x_axis=None): ''' Defines a plot for a set of metrics for analysis of model training. By default, metrics will be plotted against time. + + For example, a model's precision & recall logged with e.g. ``log(precision=0.1, recall=0.6, epoch=1)`` + can be visualized in the plots generated by + ``define_plot('Precision & Recall', metrics=['precision', 'recall'])`` (against time) or + ``define_plot('Precision & Recall', metrics=['precision', 'recall'], x_axis='epoch')`` (against epochs). + + Only call this method in :meth:`rafiki.model.BaseModel.train`. + + :param str title: Title of the plot + :param metrics: List of metrics that should be plotted on the y-axis + :type metrics: str[] + :param str x_axis: Metric that should be plotted on the x-axis, against all other metrics. Defaults to ``'time'``, which is automatically logged ''' - self._logger.define_plot(title, metrics, x_axis) + self._log(LogType.PLOT, { 'title': title, 'metrics': metrics, 'x_axis': x_axis }) - def log_metrics(self, **kwargs): + def log(self, msg='', **metrics): ''' - Logs metrics for a single point in time { <metric>: <value> }. - <value> should be a number. + Logs a message and/or a set of metrics at a single point in time. + + Logged messages will be viewable on Rafiki's administrative UI. + To visualize logged metrics on plots, a plot must be defined via :meth:`rafiki.model.ModelLogger.define_plot`. + + Only call this method in :meth:`rafiki.model.BaseModel.train` and :meth:`rafiki.model.BaseModel.evaluate`. 
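+
+        For example:
+
+        ::
+
+            self.logger.log('Train accuracy: {}'.format(accuracy))
+            self.logger.log(loss=0.1, epoch=1)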
+ + :param str msg: Message to be logged + :param metrics: Set of metrics & their values to be logged as { <metric>: <value> }, where <value> should be a number. + :type metrics: dict[str, int|float] ''' + if msg: + self._log(LogType.MESSAGE, { 'message': msg }) + + if metrics: + self._log(LogType.METRICS, metrics) + + # Sets the Python logger used internally. + # During model training, this method will be called by Rafiki to inject a Python logger + # to generate logs for an instance of model training. + def set_logger(self, logger): + self._logger = logger -class ModelLogUtilsLogger(): + def _log(self, log_type, log_dict={}): + log_dict['type'] = log_type + log_dict['time'] = datetime.datetime.now().strftime(MODEL_LOG_DATETIME_FORMAT) + log_line = json.dumps(log_dict) + self._logger.info(log_line) + + # Parses a logged line into a dictionary. + @staticmethod + def parse_log_line(log_line): + try: + return json.loads(log_line) + except ValueError: + return {} + + # Parses logs into (messages, metrics, plots) for visualization. + @staticmethod + def parse_logs(log_lines): + plots = [] + metrics = [] + messages = [] + + for log_line in log_lines: + log_dict = ModelLogger.parse_log_line(log_line) + + if 'time' not in log_dict or 'type' not in log_dict: + continue + + log_datetime = log_dict['time'] + log_type = log_dict['type'] + del log_dict['time'] + del log_dict['type'] + + if log_type == LogType.MESSAGE: + messages.append({ + 'time': log_datetime, + 'message': log_dict.get('message') + }) + + elif log_type == LogType.METRICS: + metrics.append({ + 'time': log_datetime, + **log_dict + }) + + elif log_type == LogType.PLOT: + plots.append({ + **log_dict + }) + + return (messages, metrics, plots) +class ModelLoggerDebugHandler(logging.Handler): def __init__(self): - self._plots = set() - - def log(self, message): - self._print(message) + logging.Handler.__init__(self) + + def emit(self, record): + log_line = record.msg + log_dict = ModelLogger.parse_log_line(log_line) + log_type = log_dict.get('type') - def define_plot(self, title, metrics, x_axis): - if title in self._plots: - raise DuplicatePlotException('Plot {} already defined'.format(title)) - self._plots.add(title) - self._print('Plot with title `{}` of {} against {} will be registered when this model is being trained on Rafiki' \ - .format(title, ', '.join(metrics), x_axis or 'time')) + if log_type == LogType.PLOT: - def log_metrics(self, **kwargs): - self._print(', '.join(['{}={}'.format(metric, value) for (metric, value) in kwargs.items()])) + title = log_dict.get('title') + metrics = log_dict.get('metrics') + x_axis = log_dict.get('x_axis') + self._print('Plot `{}` of {} against {} will be registered when this model is being trained on Rafiki' \ + .format(title, ', '.join(metrics), x_axis or 'time')) + + elif log_type == LogType.METRICS: + metrics_log = ', '.join(['{}={}'.format(metric, value) for (metric, value) in log_dict.items()]) + self._print('Metric(s) logged: {}'.format(metrics_log)) + + elif log_type == LogType.MESSAGE: + msg = log_dict.get('message') + self._print(msg) + + else: + self._print(log_line) + def _print(self, message): - print(message) \ No newline at end of file + print('[{}]'.format(__name__), message) diff --git a/rafiki/model/model.py b/rafiki/model/model.py index e46cb818..2b9fb3d6 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -12,16 +12,15 @@ from rafiki.constants import TaskType, ModelDependency from .dataset import ModelDatasetUtils -from .log import ModelLogUtils +from 
.log import ModelLogger from .knob import BaseKnob, serialize_knob_config, deserialize_knob_config class InvalidModelClassException(Exception): pass class InvalidModelParamsException(Exception): pass -class ModelUtils(ModelDatasetUtils, ModelLogUtils): +class ModelUtils(ModelDatasetUtils): def __init__(self): ModelDatasetUtils.__init__(self) - ModelLogUtils.__init__(self) class BaseModel(abc.ABC): ''' @@ -49,6 +48,7 @@ def __init__(self, **knobs): :type knobs: dict[str, any] ''' def __init__(self, **knobs): + self.logger = ModelLogger() self.utils = ModelUtils() @staticmethod diff --git a/rafiki/worker/train.py b/rafiki/worker/train.py index b287af6b..3894e90c 100644 --- a/rafiki/worker/train.py +++ b/rafiki/worker/train.py @@ -7,9 +7,7 @@ from rafiki.config import SUPERADMIN_EMAIL, SUPERADMIN_PASSWORD from rafiki.constants import TrainJobStatus, TrialStatus, BudgetType -from rafiki.model import load_model_class, serialize_knob_config -from rafiki.utils.log import JobLogger -from rafiki.model import ModelLogUtilsLogger +from rafiki.model import load_model_class, serialize_knob_config, LogType from rafiki.db import Database from rafiki.client import Client @@ -77,9 +75,8 @@ def start(self): logger.info('Received proposal of knobs from advisor:') logger.info(pprint.pformat(knobs)) logger.info('Creating new trial in DB...') - trial = self._create_new_trial(model_id, train_job_id, knobs) - self._trial_id = trial.id - logger.info('Created trial of ID "{}" in DB'.format(trial.id)) + self._trial_id = self._create_new_trial(model_id, train_job_id, knobs) + logger.info('Created trial of ID "{}" in DB'.format(self._trial_id)) # Don't keep DB connection while training model self._db.disconnect() @@ -89,14 +86,20 @@ def start(self): try: logger.info('Starting trial...') logger.info('Training & evaluating model...') - (score, parameters, logs) = self._train_and_evaluate_model(clazz, knobs, train_dataset_uri, - test_dataset_uri) + + def handle_log(log_line, log_lvl): + with self._db: + trial = self._db.get_trial(self._trial_id) + self._db.add_trial_log(trial, log_line, log_lvl) + + (score, parameters) = self._train_and_evaluate_model(clazz, knobs, train_dataset_uri, + test_dataset_uri, handle_log) logger.info('Trial score: {}'.format(score)) with self._db: logger.info('Marking trial as complete in DB...') trial = self._db.get_trial(self._trial_id) - self._db.mark_trial_as_complete(trial, score, parameters, logs) + self._db.mark_trial_as_complete(trial, score, parameters) self._trial_id = None except Exception: @@ -131,14 +134,22 @@ def stop(self): logger.error('Error marking trial as terminated:') logger.error(traceback.format_exc()) - def _train_and_evaluate_model(self, clazz, knobs, train_dataset_uri, - test_dataset_uri): + def _train_and_evaluate_model(self, clazz, knobs, train_dataset_uri, \ + test_dataset_uri, handle_log): + # Initialize model model_inst = clazz(**knobs) - # Insert model training logger - model_logger = TrainModelLogUtilsLogger() - model_inst.utils.set_logger(model_logger) + # Add logs handlers for trial, including adding handler to root logger + # to handle logs emitted during model training with level above INFO + log_handler = ModelLoggerHandler(handle_log) + root_logger = logging.getLogger() + root_logger.addHandler(log_handler) + logger = logging.getLogger('{}.trial'.format(__name__)) + logger.setLevel(logging.INFO) + logger.propagate = False # Avoid duplicate logs in root logger + logger.addHandler(log_handler) + model_inst.logger.set_logger(logger) # Train model 
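        # Each line the model logs via `self.logger` in `train()` and
        # `evaluate()` is emitted through `log_handler`, which passes it to
        # `handle_log` to be saved as a `TrialLog` row in the DB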
model_inst.train(train_dataset_uri) @@ -146,16 +157,15 @@ def _train_and_evaluate_model(self, clazz, knobs, train_dataset_uri, # Evaluate model score = model_inst.evaluate(test_dataset_uri) + # Remove log handler for trial + root_logger.removeHandler(log_handler) + # Dump and pickle model parameters parameters = model_inst.dump_parameters() parameters = pickle.dumps(parameters) model_inst.destroy() - # Export model logs - logs = model_logger.export_logs() - model_logger.destroy() - - return (score, parameters, logs) + return (score, parameters) # Creates a new trial in the DB def _create_new_trial(self, model_id, train_job_id, knobs): @@ -165,7 +175,7 @@ def _create_new_trial(self, model_id, train_job_id, knobs): knobs=knobs ) self._db.commit() - return trial + return trial.id # Gets proposal of a set of knob values from advisor def _get_proposal_from_advisor(self, advisor_id): @@ -252,21 +262,13 @@ def _make_client(self): client.login(email=superadmin_email, password=superadmin_password) return client -class TrainModelLogUtilsLogger(ModelLogUtilsLogger): - def __init__(self): - self._job_logger = JobLogger() - - def log(self, message): - return self._job_logger.log(message) - - def define_plot(self, title, metrics, x_axis): - return self._job_logger.define_plot(title, metrics, x_axis) - - def log_metrics(self, **kwargs): - return self._job_logger.log_metrics(**kwargs) - - def export_logs(self): - return self._job_logger.export_logs() +class ModelLoggerHandler(logging.Handler): + def __init__(self, handle_log): + logging.Handler.__init__(self) + self._handle_log = handle_log - def destroy(self): - return self._job_logger.destroy() \ No newline at end of file + def emit(self, record): + log_line = record.msg + log_lvl = record.levelname + self._handle_log(log_line, log_lvl) + \ No newline at end of file From 7bdccf1980058354257bc83bec41188e28735e1a Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 6 Dec 2018 20:03:52 +0800 Subject: [PATCH 08/22] Remove unused `JobLogger` --- rafiki/utils/log.py | 226 -------------------------------------------- 1 file changed, 226 deletions(-) diff --git a/rafiki/utils/log.py b/rafiki/utils/log.py index d5bc45bb..bb2aeb6c 100644 --- a/rafiki/utils/log.py +++ b/rafiki/utils/log.py @@ -10,235 +10,9 @@ logger = logging.getLogger(__name__) -JOB_LOGGER_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S' - def configure_logging(process_name): # Configure all logging to a log file logs_folder_path = LOGS_FOLDER_PATH logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', filename='{}/{}.log'.format(logs_folder_path, process_name)) - -class JobLogger(): - def __init__(self): - self._log_file = tempfile.NamedTemporaryFile(delete=False, mode='w+', encoding='utf-8') - - def define_plot(self, title, metrics, x_axis): - self._log_line(has_time=False, type='PLOT', title=title, metrics=metrics, x_axis=x_axis) - - def log(self, message): - self._log_line(type='MESSAGE', message=message) - - def log_metrics(self, **kwargs): - self._log_line(type='METRICS', **kwargs) - - # Clears all logs (excluding plot definitions) before a specific time - def clear_logs(self, datetime_before=None): - if datetime_before is None: - datetime_before = datetime.now() - - self._log_file.seek(0) - new_log_file = tempfile.NamedTemporaryFile(delete=False, mode='w+', encoding='utf-8') - - # Only copy over lines in new file that are not before `datetime_before` - for line in self._log_file: - (log_datetime, _) = self._parse_line(line) - log_datetime = 
datetime.strptime(log_datetime, JOB_LOGGER_DATETIME_FORMAT) \ - if log_datetime is not None else None - if log_datetime is None or log_datetime >= datetime_before: - new_log_file.write(line) - - # Switch to new log file - self._log_file.close() - os.remove(self._log_file.name) - self._log_file = new_log_file - - # Read all logs as bytes - def export_logs(self): - self._log_file.seek(0) - logs_bytes = self._log_file.read().encode('utf-8') - return logs_bytes - - def destroy(self): - # Remove temporary internal log file - self._log_file.close() - os.remove(self._log_file.name) - - # Import and completely replace all logs - def import_logs(self, logs_bytes): - if logs_bytes is None: return - self._log_file.seek(0) - self._log_file.write(logs_bytes.decode('utf-8')) - self._log_file.truncate() - - ''' - Read logs as (plots, metrics, messages) - - plots: Plot[] - Plot: { title, metrics, x_axis } - metrics: Metric[] - Metric: { time: Datetime, [name]: [value]} - messages: { time: Datetime, message: string }[] - Datetime: string (%Y-%m-%dT%H:%M:%S) - ''' - def read_logs(self): - self._log_file.seek(0) - - plots = [] - metrics = [] - messages = [] - for line in self._log_file: - (log_datetime, log_dict) = self._parse_line(line) - - if 'type' not in log_dict: - continue - - log_type = log_dict['type'] - del log_dict['type'] - - if log_type == 'MESSAGE': - messages.append({ - 'time': log_datetime, - 'message': log_dict.get('message') - }) - - elif log_type == 'METRICS': - metrics.append({ - 'time': log_datetime, - **log_dict - }) - - elif log_type == 'PLOT': - plots.append({ - **log_dict - }) - - return (plots, metrics, messages) - - # Logs dictionary to temporary internal log file in JSON as line, appending current time - def _log_line(self, has_time=True, **kwargs): - if has_time: - kwargs['time'] = datetime.now().strftime(JOB_LOGGER_DATETIME_FORMAT) - self._log_file.write('{}\n'.format(json.dumps(kwargs))) - - # Parses a log line as (log_datetime, log_dict) - def _parse_line(self, line): - log = None - try: - log = json.loads(line) - except: - logger.warn('Error while reading line in log: "{}"'.format(line)) - logger.warn(traceback.format_exc()) - return (None, {}) - - log_datetime = None - if 'time' in log: - log_datetime = log['time'] - del log['time'] - - return (log_datetime, log) - -def _test_job_logger_for_train_worker(): - l = JobLogger() - l.define_plot('Model Loss', ['loss'], None) - - # Model is being trained - l.log('START') - time.sleep(1) - l.log_metrics(loss=3.42, learning_rate=0.01) - time.sleep(1) - l.log_metrics(loss=3.21, learning_rate=0.01) - time.sleep(1) - l.log_metrics(loss=3.11) - l.log('END') - - # At the end of training, logs are exported and saved - logs_bytes = l.export_logs() - assert isinstance(logs_bytes, bytes) - l.destroy() - - # App developer checks on logs - l2 = JobLogger() - l2.import_logs(logs_bytes) - (plots, metrics, messages) = l2.read_logs() - l2.destroy() - - assert len(plots) == 1 - assert plots[0] == { 'title': 'Model Loss', 'metrics': ['loss'], 'x_axis': None } - assert len(metrics) == 3 - assert [x.get('loss') for x in metrics] == [3.42, 3.21, 3.11] - assert [x.get('learning_rate') for x in metrics] == [0.01, 0.01, None] - assert [isinstance(x.get('time'), str) for x in metrics] == [True, True, True] - assert [x.get('message') for x in messages] == ['START', 'END'] - -def _test_job_logger_for_predictor(): - l = JobLogger() - l.define_plot('Queries', ['queries'], None) - - l.log('UP') - - # Predictor receives queries - time.sleep(1) - queries = 0 - 
while queries < 3: - l.log_metrics(query=True) - queries += 1 - time.sleep(2) - while queries < 23: - l.log_metrics(query=True) - queries += 1 - time.sleep(1) - - # Predictor's logs are exported and cleared periodically - logs_bytes = l.export_logs() - l.clear_logs() - - # App developer checks on this period's logs - l2 = JobLogger() - l2.import_logs(logs_bytes) - (plots, metrics, messages) = l2.read_logs() - l2.destroy() - - assert len(plots) == 1 - assert len(metrics) == 23 - assert metrics[4].get('query') == True - assert [x.get('message') for x in messages] == ['UP'] - - # Predictor receives more queries - time.sleep(1) - queries = 0 - while queries < 40: - l.log_metrics(query=True) - queries += 1 - time.sleep(2) - while queries < 43: - l.log_metrics(query=True) - queries += 1 - - l.log('KILLED') - l.log('DOWN') - - # Predictor's logs are exported and cleared periodically - logs_bytes = l.export_logs() - l.clear_logs() - l.destroy() - - # App developer checks on this period's logs - l2 = JobLogger() - l2.import_logs(logs_bytes) - (plots, metrics, messages) = l2.read_logs() - l2.destroy() - - assert len(plots) == 1 - assert len(metrics) == 43 - assert metrics[8].get('query') == True - assert isinstance(metrics[9].get('time'), str) is True - assert [x.get('message') for x in messages] == ['KILLED', 'DOWN'] - -if __name__ == '__main__': - print('Testing `JobLogger` for train worker...') - _test_job_logger_for_train_worker() - print('Testing `JobLogger` for predictor...') - _test_job_logger_for_predictor() - print('All tests pass!') - \ No newline at end of file From 3d658335b42255c6e730c798161852d6c00fa3ce Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 6 Dec 2018 20:04:07 +0800 Subject: [PATCH 09/22] Update docs on model logging --- docs/src/python/rafiki.model.rst | 2 +- docs/src/user/creating-models.rst | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/src/python/rafiki.model.rst b/docs/src/python/rafiki.model.rst index cd5ce407..87f3d85f 100644 --- a/docs/src/python/rafiki.model.rst +++ b/docs/src/python/rafiki.model.rst @@ -33,7 +33,7 @@ Utility Classes & Methods .. automethod:: rafiki.model.test_model_class -.. autoclass:: rafiki.model.log.ModelLogUtils +.. autoclass:: rafiki.model.ModelLogger :members: .. autoclass:: rafiki.model.dataset.ModelDatasetUtils diff --git a/docs/src/user/creating-models.rst b/docs/src/user/creating-models.rst index 6594b760..0e2bcd7b 100644 --- a/docs/src/user/creating-models.rst +++ b/docs/src/user/creating-models.rst @@ -22,17 +22,23 @@ After implementing your model, it is highly recommended to use :meth:`rafiki.mod to test your model. This method simulates a full train-inference flow on your model, ensuring that it is likely to work on Rafiki. +Logging in Models +-------------------------------------------------------------------- + +:class:`rafiki.model.BaseModel` has an attribute ``logger`` that is of the class :class:`rafiki.model.ModelLogger`. +It allows you to log messages and metrics while your model is being trained, and you can +define plots to visualize your model's training on Rafiki's Admin Web interface. -Logging & Dataset Loading in Models +.. seealso:: :ref:`using-admin-web` + +Dataset Loading in Models -------------------------------------------------------------------- -:class:`rafiki.model.BaseModel` has a property ``utils`` that subclasses the model utility classes -:class:`rafiki.model.log.ModelLogUtils` and :class:`rafiki.model.dataset.ModelDatasetUtils`. 
They -help with model logging & dataset loading respectively. +:class:`rafiki.model.BaseModel` has an attribute ``utils`` that subclasses the model utility class +:class:`rafiki.model.dataset.ModelDatasetUtils`. It helps with dataset loading. Refer to the sample usage in the implementation of `./examples/models/image_classification/TfSingleHiddenLayer.py `_. - Model Environment -------------------------------------------------------------------- From 8807408d343b2b587a40a669d54d372e7df2cbe6 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 6 Dec 2018 20:06:45 +0800 Subject: [PATCH 10/22] Remove "failing install command" TODO that seems to be fixed --- scripts/start_worker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/start_worker.py b/scripts/start_worker.py index c6560ef6..2b400139 100644 --- a/scripts/start_worker.py +++ b/scripts/start_worker.py @@ -12,7 +12,6 @@ def start_service(service_id, service_type): install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') exit_code = os.system(install_command) if exit_code != 0: - # TODO: Fix failing install command for `pip install torch==0.4.1;`` raise Exception('Install command gave non-zero exit code: "{}"'.format(install_command)) if service_type == ServiceType.TRAIN: From 08bbdfd33a2e393ae2db9dab0e63ac2b8386aa12 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Fri, 7 Dec 2018 13:05:55 +0800 Subject: [PATCH 11/22] Add error throwing when deprecated methods are called in model --- rafiki/model/model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rafiki/model/model.py b/rafiki/model/model.py index 2b9fb3d6..f2be1aec 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -305,6 +305,19 @@ def _check_model_inst(model_inst): if getattr(model_inst, 'utils', None) is None: raise Exception('`super().__init__(**knobs)` should be called as the first line of the model\'s `__init__` method.') + # Throw error when deprecated methods are called + def deprecated_func(desc): + def throw_error(*args, **kwargs): + raise AttributeError(desc) + + return throw_error + + model_inst.utils.log = deprecated_func('`self.utils.log(...)` has been changed to `self.logger.log(...)`') + model_inst.utils.log_metrics = deprecated_func('`self.utils.log_metrics(...)` has been changed to `self.logger.log(...)`') + model_inst.utils.define_plot = deprecated_func('`self.utils.define_plot(...)` has been renamed to `self.logger.define_plot(...)`') + model_inst.utils.define_loss_plot = deprecated_func('`self.utils.define_loss_plot(...)` has been renamed to `self.logger.define_loss_plot(...)`') + model_inst.utils.log_loss_metric = deprecated_func('`self.utils.log_loss_metric(...)` has been renamed to `self.logger.log_loss(...)`') + def _check_knob_config(knob_config): if not isinstance(knob_config, dict) or \ any([(not isinstance(name, str) or not isinstance(knob, BaseKnob)) for (name, knob) in knob_config.items()]): From 89165ac64cab77ff00e60a4f6fd23bcb0ae5206c Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Fri, 7 Dec 2018 23:39:20 +0800 Subject: [PATCH 12/22] Move logging & dataset loading functionality from instance attributes to separate Python imports --- examples/models/image_classification/SkDt.py | 8 ++--- examples/models/image_classification/SkSvm.py | 6 ++-- .../image_classification/TfFeedForward.py | 22 ++++++------- .../models/image_classification/TfVgg16.py | 8 ++--- examples/models/pos_tagging/BigramHmm.py | 8 ++--- examples/models/pos_tagging/PyBiLstm.py | 22 ++++++------- rafiki/model/__init__.py | 8 
++--- rafiki/model/dataset.py | 1 + rafiki/model/log.py | 2 ++ rafiki/model/model.py | 33 ++++++++----------- rafiki/worker/train.py | 12 +++---- 11 files changed, 64 insertions(+), 66 deletions(-) diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index 5263e921..cd1ffe4d 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -7,7 +7,7 @@ from rafiki.config import APP_MODE from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, CategoricalKnob + IntegerKnob, CategoricalKnob, dataset_utils, logger from rafiki.constants import TaskType, ModelDependency class SkDt(BaseModel): @@ -30,7 +30,7 @@ def __init__(self, **knobs): ) def train(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) X = self._prepare_X(images) y = classes @@ -39,10 +39,10 @@ def train(self, dataset_uri): # Compute train accuracy preds = self._clf.predict(X) accuracy = sum(y == preds) / len(y) - self.logger.log('Train accuracy: {}'.format(accuracy)) + logger.log('Train accuracy: {}'.format(accuracy)) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) X = self._prepare_X(images) y = classes diff --git a/examples/models/image_classification/SkSvm.py b/examples/models/image_classification/SkSvm.py index 35b4b761..894e2565 100644 --- a/examples/models/image_classification/SkSvm.py +++ b/examples/models/image_classification/SkSvm.py @@ -7,7 +7,7 @@ from rafiki.config import APP_MODE from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, CategoricalKnob, FloatKnob + IntegerKnob, CategoricalKnob, FloatKnob, dataset_utils from rafiki.constants import TaskType, ModelDependency class SkSvm(BaseModel): @@ -34,14 +34,14 @@ def __init__(self, **knobs): ) def train(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) X = self._prepare_X(images) y = classes self._clf.fit(X, y) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) X = self._prepare_X(images) y = classes diff --git a/examples/models/image_classification/TfFeedForward.py b/examples/models/image_classification/TfFeedForward.py index 238fdbd3..fea27cb3 100644 --- a/examples/models/image_classification/TfFeedForward.py +++ b/examples/models/image_classification/TfFeedForward.py @@ -9,7 +9,7 @@ from rafiki.config import APP_MODE from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, CategoricalKnob, FloatKnob + IntegerKnob, CategoricalKnob, FloatKnob, dataset_utils, logger from rafiki.constants import TaskType, ModelDependency class TfFeedForward(BaseModel): @@ -41,13 +41,13 @@ def train(self, dataset_uri): bs = self._knobs.get('batch_size') ep = self._knobs.get('epochs') - 
self.logger.log('Available devices: {}'.format(str(device_lib.list_local_devices()))) + logger.log('Available devices: {}'.format(str(device_lib.list_local_devices()))) # Define 2 plots: Loss against time, loss against epochs - self.logger.define_loss_plot() - self.logger.define_plot('Loss Over Time', ['loss']) + logger.define_loss_plot() + logger.define_plot('Loss Over Time', ['loss']) - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) num_classes = dataset.classes (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) @@ -69,13 +69,13 @@ def train(self, dataset_uri): # Compute train accuracy (loss, accuracy) = self._model.evaluate(images, classes) - self.logger.log('Train loss: {}'.format(loss)) - self.logger.log('Train accuracy: {}'.format(accuracy)) + logger.log('Train loss: {}'.format(loss)) + logger.log('Train accuracy: {}'.format(accuracy)) def evaluate(self, dataset_uri): im_sz = self._knobs.get('image_size') - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri, image_size=[im_sz, im_sz]) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) classes = np.asarray(classes) @@ -83,14 +83,14 @@ def evaluate(self, dataset_uri): with self._graph.as_default(): with self._sess.as_default(): (loss, accuracy) = self._model.evaluate(images, classes) - self.logger.log('Test loss: {}'.format(loss)) + logger.log('Test loss: {}'.format(loss)) return accuracy def predict(self, queries): im_sz = self._knobs.get('image_size') - X = self.utils.resize_as_images(queries, image_size=[im_sz, im_sz]) + X = dataset_utils.resize_as_images(queries, image_size=[im_sz, im_sz]) with self._graph.as_default(): with self._sess.as_default(): probs = self._model.predict(X) @@ -137,7 +137,7 @@ def load_parameters(self, params): def _on_train_epoch_end(self, epoch, logs): loss = logs['loss'] - self.logger.log_loss(loss, epoch) + logger.log_loss(loss, epoch) def _build_model(self, num_classes): units = self._knobs.get('hidden_layer_units') diff --git a/examples/models/image_classification/TfVgg16.py b/examples/models/image_classification/TfVgg16.py index 9a88bad1..50d36239 100644 --- a/examples/models/image_classification/TfVgg16.py +++ b/examples/models/image_classification/TfVgg16.py @@ -9,7 +9,7 @@ from urllib.parse import urlparse, parse_qs from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, FloatKnob, CategoricalKnob + IntegerKnob, FloatKnob, CategoricalKnob, dataset_utils from rafiki.constants import TaskType, ModelDependency from rafiki.config import APP_MODE @@ -37,7 +37,7 @@ def train(self, dataset_uri): ep = self._knobs.get('epochs') bs = self._knobs.get('batch_size') - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[48, 48]) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri, image_size=[48, 48]) num_classes = dataset.classes (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) @@ -55,7 +55,7 @@ def train(self, dataset_uri): ) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_image_files(dataset_uri, image_size=[48, 48]) + dataset = dataset_utils.load_dataset_of_image_files(dataset_uri, 
image_size=[48, 48]) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) images = np.asarray(images) images = np.stack([images] * 3, axis=-1) @@ -67,7 +67,7 @@ def evaluate(self, dataset_uri): return accuracy def predict(self, queries): - images = self.utils.resize_as_images(queries, image_size=[48, 48]) + images = dataset_utils.resize_as_images(queries, image_size=[48, 48]) images = np.stack([images] * 3, axis=-1) with self._graph.as_default(): with self._sess.as_default(): diff --git a/examples/models/pos_tagging/BigramHmm.py b/examples/models/pos_tagging/BigramHmm.py index 15b2cfc0..c1a31fcf 100644 --- a/examples/models/pos_tagging/BigramHmm.py +++ b/examples/models/pos_tagging/BigramHmm.py @@ -8,7 +8,7 @@ import pprint import json -from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class +from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, logger, dataset_utils from rafiki.constants import TaskType # Min numeric value @@ -26,14 +26,14 @@ def __init__(self, **knobs): super().__init__(**knobs) def train(self, dataset_uri): - dataset = self.utils.load_dataset_of_corpus(dataset_uri) + dataset = dataset_utils.load_dataset_of_corpus(dataset_uri) (sents_tokens, sents_tags) = zip(*[zip(*sent) for sent in dataset]) self._num_tags = dataset.tag_num_classes[0] (self._trans_probs, self._emiss_probs) = self._compute_probs(self._num_tags, sents_tokens, sents_tags) - self.logger.log('No. of tags: {}'.format(self._num_tags)) + logger.log('No. of tags: {}'.format(self._num_tags)) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_corpus(dataset_uri) + dataset = dataset_utils.load_dataset_of_corpus(dataset_uri) (sents_tokens, sents_tags) = zip(*[zip(*sent) for sent in dataset]) (sents_pred_tags) = self._tag_sents(self._num_tags, sents_tokens, self._trans_probs, self._emiss_probs) acc = self._compute_accuracy(sents_tags, sents_pred_tags) diff --git a/examples/models/pos_tagging/PyBiLstm.py b/examples/models/pos_tagging/PyBiLstm.py index cedc6997..a998aa65 100644 --- a/examples/models/pos_tagging/PyBiLstm.py +++ b/examples/models/pos_tagging/PyBiLstm.py @@ -13,7 +13,7 @@ from torch.utils.data.dataset import Dataset from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, FloatKnob, CategoricalKnob + IntegerKnob, FloatKnob, CategoricalKnob, logger, dataset_utils from rafiki.constants import TaskType, ModelDependency from rafiki.config import APP_MODE @@ -37,21 +37,21 @@ def __init__(self, **knobs): self._knobs = knobs def train(self, dataset_uri): - dataset = self.utils.load_dataset_of_corpus(dataset_uri) + dataset = dataset_utils.load_dataset_of_corpus(dataset_uri) self._word_dict = self._extract_word_dict(dataset) self._tag_count = dataset.tag_num_classes[0] - self.logger.log('No. of unique words: {}'.format(len(self._word_dict))) - self.logger.log('No. of tags: {}'.format(self._tag_count)) + logger.log('No. of unique words: {}'.format(len(self._word_dict))) + logger.log('No. 
of tags: {}'.format(self._tag_count)) (self._net, self._optimizer) = self._train(dataset) sents_tags = self._predict(dataset) acc = self._compute_accuracy(dataset, sents_tags) - self.logger.log('Train accuracy: {}'.format(acc)) + logger.log('Train accuracy: {}'.format(acc)) def evaluate(self, dataset_uri): - dataset = self.utils.load_dataset_of_corpus(dataset_uri) + dataset = dataset_utils.load_dataset_of_corpus(dataset_uri) sents_tags = self._predict(dataset) acc = self._compute_accuracy(dataset, sents_tags) return acc @@ -138,7 +138,7 @@ def _predict(self, dataset): Tensor = torch.LongTensor if torch.cuda.is_available(): - self.logger.log('Using CUDA...') + logger.log('Using CUDA...') net = net.cuda() Tensor = torch.cuda.LongTensor @@ -172,14 +172,14 @@ def _train(self, dataset): B = math.ceil(len(dataset) / N) # No. of batches # Define 2 plots: Loss against time, loss against epochs - self.logger.define_loss_plot() - self.logger.define_plot('Loss Over Time', ['loss']) + logger.define_loss_plot() + logger.define_plot('Loss Over Time', ['loss']) (net, optimizer) = self._create_model() Tensor = torch.LongTensor if torch.cuda.is_available(): - self.logger.log('Using CUDA...') + logger.log('Using CUDA...') net = net.cuda() Tensor = torch.cuda.LongTensor @@ -209,7 +209,7 @@ def _train(self, dataset): total_loss += loss.item() - self.logger.log_loss(loss=(total_loss / B), epoch=epoch) + logger.log_loss(loss=(total_loss / B), epoch=epoch) return (net, optimizer) diff --git a/rafiki/model/__init__.py b/rafiki/model/__init__.py index 79a2df30..fe124769 100644 --- a/rafiki/model/__init__.py +++ b/rafiki/model/__init__.py @@ -1,6 +1,6 @@ from .model import BaseModel, test_model_class, load_model_class, \ - parse_model_install_command, InvalidModelClassException, InvalidModelParamsException, \ - ModelUtils -from .log import LogType, ModelLogger + parse_model_install_command, InvalidModelClassException, InvalidModelParamsException from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob, \ - serialize_knob_config, deserialize_knob_config \ No newline at end of file + serialize_knob_config, deserialize_knob_config +from .dataset import dataset_utils, ModelDatasetUtils, CorpusDataset, ImageFilesDataset +from .log import logger, ModelLogger \ No newline at end of file diff --git a/rafiki/model/dataset.py b/rafiki/model/dataset.py index 6ddce71a..957a29cc 100644 --- a/rafiki/model/dataset.py +++ b/rafiki/model/dataset.py @@ -254,3 +254,4 @@ def download_dataset_from_uri(self, dataset_uri): return dataset_path +dataset_utils = ModelDatasetUtils() \ No newline at end of file diff --git a/rafiki/model/log.py b/rafiki/model/log.py index 6adcd66d..4575c8fa 100644 --- a/rafiki/model/log.py +++ b/rafiki/model/log.py @@ -159,3 +159,5 @@ def emit(self, record): def _print(self, message): print('[{}]'.format(__name__), message) + +logger = ModelLogger() \ No newline at end of file diff --git a/rafiki/model/model.py b/rafiki/model/model.py index f2be1aec..13c2fc81 100644 --- a/rafiki/model/model.py +++ b/rafiki/model/model.py @@ -12,16 +12,11 @@ from rafiki.constants import TaskType, ModelDependency from .dataset import ModelDatasetUtils -from .log import ModelLogger from .knob import BaseKnob, serialize_knob_config, deserialize_knob_config class InvalidModelClassException(Exception): pass class InvalidModelParamsException(Exception): pass -class ModelUtils(ModelDatasetUtils): - def __init__(self): - ModelDatasetUtils.__init__(self) - class BaseModel(abc.ABC): ''' Rafiki's base model class that Rafiki 
models should extend. @@ -48,8 +43,7 @@ def __init__(self, **knobs): :type knobs: dict[str, any] ''' def __init__(self, **knobs): - self.logger = ModelLogger() - self.utils = ModelUtils() + pass @staticmethod def get_knob_config(): @@ -291,9 +285,6 @@ def _check_model_class(py_model_class): if not issubclass(py_model_class, BaseModel): raise Exception('Model should extend `rafiki.model.BaseModel`') - if inspect.isfunction(getattr(py_model_class, 'get_predict_label_mapping', None)): - _warn('`get_predict_label_mapping` has been deprecated') - if inspect.isfunction(getattr(py_model_class, 'init', None)): _warn('`init` has been deprecated - use `__init__` for your model\'s initialization logic instead') @@ -302,21 +293,25 @@ def _check_model_class(py_model_class): _warn('`get_knob_config` has been changed to a `@staticmethod`') def _check_model_inst(model_inst): - if getattr(model_inst, 'utils', None) is None: - raise Exception('`super().__init__(**knobs)` should be called as the first line of the model\'s `__init__` method.') - # Throw error when deprecated methods are called def deprecated_func(desc): def throw_error(*args, **kwargs): raise AttributeError(desc) return throw_error - - model_inst.utils.log = deprecated_func('`self.utils.log(...)` has been changed to `self.logger.log(...)`') - model_inst.utils.log_metrics = deprecated_func('`self.utils.log_metrics(...)` has been changed to `self.logger.log(...)`') - model_inst.utils.define_plot = deprecated_func('`self.utils.define_plot(...)` has been renamed to `self.logger.define_plot(...)`') - model_inst.utils.define_loss_plot = deprecated_func('`self.utils.define_loss_plot(...)` has been renamed to `self.logger.define_loss_plot(...)`') - model_inst.utils.log_loss_metric = deprecated_func('`self.utils.log_loss_metric(...)` has been renamed to `self.logger.log_loss(...)`') + + class DeprecatedModelUtils(): + log = deprecated_func('`self.utils.log(...)` has been moved to `logger.log(...)`') + log_metrics = deprecated_func('`self.utils.log_metrics(...)` has been moved to `logger.log(...)`') + define_plot = deprecated_func('`self.utils.define_plot(...)` has been moved to `logger.define_plot(...)`') + define_loss_plot = deprecated_func('`self.utils.define_loss_plot(...)` has been moved to `logger.define_loss_plot(...)`') + log_loss_metric = deprecated_func('`self.utils.log_loss_metric(...)` has been moved to `logger.log_loss(...)`') + load_dataset_of_image_files = deprecated_func('`self.utils.load_dataset_of_image_files(...)` has been moved to `dataset_utils.load_dataset_of_image_files(...)`') + load_dataset_of_corpus = deprecated_func('`self.utils.load_dataset_of_corpus(...)` has been moved to `dataset_utils.load_dataset_of_corpus(...)`') + resize_as_images = deprecated_func('`self.utils.resize_as_images(...)` has been moved to `dataset_utils.resize_as_images(...)`') + download_dataset_from_uri = deprecated_func('`self.utils.download_dataset_from_uri(...)` has been moved to `dataset_utils.download_dataset_from_uri(...)`') + + model_inst.utils = DeprecatedModelUtils() def _check_knob_config(knob_config): if not isinstance(knob_config, dict) or \ diff --git a/rafiki/worker/train.py b/rafiki/worker/train.py index 3894e90c..18ac4459 100644 --- a/rafiki/worker/train.py +++ b/rafiki/worker/train.py @@ -7,7 +7,7 @@ from rafiki.config import SUPERADMIN_EMAIL, SUPERADMIN_PASSWORD from rafiki.constants import TrainJobStatus, TrialStatus, BudgetType -from rafiki.model import load_model_class, serialize_knob_config, LogType +from rafiki.model import 
load_model_class, serialize_knob_config, logger as model_logger from rafiki.db import Database from rafiki.client import Client @@ -145,11 +145,11 @@ def _train_and_evaluate_model(self, clazz, knobs, train_dataset_uri, \ log_handler = ModelLoggerHandler(handle_log) root_logger = logging.getLogger() root_logger.addHandler(log_handler) - logger = logging.getLogger('{}.trial'.format(__name__)) - logger.setLevel(logging.INFO) - logger.propagate = False # Avoid duplicate logs in root logger - logger.addHandler(log_handler) - model_inst.logger.set_logger(logger) + py_model_logger = logging.getLogger('{}.trial'.format(__name__)) + py_model_logger.setLevel(logging.INFO) + py_model_logger.propagate = False # Avoid duplicate logs in root logger + py_model_logger.addHandler(log_handler) + model_logger.set_logger(py_model_logger) # Train model model_inst.train(train_dataset_uri) From 743928b83ba0ea956cc8cafc7a169d2279e070ad Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Fri, 7 Dec 2018 23:39:46 +0800 Subject: [PATCH 13/22] Update docs on changes to logging & dataset loading --- docs/src/python/rafiki.model.rst | 6 +- docs/src/user/creating-models.rst | 15 ++- rafiki/model/dataset.py | 181 ++++++++++++++++-------------- rafiki/model/log.py | 19 ++++ 4 files changed, 128 insertions(+), 93 deletions(-) diff --git a/docs/src/python/rafiki.model.rst b/docs/src/python/rafiki.model.rst index 87f3d85f..0c8eaed4 100644 --- a/docs/src/python/rafiki.model.rst +++ b/docs/src/python/rafiki.model.rst @@ -36,9 +36,9 @@ Utility Classes & Methods .. autoclass:: rafiki.model.ModelLogger :members: -.. autoclass:: rafiki.model.dataset.ModelDatasetUtils +.. autoclass:: rafiki.model.ModelDatasetUtils :members: -.. autoclass:: rafiki.model.dataset.ImageFilesDataset +.. autoclass:: rafiki.model.ImageFilesDataset -.. autoclass:: rafiki.model.dataset.CorpusDataset \ No newline at end of file +.. autoclass:: rafiki.model.CorpusDataset \ No newline at end of file diff --git a/docs/src/user/creating-models.rst b/docs/src/user/creating-models.rst index 0e2bcd7b..68684e55 100644 --- a/docs/src/user/creating-models.rst +++ b/docs/src/user/creating-models.rst @@ -14,7 +14,8 @@ To submit the model to Rafiki, use the :meth:`rafiki.client.Client.create_model` Implementing Models -------------------------------------------------------------------- -Details on how to implement a model are located in the documentation of :class:`rafiki.model.BaseModel`. +Full details on how to implement a model are located in the documentation of :class:`rafiki.model.BaseModel`, +and sample model implementations are located in `./examples/models/ `_. In defining the hyperparameters (knobs) of a model, refer to the documentation at :ref:`knob-types` for the full list of knob types. @@ -25,19 +26,21 @@ it is likely to work on Rafiki. Logging in Models -------------------------------------------------------------------- -:class:`rafiki.model.BaseModel` has an attribute ``logger`` that is of the class :class:`rafiki.model.ModelLogger`. -It allows you to log messages and metrics while your model is being trained, and you can +By importing the global ``logger`` instance in the ``rafiki.model`` module, +you can log messages and metrics while your model is being trained, and you can define plots to visualize your model's training on Rafiki's Admin Web interface. +Refer to :class:`rafiki.model.ModelLogger` for full usage instructions. + .. 
seealso:: :ref:`using-admin-web` Dataset Loading in Models -------------------------------------------------------------------- -:class:`rafiki.model.BaseModel` has an attribute ``utils`` that subclasses the model utility class -:class:`rafiki.model.dataset.ModelDatasetUtils`. It helps with dataset loading. +The global ``dataset_utils`` instance in the ``rafiki.model`` module provides +a set of built-in dataset loading methods for common dataset types on Rafiki. -Refer to the sample usage in the implementation of `./examples/models/image_classification/TfSingleHiddenLayer.py `_. +Refer to :class:`rafiki.model.ModelDatasetUtils` for full usage instructions. Model Environment -------------------------------------------------------------------- diff --git a/rafiki/model/dataset.py b/rafiki/model/dataset.py index 957a29cc..2a78f34c 100644 --- a/rafiki/model/dataset.py +++ b/rafiki/model/dataset.py @@ -22,6 +22,103 @@ class InvalidDatasetProtocolException(Exception): pass class InvalidDatasetTypeException(Exception): pass class InvalidDatasetFormatException(Exception): pass +class ModelDatasetUtils(): + ''' + Collection of utility methods to help with the loading of datasets. + + To use these utility methods, import the global ``dataset_utils`` instance from the module ``rafiki.model``. + + For example: + + :: + + from rafiki.model import dataset_utils + ... + def train(self, dataset_uri): + ... + dataset_utils.load_dataset_of_image_files(dataset_uri) + ... + ''' + + def __init__(self): + # Caches downloaded datasets + self._dataset_uri_to_path = {} + + def load_dataset_of_corpus(self, dataset_uri, tags=['tag'], split_by='\\n'): + ''' + Loads dataset with type `CORPUS`. + + :param str dataset_uri: URI of the dataset file + :returns: An instance of ``CorpusDataset``. + ''' + dataset_path = self.download_dataset_from_uri(dataset_uri) + return CorpusDataset(dataset_path, tags, split_by) + + def load_dataset_of_image_files(self, dataset_uri, image_size=None): + ''' + Loads dataset with type `IMAGE_FILES`. + + :param str dataset_uri: URI of the dataset file + :param int[] image_size: dimensions to resize all images to (None for no resizing) + :returns: An instance of ``ImageFilesDataset``. + ''' + dataset_path = self.download_dataset_from_uri(dataset_uri) + return ImageFilesDataset(dataset_path, image_size) + + def resize_as_images(self, images, image_size): + ''' + Resize a list of N grayscale images to another size. + + :param int[][][] images: images to resize as N x 2D lists (grayscale) + :param int[] image_size: dimensions to resize all images to + :returns: images as N x 2D numpy arrays + ''' + images = [Image.fromarray(np.asarray(x, dtype=np.uint8)) for x in images] + images = [np.asarray(x.resize(image_size)) for x in images] + return np.asarray(images) + + def download_dataset_from_uri(self, dataset_uri): + ''' + Downloads the dataset at URI if necessary, ensuring that the dataset ends up in the local filesystem. 
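+
+        Downloads over HTTP/HTTPS are cached by URI, so repeated calls with
+        the same URI return the same local file path.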
+ + :param str dataset_uri: URI of the dataset file + :returns: file path of the dataset file in the local filesystem + ''' + if dataset_uri in self._dataset_uri_to_path: + return self._dataset_uri_to_path[dataset_uri] + + dataset_path = None + + parsed_uri = urlparse(dataset_uri) + protocol = '{uri.scheme}'.format(uri=parsed_uri).lower().strip() + + # Download dataset over HTTP/HTTPS + if protocol == 'http' or protocol == 'https': + + r = requests.get(dataset_uri, stream=True) + temp_file = tempfile.NamedTemporaryFile(delete=False) + + # Show a progress bar while downloading + total_size = int(r.headers.get('content-length', 0)); + block_size = 1024 + iters = math.ceil(total_size / block_size) + for data in tqdm(r.iter_content(block_size), total=iters, unit='KB'): + temp_file.write(data) + + temp_file.close() + + dataset_path = temp_file.name + + # Assume it is on filesystem + elif protocol == '' or protocol == 'file': + dataset_path = dataset_uri + else: + raise InvalidDatasetProtocolException() + + # Cache dataset path to possibly prevent re-downloading + self._dataset_uri_to_path[dataset_uri] = dataset_path + return dataset_path + class ModelDataset(): ''' Abstract that helps loading of dataset of a specific type @@ -170,88 +267,4 @@ def _load(self, dataset_path): return (num_samples, num_classes, image_paths, image_classes, dataset_dir) -class ModelDatasetUtils(): - ''' - Collection of utility methods to help with the loading of datasets - ''' - def __init__(self): - # Caches downloaded datasets - self._dataset_uri_to_path = {} - - def load_dataset_of_corpus(self, dataset_uri, tags=['tag'], split_by='\\n'): - ''' - Loads dataset with type `CORPUS`. - - :param str dataset_uri: URI of the dataset file - :returns: An instance of ``CorpusDataset``. - ''' - dataset_path = self.download_dataset_from_uri(dataset_uri) - return CorpusDataset(dataset_path, tags, split_by) - - def load_dataset_of_image_files(self, dataset_uri, image_size=None): - ''' - Loads dataset with type `IMAGE_FILES`. - - :param str dataset_uri: URI of the dataset file - :param str image_size: dimensions to resize all images to (None for no resizing) - :returns: An instance of ``ImageFilesDataset``. - ''' - dataset_path = self.download_dataset_from_uri(dataset_uri) - return ImageFilesDataset(dataset_path, image_size) - - def resize_as_images(self, images, image_size): - ''' - Resize a list of N grayscale images to another size. - - :param int[][][] images: images to resize as a N x 2D lists (grayscale) - :param int image_size: dimensions to resize all images to (None for no resizing) - :returns: images as N x 2D numpy arrays - ''' - images = [Image.fromarray(np.asarray(x, dtype=np.uint8)) for x in images] - images = [np.asarray(x.resize(image_size)) for x in images] - return np.asarray(images) - - def download_dataset_from_uri(self, dataset_uri): - ''' - Maybe download the dataset at URI, ensuring that the dataset ends up in the local filesystem. 
- - :param str dataset_uri: URI of the dataset file - :returns: file path of the dataset file in the local filesystem - ''' - if dataset_uri in self._dataset_uri_to_path: - return self._dataset_uri_to_path[dataset_uri] - - dataset_path = None - - parsed_uri = urlparse(dataset_uri) - protocol = '{uri.scheme}'.format(uri=parsed_uri).lower().strip() - - # Download dataset over HTTP/HTTPS - if protocol == 'http' or protocol == 'https': - - r = requests.get(dataset_uri, stream=True) - temp_file = tempfile.NamedTemporaryFile(delete=False) - - # Show a progress bar while downloading - total_size = int(r.headers.get('content-length', 0)); - block_size = 1024 - iters = math.ceil(total_size / block_size) - for data in tqdm(r.iter_content(block_size), total=iters, unit='KB'): - temp_file.write(data) - - temp_file.close() - - dataset_path = temp_file.name - - # Assume it is on filesystem - elif protocol == '' or protocol == 'file': - dataset_path = dataset_uri - else: - raise InvalidDatasetProtocolException() - - # Cache dataset path to possibly prevent re-downloading - self._dataset_uri_to_path[dataset_uri] = dataset_path - return dataset_path - - dataset_utils = ModelDatasetUtils() \ No newline at end of file diff --git a/rafiki/model/log.py b/rafiki/model/log.py index 4575c8fa..6a6afbc5 100644 --- a/rafiki/model/log.py +++ b/rafiki/model/log.py @@ -12,6 +12,25 @@ class LogType(): MESSAGE = 'MESSAGE' class ModelLogger(): + ''' + Allows models to log messages and metrics during model training, and + define plots for visualization of model training. + + To use this logger, import the global ``logger`` instance from the module ``rafiki.model``. + + For example: + + :: + + from rafiki.model import logger + ... + def train(self, dataset_uri): + ... + logger.log('Starting model training...') + ... 
+ + ''' + def __init__(self): # By default, set a logging handler to print to stdout (for debugging) logger = logging.getLogger(__name__) From 65ca9b165d88a807d4d83aeaa85284d02a3e7e06 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Sat, 8 Dec 2018 21:29:32 +0800 Subject: [PATCH 14/22] Add `FixedKnob` --- .../image_classification/TfFeedForward.py | 4 +-- rafiki/advisor/btb_gp_advisor.py | 14 ++++++-- rafiki/model/__init__.py | 2 +- rafiki/model/knob.py | 36 ++++++++++++++++++- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/examples/models/image_classification/TfFeedForward.py b/examples/models/image_classification/TfFeedForward.py index fea27cb3..1ce4cb4f 100644 --- a/examples/models/image_classification/TfFeedForward.py +++ b/examples/models/image_classification/TfFeedForward.py @@ -9,7 +9,7 @@ from rafiki.config import APP_MODE from rafiki.model import BaseModel, InvalidModelParamsException, test_model_class, \ - IntegerKnob, CategoricalKnob, FloatKnob, dataset_utils, logger + IntegerKnob, CategoricalKnob, FloatKnob, FixedKnob, dataset_utils, logger from rafiki.constants import TaskType, ModelDependency class TfFeedForward(BaseModel): @@ -25,7 +25,7 @@ def get_knob_config(): 'hidden_layer_units': IntegerKnob(2, 128), 'learning_rate': FloatKnob(1e-5, 1e-1, is_exp=True), 'batch_size': CategoricalKnob([16, 32, 64, 128]), - 'image_size': CategoricalKnob([8, 16, 32]), + 'image_size': FixedKnob(32) } def __init__(self, **knobs): diff --git a/rafiki/advisor/btb_gp_advisor.py b/rafiki/advisor/btb_gp_advisor.py index ee02305a..3af2e013 100644 --- a/rafiki/advisor/btb_gp_advisor.py +++ b/rafiki/advisor/btb_gp_advisor.py @@ -1,7 +1,7 @@ from btb.tuning import GP from btb import HyperParameter, ParamTypes -from rafiki.model import BaseKnob, FloatKnob, IntegerKnob, CategoricalKnob +from rafiki.model import BaseKnob, FloatKnob, IntegerKnob, CategoricalKnob, FixedKnob from .advisor import BaseAdvisor class BtbGpAdvisor(BaseAdvisor): @@ -40,6 +40,15 @@ def _knob_to_tunable(knob): return HyperParameter(ParamTypes.STRING, knob.values) elif knob.value_type is bool: return HyperParameter(ParamTypes.BOOL, knob.values) + elif isinstance(knob, FixedKnob): + if knob.value_type is int: + return HyperParameter(ParamTypes.INT_CAT, [knob.value]) + elif knob.value_type is float: + return HyperParameter(ParamTypes.FLOAT_CAT, [knob.value]) + elif knob.value_type is str: + return HyperParameter(ParamTypes.STRING, [knob.value]) + elif knob.value_type is bool: + return HyperParameter(ParamTypes.BOOL, [knob.value]) elif isinstance(knob, IntegerKnob): if knob.is_exp: return HyperParameter(ParamTypes.INT_EXP, [knob.value_min, knob.value_max]) @@ -49,4 +58,5 @@ def _knob_to_tunable(knob): if knob.is_exp: return HyperParameter(ParamTypes.FLOAT_EXP, [knob.value_min, knob.value_max]) else: - return HyperParameter(ParamTypes.FLOAT, [knob.value_min, knob.value_max]) \ No newline at end of file + return HyperParameter(ParamTypes.FLOAT, [knob.value_min, knob.value_max]) + \ No newline at end of file diff --git a/rafiki/model/__init__.py b/rafiki/model/__init__.py index fe124769..fd8cebb5 100644 --- a/rafiki/model/__init__.py +++ b/rafiki/model/__init__.py @@ -1,6 +1,6 @@ from .model import BaseModel, test_model_class, load_model_class, \ parse_model_install_command, InvalidModelClassException, InvalidModelParamsException -from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob, \ +from .knob import BaseKnob, CategoricalKnob, IntegerKnob, FloatKnob, FixedKnob, \ serialize_knob_config, 
deserialize_knob_config
 from .dataset import dataset_utils, ModelDatasetUtils, CorpusDataset, ImageFilesDataset
 from .log import logger, ModelLogger
\ No newline at end of file
diff --git a/rafiki/model/knob.py b/rafiki/model/knob.py
index 7ab10d07..ee8b2173 100644
--- a/rafiki/model/knob.py
+++ b/rafiki/model/knob.py
@@ -26,7 +26,7 @@ def from_json(cls, json_str):
         knob_type = json_dict['type']
         knob_args = json_dict['args']
 
-        knob_classes = [CategoricalKnob, IntegerKnob, FloatKnob]
+        knob_classes = [CategoricalKnob, IntegerKnob, FloatKnob, FixedKnob]
 
         for clazz in knob_classes:
             if clazz.__name__ == knob_type:
                 return clazz(**knob_args)
@@ -73,6 +73,40 @@ def _validate_values(values):
 
         return (value_type)
 
+class FixedKnob(BaseKnob):
+    '''
+    Knob type representing a single fixed value of type ``int``, ``float``, ``bool`` or ``str``.
+    Essentially, this represents a knob that does not require tuning.
+    '''
+    def __init__(self, value):
+        knob_args = { 'value': value }
+        super().__init__(knob_args)
+        self._value = value
+        self._value_type = self._validate_value(value)
+
+    @property
+    def value_type(self):
+        return self._value_type
+
+    @property
+    def value(self):
+        return self._value
+
+    @staticmethod
+    def _validate_value(value):
+        if isinstance(value, bool):
+            value_type = bool
+        elif isinstance(value, int):
+            value_type = int
+        elif isinstance(value, float):
+            value_type = float
+        elif isinstance(value, str):
+            value_type = str
+        else:
+            raise TypeError('Only the following types for `value` are supported: `int`, `float`, `bool`, `str`')
+
+        return value_type
+
 class IntegerKnob(BaseKnob):
     '''
     Knob type representing `any` ``int`` value within a specific interval [``value_min``, ``value_max``].

From 973a84a1ba05d82e5c45b794dc045219261544bf Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Sat, 8 Dec 2018 21:29:51 +0800
Subject: [PATCH 15/22] Gitignore IPython notebook files

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 733de8c6..71a1103f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,4 +25,8 @@ data/*
 # Logs
 *.log
 logs/*
-!logs/.gitkeep
\ No newline at end of file
+!logs/.gitkeep
+
+# IPython notebooks
+.ipynb_checkpoints/*
+*.ipynb
\ No newline at end of file

From 5a7f261cc567d3c4e20d0389f4f43fa0a227d2f8 Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Wed, 12 Dec 2018 20:12:40 +0800
Subject: [PATCH 16/22] Add GPU-aware train worker placement & node configuration

---
 rafiki/admin/services_manager.py      | 20 +++++++++++-----
 rafiki/container/__init__.py          |  2 +-
 rafiki/container/container_manager.py | 14 +++++++++---
 rafiki/container/docker_swarm.py      | 29 +++++++++++++++++++----
 rafiki/db/database.py                 |  5 ++--
 rafiki/db/schema.py                   |  3 ++-
 scripts/setup_node.sh                 | 33 +++++++++++++++++++++++++++
 scripts/start.sh                      |  5 ++++
 8 files changed, 94 insertions(+), 17 deletions(-)
 create mode 100644 scripts/setup_node.sh

diff --git a/rafiki/admin/services_manager.py b/rafiki/admin/services_manager.py
index b71058a8..333790fe 100644
--- a/rafiki/admin/services_manager.py
+++ b/rafiki/admin/services_manager.py
@@ -8,7 +8,7 @@
 from rafiki.config import MIN_SERVICE_PORT, MAX_SERVICE_PORT, \
     TRAIN_WORKER_REPLICAS_PER_MODEL, INFERENCE_WORKER_REPLICAS_PER_TRIAL, \
     INFERENCE_MAX_BEST_TRIALS, SERVICE_STATUS_WAIT
-from rafiki.container import DockerSwarmContainerManager
+from rafiki.container import DockerSwarmContainerManager, ServiceRequirement, InvalidServiceRequest
 from rafiki.model import parse_model_install_command
 
 logger = 
logging.getLogger(__name__) @@ -185,11 +185,16 @@ def _create_train_job_worker(self, train_job, model, replicas): **({'CUDA_VISIBLE_DEVICES': -1} if not enable_gpu else {}) # Hide GPU if not enabled } + requirements = [] + if enable_gpu: + requirements.append(ServiceRequirement.GPU) + service = self._create_service( service_type=service_type, docker_image=model.docker_image, replicas=replicas, - environment_vars=environment_vars + environment_vars=environment_vars, + requirements=requirements ) self._db.create_train_job_worker( @@ -241,14 +246,15 @@ def _wait_until_services_running(self, services): def _create_service(self, service_type, docker_image, replicas, environment_vars={}, args=[], - container_port=None): + container_port=None, requirements=[]): # Create service in DB container_manager_type = type(self._container_manager).__name__ service = self._db.create_service( container_manager_type=container_manager_type, service_type=service_type, - docker_image=docker_image + docker_image=docker_image, + requirements=requirements ) self._db.commit() @@ -284,7 +290,8 @@ def _create_service(self, service_type, docker_image, args=args, environment_vars=environment_vars, mounts=mounts, - publish_port=publish_port + publish_port=publish_port, + requirements=requirements ) container_service_id = container_service['id'] @@ -303,11 +310,12 @@ def _create_service(self, service_type, docker_image, ) self._db.commit() - except Exception: + except Exception as e: logger.error('Error while creating service with ID {}'.format(service.id)) logger.error(traceback.format_exc()) self._db.mark_service_as_errored(service) self._db.commit() + raise e return service diff --git a/rafiki/container/__init__.py b/rafiki/container/__init__.py index f76a4d2b..93904798 100644 --- a/rafiki/container/__init__.py +++ b/rafiki/container/__init__.py @@ -1,2 +1,2 @@ from .container_manager import ContainerManager -from .docker_swarm import DockerSwarmContainerManager \ No newline at end of file +from .docker_swarm import DockerSwarmContainerManager, ServiceRequirement, InvalidServiceRequest \ No newline at end of file diff --git a/rafiki/container/container_manager.py b/rafiki/container/container_manager.py index 4b7a939e..22152041 100644 --- a/rafiki/container/container_manager.py +++ b/rafiki/container/container_manager.py @@ -1,13 +1,20 @@ import abc import os +class InvalidServiceRequest(Exception): + pass + +class ServiceRequirement(): + GPU = 'gpu' + class ContainerManager(abc.ABC): def __init__(self, **kwargs): raise NotImplementedError() @abc.abstractmethod - def create_service(self, service_name, docker_image, - replicas, args, environment_vars, mounts={}, publish_port=None): + def create_service(self, service_name, docker_image, replicas, + args, environment_vars, mounts={}, publish_port=None, + requirements=[]): ''' Creates a service with a set number of replicas. 
@@ -23,7 +30,8 @@ def create_service(self, service_name, docker_image,
             mounts: {String: String} - Dict of host directory to container directory for mounting of volumes onto container
             publish_port: (<published_port>, <container_port>) - host port (port to be published) to container port
                 The service should then be reachable at the host port on the host
-            
+            requirements: [ServiceRequirement] - List of requirements for the service
+
         Returns {String: String} where
             id: String - ID for the service created
             hostname: String - Hostname for the service created (in the internal network)

diff --git a/rafiki/container/docker_swarm.py b/rafiki/container/docker_swarm.py
index 2d21f0f5..50d41f41 100644
--- a/rafiki/container/docker_swarm.py
+++ b/rafiki/container/docker_swarm.py
@@ -4,7 +4,7 @@
 import docker
 import logging
 
-from .container_manager import ContainerManager
+from .container_manager import ContainerManager, ServiceRequirement, InvalidServiceRequest
 
 logger = logging.getLogger(__name__)
 
@@ -15,7 +15,9 @@ def __init__(self,
         self._client = docker.from_env()
 
     def create_service(self, service_name, docker_image, replicas,
-                        args, environment_vars, mounts={}, publish_port=None):
+                        args, environment_vars, mounts={}, publish_port=None,
+                        requirements=[]):
+
         env = [
             '{}={}'.format(k, v)
             for (k, v) in environment_vars.items()
@@ -38,6 +40,16 @@ def create_service(self, service_name, docker_image, replicas,
                 'PublishedPort': published_port,
                 'TargetPort': container_port
             }]
+
+        # Gather list of constraints
+        constraints = []
+        if ServiceRequirement.GPU in requirements:
+            # Check if there are nodes with GPU, raise error otherwise
+            has_gpu = self._if_any_node_has_gpu()
+            if not has_gpu:
+                raise InvalidServiceRequest('There are no nodes with GPU to deploy the service on')
+
+            constraints.append('node.labels.gpu!=0')
 
         service = self._client.services.create(
             image=docker_image,
@@ -50,6 +62,7 @@ def create_service(self, service_name, docker_image, replicas,
             restart_policy={
                 'Condition': 'on-failure'
             },
+            constraints=constraints,
             endpoint_spec={
                 'Ports': ports_list
             },
@@ -84,5 +97,13 @@ def destroy_service(self, service_id):
         service.remove()
 
         logger.info('Deleted service of ID {}'.format(service_id))
-            
-        
\ No newline at end of file
+
+    def _if_any_node_has_gpu(self):
+        nodes = self._client.nodes.list()
+        has_gpu = False
+        for node in nodes:
+            gpu = int(node.attrs.get('Spec', {}).get('Labels', {}).get('gpu', 0))
+            if gpu > 0:
+                has_gpu = True
+
+        return has_gpu
\ No newline at end of file

diff --git a/rafiki/db/database.py b/rafiki/db/database.py
index 5ec9c58a..8bef9e8e 100644
--- a/rafiki/db/database.py
+++ b/rafiki/db/database.py
@@ -227,11 +227,12 @@ def get_workers_of_inference_job(self, inference_job_id):
     ####################################
 
     def create_service(self, service_type, container_manager_type,
-                    docker_image):
+                    docker_image, requirements):
         service = Service(
             service_type=service_type,
             docker_image=docker_image,
-            container_manager_type=container_manager_type
+            container_manager_type=container_manager_type,
+            requirements=requirements
         )
         self._session.add(service)
         return service

diff --git a/rafiki/db/schema.py b/rafiki/db/schema.py
index a8150222..a53e3d66 100644
--- a/rafiki/db/schema.py
+++ b/rafiki/db/schema.py
@@ -1,6 +1,6 @@
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy import Column, String, Float, ForeignKey, Integer, Binary, DateTime
-from sqlalchemy.dialects.postgresql import JSON
+from sqlalchemy.dialects.postgresql import JSON, ARRAY
 
 import uuid
 import datetime
 
@@ -63,6 +63,7 @@ class 
Service(Base):
     port = Column(Integer)
     container_service_name = Column(String)
     container_service_id = Column(String)
+    requirements = Column(ARRAY(String))
 
 class TrainJob(Base):
     __tablename__ = 'train_job'

diff --git a/scripts/setup_node.sh b/scripts/setup_node.sh
new file mode 100644
index 00000000..a0044fa7
--- /dev/null
+++ b/scripts/setup_node.sh
@@ -0,0 +1,33 @@
+# Determine whether node is Docker Swarm manager or worker
+swarm_role=$1
+if [ -z "$swarm_role" ]
+then
+    while true; do
+        read -p "Is this node a Docker Swarm manager running Rafiki? (y/n) " yn
+        case $yn in
+            [Yy]* ) swarm_role="manager" ; break;;
+            [Nn]* ) swarm_role="worker" ; break;;
+            * ) echo "Please answer yes or no.";;
+        esac
+    done
+fi
+
+# For workers, join Docker Swarm
+if [ "$swarm_role" = "worker" ]
+then
+    read -p "IP address of Docker Swarm manager? " ip_addr
+    read -p "Docker Swarm join token? " join_token
+    docker swarm leave
+    docker swarm join --token $join_token $ip_addr
+fi
+
+# Add node label that specifies no. of GPUs
+hostname=$(docker node inspect self | sed -n 's/"Hostname".*"\(.*\)".*/\1/p' | xargs)
+while true; do
+    read -p "No. of GPUs (0-9)? " gpus
+    case $gpus in
+        [0-9] ) break;;
+        * ) echo "Please answer an integer from 0-9.";;
+    esac
+done
+docker node update --label-add gpu=$gpus $hostname
\ No newline at end of file

diff --git a/scripts/start.sh b/scripts/start.sh
index 49af49e1..d739a8fb 100644
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -33,6 +33,11 @@ ensure_stable()
 title "Creating Docker swarm for Rafiki..."
 bash $FILE_DIR/create_docker_swarm.sh
 
+# Setup node for Rafiki
+
+title "Setting up node for Rafiki..."
+bash $FILE_DIR/setup_node.sh manager
+
 # Pull images from Docker Hub
 
 title "Pulling images for Rafiki from Docker Hub..."

From 3cd65a7582cfcc13954c76aa26d83cec6f635ac0 Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Wed, 12 Dec 2018 20:18:58 +0800
Subject: [PATCH 17/22] Update docs for new node configuration step

---
 docs/src/dev/setup.rst | 14 +++++++++++++-
 scripts/setup_node.sh  |  2 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/docs/src/dev/setup.rst b/docs/src/dev/setup.rst
index 0abcbc65..c1e6b988 100644
--- a/docs/src/dev/setup.rst
+++ b/docs/src/dev/setup.rst
@@ -36,7 +36,19 @@ Adding Nodes to Rafiki
 
 Rafiki has its dynamic stack (e.g. train workers, inference workers, predictors) running as `Docker Swarm Services `_.
-Horizontal scaling can be done by `adding more nodes to the swarm `_.
+
+Horizontal scaling can be done by adding more nodes to the swarm.
+
+Perform the following for *each* worker node to be added:
+
+1. Connect the node to the same network as the master, so that the node can `join the master's Docker Swarm `_.
+
+2. Configure the node with the script:
+
+    .. code-block:: shell
+
+        bash scripts/setup_node.sh
+
 
 Exposing Rafiki Publicly
 --------------------------------------------------------------------

diff --git a/scripts/setup_node.sh b/scripts/setup_node.sh
index a0044fa7..21ae8c1b 100644
--- a/scripts/setup_node.sh
+++ b/scripts/setup_node.sh
@@ -24,7 +24,7 @@ fi
 # Add node label that specifies no. of GPUs
 hostname=$(docker node inspect self | sed -n 's/"Hostname".*"\(.*\)".*/\1/p' | xargs)
 while true; do
-    read -p "No. of GPUs (0-9)? " gpus
+    read -p "No. of GPUs? 
(0-9) " gpus
     case $gpus in
         [0-9] ) break;;
         * ) echo "Please answer an integer from 0-9.";;
     esac
 done

From ff985db760b7eebd971dc2695d8fcd467b26c836 Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Wed, 12 Dec 2018 20:49:02 +0800
Subject: [PATCH 18/22] Clean and correct docs

---
 docs/src/python/rafiki.model.rst  |  3 +++
 docs/src/user/creating-models.rst |  2 +-
 docs/src/user/quickstart.rst      | 20 ++++++++------------
 rafiki/client/client.py           |  3 +--
 rafiki/model/log.py               | 22 +++++++++++++++-------
 5 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/docs/src/python/rafiki.model.rst b/docs/src/python/rafiki.model.rst
index 0c8eaed4..2de46e76 100644
--- a/docs/src/python/rafiki.model.rst
+++ b/docs/src/python/rafiki.model.rst
@@ -27,6 +27,9 @@ Knob Classes
 .. autoclass:: rafiki.model.FloatKnob
     :members:
 
+.. autoclass:: rafiki.model.FixedKnob
+    :members:
+
 Utility Classes & Methods
 --------------------------------------------------------------------
 
diff --git a/docs/src/user/creating-models.rst b/docs/src/user/creating-models.rst
index 68684e55..782a1e8f 100644
--- a/docs/src/user/creating-models.rst
+++ b/docs/src/user/creating-models.rst
@@ -57,7 +57,7 @@ prior to model training and inference. This is configurable with the ``dependenc
 during model creation.
 
 Alternatively, you can build a custom Docker image that extends ``rafikiai/rafiki_worker``,
-installing the required dependencies for your model. This is configurable with ``docker_image``) option
+installing the required dependencies for your model. This is configurable with the ``docker_image`` option
 during model creation.
 
 Models should at least run on CPU-only machines and optionally leverage a shared GPU, if it is available.

diff --git a/docs/src/user/quickstart.rst b/docs/src/user/quickstart.rst
index 866f0259..cc1593dc 100644
--- a/docs/src/user/quickstart.rst
+++ b/docs/src/user/quickstart.rst
@@ -7,21 +7,17 @@ Quick Start
 
 .. note::
 
-    If you're a *Model Developer* just looking to contribute models to a running instance of Rafiki, refer to :ref:`quickstart-model-developers`.
+    - If you're a *Model Developer* just looking to contribute models to a running instance of Rafiki, refer to :ref:`quickstart-model-developers`.
+    - If you're an *Application Developer* just looking to train and deploy models on a running instance of Rafiki, refer to :ref:`quickstart-app-developers`.
+    - If you're an *Application User* just looking to make predictions to deployed models on a running instance of Rafiki, refer to :ref:`quickstart-app-users`.
 
-.. note::
-
-    If you're an *Application Developer* just looking to train and deploy models on a running instance of Rafiki, refer to :ref:`quickstart-app-developers`.
-
-.. note::
-
-    If you're an *Application User* just looking to make predictions to deployed models on a running instance of Rafiki, refer to :ref:`quickstart-app-users`.
+This guide assumes you have deployed an empty instance of Rafiki and you want to try a *full* train-inference flow,
+including adding of models, submitting a train job and submitting an inference job to Rafiki.
 
-This guide assumes you have deployed your an empty instance of Rafiki and you want to do a *full* train-inference flow,
-including preparation of dataset and adding of models to Rafiki. Below, the sequence of examples submit the
-`Fashion MNIST dataset `_ for training and inference.
-Alternatively, after installing Rafiki Client's dependencies, you can run `./examples/scripts/client_quickstart.py `_. 
+The sequence of examples below submits the `Fashion MNIST dataset `_ for training and inference.
+Alternatively, after installing the Rafiki Client's dependencies, you can refer to and run the scripted version of this quickstart
+`./examples/scripts/client_quickstart.py `_.
 
 .. note::
 
diff --git a/rafiki/client/client.py b/rafiki/client/client.py
index 0d210f5f..6fbf12c8 100644
--- a/rafiki/client/client.py
+++ b/rafiki/client/client.py
@@ -370,8 +370,7 @@ def create_advisor(self, knob_config_str, advisor_id=None):
         Creates a Rafiki advisor. If `advisor_id` is passed, it will create an advisor
         of that ID, or do nothing if an advisor of that ID has already been created.
 
-        :param knob_config: Knob configuration for advisor session
-        :type knob_config_str: Knob config, serialized
+        :param str knob_config_str: Serialized knob configuration for advisor session
         :param str advisor_id: ID of advisor to create
         '''
         data = self._post('/advisors', target='advisor',
diff --git a/rafiki/model/log.py b/rafiki/model/log.py
index 6a6afbc5..1e8661b2 100644
--- a/rafiki/model/log.py
+++ b/rafiki/model/log.py
@@ -22,12 +22,19 @@ class ModelLogger():
 
         ::
 
-            from rafiki.model import logger
+            from rafiki.model import logger, BaseModel
             ...
-            def train(self, dataset_uri):
-                ...
-                logger.log('Starting model training...')
+            class MyModel(BaseModel):
                 ...
+                def train(self, dataset_uri):
+                    ...
+                    logger.log('Starting model training...')
+                    logger.define_plot('Precision & Recall', y_axis=['precision', 'recall'])
+                    ...
+                    logger.log(precision=0.1, recall=0.6, epoch=1)
+                    ...
+                    logger.log('Ending model training...')
+                    ...
 
     '''
 
@@ -41,14 +48,14 @@ def __init__(self):
     def define_loss_plot(self):
         '''
         Convenience method of defining a plot of ``loss`` against ``epoch``.
-        To be used with :meth:`rafiki.model.ModeLogger.log_loss`.
+        To be used with :meth:`rafiki.model.ModelLogger.log_loss`.
         '''
         self.define_plot('Loss Over Epochs', ['loss'], x_axis='epoch')
 
     def log_loss(self, loss, epoch):
         '''
         Convenience method for logging `loss` against `epoch`.
-        To be used with :meth:`rafiki.model.ModeLogger.define_loss_plot`..
+        To be used with :meth:`rafiki.model.ModelLogger.define_loss_plot`.
         '''
         self.log(loss=loss, epoch=epoch)
 
@@ -76,7 +83,8 @@ def log(self, msg='', **metrics):
         Logs a message and/or a set of metrics at a single point in time.
 
         Logged messages will be viewable on Rafiki's administrative UI.
-        To visualize logged metrics on plots, a plot must be defined via :meth:`rafiki.model.ModeLogger.define_plot`.
+
+        To visualize logged metrics on plots, a plot must be defined via :meth:`rafiki.model.ModelLogger.define_plot`.
 
         Only call this method in :meth:`rafiki.model.BaseModel.train` and :meth:`rafiki.model.BaseModel.evaluate`.
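A note on usage: with the logging API documented in the patch above, a model logs messages, defines plots, and logs the plotted metrics from within its ``train()`` method. The sketch below shows how these pieces fit together — ``MyModel`` is a hypothetical model (not one of the bundled examples), and the epoch count and metric values are illustrative only:

.. code-block:: python

    from rafiki.model import BaseModel, logger

    class MyModel(BaseModel):
        ...
        def train(self, dataset_uri):
            logger.log('Starting model training...')
            # Declare a plot of `precision` & `recall` against `epoch` upfront...
            logger.define_plot('Precision & Recall', ['precision', 'recall'], x_axis='epoch')
            for epoch in range(3):
                # (the model would be trained for one epoch here)
                # ...then log those metrics once per epoch
                logger.log(precision=0.1 * (epoch + 1), recall=0.6, epoch=epoch)
            logger.log('Ending model training...')

On Rafiki's administrative UI, the logged messages appear as-is, while the metrics logged above populate the single plot declared by ``define_plot``.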
From 462aa7e105fbc92749f4f3e9ea33acd5ffd8d04f Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Wed, 12 Dec 2018 22:36:32 +0800 Subject: [PATCH 19/22] Mark workers as `RUNNING` only after installation of dependencies complete; fix bug of null predictions in client quickstart --- examples/scripts/client_quickstart.py | 11 ++++------- examples/scripts/tasks/test_pos_tagging.py | 6 ++---- rafiki/admin/services_manager.py | 11 ++++++++--- rafiki/worker/inference.py | 7 ++++--- scripts/start_worker.py | 12 ++++++------ 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/examples/scripts/client_quickstart.py b/examples/scripts/client_quickstart.py index ea275514..4d526086 100644 --- a/examples/scripts/client_quickstart.py +++ b/examples/scripts/client_quickstart.py @@ -61,20 +61,19 @@ def wait_until_train_job_has_completed(client, app): pass # Returns `predictor_host` of inference job -def wait_until_inference_job_is_running(client, app): +def get_predictor_host(client, app): while True: - time.sleep(10) try: inference_job = client.get_running_inference_job(app) status = inference_job.get('status') - if status == InferenceJobStatus.RUNNING: + if status == InferenceJobStatus.RUNNING: return inference_job.get('predictor_host') elif status in [InferenceJobStatus.ERRORED, InferenceJobStatus.STOPPED]: # Inference job has either errored or been stopped return False else: + time.sleep(10) continue - except: pass @@ -169,9 +168,7 @@ def make_predictions(client, predictor_host, queries): print('Creating inference job for app "{}" on Rafiki...'.format(app)) pprint.pprint(client.create_inference_job(app)) - - print('Waiting for inference job to be running...') - predictor_host = wait_until_inference_job_is_running(client, app) + predictor_host = get_predictor_host(client, app) if not predictor_host: raise Exception('Inference job has errored or stopped') print('Inference job is running!') diff --git a/examples/scripts/tasks/test_pos_tagging.py b/examples/scripts/tasks/test_pos_tagging.py index 9c759e4e..bc9f2a96 100644 --- a/examples/scripts/tasks/test_pos_tagging.py +++ b/examples/scripts/tasks/test_pos_tagging.py @@ -5,7 +5,7 @@ from rafiki.client import Client from rafiki.constants import TaskType, BudgetType, UserType, ModelDependency from examples.scripts.client_quickstart import create_user, create_model, \ - create_train_job, wait_until_inference_job_is_running, wait_until_train_job_has_completed, \ + create_train_job, get_predictor_host, wait_until_train_job_has_completed, \ make_predictions, RAFIKI_HOST, ADMIN_PORT, ADMIN_WEB_PORT, SUPERADMIN_EMAIL, MODEL_DEVELOPER_EMAIL, \ APP_DEVELOPER_EMAIL, USER_PASSWORD, ENABLE_GPU @@ -55,9 +55,7 @@ print('Creating inference job for app "{}" on Rafiki...'.format(app)) pprint.pprint(client.create_inference_job(app)) - - print('Waiting for inference job to be running...') - predictor_host = wait_until_inference_job_is_running(client, app) + predictor_host = get_predictor_host(client, app) if not predictor_host: raise Exception('Inference job has errored or stopped') print('Inference job is running!') diff --git a/rafiki/admin/services_manager.py b/rafiki/admin/services_manager.py index 333790fe..5e5eda11 100644 --- a/rafiki/admin/services_manager.py +++ b/rafiki/admin/services_manager.py @@ -46,8 +46,8 @@ def create_inference_services(self, inference_job_id): service = self._create_inference_job_worker(inference_job, trial, replicas) worker_services.append(service) - # Ensure that predictor service is running - 
self._wait_until_services_running([predictor_service]) + # Ensure that all services are running + self._wait_until_services_running([predictor_service, *worker_services]) # Mark inference job as running self._db.mark_inference_job_as_running(inference_job) @@ -85,8 +85,13 @@ def create_train_services(self, train_job_id): # Create a worker service for each model models = self._db.get_models_of_task(train_job.task) model_to_replicas = self._compute_train_worker_replicas_for_models(models) + worker_services = [] for (model, replicas) in model_to_replicas.items(): - self._create_train_job_worker(train_job, model, replicas) + service = self._create_train_job_worker(train_job, model, replicas) + worker_services.append(service) + + # Ensure that all services are running + self._wait_until_services_running(worker_services) # Mark train job as running self._db.mark_train_job_as_running(train_job) diff --git a/rafiki/worker/inference.py b/rafiki/worker/inference.py index 332e37ac..773449d2 100644 --- a/rafiki/worker/inference.py +++ b/rafiki/worker/inference.py @@ -34,11 +34,12 @@ def start(self): with self._db: (inference_job_id, trial_id) = self._read_worker_info() + + # Add to inference job's set of running workers + self._cache.add_worker_of_inference_job(self._service_id, inference_job_id) + self._model = self._load_model(trial_id) - # Add to inference job's set of running workers - self._cache.add_worker_of_inference_job(self._service_id, inference_job_id) - while True: (query_ids, queries) = \ self._cache.pop_queries_of_worker(self._service_id, INFERENCE_WORKER_PREDICT_BATCH_SIZE) diff --git a/scripts/start_worker.py b/scripts/start_worker.py index 2b400139..b44f9c74 100644 --- a/scripts/start_worker.py +++ b/scripts/start_worker.py @@ -3,17 +3,17 @@ from rafiki.db import Database from rafiki.constants import ServiceType +# Run install command +install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') +exit_code = os.system(install_command) +if exit_code != 0: + raise Exception('Install command gave non-zero exit code: "{}"'.format(install_command)) + worker = None def start_service(service_id, service_type): global worker - # Run install command - install_command = os.environ.get('WORKER_INSTALL_COMMAND', '') - exit_code = os.system(install_command) - if exit_code != 0: - raise Exception('Install command gave non-zero exit code: "{}"'.format(install_command)) - if service_type == ServiceType.TRAIN: from rafiki.worker import TrainWorker worker = TrainWorker(service_id) From 1a52945fa13cc07e304e5518f90c7f7de68f9b56 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 13 Dec 2018 20:00:13 +0800 Subject: [PATCH 20/22] Use `self.__dict__` way to initialize knobs as attributes in some sample models --- examples/models/image_classification/SkDt.py | 7 ++----- examples/models/image_classification/SkSvm.py | 9 ++------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index cd1ffe4d..0b390096 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -23,11 +23,8 @@ def get_knob_config(): def __init__(self, **knobs): super().__init__(**knobs) - self._knobs = knobs - self._clf = self._build_classifier( - knobs.get('max_depth'), - knobs.get('criterion') - ) + self.__dict__.update(knobs) + self._clf = self._build_classifier(self.max_depth, self.criterion) def train(self, dataset_uri): dataset = 
dataset_utils.load_dataset_of_image_files(dataset_uri)

diff --git a/examples/models/image_classification/SkSvm.py b/examples/models/image_classification/SkSvm.py
index 894e2565..6c0cf6c3 100644
--- a/examples/models/image_classification/SkSvm.py
+++ b/examples/models/image_classification/SkSvm.py
@@ -25,13 +25,8 @@ def get_knob_config():
 
     def __init__(self, **knobs):
         super().__init__(**knobs)
-        self._knobs = knobs
-        self._clf = self._build_classifier(
-            knobs.get('max_iter'),
-            knobs.get('kernel'),
-            knobs.get('gamma'),
-            knobs.get('C')
-        )
+        self.__dict__.update(knobs)
+        self._clf = self._build_classifier(self.max_iter, self.kernel, self.gamma, self.C)
 
     def train(self, dataset_uri):
         dataset = dataset_utils.load_dataset_of_image_files(dataset_uri)

From 3a3fbe8fa546f3e1a6763cca83ad4c2f6d0dfd40 Mon Sep 17 00:00:00 2001
From: Ngin Yun Chuan
Date: Thu, 13 Dec 2018 20:01:53 +0800
Subject: [PATCH 21/22] Correct docs about logging

---
 rafiki/model/log.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rafiki/model/log.py b/rafiki/model/log.py
index 1e8661b2..c7dbea4e 100644
--- a/rafiki/model/log.py
+++ b/rafiki/model/log.py
@@ -29,7 +29,7 @@ class MyModel(BaseModel):
         def train(self, dataset_uri):
             ...
             logger.log('Starting model training...')
-            logger.define_plot('Precision & Recall', y_axis=['precision', 'recall'])
+            logger.define_plot('Precision & Recall', ['precision', 'recall'], x_axis=['epoch'])
             ...
             logger.log(precision=0.1, recall=0.6, epoch=1)
             ...
@@ -66,13 +66,13 @@ def define_plot(self, title, metrics, x_axis=None):
 
         For example, a model's precision & recall logged with e.g. ``log(precision=0.1, recall=0.6, epoch=1)``
         can be visualized in the plots generated by
-        ``define_plot('Precision & Recall', y_axis=['precision', 'recall'])`` (against time) or
-        ``define_plot('Precision & Recall', y_axis=['precision', 'recall'], x_axis=['epoch'])`` (against epochs).
+        ``define_plot('Precision & Recall', ['precision', 'recall'])`` (against time) or
+        ``define_plot('Precision & Recall', ['precision', 'recall'], x_axis='epoch')`` (against epochs).
 
         Only call this method in :meth:`rafiki.model.BaseModel.train`.
 
         :param str title: Title of the plot
-        :param str metrics: List of metrics that should be plotted on the y-axis
+        :param metrics: List of metrics that should be plotted on the y-axis
         :type metrics: str[]
         :param str x_axis: Metric that should be plotted on the x-axis, against all other metrics. Defaults to ``'time'``, which is automatically logged
         '''
@@ -89,7 +89,7 @@ def log(self, msg='', **metrics):
 
         :param str msg: Message to be logged
-        :param metrics: Set of metrics & their values to be logged as { <metric>: <value> }, where <value> should be a number.
+        :param metrics: Set of metrics & their values to be logged as ``{ <metric>: <value> }``, where ``<value>`` should be a number. 
:type metrics: dict[str, int|float] ''' if msg: From 700b8dcb292d959054c8305ebfa6c6b049498bb0 Mon Sep 17 00:00:00 2001 From: Ngin Yun Chuan Date: Thu, 13 Dec 2018 20:08:07 +0800 Subject: [PATCH 22/22] Make small correction in model logging docs --- examples/models/image_classification/SkDt.py | 2 +- rafiki/model/log.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/models/image_classification/SkDt.py b/examples/models/image_classification/SkDt.py index 0b390096..c47f06c9 100644 --- a/examples/models/image_classification/SkDt.py +++ b/examples/models/image_classification/SkDt.py @@ -25,7 +25,7 @@ def __init__(self, **knobs): super().__init__(**knobs) self.__dict__.update(knobs) self._clf = self._build_classifier(self.max_depth, self.criterion) - + def train(self, dataset_uri): dataset = dataset_utils.load_dataset_of_image_files(dataset_uri) (images, classes) = zip(*[(image, image_class) for (image, image_class) in dataset]) diff --git a/rafiki/model/log.py b/rafiki/model/log.py index c7dbea4e..fecff004 100644 --- a/rafiki/model/log.py +++ b/rafiki/model/log.py @@ -29,7 +29,7 @@ class MyModel(BaseModel): def train(self, dataset_uri): ... logger.log('Starting model training...') - logger.define_plot('Precision & Recall', ['precision', 'recall'], x_axis=['epoch']) + logger.define_plot('Precision & Recall', ['precision', 'recall'], x_axis='epoch') ... logger.log(precision=0.1, recall=0.6, epoch=1) ...
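
Putting the series together: after these patches, a model declares its tunable knobs with knob classes in a static ``get_knob_config()``, and receives concrete knob values as keyword arguments to ``__init__``. A minimal sketch of the end state, modelled on the ``SkDt``/``SkSvm`` examples — ``MyModel`` is hypothetical, its knob names and ranges are illustrative, and the remaining ``BaseModel`` methods (``train``, ``evaluate``, ``predict``, etc.) are omitted for brevity:

.. code-block:: python

    from rafiki.model import BaseModel, IntegerKnob, FloatKnob, CategoricalKnob, FixedKnob

    class MyModel(BaseModel):
        @staticmethod
        def get_knob_config():
            return {
                'epochs': IntegerKnob(3, 10),
                'learning_rate': FloatKnob(1e-5, 1e-1, is_exp=True),
                'batch_size': CategoricalKnob([16, 32, 64, 128]),
                'image_size': FixedKnob(32)  # Fixed value, excluded from tuning
            }

        def __init__(self, **knobs):
            super().__init__(**knobs)
            # Expose each knob value as an instance attribute, e.g. `self.epochs`,
            # following the `self.__dict__.update(knobs)` pattern from PATCH 20
            self.__dict__.update(knobs)

The advisor treats a ``FixedKnob`` as a single-value categorical (as in ``btb_gp_advisor.py`` above), so it flows through tuning unchanged while still reaching the model as an ordinary knob value.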