
[Done] Feature/mnist train api #971

Merged
merged 35 commits into from
Dec 27, 2016
Changes from 34 commits
35 commits
c0e687b
Refine Code
reyoung Dec 20, 2016
06944ee
Merge branch 'feature/add_const_in_parameter_updater' into feature/mn…
reyoung Dec 20, 2016
8b4cbcf
Start doing mnist_train_api
reyoung Dec 20, 2016
ad6cb60
Merge branch 'feature/clean_gradient_machine_start' into feature/mnis…
reyoung Dec 20, 2016
025e3e9
Add GradientMachine::start/finish to API
reyoung Dec 20, 2016
677c79b
Merge branch 'feature/clean_parameter_updater_finish_pass' into featu…
reyoung Dec 20, 2016
27d87db
Wait for reading data.
reyoung Dec 21, 2016
9f5e742
A tiny fix in PyDataProvider2
reyoung Dec 21, 2016
ad93b8f
Merge branch 'feature/fix_param_hidden_in_pydp2' into feature/mnist_t…
reyoung Dec 21, 2016
5f6c4af
Try to read data in mnist
reyoung Dec 21, 2016
36d1e61
Use numpy in DenseScanner.
reyoung Dec 21, 2016
efb5c10
Merge branch 'feature/fix_swig_dense_scanner' into feature/mnist_trai…
reyoung Dec 21, 2016
20249e8
Try expose ParamUpdater::update
reyoung Dec 21, 2016
05ab22c
A simplest train file for mnist added.
reyoung Dec 21, 2016
1f4f044
A tiny fix in PyDataProvider2
reyoung Dec 21, 2016
cf5bf5b
Merge branch 'feature/fix_param_hidden_in_pydp2' into feature/mnist_t…
reyoung Dec 21, 2016
1e6c87b
Merge branch 'feature/add_const_in_gradient_machine_eval' into featur…
reyoung Dec 21, 2016
eaba2e2
Expose Evaluator API
reyoung Dec 21, 2016
409a577
Complete a very simple mnist demo.
reyoung Dec 21, 2016
06dc66b
Merge branch 'feature/fix_param_hidden_in_pydp2' into feature/mnist_t…
reyoung Dec 21, 2016
680dd92
Add AverageOptimizer, Add save parameter
reyoung Dec 22, 2016
5bca268
Add gitignore
reyoung Dec 22, 2016
59009ba
Always use copy method for numpy.
reyoung Dec 22, 2016
a31ef0c
Merge branch 'feature/mnist_train_api' of github.com:reyoung/Paddle i…
reyoung Dec 22, 2016
f06b64f
Test GPU
reyoung Dec 22, 2016
65e957c
Merge branch 'feature/mnist_train_api' of github.com:reyoung/Paddle i…
reyoung Dec 22, 2016
5a68584
Test on GPU
reyoung Dec 22, 2016
16ea66e
Merge branch 'develop' of github.com:baidu/Paddle into feature/mnist_…
reyoung Dec 22, 2016
3a80272
Add comments.
reyoung Dec 22, 2016
843b63b
add config_parser in trainer_config_helpers to seperate trainer config
jacquesqiao Dec 21, 2016
763a30f
add config_parser_utils
jacquesqiao Dec 22, 2016
9b41b08
Remove unnecessary import in api_train.py
reyoung Dec 22, 2016
f8e4b0b
Merge branch 'develop' of github.com:baidu/Paddle into feature/mnist_…
reyoung Dec 26, 2016
eefe5a7
Merge branch 'develop' of github.com:baidu/Paddle into feature/mnist_…
reyoung Dec 27, 2016
eca4592
Fix merge errors.
reyoung Dec 27, 2016
1 change: 1 addition & 0 deletions demo/mnist/.gitignore
@@ -4,3 +4,4 @@ mnist_vgg_model
plot.png
train.log
*pyc
.ipynb_checkpoints
205 changes: 205 additions & 0 deletions demo/mnist/api_train.py
@@ -0,0 +1,205 @@
"""
A very basic example for how to use current Raw SWIG API to train mnist network.

Current implementation uses Raw SWIG, which means the API call is directly \
passed to C++ side of Paddle.

The user api could be simpler and carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *


def optimizer_config():
    settings(
        learning_rate=1e-4,
        learning_method=AdamOptimizer(),
        batch_size=1000,
        model_average=ModelAverage(average_window=0.5),
        regularization=L2Regularization(rate=0.5))


def network_config():
    imgs = data_layer(name='pixel', size=784)
    hidden1 = fc_layer(input=imgs, size=200)
    hidden2 = fc_layer(input=hidden1, size=200)
    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
    cost = classification_cost(
        input=inference, label=data_layer(
            name='label', size=10))
    outputs(cost)


def init_parameter(network):
    assert isinstance(network, api.GradientMachine)
    for each_param in network.getParameters():
        assert isinstance(each_param, api.Parameter)
        array_size = len(each_param)
        array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)


def generator_to_batch(generator, batch_size):
    ret_val = list()
    for each_item in generator:
        ret_val.append(each_item)
        if len(ret_val) == batch_size:
            yield ret_val
            ret_val = list()
    if len(ret_val) != 0:
        yield ret_val


class BatchPool(object):
    def __init__(self, generator, batch_size):
        self.data = list(generator)
        self.batch_size = batch_size

    def __call__(self):
        random.shuffle(self.data)
        for offset in xrange(0, len(self.data), self.batch_size):
            limit = min(offset + self.batch_size, len(self.data))
            yield self.data[offset:limit]


def input_order_converter(generator):
    for each_item in generator:
        yield each_item['pixel'], each_item['label']


def main():
    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 CPU cores

    # Get enable_types for the optimizer.
    # enable_types = [value, gradient, momentum, etc.]
    # Each optimizer (SGD, Adam, ...) requires the GradientMachine to enable
    # different buffers.
    opt_config_proto = parse_optimizer_config(optimizer_config)
    opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
    _temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
    enable_types = _temp_optimizer_.getParameterTypes()

    # Create a simple GradientMachine.
    model_config = parse_network_config(network_config)
    m = api.GradientMachine.createFromConfigProto(
        model_config, api.CREATE_MODE_NORMAL, enable_types)

    # This type check is not strictly necessary; it only enables type hints
    # in IDEs such as PyCharm.
    assert isinstance(m, api.GradientMachine)

    # Initialize parameters with numpy.
    init_parameter(network=m)

    # Create a local updater. "Local" means it does not run in a cluster.
    # For cluster training, this could be changed to createRemoteUpdater in
    # the future.
    updater = api.ParameterUpdater.createLocalUpdater(opt_config)
Collaborator

I don't use Python very often, so I went and looked at how various style guides describe naming. The following style guides all require function names and method names to be written in the function_name form, not the createLocalUpdater form.

  1. PEP style guide
  2. Google style guide
  3. Python.net

If our API is to be accepted by the Python community, I expect it will have to follow Python style.

Member

ParameterUpdater and createLocalUpdater are structs and methods exposed directly from C++ via SWIG, so they follow the C++ naming convention. My understanding is that none of these are exposed to users directly; we wrap them and unify everything into Python style.
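
A minimal sketch of what such a Python-style wrapper might look like (hypothetical; the snake_case names below are illustrative only and not part of this PR — only the underlying SWIG calls come from it):

# Hypothetical wrapper layer over the SWIG names added in this PR.
import py_paddle.swig_paddle as api


class LocalParameterUpdater(object):
    """Exposes api.ParameterUpdater through snake_case method names."""

    def __init__(self, optimization_config):
        self._updater = api.ParameterUpdater.createLocalUpdater(
            optimization_config)

    def init(self, gradient_machine):
        self._updater.init(gradient_machine)

    def start_pass(self):
        self._updater.startPass()

    def finish_pass(self):
        self._updater.finishPass()


def create_local_updater(optimization_config):
    return LocalParameterUpdater(optimization_config)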

Collaborator Author

Yes, but that is one of the unfortunate aspects of the SWIG API.

These names are translated automatically from the C++ header PaddleAPI.h, and there is no way to customize them.

A C-API would be much better in this respect.

    assert isinstance(updater, api.ParameterUpdater)

    # Initialize the ParameterUpdater.
    updater.init(m)

    # DataProviderConverter is a utility that converts Python objects into
    # Paddle's C++ input arguments. The input format is the same as Paddle's
    # DataProvider.
    converter = DataProviderConverter(
        input_types=[dp.dense_vector(784), dp.integer_value(10)])

    train_file = './data/raw_data/train'
    test_file = './data/raw_data/t10k'

    # Start the gradient machine.
    # The gradient machine must be started before invoking forward/backward,
    # not just for training but also for inference.
    m.start()
Collaborator

start needs an object. If it is a method, sometimes the class itself is the object, and sometimes the object has to be named explicitly. For example:

class BashCommand {
 public:
  void Run();                   // Run the bash command.
  void SetStderrPipe(Pipe* p);  // Set is the predicate, Pipe is the object.
};

What is odd here is that m's type is GradientMachine. If the method is called start, I cannot tell whether it means "start computing gradients" or "start the machine". Judging from the code below, it seems to mean start_training, so the best arrangement would probably be to name m's type Trainer and keep the method name start, so that it reads as "start the trainer".

Collaborator Author

OK, though user-facing code should not look like this. It would probably be more like:

with gradient_machine.enter_training():
    gradient_machine.forwardBackward(...)
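
A minimal sketch of such a helper, assuming only the GradientMachine.start()/finish() methods added in this PR (the training name itself is illustrative, not an existing API):

# Hypothetical helper: wraps start()/finish() in a context manager so that
# user code reads "with training(m): ...".
from contextlib import contextmanager


@contextmanager
def training(gradient_machine):
    gradient_machine.start()  # must be called before forward/backward
    try:
        yield gradient_machine
    finally:
        gradient_machine.finish()

# Usage sketch:
#   with training(m) as machine:
#       machine.forwardBackward(converter(data_batch), outArgs, pass_type)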


    # An evaluator can print the error rate, etc. It is a C++ class.
    batch_evaluator = m.makeEvaluator()
    test_evaluator = m.makeEvaluator()

    # Get the training data.
    # The training data is stored in a data pool. The current implementation
    # does not care about memory or speed; it is just a very naive
    # implementation.
    train_data_generator = input_order_converter(read_from_mnist(train_file))
    train_data = BatchPool(train_data_generator, 512)

    # outArgs holds the neural network's forward results. They are not used
    # here; outArgs is just passed to gradient_machine.forward.
    outArgs = api.Arguments.createArguments(0)

    for pass_id in xrange(2):  # we train 2 passes.
        updater.startPass()
Collaborator

Collaborator Author

Yeah, the earlier code was only half finished; I ran into some other issues and submitted them in a separate PR.

The code further down fills this part in.


        for batch_id, data_batch in enumerate(train_data()):
            # data_batch is a batch of input images.
            # For online learning, data_batch could instead come from the
            # network.

            # Start updating one batch.
            pass_type = updater.startBatch(len(data_batch))

            # Start the batch evaluator.
            # batch_evaluator can be used between start/finish.
            batch_evaluator.start()

            # forwardBackward is a shortcut for forward and backward.
            # It is sometimes faster than invoking forward/backward separately,
            # because in GradientMachine it may run asynchronously.
            m.forwardBackward(converter(data_batch), outArgs, pass_type)

            for each_param in m.getParameters():
                updater.update(each_param)

            # Get the cost. We use numpy to compute the total cost for this
            # batch.
            cost_vec = outArgs.getSlotValue(0)
            cost_vec = cost_vec.copyToNumpyMat()
            cost = cost_vec.sum() / len(data_batch)

            # Run the evaluator.
            m.eval(batch_evaluator)

            # Print logs.
            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
                cost, batch_evaluator

            batch_evaluator.finish()
            # Finish the batch.
            # * Clears the gradients.
            # * Ensures all values have been updated.
            updater.finishBatch(cost)

        # Testing stage: use the test data set to test the current network.
        updater.apply()
        test_evaluator.start()
        test_data_generator = input_order_converter(read_from_mnist(test_file))
        for data_batch in generator_to_batch(test_data_generator, 512):
            # In the testing stage, only forward is needed.
            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
            m.eval(test_evaluator)

        # Print the error rate for the test data set.
        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
        test_evaluator.finish()
        updater.restore()

        updater.catchUpWith()
        params = m.getParameters()
        for each_param in params:
            assert isinstance(each_param, api.Parameter)
            value = each_param.getBuf(api.PARAMETER_VALUE)
            value = value.copyToNumpyArray()

            # Here, we could save the parameters anywhere we want.
            print each_param.getName(), value

        updater.finishPass()

    m.finish()
Collaborator

m.complete_training

Also, the difference between finish and complete is the difference between "done for" and "made whole", for example:

If you married the wrong woman, you are finished.

If you married the right woman, you are completed.

Collaborator Author

haha



if __name__ == '__main__':
    main()
28 changes: 3 additions & 25 deletions demo/mnist/mnist_provider.py
@@ -1,5 +1,5 @@
from paddle.trainer.PyDataProvider2 import *
import numpy
from mnist_util import read_from_mnist


# Define a py data provider
@@ -8,27 +8,5 @@
'label': integer_value(10)},
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):  # settings is not used currently.
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
    f = open(imgf, "rb")
    l = open(labelf, "rb")

    f.read(16)
    l.read(8)

    # Define number of samples for train/test
    if "train" in filename:
        n = 60000
    else:
        n = 10000

    images = numpy.fromfile(
        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
    images = images / 255.0 * 2.0 - 1.0
    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")

    for i in xrange(n):
        yield {"pixel": images[i, :], 'label': labels[i]}

    f.close()
    l.close()
    for each in read_from_mnist(filename):
        yield each
30 changes: 30 additions & 0 deletions demo/mnist/mnist_util.py
@@ -0,0 +1,30 @@
import numpy

__all__ = ['read_from_mnist']


def read_from_mnist(filename):
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
    f = open(imgf, "rb")
    l = open(labelf, "rb")

    f.read(16)
    l.read(8)

    # Define number of samples for train/test
    if "train" in filename:
        n = 60000
    else:
        n = 10000

    images = numpy.fromfile(
        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
    images = images / 255.0 * 2.0 - 1.0
    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")

    for i in xrange(n):
        yield {"pixel": images[i, :], 'label': labels[i]}

    f.close()
    l.close()
11 changes: 11 additions & 0 deletions paddle/api/CMakeLists.txt
@@ -1,10 +1,12 @@
set(API_SOURCES
    Arguments.cpp
    ConfigParser.cpp
    Evaluator.cpp
    GradientMachine.cpp
    Matrix.cpp
    Parameter.cpp
    ParameterOptimizer.cpp
    ParameterUpdater.cpp
    SequenceGenerator.cpp
    Trainer.cpp
    Util.cpp
@@ -63,6 +65,15 @@ install(DIRECTORY ${PROJ_ROOT}/paddle/dist/

add_custom_target(python_api_wheel ALL DEPENDS
    ${PROJ_ROOT}/paddle/dist/.timestamp)
add_dependencies(python_api_wheel python_swig_sources
    paddle_parameter
    paddle_math
    paddle_utils
    paddle_gserver
    paddle_pserver
    paddle_trainer
    paddle_api
    paddle_cuda)

if(WITH_TESTING)
  add_subdirectory(test)
29 changes: 29 additions & 0 deletions paddle/api/Evaluator.cpp
@@ -0,0 +1,29 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sstream>
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"

Evaluator::Evaluator() : m(new EvaluatorPrivate()) {}
Evaluator::~Evaluator() { delete m; }

void Evaluator::start() { m->rawPtr->start(); }

void Evaluator::finish() { m->rawPtr->finish(); }

std::string Evaluator::toString() {
  std::ostringstream sout;
  m->rawPtr->printStats(sout);
  return sout.str();
}
14 changes: 14 additions & 0 deletions paddle/api/GradientMachine.cpp
@@ -64,6 +64,10 @@ GradientMachine* GradientMachine::createByModelConfig(
  return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types);
}

void GradientMachine::start() { m->machine->start(); }

void GradientMachine::finish() { m->machine->finish(); }

void GradientMachine::onPassEnd() { m->machine->onPassEnd(); }

void GradientMachine::prefetch(const Arguments& inArgs) {
@@ -166,3 +170,13 @@ SequenceGenerator* GradientMachine::asSequenceGenerator(
  r->setBeamSize(beam_size);
  return r;
}

Evaluator* GradientMachine::makeEvaluator() {
  auto ev = new Evaluator();
  ev->m->rawPtr = m->machine->makeEvaluator();
  return ev;
}

void GradientMachine::eval(Evaluator* evaluator) {
  m->machine->eval(evaluator->m->rawPtr);
}
6 changes: 5 additions & 1 deletion paddle/api/Paddle.swig
@@ -96,7 +96,9 @@ namespace std {
%rename(__getitem__) Vector::get;
%rename(__setitem__) Vector::set;
%rename(__len__) Vector::getSize;
%rename(__len__) Parameter::getSize;
%rename(__call__) ParameterTraverseCallback::apply;
%rename(__repr__) Evaluator::toString;

%apply (float* INPLACE_ARRAY2, int DIM1, int DIM2) {
  (float* data, int dim1, int dim2)
@@ -167,13 +169,15 @@ namespace std {
%newobject GradientMachine::asSequenceGenerator;
%newobject GradientMachine::getParameter;
%newobject GradientMachine::getLayerOutput;
%newobject GradientMachine::makeEvaluator;
%newobject TrainerConfig::createFromTrainerConfigFile;
%newobject TrainerConfig::getModelConfig;
%newobject TrainerConfig::getOptimizationConfig;
%newobject Parameter::getBuf;
%newobject Parameter::getConfig;
%newobject ParameterOptimizer::create;
%newobject ParameterOptimizer::needSpecialTraversal;
%newobject ParameterUpdater::createLocalUpdater;

%feature("director") UpdateCallback;
%feature("autodoc", 1); // To generate method stub, for code hint in ide
@@ -193,4 +197,4 @@ namespace std {
%ignore OptimizationConfigPrivate;
%ignore ParameterTraverseCallbackPrivate;
%include "utils/GlobalConstants.h"
%include "api/PaddleAPI.h"
%include "api/PaddleAPI.h"