diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00000000..4167a9c1
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,22 @@
+# EditorConfig is awesome: http://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# Set default charset
+[*.py]
+charset = utf-8
+
+# 2 space indentation (TF style)
+[*.py]
+indent_style = space
+indent_size = 2
+
+[.travis.yml]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
index e0d25df6..6afa2474 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,15 @@
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
+
+# Documentation build files.
+/docs/_build/
+
+# Test coverage results.
+.coverage
+
+# Mac DS_Store files
+.DS_Store
+
+# IPython Notebook checkpoints
+.ipynb_checkpoints
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..d7284bea
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,24 @@
+dist: trusty
+language: python
+python:
+ - "2.7"
+ - "3.4"
+ - "3.5"
+ - "3.6"
+env:
+ matrix:
+ - TF_VERSION=1.0
+ - TF_VERSION=1.1
+ - TF_VERSION=1.2
+ - TF_VERSION=1.3
+ - TF_VERSION=1.4
+# command to install dependencies
+install:
+ # Python 2.7 has a very old pip by default that doesn't have tensorflow.
+ - pip install numpy tensorflow==$TF_VERSION
+ - pip install coveralls
+# command to run tests
+script:
+ nosetests --logging-level=WARNING --with-coverage --cover-package=t3f
+after_success:
+ coveralls
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e6497a4..048df107 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,47 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]
+## [1.0.0] - 2018-01-08
+### Added
+- API reference documentation.
+- Coveralls test coverage.
+- add_projected function for adding tensors from the same tangent space.
+- add_n_projected function.
+- Test TF 1.0-1.4.
+- Test Python 2.7, 3.4, 3.5, 3.6.
+- t3f.kronecker module for working with Kronecker product matrices (TT-matrices of TT-rank 1).
+- Example Jupyter notebooks with basic functionality, TensorNet (training neural networks with params in the TT-format), Riemannian optimization, and tensor completion.
+- A profiler for basic operations on CPU and GPU.
+- approximate.add_n function for adding together a batch of tensors and rounding after each summation.
+- Manually import all the relevant functions in __init__ and avoid importing internal tools.
+- gather_nd for gathering several elements from a TT-object at once.
+- Overload operations to support `a - b`, `a + -b`, `0.4 * a`.
+- Add complexities of some functions into the docstrings.
+
+### Changed
+- Speed improvements (in particular to quadratic_form).
+- Bug fixes.
+- Multiplication by a number uniformly multiplies all cores (improved stability for large tensors).
+- Better test coverage.
+
+## [0.3.0] - 2017-04-20
+### Added
+- Python 3 support.
+- Speed improvements.
+- Bug fixes.
+- Travis CI.
+- project_matmul -- fast projection of matrix-by-vector product.
+- pairwise_flat_inner_projected -- fast pairwise dot products between projections on the same tangent space.
+- multiply_along_batch_dim
+- Support for indexing with placeholders or tf.Tensors.
+- Support for t3f.cast(batch_tt).
+- A function to replace tf.svd by np.svd.
+
+### Changed
+- Better Frobenius norm implementation (via QR).
+- Riemannian projection API: now it's project(what, where).
+
+
## [0.2.0] - 2017-03-23
### Added
- (Partial) support for batches of TT-tensors.
@@ -33,4 +74,4 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
[Unreleased]: https://github.com/Bihaqo/t3f/compare/master...develop
[0.2.0]: https://github.com/Bihaqo/t3f/compare/0.1.0...0.2.0
-[0.1.0]: https://github.com/Bihaqo/t3f/compare/f24409508...0.1.0
\ No newline at end of file
+[0.1.0]: https://github.com/Bihaqo/t3f/compare/f24409508...0.1.0
diff --git a/LICENSE b/LICENSE
index 00c7d3f2..ebfe71a2 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2017 Alexander Novikov, Pavel Izmailov, Ivan Oseledets, Mikhail Trofimov
+Copyright 2017 Alexander Novikov, Pavel Izmailov, Ivan Oseledets, Mikhail Trofimov, Valentin Khrulkov
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
diff --git a/README.md b/README.md
index cbadead1..8231e3ff 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,15 @@
+[![Build Status](https://travis-ci.org/Bihaqo/t3f.svg?branch=develop)](https://travis-ci.org/Bihaqo/t3f)
+[![Coverage Status](https://coveralls.io/repos/github/Bihaqo/t3f/badge.svg?branch=develop)](https://coveralls.io/github/Bihaqo/t3f?branch=develop)
+
TensorFlow implementation of the Tensor Train (TT)-Toolbox.
+# API
+The API reference is available via [readthedocs](https://t3f.readthedocs.io/en/latest/).
+
# Installation
-First, [install TensorFlow](https://www.tensorflow.org/install/) v1 or higher. Then simply run
+T3f assumes you have a working TensorFlow installation; supported versions are from 1.0 to 1.4 (see [here](https://www.tensorflow.org/versions/r1.4/install/) for TF 1.4 installation instructions).
+We don't include TensorFlow in the pip requirements since its installation varies depending on your setup.
+Then simply run
```bash
pip install t3f
```
@@ -46,7 +54,7 @@ sum_round = t3f.round(t3f.add(a, b_tt), max_tt_rank=2)
# Inner product (sum of products of all elements).
a = t3f.random_tensor((3, 2, 2), tt_rank=3)
b = t3f.random_tensor((3, 2, 2), tt_rank=4)
-inner_prod = t3f.tt_tt_flat_inner(a, b)
+inner_prod = t3f.flat_inner(a, b)
```
### Matrix operations
@@ -60,49 +68,26 @@ matvec = t3f.matmul(A, b)
b_dense = tf.random_normal((18, 1))
matvec2 = t3f.matmul(A, b_dense)
```
+# More examples
+For more examples (e.g. how to build neural networks or how to do Riemannian optimization) see the ```examples``` folder.
+# Benchmarking
+Here are the results (in ms) of benchmarking T3F on CPU and GPU and comparing against the [TTPY library](https://github.com/oseledets/ttpy):
+
+![Benchmark results](examples/profile/results.png)
+
-# TensorNet example
-As an example lets create a neural network with a TT-matrix as a fully-connected layer:
-```python
-def build_tt_model(x):
- # A 784 x 625 TT-matrix.
- matrix_shape = ((4, 7, 4, 7), (5, 5, 5, 5))
- tt_W_1 = t3f.get_variable('tt_W_1', initializer=t3f.random_matrix(matrix_shape))
- h_1 = tf.nn.relu(t3f.matmul(tt_W_1, x))
- W_2 = tf.get_variable('W_2', shape=[625, 10])
- y = tf.matmul(W_2, h_1)
- return y
-```
-
-If you want to start from already trained network and compress its fully-connected layer, you may load the network, find the closes approximation of the existing layer matrix in the TT-format, and then finetune the model
-```python
-y = build_tt_model(x)
-with tf.variable_scope("", reuse=True):
- tt_W_1 = t3f.get_tt_variable('tt_W_1')
-W_1 = tf.get_variable('W_1', shape=[784, 625])
-tt_init_op = t3f.initialize_from_tensor(tt_W_1, W_1)
-loss = tf.nn.softmax_cross_entropy_with_logits(y, labels)
-train_step = tf.train.Adam(0.01).minimize(loss)
-with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- restore_all_saved(sess, 'checkpoint/path')
- sess.run(tt_init_op)
- # Finally do the finetuning.
- ...
-```
-where
-```python
-def restore_all_saved(sess, path):
- reader = tf.train.NewCheckpointReader(path)
- var_names_in_checkpoint = reader.get_variable_to_shape_map().keys()
- with tf.variable_scope('', reuse=True):
- vars_in_checkpoint = [tf.get_variable(name) for name in var_names_in_checkpoint]
- restorer = tf.train.Saver(var_list=vars_in_checkpoint)
- restorer.restore(sess, path)
-```
+For more details see the ```examples/profile``` folder.
# Tests
```bash
nosetests --logging-level=WARNING
```
+
+# Building API documentation
+The documentation is built by Sphinx and hosted on readthedocs.org. To rebuild the documentation, install Sphinx and compile the docs by running
+```bash
+cd docs
+make html
+```
+
+# Other implementations
+There are also implementations of the TT-toolbox in [plain Python](https://github.com/oseledets/ttpy) and [Matlab](https://github.com/oseledets/TT-Toolbox).
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000..4ea9e3e7
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SPHINXPROJ = t3f
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..457abe76
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+#
+# t3f documentation build configuration file, created by
+# sphinx-quickstart on Sun Mar 12 10:06:09 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../t3f'))
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.napoleon',
+ 'sphinx.ext.mathjax']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u't3f'
+copyright = u'2017, Alexander Novikov, Pavel Izmailov, Ivan Oseledets, Mikhail Trofimov, Valentin Khrulkov'
+author = u'Alexander Novikov, Pavel Izmailov, Ivan Oseledets, Mikhail Trofimov, Valentin Khrulkov'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'0.3'
+# The full version, including alpha/beta/rc tags.
+release = u'0.3.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 't3fdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 't3f.tex', u't3f Documentation',
+ u'Alexander Novikov, Pavel Izmailov, Ivan Oseledets, Mikhail Trofimov, Valentin Khrulkov', 'manual'),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 't3f', u't3f Documentation',
+ [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 't3f', u't3f Documentation',
+ author, 't3f', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'https://docs.python.org/': None}
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 00000000..4f6c21f6
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,33 @@
+.. t3f documentation master file, created by
+ sphinx-quickstart on Sun Mar 12 10:06:09 2017.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to t3f documentation!
+===============================
+
+Module contents
+---------------
+
+.. automodule:: t3f
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+
+t3f\.utils module
+-----------------
+
+.. automodule:: t3f.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+t3f\.kronecker module
+---------------------
+
+.. automodule:: t3f.kronecker
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 00000000..db7551cf
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,36 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+set SPHINXPROJ=t3f
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/docs/tf_requirement.txt b/docs/tf_requirement.txt
new file mode 100644
index 00000000..5cd0635d
--- /dev/null
+++ b/docs/tf_requirement.txt
@@ -0,0 +1,7 @@
+# This file is needed since we do not include tensorflow in the pip
+# requirements of the main module (tf comes in many different forms and it's
+# hard to check if one of them is installed), but tf is necessary for
+# building the docs with readthedocs.org: they fetch a fresh version of the
+# library on each build and it doesn't import properly without tensorflow
+# being installed.
+tensorflow>=1.0
\ No newline at end of file
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 00000000..1cf39f70
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,12 @@
+# Examples
+This folder contains examples of basic T3f usage such as:
+
+- Converting a numpy tensor to the Tensor Train format, rounding, and various
+operations with TT tensors (see the sketch after this list).
+
+- An example of a TensorNet, a deep neural network based on
+TT-matrices.
+
+- An example of solving a tensor completion problem using TensorFlow optimizers.
+
+- An example of solving an approximation problem using Riemannian optimization.
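+
+As a quick taste of the first item, here is a minimal sketch (assuming
+TensorFlow 1.x and the t3f functions used elsewhere in this repo; the shapes
+are arbitrary):
+```python
+import numpy as np
+import tensorflow as tf
+import t3f
+
+# Convert a dense tensor into the Tensor Train format via TT-SVD.
+a_dense = tf.constant(np.random.randn(3, 2, 2), dtype=tf.float32)
+a_tt = t3f.to_tt_tensor(a_dense, max_tt_rank=3)
+# Round the TT representation to a lower TT-rank and convert back to dense.
+a_rounded = t3f.round(a_tt, max_tt_rank=2)
+with tf.Session() as sess:
+    print(sess.run(t3f.full(a_rounded)))
+```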
diff --git a/examples/profile/README.md b/examples/profile/README.md
new file mode 100644
index 00000000..608d35b5
--- /dev/null
+++ b/examples/profile/README.md
@@ -0,0 +1,21 @@
+## Profiling T3F
+To profile the library, use the following commands
+```bash
+# Running on CPU.
+CUDA_VISIBLE_DEVICES= python profile.py --file_path logs_cpu.pkl
+# Running on GPU.
+CUDA_VISIBLE_DEVICES=0 python profile.py --file_path logs_gpu.pkl
+```
+To visualize the results in a table, see the ```results.ipynb``` Jupyter notebook.
+Here are the numbers you can get on an NVIDIA DGX-1 server with a Tesla V100 GPU and an Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz with 80 logical cores:
+
+![Profiling results](results.png)
+
+The timings are reported in ms, and in the batch case it is the time per object (e.g. it takes 0.3 ms to perform a matrix-by-vector multiplication for a batch of 100 vectors on a CPU, but we report 0.003 = 0.3 / 100 in the table).
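+
+For instance, the per-object numbers can be recovered from the raw logs in the
+same way ```results.ipynb``` does it (a minimal sketch, assuming the pickled
+logs produced by ```profile.py``` above and a batch size of 100):
+```python
+import pickle
+
+with open('logs_cpu.pkl', 'rb') as f:
+    logs = pickle.load(f)
+# Timings are logged in seconds; convert to ms and normalize by the batch size.
+per_object_ms = 1000 * logs['batch_matvec_time'] / 100
+print('%.3f ms per matvec in a batch of 100' % per_object_ms)
+```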
+
+Note that by default TensorFlow has a very slow ```tf.transpose``` op for tensors of more than 5 dimensions, which affects the results a lot. To achieve the performance described above, apply [the following patch](https://github.com/tensorflow/tensorflow/pull/15893) and [compile TensorFlow from sources](https://www.tensorflow.org/install/install_sources).
+
+## Comparing against TTPY
+To benchmark T3F against [TTPY](https://github.com/oseledets/ttpy), another library for working with the Tensor Train decomposition, install TTPY and run the following command in the bash shell
+```bash
+python profile_ttpy.py --file_path logs_ttpy.pkl
+```
diff --git a/examples/profile/logs_cpu.pkl b/examples/profile/logs_cpu.pkl
new file mode 100644
index 00000000..b8964230
--- /dev/null
+++ b/examples/profile/logs_cpu.pkl
@@ -0,0 +1,47 @@
+(dp0
+S'batch_matmul_time'
+p1
+F0.00035024166107177734
+sS'project_time'
+p2
+F0.00023940801620483398
+sS'batch_matvec_time'
+p3
+F0.00034156084060668943
+sS'round_time'
+p4
+F0.00015906500816345216
+sS'norm_time'
+p5
+F0.001901895046234131
+sS'matmul_time'
+p6
+F0.00012479305267333983
+sS'batch_project_time'
+p7
+F0.013538193702697755
+sS'batch_round_time'
+p8
+F0.0005605721473693848
+sS'matvec_time'
+p9
+F0.00012870097160339356
+sS'batch_gram_time'
+p10
+F0.28847967863082885
+sS'project_rank100_time'
+p11
+F0.0015640497207641602
+sS'batch_project_rank100_time'
+p12
+F0.0017176294326782227
+sS'tt_svd_time'
+p13
+F0.00017525100708007813
+sS'batch_norm_time'
+p14
+F0.04223270702362061
+sS'flatinner_time'
+p15
+F0.0008057281970977783
+s.
\ No newline at end of file
diff --git a/examples/profile/logs_gpu.pkl b/examples/profile/logs_gpu.pkl
new file mode 100644
index 00000000..cd782d23
--- /dev/null
+++ b/examples/profile/logs_gpu.pkl
@@ -0,0 +1,47 @@
+(dp0
+S'batch_matmul_time'
+p1
+F0.00037342071533203125
+sS'project_time'
+p2
+F0.0002398080825805664
+sS'batch_matvec_time'
+p3
+F0.0003491091728210449
+sS'round_time'
+p4
+F0.00016474294662475585
+sS'norm_time'
+p5
+F0.0008932108879089356
+sS'matmul_time'
+p6
+F0.00013312101364135743
+sS'batch_project_time'
+p7
+F0.014079785346984864
+sS'batch_round_time'
+p8
+F0.0005947995185852051
+sS'matvec_time'
+p9
+F0.00012082600593566895
+sS'batch_gram_time'
+p10
+F0.006867189407348633
+sS'project_rank100_time'
+p11
+F0.001657888889312744
+sS'batch_project_rank100_time'
+p12
+F0.001759040355682373
+sS'tt_svd_time'
+p13
+F0.00014748501777648925
+sS'batch_norm_time'
+p14
+F0.004972738981246948
+sS'flatinner_time'
+p15
+F0.0007025179862976074
+s.
\ No newline at end of file
diff --git a/examples/profile/logs_ttpy.pkl b/examples/profile/logs_ttpy.pkl
new file mode 100644
index 00000000..9ec75647
Binary files /dev/null and b/examples/profile/logs_ttpy.pkl differ
diff --git a/examples/profile/profile.py b/examples/profile/profile.py
new file mode 100644
index 00000000..32bf17e9
--- /dev/null
+++ b/examples/profile/profile.py
@@ -0,0 +1,133 @@
+import timeit
+import numpy as np
+import pickle
+import argparse
+import tensorflow as tf
+from tensorflow.python.client import device_lib
+import t3f
+
+parser = argparse.ArgumentParser(description='Measure execution time of various t3f operations.')
+parser.add_argument('--file_path', help='Path to the file to save logs.')
+args = parser.parse_args()
+
+
+# Matvec.
+shape = 10 * np.ones(10, dtype=int)
+matrices = t3f.random_matrix_batch((shape, shape), 10, batch_size=100)
+matrices = t3f.cast(matrices, tf.float64)
+one_matrix = t3f.get_variable('one_matrix', initializer=matrices[0])
+matrices = t3f.get_variable('matrices', initializer=matrices)
+vecs = t3f.random_matrix_batch((shape, None), 10, batch_size=100)
+vecs = t3f.cast(vecs, tf.float64)
+one_vec = t3f.get_variable('one_vec', initializer=vecs[0])
+vecs = t3f.get_variable('vecs', initializer=vecs)
+vecs100 = t3f.random_matrix_batch((shape, None), 100, batch_size=100)
+vecs100 = t3f.cast(vecs100, tf.float64)
+one_vec100 = t3f.get_variable('one_vec100', initializer=vecs100[0])
+vecs100 = t3f.get_variable('vecs100', initializer=vecs100)
+sess = tf.Session()
+sess.run(tf.global_variables_initializer())
+print(device_lib.list_local_devices())
+logs = {}
+
+matvec_op = t3f.matmul(one_matrix, one_vec).op
+# Warmup.
+timeit.timeit(lambda: sess.run(matvec_op), number=10)
+matvec_time = timeit.timeit(lambda: sess.run(matvec_op), number=1000) / 1000
+print('Multiplying %s by %s takes %f seconds.' % (one_matrix, one_vec,
+ matvec_time))
+logs['matvec_time'] = matvec_time
+
+batch_matvec_op = t3f.matmul(one_matrix, vecs).op
+batch_matvec_time = timeit.timeit(lambda: sess.run(batch_matvec_op),
+ number=100) / 100
+print('Multiplying %s by %s takes %f seconds.' % (one_matrix, vecs,
+ batch_matvec_time))
+logs['batch_matvec_time'] = batch_matvec_time
+
+matmul_op = t3f.matmul(one_matrix, one_matrix).op
+matmul_time = timeit.timeit(lambda: sess.run(matmul_op),
+ number=1000) / 1000
+print('Multiplying %s by itself takes %f seconds.' % (one_matrix, matmul_time))
+logs['matmul_time'] = matmul_time
+
+batch_matmul_op = t3f.matmul(one_matrix, matrices).op
+batch_matmul_time = timeit.timeit(lambda: sess.run(batch_matmul_op),
+ number=100) / 100
+print('Multiplying %s by %s takes %f seconds.' % (one_matrix, matrices,
+ batch_matmul_time))
+logs['batch_matmul_time'] = batch_matmul_time
+
+norm_op = t3f.frobenius_norm(one_matrix, differentiable=True).op
+norm_time = timeit.timeit(lambda: sess.run(norm_op),
+ number=1000) / 1000
+print('Computing the norm of %s takes %f seconds.' % (one_matrix, norm_time))
+logs['norm_time'] = norm_time
+
+batch_norm_op = t3f.frobenius_norm(matrices, differentiable=True).op
+batch_norm_time = timeit.timeit(lambda: sess.run(batch_norm_op),
+ number=1000) / 1000
+print('Computing the norm of %s takes %f seconds.' % (matrices, batch_norm_time))
+logs['batch_norm_time'] = batch_norm_time
+
+flatinner_op = t3f.flat_inner(one_vec, one_vec).op
+flatinner_time = timeit.timeit(lambda: sess.run(flatinner_op),
+ number=1000) / 1000
+print('Computing the dot product between %s and itself takes %f seconds.' %
+ (one_vec, flatinner_time))
+logs['flatinner_time'] = flatinner_time
+
+gram_op = t3f.gram_matrix(vecs).op
+gram_time = timeit.timeit(lambda: sess.run(gram_op),
+ number=100) / 100
+print('Computing the gram matrix of %s takes %f seconds.' % (vecs, gram_time))
+logs['batch_gram_time'] = gram_time
+
+tens = tf.cast(tf.random_normal((10, 10, 10, 10)), tf.float64)
+tt_svd_op = t3f.to_tt_tensor(tens, max_tt_rank=10).op
+tt_svd_time = timeit.timeit(lambda: sess.run(tt_svd_op),
+ number=1000) / 1000
+print('TT-SVD for tensor of shape %s takes %f seconds.' % (tens.get_shape(),
+ tt_svd_time))
+logs['tt_svd_time'] = tt_svd_time
+
+round_op = t3f.round(one_vec100, max_tt_rank=10).op
+round_time = timeit.timeit(lambda: sess.run(round_op),
+ number=1000) / 1000
+print('Rounding %s takes %f seconds.' % (one_vec100, round_time))
+logs['round_time'] = round_time
+
+batch_round_op = t3f.round(vecs100, max_tt_rank=10).op
+batch_round_time = timeit.timeit(lambda: sess.run(batch_round_op),
+ number=100) / 100
+print('Rounding %s takes %f seconds.' % (vecs100, batch_round_time))
+logs['batch_round_time'] = batch_round_time
+
+project_op = t3f.project(one_vec, one_vec).op
+project_time = timeit.timeit(lambda: sess.run(project_op),
+ number=1000) / 1000
+print('Projecting %s on %s takes %f seconds.' % (one_vec, one_vec, project_time))
+logs['project_time'] = project_time
+
+batch_project_op = t3f.project(vecs, one_vec).op
+batch_project_time = timeit.timeit(lambda: sess.run(batch_project_op),
+ number=10) / 10
+print('Projecting %s on %s takes %f seconds.' % (vecs, one_vec,
+ batch_project_time))
+logs['batch_project_time'] = batch_project_time
+
+project100_op = t3f.project(one_vec100, one_vec).op
+project100_time = timeit.timeit(lambda: sess.run(project100_op),
+ number=100) / 100
+print('Projecting %s on %s takes %f seconds.' % (one_vec100, one_vec, project100_time))
+logs['project_rank100_time'] = project100_time
+
+batch_project100_op = t3f.project(vecs100, one_vec).op
+batch_project100_time = timeit.timeit(lambda: sess.run(batch_project100_op),
+ number=100) / 100
+print('Projecting %s on %s takes %f seconds.' % (vecs100, one_vec, batch_project100_time))
+logs['batch_project_rank100_time'] = batch_project100_time
+
+if args.file_path is not None:
+ pickle.dump(logs, open(args.file_path, 'wb'))
+
diff --git a/examples/profile/profile_ttpy.py b/examples/profile/profile_ttpy.py
new file mode 100644
index 00000000..e6bb941c
--- /dev/null
+++ b/examples/profile/profile_ttpy.py
@@ -0,0 +1,66 @@
+import timeit
+import numpy as np
+import pickle
+import argparse
+import tt
+from tt.riemannian import riemannian
+
+parser = argparse.ArgumentParser(description='Measure execution time of various ttpy operations.')
+parser.add_argument('--file_path', help='Path to the file to save logs.')
+args = parser.parse_args()
+
+matrix = tt.matrix(tt.rand(100, 10, 10))
+vec = tt.matrix(tt.rand(10, 10, 10), n=[10] * 10, m=[1] * 10)
+vec_100 = tt.matrix(tt.rand(10, 10, 100), n=[10] * 10, m=[1] * 10)
+tens = np.random.randn(10, 10, 10, 10)
+globals = {'matrix': matrix, 'vec': vec, 'vec_100': vec_100, 'tens': tens,
+ 'tt': tt, 'riemannian': riemannian}
+
+logs = {'lib': 'ttpy'}
+
+# Warmup
+timeit.timeit("matrix * vec", globals=globals, number=10)
+
+matvec_time = timeit.timeit("matrix * vec", globals=globals, number=1000) / 1000
+print('Multiplying a matrix by a vector takes %f seconds.' % matvec_time)
+logs['matvec_time'] = matvec_time
+
+matmul_time = timeit.timeit("matrix * matrix", globals=globals, number=100) / 100
+print('Multiplying a matrix by a matrix takes %f seconds.' % matmul_time)
+logs['matmul_time'] = matmul_time
+
+norm_time = timeit.timeit("tt.tensor.norm(matrix.tt)", globals=globals,
+ number=1000) / 1000
+print('Computing the norm of a matrix takes %f seconds.' % norm_time)
+logs['norm_time'] = norm_time
+
+# Time for a single dot product.
+flatinner_time = timeit.timeit("tt.dot(vec.tt, vec.tt)", globals=globals,
+ number=1000) / 1000
+print('Computing dot product of a vector and itself takes %f seconds.' % flatinner_time)
+logs['flatinner_time'] = flatinner_time
+
+# Time for a Gram matrix x A x of size 100 x 100.
+mat_gram_time = 100 * 100 * timeit.timeit("tt.dot(vec.tt, (matrix * vec).tt)",
+ globals=globals, number=100) / 100
+print('Computing the Gram matrix x A x of size 100 x 100 takes %f seconds.' % mat_gram_time)
+logs['batch_mat_gram_time'] = mat_gram_time
+
+tt_svd_time = timeit.timeit("tt.vector(tens)", globals=globals,
+ number=1000) / 1000
+print('TT-SVD for tensor of shape %s takes %f seconds.' % (tens.shape,
+ tt_svd_time))
+logs['tt_svd_time'] = tt_svd_time
+
+project_time = timeit.timeit("riemannian.project(vec.tt, vec_100.tt)", globals=globals,
+ number=100) / 100
+print('Projecting a vector of rank 100 on a vector of rank 10 takes %f seconds.' % project_time)
+logs['project_rank100_time'] = project_time
+
+round_time = timeit.timeit("tt.tensor.round(vec_100.tt, eps=0.0, rmax=10)", globals=globals,
+ number=100) / 100
+print('Rounding a vector of rank 100 to rank 10 takes %f seconds.' % round_time)
+logs['round_time'] = round_time
+
+if args.file_path is not None:
+ pickle.dump(logs, open(args.file_path, 'wb'))
diff --git a/examples/profile/results.ipynb b/examples/profile/results.ipynb
new file mode 100644
index 00000000..1d944aa5
--- /dev/null
+++ b/examples/profile/results.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pickle\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "def pickle_load(path):\n",
+ " with open(path, 'rb') as f:\n",
+ " return pickle.load(f)\n",
+ "\n",
+ " \n",
+ "def load(t3f_cpu_path, t3f_gpu_path, ttpy_path=None):\n",
+ " cpu = pickle_load(t3f_cpu_path)\n",
+ " gpu = pickle_load(t3f_gpu_path)\n",
+ " d = {}\n",
+ " op_list = ['matvec', 'matmul', 'norm', 'round', 'gram', 'project_rank100']\n",
+ " \n",
+ " one_cpu = []\n",
+ " for op in op_list:\n",
+ " if op == 'gram':\n",
+ " # Single element Gram matrix is flat inner.\n",
+ " curr_time = cpu['flatinner_time'] * 1000\n",
+ " else:\n",
+ " curr_time = cpu[op + '_time'] * 1000\n",
+ " one_cpu.append(curr_time)\n",
+ " d['one on CPU'] = one_cpu\n",
+ " \n",
+ " one_gpu = []\n",
+ " for op in op_list:\n",
+ " if op == 'gram':\n",
+ " # Single element Gram matrix is flat inner.\n",
+ " curr_time = gpu['flatinner_time'] * 1000\n",
+ " else:\n",
+ " curr_time = gpu[op + '_time'] * 1000\n",
+ " one_gpu.append(curr_time)\n",
+ " d['one on GPU'] = one_gpu\n",
+ " \n",
+ " batch_cpu = []\n",
+ " for op in op_list:\n",
+ " if op == 'gram':\n",
+ " curr_time = cpu['batch_gram_time'] * 1000 / 100**2\n",
+ " else:\n",
+ " curr_time = cpu['batch_' + op + '_time'] * 1000 / 100\n",
+ " batch_cpu.append(curr_time)\n",
+ " d['batch on CPU'] = batch_cpu\n",
+ " \n",
+ " batch_gpu = []\n",
+ " for op in op_list:\n",
+ " if op == 'gram':\n",
+ " curr_time = gpu['batch_gram_time'] * 1000 / 100.**2\n",
+ " else:\n",
+ " curr_time = gpu['batch_' + op + '_time'] * 1000 / 100.\n",
+ "# curr_time = gpu['batch_' + op + '_time'] * 1000\n",
+ " batch_gpu.append(curr_time)\n",
+ " d['batch on GPU'] = batch_gpu\n",
+ " \n",
+ " if ttpy_path:\n",
+ " ttpy = pickle_load(ttpy_path)\n",
+ " ttpy_col = []\n",
+ " for op in op_list:\n",
+ " if op == 'gram':\n",
+ " # Single element Gram matrix is flat inner.\n",
+ " curr_time = ttpy['flatinner_time'] * 1000\n",
+ " else:\n",
+ " curr_time = ttpy[op + '_time'] * 1000\n",
+ " ttpy_col.append(curr_time)\n",
+ " d['ttpy, one on CPU'] = ttpy_col\n",
+ " \n",
+ " df = pd.DataFrame(d, index=op_list)\n",
+ " regime_list = ['one on CPU', 'one on GPU', 'batch on CPU', 'batch on GPU']\n",
+ " if ttpy_path:\n",
+ " regime_list = ['ttpy, one on CPU'] + regime_list\n",
+ " df = df[regime_list]\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "df = load('logs_cpu.pkl', 'logs_gpu.pkl', ttpy_path='logs_ttpy.pkl')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ttpy, one on CPU | \n",
+ " one on CPU | \n",
+ " one on GPU | \n",
+ " batch on CPU | \n",
+ " batch on GPU | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " matvec | \n",
+ " 11.142 | \n",
+ " 0.129 | \n",
+ " 0.121 | \n",
+ " 0.003 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " matmul | \n",
+ " 86.191 | \n",
+ " 0.125 | \n",
+ " 0.133 | \n",
+ " 0.004 | \n",
+ " 0.004 | \n",
+ "
\n",
+ " \n",
+ " norm | \n",
+ " 3.790 | \n",
+ " 1.902 | \n",
+ " 0.893 | \n",
+ " 0.422 | \n",
+ " 0.050 | \n",
+ "
\n",
+ " \n",
+ " round | \n",
+ " 73.027 | \n",
+ " 0.159 | \n",
+ " 0.165 | \n",
+ " 0.006 | \n",
+ " 0.006 | \n",
+ "
\n",
+ " \n",
+ " gram | \n",
+ " 0.145 | \n",
+ " 0.806 | \n",
+ " 0.703 | \n",
+ " 0.029 | \n",
+ " 0.001 | \n",
+ "
\n",
+ " \n",
+ " project_rank100 | \n",
+ " 116.868 | \n",
+ " 1.564 | \n",
+ " 1.658 | \n",
+ " 0.017 | \n",
+ " 0.018 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ttpy, one on CPU one on CPU one on GPU batch on CPU \\\n",
+ "matvec 11.142 0.129 0.121 0.003 \n",
+ "matmul 86.191 0.125 0.133 0.004 \n",
+ "norm 3.790 1.902 0.893 0.422 \n",
+ "round 73.027 0.159 0.165 0.006 \n",
+ "gram 0.145 0.806 0.703 0.029 \n",
+ "project_rank100 116.868 1.564 1.658 0.017 \n",
+ "\n",
+ " batch on GPU \n",
+ "matvec 0.003 \n",
+ "matmul 0.004 \n",
+ "norm 0.050 \n",
+ "round 0.006 \n",
+ "gram 0.001 \n",
+ "project_rank100 0.018 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.round(decimals=3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\\begin{tabular}{lrrrrr}\n",
+ "\\toprule\n",
+ "{} & ttpy, one on CPU & one on CPU & one on GPU & batch on CPU & batch on GPU \\\\\n",
+ "\\midrule\n",
+ "matvec & 11.142 & 0.129 & 0.121 & 0.003 & 0.003 \\\\\n",
+ "matmul & 86.191 & 0.125 & 0.133 & 0.004 & 0.004 \\\\\n",
+ "norm & 3.790 & 1.902 & 0.893 & 0.422 & 0.050 \\\\\n",
+ "round & 73.027 & 0.159 & 0.165 & 0.006 & 0.006 \\\\\n",
+ "gram & 0.145 & 0.806 & 0.703 & 0.029 & 0.001 \\\\\n",
+ "project\\_rank100 & 116.868 & 1.564 & 1.658 & 0.017 & 0.018 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(df.round(decimals=3).to_latex())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/profile/results.png b/examples/profile/results.png
new file mode 100644
index 00000000..a6fdf24a
Binary files /dev/null and b/examples/profile/results.png differ
diff --git a/examples/tensor-completion.ipynb b/examples/tensor-completion.ipynb
new file mode 100644
index 00000000..18c54c5a
--- /dev/null
+++ b/examples/tensor-completion.ipynb
@@ -0,0 +1,420 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import t3f\n",
+ "tf.set_random_seed(0)\n",
+ "np.random.seed(0)\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Tensor completion\n",
+ "\n",
+ "In this example we will see how can we do tensor completion with t3f, i.e. observe a fraction of values in a tensor and recover the rest by assuming that the original tensor has low TT-rank.\n",
+ "Mathematically it means that we have a binary mask $P$ and a ground truth tensor $A$, but we observe only a noisy and sparsified version of $A$: $P \\odot (\\hat{A})$, where $\\odot$ is the elementwise product (applying the binary mask) and $\\hat{A} = A + \\text{noise}$. In this case our task reduces to the following optimization problem:\n",
+ "\\begin{equation*}\n",
+ "\\begin{aligned}\n",
+ "& \\underset{X}{\\text{minimize}} \n",
+ "& & \\|P \\odot (X - \\hat{A})\\|_F^2 \\\\\n",
+ "& \\text{subject to} \n",
+ "& & \\text{tt_rank}(X) \\leq r_0\n",
+ "\\end{aligned}\n",
+ "\\end{equation*}\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generating problem instance,\n",
+ "Lets generate a random matrix $A$, noise, and mask $P$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "shape = (3, 4, 4, 5, 7, 5)\n",
+ "# Fix random seed so the results are comparable between runs.\n",
+ "tf.set_random_seed(0)\n",
+ "# Generate ground truth tensor A. To make sure that it has low TT-rank,\n",
+ "# let's generate a random tt-rank 5 tensor and apply t3f.full to it to convert to actual tensor.\n",
+ "ground_truth = t3f.full(t3f.random_tensor(shape, tt_rank=5))\n",
+ "# Make a (non trainable) variable out of ground truth. Otherwise, it will be randomly regenerated on each sess.run.\n",
+ "ground_truth = tf.get_variable('ground_truth', initializer=ground_truth, trainable=False)\n",
+ "noise = 1e-2 * tf.get_variable('noise', initializer=tf.random_normal(shape), trainable=False)\n",
+ "noisy_ground_truth = ground_truth + noise\n",
+ "# Observe 25% of the tensor values.\n",
+ "sparsity_mask = tf.cast(tf.random_uniform(shape) <= 0.25, tf.float32)\n",
+ "sparsity_mask = tf.get_variable('sparsity_mask', initializer=sparsity_mask, trainable=False)\n",
+ "sparse_observation = noisy_ground_truth * sparsity_mask"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Initialize the variable and compute the loss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "observed_total = tf.reduce_sum(sparsity_mask)\n",
+ "total = np.prod(shape)\n",
+ "initialization = t3f.random_tensor(shape, tt_rank=5)\n",
+ "estimated = t3f.get_variable('estimated', initializer=initialization)\n",
+ "# Loss is MSE between the estimated and ground-truth tensor as computed in the observed cells.\n",
+ "loss = 1.0 / observed_total * tf.reduce_sum((sparsity_mask * t3f.full(estimated) - sparse_observation)**2)\n",
+ "# Test loss is MSE between the estimated tensor and full (and not noisy) ground-truth tensor A.\n",
+ "test_loss = 1.0 / total * tf.reduce_sum((t3f.full(estimated) - ground_truth)**2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SGD optimization\n",
+ "The simplest way to solve the optimization problem is Stochastic Gradient Descent: let TensorFlow differentiate the loss w.r.t. the factors (cores) of the TensorTrain decomposition of the estimated tensor and minimize the loss with your favourite SGD variation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
+ "step = optimizer.minimize(loss)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 2.20282 2.26653\n",
+ "1000 0.00158865 0.00234495\n",
+ "2000 0.000334849 0.00045755\n",
+ "3000 9.98362e-05 5.27664e-05\n",
+ "4000 8.28005e-05 2.14205e-05\n",
+ "5000 8.17184e-05 2.07301e-05\n",
+ "6000 8.57184e-05 2.59403e-05\n",
+ "7000 8.1738e-05 2.07167e-05\n",
+ "8000 0.000102493 4.31596e-05\n",
+ "9000 8.5987e-05 2.29819e-05\n"
+ ]
+ }
+ ],
+ "source": [
+ "sess = tf.Session()\n",
+ "sess.run(tf.global_variables_initializer())\n",
+ "train_loss_hist = []\n",
+ "test_loss_hist = []\n",
+ "for i in range(10000):\n",
+ " _, tr_loss_v, test_loss_v = sess.run([step, loss, test_loss])\n",
+ " train_loss_hist.append(tr_loss_v)\n",
+ " test_loss_hist.append(test_loss_v)\n",
+ " if i % 1000 == 0:\n",
+ " print(i, tr_loss_v, test_loss_v)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEaCAYAAAAG87ApAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xd4VGX2wPHvSSchCSF0Qu9VEKSI\nKDYEBREbFuyKZXV3f2vvuurqrquurm2t2BV17SiwCqL0gPSWUBNaQkIqSUg5vz/uAAGTMAOZ3GRy\nPs8zz9y59cwV5+Qt931FVTHGGGO8FeR2AMYYY+oWSxzGGGN8YonDGGOMTyxxGGOM8YklDmOMMT6x\nxGGMMcYnljiMqUNEZLOInHGUx14uItOrOyZT/1jiMHWOiJwkInNFJFtEMkVkjoicUG57SxF5XUS2\ni0ieiGwUkcki0t2zvb2IqGdbnojsEpFvReRM975V9Sr3HUP2r1PVD1R1pJtxmcBgicPUKSISA3wL\n/BtoDLQGHgWKPNvjgblAJDAciAaOB34GDk8MjVS1IXAcMAP4QkSu9v+3MKZus8Rh6pquAKr6kaqW\nqmqBqk5X1eWe7f8H5ABXqOoGdWSp6tuq+u+KTqiqO1X1eeAR4O8iUuH/FyLSS0RmeEo5u0TkPs/6\ncBH5l6eEs92zHO7ZNkJEUkXkLhFJE5EdInKeiJwtIus957qv3DUeEZHPROQTEckVkSUiclwl8QSJ\nyD0iskFEMkRkiog09mye7XnP8pSqhorI1SLya7njTxSRRZ6S2yIRObHctlki8pinNJcrItNFpMmR\n/uOY+sESh6lr1gOlIvKOiIwWkbjDtp8BfKGqZUdx7v8CzYBuh28QkWjgf8APQCugM/CjZ/P9wBCg\nH07pZRDwQLnDWwAROKWjh4DXgYnAAJxS0UMi0rHc/uOAT3FKVB8CX4pIaAXx/hE4DzjFE9Me4CXP\ntpM9741UtaGqzjvs+zQGvgNeAOKBZ4HvPCW2/S4DrvHckzDgjgpiMPWQJQ5Tp6hqDnASoDg/wOki\n8rWINPfs0gTYuX9/ETlXRLL2/9V8hNNv97w3rmDbGGCnqj6jqoWqmquqCzzbLgf+qqppqpqOU3V2\nRblji4EnVLUY+NgT4/Oec6wCVgF9y+2/WFU/8+z/LE7SGVJBTDcC96tqqqoW4ZSYLizfrlGFc4Ak\nVX1PVUtU9SNgLTC23D5vq+p6VS0ApuAkRmMscZi6R1XXqOrVqpoA9Mb5a/tfns0ZQMty+36tqo1w\nqrDCjnDq1p73zAq2tQE2VHJcK2BLuc9bPOv2y1DVUs9yged9V7ntBUDDcp9T9i94Sk6ph51vv3Y4\n7TJZIpIFrAFKgeYV7HukmPfH3brc553llvceFqOpxyxxmDpNVdcCk3ESCDjVR+dV1k5xBOOBNGBd\nBdtSgE6VHLcd50d8v7YcLL0cjTb7FzzfI6GS86UAo1W1UblXhKpuwymRVeXwmPfHve0Y4jb1hCUO\nU6eISHcRuV1EEjyf2wCXAvM9uzwLxAHviUgncURTRTWLiDQXkVuBh4F7K2kf+RZoISJ/9jSGR4vI\nYM+2j4AHRKSppwH5IeD9Y/iaA0TkfE+V059xeozNr2C/V4EnRKSd53s0FZFxnm3pQBnQsYLjAKYC\nXUXkMhEJEZEJQE/P9zSmSpY4TF2TCwwGFohIPs4P6krgdgBV3Y3THlAI/OrZfylOt9ybDztXlucc\nK4CzgYtU9a2KLqqquTjdecfiVOEkAad6Nj8OJALLPeda4ll3tL4CJuA0dl8BnO9p7zjc88DXwHQR\nycW5F4M98e4FngDmeKqyDmkjUdUMnHab23Gq9+4CxnjunzFVEpvIyZjaQ0QeATqr6kS3YzGmMlbi\nMMYY4xNLHMYYY3xiVVXGGGN8YiUOY4wxPrHEYYwxxifeDE1QZ4jIWGBsdHT0DV27dnU7HGOMqVMW\nL168W1WbHmm/gGzjGDhwoCYmJrodhjHG1CkislhVBx5pP6uqMsYY4xNLHMYYY3xiicMYY4xPArJx\nvHPnzm6HYoypY4qLi0lNTaWwsNDtUPwuIiKChIQEQkMrmh/syKxx3BhjgE2bNhEdHU18fDwi4nY4\nfqOqZGRkkJubS4cOHQ7ZZo3jxhjjg8LCwoBPGgAiQnx8/DGVrCxxGGOMR6Anjf2O9Xta4jDGmFoi\nKyuLl19+2efjzj77bLKysvwQUcUscRhjTC1RWeIoLS2tYO+Dpk6dSqNGjfwV1u8EVK+qY1W2+B2C\ndiyDmFYQ0/rQ97BIt8MzxgS4e+65hw0bNtCvXz9CQ0Np2LAhLVu2ZOnSpaxevZrzzjuPlJQUCgsL\n+dOf/sSkSZMAaN++PYmJieTl5TF69GhOOukk5s6dS+vWrfnqq69o0KBBtcYZUInjWLvjzpk/l97p\n3xFH7u+2lYTFojGtCG6UQFBs63KJpVxyCY8+xm9gjKkNHv1mFau351TrOXu2iuHhsb2q3Oepp55i\n5cqVLF26lFmzZnHOOeewcuXKA72f3nrrLRo3bkxBQQEnnHACF1xwAfHx8YecIykpiY8++ojXX3+d\niy++mM8//5yJE6t3QsmAShyq+g3wzcCBA284muNzT36EZzfcxu49Wezbk4rkbCd6XxotJZMWJZm0\nLMikZVoyrYIW0pjs3x1fGhYDMa0Iim2NVFRqiWsHYVHH+jWNMfXEoEGDDuky+8ILL/DFF18AkJKS\nQlJS0u8SR4cOHejXrx8AAwYMYPPmzdUeV0AljmN1dp+WnN2n5SHr8opK2JFVwLasArZnFbIsq4Dt\n2QWk7cmmeM8OgvO20aQs45Dk0jp9M62CfiNO9xDEoc/JlDVqT1DzntCsBzTr6bziO0NIWE1+VWNM\nFY5UMqgpUVEH/9CcNWsW//vf/5g3bx6RkZGMGDGiwi614eHhB5aDg4MpKCio9rgscRxBw/AQujSP\npkvziquhysqU3XlFBxJLSlYB87MK2JFdwK49eRRnbSe8YBetJIP2spNuGSn0zl5B23U/EEwZABoU\nAvFdkEMSSg9o1B6CrP+CMfVFdHQ0ubm/ryoHyM7OJi4ujsjISNauXcv8+fNrOLqDLHEco6AgoVlM\nBM1iIujftuJ9CvaVsj27gI3p+azZkcN323NI3pFB8J4NdJMUugWl0Ct9Gz0zf6XZys8PHKehkUiz\nHtB6ACQMgjYnQKN2UE/6mhtT38THxzNs2DB69+5NgwYNaN68+YFto0aN4tVXX6Vv375069aNIUOG\nuBanDTnioryiEtbtzGH1jlzW7Mhh9fYcUnam0aZkK12DUugRlMLxYan0KEsmTJ0iqUY1QxJOcJJI\nwgnQqr+1mxhTDdasWUOPHj3cDqPGVPR9vR1yxEocLmoYHsKAdo0Z0K7xgXWlZcqWjHzWeJLJs9uy\nWbZlN632baJ/UDInFmzkhI3LaLbuO
wBUgpHmvaDtUOhwMrQfBg3i3PpKxph6wBJHLRMcJHRs2pCO\nTRtyTl+nob60TFm/K5fEzZlM27yHxzdnUlCYTv+gZIaEbuCkPZvomjaZ0IX/QRGkRR9PEhkO7U6E\niBiXv5UxJpDU+sQhIlHAy8A+YJaqfuBySDUuOEjo0TKGHi1juGJoe1SVbVkFJG7ew6LNmfzf5kw2\nZWdxnCRzWsQ6zsheR6cFrxE870WQIKc6q9Np0Ol0SBgIwUc3lLIxxoBLiUNE3gLGAGmq2rvc+lHA\n80Aw8IaqPgWcD3ymqt+IyCdAvUschxMREuIiSYiL5Lz+rQFIyy1kTvIAfknazeSk3WTvzeX4oCRG\nN0zi1KzVJPzyDDL7aQiLdkojnU9zkknjji5/G2NMXeNWiWMy8CLw7v4VIhIMvAScCaQCi0TkayAB\nWOHZreoBW+qxZtERjO+fwPj+Cagq63fl8UvScfyUvJu/bcwgrDiXk0PXcGHYOgZu+Y2GnjYS4jo4\nCaTz6U7VllVrGWOOwJXEoaqzRaT9YasHAcmquhFARD4GxuEkkQRgKVUMyigik4BJAG3bVtIvtp4Q\nEbq1iKZbi2iuH96RwuJSFmzKZObaXjy6Pp1NWXm0l52cH7OOUWWr6bT0I4IT3wQJhjaDnCqtTqdB\nq34QFOz21zHG1DK1qY2jNZBS7nMqMBh4AXhRRM4BvqnsYFV9DXgNnO64foyzzokIDeaUrk05pWtT\nADbtzmfWujRmruvLixsz0JJ9DA3bwIS49QzJWkb8zMdh5uNO76yOIw4mktjWrn4PYwJdVlYWH374\nIbfccovPx/7rX/9i0qRJREb6f0DW2pQ4KnqqTVU1H7jGqxPYnONe6dAkig5NOnDNsA7s3VfC/I0Z\nzFzbiSfX9SN1z1gak8OFcUmMiVpL941zCVvljI1Dk25OlVan06DdMBsx2Jhqtn9Y9aNNHBMnTqx3\niSMVaFPucwKw3ZcTHOsgh/VRZFgIp3Vvzmndm6OqbEjPY9a6dGau68DbmwZSXHoZ/cJ3cHmTDZzE\nMloseguZ/zIEhznPjuxPJM172xPtxhyj8sOqn3nmmTRr1owpU6ZQVFTE+PHjefTRR8nPz+fiiy8m\nNTWV0tJSHnzwQXbt2sX27ds59dRTadKkCTNnzvRrnLUpcSwCuohIB2AbcAlwmS8nsBLHsREROjeL\npnMzp20kr6iEOcm7mbUujWfXdeTO7OGEs48Lm2xlXPRaemctJnLGQzDjIYhq5lRrtR0CbQY7Y21Z\n+4ipq76/B3auOPJ+vmjRB0Y/VeUu5YdVnz59Op999hkLFy5EVTn33HOZPXs26enptGrViu++czq4\nZGdnExsby7PPPsvMmTNp0qRJ9cZdAbe6434EjACaiEgq8LCqvikitwLTcLrjvqWqq3w5r5U4qlfD\n8BDO6tWCs3q1QFVZtyuXmWvTmbmuBZdu6UJp2Rg6ReRwdfNNnBKykoQNMwlaMcU5OCwaEjxjbLU+\nHpr3gtg2VioxxkvTp09n+vTp9O/fH4C8vDySkpIYPnw4d9xxB3fffTdjxoxh+PDhNR6bW72qLq1k\n/VRg6tGe10oc/iMidG8RQ/cWMdw8ohPZBcXMSd7NzLVpvLC+KQ/mHgdcxomN8zknbiuDgpNok7OK\n8F/+iagzCjARsU6VVvNeznuL3hDfxboAm9rnCCWDmqCq3Hvvvdx4442/27Z48WKmTp3Kvffey8iR\nI3nooYdqNLbaVFV1zKzEUXNiG4QemL+krExZvSOHX5N3k7h5D//cEseevT2BcbSIKGF0s0wGRW6n\nG1toUZBMg6UfIvvyDp4sMt55nqRxB+c9rj3EtIToltCwudO7y0oqph4oP6z6WWedxYMPPsjll19O\nw4YN2bZtG6GhoZSUlNC4cWMmTpxIw4YNmTx58iHHBmxVlQksQUFC79ax9G4dC6c4fylt3J3P4i17\nWLx5Dwu3N+aDpBbsK3VmJQsPhmHx+QyP2UX3sDRal+0gft92GmydT9DKz2F/CWW/4HCIbg4NWzhJ\nJiLWeTVo5Fne/x4DoZEQEgGhDQ6+71+25GNqufLDqo8ePZrLLruMoUOHAtCwYUPef/99kpOTufPO\nOwkKCiI0NJRXXnkFgEmTJjF69Ghatmzp98bxgBpWvVxV1Q1JSUluh2PKKS4tOzAfyZodOazekcPa\nnbmk5xYdsl+rhkEMaJRH5wb5tAvLoWVQFk0li0alGUTt203YvmykMBsKs6Ho99P3VikkwpNMIg8m\nlAOJZf9yJIRGQHgMRDWByCYQ1RSi4p3lhs1ttsYAZcOq19Nh1a2qqvYKDQ468DT7/vG1AHILi9mS\nsZctGXvZnJHPlox8tmTsZXlmITuyC9lXcmjpI0igaXQ4zWMiaN4ylLZRJbSJLKZleBHNwoqIDysj\nLqyEqKBigkoKoaQQigs873uheP/nAs/yXudzYbbzXlzgrCvMgbLiCr6JQMNmzjzysQkHX43aOjM3\nxnWwWRtNwAuoxGHqnuiI0IPVXIdRVbL2FrMju5CdOQXOu+e1K7eIlKxCFqcUkZm/z3NEsOcVSkhQ\nJM2im9EsJoLmMZ5EExNBs8YHl5vHhBPbIBSpqApLFYpyIH+389q7G/LTIXcXZKdAzjZIXwvJP0Jx\n/sHjwho63S7bn+SM/dVmkFOSMSaABFTisF5VgUVEiIsKIy4qjJ6tKu95VVRSSnpuEbtyikjLKWRX\njpNYduUUkpZTxMb0fOZtyCCnsOR3x0aFBZMQF0mbxpG0adyANocsRxEVHwvxnSoPUhUK9sCeTbBr\nFexcCdsWwy/PwuynnfaZDidDz3HQ/RyIbFz5uYypIwKqjWO/ujJ1rKlZhcWlpOUUsSvXSS47swtJ\n3VNA6p69pGQWkLJnL3v3HToAc9PocLo1j6ZL84Z0bR7teTUkOuIIc5oU5sDWebBxFqz9FrK2QlCI\nM+7XwGuhy5n2gGQts2bNGrp3715xCTTAqCpr16496jYOSxzGeKgqmfn7SNlTQErmXlL27GVjej7r\nd+WStCuPguKDSaVj0yj6t4mjf9tG9G/biG7NowkJrqRtQxV2LIVVX8KyjyFvJ8QkwOAb4YTrbcyv\nWmLTpk1ER0cTHx8f0MlDVcnIyCA3N5cOHTocss0ShyUOU43KypTUPQWs35XL2p05LE3J5rete8jw\ntK9EhQUztFM8J3dtyvAuTWkfH1nxj09pMaz7Hha+Bpt/cYZqGX67k0CCA6rmuM4pLi4mNTWVwsJC\nt0Pxu4iICBISEggNPbTkXC8Th3XHNTVJ1UkmS7buYeGmTH5J2s3WzL0AtG0cyVm9mjOmbyv6JsRW\nnES2zIOZTzgJpNXxMO5F56l6Y1xSLxPHflbiMG7ZvDufX5LS+WltGr8m76a4VGnTuAFj+7bi4oFt\naN8k6tADVGHVf2HqXbAvD879N/S92J3gTb1nicMSh3FZ9t5ipq/eybfLd/Br8m5Ky5RhneO55I
S2\nnNWrBWEh5dpE8tLh06tgyxyn6uq0B+1Jd1PjLHFY4jC1yK6cQj5NTOGjhSlsyyqgRUwEk07uyKWD\n2tIgzNO7qrQYpt4BiyfD4Jth1JOWPEyNssRhicPUQqVlyuykdF6dtYEFmzJp0jCce0d35/zjWzvt\nIKow7T6Y/zIM+xOc+Ve3Qzb1SL1MHNY4buqShZsyefL7Nfy2NYuTOjfhuQn9aBod7iSPqXfAojdg\nzHPOcx/G1IB6mTj2sxKHqSvKypQPF27lsW9XE9sglNeuHEi/No2gtAQ+vtQZ0mTi59DpVLdDNfWA\nt4nDRmMzxkVBQcLEIe348g/DCA8N4rLX5zMnebfzTMeFb0GTLvD59ZCzw+1QjTnAEocxtUCPljF8\nftOJtImL5Jq3F/Fr0m4Ij4aL33VG7P3sWqcUYkwtYInDmFqiWUwEn9w4hI5No7jxvURWbsuGpt1g\n7POwdS789JjbIRoDWOIwplZpFBnGO9cOolFkGFe/vZBtWQXQ9yIYcA3M+Resn+52iMZY4jCmtmke\nE8E71w6isLiMWz5YQlFJKYx6Cpr3hi9vsvYO47qAShwiMlZEXsvO9nFKUWNqmc7NGvLPi/qyLCWL\nJ75b40xne+HbTnvHf2+AstIjn8QYPwmoxKGq36jqpNjY388mZ0xdM6p3S24Y3oF3523h+xU7oGlX\nOPufzqCIvzzrdnimHguoxGFMoLlrVHf6tI7lvi9WkJZbCP0ugz4Xw6y/wZa5bodn6ilLHMbUYqHB\nQTw34Tjy95Vy339XoABjnoW49s7zHXszXY7Q1EeWOIyp5To3i+buUd3535o0Pk1MdZ7vuPAtyEuD\nr251higxpgZZ4jCmDrjmxPYM6diYR79ZRUrmXmjV3xkAcd13sPB1t8Mz9YwlDmPqgKAg4Z8XHQfA\n3Z8vp6xMYcjN0HUUTL8fdix3OUJTn1jiMKaOSIiL5IExPZm7IYP3F2xx5uoY9zJExsNn10BRntsh\nmnqi1icOEekoIm+KyGdux2KM2y45oQ0nd23Kk1PXsiUjH6Li4YI3IHMjTL3T7fBMPeHXxCEib4lI\nmoisPGz9KBFZJyLJInJPVedQ1Y2qep0/4zSmrhAR/n5BH0KChTs/9VRZtT8JTr4Lln0Iyz52O0RT\nD/i7xDEZGFV+hYgEAy8Bo4GewKUi0lNE+ojIt4e9mvk5PmPqnJaxDXhoTE8Wbs7krTmbnJUn3wnt\nhsG3f4Hdye4GaAKeXxOHqs4GDu9oPghI9pQk9gEfA+NUdYWqjjnslebP+Iypqy4ckMDp3Zvx9LR1\nbEjPc+bvOP91CAlz2jtKitwO0QQwN9o4WgMp5T6netZVSETiReRVoL+I3FvFfpNEJFFEEtPT06sv\nWmNqIRHhyfP7EBEazB2fLqO0TCG2tdNYvnM5zHjY7RBNAHMjcUgF6yp9gklVM1T1JlXtpKpPVrHf\na8CjwJKwsLBqCNOY2q1ZTAR/HdeL37Zm8fovG52V3c+GwTfBgldg7VR3AzQBy43EkQq0Kfc5Adhe\nHSe2QQ5NfXPuca0Y1asFz05fz/pduc7KM/8KLfrCV7dA9jZ3AzQByY3EsQjoIiIdRCQMuAT4ujpO\nbMOqm/pGRHh8fG8aRoRw+5RlFJeWQUi4MwR7SRF8f5fbIZoA5O/uuB8B84BuIpIqItepaglwKzAN\nWANMUdVV1XE9K3GY+qhJw3AeG9ebFduyeXXWBs/Kzk5Pq7XfQtIMdwM0AUc0AAdIGzhwoCYmJrod\nhjE16tYPlzBt1U6++sNJ9GwVAyX74JWhoGVwy3ynJGJMFURksaoOPNJ+tf7JcV9YVZWpzx4b15vY\nBmHc8eky9pWUOV1zR//Deap87gtuh2cCSEAlDquqMvVZXFQYT4zvzeodObw00/MQYOfTocdYmP0M\nZKVUfQJjvORV4hCRBiLSzd/BHCsrcZj67qxeLTivXytempnMqu2e/w9GPgEozHjQ1dhM4Dhi4hCR\nscBS4AfP534iUi29oKqblTiMgUfO7UVcVBi3T/FUWcW1g5P+D1Z9AZt+cTs8EwC8KXE8gjNMSBaA\nqi4F2vsvJGPMsWgUGcaT4/uwdmcuL+6vshr2J4htC9/fDaUl7gZo6jxvEkeJqtaJuh+rqjLGcUbP\n5ozv35qXZyazZkcOhDaAs56AtFWQ+Jbb4Zk6zpvEsVJELgOCRaSLiPwbmOvnuI6KVVUZc9BDY3rS\nKDKUuz5bTklpmdNI3nEEzHwc8ne7HZ6pw7xJHLcBvYAi4CMgB/izP4Myxhy7uKgwHj3XeTDwjV83\nOTMGjvo77MuHnx5zOzxThx0xcajqXlW9X1VPUNWBnuXCmgjOGHNszu7TgrN6NefZGeud4debdYdB\nN8Lid2D7b26HZ+oob3pVzRSRnw5/1URwvrI2DmMOJSI8Nq43ESFB3PO5Z8bAEXdDVBOYehcE4MgR\nxv+8qaq6A7jT83oQp2turRzPw9o4jPm9ZjERPDS2F4s27+G9+VsgIhbOeARSF8LiyS5HZ+oib6qq\nFpd7zVHVvwCDayA2Y0w1ueD41pzStSl//2EtKZl74bjLoMMpMO0+SF/vdnimjvGmqqpxuVcTETkL\naFEDsRljqomI8Lfz+yDAfV+sQEVg/H8gJAI+vxaKC9wO0dQh3lRVLcapmlqMM0T67cB1/gzKGFP9\nWjdqwD2ju/NL0m4+XZwKMS1h/KuwcyV8cROUlbkdoqkjvKmq6qCqHT3vXVR1pKr+WhPB+coax42p\n2uWD2zGoQ2Me+3Y1u3IKoetZzoyBq7+EHx9xOzxTR1Q6H4eInF/Vgar6X79EVA1sPg5jKrdpdz6j\n/jWbk7s25bUrBiAA3/3FeaL8lHvg1HvdDtG4xNv5OEKq2Da2im0K1NrEYYypXIcmUdw+sit/m7qW\nTxalcMmgtnD2M87ETz8/BSiMuNd5YNCYClSaOFT1mpoMxBhTc647qSO/JO3moa9X0bNVDH0TGsG5\n/3Y2/vx32JvhTAIVFOxuoKZW8nY+jnNE5C4ReWj/y9+BGWP8JzhIeP6S/jRtGM7N7y8hM38fBAU5\nyePEP8KiN+CTK2DfXrdDNbWQN91xXwUm4IxZJcBFQDs/x2WM8bPGUWG8OnEA6XlFXP/OIgr2lTrJ\nY+RjMPppWDcV3hkDeWluh2pqGW9KHCeq6pXAHlV9FBgKtPFvWMaYmtAnIZbnJ/Tjt5Qsbv1wiTOK\nLsDgSTDhfdi1Gt44HdLWuhuoqVW8SRz7nwzaKyKtgGKgg/9COnrWHdcY343u05K/juvNj2vTuOvz\n5ZSWeXpa9hgD13wHxYXw5kjYMNPdQE2t4U3i+FZEGgFPA0uAzTjDq9c6NlaVMUfniiHt+MuZXfnv\nkm3cPmXpwZJH6wFww48Q2xo+uNAZVdfUe1V1xwVAVfcP3P+5iHwLRNSVGQGNMd774+ldCA4Snp62\njpIy5bkJ/QgNDoJGbeHaH+DTq+GbP8KeTXDaQ057i
KmXvGkcXyYi94lIJ1UtsqRhTOD6w6mduXd0\nd75dvoPr3kkkr8gzP3lELFz2KQy4Bn59Dj67xsa3qse8+ZPhXKAEmCIii0TkDhFp6+e4jDEuufGU\nTjx1fh/mJO9mwn/mkZbjmbctOATGPAcjH4fVX8Fk63FVX3kzVtUWVf2Hqg4ALgP6Apv8HpkxxjWX\nDGrLG1cNZNPufMa/PJekXbnOBhE48TaY8B7sWmU9ruopbx8AbC8idwEfA92Bu/walTHGdad2a8Yn\nk4ayr7SMC16Zy9wNuw9u7DHW6XFVUgRvngkbZ7kWp6l53rRxLMAZlyoIuEhVB6nqM36PzBjjuj4J\nsXxxy4m0iI3gyjcX8mliysGNrQfA9T9CbAJ8OAG2zHUvUFOjvClxXKWqx6vqU6q60e8RVUBEzhOR\n10XkKxEZ6UYMxtRXCXGRfHbziQzpGM+dny3nmenrODCqdqM2cNU3ENsGPrwEdix3N1hTI7xp4zim\nCkwReUtE0kRk5WHrR4nIOhFJFpF7jhDDl6p6A3A1zvAnxpgaFBMRytvXnMCEgW3490/J/PmTpRSV\nlDobo5rAFV9AeEOn5GEN5gGvJjpiTwZGlV8hIsHAS8BooCdwqYj0FJE+IvLtYa9m5Q59wHOcMaaG\nhQYH8dQFfbjzrG58tXQ7E99Y4AyOCE7J49KPoCATPr0GSkvcDdb4ld8Th6rOBjIPWz0ISFbVjaq6\nD6fRfZyqrlDVMYe90sTxd+DHpjDlAAAb8klEQVR7VV3i75iNMRUTEf5wamf+fWl/lqVmc/7Lc9i0\nO9/Z2PI4GPsCbPkVfvqru4Eav/KmcfwiEYn2LD8gIv8VkeOP8bqtgXKtbKR61lXmNuAM4EIRuamS\nOCeJSKKIJKanpx9jeMaYqow9rhUf3TCYnMISxr88h8Vb9jgbjpsAA66GOS/A5jmuxmj8x5sSx4Oq\nmisiJwFnAe8ArxzjdSuaWqziOWwBVX1BVQeo6k2q+mol+7wGPAosCQsLO8bwjDFHMqBdY7645UQa\nNQjl6rcWsnKbZ1CJkU9AXDv48iYoynU3SOMX3iQOTwsY5wCvqOpXwLH+Mqdy6NDsCcD2YzynDXJo\nTA1rFx/FBzcMIToihKveWsjG9DynkXz8fyA7Fabd53aIxg+8SRzbROQ/wMXAVBEJ9/K4qiwCuohI\nBxEJAy4Bvj7Gc9qw6sa4oHWjBrx//WAUuP7dRHILi6HtEOcJ8yXvQvKPbodoqpk3CeBiYBowSlWz\ngMbAnd5eQEQ+AuYB3UQkVUSuU9US4FbPedcAU1R1lc/RH8ZKHMa4o2PThrx8+fFsydjLX6Yso6xM\nYcR90KQrfP1HKLQ/5gKJN4mjJfCdqiaJyAicqWMXensBVb1UVVuqaqiqJqjqm571U1W1q6p2UtUn\njip6Y0ytMaRjPPef3YMZq3fx9tzNEBoB416G3O0w/UG3wzPVyJvE8TlQKiKdgTdxZv/70K9RHSWr\nqjLGXdcMa8/p3Zvxjx/WkpyWC21OgKG3wpJ3rMoqgHiTOMo8VUvnA/9S1f/DKYXUOlZVZYy7RIQn\nL+hDZFgwf5myzJlJ8NT7IL4LfPMnKMxxO0RTDbxJHMUicilwJfCtZ12o/0I6elbiMMZ9zaIjeOy8\n3ixPzWby3M0Q2gDOewVytsGMh9wOz1QDbxLHNcBQ4AlV3SQiHYD3/RvW0bEShzG1wzl9WnJqt6Y8\nN2M9O7ILPFVWf4DFb8OGmW6HZ46RN4McrgbuAFaISG8gVVWf8ntkxpg6S0R49NzelJQpj3272ll5\n6v0Q39npZWUPBtZp3gw5MgJIwhlc8GVgvYic7Oe4jopVVRlTe7SNj+S20zozdcVOZq1Lc6qsxr0M\n2SlWZVXHeVNV9QwwUlVPUdWTcYYdec6/YR0dq6oypna54eSOdGwaxUNfraKwuBTaDnaqrBLfslkD\n6zBvEkeoqq7b/0FV11NLG8eNMbVLeEgwT5zXh62Ze/n3T0nOytMecKqsvrrNqqzqKG8SR6KIvCki\nIzyv14HF/g7MGBMYhnaK5/zjW/Pa7I0k7cr1VFm95Kmyetjt8MxR8CZx3AysAv4I/AlYDdzoz6CO\nlrVxGFM73X92D6LCQ7j/i5XOcCRth8CQWyDxTVj1pdvhGR9506uqSFWfVdXzVXW8qj4HvFcDsfnM\n2jiMqZ3iG4Zz3+geLNycyWeLU52VZzwMCSfAl7dA2hp3AzQ+OdpRbodWaxTGmIB30cAEBrVvzN++\nX0NGXhGEhMPF7znDsH98ORRkuR2i8VJNzDlujDGICE+M701+UQkPf+0ZDDumJVz8LmRtgSlXQEmR\nu0Ear1SaOETk+EpeA7BeVcaYo9CleTR/PK0L3y7fwVdLtzkr2w5xnu/YNBu+uBHKytwN0hxRSBXb\nnqli29rqDqQ6iMhYYGznzp3dDsUYU4mbR3Ri5ro0HvhyJSe0b0yrRg2cucrzdsGMByGqKYz+B0hF\nM0yb2kBUK53qu84aOHCgJiYmuh2GMaYSWzLyOfv5X+ib0Ij3rx9McJCAKkx/AOa9CINvhlFPWvKo\nYSKyWFUHHmk/a+MwxtS4dvFRPHxuL+ZtzODZGZ7ni0Vg5ONON90Fr8D3dznJxNQ6VVVVGWOM31w8\nsA2/bd3DSzM30Kd1I0b1buEkj7P+BhLklDzKSuDsZyDI/satTSxxGGNc88i5vVi9I5fbpyylQ5Nh\ndGsRfbDkERwKvz4HxQVw7osQbD9XtUVVvaomllsedti2W/0ZlDGmfggPCebViccTFR7CVW8tZFtW\ngbNBBE5/2BmKfdlH8Pl1ULLP3WDNAVWV//5Sbvnfh2271g+xGGPqoZaxDXjn2kHk7yvhqrcWkrXX\nkyBE4JS7YOQTsPpL+GQiFBe6G6wBqk4cUslyRZ9rBRurypi6qUfLGF6/ciBbM/ZyZfnkAXDirTDm\nOUiaDh9eBEV57gVqgKoTh1ayXNHnWsHGqjKm7hrSMZ5XJh7P2h25XPb6AjLzyyWPgdfC+Fdh86/w\n/vk2PInLqkoc3UVkuYisKLe8/3O3GorPGFOPnN6jOa9fNZAN6Xlc+tp80nLKVU0ddwlcNBm2LYF3\nz4X8DNfirO8qfQBQRNpVdaCqbvFLRNXAHgA0pm6bk7ybG95NJC4yjHeuPYHOzaIPblw/3WnvaNwR\nrvwSolu4F2iAOeYHAFV1S/kXkAccDzSpzUnDGFP3DevchE8mDaWopIwLXpnHos2ZBzd2HQmXfwpZ\nW+Ht0ZCV4l6g9VRV3XG/FZHenuWWwEqc3lTvicifayg+Y0w91Schli9uOZH4hmFc/sYCpq7YcXBj\nx1Pgii+c6qq3R0PGBvcCrYeqauPooKorPcvXADNUdSwwGOuOa4ypAW0aR/L5TSfSp3Usf/hwCZPn\nbDq4se1guOpr2JcPk8dAdqp7gdYzVSWO4nLLpwNTAVQ1F7Bxj40xNSIuKowPrh/MmT2a88g3q3ny\n+zXO9LMA
rfrBVd/Avjz44CLrbVVDqkocKSJym4iMx2nb+AFARBpQg/NxiEgPEXlVRD4TkZtr6rrG\nmNojIjSYVyYOYOKQtvzn543c/uky9pV4/n5t0RsmvAe7k5xGc3vC3O+qShzXAb2Aq4EJqro/lQ8B\n3vbm5CLyloikicjKw9aPEpF1IpIsIvdUdQ5VXaOqNwEXA0ds7TfGBKbgIOGxcb25Y2RXvvhtG9e9\ns4i8ohJnY8cRMO5F2PwLfH2bjarrZ1X1qkpT1ZtUdZyqTi+3fqaq/tPL808GRpVfISLBwEvAaKAn\ncKmI9BSRPp4G+fKvZp5jzgV+BX706dsZYwKKiHDraV14+sK+zN2QwWWvzye7wFOrftwlMOI+WP4x\nLHjV3UADXKXDTYrI11UdqKrnHunkqjpbRNoftnoQkKyqGz3X+RgYp6pPAmMqOc/XwNci8h3w4ZGu\na4wJbBcNbEPjqDBufn8JV7y5gPeuG0xsg1A4+U7YuRym3Q/Ne0OH4W6HGpCqqqoaCiQAvwD/xJlK\ntvzraLUGyne8TvWsq5CIjBCRF0TkP3ga6CvZb5KIJIpIYnp6+jGEZ4ypC07v0ZxXJh7Pmh05XPnm\nAqfkERQE570C8Z3g06vsGQ8/qSpxtADuA3oDzwNnArtV9WdV/fkYrlnRAImVVkiq6ixV/aOq3qiq\nL1Wx32uqOlBVBzZt2vQYwjPG1BWn92jOK5cPYPWOHK5/ZxGFxaUQEQMTPnAayT+/DkpL3A4z4FTV\nxlGqqj+o6lU4DeLJwCwRue0Yr5kKtCn3OQHYfoznBGx0XGPqozN6Nue5Cf1YtHkP//fJUqerbtOu\nzoi6KQtg9tNuhxhwqpyPUUTCReR84H3gD8ALwH+P8ZqLgC4i0kFEwoBLgCrbU7xlo+MaUz+N6duK\nB87pwfcrd/LYd6udlX0vguMuhdn/gC1z3Q0wwFQ15Mg7wFycZzgeVdUTVPUxVd3m7clF5CNgHtBN\nRFJF5DpVLQFuBaYBa4ApqrrqmL7FwetZicOYeur64R25dlgH3p6zmQ8XbHVWnv00NGoLn98Ahfa7\nUF2qGh23DMj3fCy/kwCqqjF+ju2o2ei4xtRPpWXKNZMXMW/Dbj65cSjHt42D1ER480w4/koY+7zb\nIdZq1TE6bpCqRnteMeVe0bU1aViJw5j6LThIeOGSfrSMbcDN7y8mLbcQEgbCkFtg8WRnIihzzKps\n46hrrI3DGNMoMoz/XDGAnIISbv3gN0pKy+DU+yGuvfNUeXGB2yHWeQGVOIwxBpw5zP92fm8Wbs7k\npZkbICwSxr4AmRutl1U1CKjEYVVVxpj9xvdPYHz/1jz/43oSN2c6c3j0vQTmvgiZm458AlOpgEoc\nVlVljCnvr+N6kRAXyZ8+Xuo8WX7GwxAUDDMedDu0Oi2gEocxxpQXHRHK85f0Y2dOIQ98uRJiWsHw\nv8Cab2DTL26HV2cFVOKwqipjzOH6t43jz6d34Ztl2/l+xQ4YeqvzbMe0+2z49aMUUInDqqqMMRW5\naUQnereO4cGvVpK5L9gZfn3ncqfkYXwWUInDGGMqEhocxNMXHkd2QTGPfrMK+l4M8V1g1pNQZjNh\n+yqgEodVVRljKtOjZQy3ndaFr5ZuZ/qadBhxD6SthlXHOvxe/RNQicOqqowxVbl5RCd6tozh/i9X\nkt1pLDTrCbOeslKHjwIqcRhjTFVCg4P4x4V9ycgr4pkZSTD8dshIgvXfux3aUdmSkc+zM9ZT2ZiD\n/mKJwxhTr/RuHcuVQ9vz3vwtLI8d4fSwmlM3Bz+8dvIiXvgxidQ9NTuMiiUOY0y9c/vIrjRtGM59\nX62hbPAtzoRPWxe4HZbPikqcKraa7lUcUInDGseNMd6IjgjlobE9Wbkthw+LT4EGcXW21OGGgEoc\n1jhujPHWOX1aMrxLE576MYW8vlfBuqmwZ7PbYfmkXVkKD4e8U+NFjoBKHMYY4y0R4bFxvdlXWsZT\naUNBxJmzow55et/jXBMyjeCcrTV6XUscxph6q32TKP4wojPvryklo/WpsOQ9KNnndlg+u/7dRezK\nKayx61niMMbUazeN6EiHJlH8PX0Y7N0Na752OyQfCAB5RSV8sKDmSh2WOIwx9Vp4SDCPjevNp9ld\nyYpoDYlvuxrPrHVptL/nO35au4vktDwKi0u9O7AG2zkscRhj6r2TujRhzHEJvJ5/Mmz5FTI2uBbL\np4tTAbh2ciJnPPsz57zg3fDvL/yUzHvzNvsvsHICKnFYd1xjzNF64Jwe/BB0MmUIuuxj9wI5rOCw\nIT3/wHJyWh7t7/mOBRszDtlHPO8PfrXKmWPdzwIqcVh3XGPM0WoeE8HEkUOZU9qLgsQPa6zqZ8HG\nDPKKSgBIyy3kuxU7Kt137obdAHyzfDtwMMdcEDybIJyE8f78Lf4L1iPE71cwxpg64ooh7Xh+7pkM\nz3+Ovcm/EtlluF+vl7V3HxNem8+g9o1ZuDmz0v2mJKbQq1XMgZJFSamSkVd0YPufQr5gt8byXulI\nGkWG+TVmCLAShzHGHIuQ4CDOvOAG9mo4a6a9Xm3nnbUujR9W7gTgt617+GbZdnZmF/Lz+nSA3yWN\nUEoO+XzXZ8s554VfyfWUTD5elMKAx/93yD6NyQUgIa5BtcVdGStxGGNMOX07tmZp3Cl0SZ/B6pR0\nerZpesznvPrtRQBsfuocxr88F4C4yFD27C3+3b5jg+by77AXebz4cmaX9WW9tjmw7R8/rDtkXz1Q\nBqlZVuIwxpjDdDntKmJkL59/9gFlZdXX1lG+eqmipAFwVnAiAA+EfsD08LsZJGu8OvegoLV0lRSk\nBnKJJQ5jjDlMVM8zKQ6JokvGTD5elHJU59iYnsfZz/9CdrkEcXj1kjemhD92YDmMYh4OeYdo9gJO\nW8d+w4JXMT387qOK1VeWOIwx5nAh4YR0H805YUt46rsVbN6df+RjDvPizGRW78hhxppd1RbWRcE/\nc03INP4S8mml+zTIXFtt16uMJQ5jjKmA9BxHdFkOg4LW8udPllLs6/MRnsLA/V+s8PqQWPIYEzy/\n0u37u9wGe94rqkSLTf3J6+sdrTqROEQkSkQWi8gYt2MxxtQTnc+A0Ege6JjM0pQsnpm+3qfD9/+o\n759sqbK97gr5mE6yjX6SzAuhL1Z5TqkwVdQ8v/aqEpG3gDFAmqr2Lrd+FPA8EAy8oapPHeFUdwNT\n/BaoMcYcLiwSOp9B+5QfueyEm3j15w0ECdw1qnu1XaIZWdwS8jU3BX9DkFSeFD4Le4Qt2oxlZZ2A\niksa+0kN9LTyd4ljMjCq/AoRCQZeAkYDPYFLRaSniPQRkW8PezUTkTOA1UD1VRQaY4w3up8Debv4\n66ASwkOCeHnWBmauTfPqUK3kyfP+ksS5QU6X3P0liKqSBsDAoPVcEPzrgc9XhczgpuCvK+yOWxIR\n51V8x8KvJQ5VnS0i7Q9bPQhIVtWNACLyMTBOVZ/EKZ0cQkROBaJwk
kyBiExV1d+V/URkEjAJoG3b\nttX5NYwx9VXnMwAhZMMMFt53O5e9MZ9rJi/iuIRYvvzDMKRc39eyMmX8y3PYkrmXYZ2aUFJ28Gfq\nh7C7aSR5DCl6iS/CHwYga18U74b93adwRgctOrB8T+jHbCpr/rt9iuK6+PglfedGG0droHz/tlTP\nugqp6v2q+mfgQ+D1ipKGZ7/XVHWgqg5s2vTYH9gxxhiimkDCQFg/jdjIUD68YQgAy1Kz6XDvVNrf\n8x05hcW8N38Ls9ansSw1m6y9xXy3YgfTVh2sJOkelEIL2XPIqX1NGgBDg1d7sZf/q6rceHK8om91\nxBYfVZ18xBOLjAXGdu7c+SjCMsaYCnQ5C2Y+DnlpxDZsxoa/nc3kuZt57FvnR7zvI9NdC62iH9NA\nfQAwFWhT7nMCsL06Tmyj4xpjql3Xkc570gwAgoOE607qwPJHRhIaLJzWvZnXp4olzx8RHiI6wv+D\nHLpR4lgEdBGRDsA24BLgsuo4sZU4jDHVrkVfiG4JSdOg/+UHVsdEhJL0xNkHPhcWl5JbWEJuYTGZ\n+fsoLVNKy5ToiFB4w9lnWcSkag2tfdDv+ww1j4mo1mtUxK8lDhH5CJgHdBORVBG5TlVLgFuBacAa\nYIqqrqqO61mJwxhT7USgy5mwYSaUVjy+FEBEaDBNo8Pp2LQhA9s3ZnDoBk58vxN9Ynx/6vzY1PE2\nDlW9tJL1U4Gp1X09K3EYY/yiy1mw5F3YOg86nOzdMQtfc943eTf1a11SJ54c95aVOIwxftFxhPM+\n828+HOT5y3/P5uqN5YiXrfsPABpjTN0X3tB53zrP92Nn+ZJs6oaAShwiMlZEXsvOznY7FGNMoOk4\nwnnP3ubd/jXRL9YlAZU4rKrKGOM3Zz3pvCd7O6eGW4nDqqqMMaZ2aNYDYhIgycsH/qzEUTdYVZUx\nxm9EnIcBN8yEkqIj7++WAH1y3G+sqsoY41ddzoLifNgy14udrcRhjDGmw3Dn/b3zjryva1VV1sZh\njDG1R1jUweVK5tuoDwIqcVgbhzHG75r3cd53rTzCji6VOOwBQN9YG4cxxu8mfg4SBKu/rnh7cQHs\n2xvITRyBlTiMMcbvoptD++Ew+x/w13gnUYBTdfXdHfBEC/hbS3dj9DNLHMYY46s+FzrvZSWQu8NZ\nLsyCRa8f3Mernlf+YFVVPrE2DmNMjegx9uBy2lrYsQzy0g7dJ3NjzcZUgwIqcVgbhzGmRjSIO7i8\nZQ7852R4aZB78ZRnjePGGFNLnfeq8774HXfjcIElDmOMORr9LnWmlC0pcDuSw1iJwxhjaq8bZkJE\nI7ejqHGWOIwx5mjFtIQL3nA7ikNpqd8vYYnDGGOORadTvZ+HvCYs+9jvlwioxGHdcY0xrrjqG3ho\nD3Q63e1I4NT7/H4J0QAcqGvgwIGamJjodhjGGFOniMhiVR14pP0CqsRhjDHG/yxxGGOM8YklDmOM\nMT6xxGGMMcYnljiMMcb4xBKHMcYYn1jiMMYY4xNLHMYYY3wSkA8Aikg6sMXzMRY4/FHyw9eV/9wE\n2O2n0CqKpbqOqWq/yrZ5c28qWleb75e3x1XX/apovd2vqrfV9/tV1Xa371c7VW16xL1UNaBfwGtH\nWlf+M5BYk7FU1zFV7VfZNm/uTV27X94eV13360j3pz7fr8q21ff7VdX22ny/yr/qQ1XVN16sq2gf\nfzia63h7TFX7VbbNm3tT0brafL+8Pa667ldF6+1+Vb2tvt+vqrbX5vt1QEBWVR0LEUlUL8ZqMQ67\nX76x++Ubu1++qan7VR9KHL56ze0A6hi7X76x++Ubu1++qZH7ZSUOY4wxPrEShzHGGJ9Y4jDGGOMT\nSxzGGGN8YonjCEQkSkTeEZHXReRyt+Op7USko4i8KSKfuR1LXSAi53n+bX0lIiPdjqe2E5EeIvKq\niHwmIje7HU9d4PkNWywiY6rrnPUycYjIWyKSJiIrD1s/SkTWiUiyiNzjWX0+8Jmq3gCcW+PB1gK+\n3C9V3aiq17kTae3g4/360vNv62pgggvhus7H+7VGVW8CLgbqZTddH3+/AO4GplRnDPUycQCTgVHl\nV4hIMPASMBroCVwqIj2BBCDFs1tpDcZYm0zG+/tlju5+PeDZXh9Nxof7JSLnAr8CP9ZsmLXGZLy8\nXyJyBrAa2FWdAdTLxKGqs4HMw1YPApI9fzHvAz4GxgGpOMkD7H6VV9n9qvd8uV/i+DvwvaouqelY\nawNf/32p6teqeiJQL6uOfbxfpwJDgMuAG0SkWn7DQqrjJAGiNQdLFuAkjMHAC8CLInIOLjzaX4tV\neL9EJB54AugvIveq6pOuRFf7VPbv6zbgDCBWRDqr6qtuBFcLVfbvawRO9XE4MNWFuGqrCu+Xqt4K\nICJXA7tVtaw6LmaJ4yCpYJ2qaj5wTU0HUwdUdr8ygJtqOpg6oLL79QLOHyfmUJXdr1nArJoNpU6o\n8H4dWFCdXJ0Xq5dVL5VIBdqU+5wAbHcplrrA7pdv7H75xu6Xb2r0flniOGgR0EVEOohIGHAJ8LXL\nMdVmdr98Y/fLN3a/fFOj96teJg4R+QiYB3QTkVQRuU5VS4BbgWnAGmCKqq5yM87awu6Xb+x++cbu\nl29qw/2yQQ6NMcb4pF6WOIwxxhw9SxzGGGN8YonDGGOMTyxxGGOM8YklDmOMMT6xxGGMMcYnljiM\nqYKI5Hne24vIZdV87vsO+zy3Os9vjL9Y4jDGO+1xRhj1mmeo66ockjg8I74aU+tZ4jDGO08Bw0Vk\nqYj8n4gEi8jTIrJIRJaLyI0AIjJCRGaKyIfACs+6Lz0zsK0SkUmedU8BDTzn+8Czbn/pRjznXiki\nK0RkQrlzz/LMfrdWRD4QkYoGtzPGr2x0XGO8cw9wh6qOAfAkgGxVPUFEwoE5IjLds+8goLeqbvJ8\nvlZVM0WkAbBIRD5X1XtE5FZV7VfBtc4H+gHHAU08x8z2bOsP9MIZwG4OMAxnUiNjaoyVOIw5OiOB\nK0VkKbAAiAe6eLYtLJc0AP4oIsuA+TgjmHahaicBH6lqqaruAn4GTih37lTPvApLcarQjKlRVuIw\n5ugIcJuqTjtkpTPRUP5hn88AhqrqXhGZBUR4ce7KFJVbLsX+HzYusBKHMd7JBaLLfZ4G3CwioQAi\n0lVEoio4LhbY40ka3XGm8dyveP/xh5kNTPC0ozQFTgYWVsu3MKYa2F8rxnhnOVDiqXKaDDyPU020\nxNNAnQ6cV8FxPwA3ichyYB1OddV+rwHLRWSJqpafP/sLYCiwDGcWt7tUdacn8RjjOhtW3RhjjE+s\nqsoYY4xPLHEYY4zxiSUOY4wxPrHEYYwxxieWOIwxxvjEEocxxhifWOIwxhjjE0scxhhjfPL/H7+N\nLtns6cUAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.loglog(train_loss_hist, label='train')\n",
+ "plt.loglog(test_loss_hist, label='test')\n",
+ "plt.xlabel('Iteration')\n",
+ "plt.ylabel('MSE Loss value')\n",
+ "plt.title('SGD completion')\n",
+ "plt.legend()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Speeding it up,\n",
+ "The simple solution we have so far assumes that loss is computed by materializing the full estimated tensor and then zeroing out unobserved elements. If the tensors are really large and the fraction of observerd values is small (e.g. less than 1%), it may be much more efficient to directly work only with the observed elements."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "tf.reset_default_graph()\n",
+ "\n",
+ "shape = (10, 10, 10, 10, 10, 10, 10)\n",
+ "\n",
+ "total_observed = np.prod(shape)\n",
+ "# Since now the tensor is too large to work with explicitly,\n",
+ "# we don't want to generate binary mask,\n",
+ "# but we would rather generate indecies of observed cells.\n",
+ "\n",
+ "ratio = 0.001\n",
+ "\n",
+ "# Let us simply randomly pick some indecies (it may happen\n",
+ "# that we will get duplicates but probability of that\n",
+ "# is 10^(-14) so lets not bother for now)\n",
+ "\n",
+ "num_observed = int(ratio * total_observed)\n",
+ "observation_idx = np.random.randint(0, 10, size=(num_observed, len(shape)))\n",
+ "# and let us generate some values of the tensor to be approximated\n",
+ "observations = np.random.randn(num_observed)\n"
+ ]
+ },
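+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick sanity check of the duplicates claim above (a side estimate, not part of the completion pipeline): by the standard birthday-problem approximation, the expected number of colliding pairs among $k$ uniform draws from $N$ cells is about $k(k-1)/(2N)$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Birthday-style estimate of the expected number of duplicate index pairs.\n",
+ "k, N = num_observed, total_observed\n",
+ "print(k * (k - 1) / (2.0 * N))  # ~5 duplicates among 10,000 draws: negligible"
+ ]
+ },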
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Our strategy is to feed the observation_idx\n",
+ "# into the tensor in the Tensor Train format and compute MSE between\n",
+ "# the obtained values and the desired values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "initialization = t3f.random_tensor(shape, tt_rank=16)\n",
+ "estimated = t3f.get_variable('estimated', initializer=initialization)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# To collect the values of a TT tensor (withour forming the full tensor)\n",
+ "# we use the function t3f.gather_nd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "estimated_vals = t3f.gather_nd(estimated, observation_idx)\n",
+ "loss = tf.reduce_mean((estimated_vals - observations) ** 2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n",
+ "step = optimizer.minimize(loss)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 1.64438\n",
+ "100 0.0472497\n",
+ "200 0.00913698\n",
+ "300 0.00306178\n",
+ "400 0.00155388\n",
+ "500 0.000971667\n",
+ "600 0.000669613\n",
+ "700 0.000499607\n",
+ "800 0.000437507\n",
+ "900 0.000346848\n",
+ "1000 0.000325652\n",
+ "1100 0.000275839\n",
+ "1200 0.000247506\n",
+ "1300 0.000410816\n",
+ "1400 0.000331641\n",
+ "1500 0.000296677\n",
+ "1600 0.00025365\n",
+ "1700 0.000210029\n",
+ "1800 0.000216491\n",
+ "1900 0.000312779\n"
+ ]
+ }
+ ],
+ "source": [
+ "sess = tf.Session()\n",
+ "sess.run(tf.global_variables_initializer())\n",
+ "loss_hist = []\n",
+ "\n",
+ "for i in range(2000):\n",
+ " _, loss_v = sess.run([step, loss])\n",
+ " loss_hist.append(loss_v)\n",
+ "\n",
+ " if i % 100 == 0:\n",
+ " print(i, loss_v)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEaCAYAAAAG87ApAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xl8FPX9x/HXJwlJCAn3fYMgHqCI\nEe9WWy9UPNt61PtuFbWt9afWX2tb7U97eh9Y7wPvtmK97wuFACL3jRCQm3AFEpJ8fn/sgEvIsZtk\nM5vN+/l4zCM7M9+Z+eyw7Ge/x8yYuyMiIhKrtLADEBGRpkWJQ0RE4qLEISIicVHiEBGRuChxiIhI\nXJQ4REQkLkocIrITMzvCzArrsf0bZnZ+Q8YkyUWJQ5q8+n7R1bDfm8xsoZltMrNCM3u+0vqjzewD\nM9toZmvM7Csz+x8zyw7W32Jm24L1G81sjpnda2bdGjrWsATv8enoZe4+wt2fCCsmSTwlDmnSzCwj\nEfsIfjGfCxzl7rlAPvBe1PofAy8BzwJ93L0DcAbQE+gVtavn3T0PaA+cCnQFJqZS8pDmR4lDGkzw\na3tp8Ot6tpn9MFh+i5m9aGZPB+ummtnuZnajma00syVmdkzUfi40s5lB2QVmdnnUuiOCX///Y2bL\ngTHAG0D3oGawycy6m1mamd1gZvOD2sALZtY+2EdfM3Mzu9jMFgPvV/F2DgDecvf5AO6+3N1HB9sb\n8HfgD+7+sLuvDcrMdvdR7j638s7cfZu7TyeSXFYBv6rhPF4a9f5nmNmwYPmeZvahmRWZ2XQzOylq\nm8fN7P6gmWiTmX1mZl3N7E4zW2dms8xsv6jyi4LzPyNY/9j2mlIV8XQ3s5fNbFVQA7s6WH4ccBNw\nRnDMKcHyD83skuB1mpndbGbfBP/WT5pZm0r/Dueb2WIzW21mv6nuvEjyUOKQBmFmg4CrgAOCX9jH\nAouiiowEngLaAZOBt4h8/noAfwAeiiq7EjgRaA1cCPxj+5dnoCuRX/B9gPOAEcAyd88NpmXA1cAp\nwPeB7sA64L5KYX8f2DOItbIvgPPM7Ndmlm9m6VHrBhGpWbxcy2nZhbuXA/8BDq9qfVCTuSV4X62B\nk4A1ZtYCGAu8DXQGRgHPBOd9u58ANwMdgRJgHDApmH+JSLKL9lMi7303YPdg28rxpAXHnULk3+qH\nwLVmdqy7vwn8iUitKtfd963iLV0QTEcC/YFc4N5KZQ4jck5/CPzWzPas6txIEnF3TZrqPQEDiHzh\nHwW0qLTuFuCdqPmRwCYgPZjPAxxoW82+/w1cE7w+AigFsqPWHwEUVtpmJvDDqPluwDYgA+gbHK9/\nLe/pp8C7wGZgDXBDsPywYPvoGJ4DioBi4Nyo9/10Ffu9AphbzTHf2v5eKy0/HFgOpEUtGwPcErx+\nHHg4at0oYGbU/BCgKGp+EXBF1PzxwPzK5xM4EFhcKZYbgceqe4/Ah8Alwev3gJ9HrRtUxb9Dz6j1\n44Ezw/48a6p5qnf7sAiAu88zs2uJfJHsbWZvAb/0yK9/gBVRxbcAqz3y63v7PER+jRaZ2Qjgd0R+\nBacBOcDUqO1XufvWWkLqA/zLzCqilpUDXaLml9Tynp4h8qu+BZHayzNmNhlYHBTpBiwMyp4JYGaf\nAulV7C5aD2BtNet6AfOrWN4dWOLu0e/nm2Bf21U+x5XncyvtM/r9fxMco7I+RJoBi6KWpQOfVBl9\n1XF/U+k4Gez877A86nVxFXFKklFTlTQYd3/W3Q8j8mXjwB3x7sPMsog0Af0V6OLubYHXAYs+VOVD\nV7GrJcAId28bNWW7+9JattuFR/onXgS+BgYDs4ClwGkxvakoQdPPSKr/4l1CpOmosmVAr2D77XoH\ncdRVdCd+7+AYVcWzsNJ5zHP344P1tZ3DZUQ+D9HHKWPnpCZNjBKHNAgzG2RmPwi++LcS+YVbXstm\nVckEsoh0IJcFtY9jat6EFUCH7Z2ugQeB28ysTxBfJzM7OdYgzOwCMzvBzPKCDt4RwN7Al+7uRDq3\nfxd0ZLeziIHs/Es6en8tgrb7MUT6aCr3N2z3T+A6M9s/2OeA4D18SaTJ7PpgX0cQSUDPxfqeqnCl\nmfUMBg3cBDxfRZnxwIZgMEJLM0s3s8FmdkCwfgXQt1JCizYG+IWZ9TOzXL7rEymrR9wSMiUOaShZ\nwO3AaiJND52JfBnFxd03EunYfoFIh/bZwKu1bDOLyBfUgmDEUXfgrmC7t81sI5HO7gPjCGVDEP9i\nIn0XfwZ+5u6fBsd8nkhn9DlEfpWvDmIeDbwYtZ8zzGxTsI9XifSV7B/VhFf5vbwI3EZkmO9GIv07\n7d29lEhH+YjgWPcD5wXvva6eJdLZviCYbq0innIiCWookWa51USS2/Ykvf29rjGzSVUc41EigyI+\nDrbfSqT/RZowi/x4EpHmxMwWEenAfjfsWKTpUY1DRETiosQhIiJxUVOViIjERTUOERGJixKHiIjE\nJSWvHO/YsaP37ds37DBERJqUiRMnrnb3TrWVS8nE0bdvXwoKCsIOQ0SkSTGzb2ovpaYqERGJkxKH\niIjERYlDRETiklKJw8xGmtno9evXhx2KiEjKSqnE4e5j3f2yNm3a1F5YRETqJKUSh4iIJJ4Sh4iI\nxEWJQ0RE4qLEISIicVHiEBGRuKTkLUfqavLidZSUVdC1dTZdWmfTMjM97JBERJKOEkeUv78zh0/m\nrt4x3zo7g65tIkmkS+vsIKFkfTffJpuOuVmkp1mIUYuINC4ljii3njKYJWu3sHzDVlZETcs3lDBv\n5WpWbiyhvGLnB1+lGXTKy6Jr62w6V0ou0UmndXYGZkowItL0KXFE6dOhFX06tKp2fXmFs2ZTCSs2\nlFSZXBavKWbCorUUFW/bZduWLdLp0jprR3Lp2iabznlZ9Gjbkh7tWtKjbUvat8pUchGRpKfEEYf0\nNKNzULMYQvVXp2/dVs7KapLLivVbmVJYxFvTt1JSVrHTdi1bpO9IIj3ataRn8DryN4fOeVmkqVlM\nREKW9InDzFoB9wOlwIfu/kzIIdUqu0U6vTvk0LtDTrVl3J31W7axtGgLS9dt2fG3MHg9del61m4u\n3WmbzPQ0urXN3imZ9IhKLl3bZNMiXQPlRCSxQkkcZvYocCKw0t0HRy0/DrgLSAf+6e63A6cBL7n7\nWDN7Hkj6xBELM6NtTiZtczLZu3vVtZfi0rJIMqmUVJauK+ajOatYsaFkp/JpBl1bZwe1lZydmsF6\ntmtJ97YtyW6hkWIiUj9h1TgeB+4Fnty+wMzSgfuAo4FCYIKZvQr0BKYGxcobN8xw5WRmMLBLHgO7\n5FW5vqSsnG+LtrK0aAuF64p3SjLjF65l+Yatu3Tmt8tp8V3HfV42Xdrs2qHfPidTTWIiUq1QEoe7\nf2xmfSstHg7Mc/cFAGb2HHAykSTSE
/iKGi5YNLPLgMsAevfu3fBBJ6GsjHT6dmxF345Vd+iXlVew\nYmNJUFuJJJZIv0sJKzZsZfqyDazeVILvnFtokW50zoskk0gnfiShdG2dTae8rMiUm0XbnBbqzBdp\nhpKpj6MHsCRqvhA4ELgbuNfMTgDGVrexu48GRgPk5+d7deWak4z0tEhzVduWDO/XvsoyZeUVrNpU\nwvL12zvxg0799VtZsXErs5dv5OM5q9lUUrbr/tOMjrmRRNIxNzP4m7UjuUS/zsvScGSRVJFMiaOq\nbxV3983AhTHtwGwkMHLAgAENGlgqy0hPo1ublnRr07LGcptKylixYSurNpbsmFZvivq7qYQZ325g\nzaZSyip2zduZGWl0ys2iY1BbidRaqk42rbKS6WMpIpUl0//QQqBX1HxPYFk8O3D3scDY/Pz8Sxsy\nMIHcrAxyO+WyW6fcGstVVDhFW7btlFh2er2phMJ1xXy1ZB1rNpfu0kwGkJOZvqM5rHf7nOD6mhz6\ndMihb4dWaiITCVkyJY4JwEAz6wcsBc4Ezg43JIlXWprRvlUm7VtlMoiqO/W3KyuvYG1xaVRyKd0p\nyazYsJUvFqzhlclLd9qudXYGg7rmsVe31uzVvTV7d2/DwC65ZGVoxJhIYwhrOO4Y4Aigo5kVAr9z\n90fM7CrgLSLDcR919+lx7ldNVU1IRnoanfMine812bqtnCVri/lmTTGL1mxm4erNzF6+kZcmFrJ5\nXGSgXUaaMahrHgf0bc+B/dpzQL/2dMzNaoy3IdLsmFfVVtDE5efne0FBQdhhSIJVVDiL1xYz49sN\nTF+2nq+WFDHxm3Vs3Ra5In+3Tq04fGAnjtyjMwf2a69rWERqYWYT3T2/1nJKHJJKSssqmLZsPeMX\nruWLBWsYN38NJWUV5GSmc+iAjhy9VxeO3bsrbVq2CDtUkaTTLBNHVFPVpXPnzg07HEkCW7eVM27+\nGt6btYIPZq1iadEWMtPT+N7unRi5bzeO3qsLOZnJ1NUnEp5mmTi2U41DquLufF24nrFTlvHa19+y\nfMNW8rIyOGW/Hpx9YG/27NY67BBFQqXEocQhNaiocMYvWsvzE5bw36nfUlpWwX6923L59/pz9F5d\n9XAuaZaaZeJQU5XURVFxKa9MWsoT4xbxzZpi+ndsxeXf789pw3rqbsPSrDTLxLGdahxSF+UVzhvT\nvuXBj+YzbekG+nbI4dfH7sHxQ7rqgkNpFmJNHPo5JRJITzNO3Kc7Y686jEcvyCcrI50rn53EKfd9\nxrj5a8IOTyRpKHGIVGJm/GCPLrx+zeH85Uf7sHJjCWc9/AWXPFHAkrXFYYcnErqUaqpSH4ckwtZt\n5Tz22SLueX8uFe6M+sFALjm8n25xIilHfRzq45AGtqxoC398bQZvTFtO/46tuO3UIRy8W4ewwxJp\nMOrjEGlg3du25IFz9ufxCw+g3J2zHv6C3/1nGsWluz6rRCSVKXGIxOmIQZ1585rvceGhfXnyi284\n7s5P+GKBOs+l+VDiEKmDlpnp/G7k3jx36UEAnPXwF/zlrVmUlVeEHJlI4qVU4jCzkWY2ev369WGH\nIs3Egf078Oa1h3NGfi/u+2A+Zz38BcuKtoQdlkhCpVTicPex7n5ZmzZtwg5FmpGczAxuP30f7jpz\nKDOWbeD4uz/hvZkrwg5LJGFSKnGIhOnkoT147erD6d6mJRc/UcCf35xFRRXPXxdp6pQ4RBpQv46t\neOXnh3DW8F7c/+F8LntqIptKNOpKUosSh0gDy26Rzp9OHcLvT9qbD2av5PT7P9cV55JSlDhEEsDM\nOP+Qvjxx4XC+Xb+F0x/4nFnLN4QdlkiDSKnEoVFVkmwOG9iRF684BDP4yYPjmLBobdghidRbSiUO\njaqSZDSoax4v/+wQOuZmcc4/v+TdGRpxJU1bSiUOkWTVs10OL15xMIO65nH50xP579ffhh2SSJ0p\ncYg0kg65WTx76UHs16stVz83WclDmiwlDpFGlJuVweMXDWdY70jyeO3rZWGHJBI3JQ6RRpablcFj\nF0aSxzXPfaXkIU2OEodICHKzMng8Knm8OW152CGJxEyJQyQkrYLksW/PNlw9ZjKfzl0ddkgiMVHi\nEAlRq6wMHrtgOP07teKypwqYtHhd2CGJ1CqlEocuAJSmqE1OC568eDid8rK44NHxzPxWV5hLckup\nxKELAKWp6pyXzdMXH0hOZgbnPjKeRas3hx2SSLVSKnGINGW92ufw9CXDKa+o4LxHx7NmU0nYIYlU\nSYlDJIkM6JzHIxccwIoNW7nkyQK2bisPOySRXShxiCSZYb3bcecZQ/lqSRG/eP4rPQxKko4Sh0gS\nGjGkG785fk/emLacO96cFXY4IjvJCDsAEanaxYf1Y/HaYh76eAG92udwzkF9wg5JBFDiEElaZsZv\nT9yLwnVbuOXV6ezWKZeDd+sQdlgiaqoSSWYZ6WnceeZQ+nTI4efPTNQjaCUpKHGIJLnW2S345/kH\nUF7hXPpkAZtLysIOSZo5JQ6RJqBfx1bcc/Yw5qzYyHUvTtFIKwlV0icOM+tvZo+Y2UthxyISpu/v\n3okbR0RGWt3z/ryww5FmLKGJw8weNbOVZjat0vLjzGy2mc0zsxtq2oe7L3D3ixMZp0hTccnh/Tht\nvx7c+d4cPpqzKuxwpJlKdI3jceC46AVmlg7cB4wA9gLOMrO9zGyImb1Waeqc4PhEmhQz47ZThzCo\nSx7XPjeZZUVbwg5JmqGEJg53/xhYW2nxcGBeUJMoBZ4DTnb3qe5+YqVpZSLjE2mKWmamc/9Ph7Gt\n3Lny2UmUllWEHZI0M2H0cfQAlkTNFwbLqmRmHczsQWA/M7uxhnKXmVmBmRWsWqUqvKS2/p1yueP0\nfZi8uIjb39CV5dK4YroA0MxaAr3dfXYDHNOqWFbtEBF3XwNcUdtO3X00MBogPz9fQ04k5Z2wTzcm\nLOrLo58t5IC+7RgxpFvYIUkzUWuNw8xGAl8BbwbzQ83s1XocsxDoFTXfE1hWj/3toAc5SXNz0/F7\nsm+vtvz6pa9ZqGd4SCOJpanqFiL9EkUA7v4V0Lcex5wADDSzfmaWCZwJ1CcR7aAHOUlzk5mRxn1n\n70d6mvHzZybpNuzSKGJJHGXuXqef8GY2BhgHDDKzQjO72N3LgKuAt4CZwAvuPr0u+xcR6NkuhzvP\nGMrMbzfw+7H6rySJF0sfxzQzOxtIN7OBwNXA57Hs3N3Pqmb568DrMUcZo6BZbeSAAQMaetciSe3I\nPTrz8yN24/4P53NA3/acNqxn2CFJCoulxjEK2BsoAcYAG4BrExlUXampSpqzXx69O8P7tec3/5rG\n3BUbww5HUliticPdi939N+5+gLvnB6+3NkZwIhK7jPQ07jlrP1plpfOzZyZRXKqbIUpixDKq6gMz\ne7/y1BjBxUujqqS569I6m7vO3I/5qzZx87+m4a6R6dLwYunjuC7qdTZwOpCUP2XcfSwwNj8//9
Kw\nYxEJy6EDOnLtD3fnH+/OYXi/9pw5vHfYIUmKqTVxuPvESos+M7OPEhSPiDSAq34wgIJv1vLbV6cz\npGcb9u6ufj9pOLE0VbWPmjqa2bFA10aILW5qqhKJSE8z/nHGUNrltODKZyaxceu2sEOSFBLLqKqJ\nQEHwdxzwKyApb3OuUVUi3+mYm8U9Zw1jybot3PDyVPV3SIOJpamqX2MEIiINb3i/9vz62EHc/sYs\nho9rz/mH9A07JEkB1SYOMzutpg3d/ZWGD0dEGtplh/dnwsK13PrfGezbqy1De7UNOyRp4qy66quZ\nPVbDdu7uFyUmpLqLunL80rlz54YdjkjSKCou5cR7PqWs3Bk76jA65WWFHZIkITOb6O75tZZLxXbP\n/Px8LygoCDsMkaQyfdl6Tn/gc/bp0ZZnLj2QFulhPI5HklmsiSOmT46ZnWBm15vZb7dP9Q9RRBrT\n3t3bcMfp+zB+0Vpu++/MsMORJqzWzvHg6Xs5wJHAP4EfAeMTHJeIJMDJQ3swtXA9//x0IYN7tOFH\n++tmiBK/WGoch7j7ecA6d/89cDA7P4hJRJqQG0bswSG7deCmf03l68KisMORJiiWxLEl+FtsZt2B\nbUBSDtHVBYAitctIT+Pes4fRKTeLK56ayOpNJWGHJE1MLInjNTNrC/wFmAQsInJ79aSjCwBFYtO+\nVSYPnbs/azaXcuUzk9hWXhF2SNKExHJb9T+6e5G7vwz0AfZwd3WOizRxg3u04fbTh/DlwrX86XV1\nlkvsYrlX1RQzu8nMdnP3kro+RlZEks+p+/XkokP78dhni3hlUmHY4UgTEUtT1UlEbqP+gplNMLPr\nzEz3aRZJETcevwcH9W/Pja9MZdpS/S6U2sXSVPWNu//Z3fcHzgb2ARYmPDIRaRQtgs7yDq0yufyp\niazdXBp2SJLkYr0AsK+ZXQ88B+wBXJ/QqOpIo6pE6qZjbhYPnZvPqk0lXPXsJMrUWS41iKWP40vg\nlaDsj919uLv/LeGR1YFGVYnU3ZCebfi/U4fw+fw13P7GrLDDkSQWy6Njz3d3fYpEmoHT9+/J1KWR\nK8uH9GzDyUN7hB2SJKFY+jiUNESakd+csCfD+7Xnf17+munL1Owru9LtMUVkJy3S07j/p8Nol5PJ\nZU+qs1x2pcQhIrvomJvFg+fsz6pNJYwao85y2VksneM/NrO84PXNZvaKmQ1LfGgiEqZ9e7XltlMG\n89m8Nfz5rdlhhyNJJJYax/+6+0YzOww4FngCeCCxYYlIMvhxfi/OO7gPoz9ewH++Whp2OJIkYkkc\n5cHfE4AH3P0/QGbiQhKRZPK/J+7F8L6RzvIZyzaEHY4kgVgSx1Izewj4CfC6mWXFuF2j0wWAIg2v\nRXoa9/10GG1bZnL50wWsU2d5sxdLAvgJ8BZwnLsXAe2BXyc0qjrSBYAiidEpL4sHz92fFetLuPq5\nyeosb+ZiSRzdgP+6+1wzOwL4MXp0rEizM7RXW249ZTCfzF3NX95WZ3lzFkvieBkoN7MBwCNEnv73\nbEKjEpGk9JMDenHuQX146KMFjJ2yLOxwJCSxJI4Kdy8DTgPudPdfEKmFiEgz9L8n7kV+n3Zc/9LX\nzPxWneXNUSyJY5uZnQWcB7wWLGuRuJBEJJllZqRx/znDaN0yg8ueKqCoWJ3lzU0sieNC4GDgNndf\naGb9gKcTG5aIJLPOedk8cE6ks3zUmMmUV3jYIUkjiuUmhzOA64CpZjYYKHT32xMemYgktWG92/GH\nk/fmk7mr+Zs6y5uVWm+rHoykegJYBBjQy8zOd/ePExuaiCS7M4f3ZkphEfd/OJ/h/dpzxKDOYYck\njSCWpqq/Ace4+/fd/XtEbjvyj8SGJSJNxe9G7s0eXfP45QtTWLFha9jhSCOIJXG0cPcd9VB3n4M6\nx0UkkN0inXvPHsaW0nKuHqOLA5uDWBJHgZk9YmZHBNPDwMREB7admZ1iZg+b2X/M7JjGOq6IxG5A\n51xuPWUwXy5cy93vzws7HEmwWBLHz4DpwNXANcAM4PJYdm5mj5rZSjObVmn5cWY228zmmdkNNe3D\n3f/t7pcCFwBnxHJcEWl8p+/fkx/t35N73p/L5/NWhx2OJJC5xz+Mzsyed/dav8TN7HvAJuBJdx8c\nLEsH5gBHA4XABOAsIB34v0q7uMjdVwbb/Q14xt0n1Xbc/Px8LygoiOMdiUhDKC4tY+Q9n7Jhaxlv\nXnM4HXKzwg5J4mBmE909v7Zydb3L7cGxFApGXq2ttHg4MM/dF7h7KfAccLK7T3X3EytNKy3iDuCN\nWJKGiIQnJzOD+346jPXF27jxlanU5YepJL8wbo/eA1gSNV8YLKvOKOAo4EdmdkV1hczsMjMrMLOC\nVatWNUykIhK3Pbq25tfHDuLtGSt4saAw7HAkAaq9jqOGx8Ma9RtVZVUsq/ZnibvfDdxd207dfTQw\nGiJNVXWOTkTq7eLD+vH+rJX8fux0Durfgd4dcsIOSRpQTRcA/q2GdbPqccxCoFfUfE+gQW6zaWYj\ngZEDBgxoiN2JSB2lpRl//cm+HPePj/nlC1/x/OUHk55W1W9GaYqqbapy9yNrmupxzAnAQDPrZ2aZ\nwJnAq/XYX3TMepCTSJLo0bYlfzhlbwq+WcdDH88POxxpQAnt4zCzMcA4YJCZFZrZxcEt2q8i8lTB\nmcAL7j69gY6nR8eKJJFThvbghH268Y935jBtqf5fpoo6DcdNdhqOK5I8iopLOeYfH9MuJ5NXRx1K\nVkZ62CFJNRI9HFdEJCZtczK54/R9mL1iI39/Z07Y4UgDqDZxmNk5Ua8PrbTuqkQGVVdqqhJJTkfu\n0Zmzhvdi9McLmLCo8qVd0tTUVOP4ZdTreyqtuygBsdSbOsdFktdvTtiLnu1a8qsXprC5pCzscKQe\nakocVs3rquZFRGqUm5XB3348lCXrivnT6zPDDkfqoabE4dW8rmpeRKRWw/u155LD+vHMl4v5aI7u\n8NBU1ZQ49jCzr81satTr7fODGim+uKiPQyT5/eqYQQzsnMv1L01hffG2sMOROqh2OK6Z9alpQ3f/\nJiERNQANxxVJblML13Pq/Z9x4j7duPPM/cIORwL1Ho7r7t9ET0Rujz4M6JjMSUNEkt+Qnm246gcD\n+PdXy3hj6rdhhyNxqmk47mtmtv0ZGt2AaURGUz1lZtc2UnwikqKuPHIAQ3q04aZ/TWXlRj2rvCmp\nqY+jn7tvf3LfhcA77j4SOJAkHY6rPg6RpqNFehr/OGNfNpeWc5Oe3dGk1JQ4onutfgi8DuDuG4Gk\nfBq9ruMQaVoGdM7j+mMH8e7Mlbw4Uc/uaCpqShxLzGyUmZ1KpG/jTQAza0n9nschIrLDRYf248B+\n7fnD2BkUrisOOxyJQU2J42Jgb+AC4Ax3LwqWHwQ8luC4RKSZSEsz/vrjfXF3fv3i11RUqMkq2dU0\nqmqlu1/h7ie7+9tRyz9w9782TnjxUR+HSNPUq30O/
3viXoxbsIYnxi0KOxypRU3XcdT4cCV3Pykh\nETUAXcch0vS4Oxc/UcBn81bz+jWHs1un3LBDanZivY6jpkfHHgwsAcYAX6L7U4lIApkZt582hGPu\n/JhfvjCFl684mIx0PfkhGdX0r9IVuAkYDNwFHA2sdveP3P2jxghORJqXzq2zufWUwUxZUsQDH+px\ns8mqpj6Ocnd/093PJ9IhPg/40MxGNVp0ItLsnLhPd0bu25273pvLpMXrwg5HqlBjPdDMsszsNOBp\n4ErgbuCVxghMRJqvW08ZTNc22Yx6djJFxaVhhyOV1HTLkSeAz4lcw/F7dz/A3f/o7ksbLToRaZba\ntGzBfWcPY+XGrVz34te6qjzJ1FTjOBfYHbgG+NzMNgTTRjPb0DjhxUfDcUVSx7692nLT8Xvy7swV\nPPLpwrDDkSg19XGkuXteMLWOmvLcvXVjBhkr3XJEJLVccEhfjt27C7e/MYvJ6u9IGhrrJiJJy8z4\n8+n70rVNNlc9O1kPfkoSShwiktTa5LTg3qC/41cvTtEtSZKAEoeIJL2hUf0d9384L+xwmj0lDhFp\nEi44pC+n7teDv70zhw9mrQw7nGZNiUNEmgQz40+nDmHPrq25+rnJLFq9OeyQmi0lDhFpMlpmpvPQ\nufuTnmZc9lQBm0vKwg6pWUqpxKHrOERSX6/2Odx71jDmrdzE9S/p4sAwpFTi0HUcIs3DYQM7csOI\nPfjv1G956OMFYYfT7KRU4hC+U5ovAAAOQ0lEQVSR5uPSw/tz4j7d+PObs/h4zqqww2lWlDhEpEky\nM/78o33YvUseo8ZM5ps16ixvLEocItJk5WRm8NC5+2MGFz4+QVeWNxIlDhFp0vp0aMXoc/MpXLuF\ny58uoLSsIuyQUp4Sh4g0ecP7teeOHw3hiwVruelfUzXSKsFqeua4iEiTcep+PVm0upi73ptL3w45\nXPWDgWGHlLKUOEQkZVx71EAWry3mr2/PoXeHVpy0b/ewQ0pJShwikjLMjNtPH8LSdVu47sUpdG2d\nzfB+7cMOK+Woj0NEUkpWRuS2JD3bteSSJyYwe/nGsENKOUocIpJy2rXK5MmLhpPdIp3zHx3P0qIt\nYYeUUpI+cZjZnmb2oJm9ZGY/CzseEWkaerbL4YmLhrO5tIzzHx1PUXFp2CGljIQmDjN71MxWmtm0\nSsuPM7PZZjbPzG6oaR/uPtPdrwB+AuQnMl4RSS17dmvNw+fls3htMRc9PoEtpeVhh5QSEl3jeBw4\nLnqBmaUD9wEjgL2As8xsLzMbYmavVZo6B9ucBHwKvJfgeEUkxRzUvwN3nTGUyUuKuOrZSZSV6wLB\n+kpo4nD3j4G1lRYPB+a5+wJ3LwWeA05296nufmKlaWWwn1fd/RDgp9Udy8wuM7MCMytYtUo3PBOR\n74wY0o0/nDyY92at1AWCDSCM4bg9gCVR84XAgdUVNrMjgNOALOD16sq5+2hgNEB+fr4+FSKyk3MP\n6sOqDVu5+/15dM7L5rpjB4UdUpMVRuKwKpZV+0Xv7h8CHyYqGBFpPn5x9O6s2lTCvR/Mo1NeFucf\n0jfskJqkMBJHIdArar4nsKwhdmxmI4GRAwYMaIjdiUiKMTP+ePJgVm0s5Zax0+mYm8UJ+3QLO6wm\nJ4zhuBOAgWbWz8wygTOBVxtix3oCoIjUJiM9jXvP3o/9e7fjF89/xefzV4cdUpOT6OG4Y4BxwCAz\nKzSzi929DLgKeAuYCbzg7tMb6Hh65riI1Cq7RTr/PD+fPh1yuPzJicxYtiHskJqURI+qOsvdu7l7\nC3fv6e6PBMtfd/fd3X03d7+tAY+nGoeIxKRtTiZPXDSc3OwMzn9sfKM9QfC+D+YxtbDmH7fvzljB\nwN+8zqaSsmrL/P3t2UxevK6hw4tJ0l85LiKSKN3btuTJi4azrbyCM0d/wbfrE39rkr+8NZuR935a\nY5m/vzOHbeXOotXVJ7O735/Hqfd/3tDhxSSlEoeaqkQkXgO75PHMJQeyaWsZFzw6gfVbEvf42Viv\nH7Fg7OnSoi1JecFiSiUONVWJSF3s3b0ND567P/NXbeLIv37IuPlrEnKc8grf6XV1N1/cnl8uf2oi\nf3htRhXrw71ULaUSh4hIXR06oCPPX34QaQbXvzyF4tLq+xfqqiwqcdz57hwOvf19CtcV71Rmydri\nnWo9789auct+KkK+xDmlEoeaqkSkPvbv0557zx7G0nVbGPXsZD6ft5p/TS6scZsFqzZxz3tzY6oF\nVESV+XReZBjw8vVbdypz+J8/2KkmUlFFloiuuUxZUlTrcRtaSiUONVWJSH0d1L/Djvtanf3PL/nF\n81NqTArnPzaev70zhzWba79te3kVScCqupdGlKpqF9EJ6OT7PuOkez9l9aYS1sYQQ0NIqcQhItIQ\nzjmoDzefsOeO+Q9m79pctN3mksit2qtKCtutL97GL1/4ig1bv2v+2v7d/97MlVz2ZEG126bVklgA\nvi5cT/6t7zLsj+/UXrgB6JnjIiJVuOTw/uzRtTXnPPIlv/3PdA7ZrSPZLdJ3Kbc9YZSWVTDz2w10\nysuiY27WTmXu/2ger0xaSpfW2TuWbU8z9384PzLvzrSlu16IaJWqJDO/3cCIuz6pz1urt5SqcaiP\nQ0Qa0mEDO3LPWftRuG4LJ9z9Cc+NXwzA3BUbueXV6ZSVV+zogygtr2DEXZ9w3J27fqmXl0fKRDcx\nUan565FPF1Z7fceNr0xlzorIs9Nfnlhzn0tjSKkah7uPBcbm5+dfGnYsIpIaRu7bnfQ04+fPTOKG\nV6ayrngb789awYRF6zjjgF6UBwlgW3C9xepNJbvsY3srVnSuqNyydet/Z1Z5/KVFWxgzfjFfLljD\n+9cdQXosbVcJllI1DhGRRDh+SDcm3nwUedkZ3PHmLCYsitzq490ZK3YMsZ29fOOO8lu3lbO++Lsh\ntdtrGtGd7NHlY7H9OBVJ8BAqJQ4RkRh0yM2i4OajGNQlb8ey8YvWUloWqWlc89xXO5Zf+mQB+/7h\n7R3z27/so2sZpTFeER7dxeHuLN+wa42msSlxiIjEKCsjnacuHs6VR+4GwCdzq74l+/bl81ZuAr5r\nonrk04V1PrYZvDSxkLFTGuTxRfWSUolDneMikmidW2fz62P34OD+HWote9TfP2LiN+vYsq28zsfb\nnnTSzPi/N2bVeT8NKaUShy4AFJHG8tTFwxnYObfWcsuKttApLzI8t1XmrsN5Y1Ve4Y12gV9tUmpU\nlYhIY8lIT+PtX3yPSYvXcfoD46otN2rM5B2vc7Mz2Fxat9rH4rXFtRdqJClV4xARaUxmxv592jPx\n5qNiKp/IW7Y3JiUOEZF66pCbxdRbjuGNaw6vse9j67bke7ZGXShxiIg0gLzsFuzZrTW3nTo47FAS\nLqUSh0ZViUjY+nfKZf6fjuey7/VnaK+2YYeTEBb2k6QSIT8/3wsKqr/bpIhIYykrr+CPr83giXHf\nNMrxFt1+
Qp23NbOJ7p5fW7mUqnGIiCSbjPQ0fn/yYN689vCwQ2kwShwiIo1gj66t2bNb67DDaBBK\nHCIijeT5yw/ijWuafs1DiUNEpJG0DkZe3X3WfmGHUi9KHCIijeykfbuz6PYT+OT6I4H63YokDLrl\niIhISHq1z9kxCmpZ0RamLl3PNc9NrteFghUVTlqCH/akGoeISBLo3rYlx+7dlVl/HMHl3+tf5/3M\nW7WpAaOqWkrVOMxsJDBywIABYYciIlJnNx6/J1f+YABrNpWyaM1mMtPTGNgll6fGfcM5B/XhwD+9\nV+22GY3waNmUShx65riIpIrW2S1ond2Cfh1b7Vj2q2MG1bpdi/TENySpqUpEJIU0RuJIqRqHiEhz\nMPvW4xi/cC2/HzuDMw/oxYn7dGfUmEms2lhCx9zMhB9f96oSERFA96oSEZEEUeIQEZG4KHGIiEhc\nlDhERCQuShwiIhIXJQ4REYmLEoeIiMRFiUNEROKSkhcAmtkqoAhYX2lVmxiWdQRWJy66XVQVUyK3\nr618XdfHcm6rWtbcz3dtZeI531Ut1/mOr0xDn29o3HNe3/Pdx9071VrK3VNyAkbXZRlQEHacidy+\ntvJ1Xa/zXffyNZWJ53xXc351vkM83419zut7vmOdUrmpamw9ljWm+h4/3u1rK1/X9TrfdS9fU5l4\nzndVy3W+4yvT3M93TFKyqao+zKzAY7hXizQMne/GpfPd+FLxnKdyjaOuRocdQDOj8924dL4bX8qd\nc9U4REQkLqpxiIhIXJQ4REQkLkocIiISFyWOWphZKzN7wsweNrOfhh1PqjOz/mb2iJm9FHYszYGZ\nnRJ8tv9jZseEHU+qM7M9zexBM3vJzH4Wdjx11SwTh5k9amYrzWxapeXHmdlsM5tnZjcEi08DXnL3\nS4GTGj3YFBDP+Xb3Be5+cTiRpoY4z/e/g8/2BcAZIYTb5MV5vme6+xXAT4AmO0S3WSYO4HHguOgF\nZpYO3AeMAPYCzjKzvYCewJKgWHkjxphKHif28y319zjxn++bg/USv8eJ43yb2UnAp8B7jRtmw2mW\nicPdPwbWVlo8HJgX/OItBZ4DTgYKiSQPaKbnq77iPN9ST/Gcb4u4A3jD3Sc1dqypIN7Pt7u/6u6H\nAE226VtfhN/pwXc1C4gkjB7AK8DpZvYA4d9OIJVUeb7NrIOZPQjsZ2Y3hhNaSqru8z0KOAr4kZld\nEUZgKaq6z/cRZna3mT0EvB5OaPWXEXYAScSqWObuvhm4sLGDaQaqO99rAH2BNbzqzvfdwN2NHUwz\nUN35/hD4sHFDaXiqcXynEOgVNd8TWBZSLM2Bznfj0vluXCl9vpU4vjMBGGhm/cwsEzgTeDXkmFKZ\nznfj0vluXCl9vptl4jCzMcA4YJCZFZrZxe5eBlwFvAXMBF5w9+lhxpkqdL4bl85342qO51s3ORQR\nkbg0yxqHiIjUnRKHiIjERYlDRETiosQhIiJxUeIQEZG4KHGIiEhclDhEamBmm4K/fc3s7Abe902V\n5j9vyP2LJIoSh0hs+gJxJY7g1to12SlxBHdMFUl6ShwisbkdONzMvjKzX5hZupn9xcwmmNnXZnY5\nQHD30w/M7FlgarDs32Y20cymm9llwbLbgZbB/p4Jlm2v3Viw72lmNtXMzoja94fB0+NmmdkzZlbV\nzfREEkp3xxWJzQ3Ade5+IkCQANa7+wFmlgV8ZmZvB2WHA4PdfWEwf5G7rzWzlsAEM3vZ3W8ws6vc\nfWgVxzoNGArsC3QMtvk4WLcfsDeRG+Z9BhxK5KFAIo1GNQ6RujkGOM/MvgK+BDoAA4N146OSBsDV\nZjYF+ILIHVMHUrPDgDHuXu7uK4CPgAOi9l3o7hXAV0Sa0EQalWocInVjwCh3f2unhWZHAJsrzR8F\nHOzuxWb2IZAdw76rUxL1uhz9H5YQqMYhEpuNQF7U/FvAz8ysBYCZ7W5mrarYrg2wLkgaewAHRa3b\ntn37Sj4Gzgj6UToB3wPGN8i7EGkA+rUiEpuvgbKgyelx4C4izUSTgg7qVcApVWz3JnCFmX0NzCbS\nXLXdaOBrM5vk7tHPn/4XcDAwBXDgendfHiQekdDptuoiIhIXNVWJiEhclDhERCQuShwiIhIXJQ4R\nEYmLEoeIiMRFiUNEROKixCEiInFR4hARkbj8P08s3jabrHatAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.loglog(loss_hist)\n",
+ "plt.xlabel('Iteration')\n",
+ "plt.ylabel('MSE Loss value')\n",
+ "plt.title('smarter SGD completion')\n",
+ "plt.legend()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[-0.10904536 -1.38673067 -0.53866088 ..., -0.0059163 0.22185898\n",
+ " 0.74981755]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(sess.run(t3f.gather_nd(estimated, observation_idx)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ -1.27225139e-01 -1.37794858e+00 -5.42469328e-01 ..., -1.30643336e-03\n",
+ " 2.35629296e-01 7.53320726e-01]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(observations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Environment (conda_tf)",
+ "language": "python",
+ "name": "conda_tf"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/tensor-nets.ipynb b/examples/tensor-nets.ipynb
new file mode 100644
index 00000000..c09da0bf
--- /dev/null
+++ b/examples/tensor-nets.ipynb
@@ -0,0 +1,585 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import t3f\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "import keras.backend as K\n",
+ "\n",
+ "tf.set_random_seed(0)\n",
+ "np.random.seed(0)\n",
+ "sess = tf.InteractiveSession()\n",
+ "K.set_session(sess)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Tensor Nets\n",
+ "\n",
+ "In this notebook we provide an example of how to build a simple Tensor Net (see https://arxiv.org/abs/1509.06569).\n",
+ "\n",
+ "The main ingredient is the so-called TT-Matrix, a generalization of the Kronecker product matrices, i.e. matrices of the form \n",
+ "$$A = A_1 \\otimes A_2 \\cdots \\otimes A_n$$\n",
+ "\n",
+ "In `t3f` TT-Matrices are represented using the `TensorTrain` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "A TT-Matrix of size 784 x 625, underlying tensor shape: (4, 7, 4, 7) x (5, 5, 5, 5), TT-ranks: (1, 2, 2, 2, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "W = t3f.random_matrix([[4, 7, 4, 7], [5, 5, 5, 5]], tt_rank=2)\n",
+ "\n",
+ "print(W)"
+ ]
+ },
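+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration of the Kronecker connection, the following sketch converts a Kronecker product of two small factors into the TT-format; its TT-ranks should all equal 1. We assume here that `t3f.to_tt_matrix` and `t3f.full` behave as used elsewhere in this notebook, and that the TT index grouping matches the row-major ordering of `np.kron`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# A Kronecker product of two factors is a TT-Matrix of TT-rank 1.\n",
+ "A1 = np.random.randn(4, 5).astype(np.float32)\n",
+ "A2 = np.random.randn(7, 5).astype(np.float32)\n",
+ "A_full = tf.constant(np.kron(A1, A2))  # dense 28 x 25 matrix\n",
+ "A_tt = t3f.to_tt_matrix(A_full, shape=[[4, 7], [5, 5]], max_tt_rank=1)\n",
+ "print(A_tt)\n",
+ "# The rank-1 truncation should be exact up to float precision:\n",
+ "print(sess.run(tf.reduce_max(tf.abs(t3f.full(A_tt) - A_full))))"
+ ]
+ },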
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using TT-Matrices we can compactly represent densely connected layers in neural networks, which allows us to greatly reduce number of parameters. Matrix multiplication can be handled by the `t3f.matmul` method which allows for multiplying dense (ordinary) matrices and TT-Matrices. Very simple neural network could look as following (for initialization several options such as `t3f.initializers.glorot`, `t3f.initializers.he` or `t3f.random_matrix` are available):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "x = tf.placeholder(tf.float32, [None, 784])\n",
+ "y = tf.placeholder(tf.int64, [None])\n",
+ "\n",
+ "initializer = t3f.glorot_initializer([[4, 7, 4, 7], [5, 5, 5, 5]], tt_rank=2)\n",
+ "W1 = t3f.get_variable('W1', initializer=initializer) \n",
+ "b1 = tf.get_variable('b1', shape=[625])\n",
+ "h1 = t3f.matmul(x, W1) + b1\n",
+ "h1 = tf.nn.relu(h1)\n",
+ "\n",
+ "W2 = tf.get_variable('W2', shape=[625, 10])\n",
+ "b2 = tf.get_variable('b2', shape=[10])\n",
+ "h2 = tf.matmul(h1, W2) + b2\n",
+ "\n",
+ "y_ = tf.one_hot(y, 10)\n",
+ "loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=h2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For convenience we have implemented a layer analogous to *Keras* `Dense` layer but with a TT-Matrix instead of an ordinary matrix. An example of fully trainable net is provided below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from keras.datasets import mnist\n",
+ "from keras.models import Sequential\n",
+ "from keras.layers import Dense, Activation, Dropout, Flatten\n",
+ "import numpy as np\n",
+ "from keras.utils import to_categorical\n",
+ "from keras import optimizers\n",
+ "from utils import TTDense"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "(x_train, y_train), (x_test, y_test) = mnist.load_data()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Some preprocessing..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "x_train = x_train / 127.5 - 1.0\n",
+ "x_test = x_test / 127.5 - 1.0\n",
+ "\n",
+ "y_train = to_categorical(y_train, num_classes=10)\n",
+ "y_test = to_categorical(y_test, num_classes=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "model = Sequential()\n",
+ "model.add(Flatten(input_shape=(28, 28)))\n",
+ "model.add(TTDense(row_dims=[7, 4, 7, 4], column_dims=[5, 5, 5, 5], tt_rank=4, init='glorot', activation='relu', bias_init=1e-3))\n",
+ "model.add(Dense(10))\n",
+ "model.add(Activation('softmax'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "flatten_1 (Flatten) (None, 784) 0 \n",
+ "_________________________________________________________________\n",
+ "tt_dense_1 (TTDense) (None, 625) 1725 \n",
+ "_________________________________________________________________\n",
+ "dense_1 (Dense) (None, 10) 6260 \n",
+ "_________________________________________________________________\n",
+ "activation_2 (Activation) (None, 10) 0 \n",
+ "=================================================================\n",
+ "Total params: 7,985\n",
+ "Trainable params: 7,985\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note that in the dense layer we only have $1725$ parameters instead of $784 * 625 = 490000$."
+ ]
+ },
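+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Where does $1725$ come from? Each TT-core $G_k$ of a TT-Matrix with row modes $m_k$, column modes $n_k$ and TT-ranks $r_k$ has $r_{k-1} m_k n_k r_k$ entries, and the layer also has $625$ biases. A quick check in plain Python:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Parameter count of the TT layer: total size of the cores plus biases.\n",
+ "ranks = [1, 4, 4, 4, 1]\n",
+ "rows = [7, 4, 7, 4]\n",
+ "cols = [5, 5, 5, 5]\n",
+ "core_params = sum(r1 * m * n * r2\n",
+ "                  for r1, m, n, r2 in zip(ranks[:-1], rows, cols, ranks[1:]))\n",
+ "print(core_params, core_params + 625)  # 1100 core parameters + 625 biases = 1725"
+ ]
+ },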
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "optimizer = optimizers.Adam(lr=1e-2)\n",
+ "model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train on 60000 samples, validate on 10000 samples\n",
+ "Epoch 1/2\n",
+ "60000/60000 [==============================] - 30s 494us/step - loss: 0.2540 - acc: 0.9233 - val_loss: 0.1394 - val_acc: 0.9569\n",
+ "Epoch 2/2\n",
+ "60000/60000 [==============================] - 28s 474us/step - loss: 0.1483 - acc: 0.9564 - val_loss: 0.1182 - val_acc: 0.9645\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.fit(x_train, y_train, epochs=2, batch_size=64, validation_data=(x_test, y_test))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "# Compression of Dense layers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "Let us now train an ordinary DNN (without TT-Matrices) and show how we can compress it using the TT decomposition."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "model = Sequential()\n",
+ "model.add(Flatten(input_shape=(28, 28)))\n",
+ "model.add(Dense(625, activation='relu'))\n",
+ "model.add(Dense(10))\n",
+ "model.add(Activation('softmax'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "flatten_5 (Flatten) (None, 784) 0 \n",
+ "_________________________________________________________________\n",
+ "dense_6 (Dense) (None, 625) 490625 \n",
+ "_________________________________________________________________\n",
+ "dense_7 (Dense) (None, 10) 6260 \n",
+ "_________________________________________________________________\n",
+ "activation_8 (Activation) (None, 10) 0 \n",
+ "=================================================================\n",
+ "Total params: 496,885\n",
+ "Trainable params: 496,885\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "optimizer = optimizers.Adam(lr=1e-3)\n",
+ "model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train on 60000 samples, validate on 10000 samples\n",
+ "Epoch 1/5\n",
+ "60000/60000 [==============================] - 7s 111us/step - loss: 0.2937 - acc: 0.9115 - val_loss: 0.1642 - val_acc: 0.9488\n",
+ "Epoch 2/5\n",
+ "60000/60000 [==============================] - 6s 102us/step - loss: 0.1353 - acc: 0.9590 - val_loss: 0.1297 - val_acc: 0.9606\n",
+ "Epoch 3/5\n",
+ "60000/60000 [==============================] - 6s 97us/step - loss: 0.1027 - acc: 0.9684 - val_loss: 0.0962 - val_acc: 0.9706\n",
+ "Epoch 4/5\n",
+ "60000/60000 [==============================] - 6s 100us/step - loss: 0.0813 - acc: 0.9744 - val_loss: 0.1335 - val_acc: 0.9593\n",
+ "Epoch 5/5\n",
+ "60000/60000 [==============================] - 6s 100us/step - loss: 0.0736 - acc: 0.9767 - val_loss: 0.1193 - val_acc: 0.9608\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us convert the matrix used in the Dense layer to the TT-Matrix with tt-ranks equal to 16 (since we trained the network without the low-rank structure assumption we may wish start with high rank values)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "A TT-Matrix of size 784 x 625, underlying tensor shape: (7, 4, 7, 4) x (5, 5, 5, 5), TT-ranks: (1, 16, 16, 16, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "W = model.trainable_weights[0]\n",
+ "print(W)\n",
+ "Wtt = t3f.to_tt_matrix(W, shape=[[7, 4, 7, 4], [5, 5, 5, 5]], max_tt_rank=16)\n",
+ "print(Wtt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We need to evaluate the tt-cores of Wtt. We also need to store other parameters for later (biases and the second dense layer)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "cores = sess.run(Wtt.tt_cores)\n",
+ "other_params = model.get_weights()[1:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can construct a tensor network with the first Dense layer replaced by `Wtt`\n",
+ "initialized using the previously computed cores."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "model = Sequential()\n",
+ "model.add(Flatten(input_shape=(28, 28)))\n",
+ "model.add(TTDense(row_dims=[7, 4, 7, 4], column_dims=[5, 5, 5, 5], tt_rank=16, activation='relu'))\n",
+ "model.add(Dense(10))\n",
+ "model.add(Activation('softmax'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "optimizer = optimizers.Adam(lr=1e-3)\n",
+ "model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "model.set_weights(list(cores) + other_params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10000/10000 [==============================] - 10s 972us/step\n",
+ "new accuracy: 0.6459\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"new accuracy: \", model.evaluate(x_test, y_test)[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "flatten_6 (Flatten) (None, 784) 0 \n",
+ "_________________________________________________________________\n",
+ "tt_dense_4 (TTDense) (None, 625) 15585 \n",
+ "_________________________________________________________________\n",
+ "dense_8 (Dense) (None, 10) 6260 \n",
+ "_________________________________________________________________\n",
+ "activation_10 (Activation) (None, 10) 0 \n",
+ "=================================================================\n",
+ "Total params: 21,845\n",
+ "Trainable params: 21,845\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that even though we now have about 5% of the original number of parameters we still achieve a relatively high accuracy."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Finetuning the model. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now finetune this tensor network."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train on 60000 samples, validate on 10000 samples\n",
+ "Epoch 1/2\n",
+ "60000/60000 [==============================] - 117s 2ms/step - loss: 0.1346 - acc: 0.9595 - val_loss: 0.1142 - val_acc: 0.9653\n",
+ "Epoch 2/2\n",
+ "60000/60000 [==============================] - 128s 2ms/step - loss: 0.0806 - acc: 0.9759 - val_loss: 0.0823 - val_acc: 0.9737\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model.fit(x_train, y_train, epochs=2, batch_size=64, validation_data=(x_test, y_test))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that we were able to achieve higher validation accuracy than we had in the plain DNN, while keeping the number of parameters extremely small."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Environment (conda_tf)",
+ "language": "python",
+ "name": "conda_tf"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/tensor-riemannian.ipynb b/examples/tensor-riemannian.ipynb
new file mode 100644
index 00000000..99a45649
--- /dev/null
+++ b/examples/tensor-riemannian.ipynb
@@ -0,0 +1,283 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import t3f\n",
+ "import tensorflow as tf\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "tf.set_random_seed(0)\n",
+ "np.random.seed(0)\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Riemannian optimization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Riemannian optimization is a framework for solving optimization problems with a constraint that the solution belongs to a manifold. \n",
+ "\n",
+ "Let us consider the following problem. Given some TT tensor $A$ with large tt-ranks we would like to find a tensor $X$ (with small prescribed tt-ranks $r$) which is closest to $A$ (in the sense of Frobenius norm). Mathematically it can be written as follows:\n",
+ "\\begin{equation*}\n",
+ "\\begin{aligned}\n",
+ "& \\underset{X}{\\text{minimize}} \n",
+ "& & \\frac{1}{2}\\|X - A\\|_F^2 \\\\\n",
+ "& \\text{subject to} \n",
+ "& & \\text{tt_rank}(X) = r\n",
+ "\\end{aligned}\n",
+ "\\end{equation*}\n",
+ "\n",
+ "It is known that the set of TT tensors with elementwise fixed TT ranks forms a manifold. Thus we can solve this problem using the so called Riemannian gradient descent. Given some functional $F$ on a manifold $\\mathcal{M}$ it is defined as\n",
+ "$$\\hat{x}_{k+1} = x_{k} - \\alpha P_{T_{x_k}\\mathcal{M}} \\nabla F(x_k),$$\n",
+ "$$x_{k+1} = \\mathcal{R}(\\hat{x}_{k+1})$$\n",
+ "with $P_{T_{x_k}\\mathcal{M}}$ being the projection onto the tangent space of $\\mathcal{M}$ at the point $x_k$ and $\\mathcal{R}$ being a retraction - an operation which projects points to the manifold, and $\\alpha$ is the learning rate.\n",
+ "\n",
+ "We can implement this in `t3f` using the `t3f.riemannian` module. As a retraction it is convenient to use the rounding method (`t3f.round`)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "sess = tf.InteractiveSession()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Initialize A randomly, with large tt-ranks\n",
+ "shape = 10 * [2]\n",
+ "init_A = t3f.random_tensor(shape, tt_rank=16)\n",
+ "A = t3f.get_variable('A', initializer=init_A, trainable=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Create an X variable and compute the gradient of the functional. Note that it is simply X - A.\n",
+ "\n",
+ "init_X = t3f.random_tensor(shape, tt_rank=2)\n",
+ "X = t3f.get_variable('X', initializer=init_X)\n",
+ "\n",
+ "gradF = X - A"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Let us compute the projection of the gradient onto the tangent space at X"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "riemannian_grad = t3f.riemannian.project(gradF, X)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Compute the update by subtracting the Riemannian gradient\n",
+ "# and retracting back to the manifold\n",
+ "alpha = 1.0\n",
+ "\n",
+ "train_step = t3f.assign(X, t3f.round(X - alpha * riemannian_grad, max_tt_rank=2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# let us also compute the value of the functional\n",
+ "# to see if it is decreasing\n",
+ "F = 0.5 * t3f.frobenius_norm_squared(X - A)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "sess.run(tf.global_variables_initializer())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "81.622\n",
+ "58.5347\n",
+ "56.27\n",
+ "56.0832\n",
+ "51.7328\n",
+ "50.7767\n",
+ "50.7767\n",
+ "50.7767\n",
+ "50.7767\n",
+ "50.7767\n"
+ ]
+ }
+ ],
+ "source": [
+ "log = []\n",
+ "for i in range(100):\n",
+ " F_v, _ = sess.run([F, train_step.op])\n",
+ " if i % 10 == 0:\n",
+ " print (F_v)\n",
+ " log.append(F_v)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is intructive to compare the obtained result with the quasioptimum delivered by the TT-round procedure. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "52.4074\n"
+ ]
+ }
+ ],
+ "source": [
+ "quasi_sol = t3f.round(A, max_tt_rank=2)\n",
+ "\n",
+ "val = sess.run(0.5 * t3f.frobenius_norm_squared(quasi_sol - A))\n",
+ "print (val)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that the value is slightly bigger than the exact minimum, but TT-round is faster and cheaper to compute, so it is often used in practice."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZkAAAEKCAYAAADAVygjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xl8VOXZ//HPlT2EEEjYA0pQFlkT\nJKCCSute0Fp3qrVuULUu1bZP7VOtS+vz68+f2qq1ta7URwtVcYNStSpopVUBBQFRXEAIIGCQsIZs\n1++PmUxDyDIJMxky832/XmM4Z865z3XmxLly3/c5923ujoiISDQkxToAERGJX0oyIiISNUoyIiIS\nNUoyIiISNUoyIiISNUoyIiISNUoyIiISNUoyIiISNUoyIiISNSmxDiDWunbt6v369Yt1GCIi7cqi\nRYu+cvduzW2X8EmmX79+LFy4MNZhiIi0K2b2RTjbqblMRESiRklGRESiRklGRESiJuH7ZEQiobKy\nkpKSEsrLy2MdikhEZWRk0KdPH1JTU1u1v5KMSASUlJSQnZ1Nv379MLNYhyMSEe5OaWkpJSUlFBQU\ntKoMNZeJREB5eTl5eXlKMBJXzIy8vLz9qqEryYhEiBKMxKP9/b1Wkmmlv32wgWnzV8U6DBGRA5qS\nTCu9umIjf3rz81iHIRKSnJxMYWEhw4YN49RTT2Xr1q0ArF+/nrPOOivG0e3rl7/8Ja+++mqsw2hQ\nx44dgf3/7H73u9+xa9euZrebN28ekyZNavVx9tfWrVv5wx/+EJWylWRaaVDPbDaUlVO2qzLWoYgA\nkJmZyeLFi1m2bBm5ubncf//9APTu3ZtnnnkmxtHt67bbbuP4449vs+NVV1e3eJ/9/ezCTTKxpiRz\nABrUIxuAjzduj3EkIvs68sgjWbduHQCrV69m2LBhQOCL9qc//SnFxcWMGDGCP/3pT0DgL+ljjz2W\nc845h4EDB3LDDTfw5JNPMmbMGIYPH85nn30GwKxZsxg7dixFRUUcf/zxbNy4EYBbbrmFSy65hAkT\nJtC/f3/uvffe0LEPO+wwpkyZwtChQznxxBPZvXs3ABdddFHoC/y2226juLiYYcOGMXXqVNwdgAkT\nJvCzn/2MMWPGMHDgQP75z3/uc641NTVceeWVDB06lEmTJvGtb30rVG6/fv247bbbGD9+PE8//TQP\nPfQQxcXFjBw5kjPPPDOUAFatWsWRRx5JcXExN910U6jscD+7CRMmcNZZZzF48GDOP/983J17772X\n9evX841vfINvfOMb+8T90ksvMXjwYMaPH8+zzz4bWr9z504uueQSiouLKSoq4oUXXgBg+fLljBkz\nhsLCQkaMGMEnn3wCwOOPP86IESMYOXIk3/ve9wDYvHkzZ555JsXFxRQXFzN//vwmr9MNN9zAZ599\nRmFhIT/96U+b+/VqEd3C3EqDev4nyYwpyI1xNHIguXXWcj5cvy2iZQ7p3YmbTx0a1rbV1dW89tpr\nXHrppfu898gjj5CTk8OCBQvYs2cP48aN48QTTwRgyZIlrFixgtzcXPr3789ll13Gu+++yz333MN9\n993H7373O8aPH8/bb7+NmfHwww9zxx13cNdddwHw0UcfMXfuXLZv386gQYO44oorAPjkk0+YPn06\nDz30EOeccw4zZ87kggsu2Cuuq666il/+8pcAfO9732P27NmceuqpAFRVVfHuu+8yZ84cbr311n2a\n2J599llWr17N0qVL2bRpE4cddhiXXHJJ6P2MjAzeeustAEpLS5kyZQoAN954I4888ghXX3011157\nLVdccQUXXnhhqAbYks/u/fffZ/ny5fTu3Ztx48Yxf/58rrnmGu6++27mzp1L165d9yqrvLycKVOm\n8Prrr3PooYdy7rnnht67/fbb+eY3v8mjjz7K1q1bGTNmDMcffzwPPPAA1157Leeffz4VFRVUV1ez\nfPlybr/9dubPn0/Xrl3ZsmULANdeey3XXXcd48ePZ82aNZx00kmsWLGi0ev0m9/8hmXLlrF48eIG\nz31/KMm0Uq+cDLIzUvj4y8h+mYi01u7duyksLGT16tUcfvjhnHDCCfts88orr/DBBx+E/tIvKyvj\nk08+IS0tjeLiYnr16gXAIYccEvoCHT58OHPnzgUCzwOde+65bNiwgYqKir2enZg4cSLp6emkp6fT\nvXv3UC2noKCAwsJCAA4//HBWr169T1xz587ljjvuYNeuXWzZsoWhQ4eGkswZZ5zR5L5vvfUWZ599\nNklJSfTs2XOfWkPdL/Bly5Zx4403snXrVnbs2MFJJ50EwPz585k5cyYQSHI/+9nPWvTZjRkzhj59\n+gCErsH48eP3KaPWRx99REFBAQMGDADgggsu4MEHHwwd58UXX+TOO+8EAglpzZo1HHnkkdx+++2U\nlJRwxhlnMGDAAF5//XXOOuusUBLLzQ38wfvqq6/y4Ycfho63bds2tm8PtLo0dp2iJS6TjJn1B34B\n5Lh7VHo8zYzBPbP5+Es1l8newq1xRFptn0xZWRmTJk3i/vvv55prrtlrG3fnvvvuC3251po3bx7p\n6emh5aSkpNByUlISVVVVAFx99dVcf/31nHbaacybN49bbrkltE/d/ZOTk0P71F9f21xWq7y8nCuv\nvJKFCxfSt29fbrnllr2ey6jdv26Z9c+pKVlZWaF/X3TRRTz//POMHDmSadOmMW/evNB7zd2qG+5n\n11ic9TV2PHdn5syZDBo0aK/1hx12GGPHjuVvf/sbJ510Eg8//DDu3mA5NTU1/Pvf/yYzM3Of91oT\n6/6Iap+MmV1nZsvNbJmZTTezjFaW86iZbTKzZQ28d7KZfWxmn5rZDQDu/rm779tWEGEDe2Tz0Zfb\nm/0lF2lLOTk53Hvvvdx5551UVu59Y8pJJ53EH//4x9D6lStXsnPnzrDLLisrIz8/H4A///nPEYm3\nNqF07dqVHTt2tLijffz48cycOZOamho2bty4V+Kob/v27fTq1YvKykqefPLJ0Ppx48YxY8YMgL3W\n19Wazy47OztUg6hr8ODBrFq1KtTXNX369L2Oc99994W+V95//30APv/8c/r3788111zDaaedxgcf\nfMBxxx3HU089RWlpKUCouezEE0/k97//fajM5prBGoszEqKWZMwsH7gGGO3uw4Bk4Lx623Q3s+x6\n6w5toLhpwMkNHCMZuB84BRgCTDazIRE5gTAM7pnN9vIqvtym8arkwFJUVMTIkSNDX5y1LrvsMoYM\nGcKoUaMYNmwYP/jBD1r0l+wtt9zC2WefzdFHH71PP0Nrde7cmSlTpjB8+HBOP/10iouLW7T/mWee\nSZ8+fULnM3bsWHJychrc9le/+hVjx47lhBNOYPDgwaH199xzD/fffz/FxcWUlZU1uG9rPrupU6dy\nyimn7NOEl5GRwYMPPsjEiRMZP348B
x98cOi9m266icrKSkaMGMGwYcNCNyL89a9/ZdiwYRQWFvLR\nRx9x4YUXMnToUH7xi19w7LHHMnLkSK6//noA7r33XhYuXMiIESMYMmQIDzzwQJNx5uXlMW7cOIYN\nGxbxjn/cPSovIB9YC+QSaJabDZxYb5uzgdeBjODyFGBOI+X1A5bVW3ck8HKd5Z8DP6+z/ExzcR5+\n+OHeWu98XuoH/2y2v/7RxlaXIfHhww8/jHUICW379u3u7v7VV195//79fcOGDTGOKL409PsNLPQw\nckHUajLuvg64E1gDbADK3P2Vets8DbwEzDCz84FLgHNacJjaRFarBMg3szwzewAoMrOfN7SjmZ1q\nZg829ldLOEK3MatfRiSmJk2aRGFhIUcffTQ33XQTPXv2jHVIEhS1jn8z6wJ8GygAtgJPm9kF7v5E\n3e3c/Q4zmwH8ETjE3Xe05DANrHN3LwUub2pHd58FzBo9evSUFhxvLzkdUunZKUNJRiTGmuqHkdiK\nZsf/8cAqd9/s7pXAs8BR9Tcys6OBYcBzwM0tPEYJ0LfOch9gfevCbZ1BusNMRKRR0Uwya4AjzKyD\nBe6xOw5YUXcDMysCHiJQ47kYyDWzX7fgGAuAAWZWYGZpBG4seDEi0YdpcM9sPt28g6rqmrY8rIhI\nuxDNPpl3gGeA94ClwWM9WG+zDsDZ7v6Zu9cA3we+qF+WmU0H/g0MMrMSM7s0eIwq4CrgZQIJ7Cl3\nXx6lU2rQwB7ZVFTVsLo0/NtARUQSRVQfxnT3m2miCczd59dbriRQs6m/3eQmypgDzNmPMPdL7fAy\nH325nUO7ZzeztYhIYtEAmfvp0O4dSU4yVqpfRmKotLSUwsJCCgsL6dmzJ/n5+QwfPpzCwkKGDBkS\nWle7TUVFRcxirR1GH2DDhg37DHF/7bXXkp+fT03Nf5qgZ8+ezc03t7TLVg4ESjL7KSM1mX55HfhI\nSUZiKC8vj8WLF7N48WIuv/xyrrvuOpYuXcrixYv58MMPQ+tqt0lLS2u0rGgPM1LX3XffHRqwEgLD\noTz33HP07duXN998M7R+4sSJvPjii+1i2HzZm5JMBAzqma0h/6Vdu+WWW5g6dSonnngiF154IeXl\n5Vx88cUMHz6coqKi0ACZ06ZN46qrrgrtN2nSpNDtwx07duQXv/gFI0eO5IgjjggNvNjYMPoAM2fO\n5OST/zOYx9y5cxk2bBhXXHHFXkOtmBkTJkxg9uzZ0foIJEqUZCLg0G4dWbtlFxVVusNM2q9Fixbx\nwgsv8Je//CU03P3SpUuZPn063//+9/catLIhO3fu5IgjjmDJkiUcc8wxPPRQoHu1dhj9BQsW7PWQ\n5KpVq+jSpcteAzZOnz6dyZMn853vfIfZs2fvNfba6NGjG5xPRg5scTkKc1sr6JZFjcOaLbs4tHvH\n5neQuDdv3jzeeOON0HJtk1DtFy/Asccey4QJE7jrrrvYsSPwDHKvXr2YOnUqs2bN4r333gtte/31\n15OdHd0bS0477bTQqL1vvfUWV199NRAYzPHggw9m5cqVTe6flpYW6l85/PDD+cc//gE0Poz+hg0b\n6NatW2j/iooK5syZw29/+1uys7MZO3Ysr7zyChMnTgSge/furF/fpo/BSQQoyURAv7zAUOKrvtqp\nJCNAYEbHCRMm7LO+oc7rH//4x/usO/XUU0PzqUTD/fffH0p4c+YEbs6sOyS+NzKyeEpKyl4d8nVr\nN6mpqaFh5+sPId/QcPSZmZl77f/SSy9RVlbG8OHDAdi1axcdOnQIJZny8vIGh66XA5uayyKgoGvg\nf87VX+lZGWkffvjDH4ZuAujdu/c+7x9zzDGhIe9XrlzJmjVrGDRoEP369WPx4sXU1NSwdu1a3n33\n3WaP1dgw+gMHDtxrErLp06fz8MMPs3r1alavXs2qVat45ZVXQp39K1euDE2FLO2HkkwEdO6QRpcO\nqXyuJCNx4sorr6S6uprhw4dz7rnnMm3aNNLT0xk3bhwFBQUMHz6cn/zkJ4waNarZshobRj8rK4tD\nDjmETz/9lF27dvHyyy+Hai21748fP55Zs2YBgZsC6r4v7YM1Vi1OFKNHj/aFCxfudzln/GE+aSlJ\nzJh6ZASikvZmxYoVHHbYYbEOo9157rnnWLRoEb/+ddOjSW3cuJHvfve7vPbaa20UmdTV0O+3mS1y\n99HN7auaTIQUdO3I6q90D79IS3znO9+hX79+zW63Zs0a7rrrrugHJBGnjv8IKejagZnvlbOroooO\nafpYRcJ12WWXNbtNS2fLlAOHajIRUtA1cFeZajOJK9GbniU+7e/vtZJMhPTr2gEI3MYsiScjI4PS\n0lIlGokr7k5paSkZGRmtLkPtOhFS+6yMhvxPTH369KGkpITNmzfHOhSRiMrIyKBPnz6t3l9JJkKy\n0lPo0SmdzzcrySSi1NRUCgoKYh2GyAFHzWURVNA1SzUZEZE6lGQiqKBrlvpkRETqUJKJoIKuWWzZ\nWUHZrsrmNxYRSQBKMhEUGihTTWYiIoCSTET171Y7GvOOGEciInJgUJKJoL65HUgyWKUHMkVEACWZ\niEpPSSa/S6Y6/0VEgpRkIiwwUKaSjIgIKMlEXEFeB9VkRESClGQirEdOBjv2VLG7ojrWoYiIxJyS\nTITlZaUBULpzT4wjERGJPSWZCOvSIZBktuysiHEkIiKxpyQTYXkdlWRERGopyURYblY6oCQjIgJN\nDPVvZkuBhmZgMsDdfUTUomrHcrNUkxERqdXUfDKT2iyKONIpI4WUJKNUSUZEpPEk4+5ftGUg8cLM\n6JKVxtdKMiIizffJmNkRZrbAzHaYWYWZVZvZtrYIrr3Ky0pTTUZEhPA6/n8PTAY+ATKBy4D7ohlU\ne5eblaY+GRERwry7zN0/BZLdvdrdHwO+Ed2w2rcuSjIiIkDTHf+1dplZGrDYzO4ANgBZ0Q2rfctT\nkhERAcKryXwvuN1VwE6gL3BmNINq73Kz0ijbXUlldU2sQxERialmazJ17jIrB26NbjjxoXb8sq93\nVdA9OyPG0YiIxE44d5eNM7N/mNlKM/u89tUWwbVXXWqTzM7KGEciIhJb4fTJPAJcBywCNH59GHL3\nGok5O7bBiIjEUDhJpszd/x71SOJInsYvExEBwksyc83s/wHPAqFJUtz9vahF1c5p/DIRkYBwkszY\n4M/RddY58M3IhxMfOndIBZRkRETCubtMD162UGpyEjmZqUoyIpLwwrm7LMfM7jazhcHXXWaW0xbB\ntWcav0xEJLyHMR8FtgPnBF/bgMeiGdT+MrP+ZvaImT0Tqxhys9LYskNJRkQSWzhJ5hB3v9ndPw++\nbgX6N7eTmQ0ys8V1XtvM7EetCdLMHjWzTWa2rIH3Tjazj83sUzO7ASAY56WtOVakdMlK4+tdSjIi\nktjCSTK7zWx87YKZjQN2N7eTu3/s7oXuXggcDuwCnqu7jZl1N7PseusObaC4acDJ9VeaWTJwP3
...[base64-encoded PNG data truncated: semilog plot of the functional value vs. iteration for Riemannian gradient descent, with the TT-round(A) value shown as a dashed baseline]...\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.semilogy(log, label='Riemannian gradient descent')\n",
+ "plt.axhline(y=val, lw=1, ls='--', color='gray', label='TT-round(A)')\n",
+ "plt.xlabel('Iteration')\n",
+ "plt.ylabel('Value of the functional')\n",
+ "plt.legend()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Environment (conda_tf)",
+ "language": "python",
+ "name": "conda_tf"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
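
The plot cell above compares Riemannian gradient descent against the TT-round(A) baseline. For reference, the core update behind that curve can be sketched as follows. This is a minimal sketch, not the notebook's exact code: the functional is assumed to be F(X) = 0.5 * ||X - A||^2, the shapes, ranks, and step size are illustrative, and t3f.assign is assumed to accept a TT-object whose ranks match the variable.

import tensorflow as tf
import t3f

A = t3f.random_tensor((8, 8, 8), tt_rank=16)
X = t3f.get_variable('X', initializer=t3f.random_tensor((8, 8, 8), tt_rank=2))
# The Euclidean gradient of F(X) = 0.5 * ||X - A||^2 is X - A. Project it
# onto the tangent space at X, take a step, and retract back to the rank-2
# manifold by TT-rounding.
riemannian_grad = t3f.project(X - A, X)
step = t3f.round(X - 0.5 * riemannian_grad, max_tt_rank=2)
train_op = t3f.assign(X, step)
func_value = 0.5 * t3f.frobenius_norm_squared(X - A)
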
diff --git a/examples/tt-basic.ipynb b/examples/tt-basic.ipynb
new file mode 100644
index 00000000..58c6c5c1
--- /dev/null
+++ b/examples/tt-basic.ipynb
@@ -0,0 +1,289 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import t3f\n",
+ "import tensorflow as tf\n",
+ "import numpy as np\n",
+ "\n",
+ "sess = tf.InteractiveSession()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this example we will show how to convert existing tensors into Tensor Train format. Let us consider the tensor obtained by evaluating $\\sin(x)$ on a uniform grid of size $2 ^ d$ and reshaping the obtained array into tensor of size $2 \\times 2 \\times 2 \\cdots \\times 2$. This is an example of a tensor in the so-called Quantized Tensor Train format (QTT)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "d = 10\n",
+ "x = np.linspace(0, 2 * np.pi, 2 ** 10)\n",
+ "y = np.sin(x)\n",
+ "y = np.reshape(y, d * [2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print (y.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now lets construct a TT tensor out of `y` using `t3f.to_tt_tensor`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "A Tensor Train of shape (2, 2, 2, 2, 2, 2, 2, 2, 2, 2), TT-ranks: (1, 2, 4, 8, 16, 32, 16, 8, 4, 2, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "y_tt = t3f.to_tt_tensor(y, max_tt_rank=32)\n",
+ "print (y_tt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us verify that this indeed an approximation of the original tensor `y`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "relative error is 2.8373670049013647e-15\n"
+ ]
+ }
+ ],
+ "source": [
+ "y_tt_full = sess.run(t3f.full(y_tt))\n",
+ "print (\"relative error is {}\".format(np.linalg.norm(y_tt_full - y) / np.linalg.norm(y)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is known that the exact TT-rank of the obtained tensor is equal to $2$. Let us round `y_tt` using `t3f.round` by setting the maximal value of the TT-rank to $2$ and verify it numerically."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "relative error is 1.4717721230311198e-15\n"
+ ]
+ }
+ ],
+ "source": [
+ "y_tt_round = t3f.round(y_tt, max_tt_rank=2)\n",
+ "\n",
+ "y_tt_round_full = sess.run(t3f.full(y_tt_round))\n",
+ "print (\"relative error is {}\".format(np.linalg.norm(y_tt_round_full - y) / np.linalg.norm(y)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This fact allows us to significantly compress the tensor `y`. The total number of degrees of freedom in `y_tt` after rounding is :"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "72\n"
+ ]
+ }
+ ],
+ "source": [
+ "dof = sum(np.prod(tt_core.shape.as_list()) for tt_core in y_tt_round.tt_cores)\n",
+ "print (dof)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "which is much smaller than the original $2^d$ and is in fact proportional to $d$."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Exercise**: perform the same analysis for $e^x$."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "We can also perform various operations with tensors in the TT format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Element-wise product\n",
+ "y_tt_sq = y_tt_round * y_tt_round\n",
+ "# Element-wise sum\n",
+ "y_tt_sum = y_tt_round + y_tt_round"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us check that we indeed get the desired results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "y_tt_sq_full = sess.run(t3f.full(y_tt_sq))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "5.57568393209e-14\n"
+ ]
+ }
+ ],
+ "source": [
+ "print (np.linalg.norm(y ** 2 - y_tt_sq_full))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "y_tt_sum_full = sess.run(t3f.full(y_tt_sum))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "6.65826459441e-14\n"
+ ]
+ }
+ ],
+ "source": [
+ "print (np.linalg.norm(2 * y - y_tt_sum_full))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Environment (conda_tf)",
+ "language": "python",
+ "name": "conda_tf"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
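
A possible solution sketch for the exercise in the notebook above (not part of the repo): $e^x$ sampled on a uniform grid factorizes over the binary digits of the grid index, so its QTT representation has TT-rank exactly 1.

import numpy as np
import tensorflow as tf
import t3f

d = 10
x = np.linspace(0, 1, 2 ** d)
y = np.reshape(np.exp(x), d * [2])

# TT-SVD followed by rounding to rank 1; the rank-1 representation is exact
# up to floating-point noise because exp(a + b) = exp(a) * exp(b).
y_tt = t3f.round(t3f.to_tt_tensor(y, max_tt_rank=8), max_tt_rank=1)
with tf.Session() as sess:
  y_full = sess.run(t3f.full(y_tt))
print(np.linalg.norm(y_full - y) / np.linalg.norm(y))
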
diff --git a/examples/utils/__init__.py b/examples/utils/__init__.py
new file mode 100644
index 00000000..dfa90ed7
--- /dev/null
+++ b/examples/utils/__init__.py
@@ -0,0 +1 @@
+from .tt_dense import TTDense
diff --git a/examples/utils/tt_dense.py b/examples/utils/tt_dense.py
new file mode 100644
index 00000000..de916eee
--- /dev/null
+++ b/examples/utils/tt_dense.py
@@ -0,0 +1,87 @@
+from keras.engine.topology import Layer
+from keras.layers import Activation
+import t3f
+import tensorflow as tf
+import numpy as np
+
+activations = ['relu', 'sigmoid', 'tanh', 'softmax']
+inits = ['glorot', 'he', 'lecun']
+
+
+class TTDense(Layer):
+ counter = 0
+
+ def __init__(self, row_dims, column_dims, tt_rank=2, init='glorot',
+ activation='relu', bias=True, bias_init=0.1, **kwargs):
+ """Creates a TT-Matrix based Dense layer.
+
+ Args:
+ row_dims: an array, shape of the matrix row index
+ column_dims: an array, shape of the matrix column index
+ tt_rank: a number or an array, desired tt-rank of the TT-Matrix
+ init: string specifying initializer for the TT-Matrix. Possible
+ values are 'glorot', 'he', 'lecun'.
+ activation: string, specifies the activation function. Possible
+ values are 'relu', 'sigmoid', 'tanh', 'softmax' and None
+ bias: bool, whether to use bias
+ bias_init: a number, initialization value of the bias
+
+ Returns:
+ Layer object corresponding to multiplication by a TT-Matrix
+ followed by addition of a bias and applying
+ an elementwise activation
+
+ Raises:
+ ValueError if the provided activation or init is unknown
+ """
+ self.tt_shape = [row_dims, column_dims]
+ self.output_dim = np.prod(column_dims)
+ self.tt_rank = tt_rank
+ self.activation = activation
+ self.bias = bias
+ self.bias_init = bias_init
+ self.init = init
+ super(TTDense, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+ if self.init == 'glorot':
+ initializer = t3f.glorot_initializer(self.tt_shape,
+ tt_rank=self.tt_rank)
+ elif self.init == 'he':
+ initializer = t3f.he_initializer(self.tt_shape,
+ tt_rank=self.tt_rank)
+ elif self.init == 'lecun':
+ initializer = t3f.lecun_initializer(self.tt_shape,
+ tt_rank=self.tt_rank)
+ else:
+ raise ValueError('Unknown init "%s", only %s are supported'
+ % (self.init, inits))
+ name = 'tt_dense_matrix_{}'.format(TTDense.counter)
+ self.W = t3f.get_variable(name, initializer=initializer)
+ self.b = None
+ if self.bias:
+ b_name = 'tt_dense_b_{}'.format(TTDense.counter)
+ b_init = tf.constant_initializer(self.bias_init)
+ self.b = tf.get_variable(b_name, shape=self.output_dim,
+ initializer=b_init)
+ TTDense.counter += 1
+ self.trainable_weights = list(self.W.tt_cores)
+ if self.b is not None:
+ self.trainable_weights.append(self.b)
+
+ def call(self, x):
+ if self.bias:
+ h = t3f.matmul(x, self.W) + self.b
+ else:
+ h = t3f.matmul(x, self.W)
+ if self.activation is not None:
+ if self.activation in activations:
+ h = Activation(self.activation)(h)
+ else:
+ raise ValueError('Unknown activation "%s", only %s and None '
+ 'are supported'
+ % (self.activation, activations))
+ return h
+
+ def compute_output_shape(self, input_shape):
+ return (input_shape[0], self.output_dim)
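
A minimal usage sketch for the layer above (the 784 = 4 * 7 * 4 * 7 and 625 = 5^4 factorizations and all hyperparameters are illustrative; the script is assumed to run from the examples/ directory so that `utils` is importable):

from keras.models import Sequential
from keras.layers import Dense
from utils import TTDense

model = Sequential()
# A 784 -> 625 fully-connected layer compressed as a TT-matrix of TT-rank 4.
model.add(TTDense(row_dims=[4, 7, 4, 7], column_dims=[5, 5, 5, 5],
                  tt_rank=4, activation='relu', input_shape=(784,)))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
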
diff --git a/setup.py b/setup.py
index 09ee29e9..fcdfdb76 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
from setuptools import setup
setup(name='t3f',
- version='0.2.0',
+ version='1.0.0',
description='Tensor Train decomposition on TensorFlow',
url='https://github.com/Bihaqo/t3f',
author='Alexander Novikov',
@@ -9,7 +9,6 @@
license='MIT',
packages=['t3f'],
install_requires=[
- 'tensorflow>=1,<2',
'numpy',
],
zip_safe=False)
diff --git a/t3f/__init__.py b/t3f/__init__.py
index 85814ca2..60abb533 100644
--- a/t3f/__init__.py
+++ b/t3f/__init__.py
@@ -1,11 +1,81 @@
-from tensor_train_base import TensorTrainBase
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
-from variables import *
-from ops import *
-from batch_ops import *
-from initializers import *
-from regularizers import *
-from riemannian import *
-from shapes import *
-from decompositions import *
\ No newline at end of file
+from t3f.tensor_train_base import TensorTrainBase
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+
+from t3f.variables import assign
+from t3f.variables import get_variable
+
+from t3f.ops import add
+from t3f.ops import cast
+from t3f.ops import flat_inner
+from t3f.ops import frobenius_norm
+from t3f.ops import frobenius_norm_squared
+from t3f.ops import full
+from t3f.ops import matmul
+from t3f.ops import multiply
+from t3f.ops import quadratic_form
+from t3f.ops import transpose
+from t3f.ops import gather_nd
+from t3f.ops import renormalize_tt_cores
+
+from t3f.batch_ops import concat_along_batch_dim
+from t3f.batch_ops import gram_matrix
+from t3f.batch_ops import multiply_along_batch_dim
+from t3f.batch_ops import pairwise_flat_inner
+
+from t3f.initializers import matrix_with_random_cores
+from t3f.initializers import matrix_batch_with_random_cores
+from t3f.initializers import tensor_with_random_cores
+from t3f.initializers import tensor_batch_with_random_cores
+from t3f.initializers import random_tensor
+from t3f.initializers import random_tensor_batch
+from t3f.initializers import random_matrix
+from t3f.initializers import random_matrix_batch
+from t3f.initializers import tensor_ones
+from t3f.initializers import tensor_zeros
+from t3f.initializers import matrix_ones
+from t3f.initializers import matrix_zeros
+from t3f.initializers import eye
+from t3f.initializers import ones_like
+from t3f.initializers import zeros_like
+from t3f.initializers import glorot_initializer
+from t3f.initializers import he_initializer
+from t3f.initializers import lecun_initializer
+
+from t3f.regularizers import cores_regularizer
+from t3f.regularizers import l2_regularizer
+
+from t3f.riemannian import add_n_projected
+from t3f.riemannian import pairwise_flat_inner_projected
+from t3f.riemannian import project
+from t3f.riemannian import project_matmul
+from t3f.riemannian import project_sum
+
+from t3f.shapes import batch_size
+from t3f.shapes import clean_raw_shape
+from t3f.shapes import expand_batch_dim
+from t3f.shapes import is_batch_broadcasting_possible
+from t3f.shapes import lazy_batch_size
+from t3f.shapes import lazy_raw_shape
+from t3f.shapes import lazy_shape
+from t3f.shapes import lazy_tt_ranks
+from t3f.shapes import raw_shape
+from t3f.shapes import shape
+from t3f.shapes import squeeze_batch_dim
+from t3f.shapes import tt_ranks
+
+from t3f.decompositions import orthogonalize_tt_cores
+from t3f.decompositions import round
+from t3f.decompositions import to_tt_matrix
+from t3f.decompositions import to_tt_tensor
+
+import t3f.approximate as approximate
+import t3f.kronecker as kronecker
+import t3f.utils as utils
+
+_directly_imported = ['tensor_train_base', 'tensor_train', 'tensor_train_batch',
+ 'variables', 'ops', 'batch_ops', 'initializers',
+ 'regularizers', 'riemannian', 'shapes', 'decompositions']
+
+__all__ = [s for s in dir() if
+ s not in _directly_imported and not s.startswith('_')]
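
The effect of the explicit imports and the `__all__` filter can be checked directly; an illustrative sanity check (not part of the repo):

import t3f

# Curated functions are re-exported at the top level...
assert 'matmul' in t3f.__all__ and 'round' in t3f.__all__
# ...public submodules stay visible...
assert 'approximate' in t3f.__all__
# ...while the internal implementation modules are filtered out.
assert 'ops' not in t3f.__all__
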
diff --git a/t3f/approximate.py b/t3f/approximate.py
new file mode 100644
index 00000000..4cb8f495
--- /dev/null
+++ b/t3f/approximate.py
@@ -0,0 +1,167 @@
+import numpy as np
+import tensorflow as tf
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import decompositions
+from t3f import batch_ops
+
+
+def add_n(tt_objects, max_tt_rank):
+ """Adds a bunch of TT-object and round after each summation.
+
+ This version implements a slow-to-compile but fast-to-execute (at least on
+ a GPU) version: summing in a binary tree order.
+ I.e. it uses the following idea:
+ round(a + b + c + d) ~= round(round(a + b) + round(c + d))
+ and so is able to compute the answer in log(N) parallel adds/rounds.
+
+ Args:
+ tt_objects: a list of `TensorTrainBase` objects.
+ max_tt_rank: a number, TT-rank for each individual rounding.
+
+ Returns:
+ Object of the same type as each input.
+
+ See Also:
+ t3f.approximate.reduce_sum_batch
+ """
+ prev_level = tt_objects
+ while len(prev_level) > 1:
+ next_level = []
+ for i in range(0, len(prev_level), 2):
+ curr = prev_level[i]
+ if i + 1 < len(prev_level):
+ curr = decompositions.round(curr + prev_level[i + 1], max_tt_rank)
+ next_level.append(curr)
+ prev_level = next_level
+ return prev_level[0]
+
+
+def reduce_sum_batch(tt_batch, max_tt_rank, coef=None):
+ """Sum of all TT-objects in the batch with rounding after each summation.
+
+  This version is slow to compile but fast to execute (at least on a GPU):
+  it sums in a binary tree order, using the idea
+    round(a + b + c + d) ~= round(round(a + b) + round(c + d))
+  to compute the answer in log(batch_size) parallel adds/rounds.
+
+ Args:
+ tt_batch: `TensorTrainBatch` object.
+ max_tt_rank: a number, TT-rank for each individual rounding.
+ coef: tf.Tensor, its shape is either batch_size, or batch_size x N.
+      If coef is a vector of size batch_size, the result will
+      be the (approximate) weighted sum.
+      If coef is a matrix of shape batch_size x N, the result will be
+      a `TensorTrainBatch` res containing N TT-objects such that
+        res[j] ~= sum_i tt_batch[i] coef[i, j]
+
+ Returns:
+    If coef is absent or is a vector of numbers, returns
+    a `TensorTrain` object representing the (approximate) element-wise sum of
+    all the objects in the batch, weighted if coef is provided.
+    If coef is a matrix, returns a `TensorTrainBatch`.
+
+ See Also:
+ t3f.approximate.add_n
+ """
+ ndims = tt_batch.ndims()
+ left_tt_rank_dim = tt_batch.left_tt_rank_dim
+ right_tt_rank_dim = tt_batch.right_tt_rank_dim
+ shape = tt_batch.get_raw_shape()
+ dtype = tt_batch.dtype
+
+ is_batch_output = False
+ if coef is not None:
+ coef = tf.convert_to_tensor(coef)
+ if len(coef.get_shape()) == 1:
+ tt_batch = batch_ops.multiply_along_batch_dim(tt_batch, coef)
+ elif len(coef.get_shape()) == 2:
+ is_batch_output = True
+ output_size = coef.get_shape().as_list()[1]
+ # Coef is of size batch_size x N, need to duplicate the batch
+ # dimension xN.
+ if coef.shape[0] != tt_batch.batch_size:
+ raise ValueError('If coef is a matrix, it should be of shape '
+ 'batch_size x N, got %d x %d instead '
+ '(batch size is %d).' % (coef.shape[0], coef.shape[1],
+ tt_batch.batch_size))
+ tt_batch_cores = []
+ for core_idx in range(ndims):
+ curr_core = tt_batch.tt_cores[core_idx]
+ curr_shape = curr_core.get_shape().as_list()
+ new_shape = np.insert(curr_shape, 1, 1)
+        tiling = np.ones(len(new_shape), dtype=int)  # tf.tile needs integer multiples.
+ tiling[1] = output_size
+ curr_core = tf.tile(tf.reshape(curr_core, new_shape), tiling)
+ if core_idx == 0:
+ # Multiply the first TT-core by the provided coefficients.
+ # TODO: add t3f.utils.expands_dims_like(coef, curr_core)
+ shaped_coef = coef
+ for _ in range(len(curr_core.get_shape()) - len(coef.shape)):
+ shaped_coef = tf.expand_dims(shaped_coef, -1)
+ curr_core = curr_core * shaped_coef
+ # Merge the first two dimensions back into one.
+ raveled_shape = np.array(curr_shape).copy()
+ raveled_shape[0] *= output_size
+ curr_core = tf.reshape(curr_core, raveled_shape)
+ tt_batch_cores.append(curr_core)
+ tt_batch = TensorTrainBatch(tt_batch_cores, shape,
+ tt_batch.get_tt_ranks())
+
+ else:
+ raise ValueError('Coef cannot be more than 2-d.')
+
+ if not is_batch_output:
+ output_size = 1
+
+ prev_level = tt_batch
+ while prev_level.batch_size > output_size:
+ current_level_cores = []
+ for core_idx in range(ndims):
+ curr_orig_core = prev_level.tt_cores[core_idx]
+ if is_batch_output:
+ # Split the first dimension into batch_size x N
+ unraveled_shape = curr_orig_core.get_shape().as_list()
+ unraveled_shape = np.array(unraveled_shape).copy()
+          unraveled_shape[0] //= output_size  # Integer division keeps the shape integral.
+ unraveled_shape = np.insert(unraveled_shape, 1, output_size)
+ curr_orig_core = tf.reshape(curr_orig_core, unraveled_shape)
+
+ a_core = curr_orig_core[::2]
+ b_core = curr_orig_core[1::2]
+
+ if a_core.get_shape()[0] > b_core.get_shape()[0]:
+ # Odd number of elements in the batch, will have to add dummy
+ # TT-object with the tt-cores filled with zeros.
+ zeros_shape = b_core.get_shape().as_list()
+ zeros_shape[0] = 1
+ zeros = tf.zeros(zeros_shape, dtype)
+ b_core = tf.concat((b_core, zeros), axis=0)
+
+ if is_batch_output:
+ # Merge the first two dimensions back into one.
+ a_core_shape = a_core.get_shape().as_list()
+ a_core_shape[0] = a_core_shape[0] * a_core_shape[1]
+ a_core_shape = np.delete(a_core_shape, 1)
+ a_core = tf.reshape(a_core, a_core_shape)
+ b_core_shape = b_core.get_shape().as_list()
+ b_core_shape[0] = b_core_shape[0] * b_core_shape[1]
+ b_core_shape = np.delete(b_core_shape, 1)
+ b_core = tf.reshape(b_core, b_core_shape)
+
+ if core_idx == 0:
+ curr_sum_core = tf.concat((a_core, b_core), axis=right_tt_rank_dim)
+ elif core_idx == ndims - 1:
+ curr_sum_core = tf.concat((a_core, b_core), axis=left_tt_rank_dim)
+ else:
+ zeros = tf.zeros(b_core.get_shape(), dtype)
+ upper = tf.concat((a_core, zeros), axis=right_tt_rank_dim)
+ lower = tf.concat((zeros, b_core), axis=right_tt_rank_dim)
+ curr_sum_core = tf.concat((upper, lower), axis=left_tt_rank_dim)
+ current_level_cores.append(curr_sum_core)
+ current_level = TensorTrainBatch(current_level_cores, shape)
+ prev_level = decompositions.round(current_level, max_tt_rank)
+ if is_batch_output:
+ return prev_level
+ else:
+ return prev_level[0]
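
A usage sketch for the two functions above (shapes, ranks, and weights are illustrative):

import tensorflow as tf
from t3f import approximate, initializers, ops

# Sum four TT-tensors, rounding back to TT-rank 8 after each pairwise add.
tts = [initializers.random_tensor((4, 4, 4), tt_rank=2) for _ in range(4)]
approx_sum = approximate.add_n(tts, max_tt_rank=8)

# Batch version: a weighted sum computed in log(batch_size) add/round stages.
batch = initializers.random_tensor_batch((4, 4, 4), tt_rank=2, batch_size=4)
weighted = approximate.reduce_sum_batch(batch, max_tt_rank=8,
                                        coef=[1., 0.5, -1., 2.])
with tf.Session() as sess:
  print(sess.run(ops.frobenius_norm(approx_sum)))
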
diff --git a/t3f/approximate_test.py b/t3f/approximate_test.py
new file mode 100644
index 00000000..52a99320
--- /dev/null
+++ b/t3f/approximate_test.py
@@ -0,0 +1,101 @@
+import numpy as np
+import tensorflow as tf
+
+from t3f import ops
+from t3f import approximate
+from t3f import initializers
+
+
+class ApproximateTest(tf.test.TestCase):
+
+ def testAddN(self):
+ # Sum a bunch of TT-matrices.
+ tt_a = initializers.random_matrix(((2, 1, 4), (2, 2, 2)), tt_rank=2)
+ tt_b = initializers.random_matrix(((2, 1, 4), (2, 2, 2)),
+ tt_rank=[1, 2, 4, 1])
+
+ def desired(tt_objects):
+ res = tt_objects[0]
+ for tt in tt_objects[1:]:
+ res += tt
+ return res
+
+ with self.test_session() as sess:
+ res_actual = ops.full(approximate.add_n([tt_a, tt_b], 6))
+ res_desired = ops.full(desired([tt_a, tt_b]))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ res_actual = ops.full(approximate.add_n([tt_a, tt_b, tt_a], 8))
+ res_desired = ops.full(desired([tt_a, tt_b, tt_a]))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ res_actual = ops.full(approximate.add_n([tt_a, tt_b, tt_a, tt_a, tt_a], 12))
+ res_desired = ops.full(desired([tt_a, tt_b, tt_a, tt_a, tt_a]))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ def testReduceSumBatch(self):
+ # Sum a batch of TT-tensors.
+
+ def desired(tt_batch):
+ res = tt_batch[0]
+ for i in range(1, tt_batch.batch_size):
+ res += tt_batch[i]
+ return res
+ for batch_size in [2, 3, 4, 5]:
+ with self.test_session() as sess:
+ tt_batch = initializers.random_tensor_batch((4, 3, 5),
+ tt_rank=2,
+ batch_size=batch_size)
+ res_actual = ops.full(approximate.reduce_sum_batch(tt_batch, 10))
+ res_desired = ops.full(desired(tt_batch))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ def testReduceSumBatchWeighted(self):
+ # Weighted sum of a batch of TT-tensors.
+
+ def desired(tt_batch, coef):
+ res = coef[0] * tt_batch[0]
+ for i in range(1, tt_batch.batch_size):
+ res += coef[i] * tt_batch[i]
+ return res
+ with self.test_session() as sess:
+ tt_batch = initializers.random_tensor_batch((4, 3, 5),
+ tt_rank=3,
+ batch_size=3)
+ res_actual = ops.full(approximate.reduce_sum_batch(tt_batch, 9,
+ [1.2, -0.2, 1]))
+ res_desired = ops.full(desired(tt_batch, [1.2, -0.2, 1]))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ def testReduceSumBatchMultipleWeighted(self):
+ # Multiple weighted sums of a batch of TT-tensors.
+
+ def desired(tt_batch, coef):
+ res = coef[0] * tt_batch[0]
+ for i in range(1, tt_batch.batch_size):
+ res += coef[i] * tt_batch[i]
+ return res
+ with self.test_session() as sess:
+ tt_batch = initializers.random_tensor_batch((4, 3, 5),
+ tt_rank=2,
+ batch_size=3)
+ coef = [[1., 0.1],
+ [0.9, -0.2],
+ [0.3, 0.3]]
+ coef = np.array(coef).astype(np.float32)
+ res_actual = ops.full(approximate.reduce_sum_batch(tt_batch, 6,
+ coef))
+ res_desired_1 = ops.full(desired(tt_batch, coef[:, 0]))
+ res_desired_2 = ops.full(desired(tt_batch, coef[:, 1]))
+ res_desired = tf.stack((res_desired_1, res_desired_2))
+ res_desired_val, res_actual_val = sess.run([res_desired, res_actual])
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/t3f/batch_ops.py b/t3f/batch_ops.py
index bf234cd3..bd89bb36 100644
--- a/t3f/batch_ops.py
+++ b/t3f/batch_ops.py
@@ -1,8 +1,8 @@
import tensorflow as tf
-from tensor_train_base import TensorTrainBase
-from tensor_train_batch import TensorTrainBatch
-import ops
+from t3f.tensor_train_base import TensorTrainBase
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import ops
def concat_along_batch_dim(tt_list):
@@ -39,21 +39,50 @@ def concat_along_batch_dim(tt_list):
curr_core = tf.concat([tt.tt_cores[core_idx] for tt in tt_list], axis=0)
res_cores.append(curr_core)
- batch_size = sum([tt.batch_size for tt in tt_list])
+ try:
+ batch_size = sum([tt.batch_size for tt in tt_list])
+ except TypeError:
+ # The batch sizes are not defined and you can't sum Nones.
+ batch_size = None
return TensorTrainBatch(res_cores, tt_list[0].get_raw_shape(),
tt_list[0].get_tt_ranks(), batch_size)
+def multiply_along_batch_dim(batch_tt, weights):
+ """Multiply each TensorTrain in a batch by a number.
+
+ Args:
+ batch_tt: TensorTrainBatch object, TT-matrices or TT-tensors.
+    weights: 1-D tf.Tensor (or something convertible to it like an np.array)
+      of size batch_tt.batch_size with the per-element weights.
+
+ Returns:
+ TensorTrainBatch
+ """
+ weights = tf.convert_to_tensor(weights)
+ tt_cores = list(batch_tt.tt_cores)
+ if batch_tt.is_tt_matrix():
+ weights = weights[:, tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis]
+ else:
+ weights = weights[:, tf.newaxis, tf.newaxis, tf.newaxis]
+ tt_cores[0] = weights * tt_cores[0]
+ out_shape = batch_tt.get_raw_shape()
+ out_ranks = batch_tt.get_tt_ranks()
+ out_batch_size = batch_tt.batch_size
+ return TensorTrainBatch(tt_cores, out_shape, out_ranks, out_batch_size)
+
+
def gram_matrix(tt_vectors, matrix=None):
- """Computes Gramian matrix of a batch of TT-vecors.
+ """Computes Gramian matrix of a batch of TT-vectors.
If matrix is None, computes
res[i, j] = t3f.flat_inner(tt_vectors[i], tt_vectors[j]).
If matrix is present, computes
res[i, j] = t3f.flat_inner(tt_vectors[i], t3f.matmul(matrix, tt_vectors[j]))
- or more shorly
+ or more shortly
res[i, j] = tt_vectors[i]^T * matrix * tt_vectors[j]
+ but is more efficient.
Args:
tt_vectors: TensorTrainBatch.
@@ -61,37 +90,129 @@ def gram_matrix(tt_vectors, matrix=None):
Returns:
tf.tensor with the Gram matrix.
+
+ Complexity:
+    If the matrix is not present, the complexity is O(batch_size^2 d r^3 n)
+    where d is the number of TT-cores (tt_vectors.ndims()),
+    r is the largest TT-rank max(tt_vectors.get_tt_ranks()),
+    and n is the size of the axis dimension, e.g.
+      for a tensor of size 4 x 4 x 4, n is 4;
+      for a 27 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
+    If the matrix of TT-rank R is present, the complexity is
+      O(batch_size^2 d R r^2 n (r + n R))
+    where the matrix is of raw shape (n, n, ..., n) x (n, n, ..., n);
+    r is the TT-rank of the vectors tt_vectors;
+    R is the TT-rank of the matrix.
"""
- ndims = tt_vectors.ndims()
+ return pairwise_flat_inner(tt_vectors, tt_vectors, matrix)
+
+
+def pairwise_flat_inner(tt_1, tt_2, matrix=None):
+ """Computes all scalar products between two batches of TT-objects.
+
+ If matrix is None, computes
+ res[i, j] = t3f.flat_inner(tt_1[i], tt_2[j]).
+
+ If matrix is present, computes
+ res[i, j] = t3f.flat_inner(tt_1[i], t3f.matmul(matrix, tt_2[j]))
+ or more shortly
+ res[i, j] = tt_1[i]^T * matrix * tt_2[j]
+ but is more efficient.
+
+ Args:
+ tt_1: TensorTrainBatch.
+ tt_2: TensorTrainBatch.
+ matrix: None, or TensorTrain matrix.
+
+ Returns:
+ tf.tensor with the matrix of pairwise scalar products (flat inners).
+
+ Complexity:
+    If the matrix is not present, the complexity is O(batch_size^2 d r^3 n)
+    where d is the number of TT-cores (tt_1.ndims()),
+    r is the largest TT-rank max(tt_1.get_tt_ranks()),
+    and n is the size of the axis dimension, e.g.
+      for a tensor of size 4 x 4 x 4, n is 4;
+      for a 27 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
+    A more precise complexity is
+      O(batch_size^2 d r1 r2 n max(r1, r2))
+    where r1 is the largest TT-rank of tt_1
+    and r2 is the largest TT-rank of tt_2.
+    If the matrix is present, the complexity is
+      O(batch_size^2 d R r1 r2 (n r1 + n m R + m r2))
+    where
+    the matrix is of raw shape (n, n, ..., n) x (m, m, ..., m) and TT-rank R;
+    tt_1 is of shape (n, n, ..., n) and of TT-rank r1;
+    tt_2 is of shape (m, m, ..., m) and of TT-rank r2.
+ """
+ ndims = tt_1.ndims()
if matrix is None:
- curr_core = tt_vectors.tt_cores[0]
- res = tf.einsum('paijb,qcijd->pqbd', curr_core, curr_core)
+ curr_core_1 = tt_1.tt_cores[0]
+ curr_core_2 = tt_2.tt_cores[0]
+ mode_string = 'ij' if tt_1.is_tt_matrix() else 'i'
+ einsum_str = 'pa{0}b,qc{0}d->pqbd'.format(mode_string)
+ res = tf.einsum(einsum_str, curr_core_1, curr_core_2)
for core_idx in range(1, ndims):
- curr_core = tt_vectors.tt_cores[core_idx]
- res = tf.einsum('pqac,paijb,qcijd->pqbd', res, curr_core, curr_core)
+ curr_core_1 = tt_1.tt_cores[core_idx]
+ curr_core_2 = tt_2.tt_cores[core_idx]
+ einsum_str = 'pqac,pa{0}b,qc{0}d->pqbd'.format(mode_string)
+ res = tf.einsum(einsum_str, res, curr_core_1, curr_core_2)
else:
- # res[i, j] = tt_vectors[i] ^ T * matrix * tt_vectors[j]
- vectors_shape = tt_vectors.get_shape()
- if vectors_shape[2] == 1 and vectors_shape[1] != 1:
+ # res[i, j] = tt_1[i] ^ T * matrix * tt_2[j]
+ if not tt_1.is_tt_matrix() or not tt_2.is_tt_matrix() or not matrix.is_tt_matrix():
+      raise ValueError('When passing three arguments to pairwise_flat_inner, '
+                       'the first two of them should be TT-vectors and the '
+                       'last should be a TT-matrix. Got %s, %s, and %s '
+                       'instead.' % (tt_1, tt_2, matrix))
+ matrix_shape = matrix.get_raw_shape()
+ if not tt_1.get_raw_shape()[0].is_compatible_with(matrix_shape[0]):
+ raise ValueError('The shape of the first argument should be compatible '
+ 'with the shape of the TT-matrix, that is it should be '
+ 'possible to do the following matmul: '
+ 'transpose(tt_1) * matrix. Got the first argument '
+ '"%s" and matrix "%s"' % (tt_1, matrix))
+ if not tt_2.get_raw_shape()[0].is_compatible_with(matrix_shape[1]):
+ raise ValueError('The shape of the second argument should be compatible '
+ 'with the shape of the TT-matrix, that is it should be '
+ 'possible to do the following matmul: '
+ 'matrix * tt_2. Got the second argument '
+ '"%s" and matrix "%s"' % (tt_2, matrix))
+
+ vectors_1_shape = tt_1.get_shape()
+ if vectors_1_shape[2] == 1 and vectors_1_shape[1] != 1:
+ # TODO: not very efficient, better to use different order in einsum.
+ tt_1 = ops.transpose(tt_1)
+ vectors_1_shape = tt_1.get_shape()
+ vectors_2_shape = tt_2.get_shape()
+ if vectors_2_shape[2] == 1 and vectors_2_shape[1] != 1:
# TODO: not very efficient, better to use different order in einsum.
- tt_vectors = ops.transpose(tt_vectors)
- vectors_shape = tt_vectors.get_shape()
- if vectors_shape[1] != 1:
+ tt_2 = ops.transpose(tt_2)
+ vectors_2_shape = tt_2.get_shape()
+ if vectors_1_shape[1] != 1:
+ # TODO: do something so that in case the shape is undefined on compilation
+ # it still works.
+ raise ValueError('The tt_vectors_1 argument should be vectors (not '
+ 'matrices) with shape defined on compilation.')
+ if vectors_2_shape[1] != 1:
# TODO: do something so that in case the shape is undefined on compilation
# it still works.
- raise ValueError('The tt_vectors argument should be vectors (not '
+ raise ValueError('The tt_vectors_2 argument should be vectors (not '
'matrices) with shape defined on compilation.')
- curr_core = tt_vectors.tt_cores[0]
+ curr_core_1 = tt_1.tt_cores[0]
+ curr_core_2 = tt_2.tt_cores[0]
curr_matrix_core = matrix.tt_cores[0]
# We enumerate the dummy dimension (that takes 1 value) with `k`.
- res = tf.einsum('pakib,cijd,qekjf->pqbdf', curr_core, curr_matrix_core,
- curr_core)
+ res = tf.einsum('pakib,cijd,qekjf->pqbdf', curr_core_1, curr_matrix_core,
+ curr_core_2)
for core_idx in range(1, ndims):
- curr_core = tt_vectors.tt_cores[core_idx]
+ curr_core_1 = tt_1.tt_cores[core_idx]
+ curr_core_2 = tt_2.tt_cores[core_idx]
curr_matrix_core = matrix.tt_cores[core_idx]
- res = tf.einsum('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core,
- curr_matrix_core, curr_core)
+ res = tf.einsum('pqace,pakib,cijd,qekjf->pqbdf', res, curr_core_1,
+ curr_matrix_core, curr_core_2)
# Squeeze to make the result of size batch_size x batch_size instead of
# batch_size x batch_size x 1 x 1.
- return tf.squeeze(res)
\ No newline at end of file
+ return tf.squeeze(res)
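
A usage sketch for the batch operations above (shapes and weights are illustrative):

import tensorflow as tf
from t3f import batch_ops, initializers

batch_a = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=4)
batch_b = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=4)
# Rescale every element of the batch by its own weight.
scaled = batch_ops.multiply_along_batch_dim(batch_a, [0.1, 1., -2., 0.])
# All pairwise scalar products between the two batches: a 4 x 4 tf.Tensor.
inners = batch_ops.pairwise_flat_inner(scaled, batch_b)
with tf.Session() as sess:
  print(sess.run(inners).shape)  # (4, 4)
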
diff --git a/t3f/batch_ops_test.py b/t3f/batch_ops_test.py
index b2e9a3a6..4c8cfd17 100644
--- a/t3f/batch_ops_test.py
+++ b/t3f/batch_ops_test.py
@@ -1,11 +1,9 @@
import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
-import ops
-import batch_ops
-import initializers
+from t3f import ops
+from t3f import batch_ops
+from t3f import initializers
class BatchOpsTest(tf.test.TestCase):
@@ -44,6 +42,39 @@ def testConcatMatrix(self):
self.assertAllClose(first_second_res_val, first_second_desired_val)
self.assertAllClose(first_second_third_res_val, first_second_third_desired_val)
+ def testConcatTensorPlaceholders(self):
+    # Test concatenating TT-tensors of unknown batch size along the batch dim.
+ number_of_objects = tf.placeholder(tf.int32)
+    tt_batch = initializers.random_tensor_batch((2, 3), batch_size=5)
+    actual = batch_ops.concat_along_batch_dim((tt_batch[:number_of_objects],
+                                               tt_batch[number_of_objects:]))
+    with self.test_session() as sess:
+      desired_val, actual_val = sess.run(
+          (ops.full(tt_batch), ops.full(actual)),
+          feed_dict={number_of_objects: 2})
+ self.assertAllClose(desired_val, actual_val)
+
+ def testConcatMatrixPlaceholders(self):
+    # Test concatenating TT-matrices of unknown batch size along the batch dim.
+ number_of_objects = tf.placeholder(tf.int32)
+    tt_batch = initializers.random_matrix_batch(((2, 3), (2, 3)), batch_size=5)
+    actual = batch_ops.concat_along_batch_dim((tt_batch[:number_of_objects],
+                                               tt_batch[number_of_objects:]))
+    with self.test_session() as sess:
+      desired_val, actual_val = sess.run(
+          (ops.full(tt_batch), ops.full(actual)),
+          feed_dict={number_of_objects: 2})
+ self.assertAllClose(desired_val, actual_val)
+
+ def testBatchMultiply(self):
+    # Test multiplying a batch of TT-matrices by individual numbers.
+ tt = initializers.random_matrix_batch(((2, 3), (3, 3)), batch_size=3)
+ weights = [0.1, 0, -10]
+ actual = batch_ops.multiply_along_batch_dim(tt, weights)
+ individual_desired = [weights[i] * tt[i:i+1] for i in range(3)]
+ desired = batch_ops.concat_along_batch_dim(individual_desired)
+ with self.test_session() as sess:
+      desired_val, actual_val = sess.run((ops.full(desired), ops.full(actual)))
+      self.assertAllClose(desired_val, actual_val)
+
def testGramMatrix(self):
# Test Gram Matrix of a batch of TT vectors.
tt_vectors = initializers.random_matrix_batch(((2, 3), None), batch_size=5)
@@ -59,7 +90,7 @@ def testGramMatrixWithMatrix(self):
# Test Gram Matrix of a batch of TT vectors with providing a matrix, so we
# should compute
# res[i, j] = tt_vectors[i] ^ T * matrix * tt_vectors[j]
- tt_vectors = initializers.random_matrix_batch((None, (2, 3)), batch_size=4)
+ tt_vectors = initializers.random_matrix_batch(((2, 3), None), batch_size=4)
matrix = initializers.random_matrix(((2, 3), (2, 3)))
res_actual = batch_ops.gram_matrix(tt_vectors, matrix)
full_vectors = tf.reshape(ops.full(tt_vectors), (4, 6))
@@ -74,6 +105,57 @@ def testGramMatrixWithMatrix(self):
res_desired_val[i, j] = curr_val
self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+ def testPairwiseFlatInnerTensor(self):
+ # Test pairwise_flat_inner of a batch of TT tensors.
+ tt_tensors_1 = initializers.random_tensor_batch((2, 3, 2), batch_size=5)
+ tt_tensors_2 = initializers.random_tensor_batch((2, 3, 2), batch_size=5)
+ res_actual = batch_ops.pairwise_flat_inner(tt_tensors_1, tt_tensors_2)
+ full_tensors_1 = tf.reshape(ops.full(tt_tensors_1), (5, 12))
+ full_tensors_2 = tf.reshape(ops.full(tt_tensors_2), (5, 12))
+ res_desired = tf.matmul(full_tensors_1, tf.transpose(full_tensors_2))
+ res_desired = tf.squeeze(res_desired)
+ with self.test_session() as sess:
+ res_actual_val, res_desired_val = sess.run((res_actual, res_desired))
+ self.assertAllClose(res_desired_val, res_actual_val)
+
+ def testPairwiseFlatInnerMatrix(self):
+ # Test pairwise_flat_inner of a batch of TT matrices.
+ tt_vectors_1 = initializers.random_matrix_batch(((2, 3), (2, 3)),
+ batch_size=5)
+ tt_vectors_2 = initializers.random_matrix_batch(((2, 3), (2, 3)),
+ batch_size=5)
+ res_actual = batch_ops.pairwise_flat_inner(tt_vectors_1, tt_vectors_2)
+ full_vectors_1 = tf.reshape(ops.full(tt_vectors_1), (5, 36))
+ full_vectors_2 = tf.reshape(ops.full(tt_vectors_2), (5, 36))
+ res_desired = tf.matmul(full_vectors_1, tf.transpose(full_vectors_2))
+ res_desired = tf.squeeze(res_desired)
+ with self.test_session() as sess:
+ res_actual_val, res_desired_val = sess.run((res_actual, res_desired))
+ self.assertAllClose(res_desired_val, res_actual_val, atol=1e-5, rtol=1e-5)
+
+ def testPairwiseFlatInnerVectorsWithMatrix(self):
+ # Test pairwise_flat_inner of a batch of TT vectors with providing a matrix,
+ # so we should compute
+ # res[i, j] = tt_vectors[i] ^ T * matrix * tt_vectors[j]
+ tt_vectors_1 = initializers.random_matrix_batch(((2, 3), None), batch_size=2)
+ tt_vectors_2 = initializers.random_matrix_batch(((2, 3), None), batch_size=3)
+ matrix = initializers.random_matrix(((2, 3), (2, 3)))
+ res_actual = batch_ops.pairwise_flat_inner(tt_vectors_1, tt_vectors_2,
+ matrix)
+ full_vectors_1 = tf.reshape(ops.full(tt_vectors_1), (2, 6))
+ full_vectors_2 = tf.reshape(ops.full(tt_vectors_2), (3, 6))
+ with self.test_session() as sess:
+ res = sess.run((res_actual, full_vectors_1, full_vectors_2,
+ ops.full(matrix)))
+ res_actual_val, vectors_1_val, vectors_2_val, matrix_val = res
+ res_desired_val = np.zeros((2, 3))
+ for i in range(2):
+ for j in range(3):
+ curr_val = np.dot(vectors_1_val[i], matrix_val)
+ curr_val = np.dot(curr_val, vectors_2_val[j])
+ res_desired_val[i, j] = curr_val
+ self.assertAllClose(res_desired_val, res_actual_val)
+
if __name__ == "__main__":
tf.test.main()
diff --git a/t3f/decompositions.py b/t3f/decompositions.py
index dee8edfa..b16ee7f0 100644
--- a/t3f/decompositions.py
+++ b/t3f/decompositions.py
@@ -1,9 +1,9 @@
import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
-import shapes
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import shapes
def to_tt_matrix(mat, shape, max_tt_rank=10, epsilon=None):
@@ -411,6 +411,18 @@ def _orthogonalize_tt_cores_left_to_right(tt):
tt: TenosorTrain or a TensorTrainBatch.
Returns:
The same type as the input `tt` (TenosorTrain or a TensorTrainBatch).
+
+ Complexity:
+ for a single TT-object:
+ O(d r^3 n)
+ for a batch of TT-objects:
+ O(batch_size d r^3 n)
+    where
+    d is the number of TT-cores (tt.ndims());
+    r is the largest TT-rank of tt, max(tt.get_tt_ranks());
+    n is the size of the axis dimension, e.g.
+      for a tensor of size 4 x 4 x 4, n is 4;
+      for a 27 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
"""
# Left to right orthogonalization.
ndims = tt.ndims()
diff --git a/t3f/decompositions_test.py b/t3f/decompositions_test.py
index d9f5692e..158eff9e 100644
--- a/t3f/decompositions_test.py
+++ b/t3f/decompositions_test.py
@@ -1,10 +1,10 @@
import numpy as np
import tensorflow as tf
-import ops
-import shapes
-import decompositions
-import initializers
+from t3f import ops
+from t3f import shapes
+from t3f import decompositions
+from t3f import initializers
class DecompositionsTest(tf.test.TestCase):
diff --git a/t3f/examples_tests.py b/t3f/examples_tests.py
new file mode 100644
index 00000000..63dbc151
--- /dev/null
+++ b/t3f/examples_tests.py
@@ -0,0 +1,43 @@
+"""Tests from the README examples and the paper."""
+
+import tensorflow as tf
+import t3f
+
+class ExamplesTest(tf.test.TestCase):
+
+ def testMainReadme(self):
+ # Just check that the readme examples do not raise exceptions.
+ # Create a random tensor of shape (3, 2, 2).
+ a = t3f.random_tensor((3, 2, 2), tt_rank=3)
+ norm = t3f.frobenius_norm(a)
+ # Convert TT-tensor into a dense tensor for printing.
+ a_full = t3f.full(a)
+ # Run a tensorflow session to run the operations.
+ with tf.Session() as sess:
+ # Run the operations. Note that if you run these
+      # two operations separately (sess.run(a_full), sess.run(norm)),
+      # the results will be inconsistent, since sess.run will
+      # generate a new random tensor `a` on each run, because `a` is
+      # an operation 'generate me a random tensor'.
+ a_val, norm_val = sess.run([a_full, norm])
+ a = t3f.random_tensor((3, 2, 2), tt_rank=3)
+ b_dense = tf.random_normal((3, 2, 2))
+ # Use TT-SVD on b_dense.
+ b_tt = t3f.to_tt_tensor(b_dense, max_tt_rank=4)
+ sum_round = t3f.round(t3f.add(a, b_tt), max_tt_rank=2)
+ # Inner product (sum of products of all elements).
+ a = t3f.random_tensor((3, 2, 2), tt_rank=3)
+ b = t3f.random_tensor((3, 2, 2), tt_rank=4)
+ inner_prod = t3f.flat_inner(a, b)
+ A = t3f.random_matrix(((3, 2, 2), (2, 3, 3)), tt_rank=3)
+ b = t3f.random_matrix(((2, 3, 3), None), tt_rank=3)
+ # Matrix-by-vector
+ matvec = t3f.matmul(A, b)
+
+ # Matrix-by-dense matrix
+ b_dense = tf.random_normal((18, 1))
+ matvec2 = t3f.matmul(A, b_dense)
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/t3f/initializers.py b/t3f/initializers.py
index 489cbbc0..6f192cc0 100644
--- a/t3f/initializers.py
+++ b/t3f/initializers.py
@@ -1,16 +1,239 @@
import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f.tensor_train_base import TensorTrainBase
+from t3f import shapes
-def random_tensor(shape, tt_rank=2):
- """Generate a random TT-tensor of given shape.
+def _validate_input_parameters(is_tensor, shape, **params):
+ """Internal function for validating input parameters
+
+ Args:
+ is_tensor: bool, determines whether we attempt to construct a TT-tensor or
+ a TT-matrix (needed for the correct shape checks).
+ shape: array, the desired shape of the generated TT object
+ params: optional, possible values:
+ batch_size: int, for constructing batches
+ tt_rank: array or int, desired TT-ranks
+ """
+
+ if is_tensor:
+ if len(shape.shape) != 1:
+      raise ValueError('shape should be a 1d array, got %s' % shape)
+    if np.any(shape < 1):
+      raise ValueError('all elements in `shape` should be positive, got %s' %
+                       shape)
+    if not all(isinstance(sh, np.integer) for sh in shape):
+      raise ValueError('all elements in `shape` should be integers, got %s' %
+                       shape)
+ else:
+ if len(shape.shape) != 2:
+      raise ValueError('shape should be a 2d array, got %s' % shape)
+    if shape[0].size != shape[1].size:
+      raise ValueError('shape[0] should have the same length as shape[1], but '
+                       'got %d and %d' % (shape[0].size, shape[1].size))
+    if np.any(shape.flatten() < 1):
+      raise ValueError('all elements in `shape` should be positive, got %s' %
+                       shape)
+    if not all(isinstance(sh, np.integer) for sh in shape.flatten()):
+      raise ValueError('all elements in `shape` should be integers, got %s' %
+                       shape)
+
+ if 'batch_size' in params:
+ batch_size = params['batch_size']
+ if not isinstance(batch_size, (int, np.integer)):
+ raise ValueError('`batch_size` should be integer, got %f' % batch_size)
+ if batch_size < 1:
+ raise ValueError('Batch size should be positive, got %d' % batch_size)
+ if 'tt_rank' in params:
+ tt_rank = params['tt_rank']
+ if tt_rank.size == 1:
+ if not isinstance(tt_rank[()], np.integer):
+ raise ValueError('`tt_rank` should be integer, got %f' % tt_rank[()])
+ if tt_rank.size > 1:
+ if not all(isinstance(tt_r, np.integer) for tt_r in tt_rank):
+        raise ValueError('all elements in `tt_rank` should be integers, got'
+                         ' %s' % tt_rank)
+    if np.any(tt_rank < 1):
+      raise ValueError('`tt_rank` should be positive, got %s' % tt_rank)
+
+ if is_tensor:
+ if tt_rank.size != 1 and tt_rank.size != (shape.size + 1):
+      raise ValueError('`tt_rank` array has inappropriate size, expected '
+                       '1 or %d, got %d' % (shape.size + 1, tt_rank.size))
+ else:
+ if tt_rank.size != 1 and tt_rank.size != (shape[0].size + 1):
+      raise ValueError('`tt_rank` array has inappropriate size, expected '
+                       '1 or %d, got %d' % (shape[0].size + 1, tt_rank.size))
+
+
+def tensor_ones(shape):
+ """Generate TT-tensor of the given shape with all entries equal to 1.
Args:
shape: array representing the shape of the future tensor
- tt_rank: a number or a (d+1)-element array with ranks.
+
+ Returns:
+ TensorTrain object containing a TT-tensor
+ """
+
+ shape = np.array(shape)
+ _validate_input_parameters(is_tensor=True, shape=shape)
+ num_dims = shape.size
+ tt_rank = np.ones(num_dims + 1)
+
+ tt_cores = num_dims * [None]
+ for i in range(num_dims):
+ curr_core_shape = (1, shape[i], 1)
+ tt_cores[i] = tf.ones(curr_core_shape)
+
+ return TensorTrain(tt_cores, shape, tt_rank)
+
+
+def tensor_zeros(shape):
+ """Generate TT-tensor of the given shape with all entries equal to 0.
+
+ Args:
+ shape: array representing the shape of the future tensor
+
+ Returns:
+ TensorTrain object containing a TT-tensor
+ """
+
+ shape = np.array(shape)
+ _validate_input_parameters(is_tensor=True, shape=shape)
+ num_dims = shape.size
+ tt_rank = np.ones(num_dims + 1)
+ tt_cores = num_dims * [None]
+ for i in range(num_dims):
+ curr_core_shape = (1, shape[i], 1)
+ tt_cores[i] = tf.zeros(curr_core_shape)
+
+ return TensorTrain(tt_cores, shape, tt_rank)
+
+
+def eye(shape):
+ """Creates an identity TT-matrix.
+
+ Args:
+ shape: array which defines the shape of the matrix row and column
+ indices.
+
+ Returns:
+ TensorTrain containing an identity TT-matrix of size
+ np.prod(shape) x np.prod(shape)
+ """
+ shape = np.array(shape)
+ # In this special case shape is in the same format as in the TT-tensor case
+ _validate_input_parameters(is_tensor=True, shape=shape)
+
+ num_dims = shape.size
+ tt_ranks = np.ones(num_dims + 1)
+
+ tt_cores = num_dims * [None]
+ for i in range(num_dims):
+ curr_core_shape = (1, shape[i], shape[i], 1)
+ tt_cores[i] = tf.reshape(tf.eye(shape[i]), curr_core_shape)
+
+ true_shape = np.vstack([shape, shape])
+ return TensorTrain(tt_cores, true_shape, tt_ranks)
+
+
+def matrix_ones(shape):
+ """Generate a TT-matrix of the given shape with each entry equal to 1.
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ matrix_ones([[2, 2, 2], None])
+ and
+ matrix_ones([None, [2, 2, 2]])
+      will create an 8-element column vector and row vector, respectively.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1]) with each entry equal to 1
+ """
+
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+
+ _validate_input_parameters(is_tensor=False, shape=shape)
+
+ num_dims = shape[0].size
+ tt_rank = np.ones(shape[0].size + 1)
+
+ # TODO: variable (name?) scope.
+
+ tt_cores = [None] * num_dims
+ for i in range(num_dims):
+ curr_core_shape = (1, shape[0][i], shape[1][i], 1)
+ tt_cores[i] = tf.ones(curr_core_shape)
+
+ return TensorTrain(tt_cores, shape, tt_rank)
+
+
+def matrix_zeros(shape):
+ """Generate a TT-matrix of the given shape with each entry equal to 0.
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ matrix_zeros([[2, 2, 2], None])
+ and
+ matrix_zeros([None, [2, 2, 2]])
+      will create an 8-element column vector and row vector, respectively.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1]) with each entry equal to 0
+ """
+
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+
+ _validate_input_parameters(is_tensor=False, shape=shape)
+ num_dims = shape[0].size
+ tt_rank = np.ones(shape[0].size + 1)
+
+ # TODO: variable (name?) scope.
+
+ tt_cores = [None] * num_dims
+ for i in range(num_dims):
+ curr_core_shape = (1, shape[0][i], shape[1][i], 1)
+ tt_cores[i] = tf.zeros(curr_core_shape)
+
+ return TensorTrain(tt_cores, shape, tt_rank)
+
+
+def tensor_with_random_cores(shape, tt_rank=2, mean=0., stddev=1.):
+ """Generate a TT-tensor of the given shape with N(mean, stddev^2) cores.
+
+ Args:
+ shape: array representing the shape of the future tensor.
+ tt_rank: a number or a (d+1)-element array with the desired ranks.
+ mean: a number, the mean of the normal distribution used for
+ initializing TT-cores.
+ stddev: a number, the standard deviation of the normal distribution used
+ for initializing TT-cores.
Returns:
TensorTrain containing a TT-tensor
@@ -20,90 +243,78 @@ def random_tensor(shape, tt_rank=2):
# TODO: support None as a dimension.
shape = np.array(shape)
tt_rank = np.array(tt_rank)
- if len(shape.shape) != 1:
- raise ValueError('shape should be 1d array')
- if np.any(shape < 1):
- raise ValueError('all elements in `shape` should be positive')
- if np.any(tt_rank < 1):
- raise ValueError('`rank` should be positive')
- if tt_rank.size != 1 and tt_rank.size != (shape.size + 1):
- raise ValueError('`rank` array has inappropriate size')
-
+ _validate_input_parameters(is_tensor=True, shape=shape, tt_rank=tt_rank)
num_dims = shape.size
if tt_rank.size == 1:
tt_rank = tt_rank * np.ones(num_dims - 1)
tt_rank = np.insert(tt_rank, 0, 1)
tt_rank = np.append(tt_rank, 1)
- # TODO: check that ints?
- shape = shape.astype(int)
- tt_rank_ext = tt_rank.astype(int)
+
+ tt_rank = tt_rank.astype(int)
# TODO: variable (name?) scope.
tt_cores = [None] * num_dims
for i in range(num_dims):
- curr_core_shape = (tt_rank_ext[i], shape[i], tt_rank_ext[i + 1])
- tt_cores[i] = tf.random_normal(curr_core_shape)
+ curr_core_shape = (tt_rank[i], shape[i], tt_rank[i + 1])
+ tt_cores[i] = tf.random_normal(curr_core_shape, mean=mean, stddev=stddev)
- return TensorTrain(tt_cores, shape, tt_rank_ext)
+ return TensorTrain(tt_cores, shape, tt_rank)
-def random_tensor_batch(shape, tt_rank=2, batch_size=1):
- """Generate a batch of random TT-tensors of given shape.
+def tensor_batch_with_random_cores(shape, tt_rank=2, batch_size=1,
+ mean=0., stddev=1.):
+ """Generate a batch of TT-tensors of given shape with N(mean, stddev^2) cores.
Args:
- shape: array representing the shape of the future tensor
+ shape: array representing the shape of the future tensor.
tt_rank: a number or a (d+1)-element array with ranks.
batch_size: an integer.
+ mean: a number, the mean of the normal distribution used for
+ initializing TT-cores.
+ stddev: a number, the standard deviation of the normal distribution used
+ for initializing TT-cores.
Returns:
- TensorTrainBatch containing a TT-tensor
+ TensorTrainBatch containing TT-tensors
"""
- # TODO: good distribution to init training.
+
# TODO: support shape and tt_ranks as TensorShape?.
# TODO: support None as a dimension.
shape = np.array(shape)
tt_rank = np.array(tt_rank)
- if len(shape.shape) != 1:
- raise ValueError('shape should be 1d array')
- if np.any(shape < 1):
- raise ValueError('all elements in `shape` should be positive')
- if np.any(tt_rank < 1):
- raise ValueError('`rank` should be positive')
- if tt_rank.size != 1 and tt_rank.size != (shape.size + 1):
- raise ValueError('`rank` array has inappropriate size')
- if batch_size < 1:
- raise ValueError('Batch size should be positive')
-
+ _validate_input_parameters(is_tensor=True, shape=shape, tt_rank=tt_rank,
+ batch_size=batch_size)
num_dims = shape.size
if tt_rank.size == 1:
tt_rank = tt_rank * np.ones(num_dims - 1)
tt_rank = np.insert(tt_rank, 0, 1)
tt_rank = np.append(tt_rank, 1)
- # TODO: check that ints?
- shape = shape.astype(int)
- batch_size = int(batch_size)
- tt_rank_ext = tt_rank.astype(int)
+ tt_rank = tt_rank.astype(int)
# TODO: variable (name?) scope.
tt_cores = [None] * num_dims
for i in range(num_dims):
- curr_core_shape = (batch_size, tt_rank_ext[i], shape[i], tt_rank_ext[i + 1])
- tt_cores[i] = tf.random_normal(curr_core_shape)
+ curr_core_shape = (batch_size, tt_rank[i], shape[i], tt_rank[i + 1])
+ tt_cores[i] = tf.random_normal(curr_core_shape, mean=mean, stddev=stddev)
- return TensorTrainBatch(tt_cores, shape, tt_rank_ext, batch_size)
+ return TensorTrainBatch(tt_cores, shape, tt_rank, batch_size)
-def random_matrix(shape, tt_rank=2):
- """Generate a random TT-matrix of given shape.
+def matrix_with_random_cores(shape, tt_rank=2, mean=0., stddev=1.):
+ """Generate a TT-matrix of given shape with N(mean, stddev^2) cores.
Args:
shape: 2d array, shape[0] is the shape of the matrix row-index,
shape[1] is the shape of the column index.
shape[0] and shape[1] should have the same number of elements (d)
- Also supports ommiting one of the dimensions for vectors, e.g.
- random_matrix([[2, 2, 2], None])
+ Also supports omitting one of the dimensions for vectors, e.g.
+ matrix_with_random_cores([[2, 2, 2], None])
and
- random_matrix([None, [2, 2, 2]])
+ matrix_with_random_cores([None, [2, 2, 2]])
will create an 8-element column vector and an 8-element row vector, respectively.
tt_rank: a number or a (d+1)-element array with ranks.
+ mean: a number, the mean of the normal distribution used for
+ initializing TT-cores.
+ stddev: a number, the standard deviation of the normal distribution used
+ for initializing TT-cores.
Returns:
TensorTrain containing a TT-matrix of size
@@ -114,54 +325,50 @@ def random_matrix(shape, tt_rank=2):
shape = list(shape)
# In case shape represents a vector, e.g. [None, [2, 2, 2]]
if shape[0] is None:
- shape[0] = np.ones(len(shape[1]))
+ shape[0] = np.ones(len(shape[1]), dtype=int)
# In case shape represents a vector, e.g. [[2, 2, 2], None]
if shape[1] is None:
- shape[1] = np.ones(len(shape[0]))
+ shape[1] = np.ones(len(shape[0]), dtype=int)
shape = np.array(shape)
tt_rank = np.array(tt_rank)
- if len(shape.shape) != 2:
- raise ValueError('shape should be 2d array')
- if shape[0].size != shape[1].size:
- raise ValueError('shape[0] should have the same length as shape[1]')
- if np.any(shape.flatten() < 1):
- raise ValueError('all elements in `shape` should be positive')
- if np.any(tt_rank < 1):
- raise ValueError('`rank` should be positive')
- if tt_rank.size != 1 and tt_rank.size != (shape[0].size + 1):
- raise ValueError('`rank` array has inappropriate size')
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)
num_dims = shape[0].size
if tt_rank.size == 1:
tt_rank = tt_rank * np.ones(num_dims - 1)
tt_rank = np.concatenate([[1], tt_rank, [1]])
- # TODO: check that ints?
- shape = shape.astype(int)
+
tt_rank = tt_rank.astype(int)
# TODO: variable (name?) scope.
tt_cores = [None] * num_dims
for i in range(num_dims):
curr_core_shape = (tt_rank[i], shape[0][i], shape[1][i],
tt_rank[i + 1])
- tt_cores[i] = tf.random_normal(curr_core_shape)
+ tt_cores[i] = tf.random_normal(curr_core_shape, mean=mean, stddev=stddev)
return TensorTrain(tt_cores, shape, tt_rank)
-def random_matrix_batch(shape, tt_rank=2, batch_size=1):
- """Generate a random batch of TT-matrices of given shape.
+def matrix_batch_with_random_cores(shape, tt_rank=2, batch_size=1,
+ mean=0., stddev=1.):
+ """Generate a batch of TT-matrices of given shape with N(mean, stddev^2) cores.
Args:
shape: 2d array, shape[0] is the shape of the matrix row-index,
shape[1] is the shape of the column index.
shape[0] and shape[1] should have the same number of elements (d)
- Also supports ommiting one of the dimensions for vectors, e.g.
- random_matrix_batch([[2, 2, 2], None])
+ Also supports omitting one of the dimensions for vectors, e.g.
+ matrix_batch_with_random_cores([[2, 2, 2], None])
and
- random_matrix_batch([None, [2, 2, 2]])
- will create a batch of one 8-element column and row vector correspondingly.
+ matrix_batch_with_random_cores([None, [2, 2, 2]])
+ will create a batch of one 8-element column vector and a batch of one row vector, respectively.
+
tt_rank: a number or a (d+1)-element array with ranks.
batch_size: an integer.
+ mean: a number, the mean of the normal distribution used for
+ initializing TT-cores.
+ stddev: a number, the standard deviation of the normal distribution used
+ for initializing TT-cores.
Returns:
TensorTrainBatch containing a batch of TT-matrices of size
@@ -172,30 +379,18 @@ def random_matrix_batch(shape, tt_rank=2, batch_size=1):
shape = list(shape)
# In case shape represents a vector, e.g. [None, [2, 2, 2]]
if shape[0] is None:
- shape[0] = np.ones(len(shape[1]))
+ shape[0] = np.ones(len(shape[1]), dtype=int)
# In case shape represents a vector, e.g. [[2, 2, 2], None]
if shape[1] is None:
- shape[1] = np.ones(len(shape[0]))
+ shape[1] = np.ones(len(shape[0]), dtype=int)
shape = np.array(shape)
tt_rank = np.array(tt_rank)
- if len(shape.shape) != 2:
- raise ValueError('shape should be 2d array')
- if shape[0].size != shape[1].size:
- raise ValueError('shape[0] should have the same length as shape[1]')
- if np.any(shape.flatten() < 1):
- raise ValueError('all elements in `shape` should be positive')
- if np.any(tt_rank < 1):
- raise ValueError('`rank` should be positive')
- if tt_rank.size != 1 and tt_rank.size != (shape[0].size + 1):
- raise ValueError('`rank` array has inappropriate size')
- if batch_size < 1:
- raise ValueError('Batch size should be positive')
-
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank,
+ batch_size=batch_size)
num_dims = shape[0].size
if tt_rank.size == 1:
tt_rank = tt_rank * np.ones(num_dims - 1)
tt_rank = np.concatenate([[1], tt_rank, [1]])
- # TODO: check that ints?
shape = shape.astype(int)
tt_rank = tt_rank.astype(int)
# TODO: variable (name?) scope.
@@ -203,6 +398,392 @@ def random_matrix_batch(shape, tt_rank=2, batch_size=1):
for i in range(num_dims):
curr_core_shape = (batch_size, tt_rank[i], shape[0][i], shape[1][i],
tt_rank[i + 1])
- tt_cores[i] = tf.random_normal(curr_core_shape)
+ tt_cores[i] = tf.random_normal(curr_core_shape, mean=mean, stddev=stddev)
return TensorTrainBatch(tt_cores, shape, tt_rank, batch_size)
+
+
+def ones_like(tt):
+ """Constructs t3f.ones with the shape of `tt`.
+
+ In the case when `tt` is TensorTrainBatch constructs t3f.ones with the shape
+ of a TensorTrain in `tt`.
+
+ Args:
+ tt: `TensorTrain` or `TensorTrainBatch` object.
+
+ Returns:
+ TensorTrain object of the same shape as `tt` but with all entries equal to
+ 1.
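+
+ Example (a sketch, assuming these functions are re-exported as t3f.* in
+ __init__):
+   a = t3f.random_tensor((2, 3, 4))
+   b = t3f.ones_like(a)  # TT-tensor of shape (2, 3, 4), all entries equal 1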
+
+ """
+ if not isinstance(tt, TensorTrainBase):
+ raise ValueError("`tt` has to be a Tensor Train object")
+ else:
+ shape = shapes.lazy_raw_shape(tt)
+ if tt.is_tt_matrix():
+ return matrix_ones(shape)
+ else:
+ return tensor_ones(shape[0, :])
+
+
+def zeros_like(tt):
+ """Constructs t3f.zeros with the shape of `tt`.
+
+ In the case when `tt` is a TensorTrainBatch constructs t3f.zeros with
+ the shape of a TensorTrain in `tt`.
+
+ Args:
+ tt: `TensorTrain` or `TensorTrainBatch` object.
+
+ Returns:
+ TensorTrain object of the same shape as `tt` but with all entries equal to
+ 0.
+
+ """
+ if not isinstance(tt, TensorTrainBase):
+ raise ValueError("`tt` has to be a Tensor Train object")
+ else:
+ shape = shapes.lazy_raw_shape(tt)
+ if tt.is_tt_matrix():
+ return matrix_zeros(shape)
+ else:
+ return tensor_zeros(shape[0, :])
+
+
+def random_tensor(shape, tt_rank=2, mean=0., stddev=1.):
+ """Generate a random TT-tensor of the given shape with given mean and stddev.
+
+ Entries of the generated tensor (in the full format) will be iid and satisfy
+ E[x_{i1i2..id}] = mean, Var[x_{i1i2..id}] = stddev^2; the distribution is
+ in fact not Gaussian (though it is close for large tensors).
+
+ In the current implementation only mean 0 is supported. To get
+ a random tensor with a specified mean (at the cost of a TT-rank larger
+ by 1) you can call
+ x = t3f.random_tensor(shape, tt_rank, stddev=stddev)
+ x = mean * t3f.ones_like(x) + x
+
+ Args:
+ shape: array representing the shape of the future tensor.
+ tt_rank: a number or a (d+1)-element array with the desired ranks.
+ mean: a number, the desired mean for the distribution of entries.
+ stddev: a number, the desired standard deviation for the distribution of
+ entries.
+
+ Returns:
+ TensorTrain containing a TT-tensor
+ """
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=True, shape=shape, tt_rank=tt_rank)
+
+ num_dims = shape.size
+ if tt_rank.size == 1:
+ tt_rank = tt_rank * np.ones(num_dims - 1)
+ tt_rank = np.insert(tt_rank, 0, 1)
+ tt_rank = np.append(tt_rank, 1)
+
+ tt_rank = tt_rank.astype(int)
+
+ # Empirically, entries of a TT-tensor with cores initialized from N(0, 1)
+ # have variance np.prod(tt_rank) and mean 0.
+ # We scale each TT-core to obtain the desired stddev.
+
+ cr_exponent = -1.0 / (2 * num_dims)
+ var = np.prod(tt_rank ** cr_exponent)
+ core_stddev = stddev ** (1.0 / num_dims) * var
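+ # Sanity check for the scaling above: an entry of the full tensor is a sum
+ # of np.prod(tt_rank) products of num_dims core entries each, so (assuming
+ # the terms are independent, as in the empirical claim above) its variance
+ # is core_stddev ** (2 * num_dims) * np.prod(tt_rank)
+ # = stddev ** 2 / np.prod(tt_rank) * np.prod(tt_rank) = stddev ** 2.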
+ tt = tensor_with_random_cores(shape, tt_rank=tt_rank, stddev=core_stddev)
+
+ if np.abs(mean) < 1e-8:
+ return tt
+ else:
+ raise NotImplementedError('non-zero mean is not supported yet')
+
+
+def random_tensor_batch(shape, tt_rank=2, batch_size=1, mean=0., stddev=1.):
+ """Generate a batch of TT-tensors with given shape, mean and stddev.
+
+ Entries of the generated tensors (in the full format) will be iid and satisfy
+ E[x_{i1i2..id}] = mean, Var[x_{i1i2..id}] = stddev^2; the distribution is
+ in fact not Gaussian (though it is close for large tensors).
+
+ In the current implementation only mean 0 is supported. To get
+ a batch of random tensors with a specified mean (at the cost of a TT-rank
+ larger by 1) you can call
+ x = t3f.random_tensor_batch(shape, tt_rank, batch_size=bs, stddev=stddev)
+ x = mean * t3f.ones_like(x) + x
+
+ Args:
+ shape: array representing the shape of the future tensor.
+ tt_rank: a number or a (d+1)-element array with ranks.
+ batch_size: an integer.
+ mean: a number, the desired mean for the distribution of entries.
+ stddev: a number, the desired standard deviation for the distribution of
+ entries.
+
+ Returns:
+ TensorTrainBatch containing TT-tensors.
+ """
+ # TODO: support shape and tt_ranks as TensorShape?.
+ # TODO: support None as a dimension.
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=True, shape=shape, tt_rank=tt_rank,
+ batch_size=batch_size)
+ num_dims = shape.size
+ if tt_rank.size == 1:
+ tt_rank = tt_rank * np.ones(num_dims - 1)
+ tt_rank = np.insert(tt_rank, 0, 1)
+ tt_rank = np.append(tt_rank, 1)
+ tt_rank = tt_rank.astype(int)
+
+ cr_exponent = -1.0 / (2 * num_dims)
+ var = np.prod(tt_rank ** cr_exponent)
+ cr_stddev = stddev ** (1.0 / num_dims) * var
+ tt = tensor_batch_with_random_cores(shape, tt_rank=tt_rank, stddev=cr_stddev,
+ batch_size=batch_size)
+
+ if np.abs(mean) < 1e-8:
+ return tt
+ else:
+ raise NotImplementedError('non-zero mean is not supported yet')
+
+
+def random_matrix(shape, tt_rank=2, mean=0., stddev=1.):
+ """Generate a random TT-matrix of the given shape with given mean and stddev.
+
+ Entries of the generated matrix (in the full format) will be iid and satisfy
+ E[x_{i1i2..id}] = mean, Var[x_{i1i2..id}] = stddev^2, but the distribution is
+ in fact not Gaussian.
+
+ In the current implementation only mean 0 is supported. To get
+ a random matrix with a specified mean (at the cost of a TT-rank larger
+ by 1) you can call
+ x = t3f.random_matrix(shape, tt_rank, stddev=stddev)
+ x = mean * t3f.ones_like(x) + x
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ random_matrix([[2, 2, 2], None])
+ and
+ random_matrix([None, [2, 2, 2]])
+ will create an 8-element column vector and an 8-element row vector, respectively.
+ tt_rank: a number or a (d+1)-element array with ranks.
+ mean: a number, the desired mean for the distribution of entries.
+ stddev: a number, the desired standard deviation for the distribution of
+ entries.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1])
+ """
+ # TODO: good distribution to init training.
+ # In case the shape is immutable.
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)
+
+ num_dims = shape[0].size
+ if tt_rank.size == 1:
+ tt_rank = tt_rank * np.ones(num_dims - 1)
+ tt_rank = np.concatenate([[1], tt_rank, [1]])
+
+ tt_rank = tt_rank.astype(int)
+
+ # Empirically, entries of a TT-tensor with cores initialized from N(0, 1)
+ # have variance np.prod(tt_rank) and mean 0.
+ # We scale each TT-core to obtain the desired stddev.
+
+ cr_exponent = -1.0 / (2 * num_dims)
+ var = np.prod(tt_rank ** cr_exponent)
+ core_stddev = stddev ** (1.0 / num_dims) * var
+ tt = matrix_with_random_cores(shape, tt_rank=tt_rank, stddev=core_stddev)
+
+ if np.abs(mean) < 1e-8:
+ return tt
+ else:
+ raise NotImplementedError('non-zero mean is not supported yet')
+
+
+def random_matrix_batch(shape, tt_rank=2, batch_size=1, mean=0., stddev=1.):
+ """Generate a batch of TT-matrices with given shape, mean and stddev.
+
+ Entries of the generated matrices (in the full format) will be iid and
+ satisfy E[x_{i1i2..id}] = mean, Var[x_{i1i2..id}] = stddev^2, but the
+ distribution is in fact not Gaussian.
+
+ In the current implementation only mean 0 is supported. To get a
+ batch of random matrices with a specified mean (at the cost of a TT-rank
+ larger by 1) you can call
+ x = t3f.random_matrix_batch(shape, tt_rank, batch_size=bs, stddev=stddev)
+ x = mean * t3f.ones_like(x) + x
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ random_matrix_batch([[2, 2, 2], None])
+ and
+ random_matrix_batch([None, [2, 2, 2]])
+ will create a batch of one 8-element column vector and a batch of one row vector, respectively.
+ tt_rank: a number or a (d+1)-element array with ranks.
+ batch_size: an integer.
+ mean: a number, the desired mean for the distribution of entries.
+ stddev: a number, the desired standard deviation for the distribution of
+ entries.
+
+ Returns:
+ TensorTrainBatch containing a batch of TT-matrices of size
+ np.prod(shape[0]) x np.prod(shape[1])
+ """
+ # In case the shape is immutable.
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank,
+ batch_size=batch_size)
+ num_dims = shape[0].size
+ if tt_rank.size == 1:
+ tt_rank = tt_rank * np.ones(num_dims - 1)
+ tt_rank = np.concatenate([[1], tt_rank, [1]])
+
+ shape = shape.astype(int)
+ tt_rank = tt_rank.astype(int)
+
+ cr_exponent = -1.0 / (2 * num_dims)
+ var = np.prod(tt_rank ** cr_exponent)
+ core_stddev = stddev ** (1.0 / num_dims) * var
+ tt = matrix_batch_with_random_cores(shape, tt_rank=tt_rank,
+ stddev=core_stddev,
+ batch_size=batch_size)
+
+ if np.abs(mean) < 1e-8:
+ return tt
+ else:
+ raise NotImplementedError('non-zero mean is not supported yet')
+
+
+def glorot_initializer(shape, tt_rank=2):
+ """Constructs a random TT matrix with entrywise variance 2.0 / (n_in + n_out)
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ glorot_initializer([[2, 2, 2], None])
+ and
+ glorot_initializer([None, [2, 2, 2]])
+ will create an 8-element column vector and an 8-element row vector, respectively.
+ tt_rank: a number or a (d+1)-element array with ranks.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1])
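+
+ Example (a sketch, assuming t3f re-exports this initializer and
+ get_variable in __init__):
+   W_init = t3f.glorot_initializer([[4, 7, 4, 7], [5, 5, 5, 5]])
+   W = t3f.get_variable('W', initializer=W_init)  # a 784 x 625 TT-matrix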
+ """
+
+ # In case the shape is immutable.
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)
+ n_in = np.prod(shape[0])
+ n_out = np.prod(shape[1])
+ lamb = 2.0 / (n_in + n_out)
+
+ return random_matrix(shape, tt_rank=tt_rank, stddev=np.sqrt(lamb))
+
+
+def he_initializer(shape, tt_rank=2):
+ """Constructs a random TT matrix with entrywise variance 2.0 / n_in
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ he_initializer([[2, 2, 2], None])
+ and
+ he_initializer([None, [2, 2, 2]])
+ will create an 8-element column vector and an 8-element row vector, respectively.
+ tt_rank: a number or a (d+1)-element array with ranks.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1])
+ """
+
+ # In case the shape is immutable.
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)
+ n_in = np.prod(shape[0])
+ lamb = 2.0 / n_in
+
+ return random_matrix(shape, tt_rank=tt_rank, stddev=np.sqrt(lamb))
+
+
+def lecun_initializer(shape, tt_rank=2):
+ """Constructs a random TT matrix with entrywise variance 1.0 / n_in
+
+ Args:
+ shape: 2d array, shape[0] is the shape of the matrix row-index,
+ shape[1] is the shape of the column index.
+ shape[0] and shape[1] should have the same number of elements (d)
+ Also supports omitting one of the dimensions for vectors, e.g.
+ lecun_initializer([[2, 2, 2], None])
+ and
+ lecun_initializer([None, [2, 2, 2]])
+ will create an 8-element column vector and an 8-element row vector, respectively.
+ tt_rank: a number or a (d+1)-element array with ranks.
+
+ Returns:
+ TensorTrain containing a TT-matrix of size
+ np.prod(shape[0]) x np.prod(shape[1])
+ """
+
+ # In case the shape is immutable.
+ shape = list(shape)
+ # In case shape represents a vector, e.g. [None, [2, 2, 2]]
+ if shape[0] is None:
+ shape[0] = np.ones(len(shape[1]), dtype=int)
+ # In case shape represents a vector, e.g. [[2, 2, 2], None]
+ if shape[1] is None:
+ shape[1] = np.ones(len(shape[0]), dtype=int)
+ shape = np.array(shape)
+ tt_rank = np.array(tt_rank)
+ _validate_input_parameters(is_tensor=False, shape=shape, tt_rank=tt_rank)
+ n_in = np.prod(shape[0])
+ lamb = 1.0 / n_in
+ return random_matrix(shape, tt_rank=tt_rank, stddev=np.sqrt(lamb))
diff --git a/t3f/initializers_test.py b/t3f/initializers_test.py
new file mode 100644
index 00000000..a555eeba
--- /dev/null
+++ b/t3f/initializers_test.py
@@ -0,0 +1,176 @@
+import numpy as np
+import tensorflow as tf
+
+from t3f import initializers
+from t3f import ops
+
+
+class InitializersTest(tf.test.TestCase):
+
+ def testTensorOnesAndZeros(self):
+ tt_ones = initializers.tensor_ones([2, 3, 4])
+ tt_zeros = initializers.tensor_zeros([2, 3, 4])
+
+ ones_desired = np.ones((2, 3, 4))
+ zeros_desired = np.zeros((2, 3, 4))
+ with self.test_session() as sess:
+ tt_ones_full = sess.run(ops.full(tt_ones))
+ tt_zeros_full = sess.run(ops.full(tt_zeros))
+ self.assertAllClose(tt_ones_full, ones_desired)
+ self.assertAllClose(tt_zeros_full, zeros_desired)
+ bad_shapes = [[[2, 3]], [-1, 3], [0.1, 4]]
+ for shape in bad_shapes:
+ with self.assertRaises(ValueError):
+ initializers.tensor_ones(shape)
+ with self.assertRaises(ValueError):
+ initializers.tensor_zeros(shape)
+
+ def testMatrixOnesAndZeros(self):
+ tt_ones = initializers.matrix_ones([[2, 3, 4], [1, 2, 5]])
+ tt_zeros = initializers.matrix_zeros([[2, 3, 4], [1, 2, 5]])
+
+ ones_desired = np.ones((24, 10))
+ zeros_desired = np.zeros((24, 10))
+
+ bad_shapes = [[[-1, 2, 3], [3, 4, 6]], [[1.5, 2, 4], [2, 5, 6]],
+ [[1], [2, 3]], [2, 3, 4]]
+ with self.test_session() as sess:
+ tt_ones_full = sess.run(ops.full(tt_ones))
+ tt_zeros_full = sess.run(ops.full(tt_zeros))
+ self.assertAllClose(tt_ones_full, ones_desired)
+ self.assertAllClose(tt_zeros_full, zeros_desired)
+ for shape in bad_shapes:
+ with self.assertRaises(ValueError):
+ initializers.matrix_ones(shape)
+ with self.assertRaises(ValueError):
+ initializers.matrix_zeros(shape)
+
+ def testEye(self):
+ tt_eye = initializers.eye([4, 5, 6])
+ eye_desired = np.eye(120)
+ with self.test_session() as sess:
+ eye_full = sess.run(ops.full(tt_eye))
+ self.assertAllClose(eye_full, eye_desired)
+ bad_shapes = [[[2, 3]], [-1, 3], [0.1, 4]]
+ for shape in bad_shapes:
+ with self.assertRaises(ValueError):
+ initializers.eye(shape)
+
+ def testOnesLikeAndZerosLike(self):
+ a = initializers.random_tensor([2, 3, 4])
+ b = initializers.ones_like(a)
+ c = initializers.zeros_like(a)
+ var_list = [ops.full(b), ops.full(c)]
+ with self.test_session() as sess:
+ bf, cf = sess.run(var_list)
+ self.assertAllClose(bf, np.ones((2, 3, 4)))
+ self.assertAllClose(cf, np.zeros((2, 3, 4)))
+ with self.assertRaises(ValueError):
+ initializers.ones_like(1)
+ with self.assertRaises(ValueError):
+ initializers.zeros_like(1)
+
+ def testRandomTensor(self):
+ shapes = [[3, 4], [3, 4], [3, 4], [3, 4], [1, -2], [1.1, 2], [[3, 4]]]
+ tt_ranks = [-2, 1.5, [2, 3, 4, 5], [1.5], 2, 2, 2]
+ bad_cases = zip(shapes, tt_ranks)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.random_tensor(case[0], tt_rank=case[1])
+
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.tensor_with_random_cores(case[0], tt_rank=case[1])
+
+ with self.assertRaises(NotImplementedError):
+ initializers.random_tensor([1, 2], mean=1.0)
+
+ def testRandomMatrix(self):
+ shapes = [[1, 2, 3], [[1, 2], [1, 2, 3]], [[-1, 2, 3], [1, 2, 3]],
+ [[0.5, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]]]
+ tt_ranks = [2, 2, 2, 2, -1, [[[1]]], [2.5, 3]]
+ bad_cases = zip(shapes, tt_ranks)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.random_matrix(case[0], tt_rank=case[1])
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.matrix_with_random_cores(case[0], tt_rank=case[1])
+ with self.assertRaises(NotImplementedError):
+ initializers.random_matrix([[2, 3, 4], [1, 2, 3]], mean=1.0)
+
+ def testRandomTensorBatch(self):
+ shapes = [[3, 4], [3, 4], [3, 4], [3, 4], [1, -2], [1.1, 2], [[3, 4]],
+ [1, 2], [3, 4]]
+ tt_ranks = [-2, 1.5, [2, 3, 4, 5], [1.5], 2, 2, 2, 2, 2]
+ bs = [1] * 7 + [-1] + [0.5]
+ bad_cases = zip(shapes, tt_ranks, bs)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.random_tensor_batch(case[0], tt_rank=case[1],
+ batch_size=case[2])
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.tensor_batch_with_random_cores(case[0], tt_rank=case[1],
+ batch_size=case[2])
+ with self.assertRaises(NotImplementedError):
+ initializers.random_tensor_batch([1, 2, 3], mean=1.0)
+
+ def testRandomMatrixBatch(self):
+ shapes = [[1, 2, 3], [[1, 2], [1, 2, 3]], [[-1, 2, 3], [1, 2, 3]],
+ [[0.5, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]]]
+ tt_ranks = [2, 2, 2, 2, -1, [[[1]]], [2.5, 3], 2, 2]
+ bs = 7 * [1] + [-1] + [0.5]
+ bad_cases = zip(shapes, tt_ranks, bs)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.random_matrix_batch(case[0], tt_rank=case[1],
+ batch_size=case[2])
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.matrix_batch_with_random_cores(case[0], tt_rank=case[1],
+ batch_size=case[2])
+ with self.assertRaises(NotImplementedError):
+ initializers.random_matrix_batch([[1, 2, 3], [1, 2, 3]], mean=1.0)
+
+ def testGlorot(self):
+ shapes = [[1, 2, 3], [[1, 2], [1, 2, 3]], [[-1, 2, 3], [1, 2, 3]],
+ [[0.5, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]]]
+ tt_ranks = [2, 2, 2, 2, -1, [[[1]]], [2.5, 3]]
+ bad_cases = zip(shapes, tt_ranks)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.glorot_initializer(case[0], tt_rank=case[1])
+
+ def testHe(self):
+ shapes = [[1, 2, 3], [[1, 2], [1, 2, 3]], [[-1, 2, 3], [1, 2, 3]],
+ [[0.5, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]]]
+ tt_ranks = [2, 2, 2, 2, -1, [[[1]]], [2.5, 3]]
+ bad_cases = zip(shapes, tt_ranks)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.he_initializer(case[0], tt_rank=case[1])
+
+ def testLecun(self):
+ shapes = [[1, 2, 3], [[1, 2], [1, 2, 3]], [[-1, 2, 3], [1, 2, 3]],
+ [[0.5, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]],
+ [[1, 2, 3], [1, 2, 3]]]
+ tt_ranks = [2, 2, 2, 2, -1, [[[1]]], [2.5, 3]]
+ bad_cases = zip(shapes, tt_ranks)
+ for case in bad_cases:
+ with self.assertRaises(ValueError):
+ initializers.lecun_initializer(case[0], tt_rank=case[1])
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/t3f/kronecker.py b/t3f/kronecker.py
index 27b8c94b..0910c364 100644
--- a/t3f/kronecker.py
+++ b/t3f/kronecker.py
@@ -1,8 +1,9 @@
-import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-import ops
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import ops
+
def determinant(kron_a):
"""Computes the determinant of a given Kronecker-factorized matrix.
@@ -10,12 +11,13 @@ def determinant(kron_a):
Note, that this method can suffer from overflow.
Args:
- kron_a: `TensorTrain` object containing a matrix of size N x N,
- factorized into a Kronecker product of square matrices (all
- tt-ranks are 1 and all tt-cores are square).
+ kron_a: `TensorTrain` or `TensorTrainBatch` object containing a matrix or a
+ batch of matrices of size N x N, factorized into a Kronecker product of
+ square matrices (all tt-ranks are 1 and all tt-cores are square).
Returns:
- Number, the determinant of the given matrix.
+ A number, or a Tensor with one number for each element in the batch:
+ the determinant of the given matrix (or of each matrix in the batch).
Raises:
ValueError if the tt-cores of the provided matrix are not square,
@@ -38,12 +40,16 @@ def determinant(kron_a):
raise ValueError('The argument should be a Kronecker product of square '
'matrices (tt-cores must be square)')
+ is_batch = isinstance(kron_a, TensorTrainBatch)
pows = tf.cast(tf.reduce_prod(i_shapes), kron_a.dtype)
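+ # For square cores A_i of size n_i x n_i and N = prod_i n_i (`pows` above),
+ # det(A_1 kron ... kron A_d) = prod_i det(A_i) ** (N / n_i),
+ # so the determinant is computed from the small cores directly.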
cores = kron_a.tt_cores
det = 1
for core_idx in range(kron_a.ndims()):
core = cores[core_idx]
- core_det = tf.matrix_determinant(core[0, :, :, 0])
+ if is_batch:
+ core_det = tf.matrix_determinant(core[:, 0, :, :, 0])
+ else:
+ core_det = tf.matrix_determinant(core[0, :, :, 0])
core_pow = pows / i_shapes[core_idx].value
det *= tf.pow(core_det, core_pow)
@@ -54,12 +60,13 @@ def slog_determinant(kron_a):
"""Computes the sign and log-det of a given Kronecker-factorized matrix.
Args:
- kron_a: `TensorTrain` object containing a matrix of size N x N,
- factorized into a Kronecker product of square matrices (all
- tt-ranks are 1 and all tt-cores are square).
-
+ kron_a: `TensorTrain` or `TensorTrainBatch` object containing a matrix or a
+ batch of matrices of size N x N, factorized into a Kronecker product of
+ square matrices (all tt-ranks are 1 and all tt-cores are square).
+
Returns:
- Two numbers, sign of the determinant and the log-determinant of the given
+ Two numbers, or two Tensors with one number for each element in the batch:
+ the sign of the determinant and the log-determinant of the given
matrix. If the determinant is zero, then sign will be 0 and logdet will be
-Inf. In all cases, the determinant is equal to sign * np.exp(logdet).
@@ -83,33 +90,39 @@ def slog_determinant(kron_a):
if i_shapes != j_shapes:
raise ValueError('The argument should be a Kronecker product of square '
'matrices (tt-cores must be square)')
+
+ is_batch = isinstance(kron_a, TensorTrainBatch)
pows = tf.cast(tf.reduce_prod(i_shapes), kron_a.dtype)
-
logdet = 0.
det_sign = 1.
+
for core_idx in range(kron_a.ndims()):
core = kron_a.tt_cores[core_idx]
- core_det = tf.matrix_determinant(core[0, :, :, 0])
+ if is_batch:
+ core_det = tf.matrix_determinant(core[:, 0, :, :, 0])
+ else:
+ core_det = tf.matrix_determinant(core[0, :, :, 0])
core_abs_det = tf.abs(core_det)
core_det_sign = tf.sign(core_det)
core_pow = pows / i_shapes[core_idx].value
logdet += tf.log(core_abs_det) * core_pow
det_sign *= core_det_sign**(core_pow)
-
-
return det_sign, logdet
+
def inv(kron_a):
"""Computes the inverse of a given Kronecker-factorized matrix.
Args:
- kron_a: `TensorTrain` object containing a matrix of size N x N,
- factorized into a Kronecker product of square matrices (all
- tt-ranks are 1 and all tt-cores are square). All the cores
- must be invertable.
+ kron_a: `TensorTrain` or `TensorTrainBatch` object containing a matrix or a
+ batch of matrices of size N x N, factorized into a Kronecker product of
+ square matrices (all tt-ranks are 1 and all tt-cores are square). All the
+ cores must be invertible.
Returns:
- `TensorTrain` object, containing a TT-matrix of size N x N.
+ `TensorTrain` object containing a TT-matrix of size N x N if the argument
+ is a `TensorTrain`, or a
+ `TensorTrainBatch` object containing TT-matrices of size N x N if the
+ argument is a `TensorTrainBatch`.
Raises:
ValueError if the tt-cores of the provided matrix are not square,
@@ -132,27 +145,40 @@ def inv(kron_a):
raise ValueError('The argument should be a Kronecker product of square '
'matrices (tt-cores must be square)')
+ is_batch = isinstance(kron_a, TensorTrainBatch)
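+ # (A_1 kron ... kron A_d)^{-1} = A_1^{-1} kron ... kron A_d^{-1}, so it
+ # suffices to invert each core slice separately.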
inv_cores = []
for core_idx in range(kron_a.ndims()):
core = kron_a.tt_cores[core_idx]
- core_inv = tf.matrix_inverse(core[0, :, :, 0])
- inv_cores.append(tf.expand_dims(tf.expand_dims(core_inv, 0), -1))
+ if is_batch:
+ core_inv = tf.matrix_inverse(core[:, 0, :, :, 0])
+ core_inv = tf.expand_dims(tf.expand_dims(core_inv, 1), -1)
+ else:
+ core_inv = tf.matrix_inverse(core[0, :, :, 0])
+ core_inv = tf.expand_dims(tf.expand_dims(core_inv, 0), -1)
+ inv_cores.append(core_inv)
res_ranks = kron_a.get_tt_ranks()
res_shape = kron_a.get_raw_shape()
- return TensorTrain(inv_cores, res_shape, res_ranks)
+ if is_batch:
+ return TensorTrainBatch(inv_cores, res_shape, res_ranks)
+ else:
+ return TensorTrain(inv_cores, res_shape, res_ranks)
+
def cholesky(kron_a):
"""Computes the Cholesky decomposition of a given Kronecker-factorized matrix.
Args:
- kron_a: `TensorTrain` object containing a matrix of size N x N,
- factorized into a Kronecker product of square matrices (all
- tt-ranks are 1 and all tt-cores are square). All the cores
- must be symmetric positive-definite.
+ kron_a: `TensorTrain` or `TensorTrainBatch` object containing a matrix or a
+ batch of matrices of size N x N, factorized into a Kronecker product of
+ square matrices (all tt-ranks are 1 and all tt-cores are square). All the
+ cores must be symmetric positive-definite.
Returns:
- `TensorTrain` object, containing a TT-matrix of size N x N.
+ `TensorTrain` object containing a TT-matrix of size N x N if the argument
+ is a `TensorTrain`, or a
+ `TensorTrainBatch` object containing TT-matrices of size N x N if the
+ argument is a `TensorTrainBatch`.
Raises:
ValueError if the tt-cores of the provided matrix are not square,
@@ -174,23 +200,34 @@ def cholesky(kron_a):
if i_shapes != j_shapes:
raise ValueError('The argument should be a Kronecker product of square '
'matrices (tt-cores must be square)')
+
+ is_batch = isinstance(kron_a, TensorTrainBatch)
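+ # chol(A_1 kron ... kron A_d) = chol(A_1) kron ... kron chol(A_d), so the
+ # decomposition is computed core-by-core.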
cho_cores = []
+
for core_idx in range(kron_a.ndims()):
core = kron_a.tt_cores[core_idx]
- core_cho = tf.cholesky(core[0, :, :, 0])
- cho_cores.append(tf.expand_dims(tf.expand_dims(core_cho, 0), -1))
+ if is_batch:
+ core_cho = tf.cholesky(core[:, 0, :, :, 0])
+ core_cho = tf.expand_dims(tf.expand_dims(core_cho, 1), -1)
+ else:
+ core_cho = tf.cholesky(core[0, :, :, 0])
+ core_cho = tf.expand_dims(tf.expand_dims(core_cho, 0), -1)
+ cho_cores.append(core_cho)
res_ranks = kron_a.get_tt_ranks()
res_shape = kron_a.get_raw_shape()
- return TensorTrain(cho_cores, res_shape, res_ranks)
+ if is_batch:
+ return TensorTrainBatch(cho_cores, res_shape, res_ranks)
+ else:
+ return TensorTrain(cho_cores, res_shape, res_ranks)
def _is_kron(tt_a):
"""Returns True if the argument is a Kronecker product matrix.
Args:
- tt_a: `TensorTrain` object
-
+ tt_a: `TensorTrain` or `TensorTrainBatch` object.
+
Returns:
bool
"""
diff --git a/t3f/kronecker_test.py b/t3f/kronecker_test.py
index b771f4f0..84a07940 100644
--- a/t3f/kronecker_test.py
+++ b/t3f/kronecker_test.py
@@ -1,29 +1,31 @@
import numpy as np
import tensorflow as tf
-import t3f
-import tensor_train
-import kronecker as kr
-import ops
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import ops
+from t3f import initializers
+from t3f import variables
+from t3f import kronecker as kr
class KroneckerTest(tf.test.TestCase):
def testIsKronNonKron(self):
# Tests _is_kron on a non-Kronecker matrix
- initializer = t3f.random_matrix(((2, 3), (3, 2)), tt_rank=2)
- tt_mat = t3f.get_variable('tt_mat', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3), (3, 2)), tt_rank=2)
+ tt_mat = variables.get_variable('tt_mat', initializer=initializer)
self.assertFalse(kr._is_kron(tt_mat))
def testIsKronKron(self):
# Tests _is_kron on a Kronecker matrix
- initializer = t3f.random_matrix(((2, 3), (3, 2)), tt_rank=1)
- kron_mat = t3f.get_variable('kron_mat', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3), (3, 2)), tt_rank=1)
+ kron_mat = variables.get_variable('kron_mat', initializer=initializer)
self.assertTrue(kr._is_kron(kron_mat))
def testDet(self):
# Tests the determinant function
- initializer = t3f.random_matrix(((2, 3, 2), (2, 3, 2)), tt_rank=1)
- kron_mat = t3f.get_variable('kron_mat', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3, 2), (2, 3, 2)), tt_rank=1)
+ kron_mat = variables.get_variable('kron_mat', initializer=initializer)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
@@ -38,12 +40,12 @@ def testSlogDet(self):
# the current version is platform-dependent
tf.set_random_seed(5) # negative determinant
- initializer = t3f.random_matrix(((2, 3), (2, 3)), tt_rank=1)
- kron_neg = t3f.get_variable('kron_neg', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3), (2, 3)), tt_rank=1)
+ kron_neg = variables.get_variable('kron_neg', initializer=initializer)
tf.set_random_seed(1) # positive determinant
- initializer = t3f.random_matrix(((2, 3), (2, 3)), tt_rank=1)
- kron_pos = t3f.get_variable('kron_pos', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3), (2, 3)), tt_rank=1)
+ kron_pos = variables.get_variable('kron_pos', initializer=initializer)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
@@ -62,8 +64,8 @@ def testSlogDet(self):
def testInv(self):
# Tests the inv function
- initializer = t3f.random_matrix(((2, 3, 2), (2, 3, 2)), tt_rank=1)
- kron_mat = t3f.get_variable('kron_mat', initializer=initializer)
+ initializer = initializers.random_matrix(((2, 3, 2), (2, 3, 2)), tt_rank=1)
+ kron_mat = variables.get_variable('kron_mat', initializer=initializer)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
@@ -81,10 +83,10 @@ def testCholesky(self):
K_1 = L_1.dot(L_1.T)
K_2 = L_2.dot(L_2.T)
K = np.kron(K_1, K_2)
- initializer = tensor_train.TensorTrain([K_1[None, :, :, None],
+ initializer = TensorTrain([K_1[None, :, :, None],
K_2[None, :, :, None]],
tt_ranks=7*[1])
- kron_mat = t3f.get_variable('kron_mat', initializer=initializer)
+ kron_mat = variables.get_variable('kron_mat', initializer=initializer)
init_op = tf.global_variables_initializer()
with self.test_session() as sess:
sess.run(init_op)
@@ -92,6 +94,89 @@ def testCholesky(self):
actual = ops.full(kr.cholesky(kron_mat)).eval()
self.assertAllClose(desired, actual)
+class BatchKroneckerTest(tf.test.TestCase):
+
+ def testIsKronNonKron(self):
+ # Tests _is_kron on a non-Kronecker matrix batch
+ initializer = initializers.random_matrix_batch(((2, 3), (3, 2)), tt_rank=2,
+ batch_size=3)
+ tt_mat_batch = variables.get_variable('tt_mat_batch',
+ initializer=initializer)
+ self.assertFalse(kr._is_kron(tt_mat_batch))
+
+ def testIsKronKron(self):
+ # Tests _is_kron on a Kronecker matrix batch
+ initializer = initializers.random_matrix_batch(((2, 3), (3, 2)), tt_rank=1,
+ batch_size=3)
+ kron_mat_batch = variables.get_variable('kron_mat_batch',
+ initializer=initializer)
+ self.assertTrue(kr._is_kron(kron_mat_batch))
+
+ def testDet(self):
+ # Tests the determinant function
+ initializer = initializers.random_matrix_batch(((2, 3, 2), (2, 3, 2)),
+ tt_rank=1, batch_size=3)
+ kron_mat_batch = variables.get_variable('kron_mat_batch',
+ initializer=initializer)
+ init_op = tf.global_variables_initializer()
+ with self.test_session() as sess:
+ sess.run(init_op)
+ desired = tf.matrix_determinant(ops.full(kron_mat_batch)).eval()
+ actual = kr.determinant(kron_mat_batch).eval()
+ self.assertAllClose(desired, actual)
+
+ def testSlogDet(self):
+ # Tests the slog_determinant function
+
+ tf.set_random_seed(1) # negative and positive determinants
+ initializer = initializers.random_matrix_batch(((2, 3), (2, 3)), tt_rank=1,
+ batch_size=3)
+ kron_mat_batch = variables.get_variable('kron_mat_batch',
+ initializer=initializer)
+
+ init_op = tf.global_variables_initializer()
+ with self.test_session() as sess:
+ # negative determinant
+ sess.run(init_op)
+ desired_sign, desired_det = np.linalg.slogdet(
+ ops.full(kron_mat_batch).eval())
+ actual_sign, actual_det = sess.run(kr.slog_determinant(kron_mat_batch))
+ self.assertAllEqual(desired_sign, actual_sign)
+ self.assertAllClose(desired_det, actual_det)
+
+ def testInv(self):
+ # Tests the inv function
+ initializer = initializers.random_matrix_batch(((2, 3, 2), (2, 3, 2)),
+ tt_rank=1, batch_size=3)
+ kron_mat_batch = variables.get_variable('kron_mat_batch',
+ initializer=initializer)
+ init_op = tf.global_variables_initializer()
+ with self.test_session() as sess:
+ sess.run(init_op)
+ desired = np.linalg.inv(ops.full(kron_mat_batch).eval())
+ actual = ops.full(kr.inv(kron_mat_batch)).eval()
+ self.assertAllClose(desired, actual, atol=1e-4)
+
+ def testCholesky(self):
+ # Tests the cholesky function
+ np.random.seed(8)
+
+ # generating two symmetric positive-definite tt-cores
+ L_1 = np.tril(np.random.normal(scale=2., size=(4, 2, 2)))
+ L_2 = np.tril(np.random.normal(scale=2., size=(4, 3, 3)))
+ K_1 = np.einsum('ijk,ilk->ijl', L_1, L_1)
+ K_2 = np.einsum('ijk,ilk->ijl', L_2, L_2)
+ initializer = TensorTrainBatch([K_1[:, None, :, :, None],
+ K_2[:, None, :, :, None]], tt_ranks=7*[1])
+ kron_mat_batch = variables.get_variable('kron_mat_batch',
+ initializer=initializer)
+ init_op = tf.global_variables_initializer()
+ with self.test_session() as sess:
+ sess.run(init_op)
+ desired = np.linalg.cholesky(ops.full(kron_mat_batch).eval())
+ actual = ops.full(kr.cholesky(kron_mat_batch)).eval()
+ self.assertAllClose(desired, actual)
+
if __name__ == "__main__":
tf.test.main()
diff --git a/t3f/ops.py b/t3f/ops.py
index fffe59f0..0ea892db 100644
--- a/t3f/ops.py
+++ b/t3f/ops.py
@@ -1,14 +1,16 @@
import tensorflow as tf
-
-from tensor_train_base import TensorTrainBase
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
-import shapes
-import utils
-
+import numpy as np
+from t3f.tensor_train_base import TensorTrainBase
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import shapes
+from t3f import utils
+from t3f import decompositions
+from t3f import initializers
# TODO: add complexities to the comments.
+
def full(tt):
"""Converts a TensorTrain into a regular tensor or matrix (tf.Tensor).
@@ -313,7 +315,7 @@ def tt_tt_flat_inner(tt_a, tt_b):
"""Inner product between two TT-tensors or TT-matrices along all axis.
The shapes of tt_a and tt_b should coincide.
-
+
Args:
tt_a: `TensorTrain` or `TensorTrainBatch` object
tt_b: `TensorTrain` or `TensorTrainBatch` object
@@ -326,6 +328,17 @@ def tt_tt_flat_inner(tt_a, tt_b):
ValueError if the arguments are not `TensorTrain` objects, have different
number of TT-cores, different underlying shape, or if you are trying to
compute inner product between a TT-matrix and a TT-tensor.
+
+ Complexity:
+ Multiplying two single TT-objects is O(d r^3 n) where d is the number of
+ TT-cores (tt_a.ndims()), r is the largest TT-rank
+ max(tt_a.get_tt_rank(), tt_b.get_tt_rank())
+ and n is the size of the axis dimension, e.g.
+ for a tensor of size 4 x 4 x 4, n is 4;
+ for a 27 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
+ A more precise complexity is O(d r1 r2 n max(r1, r2)) where
+ r1 is the largest TT-rank of tt_a and r2 is the largest TT-rank of tt_b.
+ The complexity of this operation for batch input is O(batch_size d r^3 n).
"""
if not isinstance(tt_a, TensorTrainBase) or not isinstance(tt_b,
TensorTrainBase):
@@ -414,10 +427,10 @@ def tt_sparse_flat_inner(tt_a, sparse_b):
num_elements = sparse_b.indices.get_shape()[0]
else:
num_elements = tf.shape(sparse_b.indices)[0]
- tt_a_elements = tf.ones((num_elements, 1, 1))
a_shape = shapes.lazy_raw_shape(tt_a)
a_ranks = shapes.lazy_tt_ranks(tt_a)
if tt_a.is_tt_matrix():
+ tt_a_elements = tf.ones((num_elements, 1, 1))
# TODO: use t3f.shape is safer??
tensor_shape = tt_a.get_raw_shape()
row_idx_linear = tf.cast(sparse_b.indices[:, 0], tf.int64)
@@ -425,9 +438,6 @@ def tt_sparse_flat_inner(tt_a, sparse_b):
col_idx_linear = tf.cast(sparse_b.indices[:, 1], tf.int64)
col_idx = utils.unravel_index(col_idx_linear, tf.cast(tensor_shape[1], tf.int64))
for core_idx in range(tt_a.ndims()):
- # TODO: probably a very slow way to do it, wait for a reasonable gather
- # implementation
- # https://github.com/tensorflow/tensorflow/issues/206
curr_core = tt_a.tt_cores[core_idx]
left_rank = a_ranks[core_idx]
right_rank = a_ranks[core_idx + 1]
@@ -438,20 +448,13 @@ def tt_sparse_flat_inner(tt_a, sparse_b):
# Ravel multiindex (row_idx[:, core_idx], col_idx[:, core_idx]) into
# a linear index to use tf.gather that supports only first dimensional
# gather.
+ # TODO: use gather_nd instead.
curr_elements_idx = row_idx[:, core_idx] * tensor_shape[1][core_idx]
curr_elements_idx += col_idx[:, core_idx]
core_slices = tf.gather(curr_core, curr_elements_idx)
tt_a_elements = tf.matmul(tt_a_elements, core_slices)
else:
- for core_idx in range(tt_a.ndims()):
- curr_elements_idx = sparse_b.indices[:, core_idx]
- # TODO: probably a very slow way to do it, wait for a reasonable gather
- # implementation
- # https://github.com/tensorflow/tensorflow/issues/206
- curr_core = tt_a.tt_cores[core_idx]
- curr_core = tf.transpose(curr_core, (1, 0, 2))
- core_slices = tf.gather(curr_core, curr_elements_idx)
- tt_a_elements = tf.matmul(tt_a_elements, core_slices)
+ tt_a_elements = gather_nd(tt_a, sparse_b.indices)
tt_a_elements = tf.reshape(tt_a_elements, (1, -1))
sparse_b_elements = tf.reshape(sparse_b.values, (-1, 1))
result = tf.matmul(tt_a_elements, sparse_b_elements)
@@ -730,98 +733,243 @@ def add(tt_a, tt_b):
def multiply(tt_left, right):
"""Returns a TensorTrain corresponding to element-wise product tt_left * right.
- The shapes of tt_left and right should coincide.
+ The TT-shapes of tt_left and right should coincide. Supports broadcasting
+ over the batch dimension:
+ multiply(TensorTrainBatch, TensorTrain) returns a TensorTrainBatch consisting
+ of element-wise products of each TT in the batch with the TensorTrain;
+
+ multiply(TensorTrainBatch_a, TensorTrainBatch_b) returns a TensorTrainBatch
+ consisting of element-wise products of the corresponding TTs in
+ TensorTrainBatch_a and TensorTrainBatch_b.
+ The batch sizes should support broadcasting (be equal, or one of them be 1).
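+
+ Example (a sketch, assuming these functions are re-exported as t3f.* in
+ __init__):
+   left = t3f.random_tensor_batch((3, 4), batch_size=3)
+   right = t3f.random_tensor((3, 4))
+   prod = t3f.multiply(left, right)  # TensorTrainBatch with batch_size=3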
Args:
- tt_left: `TensorTrain`, TT-tensor or TT-matrix
- right: `TensorTrain`, TT-tensor or TT-matrix, OR a number.
+ tt_left: `TensorTrain` OR `TensorTrainBatch`
+ right: `TensorTrain` OR `TensorTrainBatch` OR a number.
Returns
- a `TensorTrain` object corresponding to the element-wise product of the
- arguments.
+ a `TensorTrain` or `TensorTrainBatch` object corresponding to the
+ element-wise product of the arguments.
Raises
- ValueError if the arguments shapes do not coincide.
+ ValueError if the arguments shapes do not coincide or broadcasting is not
+ possible.
"""
+
+ is_left_batch = isinstance(tt_left, TensorTrainBatch)
+ is_right_batch = isinstance(right, TensorTrainBatch)
+
+ is_batch_case = is_left_batch or is_right_batch
+ ndims = tt_left.ndims()
if not isinstance(right, TensorTrainBase):
# Assume right is a number, not TensorTrain.
+ # To spread the scalar `right` uniformly across the TT-cores we take its
+ # absolute value raised to the power 1/ndims; the first TT-core is
+ # additionally multiplied by the sign of `right`.
tt_cores = list(tt_left.tt_cores)
- tt_cores[0] = right * tt_cores[0]
+ fact = tf.pow(tf.cast(tf.abs(right), tt_left.dtype), 1.0 / ndims)
+ sign = tf.cast(tf.sign(right), tt_left.dtype)
+ for i in range(len(tt_cores)):
+ tt_cores[i] = fact * tt_cores[i]
+
+ tt_cores[0] = tt_cores[0] * sign
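+ # E.g. for right = -8 and ndims = 3 each core is scaled by 2 and the
+ # first core is additionally negated, so the result represents -8 * tt
+ # without putting the whole factor on a single core.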
out_ranks = tt_left.get_tt_ranks()
+ if is_left_batch:
+ out_batch_size = tt_left.batch_size
else:
- ndims = tt_left.ndims()
+
if tt_left.is_tt_matrix() != right.is_tt_matrix():
raise ValueError('The arguments should be both TT-tensors or both '
'TT-matrices')
- if tt_left.get_shape() != right.get_shape():
+ if tt_left.get_raw_shape() != right.get_raw_shape():
raise ValueError('The arguments should have the same shape.')
+ out_batch_size = 1
+ dependencies = []
+ can_determine_if_broadcast = True
+ if is_left_batch and is_right_batch:
+ if tt_left.batch_size is None and right.batch_size is None:
+ can_determine_if_broadcast = False
+ elif tt_left.batch_size is None and right.batch_size is not None:
+ if right.batch_size > 1:
+ can_determine_if_broadcast = False
+ elif tt_left.batch_size is not None and right.batch_size is None:
+ if tt_left.batch_size > 1:
+ can_determine_if_broadcast = False
+
+ if not can_determine_if_broadcast:
+ # Cannot determine if broadcasting is needed. Avoid broadcasting and
+ # assume elementwise multiplication AND add execution time assert to print
+ # a better error message if the batch sizes turn out to be different.
+
+ message = ('The batch sizes were unknown at graph construction time, so '
+ 'elementwise multiplication was assumed (i.e. no broadcasting). '
+ 'Now it seems that they are different after all:')
+
+ data = [message, shapes.lazy_batch_size(tt_left), ' x ',
+ shapes.lazy_batch_size(right)]
+ bs_eq = tf.assert_equal(shapes.lazy_batch_size(tt_left),
+ shapes.lazy_batch_size(right), data=data)
+
+ dependencies.append(bs_eq)
+
+ do_broadcast = shapes.is_batch_broadcasting_possible(tt_left, right)
+ if not can_determine_if_broadcast:
+ # Assume elementwise multiplication if broadcasting cannot be determined
+ # at graph construction time.
+ do_broadcast = False
+ if not do_broadcast and can_determine_if_broadcast:
+ raise ValueError('The batch sizes are different and not 1, broadcasting '
+ 'is not available.')
+
a_ranks = shapes.lazy_tt_ranks(tt_left)
b_ranks = shapes.lazy_tt_ranks(right)
shape = shapes.lazy_raw_shape(tt_left)
+ output_str = ''
+ bs_str_left = ''
+ bs_str_right = ''
+
+ if is_batch_case:
+ if is_left_batch and is_right_batch:
+ # Both arguments are batches of equal size.
+ if tt_left.batch_size == right.batch_size or not can_determine_if_broadcast:
+ bs_str_left = 'n'
+ bs_str_right = 'n'
+ output_str = 'n'
+ if not can_determine_if_broadcast:
+ out_batch_size = None
+ else:
+ out_batch_size = tt_left.batch_size
+ else:
+ # Broadcasting (e.g batch_sizes are 1 and n>1).
+ bs_str_left = 'n'
+ bs_str_right = 'm'
+ output_str = 'nm'
+ if tt_left.batch_size is None or tt_left.batch_size > 1:
+ out_batch_size = tt_left.batch_size
+ else:
+ out_batch_size = right.batch_size
+ else:
+ # One of the arguments is TensorTrain.
+ if is_left_batch:
+ bs_str_left = 'n'
+ bs_str_right = ''
+ out_batch_size = tt_left.batch_size
+ else:
+ bs_str_left = ''
+ bs_str_right = 'n'
+ out_batch_size = right.batch_size
+ output_str = 'n'
+
is_matrix = tt_left.is_tt_matrix()
tt_cores = []
+
for core_idx in range(ndims):
a_core = tt_left.tt_cores[core_idx]
b_core = right.tt_cores[core_idx]
left_rank = a_ranks[core_idx] * b_ranks[core_idx]
right_rank = a_ranks[core_idx + 1] * b_ranks[core_idx + 1]
if is_matrix:
- curr_core = tf.einsum('aijb,cijd->acijbd', a_core, b_core)
- curr_core = tf.reshape(curr_core, (left_rank, shape[0][core_idx],
- shape[1][core_idx], right_rank))
+ with tf.control_dependencies(dependencies):
+ curr_core = tf.einsum('{0}aijb,{1}cijd->{2}acijbd'.format(bs_str_left,
+ bs_str_right, output_str), a_core, b_core)
+ curr_core = tf.reshape(curr_core, (-1, left_rank,
+ shape[0][core_idx],
+ shape[1][core_idx],
+ right_rank))
+ if not is_batch_case:
+ curr_core = tf.squeeze(curr_core, axis=0)
else:
- curr_core = tf.einsum('aib,cid->acibd', a_core, b_core)
- curr_core = tf.reshape(curr_core, (left_rank, shape[0][core_idx],
- right_rank))
+ with tf.control_dependencies(dependencies):
+ curr_core = tf.einsum('{0}aib,{1}cid->{2}acibd'.format(bs_str_left,
+ bs_str_right, output_str), a_core, b_core)
+ curr_core = tf.reshape(curr_core, (-1, left_rank,
+ shape[0][core_idx], right_rank))
+ if not is_batch_case:
+ curr_core = tf.squeeze(curr_core, axis=0)
+
tt_cores.append(curr_core)
combined_ranks = zip(tt_left.get_tt_ranks(), right.get_tt_ranks())
out_ranks = [a * b for a, b in combined_ranks]
- return TensorTrain(tt_cores, tt_left.get_raw_shape(), out_ranks)
+ if not is_batch_case:
+ return TensorTrain(tt_cores, tt_left.get_raw_shape(), out_ranks)
+ else:
+ return TensorTrainBatch(tt_cores, tt_left.get_raw_shape(), out_ranks,
+ batch_size=out_batch_size)
+
+def frobenius_norm_squared(tt, differentiable=False):
+ """Frobenius norm squared of `TensorTrain` or of each TT in `TensorTrainBatch`.
-def frobenius_norm_squared(tt):
- """Frobenius norm squared of a TensorTrain (sum of squares of all elements).
+ Frobenius norm squared is the sum of squares of all elements in a tensor.
Args:
- tt: `TensorTrain` object
+ tt: `TensorTrain` or `TensorTrainBatch` object
+ differentiable: bool, whether to use a differentiable implementation
+ or a fast and stable implementation based on QR decomposition.
Returns
- a number
- sum of squares of all elements in `tt`
+ a number which is the Frobenius norm squared of `tt`, if it is `TensorTrain`
+ OR
+ a Tensor of size tt.batch_size, consisting of the Frobenius norms squared of
+ each TensorTrain in `tt`, if it is `TensorTrainBatch`
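+
+ Example (a sketch, assuming this function is re-exported as t3f.* in
+ __init__):
+   batch = t3f.random_tensor_batch((2, 3), batch_size=5)
+   norms = t3f.frobenius_norm_squared(batch)  # tf.Tensor of shape (5,)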
"""
- if tt.is_tt_matrix():
- running_prod = tf.einsum('aijb,cijd->bd', tt.tt_cores[0], tt.tt_cores[0])
- else:
- running_prod = tf.einsum('aib,cid->bd', tt.tt_cores[0], tt.tt_cores[0])
-
- for core_idx in range(1, tt.ndims()):
- curr_core = tt.tt_cores[core_idx]
+ if differentiable:
+ if hasattr(tt, 'batch_size'):
+ bs_str = 'n'
+ else:
+ bs_str = ''
if tt.is_tt_matrix():
- running_prod = tf.einsum('ac,aijb,cijd->bd', running_prod, curr_core,
- curr_core)
+ running_prod = tf.einsum('{0}aijb,{0}cijd->{0}bd'.format(bs_str),
+ tt.tt_cores[0], tt.tt_cores[0])
+ else:
+ running_prod = tf.einsum('{0}aib,{0}cid->{0}bd'.format(bs_str),
+ tt.tt_cores[0], tt.tt_cores[0])
+
+ for core_idx in range(1, tt.ndims()):
+ curr_core = tt.tt_cores[core_idx]
+ if tt.is_tt_matrix():
+ running_prod = tf.einsum('{0}ac,{0}aijb,{0}cijd->{0}bd'.format(bs_str),
+ running_prod, curr_core, curr_core)
+ else:
+ running_prod = tf.einsum('{0}ac,{0}aib,{0}cid->{0}bd'.format(bs_str),
+ running_prod, curr_core, curr_core)
+
+ return tf.squeeze(running_prod, [-1, -2])
+
+ else:
+ orth_tt = decompositions.orthogonalize_tt_cores(tt, left_to_right=True)
+ # All the cores of orth_tt except the last one are orthogonal, hence
+ # the Frobenius norm of orth_tt equals the norm of the last core.
+ if hasattr(tt, 'batch_size'):
+ batch_size = shapes.lazy_batch_size(tt)
+ last_core = tf.reshape(orth_tt.tt_cores[-1], (batch_size, -1))
+ return tf.norm(last_core, axis=1) ** 2
else:
- running_prod = tf.einsum('ac,aib,cid->bd', running_prod, curr_core,
- curr_core)
- return running_prod[0, 0]
+ return tf.norm(orth_tt.tt_cores[-1]) ** 2
-def frobenius_norm(tt, epsilon=1e-5):
- """Frobenius norm of a TensorTrain (sqrt of the sum of squares of all elements).
+def frobenius_norm(tt, epsilon=1e-5, differentiable=False):
+ """Frobenius norm of `TensorTrain` or of each TT in `TensorTrainBatch`
+
+ Frobenius norm is the sqrt of the sum of squares of all elements in a tensor.
Args:
- tt: `TensorTrain` object
+ tt: `TensorTrain` or `TensorTrainBatch` object
epsilon: the function actually computes sqrt(norm_squared + epsilon) for
numerical stability (e.g. gradient of sqrt at zero is inf).
+ differentiable: bool, whether to use a differentiable implementation or
+ a fast and stable implementation based on QR decomposition.
Returns
- a number
- sqrt of the sum of squares of all elements in `tt`
+ a number which is the Frobenius norm of `tt`, if it is `TensorTrain`
+ OR
+ a Tensor of size tt.batch_size, consisting of the Frobenius norms of
+ each TensorTrain in `tt`, if it is `TensorTrainBatch`
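+
+ Example (an illustrative sketch; t3f.random_tensor is assumed to be
+ exported at the top level):
+ tt = t3f.random_tensor((2, 3, 4))
+ # Fast QR-based norm; pass differentiable=True if gradients are needed.
+ norm = t3f.frobenius_norm(tt)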
"""
- return tf.sqrt(frobenius_norm_squared(tt) + epsilon)
+ return tf.sqrt(frobenius_norm_squared(tt, differentiable) + epsilon)
def transpose(tt_matrix):
@@ -862,24 +1010,23 @@ def transpose(tt_matrix):
def quadratic_form(A, b, c):
- """Computes the quadratic form b^t A c where A is a TT-matrix (or a batch).
+ """Quadratic form b^t A c; A is a TT-matrix, b and c can be batches.
Args:
- A: `TensorTrain` object containing a TT-matrix or `TensorTrainBatch`
- with a batch of TT-matrices.
- b: `TensorTrain` object containing a TT-vector or `TensorTrainBatch`
- with a batch of TT-vectors.
- c: `TensorTrain` object containing a TT-vector or `TensorTrainBatch`
- with a batch of TT-vectors.
+ A: `TensorTrain` object containing a TT-matrix of size N x M.
+ b: `TensorTrain` object containing a TT-matrix of size N x 1
+ or `TensorTrainBatch` with a batch of TT-matrices of size N x 1.
+ c: `TensorTrain` object containing a TT-matrix of size M x 1
+ or `TensorTrainBatch` with a batch of TT-matrices of size M x 1.
Returns:
A number, the value of the quadratic form if all the arguments are
`TensorTrain`s.
- OR tf.tensor of size batch_size if at least one of the arguments is
+ OR tf.Tensor of size batch_size if at least one of the arguments is
`TensorTrainBatch`
Raises:
- ValueError if the argument is not a TT-matrix or if the shapes are
+ ValueError if the arguments are not TT-matrices or if the shapes are
not consistent.
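+
+ Example (an illustrative sketch; assumes t3f exposes these initializers):
+ A = t3f.random_matrix(((2, 3), (4, 5)))  # 6 x 20 TT-matrix.
+ b = t3f.random_matrix(((2, 3), None))  # 6 x 1 TT-vector.
+ c = t3f.random_matrix(((4, 5), None))  # 20 x 1 TT-vector.
+ form = t3f.quadratic_form(A, b, c)  # scalar tf.Tensor, b^t A c.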
"""
if not isinstance(A, TensorTrainBase) or not A.is_tt_matrix():
@@ -891,8 +1038,39 @@ def quadratic_form(A, b, c):
if not isinstance(c, TensorTrainBase) or not c.is_tt_matrix():
raise ValueError('The arguments should be a TT-matrix.')
- # TODO: make a more efficient implementation taylored for this case.
- return flat_inner(A, tt_tt_matmul(b, transpose(c)))
+ b_is_batch = isinstance(b, TensorTrainBatch)
+ c_is_batch = isinstance(c, TensorTrainBatch)
+ b_bs_str = 'p' if b_is_batch else ''
+ c_bs_str = 'p' if c_is_batch else ''
+ out_bs_str = 'p' if b_is_batch or c_is_batch else ''
+
+ ndims = A.ndims()
+ curr_core_1 = b.tt_cores[0]
+ curr_core_2 = c.tt_cores[0]
+ curr_matrix_core = A.tt_cores[0]
+ # We enumerate the dummy dimension (that takes 1 value) with `k`.
+ # You may think that using two different k's would be faster, but in my
+ # experience it's even a little bit slower (the difference is negligible).
+ einsum_str = '{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str, c_bs_str,
+ out_bs_str)
+ res = tf.einsum(einsum_str, curr_core_1, curr_matrix_core, curr_core_2)
+ for core_idx in range(1, ndims):
+ curr_core_1 = b.tt_cores[core_idx]
+ curr_core_2 = c.tt_cores[core_idx]
+ curr_matrix_core = A.tt_cores[core_idx]
+ einsum_str = '{2}ace,{0}aikb,cijd,{1}ejkf->{2}bdf'.format(b_bs_str,
+ c_bs_str,
+ out_bs_str)
+ res = tf.einsum(einsum_str, res, curr_core_1,
+ curr_matrix_core, curr_core_2)
+
+ # Squeeze to make the result a number instead of 1 x 1 in the non-batch case
+ # to make the result a tensor of size
+ # batch_size
+ # instead of
+ # batch_size x 1 x 1
+ # in the batch case.
+ return tf.squeeze(res)
def cast(tt_a, dtype):
@@ -900,20 +1078,127 @@ def cast(tt_a, dtype):
Args:
tt_a: `TensorTrain` object.
- dtype: The destination type.
+ dtype: The destination type.
Raises:
TypeError: If `tt_a` cannot be cast to the `dtype`.
- ValueError: If `tt_a` is not a `TensorTrain`
+ ValueError: If `tt_a` is not a `TensorTrain` or `TensorTrainBatch`.
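+
+ Example (an illustrative sketch):
+ tt = t3f.random_tensor((2, 3, 4))  # float32 cores by default (assumption).
+ tt_64 = t3f.cast(tt, tf.float64)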
"""
-
- if not isinstance(tt_a, TensorTrain):
- raise ValueError('Argument should be a TensorTrain')
-
res_cores = []
cores = tt_a.tt_cores
for core_idx in range(tt_a.ndims()):
res_cores.append(tf.cast(cores[core_idx], dtype))
res_shape = tt_a.get_raw_shape()
res_ranks = tt_a.get_tt_ranks()
- return TensorTrain(res_cores, res_shape, res_ranks)
+ if isinstance(tt_a, TensorTrain):
+ return TensorTrain(res_cores, res_shape, res_ranks)
+ elif isinstance(tt_a, TensorTrainBatch):
+ return TensorTrainBatch(res_cores, res_shape, res_ranks, tt_a.batch_size)
+ else:
+ raise ValueError('Unsupported type of input "%s", should be TensorTrain or '
+ 'TensorTrainBatch.' % tt_a)
+
+
+def gather_nd(tt, indices):
+ """out[i] = tt[indices[i, 0], indices[i, 1], ...]
+
+ Equivalent to
+ tf.gather_nd(t3f.full(tt), indices)
+ but much faster, since it does not materialize the full tensor.
+
+ For batches of TT objects, indices should include the batch dimension as well.
+
+ Args:
+ tt: `TensorTrain` or `TensorTrainBatch` object representing a tensor
+ (TT-matrices are not implemented yet)
+ indices: numpy array, tf.Tensor, placeholder with 2 or more dimensions.
+ The last dimension, indices.shape[-1], should equal the number of
+ dimensions in the TT-object:
+ indices.shape[-1] = tt.ndims for `TensorTrain`
+ indices.shape[-1] = tt.ndims + 1 for `TensorTrainBatch`
+
+ Returns:
+ tf.Tensor with elements specified by indices.
+
+ Raises:
+ ValueError if `indices` has an invalid shape.
+ NotImplementedError if `tt` is a TT-matrix.
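+
+ Example (an illustrative sketch):
+ tt = t3f.random_tensor((3, 4, 5))
+ # Gathers the two elements tt[0, 1, 2] and tt[2, 3, 4].
+ elements = t3f.gather_nd(tt, [[0, 1, 2], [2, 3, 4]])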
+ """
+ if tt.is_tt_matrix():
+ raise NotImplementedError("gather_nd doesn't support TT-matrices yet "
+ '(got %s)' % tt)
+ indices = tf.convert_to_tensor(indices)
+ if isinstance(tt, TensorTrainBatch):
+ if indices.get_shape()[-1] != tt.ndims() + 1:
+ raise ValueError('The last dimension of indices (%d) should have '
+ 'the same size as the number of dimensions in the tt '
+ 'object (%d) + 1 (for the batch dimension).' %
+ (indices.get_shape()[-1], tt.ndims()))
+ else:
+ if indices.get_shape()[-1] != tt.ndims():
+ raise ValueError('The last dimension of indices (%d) should have '
+ 'the same size as the number of dimensions in the tt '
+ 'object (%d).' % (indices.get_shape()[-1], tt.ndims()))
+ tt_elements = tf.ones(tf.shape(indices)[:-1])
+ tt_elements = tf.reshape(tt_elements, (-1, 1, 1))
+ for core_idx in range(tt.ndims()):
+ curr_core = tt.tt_cores[core_idx]
+ if isinstance(tt, TensorTrainBatch):
+ curr_core = tf.transpose(curr_core, (0, 2, 1, 3))
+ curr_idx = tf.stack((indices[:, 0], indices[:, core_idx + 1]), axis=1)
+ core_slices = tf.gather_nd(curr_core, curr_idx)
+ else:
+ curr_core = tf.transpose(curr_core, (1, 0, 2))
+ core_slices = tf.gather(curr_core, indices[:, core_idx])
+ tt_elements = tf.matmul(tt_elements, core_slices)
+ tt_elements = tf.reshape(tt_elements, tf.shape(indices)[:-1])
+ return tt_elements
+
+
+def renormalize_tt_cores(tt, epsilon=1e-8):
+ """Renormalizes TT-cores to make them of the same Frobenius norm.
+
+ Doesn't change the tensor represented by `tt` object, but renormalizes the
+ TT-cores to make further computations more stable.
+
+ Args:
+ tt: `TensorTrain` or `TensorTrainBatch` object
+ epsilon: parameter for numerical stability of sqrt
+ Returns:
+ `TensorTrain` or `TensorTrainBatch` which represents the same
+ tensor as tt, but with all cores having equal norm. In the batch
+ case applies to each TT in `TensorTrainBatch`.
+
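+ Example (an illustrative sketch):
+ tt = t3f.random_tensor((10, 10, 10), tt_rank=7)
+ balanced = t3f.renormalize_tt_cores(tt)
+ # full(balanced) == full(tt) up to numerics, but every TT-core of
+ # balanced now has the same Frobenius norm.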
+ """
+ if isinstance(tt, TensorTrain):
+ new_cores = []
+ running_log_norm = 0
+ core_norms = []
+ for core in tt.tt_cores:
+ cur_core_norm = tf.sqrt(tf.maximum(tf.reduce_sum(core ** 2), epsilon))
+ core_norms.append(cur_core_norm)
+ running_log_norm += tf.log(cur_core_norm)
+
+ running_log_norm = running_log_norm / tt.ndims()
+ fact = tf.exp(running_log_norm)
+ for i, core in enumerate(tt.tt_cores):
+ new_cores.append(core * fact / core_norms[i])
+
+ return TensorTrain(new_cores)
+ else:
+ sz = (tt.batch_size,) + (len(tt.tt_cores[0].shape) - 1) * (1,)
+ running_core_log_norms = tf.zeros(sz)
+ ax = np.arange(len(tt.tt_cores[0].shape))[1:]
+ fact_list = []
+ for core in tt.tt_cores:
+ cur_core_norm_sq = tf.reduce_sum(core**2, axis=ax, keep_dims=True)
+ cur_core_norm = tf.sqrt(tf.maximum(epsilon, cur_core_norm_sq))
+ fact_list.append(cur_core_norm)
+ running_core_log_norms += tf.log(cur_core_norm)
+
+ new_cores = []
+ exp_fact = tf.exp(running_core_log_norms / tt.ndims())
+ for i, core in enumerate(tt.tt_cores):
+ new_cores.append(tf.multiply(core, exp_fact / fact_list[i]))
+
+ return TensorTrainBatch(new_cores)
diff --git a/t3f/ops_test.py b/t3f/ops_test.py
index 19fbd36d..cb1dcc7c 100644
--- a/t3f/ops_test.py
+++ b/t3f/ops_test.py
@@ -1,10 +1,11 @@
import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
-import ops
-import initializers
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import ops
+from t3f import shapes
+from t3f import initializers
class TTTensorTest(tf.test.TestCase):
@@ -105,6 +106,18 @@ def testMultiply(self):
self.assertAllClose(res_actual_val, res_desired_val)
self.assertAllClose(res_actual2_val, res_desired_val)
+ def testMultiplyByNumber(self):
+ # Multiply a tensor by a number.
+ tt = initializers.random_tensor((1, 2, 3), tt_rank=(1, 2, 3, 1))
+ with self.test_session() as sess:
+ res_actual = ops.full(ops.multiply(tt, 4))
+ res_actual2 = ops.full(4.0 * tt)
+ res_desired = 4.0 * ops.full(tt)
+ to_run = [res_actual, res_actual2, res_desired]
+ res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
+ self.assertAllClose(res_actual_val, res_desired_val)
+ self.assertAllClose(res_actual2_val, res_desired_val)
+
def testFrobeniusNormTens(self):
# Frobenius norm of a TT-tensor.
shape_list = ((2, 2),
@@ -130,8 +143,12 @@ def testCastFloat(self):
# Test cast function for float tt-tensors.
tt_x = initializers.random_tensor((2, 3, 2), tt_rank=2)
- for dtype in [tf.float16, tf.float32, tf.float64]:
- self.assertEqual(ops.cast(tt_x, dtype).dtype, dtype)
+ with self.test_session() as sess:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt_x, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
def testCastIntFloat(self):
# Tests cast function from int to float for tensors.
@@ -140,9 +157,27 @@ def testCastIntFloat(self):
K_2 = np.random.randint(0, high=100, size=(2, 3, 2))
K_3 = np.random.randint(0, high=100, size=(2, 2, 1))
tt_int = TensorTrain([K_1, K_2, K_3], tt_ranks=[1, 2, 2, 1])
-
- for dtype in [tf.float16, tf.float32, tf.float64]:
- self.assertEqual(ops.cast(tt_int, dtype).dtype, dtype)
+
+ with self.test_session() as sess:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt_int, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
+
+ def testCoreRenorm(self):
+ a = initializers.random_tensor(3 * (10,), tt_rank=7)
+ b = ops.renormalize_tt_cores(a)
+ var_list = [ops.full(a), ops.full(b)]
+ with self.test_session() as sess:
+ af, bf = sess.run(var_list)
+ b_cores = sess.run(b.tt_cores)
+ b_cores_norms = []
+ for cr in b_cores:
+ b_cores_norms.append(np.linalg.norm(cr))
+ self.assertAllClose(af, bf, atol=1e-5, rtol=1e-5)
+ self.assertAllClose(b_cores_norms, b_cores_norms[0]
+ * np.ones((len(b_cores))))
class TTMatrixTest(tf.test.TestCase):
@@ -317,18 +352,42 @@ def testQuadraticForm(self):
vars = [res_actual, ops.full(A), ops.full(b), ops.full(c)]
res_actual_val, A_val, b_val, c_val = sess.run(vars)
res_desired = b_val.T.dot(A_val).dot(c_val)
- self.assertAllClose(res_actual_val, np.squeeze(res_desired))
+ self.assertAllClose(res_actual_val, np.squeeze(res_desired),
+ atol=1e-5, rtol=1e-5)
+
+ def testQuadraticFormBatch(self):
+ # Test quadratic form for batch of tensors.
+ shape_list = (((2, 2), (3, 4)),
+ ((2, 3, 4), (2, 2, 2)))
+ rank_list = (1, 2)
+ with self.test_session() as sess:
+ for tensor_shape in shape_list:
+ for rank in rank_list:
+ A = initializers.random_matrix(tensor_shape, tt_rank=rank)
+ b = initializers.random_matrix_batch((tensor_shape[0], None),
+ tt_rank=rank, batch_size=5)
+ c = initializers.random_matrix_batch((tensor_shape[1], None),
+ tt_rank=rank, batch_size=5)
+ res_actual = ops.quadratic_form(A, b, c)
+ vars = [res_actual, ops.full(A), ops.full(b), ops.full(c)]
+ res_actual_val, A_val, b_val, c_val = sess.run(vars)
+ res_desired = np.diag(b_val[:, :, 0].dot(A_val).dot(c_val[:, :, 0].T))
+ self.assertAllClose(res_actual_val, np.squeeze(res_desired),
+ atol=1e-5, rtol=1e-5)
def testCastFloat(self):
# Test cast function for float tt-matrices and vectors.
-
- tt_mat = initializers.random_matrix(((2, 3), (3, 2)), tt_rank=2)
+ tt_mat = initializers.random_matrix(((2, 3), (3, 2)), tt_rank=2)
tt_vec = initializers.random_matrix(((2, 3), None), tt_rank=2)
-
- for dtype in [tf.float16, tf.float32, tf.float64]:
- self.assertEqual(ops.cast(tt_vec, dtype).dtype, dtype)
- self.assertEqual(ops.cast(tt_mat, dtype).dtype, dtype)
+
+ with self.test_session() as sess:
+ for tt in [tt_mat, tt_vec]:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
def testCastIntFloat(self):
# Tests cast function from int to float for matrices.
@@ -337,9 +396,13 @@ def testCastIntFloat(self):
K_2 = np.random.randint(0, high=100, size=(2, 3, 3, 2))
K_3 = np.random.randint(0, high=100, size=(2, 2, 2, 1))
tt_int = TensorTrain([K_1, K_2, K_3], tt_ranks=[1, 2, 2, 1])
-
- for dtype in [tf.float16, tf.float32, tf.float64]:
- self.assertEqual(ops.cast(tt_int, dtype).dtype, dtype)
+
+ with self.test_session() as sess:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt_int, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
def testUnknownRanksTTMatmul(self):
# Tests tt_tt_matmul for matrices with unknown ranks
@@ -358,8 +421,8 @@ def testUnknownRanksTTMatmul(self):
def testHalfKnownRanksTTMatmul(self):
- # Tests tt_tt_matmul for the case when one matrice has known ranks
- # and the other one doesn't
+ # Tests tt_tt_matmul for the case when one matrix has known ranks
+ # and the other one doesn't.
np.random.seed(1)
K_1 = tf.placeholder(tf.float32, (1, 2, 2, None))
K_2 = tf.placeholder(tf.float32, (None, 3, 3, 1))
@@ -469,10 +532,32 @@ def testAddBroadcasting(self):
self.assertAllClose(res_actual_val, res_desired_val)
self.assertAllClose(res_actual2_val, res_desired_val)
+ def testMultiplyByNumber(self):
+ # Multiply batch of tensors by a number.
+ tt = initializers.random_tensor_batch((1, 2, 3), tt_rank=(1, 2, 3, 1),
+ batch_size=3)
+ with self.test_session() as sess:
+ res_actual = ops.full(ops.multiply(tt, 4))
+ res_actual2 = ops.full(4.0 * tt)
+ res_desired = 4.0 * ops.full(tt)
+ to_run = [res_actual, res_actual2, res_desired]
+ res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
+ self.assertAllClose(res_actual_val, res_desired_val)
+ self.assertAllClose(res_actual2_val, res_desired_val)
+
+ def testFrobeniusNormDifferentiableBatch(self):
+ with self.test_session() as sess:
+ tt = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
+ norm_sq_diff = ops.frobenius_norm_squared(tt, differentiable=True)
+ variables = [norm_sq_diff, ops.full(tt)]
+ norm_sq_diff_val, tt_full = sess.run(variables)
+ desired_norm = np.linalg.norm(tt_full.reshape((5, -1)), axis=1)**2
+ self.assertAllClose(norm_sq_diff_val, desired_norm, atol=1e-5, rtol=1e-5)
+
def testFrobeniusNormTens(self):
# Frobenius norm of a batch of TT-tensors.
with self.test_session() as sess:
- tt = initializers.random_tensor_batch((2, 1, 3), batch_size=3)
+ tt = initializers.tensor_batch_with_random_cores((2, 1, 3), batch_size=3)
norm_sq_actual = ops.frobenius_norm_squared(tt)
norm_actual = ops.frobenius_norm(tt)
vars = [norm_sq_actual, norm_actual, ops.full(tt)]
@@ -484,6 +569,158 @@ def testFrobeniusNormTens(self):
self.assertAllClose(norm_actual_val, norm_desired_val, atol=1e-5,
rtol=1e-5)
+ def testMultiplyBatchByTensor(self):
+ tt_a = initializers.random_tensor((3, 3, 3), tt_rank=2)
+ tt_b = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
+ with self.test_session() as sess:
+ res_actual = ops.full(ops.multiply(tt_a, tt_b))
+ res_actual2 = ops.full(ops.multiply(tt_b, tt_a))
+ res_desired = ops.full(tt_a) * ops.full(tt_b)
+ to_run = [res_actual, res_actual2, res_desired]
+ res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
+ self.assertAllClose(res_actual_val, res_desired_val)
+ self.assertAllClose(res_actual2_val, res_desired_val)
+
+ def testMultiplyBatchByBatch(self):
+ tt_a = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
+ tt_b = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
+ with self.test_session() as sess:
+ res_actual = ops.full(ops.multiply(tt_a, tt_b))
+ res_actual2 = ops.full(ops.multiply(tt_b, tt_a))
+ res_desired = ops.full(tt_a) * ops.full(tt_b)
+ to_run = [res_actual, res_actual2, res_desired]
+ res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
+ self.assertAllClose(res_actual_val, res_desired_val)
+ self.assertAllClose(res_actual2_val, res_desired_val)
+
+ def testMultiplyBroadcasting(self):
+ tt_a = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=1)
+ tt_b = initializers.random_tensor_batch((3, 3, 3), tt_rank=2, batch_size=5)
+ with self.test_session() as sess:
+ res_actual = ops.full(ops.multiply(tt_a, tt_b))
+ res_actual2 = ops.full(ops.multiply(tt_b, tt_a))
+ res_desired = ops.full(tt_a) * ops.full(tt_b)
+ to_run = [res_actual, res_actual2, res_desired]
+ res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
+ self.assertAllClose(res_actual_val, res_desired_val)
+ self.assertAllClose(res_actual2_val, res_desired_val)
+
+ def testMultiplyUnknownBatchSizeBroadcasting(self):
+ c1 = tf.placeholder(tf.float32, [None, 1, 3, 2])
+ c2 = tf.placeholder(tf.float32, [None, 2, 3, 1])
+ tt_a = TensorTrainBatch([c1, c2])
+ tt_b = initializers.random_tensor_batch((3, 3), tt_rank=3, batch_size=1)
+ tt_c = initializers.random_tensor((3, 3), tt_rank=3)
+ res_ab = ops.full(ops.multiply(tt_a, tt_b))
+ res_ba = ops.full(ops.multiply(tt_b, tt_a))
+ res_ac = ops.full(ops.multiply(tt_a, tt_c))
+ res_ca = ops.full(ops.multiply(tt_c, tt_a))
+ res_desired_ab = ops.full(tt_a) * ops.full(tt_b)
+ res_desired_ac = ops.full(tt_a) * ops.full(tt_c)
+ to_run = [res_ab, res_ba, res_ac, res_ca, res_desired_ab, res_desired_ac]
+ feed_dict = {c1:np.random.rand(7, 1, 3, 2),
+ c2:np.random.rand(7, 2, 3, 1)}
+ with self.test_session() as sess:
+ ab, ba, ac, ca, des_ab, des_ac = sess.run(to_run, feed_dict=feed_dict)
+ self.assertAllClose(ab, des_ab)
+ self.assertAllClose(ba, des_ab)
+ self.assertAllClose(ac, des_ac)
+ self.assertAllClose(ca, des_ac)
+
+ def testMultiplyTwoBatchesUnknownSize(self):
+ c1 = tf.placeholder(tf.float32, [None, 1, 3, 2])
+ c2 = tf.placeholder(tf.float32, [None, 2, 3, 1])
+ c3 = tf.placeholder(tf.float32, [None, 1, 3, 2])
+ c4 = tf.placeholder(tf.float32, [None, 2, 3, 1])
+ tt_a = TensorTrainBatch([c1, c2])
+ tt_b = TensorTrainBatch([c3, c4])
+ res_ab = ops.full(ops.multiply(tt_a, tt_b))
+ res_ba = ops.full(ops.multiply(tt_b, tt_a))
+ res_desired = ops.full(tt_a) * ops.full(tt_b)
+ to_run = [res_ab, res_ba, res_desired]
+ feed_dict = {c1:np.random.rand(7, 1, 3, 2),
+ c2:np.random.rand(7, 2, 3, 1),
+ c3:np.random.rand(7, 1, 3, 2),
+ c4:np.random.rand(7, 2, 3, 1)}
+
+ feed_dict_err = {c1:np.random.rand(7, 1, 3, 2),
+ c2:np.random.rand(7, 2, 3, 1),
+ c3:np.random.rand(1, 1, 3, 2),
+ c4:np.random.rand(1, 2, 3, 1)}
+
+ with self.test_session() as sess:
+ ab_full, ba_full, des_full = sess.run(to_run, feed_dict=feed_dict)
+ self.assertAllClose(ab_full, des_full)
+ self.assertAllClose(ba_full, des_full)
+ with self.assertRaises(tf.errors.InvalidArgumentError):
+ sess.run(to_run, feed_dict=feed_dict_err)
+
+ def testMultiplyUnknownSizeBatchAndBatch(self):
+ c1 = tf.placeholder(tf.float32, [None, 1, 3, 2])
+ c2 = tf.placeholder(tf.float32, [None, 2, 3, 1])
+ tt_b = initializers.random_tensor_batch((3, 3), tt_rank=2, batch_size=8)
+ tt_a = TensorTrainBatch([c1, c2])
+ res_ab = ops.full(ops.multiply(tt_a, tt_b))
+ res_ba = ops.full(ops.multiply(tt_b, tt_a))
+ res_desired = ops.full(tt_a) * ops.full(tt_b)
+ to_run = [res_ab, res_ba, res_desired]
+ feed_dict = {c1:np.random.rand(8, 1, 3, 2),
+ c2:np.random.rand(8, 2, 3, 1)}
+
+ feed_dict_err = {c1:np.random.rand(1, 1, 3, 2),
+ c2:np.random.rand(1, 2, 3, 1)}
+
+ with self.test_session() as sess:
+ ab_full, ba_full, des_full = sess.run(to_run, feed_dict=feed_dict)
+ self.assertAllClose(ab_full, des_full)
+ self.assertAllClose(ba_full, des_full)
+ with self.assertRaises(tf.errors.InvalidArgumentError):
+ sess.run(to_run, feed_dict=feed_dict_err)
+
+ def testGatherND(self):
+ idx = [[0, 0, 0], [0, 1, 2], [0, 1, 0]]
+ pl_idx = tf.placeholder(tf.int32, [None, 3])
+ tt = initializers.random_tensor((3, 4, 5), tt_rank=2)
+ res_np = ops.gather_nd(tt, idx)
+ res_pl = ops.gather_nd(tt, pl_idx)
+ res_desired = tf.gather_nd(ops.full(tt), idx)
+ to_run = [res_np, res_pl, res_desired]
+ with self.test_session() as sess:
+ res_np_v, res_pl_v, des_v = sess.run(to_run, feed_dict={pl_idx: idx})
+ self.assertAllClose(res_np_v, des_v)
+ self.assertAllClose(res_pl_v, des_v)
+
+ def testGatherNDBatch(self):
+ idx = [[0, 0, 0, 0], [1, 0, 1, 2], [0, 0, 1, 0]]
+ pl_idx = tf.placeholder(tf.int32, [None, 4])
+ tt = initializers.random_tensor_batch((3, 4, 5), tt_rank=2, batch_size=2)
+ res_np = ops.gather_nd(tt, idx)
+ res_pl = ops.gather_nd(tt, pl_idx)
+ res_desired = tf.gather_nd(ops.full(tt), idx)
+ to_run = [res_np, res_pl, res_desired]
+ with self.test_session() as sess:
+ res_np_v, res_pl_v, des_v = sess.run(to_run, feed_dict={pl_idx: idx})
+ self.assertAllClose(res_np_v, des_v)
+ self.assertAllClose(res_pl_v, des_v)
+
+ def testCoreRenormBatch(self):
+ a = initializers.random_tensor_batch(3 * (10,), tt_rank=7, batch_size=5)
+ b = ops.renormalize_tt_cores(a)
+ var_list = [ops.full(a), ops.full(b)]
+
+ with self.test_session() as sess:
+ af, bf = sess.run(var_list)
+ b_cores = sess.run(b.tt_cores)
+ b_cores_norms = []
+ for cr in b_cores:
+ b_cores_norms.append(np.linalg.norm(cr))
+ self.assertAllClose(af, bf, atol=1e-5, rtol=1e-5)
+ self.assertAllClose(b_cores_norms, b_cores_norms[0]
+ * np.ones((len(b_cores))))
class TTMatrixTestBatch(tf.test.TestCase):
@@ -559,8 +796,9 @@ def testTTMatTimesTTMatBroadcasting(self):
ops.full(tt_mat_2[0]))
to_run = [res_actual, res_actual2, res_desired]
res_actual_val, res_actual2_val, res_desired_val = sess.run(to_run)
- self.assertAllClose(res_actual_val, res_desired_val)
- self.assertAllClose(res_actual2_val, res_desired_val)
+ self.assertAllClose(res_actual_val, res_desired_val, atol=1e-5, rtol=1e-5)
+ self.assertAllClose(res_actual2_val, res_desired_val, atol=1e-5,
+ rtol=1e-5)
def testTranspose(self):
# Transpose a batch of TT-matrices.
@@ -600,6 +838,34 @@ def testAddBroadcasting(self):
self.assertAllClose(res_actual_val, res_desired_val)
self.assertAllClose(res_actual2_val, res_desired_val)
+ def testCastFloat(self):
+ # Test cast function for float tt-matrices and vectors.
+ tt_mat = initializers.random_matrix_batch(((2, 3), (3, 2)), tt_rank=2,
+ batch_size=3)
+
+ with self.test_session() as sess:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt_mat, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
+
+ def testCastIntFloat(self):
+ # Tests cast function from int to float for matrices.
+ np.random.seed(1)
+ K_1 = np.random.randint(0, high=100, size=(1, 2, 2, 2))
+ K_2 = np.random.randint(0, high=100, size=(2, 3, 3, 2))
+ K_3 = np.random.randint(0, high=100, size=(2, 2, 2, 1))
+ tt_int = TensorTrain([K_1, K_2, K_3], tt_ranks=[1, 2, 2, 1])
+ tt_int_batch = shapes.expand_batch_dim(tt_int)
+
+ with self.test_session() as sess:
+ for dtype in [tf.float16, tf.float32, tf.float64]:
+ casted = ops.cast(tt_int_batch, dtype)
+ casted_val = sess.run(ops.full(casted))
+ self.assertEqual(dtype, casted.dtype)
+ self.assertEqual(dtype, casted_val.dtype)
+
def _random_sparse(shape, non_zeros):
sparse_flat_indices = np.random.choice(np.prod(shape), non_zeros).astype(int)
diff --git a/t3f/regularizers.py b/t3f/regularizers.py
index 1fe8616d..6928e329 100644
--- a/t3f/regularizers.py
+++ b/t3f/regularizers.py
@@ -2,7 +2,7 @@
import tensorflow as tf
-import ops
+from t3f import ops
def l2_regularizer(scale, scope=None):
diff --git a/t3f/riemannian.py b/t3f/riemannian.py
index 7cdca1ef..21a49748 100644
--- a/t3f/riemannian.py
+++ b/t3f/riemannian.py
@@ -1,16 +1,17 @@
import tensorflow as tf
-import shapes
-import decompositions
-from tensor_train import TensorTrain
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
+from t3f import shapes
+from t3f import decompositions
-def project(tangent_space_tens, tensor, coef=None):
- """Project (TT) tensor on the tangent space of (TT) tangent_space_tens.
+def project_sum(what, where, weights=None):
+ """Project sum of `what` TTs on the tangent space of `where` TT.
- project(x, tens) = P_x(tens)
- project(x, batch) = P_x(\sum_i batch[i])
- project(x, tens, coef) = P_x(\sum_i coef[i] * batch[i])
+ project_sum(what, x) = P_x(what)
+ project_sum(batch_what, x) = P_x(\sum_i batch_what[i])
+ project_sum(batch_what, x, weights) = P_x(\sum_j weights[j] * batch_what[j])
This function implements the algorithm from the paper [1], theorem 3.1.
@@ -18,108 +19,733 @@ def project(tangent_space_tens, tensor, coef=None):
Tensor Trains.
Args:
- tangent_space_tens: TensorTrain.
- tensor: TensorTrain or TensorTrainBatch. In the case of batch returns
+ what: TensorTrain or TensorTrainBatch. In the case of batch returns
projection of the sum of elements in the batch.
- coef: python list or tf.Tensor of numbers or None
+ where: TensorTrain, TT-tensor or TT-matrix on whose tangent space to project
+ weights: python list or tf.Tensor of numbers or None, weights of the sum
Returns:
a TensorTrain with the TT-ranks equal 2 * tangent_space_tens.get_tt_ranks()
+
+ Complexity:
+ O(d r_where^3 m) for orthogonalizing the TT-cores of where
+ +O(batch_size d r_what r_where n (r_what + r_where))
+ d is the number of TT-cores (what.ndims());
+ r_what is the largest TT-rank of what: max(what.get_tt_ranks());
+ r_where is the largest TT-rank of where;
+ n is the size of the axis dimension of what and where, e.g.
+ for a tensor of size 4 x 4 x 4, n is 4;
+ for a 9 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
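+
+ Example (an illustrative sketch; the initializers are assumed to be
+ exposed at the t3f top level):
+ x = t3f.random_tensor((2, 3, 4))
+ batch = t3f.random_tensor_batch((2, 3, 4), batch_size=3)
+ # Project the sum of the three tensors on the tangent space at x.
+ proj = t3f.project_sum(batch, x)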
"""
- # Always work with batch of TT for simplicity.
- tensor = shapes.expand_batch_dim(tensor)
+ # Always work with batch of TT objects for simplicity.
+ what = shapes.expand_batch_dim(what)
+
+ if weights is not None:
+ weights = tf.convert_to_tensor(weights)
- if not isinstance(tangent_space_tens, TensorTrain):
+ if not isinstance(where, TensorTrain):
raise ValueError('The first argument should be a TensorTrain object, got '
- '"%s".' % tangent_space_tens)
+ '"%s".' % where)
- if tangent_space_tens.get_raw_shape() != tensor.get_raw_shape():
+ if where.get_raw_shape() != what.get_raw_shape():
raise ValueError('The shapes of the tensor we want to project and of the '
'tensor on which tangent space we want to project should '
'match, got %s and %s.' %
- (tangent_space_tens.get_raw_shape(),
- tensor.get_raw_shape()))
+ (where.get_raw_shape(),
+ what.get_raw_shape()))
+
+ dtypes_compatible = (where.dtype.is_compatible_with(what.dtype) or
+ what.dtype.is_compatible_with(where.dtype))
+ if not dtypes_compatible:
+ raise ValueError('Dtypes of the arguments should coincide, got %s and %s.' %
+ (where.dtype,
+ what.dtype))
left_tangent_space_tens = decompositions.orthogonalize_tt_cores(
- tangent_space_tens)
+ where)
right_tangent_space_tens = decompositions.orthogonalize_tt_cores(
left_tangent_space_tens, left_to_right=False)
-
- ndims = tangent_space_tens.ndims()
- dtype = tangent_space_tens.dtype
- raw_shape = shapes.lazy_raw_shape(tangent_space_tens)
- batch_size = shapes.lazy_batch_size(tensor)
+ ndims = where.ndims()
+ dtype = where.dtype
+ raw_shape = shapes.lazy_raw_shape(where)
+ batch_size = shapes.lazy_batch_size(what)
right_tangent_tt_ranks = shapes.lazy_tt_ranks(right_tangent_space_tens)
left_tangent_tt_ranks = shapes.lazy_tt_ranks(left_tangent_space_tens)
# For einsum notation.
- mode_str = 'ij' if tangent_space_tens.is_tt_matrix() else 'i'
- right_rank_dim = 3 if tangent_space_tens.is_tt_matrix() else 2
+ mode_str = 'ij' if where.is_tt_matrix() else 'i'
+ right_rank_dim = where.right_tt_rank_dim
+ left_rank_dim = where.left_tt_rank_dim
+ if weights is not None:
+ weights_shape = weights.get_shape()
+ output_is_batch = len(weights_shape) > 1 and weights_shape[1] > 1
+ else:
+ output_is_batch = False
+ output_batch_str = 'o' if output_is_batch else ''
+ if output_is_batch:
+ right_rank_dim += 1
+ left_rank_dim += 1
+ output_batch_size = weights.get_shape()[1].value
# Prepare rhs vectors.
# rhs[core_idx] is of size
# batch_size x tensor_tt_ranks[core_idx] x tangent_tt_ranks[core_idx]
rhs = [None] * (ndims + 1)
- rhs[ndims] = tf.ones((batch_size, 1, 1))
+ rhs[ndims] = tf.ones((batch_size, 1, 1), dtype=dtype)
for core_idx in range(ndims - 1, 0, -1):
- tens_core = tensor.tt_cores[core_idx]
+ tens_core = what.tt_cores[core_idx]
right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
- einsum_str = 'oa{0}b,c{0}d,obd->oac'.format(mode_str)
- rhs[core_idx] = tf.einsum(einsum_str, tens_core, right_tang_core,
- rhs[core_idx + 1])
+ einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str)
+ rhs[core_idx] = tf.einsum(einsum_str, tens_core, rhs[core_idx + 1],
+ right_tang_core)
# Prepare lhs vectors.
# lhs[core_idx] is of size
# batch_size x tangent_tt_ranks[core_idx] x tensor_tt_ranks[core_idx]
lhs = [None] * (ndims + 1)
- if coef is None:
- lhs[0] = tf.ones((batch_size, 1, 1))
+ lhs[0] = tf.ones((batch_size, 1, 1), dtype=dtype)
+ for core_idx in range(ndims - 1):
+ tens_core = what.tt_cores[core_idx]
+ left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
+ einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str)
+ lhs[core_idx + 1] = tf.einsum(einsum_str, lhs[core_idx], left_tang_core,
+ tens_core)
+
+ # Left to right sweep.
+ res_cores_list = []
+ for core_idx in range(ndims):
+ tens_core = what.tt_cores[core_idx]
+ left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
+ right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
+
+ if core_idx < ndims - 1:
+ einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str)
+ proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core)
+ einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str)
+ proj_core -= tf.einsum(einsum_str, left_tang_core, lhs[core_idx + 1])
+ if weights is None:
+ einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str)
+ proj_core = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1])
+ else:
+ einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str, output_batch_str)
+ proj_core_s = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1])
+ einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str)
+ proj_core = tf.einsum(einsum_str, weights, proj_core_s)
+
+ if core_idx == ndims - 1:
+ if weights is None:
+ einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str)
+ proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core)
+ else:
+ einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str, output_batch_str)
+ proj_core_s = tf.einsum(einsum_str, lhs[core_idx], tens_core)
+ einsum_str = 's{1},sa{0}c->{1}a{0}c'.format(mode_str, output_batch_str)
+ proj_core = tf.einsum(einsum_str, weights, proj_core_s)
+
+ if output_is_batch:
+ # Add batch dimension of size output_batch_size to left_tang_core and
+ # right_tang_core
+ extended_left_tang_core = tf.expand_dims(left_tang_core, 0)
+ extended_right_tang_core = tf.expand_dims(right_tang_core, 0)
+ if where.is_tt_matrix():
+ extended_left_tang_core = tf.tile(extended_left_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ extended_right_tang_core = tf.tile(extended_right_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ else:
+ extended_left_tang_core = tf.tile(extended_left_tang_core,
+ [output_batch_size, 1, 1, 1])
+ extended_right_tang_core = tf.tile(extended_right_tang_core,
+ [output_batch_size, 1, 1, 1])
+ else:
+ extended_left_tang_core = left_tang_core
+ extended_right_tang_core = right_tang_core
+
+ if core_idx == 0:
+ res_core = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
+ elif core_idx == ndims - 1:
+ res_core = tf.concat((extended_right_tang_core, proj_core), axis=left_rank_dim)
+ else:
+ rank_1 = right_tangent_tt_ranks[core_idx]
+ rank_2 = left_tangent_tt_ranks[core_idx + 1]
+ if where.is_tt_matrix():
+ mode_size_n = raw_shape[0][core_idx]
+ mode_size_m = raw_shape[1][core_idx]
+ shape = [rank_1, mode_size_n, mode_size_m, rank_2]
+ else:
+ mode_size = raw_shape[0][core_idx]
+ shape = [rank_1, mode_size, rank_2]
+ if output_is_batch:
+ shape = [output_batch_size] + shape
+ zeros = tf.zeros(shape, dtype)
+ upper = tf.concat((extended_right_tang_core, zeros), axis=right_rank_dim)
+ lower = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
+ res_core = tf.concat((upper, lower), axis=left_rank_dim)
+ res_cores_list.append(res_core)
+ # TODO: TT-ranks.
+ if output_is_batch:
+ res = TensorTrainBatch(res_cores_list, where.get_raw_shape(),
+ batch_size=output_batch_size)
else:
- lhs[0] = tf.reshape(coef, (batch_size, 1, 1))
+ res = TensorTrain(res_cores_list, where.get_raw_shape())
+
+ res.projection_on = where
+ return res
+
+
+def project(what, where):
+ """Project `what` TTs on the tangent space of `where` TT.
+
+ project(what, x) = P_x(what)
+ project(batch_what, x) = batch(P_x(batch_what[0]), ..., P_x(batch_what[N]))
+
+ This function implements the algorithm from the paper [1], theorem 3.1.
+
+ [1] C. Lubich, I. Oseledets and B. Vandereycken, Time integration of
+ Tensor Trains.
+
+ Args:
+ what: TensorTrain or TensorTrainBatch. In the case of batch returns
+ batch with projection of each individual tensor.
+ where: TensorTrain, TT-tensor or TT-matrix on whose tangent space to project
+
+ Returns:
+ a TensorTrain with TT-ranks equal to 2 * where.get_tt_ranks() (a
+ TensorTrainBatch of such projections if `what` is a batch)
+
+ Complexity:
+ O(d r_where^3 m) for orthogonalizing the TT-cores of where
+ +O(batch_size d r_what r_where n (r_what + r_where))
+ d is the number of TT-cores (what.ndims());
+ r_what is the largest TT-rank of what: max(what.get_tt_ranks());
+ r_where is the largest TT-rank of where;
+ n is the size of the axis dimension of what and where, e.g.
+ for a tensor of size 4 x 4 x 4, n is 4;
+ for a 9 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4), n is 12.
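+
+ Example (an illustrative sketch):
+ x = t3f.random_tensor((2, 3, 4))
+ batch = t3f.random_tensor_batch((2, 3, 4), batch_size=3)
+ # A batch of three projections, one per element of the input batch.
+ proj = t3f.project(batch, x)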
+ """
+
+ if not isinstance(where, TensorTrain):
+ raise ValueError('The first argument should be a TensorTrain object, got '
+ '"%s".' % where)
+
+ if where.get_raw_shape() != what.get_raw_shape():
+ raise ValueError('The shapes of the tensor we want to project and of the '
+ 'tensor on which tangent space we want to project should '
+ 'match, got %s and %s.' %
+ (where.get_raw_shape(),
+ what.get_raw_shape()))
+ dtypes_compatible = (where.dtype.is_compatible_with(what.dtype) or
+ what.dtype.is_compatible_with(where.dtype))
+ if not dtypes_compatible:
+ raise ValueError('Dtypes of the arguments should coincide, got %s and %s.' %
+ (where.dtype,
+ what.dtype))
+
+ left_tangent_space_tens = decompositions.orthogonalize_tt_cores(
+ where)
+ right_tangent_space_tens = decompositions.orthogonalize_tt_cores(
+ left_tangent_space_tens, left_to_right=False)
+
+ ndims = where.ndims()
+ dtype = where.dtype
+ raw_shape = shapes.lazy_raw_shape(where)
+ right_tangent_tt_ranks = shapes.lazy_tt_ranks(right_tangent_space_tens)
+ left_tangent_tt_ranks = shapes.lazy_tt_ranks(left_tangent_space_tens)
+
+ # For einsum notation.
+ mode_str = 'ij' if where.is_tt_matrix() else 'i'
+ right_rank_dim = what.right_tt_rank_dim
+ left_rank_dim = what.left_tt_rank_dim
+ output_is_batch = isinstance(what, TensorTrainBatch)
+ if output_is_batch:
+ output_batch_size = what.batch_size
+
+ # Always work with batch of TT objects for simplicity.
+ what = shapes.expand_batch_dim(what)
+ batch_size = shapes.lazy_batch_size(what)
+
+ # Prepare rhs vectors.
+ # rhs[core_idx] is of size
+ # batch_size x tensor_tt_ranks[core_idx] x tangent_tt_ranks[core_idx]
+ rhs = [None] * (ndims + 1)
+ rhs[ndims] = tf.ones((batch_size, 1, 1), dtype=dtype)
+ for core_idx in range(ndims - 1, 0, -1):
+ tens_core = what.tt_cores[core_idx]
+ right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
+ einsum_str = 'sa{0}b,sbd,c{0}d->sac'.format(mode_str)
+ rhs[core_idx] = tf.einsum(einsum_str, tens_core, rhs[core_idx + 1],
+ right_tang_core)
+
+ # Prepare lhs vectors.
+ # lhs[core_idx] is of size
+ # batch_size x tangent_tt_ranks[core_idx] x tensor_tt_ranks[core_idx]
+ lhs = [None] * (ndims + 1)
+ lhs[0] = tf.ones((batch_size, 1, 1), dtype=dtype)
for core_idx in range(ndims - 1):
- tens_core = tensor.tt_cores[core_idx]
+ tens_core = what.tt_cores[core_idx]
left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
- einsum_str = 'oab,a{0}c,ob{0}d->ocd'.format(mode_str)
+ einsum_str = 'sab,a{0}c,sb{0}d->scd'.format(mode_str)
lhs[core_idx + 1] = tf.einsum(einsum_str, lhs[core_idx], left_tang_core,
tens_core)
# Left to right sweep.
res_cores_list = []
for core_idx in range(ndims):
- tens_core = tensor.tt_cores[core_idx]
+ tens_core = what.tt_cores[core_idx]
left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
if core_idx < ndims - 1:
- einsum_str = 'oab,ob{0}c->oa{0}c'.format(mode_str)
+ einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str)
proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core)
- einsum_str = 'a{0}b,obc->oa{0}c'.format(mode_str)
+ einsum_str = 'a{0}b,sbc->sa{0}c'.format(mode_str)
proj_core -= tf.einsum(einsum_str, left_tang_core, lhs[core_idx + 1])
- einsum_str = 'oa{0}b,obc->a{0}c'.format(mode_str)
+ if output_is_batch:
+ einsum_str = 'sa{0}b,sbc->sa{0}c'.format(mode_str)
+ else:
+ einsum_str = 'sa{0}b,sbc->a{0}c'.format(mode_str)
proj_core = tf.einsum(einsum_str, proj_core, rhs[core_idx + 1])
if core_idx == ndims - 1:
- einsum_str = 'oab,ob{0}c->a{0}c'.format(mode_str)
+ if output_is_batch:
+ einsum_str = 'sab,sb{0}c->sa{0}c'.format(mode_str)
+ else:
+ einsum_str = 'sab,sb{0}c->a{0}c'.format(mode_str)
proj_core = tf.einsum(einsum_str, lhs[core_idx], tens_core)
+ if output_is_batch:
+ # Add batch dimension of size output_batch_size to left_tang_core and
+ # right_tang_core
+ extended_left_tang_core = tf.expand_dims(left_tang_core, 0)
+ extended_right_tang_core = tf.expand_dims(right_tang_core, 0)
+ if where.is_tt_matrix():
+ extended_left_tang_core = tf.tile(extended_left_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ extended_right_tang_core = tf.tile(extended_right_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ else:
+ extended_left_tang_core = tf.tile(extended_left_tang_core,
+ [output_batch_size, 1, 1, 1])
+ extended_right_tang_core = tf.tile(extended_right_tang_core,
+ [output_batch_size, 1, 1, 1])
+ else:
+ extended_left_tang_core = left_tang_core
+ extended_right_tang_core = right_tang_core
+
if core_idx == 0:
- res_core = tf.concat((proj_core, left_tang_core), axis=right_rank_dim)
+ res_core = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
elif core_idx == ndims - 1:
- res_core = tf.concat((right_tang_core, proj_core), axis=0)
+ res_core = tf.concat((extended_right_tang_core, proj_core), axis=left_rank_dim)
else:
rank_1 = right_tangent_tt_ranks[core_idx]
rank_2 = left_tangent_tt_ranks[core_idx + 1]
- if tangent_space_tens.is_tt_matrix():
+ if where.is_tt_matrix():
mode_size_n = raw_shape[0][core_idx]
mode_size_m = raw_shape[1][core_idx]
- zeros = tf.zeros((rank_1, mode_size_n, mode_size_m, rank_2), dtype)
+ shape = [rank_1, mode_size_n, mode_size_m, rank_2]
else:
mode_size = raw_shape[0][core_idx]
- zeros = tf.zeros((rank_1, mode_size, rank_2), dtype)
- upper = tf.concat((right_tang_core, zeros), axis=right_rank_dim)
- lower = tf.concat((proj_core, left_tang_core), axis=right_rank_dim)
- res_core = tf.concat((upper, lower), axis=0)
+ shape = [rank_1, mode_size, rank_2]
+ if output_is_batch:
+ shape = [output_batch_size] + shape
+ zeros = tf.zeros(shape, dtype)
+ upper = tf.concat((extended_right_tang_core, zeros), axis=right_rank_dim)
+ lower = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
+ res_core = tf.concat((upper, lower), axis=left_rank_dim)
+ res_cores_list.append(res_core)
+ # TODO: TT-ranks.
+ if output_is_batch:
+ res = TensorTrainBatch(res_cores_list, where.get_raw_shape(),
+ batch_size=output_batch_size)
+ else:
+ res = TensorTrain(res_cores_list, where.get_raw_shape())
+
+ res.projection_on = where
+ return res
+
+
+def project_matmul(what, where, matrix):
+ """Project `matrix` * `what` TTs on the tangent space of `where` TT.
+
+ project_matmul(what, x, matrix) = P_x(matrix * what)
+ project_matmul(batch_what, x, matrix) =
+ batch(P_x(matrix * batch_what[0]), ..., P_x(matrix * batch_what[N]))
+
+ This function implements the algorithm from the paper [1], theorem 3.1.
+
+ [1] C. Lubich, I. Oseledets and B. Vandereycken, Time integration of
+ Tensor Trains.
+
+ Args:
+ what: TensorTrain or TensorTrainBatch. In the case of batch returns
+ batch with projection of each individual tensor.
+ where: TensorTrain, TT-tensor or TT-matrix on whose tangent space to project
+ matrix: TensorTrain, TT-matrix to multiply `what` by
+
+ Returns:
+ a TensorTrain with TT-ranks equal to 2 * where.get_tt_ranks()
+
+ Complexity:
+ O(d r_where^3 m) for orthogonalizing the TT-cores of where
+ +O(batch_size d R r_what r_where (n r_what + n m R + m r_where))
+ d is the number of TT-cores (what.ndims());
+ r_what is the largest TT-rank of what: max(what.get_tt_ranks());
+ r_where is the largest TT-rank of where;
+ matrix is of TT-rank R and of raw shape (m, m, ..., m) x (n, n, ..., n).
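+
+ Example (an illustrative sketch with hypothetical square shapes):
+ A = t3f.random_matrix(((2, 3), (2, 3)))  # 6 x 6 TT-matrix.
+ x = t3f.random_matrix(((2, 3), None))  # 6 x 1 TT-vector.
+ z = t3f.random_matrix(((2, 3), None))  # Point defining the tangent space.
+ # Project A * x on the tangent space at z without forming A * x explicitly.
+ proj = t3f.project_matmul(x, z, A)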
+ """
+
+ if not isinstance(where, TensorTrain):
+ raise ValueError('The first argument should be a TensorTrain object, got '
+ '"%s".' % where)
+
+ if where.get_raw_shape() != what.get_raw_shape():
+ raise ValueError('The shapes of the tensor we want to project and of the '
+ 'tensor on which tangent space we want to project should '
+ 'match, got %s and %s.' %
+ (where.get_raw_shape(),
+ what.get_raw_shape()))
+
+ dtypes_compatible = (where.dtype.is_compatible_with(what.dtype) or
+ what.dtype.is_compatible_with(where.dtype))
+ if not dtypes_compatible:
+ raise ValueError('Dtypes of the arguments should coincide, got %s and %s.' %
+ (where.dtype,
+ what.dtype))
+
+ left_tangent_space_tens = decompositions.orthogonalize_tt_cores(
+ where)
+ right_tangent_space_tens = decompositions.orthogonalize_tt_cores(
+ left_tangent_space_tens, left_to_right=False)
+
+ ndims = where.ndims()
+ dtype = where.dtype
+ raw_shape = shapes.lazy_raw_shape(where)
+ right_tangent_tt_ranks = shapes.lazy_tt_ranks(right_tangent_space_tens)
+ left_tangent_tt_ranks = shapes.lazy_tt_ranks(left_tangent_space_tens)
+
+ # For einsum notation.
+ right_rank_dim = what.right_tt_rank_dim
+ left_rank_dim = what.left_tt_rank_dim
+ output_is_batch = isinstance(what, TensorTrainBatch)
+ if output_is_batch:
+ output_batch_size = what.batch_size
+
+ # Always work with batch of TT objects for simplicity.
+ what = shapes.expand_batch_dim(what)
+ # Compute the batch size only after expanding: lazy_batch_size requires a
+ # TensorTrainBatch.
+ batch_size = shapes.lazy_batch_size(what)
+
+ # Prepare rhs vectors.
+ # rhs[core_idx] is of size
+ # batch_size x tensor_tt_ranks[core_idx] x matrix_tt_ranks[core_idx] x tangent_tt_ranks[core_idx]
+ rhs = [None] * (ndims + 1)
+ rhs[ndims] = tf.ones((batch_size, 1, 1, 1), dtype=dtype)
+ for core_idx in range(ndims - 1, 0, -1):
+ tens_core = what.tt_cores[core_idx]
+ right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
+ matrix_core = matrix.tt_cores[core_idx]
+ rhs[core_idx] = tf.einsum('bije,cikf,sdef,sajkd->sabc', matrix_core,
+ right_tang_core, rhs[core_idx + 1], tens_core)
+ # Prepare lhs vectors.
+ # lhs[core_idx] is of size
+ # batch_size x tangent_tt_ranks[core_idx] x matrix_tt_ranks[core_idx] x tensor_tt_ranks[core_idx]
+ lhs = [None] * (ndims + 1)
+ lhs[0] = tf.ones((batch_size, 1, 1, 1), dtype=dtype)
+ for core_idx in range(ndims - 1):
+ tens_core = what.tt_cores[core_idx]
+ left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
+ matrix_core = matrix.tt_cores[core_idx]
+ # TODO: brute-force the order of indices in lhs?
+ lhs[core_idx + 1] = tf.einsum('bije,aikd,sabc,scjkf->sdef', matrix_core,
+ left_tang_core, lhs[core_idx], tens_core)
+
+ # Left to right sweep.
+ res_cores_list = []
+ for core_idx in range(ndims):
+ tens_core = what.tt_cores[core_idx]
+ matrix_core = matrix.tt_cores[core_idx]
+ left_tang_core = left_tangent_space_tens.tt_cores[core_idx]
+ right_tang_core = right_tangent_space_tens.tt_cores[core_idx]
+
+ if core_idx < ndims - 1:
+ proj_core = tf.einsum('scjke,sabc,bijd->saikde', tens_core,
+ lhs[core_idx], matrix_core)
+ proj_core -= tf.einsum('aikb,sbcd->saikcd', left_tang_core,
+ lhs[core_idx + 1])
+ proj_core = tf.einsum('saikcb,sbcd->saikd', proj_core, rhs[core_idx + 1])
+
+ if core_idx == ndims - 1:
+ # d and e dimensions take 1 value, since it's the last rank.
+ # To make the result shape (?, ?, ?, 1), we are summing d and leaving e,
+ # but we could have done the opposite -- sum e and leave d.
+ proj_core = tf.einsum('sabc,bijd,scjke->saike', lhs[core_idx], matrix_core,
+ tens_core)
+
+ if output_is_batch:
+ # Add batch dimension of size output_batch_size to left_tang_core and
+ # right_tang_core
+ extended_left_tang_core = tf.expand_dims(left_tang_core, 0)
+ extended_right_tang_core = tf.expand_dims(right_tang_core, 0)
+ extended_left_tang_core = tf.tile(extended_left_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ extended_right_tang_core = tf.tile(extended_right_tang_core,
+ [output_batch_size, 1, 1, 1, 1])
+ else:
+ extended_left_tang_core = left_tang_core
+ extended_right_tang_core = right_tang_core
+
+ if core_idx == 0:
+ res_core = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
+ elif core_idx == ndims - 1:
+ res_core = tf.concat((extended_right_tang_core, proj_core),
+ axis=left_rank_dim)
+ else:
+ rank_1 = right_tangent_tt_ranks[core_idx]
+ rank_2 = left_tangent_tt_ranks[core_idx + 1]
+ mode_size_n = raw_shape[0][core_idx]
+ mode_size_m = raw_shape[1][core_idx]
+ shape = [rank_1, mode_size_n, mode_size_m, rank_2]
+ if output_is_batch:
+ shape = [output_batch_size] + shape
+ zeros = tf.zeros(shape, dtype)
+ upper = tf.concat((extended_right_tang_core, zeros),
+ axis=right_rank_dim)
+ lower = tf.concat((proj_core, extended_left_tang_core),
+ axis=right_rank_dim)
+ res_core = tf.concat((upper, lower), axis=left_rank_dim)
res_cores_list.append(res_core)
+
# TODO: TT-ranks.
- return TensorTrain(res_cores_list, tangent_space_tens.get_raw_shape())
\ No newline at end of file
+ if output_is_batch:
+ res = TensorTrainBatch(res_cores_list, where.get_raw_shape(),
+ batch_size=output_batch_size)
+ else:
+ res = TensorTrain(res_cores_list, where.get_raw_shape())
+
+ res.projection_on = where
+ return res
+
+
+def pairwise_flat_inner_projected(projected_tt_vectors_1,
+ projected_tt_vectors_2):
+ """Scalar products between two batches of TTs from the same tangent space.
+
+ res[i, j] = t3f.flat_inner(projected_tt_vectors_1[i], projected_tt_vectors_2[j]).
+
+ pairwise_flat_inner_projected(projected_tt_vectors_1, projected_tt_vectors_2)
+ is equivalent to
+ pairwise_flat_inner(projected_tt_vectors_1, projected_tt_vectors_2),
+ but works only on objects from the same tangent space and is much faster
+ than the general pairwise_flat_inner.
+
+ Args:
+ projected_tt_vectors_1: TensorTrainBatch of tensors projected on the same
+ tangent space as projected_tt_vectors_2.
+ projected_tt_vectors_2: TensorTrainBatch.
+
+ Returns:
+ tf.tensor with the scalar product matrix.
+
+ Complexity:
+ O(batch_size^2 d r^2 n), where
+ d is the number of TT-cores (projected_tt_vectors_1.ndims());
+ r is the largest TT-rank: max(projected_tt_vectors_1.get_tt_ranks())
+ (i.e. 2 * the TT-rank of the object the vectors were projected onto);
+ and n is the size of the axis dimension, e.g.
+ for a tensor of size 4 x 4 x 4, n is 4;
+ for a 9 x 64 matrix of raw shape (3, 3, 3) x (4, 4, 4) n is 12.
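+
+ Example (an illustrative sketch):
+ x = t3f.random_tensor((2, 3, 4))
+ batch = t3f.random_tensor_batch((2, 3, 4), batch_size=4)
+ projected = t3f.project(batch, x)
+ # The 4 x 4 Gram matrix of the projections.
+ gram = t3f.pairwise_flat_inner_projected(projected, projected)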
+ """
+ if not hasattr(projected_tt_vectors_1, 'projection_on') or \
+ not hasattr(projected_tt_vectors_2, 'projection_on'):
+ raise ValueError('Both arguments should be projections on the tangent '
+ 'space of some other TT-object. All projection* functions '
+ 'leave a .projection_on field in the resulting TT-object '
+ 'which is not present in the arguments you\'ve provided')
+
+ if projected_tt_vectors_1.projection_on != projected_tt_vectors_2.projection_on:
+ raise ValueError('Both arguments should be projections on the tangent '
+ 'space of the same TT-object. The provided arguments are '
+ 'projections on different TT-objects (%s and %s). Or at '
+ 'least the pointers are different.' %
+ (projected_tt_vectors_1.projection_on,
+ projected_tt_vectors_2.projection_on))
+
+ # Always work with batches of objects for simplicity.
+ projected_tt_vectors_1 = shapes.expand_batch_dim(projected_tt_vectors_1)
+ projected_tt_vectors_2 = shapes.expand_batch_dim(projected_tt_vectors_2)
+
+ ndims = projected_tt_vectors_1.ndims()
+ tt_ranks = shapes.lazy_tt_ranks(projected_tt_vectors_1)
+
+ if projected_tt_vectors_1.is_tt_matrix():
+ right_size = tt_ranks[1] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[0]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[0]
+ curr_du_1 = curr_core_1[:, :, :, :, :right_size]
+ curr_du_2 = curr_core_2[:, :, :, :, :right_size]
+ res = tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2)
+ for core_idx in range(1, ndims):
+ left_size = tt_ranks[core_idx] // 2
+ right_size = tt_ranks[core_idx + 1] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[core_idx]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx]
+ curr_du_1 = curr_core_1[:, left_size:, :, :, :right_size]
+ curr_du_2 = curr_core_2[:, left_size:, :, :, :right_size]
+ res += tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2)
+
+ left_size = tt_ranks[-2] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[-1]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[-1]
+ curr_du_1 = curr_core_1[:, left_size:, :, :, :]
+ curr_du_2 = curr_core_2[:, left_size:, :, :, :]
+ res += tf.einsum('paijb,qaijb->pq', curr_du_1, curr_du_2)
+ else:
+ # Working with TT-tensor, not TT-matrix.
+ right_size = tt_ranks[1] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[0]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[0]
+ curr_du_1 = curr_core_1[:, :, :, :right_size]
+ curr_du_2 = curr_core_2[:, :, :, :right_size]
+ res = tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2)
+ for core_idx in range(1, ndims):
+ left_size = tt_ranks[core_idx] // 2
+ right_size = tt_ranks[core_idx + 1] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[core_idx]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[core_idx]
+ curr_du_1 = curr_core_1[:, left_size:, :, :right_size]
+ curr_du_2 = curr_core_2[:, left_size:, :, :right_size]
+ res += tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2)
+
+ left_size = tt_ranks[-2] // 2
+ curr_core_1 = projected_tt_vectors_1.tt_cores[-1]
+ curr_core_2 = projected_tt_vectors_2.tt_cores[-1]
+ curr_du_1 = curr_core_1[:, left_size:, :, :]
+ curr_du_2 = curr_core_2[:, left_size:, :, :]
+ res += tf.einsum('paib,qaib->pq', curr_du_1, curr_du_2)
+ return res
+
+
+def add_n_projected(tt_objects, coef=None):
+ """Adds all input TT-objects that are projections on the same tangent space.
+
+ add_n_projected((a, b)) is equivalent to add(a, b) for a and b that are from
+ the same tangent space, but doesn't increase the TT-ranks.
+
+ Args:
+ tt_objects: a list of TT-objects that are projections on the same tangent
+ space.
+ coef: a list of numbers or anything else convertible to tf.Tensor.
+ If provided, computes a weighted sum. The size of this array should be
+ len(tt_objects) x tt_objects[0].batch_size.
+
+ Returns:
+ a TT-object representing the sum of the tt_objects (a weighted sum if coef
+ is provided). The TT-rank of the result equals the TT-ranks of the arguments.
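+
+ Example (an illustrative sketch):
+ x = t3f.random_tensor((2, 3, 4))
+ a = t3f.project(t3f.random_tensor((2, 3, 4)), x)
+ b = t3f.project(t3f.random_tensor((2, 3, 4)), x)
+ # a + b computed without doubling the TT-rank, since both are in T_x.
+ s = t3f.add_n_projected([a, b])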
+ """
+ for tt in tt_objects:
+ if not hasattr(tt, 'projection_on'):
+ raise ValueError('All arguments should be projections on the tangent '
+ 'space of some other TT-object. All projection* functions '
+ 'leave a .projection_on field in the resulting TT-object, '
+ 'which is not present in the argument you\'ve provided.')
+
+ projection_on = tt_objects[0].projection_on
+ for tt in tt_objects[1:]:
+ if tt.projection_on != projection_on:
+ raise ValueError('All tt_objects should be projections on the tangent '
+ 'space of the same TT-object. The provided arguments are '
+ 'projections on different TT-objects (%s and %s). Or at '
+ 'least the pointers are different.' % (tt.projection_on,
+ projection_on))
+ if coef is not None:
+ coef = tf.convert_to_tensor(coef)
+ if coef.get_shape().ndims > 1:
+ # In the batch case we need to multiply each core by these coefficients
+ # along the first axis. To do that, reshape the coefs to match the
+ # number of dimensions of the TT-cores.
+ some_core = tt_objects[0].tt_cores[0]
+ dim_array = [1] * (some_core.get_shape().ndims + 1)
+ dim_array[0] = coef.get_shape()[0].value
+ dim_array[1] = coef.get_shape()[1].value
+ coef = tf.reshape(coef, dim_array)
+
+ ndims = tt_objects[0].ndims()
+ tt_ranks = shapes.lazy_tt_ranks(tt_objects[0])
+ left_rank_dim = tt_objects[0].left_tt_rank_dim
+ right_rank_dim = tt_objects[0].right_tt_rank_dim
+ res_cores = []
+
+ def slice_tt_core(tt_core, left_idx, right_idx):
+ num_tt_core_dims = len(tt_core.get_shape())
+ idx = [slice(None)] * num_tt_core_dims
+ idx[left_rank_dim] = left_idx
+ idx[right_rank_dim] = right_idx
+ return tt_core[idx]
+
+ right_half_rank = tt_ranks[1] // 2
+ left_chunks = []
+ for obj_idx, tt in enumerate(tt_objects):
+ curr_core = slice_tt_core(tt.tt_cores[0], slice(None),
+ slice(0, right_half_rank))
+ if coef is not None:
+ curr_core *= coef[obj_idx]
+ left_chunks.append(curr_core)
+ left_part = tf.add_n(left_chunks)
+ first_obj_core = tt_objects[0].tt_cores[0]
+ right_part = slice_tt_core(first_obj_core, slice(None),
+ slice(right_half_rank, None))
+ first_core = tf.concat((left_part, right_part), axis=right_rank_dim)
+ res_cores.append(first_core)
+
+ for core_idx in range(1, ndims - 1):
+ first_obj_core = tt_objects[0].tt_cores[core_idx]
+ left_half_rank = tt_ranks[core_idx] // 2
+ right_half_rank = tt_ranks[core_idx + 1] // 2
+
+ upper_part = slice_tt_core(first_obj_core, slice(0, left_half_rank),
+ slice(None))
+ lower_right_part = slice_tt_core(first_obj_core,
+ slice(left_half_rank, None),
+ slice(right_half_rank, None))
+
+ lower_left_chunks = []
+ for obj_idx, tt in enumerate(tt_objects):
+ curr_core = slice_tt_core(tt.tt_cores[core_idx],
+ slice(left_half_rank, None),
+ slice(0, right_half_rank))
+ if coef is not None:
+ curr_core *= coef[obj_idx]
+ lower_left_chunks.append(curr_core)
+ lower_left_part = tf.add_n(lower_left_chunks)
+ lower_part = tf.concat((lower_left_part, lower_right_part),
+ axis=right_rank_dim)
+ curr_core = tf.concat((upper_part, lower_part), axis=left_rank_dim)
+ res_cores.append(curr_core)
+
+ left_half_rank = tt_ranks[ndims - 1] // 2
+ upper_part = slice_tt_core(tt_objects[0].tt_cores[-1], slice(0, left_half_rank),
+ slice(None))
+ lower_chunks = []
+ for obj_idx, tt in enumerate(tt_objects):
+ curr_core = slice_tt_core(tt.tt_cores[-1], slice(left_half_rank, None),
+ slice(None))
+ if coef is not None:
+ curr_core *= coef[obj_idx]
+ lower_chunks.append(curr_core)
+ lower_part = tf.add_n(lower_chunks)
+ last_core = tf.concat((upper_part, lower_part), axis=left_rank_dim)
+ res_cores.append(last_core)
+
+ raw_shape = tt_objects[0].get_raw_shape()
+ static_tt_ranks = tt_objects[0].get_tt_ranks()
+ if isinstance(tt_objects[0], TensorTrain):
+ res = TensorTrain(res_cores, raw_shape, static_tt_ranks)
+ elif isinstance(tt_objects[0], TensorTrainBatch):
+ res = TensorTrainBatch(res_cores, raw_shape, static_tt_ranks,
+ tt_objects[0].batch_size)
+ # Maintain the projection_on property.
+ res.projection_on = tt_objects[0].projection_on
+ return res
diff --git a/t3f/riemannian_test.py b/t3f/riemannian_test.py
index cea79093..25ddb7d1 100644
--- a/t3f/riemannian_test.py
+++ b/t3f/riemannian_test.py
@@ -1,10 +1,12 @@
import numpy as np
import tensorflow as tf
-from tensor_train import TensorTrain
-import ops
-import initializers
-import riemannian
+from t3f.tensor_train import TensorTrain
+from t3f import ops
+from t3f import initializers
+from t3f import riemannian
+from t3f import shapes
+from t3f import batch_ops
class RiemannianTest(tf.test.TestCase):
@@ -12,7 +14,7 @@ class RiemannianTest(tf.test.TestCase):
def testProjectOnItself(self):
# Projection of X into the tangent space of itself is X: P_x(x) = x.
tens = initializers.random_tensor((2, 3, 4))
- proj = riemannian.project(tens, tens)
+ proj = riemannian.project_sum(tens, tens)
with self.test_session() as sess:
actual_val, desired_val = sess.run((ops.full(proj), ops.full(tens)))
self.assertAllClose(desired_val, actual_val)
@@ -50,7 +52,7 @@ def testProject(self):
[-1.66479093, -0.99003251, 2.46629195],
[-0.04847773, -0.72908174, 0.20142675],
[ 0.34431125, -0.20935516, -1.15864246]]
- proj = riemannian.project(tangent_tens, tens)
+ proj = riemannian.project_sum(tens, tangent_tens)
with self.test_session() as sess:
self.assertAllClose(desired_projection, ops.full(proj).eval())
@@ -59,36 +61,190 @@ def testProjectSum(self):
tens = initializers.random_tensor_batch((2, 3, 4), batch_size=3)
tangent_tens = initializers.random_tensor((2, 3, 4), 3)
weighted_sum = tens[0] + tens[1] + tens[2]
- direct_proj = riemannian.project(tangent_tens, weighted_sum)
- actual_proj = riemannian.project(tangent_tens, tens)
+ direct_proj = riemannian.project_sum(weighted_sum, tangent_tens)
+ actual_proj = riemannian.project_sum(tens, tangent_tens)
with self.test_session() as sess:
res = sess.run((ops.full(direct_proj), ops.full(actual_proj)))
desired_val, actual_val = res
self.assertAllClose(desired_val, actual_val)
- def testProjectSumCoef(self):
+ def testProjectWeightedSum(self):
# Test projecting a batch of TT-tensors with providing coefs.
tens = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=4)
coef = [0.1, -2, 0, 0.4]
tangent_tens = initializers.random_tensor((2, 3, 4), 4)
weighted_sum = coef[0] * tens[0] + coef[1] * tens[1] + coef[2] * tens[2]
weighted_sum += coef[3] * tens[3]
- direct_proj = riemannian.project(tangent_tens, weighted_sum)
- actual_proj = riemannian.project(tangent_tens, tens, coef)
+ direct_proj = riemannian.project_sum(weighted_sum, tangent_tens)
+ actual_proj = riemannian.project_sum(tens, tangent_tens, coef)
with self.test_session() as sess:
res = sess.run((ops.full(direct_proj), ops.full(actual_proj)))
desired_val, actual_val = res
self.assertAllClose(desired_val, actual_val)
+ def testProjectWeightedSumMultipleOutputs(self):
+    # Test projecting a batch of TT-tensors while providing weights and
+    # outputting several TT-objects with different weights.
+ tens = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=4)
+ np.random.seed(0)
+ weights = np.random.randn(4, 2).astype(np.float32)
+ tangent_tens = initializers.random_tensor((2, 3, 4), 4)
+ weighted_sum_1 = weights[0, 0] * tens[0] + weights[1, 0] * tens[1] +\
+ weights[2, 0] * tens[2] + weights[3, 0] * tens[3]
+ weighted_sum_2 = weights[0, 1] * tens[0] + weights[1, 1] * tens[1] +\
+ weights[2, 1] * tens[2] + weights[3, 1] * tens[3]
+ direct_proj_1 = riemannian.project_sum(weighted_sum_1, tangent_tens)
+ direct_proj_2 = riemannian.project_sum(weighted_sum_2, tangent_tens)
+ direct_proj_1 = shapes.expand_batch_dim(direct_proj_1)
+ direct_proj_2 = shapes.expand_batch_dim(direct_proj_2)
+ direct_projs = batch_ops.concat_along_batch_dim((direct_proj_1, direct_proj_2))
+ actual_proj = riemannian.project_sum(tens, tangent_tens, weights)
+ with self.test_session() as sess:
+ res = sess.run((ops.full(direct_projs), ops.full(actual_proj)))
+ desired_val, actual_val = res
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
+
def testProjectMatrixOnItself(self):
# Project a TT-matrix on itself.
# Projection of X into the tangent space of itself is X: P_x(x) = x.
tt_mat = initializers.random_matrix(((2, 3, 4), (2, 3, 4)))
- proj = riemannian.project(tt_mat, tt_mat)
+ proj = riemannian.project_sum(tt_mat, tt_mat)
with self.test_session() as sess:
actual_val, desired_val = sess.run((ops.full(proj), ops.full(tt_mat)))
self.assertAllClose(desired_val, actual_val)
+ def testCompareProjectSumAndProject(self):
+ # Compare results of project_sum and project.
+ tens = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=4)
+ tangent_tens = initializers.random_tensor((2, 3, 4), 4)
+ project_sum = riemannian.project_sum(tens, tangent_tens, tf.eye(4))
+ project = riemannian.project(tens, tangent_tens)
+ with self.test_session() as sess:
+ res = sess.run((ops.full(project_sum), ops.full(project)))
+ project_sum_val, project_val = res
+ self.assertAllClose(project_sum_val, project_val)
+
+ def testProjectMatmul(self):
+ # Project a TT-matrix times TT-vector on a TT-vector.
+ tt_mat = initializers.random_matrix(((2, 3, 4), (2, 3, 4)))
+ tt_vec_what = initializers.random_matrix_batch(((2, 3, 4), None),
+ batch_size=3)
+ tt_vec_where = initializers.random_matrix(((2, 3, 4), None))
+ proj = riemannian.project_matmul(tt_vec_what, tt_vec_where, tt_mat)
+ matvec = ops.matmul(tt_mat, tt_vec_what)
+ proj_desired = riemannian.project(matvec, tt_vec_where)
+ with self.test_session() as sess:
+ actual_val, desired_val = sess.run((ops.full(proj), ops.full(proj_desired)))
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
+
+ def testPairwiseFlatInnerTensor(self):
+ # Compare pairwise_flat_inner_projected against naive implementation.
+ what1 = initializers.random_tensor_batch((2, 3, 4), 4, batch_size=3)
+ what2 = initializers.random_tensor_batch((2, 3, 4), 4, batch_size=4)
+ where = initializers.random_tensor((2, 3, 4), 3)
+ projected1 = riemannian.project(what1, where)
+ projected2 = riemannian.project(what2, where)
+ desired = batch_ops.pairwise_flat_inner(projected1, projected2)
+ actual = riemannian.pairwise_flat_inner_projected(projected1, projected2)
+ with self.test_session() as sess:
+ desired_val, actual_val = sess.run((desired, actual))
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
+
+ with self.assertRaises(ValueError):
+ # Second argument is not a projection on the tangent space.
+ riemannian.pairwise_flat_inner_projected(projected1, what2)
+ where2 = initializers.random_tensor((2, 3, 4), 3)
+ another_projected2 = riemannian.project(what2, where2)
+ with self.assertRaises(ValueError):
+ # The arguments are projections on different tangent spaces.
+ riemannian.pairwise_flat_inner_projected(projected1, another_projected2)
+
+ def testPairwiseFlatInnerMatrix(self):
+ # Compare pairwise_flat_inner_projected against naive implementation.
+ what1 = initializers.random_matrix_batch(((2, 3, 4), None), 4, batch_size=3)
+ what2 = initializers.random_matrix_batch(((2, 3, 4), None), 4, batch_size=4)
+ where = initializers.random_matrix(((2, 3, 4), None), 3)
+ projected1 = riemannian.project(what1, where)
+ projected2 = riemannian.project(what2, where)
+ desired = batch_ops.pairwise_flat_inner(projected1, projected2)
+ actual = riemannian.pairwise_flat_inner_projected(projected1, projected2)
+ with self.test_session() as sess:
+ desired_val, actual_val = sess.run((desired, actual))
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
+
+ with self.assertRaises(ValueError):
+ # Second argument is not a projection on the tangent space.
+ riemannian.pairwise_flat_inner_projected(projected1, what2)
+ where2 = initializers.random_matrix(((2, 3, 4), None), 3)
+ another_projected2 = riemannian.project(what2, where2)
+ with self.assertRaises(ValueError):
+ # The arguments are projections on different tangent spaces.
+ riemannian.pairwise_flat_inner_projected(projected1, another_projected2)
+
+ def testAddNProjected(self):
+ # Add several TT-objects from the same tangent space.
+ what1 = initializers.random_tensor_batch((2, 3, 4), 4, batch_size=3)
+ what2 = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=3)
+ where = initializers.random_tensor((2, 3, 4), 3)
+ projected1 = riemannian.project(what1, where)
+ projected2 = riemannian.project(what2, where)
+ desired = ops.full(projected1 + projected2)
+ actual = ops.full(riemannian.add_n_projected((projected1, projected2)))
+ with self.test_session() as sess:
+ desired_val, actual_val = sess.run((desired, actual))
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
+
+ with self.assertRaises(ValueError):
+ # Second argument is not a projection on the tangent space.
+ riemannian.add_n_projected((projected1, what2))
+ where2 = initializers.random_tensor((2, 3, 4), 3)
+ another_projected2 = riemannian.project(what2, where2)
+ with self.assertRaises(ValueError):
+ # The arguments are projections on different tangent spaces.
+ riemannian.add_n_projected((projected1, another_projected2))
+
+ def testWeightedAddNProjected(self):
+ # Add several TT-objects from the same tangent space with coefs.
+ what1 = initializers.random_tensor((2, 3, 4), 4)
+ what2 = initializers.random_tensor((2, 3, 4), 1)
+ where = initializers.random_tensor((2, 3, 4), 3)
+ projected1 = riemannian.project(what1, where)
+ projected2 = riemannian.project(what2, where)
+ desired = ops.full(1.2 * projected1 + -2.0 * projected2)
+ actual = ops.full(riemannian.add_n_projected((projected1, projected2),
+ coef=[1.2, -2.0]))
+ with self.test_session() as sess:
+ desired_val, actual_val = sess.run((desired, actual))
+ self.assertAllClose(desired_val, actual_val)
+
+ with self.assertRaises(ValueError):
+ # Second argument is not a projection on the tangent space.
+ riemannian.add_n_projected((projected1, what2), coef=[1.2, -2.0])
+ where2 = initializers.random_tensor((2, 3, 4), 3)
+ another_projected2 = riemannian.project(what2, where2)
+ with self.assertRaises(ValueError):
+ # The arguments are projections on different tangent spaces.
+ riemannian.add_n_projected((projected1, another_projected2),
+ coef=[1.2, -2.0])
+
+ def testWeightedAddNProjectedBatch(self):
+ # Add several TT-batches from the same tangent space with coefs.
+ what1 = initializers.random_tensor_batch((2, 3, 4), 4, batch_size=3)
+ what2 = initializers.random_tensor_batch((2, 3, 4), 1, batch_size=3)
+ where = initializers.random_tensor((2, 3, 4), 3)
+ projected1 = riemannian.project(what1, where)
+ projected2 = riemannian.project(what2, where)
+
+ desired_0 = ops.full(1.2 * projected1[0] + -2.0 * projected2[0])
+ desired_1 = ops.full(1.9 * projected1[1] + 2.0 * projected2[1])
+ desired_2 = ops.full(0.0 * projected1[2] + 1.0 * projected2[2])
+ desired = tf.stack((desired_0, desired_1, desired_2), axis=0)
+ actual = ops.full(riemannian.add_n_projected((projected1, projected2),
+ coef=[[1.2, 1.9, 0.0],
+ [-2.0, 2.0, 1.0]]))
+ with self.test_session() as sess:
+ desired_val, actual_val = sess.run((desired, actual))
+ self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
if __name__ == "__main__":
tf.test.main()
diff --git a/t3f/shapes.py b/t3f/shapes.py
index 3e0ff172..bc411092 100644
--- a/t3f/shapes.py
+++ b/t3f/shapes.py
@@ -17,11 +17,8 @@ def tt_ranks(tt):
"""
num_dims = tt.ndims()
ranks = []
- # TODO: ugly.
- from tensor_train_batch import TensorTrainBatch
- left_rank_dim = 1 if isinstance(tt, TensorTrainBatch) else 0
for i in range(num_dims):
- ranks.append(tf.shape(tt.tt_cores[i])[left_rank_dim])
+ ranks.append(tf.shape(tt.tt_cores[i])[tt.left_tt_rank_dim])
ranks.append(tf.shape(tt.tt_cores[-1])[-1])
return tf.stack(ranks, axis=0)
@@ -30,8 +27,10 @@ def shape(tt):
"""Returns the shape of a TensorTrain.
This operation returns a 1-D integer tensor representing the shape of
- the input. For TT-matrices the shape would have two values, see raw_shape for
- the tensor shape.
+  the input. For TT-matrices the shape would have two values; see raw_shape
+  for the underlying tensor shape.
+ If the input is a TensorTrainBatch, the first dimension of the output is the
+ batch_size.
Args:
tt: `TensorTrain` or `TensorTrainBatch` object.
@@ -41,9 +40,16 @@ def shape(tt):
"""
tt_raw_shape = raw_shape(tt)
if tt.is_tt_matrix():
- return tf.reduce_prod(raw_shape, axis=1)
+ res = tf.reduce_prod(tt_raw_shape, axis=1)
else:
- return tt_raw_shape[0]
+ res = tt_raw_shape[0]
+
+ # TODO: ugly.
+ from t3f.tensor_train_batch import TensorTrainBatch
+ if isinstance(tt, TensorTrainBatch):
+ res = tf.concat((tf.expand_dims(batch_size(tt), 0), res), axis=0)
+
+ return res
def raw_shape(tt):
@@ -65,7 +71,7 @@ def raw_shape(tt):
num_tensor_axis = len(tt.get_raw_shape())
final_raw_shape = []
# TODO: ugly.
- from tensor_train import TensorTrain
+ from t3f.tensor_train import TensorTrain
axes_shift = 1 if isinstance(tt, TensorTrain) else 2
for ax in range(num_tensor_axis):
curr_raw_shape = []
@@ -207,6 +213,9 @@ def is_batch_broadcasting_possible(tt_a, tt_b):
Returns true if the batch sizes are the same or if one of them is 1.
+  If the batch size that is supposed to be 1 is not known at the compilation
+  stage, broadcasting is not allowed.
+
Args:
tt_a: TensorTrain or TensorTrainBatch
tt_b: TensorTrain or TensorTrainBatch
@@ -215,10 +224,10 @@ def is_batch_broadcasting_possible(tt_a, tt_b):
Bool
"""
try:
- if tt_a.batch_size is None or tt_b.batch_size is None:
- # If one of the batch sizes is not available on the compilation stage,
- # we cannot say if broadcasting is possible.
- return True
+ if tt_a.batch_size is None and tt_b.batch_size is None:
+      # If both batch sizes are unavailable at the compilation stage,
+      # we cannot tell whether broadcasting is possible, so we do not allow it.
+ return False
if tt_a.batch_size == tt_b.batch_size:
return True
if tt_a.batch_size == 1 or tt_b.batch_size == 1:
@@ -264,9 +273,9 @@ def expand_batch_dim(tt):
if hasattr(tt, 'batch_size'):
return tt
else:
- from tensor_train_batch import TensorTrainBatch
+ from t3f.tensor_train_batch import TensorTrainBatch
tt_cores = []
for core_idx in range(tt.ndims()):
tt_cores.append(tf.expand_dims(tt.tt_cores[core_idx], 0))
return TensorTrainBatch(tt_cores, tt.get_raw_shape(), tt.get_tt_ranks(),
- batch_size=1)
\ No newline at end of file
+ batch_size=1)
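
As a quick sketch of the revised `shape` semantics (the batch size is now prepended for batches), assuming a plain TF1 session:

```python
import tensorflow as tf
from t3f import initializers
from t3f import shapes

batch = initializers.random_tensor_batch((2, 3, 4), batch_size=5)
with tf.Session() as sess:
  # The batch size comes first: prints [5 2 3 4].
  print(sess.run(shapes.shape(batch)))
```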
diff --git a/t3f/tensor_train.py b/t3f/tensor_train.py
index 3da634d2..b884e92f 100644
--- a/t3f/tensor_train.py
+++ b/t3f/tensor_train.py
@@ -1,7 +1,7 @@
import tensorflow as tf
-from tensor_train_base import TensorTrainBase
-import shapes
+from t3f.tensor_train_base import TensorTrainBase
+from t3f import shapes
class TensorTrain(TensorTrainBase):
@@ -18,6 +18,8 @@ class TensorTrain(TensorTrainBase):
@@graph
@@ndims
@@get_tt_ranks
+ @@left_tt_rank_dim
+ @@right_tt_rank_dim
@@is_tt_matrix
@@is_variable
@@eval
@@ -79,6 +81,23 @@ def tt_cores(self):
"""
return self._tt_cores
+ @property
+ def left_tt_rank_dim(self):
+ """The dimension of the left TT-rank in each TT-core."""
+ return 0
+
+ @property
+ def right_tt_rank_dim(self):
+ """The dimension of the right TT-rank in each TT-core."""
+ if self.is_tt_matrix():
+ # The dimensions of each TT-core are
+ # [left_rank, n, m, right_rank]
+ return 3
+ else:
+ # The dimensions of each TT-core are
+ # [left_rank, n, right_rank]
+ return 2
+
def __str__(self):
"""A string describing the TensorTrain object, its TT-rank, and shape."""
shape = self.get_shape()
@@ -104,6 +123,9 @@ def __getitem__(self, slice_spec):
>>> a[1:2, :, :]
is a 3D TensorTrain 1 x 3 x 4
"""
+ if len(slice_spec) != self.ndims():
+ raise ValueError('Expected %d indices, got %d' % (self.ndims(),
+ len(slice_spec)))
new_tt_cores = []
remainder = None
for i in range(self.ndims()):
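
The new rank-dimension properties in a nutshell, sketched with the initializers from this package:

```python
from t3f import initializers

tens = initializers.random_tensor((2, 3, 4))
# TT-tensor cores have shape [left_rank, n, right_rank].
assert tens.left_tt_rank_dim == 0
assert tens.right_tt_rank_dim == 2

mat = initializers.random_matrix(((2, 3, 4), (2, 3, 4)))
# TT-matrix cores have shape [left_rank, n, m, right_rank].
assert mat.left_tt_rank_dim == 0
assert mat.right_tt_rank_dim == 3
```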
diff --git a/t3f/tensor_train_base.py b/t3f/tensor_train_base.py
index fc636a84..7bad0a07 100644
--- a/t3f/tensor_train_base.py
+++ b/t3f/tensor_train_base.py
@@ -148,9 +148,31 @@ def __add__(self, other):
"""
# TODO: ugly.
# We can't import ops in the beginning since it creates cyclic dependencies.
- import ops
+ from t3f import ops
return ops.add(self, other)
+ def __sub__(self, other):
+ """Returns a TensorTrain corresponding to element-wise difference tt_a - tt_b.
+
+ Supports broadcasting (e.g. you can subtract TensorTrainBatch and
+ TensorTrain).
+ Just calls t3f.add(self, (-1) * other), see its documentation for details.
+ """
+ # TODO: ugly.
+ # We can't import ops in the beginning since it creates cyclic dependencies.
+ from t3f import ops
+ return ops.add(self, ops.multiply(other, -1.))
+
+ def __neg__(self):
+ """Returns a TensorTrain corresponding to element-wise negative -tt_a.
+
+ Just calls t3f.multiply(self, -1.), see its documentation for details.
+ """
+ # TODO: ugly.
+ # We can't import ops in the beginning since it creates cyclic dependencies.
+ from t3f import ops
+ return ops.multiply(self, -1.)
+
def __mul__(self, other):
"""Returns a TensorTrain corresponding to element-wise product tt_a * tt_b.
@@ -160,7 +182,7 @@ def __mul__(self, other):
"""
# TODO: ugly.
# We can't import ops in the beginning since it creates cyclic dependencies.
- import ops
+ from t3f import ops
return ops.multiply(self, other)
# To support 'TT * 4' as well as '4 * TT'.
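
A sketch of what the new operator overloads expand to (the identities follow from the docstrings above):

```python
from t3f import initializers

a = initializers.random_tensor((2, 3, 4))
b = initializers.random_tensor((2, 3, 4))
c = a - b    # same as ops.add(a, ops.multiply(b, -1.))
d = -a       # same as ops.multiply(a, -1.)
e = 0.4 * a  # __rmul__, so the scalar may appear on either side
```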
diff --git a/t3f/tensor_train_batch.py b/t3f/tensor_train_batch.py
index 02bf365a..5ff0410b 100644
--- a/t3f/tensor_train_batch.py
+++ b/t3f/tensor_train_batch.py
@@ -1,10 +1,9 @@
-import numbers
import numpy as np
import tensorflow as tf
-from tensor_train_base import TensorTrainBase
-from tensor_train import TensorTrain
-import shapes
+from t3f.tensor_train_base import TensorTrainBase
+from t3f.tensor_train import TensorTrain
+from t3f import shapes
class TensorTrainBatch(TensorTrainBase):
@@ -21,6 +20,8 @@ class TensorTrainBatch(TensorTrainBase):
@@graph
@@ndims
@@get_tt_ranks
+ @@left_tt_rank_dim
+ @@right_tt_rank_dim
@@is_tt_matrix
@@is_variable
@@eval
@@ -28,11 +29,12 @@ class TensorTrainBatch(TensorTrainBase):
def __init__(self, tt_cores, shape=None, tt_ranks=None, batch_size=None,
convert_to_tensors=True):
- """Creates a `TensorTrain`.
+ """Creates a `TensorTrainBatch`.
Args:
- tt_cores: A tuple of 3d or 4d tensor-like objects of shape
- `[r_k-1, n_k, r_k]`.
+      tt_cores: A tuple of 4d or 5d tensor-like objects of shape
+        `[batch_size, r_k-1, n_k, r_k]` or
+        `[batch_size, r_k-1, n_k, m_k, r_k]`.
        Tensor-like can be numpy array, tf.Tensor, or tf.Variable
batch_size: number of elements in the batch. If None, tries to infer from
the TT-cores (not always possible even if it should be, e.g. if ranks
@@ -106,17 +108,40 @@ def batch_size(self):
"""The number of elements or None if not known."""
return self._batch_size
+ @property
+ def left_tt_rank_dim(self):
+ """The dimension of the left TT-rank in each TT-core."""
+ return 1
+
+ @property
+ def right_tt_rank_dim(self):
+ """The dimension of the right TT-rank in each TT-core."""
+ if self.is_tt_matrix():
+ # The dimensions of each TT-core are
+ # [batch_idx, left_rank, n, m, right_rank]
+ return 4
+ else:
+ # The dimensions of each TT-core are
+ # [batch_idx, left_rank, n, right_rank]
+ return 3
+
def __str__(self):
"""A string describing the TensorTrainBatch, its TT-rank and shape."""
shape = self.get_shape()
tt_ranks = self.get_tt_ranks()
+ if self.batch_size is None:
+ batch_size_str = '(?)'
+ else:
+ batch_size_str = str(self.batch_size)
+
if self.is_tt_matrix():
raw_shape = self.get_raw_shape()
type_str = 'TT-matrix variables' if self.is_variable() else 'TT-matrices'
- return "A %d element batch of %s of size %d x %d, underlying tensor " \
- "shape: %s x %s, TT-ranks: %s" % (self.batch_size, type_str,
- shape[0], shape[1],
+
+ return "A %s element batch of %s of size %d x %d, underlying tensor " \
+ "shape: %s x %s, TT-ranks: %s" % (batch_size_str, type_str,
+ shape[1], shape[2],
raw_shape[0], raw_shape[1],
tt_ranks)
else:
@@ -124,8 +149,17 @@ def __str__(self):
type_str = 'Tensor Train variables'
else:
type_str = 'Tensor Trains'
- return "A %d element batch of %s of shape %s, TT-ranks: %s" % \
- (self.batch_size, type_str, shape, tt_ranks)
+ return "A %s element batch of %s of shape %s, TT-ranks: %s" % \
+ (batch_size_str, type_str, shape[1:], tt_ranks)
+
+ @staticmethod
+ def _do_collapse_dim(slice_spec):
+    # Returns true if slice_spec is specified exactly and we want to collapse
+    # the corresponding axis, i.e. return an object with fewer dims. To be
+    # used in indexing functions.
+    # If it's an actual slice, there is nothing to collapse. Otherwise (a
+    # number or a tf.Tensor) we want to collapse the axis.
+    return not isinstance(slice_spec, slice)
def _batch_dim_getitem(self, element_spec):
"""__getitem__ when provided only one (batch) index.
@@ -137,9 +171,7 @@ def _batch_dim_getitem(self, element_spec):
# This object index is specified exactly and we want to collapse the
# batch_size axis, i.e. return a TensorTrain instead of a TensorTrainBatch.
- do_collapse_batch_dim = isinstance(element_spec, numbers.Number)
- if not isinstance(element_spec, slice) and not do_collapse_batch_dim:
- raise ValueError('Expected just 1 index, got %s' % element_spec)
+ do_collapse_batch_dim = self._do_collapse_dim(element_spec)
new_tt_cores = []
for core_idx in range(self.ndims()):
@@ -170,7 +202,7 @@ def _full_getitem(self, slice_spec):
len(slice_spec)))
# This object index is specified exactly and we want to collapse the
# batch_size axis, i.e. return a TensorTrain instead of a TensorTrainBatch.
- do_collapse_batch_dim = isinstance(slice_spec[0], numbers.Number)
+ do_collapse_batch_dim = self._do_collapse_dim(slice_spec[0])
remainder = None
new_tt_cores = []
for core_idx in range(self.ndims()):
@@ -179,8 +211,7 @@ def _full_getitem(self, slice_spec):
raise NotImplementedError
else:
sliced_core = curr_core[slice_spec[0], :, slice_spec[core_idx + 1], :]
- do_collapse_curr_dim = isinstance(slice_spec[core_idx + 1],
- numbers.Number)
+ do_collapse_curr_dim = self._do_collapse_dim(slice_spec[core_idx + 1])
if do_collapse_curr_dim:
# This index is specified exactly and we want to collapse this axis.
if remainder is None:
@@ -240,9 +271,12 @@ def __getitem__(self, slice_spec):
`TensorTrainBatch` or `TensorTrain` depending on whether the first
(batch) dim was specified as a range or as a number.
"""
- new_tt_cores = []
- slice_only_batch_dim = isinstance(slice_spec, slice) or \
- isinstance(slice_spec, numbers.Number)
+ try:
+ slice_only_batch_dim = len(slice_spec) == 1
+ except TypeError:
+      # The argument is not iterable, so it's a single slice, a number, or a
+      # tf.Tensor holding a number.
+ slice_only_batch_dim = True
if slice_only_batch_dim:
# Indexing only for the batch_size axis, e.g. a[1:3].
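
A sketch of indexing a batch with a tf.Tensor, which `_do_collapse_dim` now treats like a plain integer:

```python
import tensorflow as tf
from t3f import initializers
from t3f import ops

batch = initializers.random_tensor_batch((3, 3, 4), batch_size=3)
idx = tf.placeholder(tf.int32)
# A tf.Tensor index collapses the batch axis, so `element` is a
# TensorTrain rather than a TensorTrainBatch.
element = batch[idx]
with tf.Session() as sess:
  print(sess.run(ops.full(element), {idx: 1}))
```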
diff --git a/t3f/tensor_train_batch_test.py b/t3f/tensor_train_batch_test.py
index dcdab497..b609f1a7 100644
--- a/t3f/tensor_train_batch_test.py
+++ b/t3f/tensor_train_batch_test.py
@@ -1,9 +1,7 @@
import tensorflow as tf
-from tensor_train import TensorTrain
-import tensor_train_batch
-import initializers
-import ops
+from t3f import initializers
+from t3f import ops
class TensorTrainBatchTest(tf.test.TestCase):
@@ -40,5 +38,40 @@ def testTensorIndexing(self):
desired, actual = sess.run([desired, actual])
self.assertAllClose(desired, actual)
+ # Wrong number of dims.
+ with self.assertRaises(ValueError):
+ tens[1, :, 3]
+ with self.assertRaises(ValueError):
+ tens[1, :, 3, 1:2, 1:3]
+ with self.assertRaises(ValueError):
+ tens[1, 1]
+
+ def testPlaceholderTensorIndexing(self):
+ tens = initializers.random_tensor_batch((3, 3, 4), batch_size=3)
+ with self.test_session() as sess:
+ start = tf.placeholder(tf.int32)
+ end = tf.placeholder(tf.int32)
+
+ desired = ops.full(tens)[0:-1]
+ actual = ops.full(tens[start:end])
+ desired, actual = sess.run([desired, actual], {start: 0, end: -1})
+ self.assertAllClose(desired, actual)
+
+ desired = ops.full(tens)[0:1]
+ actual = ops.full(tens[start:end])
+ desired, actual = sess.run([desired, actual], {start: 0, end: 1})
+ self.assertAllClose(desired, actual)
+
+ desired = ops.full(tens)[1]
+ actual = ops.full(tens[start])
+ desired, actual = sess.run([desired, actual], {start: 1})
+ self.assertAllClose(desired, actual)
+
+ desired = ops.full(tens)[1, 1:3, 1, :3]
+ actual = ops.full(tens[start, start:end, start, :end])
+ desired, actual = sess.run([desired, actual], {start: 1, end: 3})
+ self.assertAllClose(desired, actual)
+
+
if __name__ == "__main__":
tf.test.main()
diff --git a/t3f/tensor_train_test.py b/t3f/tensor_train_test.py
index 3a6234ef..22983424 100644
--- a/t3f/tensor_train_test.py
+++ b/t3f/tensor_train_test.py
@@ -1,8 +1,8 @@
import tensorflow as tf
-import tensor_train
-import initializers
-import ops
+from t3f import tensor_train
+from t3f import initializers
+from t3f import ops
class TensorTrainTest(tf.test.TestCase):
@@ -118,12 +118,20 @@ def testTensorIndexing(self):
desired, actual = sess.run([desired, actual])
self.assertAllClose(desired, actual)
- def testTensorIndexingOneElement(self):
- tens = initializers.random_tensor((4, 4, 4))
+ # Wrong number of dims.
+ with self.assertRaises(ValueError):
+ tens[1, :, 3, :]
+ with self.assertRaises(ValueError):
+ tens[1, 1]
+
+ def testPlaceholderTensorIndexing(self):
+ tens = initializers.random_tensor((3, 3, 4))
with self.test_session() as sess:
- desired = ops.full(tens)[1, 2, 3]
- actual = ops.full(tens[1, 2, 3])
- desired, actual = sess.run([desired, actual])
+ start = tf.placeholder(tf.int32)
+ end = tf.placeholder(tf.int32)
+ desired = ops.full(tens)[1:3, 1, :3]
+ actual = ops.full(tens[start:end, start, :end])
+ desired, actual = sess.run([desired, actual], {start: 1, end: 3})
self.assertAllClose(desired, actual)
if __name__ == "__main__":
diff --git a/t3f/utils.py b/t3f/utils.py
index 4ee241d5..7d26ab0d 100644
--- a/t3f/utils.py
+++ b/t3f/utils.py
@@ -1,3 +1,4 @@
+import numpy as np
import tensorflow as tf
@@ -10,3 +11,29 @@ def unravel_index(indices, shape):
strides_shifted = tf.cumprod(shape, exclusive=True, reverse=True)
res = (indices // strides_shifted) % shape
return tf.transpose(res, (1, 0))
+
+
+# TODO: get rid of this when TF fixes the NaN bugs in tf.svd:
+# https://github.com/tensorflow/tensorflow/issues/8905
+def replace_tf_svd_with_np_svd():
+ """Replaces tf.svd with np.svd. Slow, but a workaround for tf.svd bugs."""
+ if hasattr(tf, 'original_svd'):
+ # This function has been already called and tf.svd is already replaced.
+ return
+ tf.original_svd = tf.svd
+
+ def my_svd(tensor, full_matrices=False, compute_uv=True):
+ dtype = tensor.dtype
+ u, s, v = tf.py_func(np.linalg.svd, [tensor, full_matrices, compute_uv],
+ [dtype, dtype, dtype])
+ s_, u_, v_ = tf.original_svd(tensor, full_matrices, compute_uv)
+ s = tf.reshape(s, s_.get_shape())
+ u = tf.reshape(u, u_.get_shape())
+ v = tf.reshape(v, v_.get_shape())
+ # Converting numpy order of v dims to TF order.
+    order = list(range(tensor.get_shape().ndims))
+ order[-2], order[-1] = order[-1], order[-2]
+ v = tf.transpose(v, order)
+ return s, u, v
+
+ tf.svd = my_svd
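
A sketch of the intended use of this workaround: patch once, before building the graph, after which tf.svd calls go through np.linalg.svd:

```python
import tensorflow as tf
from t3f import utils

utils.replace_tf_svd_with_np_svd()
# From here on, tf.svd is backed by np.linalg.svd via tf.py_func.
s, u, v = tf.svd(tf.random_normal((5, 5)))
```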
diff --git a/t3f/utils_test.py b/t3f/utils_test.py
index 878a8f8d..170ab420 100644
--- a/t3f/utils_test.py
+++ b/t3f/utils_test.py
@@ -1,7 +1,7 @@
import numpy as np
import tensorflow as tf
-import utils
+from t3f import utils
class UtilsTest(tf.test.TestCase):
diff --git a/t3f/variables.py b/t3f/variables.py
index a733f9dd..e391ad67 100644
--- a/t3f/variables.py
+++ b/t3f/variables.py
@@ -1,7 +1,7 @@
import tensorflow as tf
-from tensor_train import TensorTrain
-from tensor_train_batch import TensorTrainBatch
+from t3f.tensor_train import TensorTrain
+from t3f.tensor_train_batch import TensorTrainBatch
def get_variable(name,
@@ -49,8 +49,13 @@ def get_variable(name,
# TODO: support validate shape: check that the tensor dimensions are correct,
# but ignore the ranks.
# TODO: add validate ranks flag.
+
+ reuse = tf.get_variable_scope().reuse
+ if not reuse and initializer is None:
+ raise ValueError('Scope reuse is False and initializer is not provided.')
+
variable_cores = []
- if initializer is None:
+ if reuse:
# Find an existing variable in the collection.
path = tf.get_variable_scope().name
if path != '' and path[-1] != '/':
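
A sketch of the new reuse semantics, mirroring the updated variables_test below: retrieval is keyed off the scope's reuse flag, so an initializer may now be passed (and is ignored) when reusing:

```python
import tensorflow as tf
from t3f import initializers
from t3f import variables

init = initializers.random_tensor((2, 3, 4))
tt = variables.get_variable('tt', initializer=init)
with tf.variable_scope('', reuse=True):
  # With reuse=True the existing variable is returned; the initializer,
  # if any, is ignored rather than raising an error.
  tt_copy = variables.get_variable('tt', initializer=0 * init)
```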
diff --git a/t3f/variables_test.py b/t3f/variables_test.py
index 9bfce4e3..de96a22d 100644
--- a/t3f/variables_test.py
+++ b/t3f/variables_test.py
@@ -1,9 +1,9 @@
import numpy as np
import tensorflow as tf
-import variables
-import ops
-import initializers
+from t3f import variables
+from t3f import ops
+from t3f import initializers
class VariablesTest(tf.test.TestCase):
@@ -21,10 +21,17 @@ def testGetExistingVariable(self):
with tf.variable_scope('', reuse=True):
# The variable doesn't exist.
variables.get_variable('tt_3')
+
with tf.variable_scope('', reuse=True):
tt_1_copy = variables.get_variable('tt_1')
self.assertAllClose(ops.full(tt_1).eval(), ops.full(tt_1_copy).eval())
+ with tf.variable_scope('', reuse=True):
+ # Again try to retrieve an existing variable, but pass an initializer
+ # and check that it still works.
+ tt_1_copy = variables.get_variable('tt_1', initializer=0 * init)
+ self.assertAllClose(ops.full(tt_1).eval(), ops.full(tt_1_copy).eval())
+
with self.assertRaises(ValueError):
with tf.variable_scope('', reuse=True):
# The variable is defined in a different scope