From 99842503a503be940545b0ab6e3e855aec1b741d Mon Sep 17 00:00:00 2001 From: Hui Chao Date: Fri, 6 Sep 2024 16:55:27 +0800 Subject: [PATCH] DOC: update doc content && update sphinx to latest version (#809) --- doc/source/conf.py | 9 +- doc/source/getting_started/index.rst | 3 +- doc/source/index.rst | 3 - doc/source/libraries/index.rst | 2 - doc/source/libraries/xorbits_data/pandas.rst | 581 +++++++++++++----- .../libraries/xorbits_inference/index.rst | 4 +- doc/source/reference/datasets/index.rst | 4 +- doc/source/reference/index.rst | 2 - doc/source/reference/lightgbm/learning.rst | 2 +- doc/source/reference/lightgbm/sklearn.rst | 3 +- doc/source/reference/xgboost/learning_api.rst | 2 +- doc/source/reference/xgboost/sklearn.rst | 2 +- doc/source/user_guide/deployment_cluster.rst | 2 +- doc/source/user_guide/deployment_slurm.rst | 2 +- doc/source/user_guide/loading_data.rst | 12 +- python/setup.cfg | 7 +- 16 files changed, 445 insertions(+), 195 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 1a219f884..c2c48d9da 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -21,13 +21,13 @@ "sphinx.ext.githubpages", "sphinx.ext.autosummary", "sphinx.ext.napoleon", - "IPython.sphinxext.ipython_directive", - "IPython.sphinxext.ipython_console_highlighting", + # "IPython.sphinxext.ipython_directive", + # "IPython.sphinxext.ipython_console_highlighting", "matplotlib.sphinxext.plot_directive", ] templates_path = ["_templates"] -exclude_patterns = [] +exclude_patterns = ['reference/numpy/*', 'reference/pandas/*'] # i18n locale_dirs = ["locale/"] # path is example but recommended. @@ -50,9 +50,6 @@ # see https://stackoverflow.com/a/73497480 if "zh_cn" not in tags.tags.keys(): # en - html_theme_options["external_links"] = [ - {"name": "xorbits.io", "url": "https://xorbits.io"}, - ] html_theme_options["icon_links"] = [ { "name": "GitHub", diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 4945f046b..b62b9f89b 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -8,8 +8,7 @@ Getting Started Use Xorbits to scale ML/DS workloads on your laptop or your own cluster. * Load and process datasets: :ref:`10min_pandas` -* Scale machine learning workloads: coming soon -* Deploy your own model: coming soon +* Scale machine learning workloads: :ref:`xorbits_train_index` * Deploy to the cloud: :ref:`deployment` .. toctree:: diff --git a/doc/source/index.rst b/doc/source/index.rst index 65c5b3115..63913a4b2 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -44,8 +44,6 @@ Getting involved +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | **Platform** | **Purpose** | +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `Discourse Forum `_ | Asking usage questions and discussing development. | - +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | `Github Issues `_ | Reporting bugs and filing feature requests. | +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | `Slack `_ | Collaborating with other Xorbits users. 
| @@ -75,6 +73,5 @@ Getting involved getting_started/index libraries/index user_guide/index - deployment/index reference/index development/index diff --git a/doc/source/libraries/index.rst b/doc/source/libraries/index.rst index e2843f9c1..81fa5b7e0 100644 --- a/doc/source/libraries/index.rst +++ b/doc/source/libraries/index.rst @@ -6,5 +6,3 @@ Libraries xorbits_data/index xorbits_train/index - xorbits_tune/index - xorbits_inference/index diff --git a/doc/source/libraries/xorbits_data/pandas.rst b/doc/source/libraries/xorbits_data/pandas.rst index c8e2ff33e..3935b3d57 100644 --- a/doc/source/libraries/xorbits_data/pandas.rst +++ b/doc/source/libraries/xorbits_data/pandas.rst @@ -10,49 +10,76 @@ This is a short introduction to :code:`xorbits.pandas` which is originated from Customarily, we import and init as follows: -.. ipython:: python +:: - import xorbits - import xorbits.numpy as np - import xorbits.pandas as pd - xorbits.init() + >>> import xorbits + >>> import xorbits.numpy as np + >>> import xorbits.pandas as pd + >>> xorbits.init() Object creation --------------- Creating a :class:`Series` by passing a list of values, letting it create a default integer index: -.. ipython:: python - :okwarning: +:: - s = pd.Series([1, 3, 5, np.nan, 6, 8]) - s + >>> s = pd.Series([1, 3, 5, np.nan, 6, 8]) + >>> s + 0 1.0 + 1 3.0 + 2 5.0 + 3 NaN + 4 6.0 + 5 8.0 + dtype: float64 Creating a :class:`DataFrame` by passing an array, with a datetime index and labeled columns: -.. ipython:: python - - dates = pd.date_range('20130101', periods=6) - dates - df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) - df +:: + + >>> dates = pd.date_range('20130101', periods=6) + >>> dates + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + >>> df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) + >>> df + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Creating a :class:`DataFrame` by passing a dict of objects that can be converted to series-like. -.. ipython:: python +:: - df2 = pd.DataFrame({'A': 1., - 'B': pd.Timestamp('20130102'), - 'C': pd.Series(1, index=list(range(4)), dtype='float32'), - 'D': np.array([3] * 4, dtype='int32'), - 'E': 'foo'}) - df2 + >>> df2 = pd.DataFrame({'A': 1., + 'B': pd.Timestamp('20130102'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), + 'D': np.array([3] * 4, dtype='int32'), + 'E': 'foo'}) + >>> df2 + A B C D E + 0 1.0 2013-01-02 1.0 3 foo + 1 1.0 2013-01-02 1.0 3 foo + 2 1.0 2013-01-02 1.0 3 foo + 3 1.0 2013-01-02 1.0 3 foo The columns of the resulting :class:`DataFrame` have different dtypes. -.. ipython:: python +:: - df2.dtypes + >>> df2.dtypes + A float64 + B datetime64[s] + C float32 + D int32 + E object + dtype: object Viewing data @@ -60,17 +87,31 @@ Viewing data Here is how to view the top and bottom rows of the frame: -.. 
ipython:: python - - df.head() - df.tail(3) +:: + + >>> df.head() + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + >>> df.tail(3) + A B C D + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Display the index, columns: -.. ipython:: python +:: - df.index - df.columns + >>> df.index + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + >>> df.columns + Index(['A', 'B', 'C', 'D'], dtype='object') :meth:`DataFrame.to_numpy` gives a ndarray representation of the underlying data. Note that this @@ -84,16 +125,27 @@ value to a Python object. For ``df``, our :class:`DataFrame` of all floating-point values, :meth:`DataFrame.to_numpy` is fast and doesn't require copying data. -.. ipython:: python +:: - df.to_numpy() + >>> df.to_numpy() + array([[ 0.41190169, 1.70946816, -0.21315821, 0.82164367], + [-0.72191001, -1.67731119, -1.57098611, -0.62196894], + [ 0.42108334, -0.75019064, 0.26975121, -2.79928919], + [-1.32915794, 1.2740364 , 2.44269141, -0.40972548], + [ 0.68920499, -1.50195139, 0.36299995, 0.40149762], + [ 0.42694729, -0.46959787, -1.29529258, -1.43516459]]) For ``df2``, the :class:`DataFrame` with multiple dtypes, :meth:`DataFrame.to_numpy` is relatively expensive. -.. ipython:: python +:: - df2.to_numpy() + >>> df2.to_numpy() + array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo']], + dtype=object) .. note:: @@ -102,21 +154,44 @@ expensive. :func:`~DataFrame.describe` shows a quick statistic summary of your data: -.. ipython:: python +:: - df.describe() + >>> df.describe() + A B C D + count 6.000000 6.000000 6.000000 6.000000 + mean -0.016988 -0.235924 -0.000666 -0.673834 + std 0.811215 1.418734 1.439617 1.308619 + min -1.329158 -1.677311 -1.570986 -2.799289 + 25% -0.438457 -1.314011 -1.024759 -1.231866 + 50% 0.416493 -0.609894 0.028296 -0.515847 + 75% 0.425481 0.838128 0.339688 0.198692 + max 0.689205 1.709468 2.442691 0.821644 Sorting by an axis: -.. ipython:: python +:: - df.sort_index(axis=1, ascending=False) + >>> df.sort_index(axis=1, ascending=False) + D C B A + 2013-01-01 0.821644 -0.213158 1.709468 0.411902 + 2013-01-02 -0.621969 -1.570986 -1.677311 -0.721910 + 2013-01-03 -2.799289 0.269751 -0.750191 0.421083 + 2013-01-04 -0.409725 2.442691 1.274036 -1.329158 + 2013-01-05 0.401498 0.363000 -1.501951 0.689205 + 2013-01-06 -1.435165 -1.295293 -0.469598 0.426947 Sorting by values: -.. ipython:: python +:: - df.sort_values(by='B') + >>> df.sort_values(by='B') + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 Selection --------- @@ -134,117 +209,187 @@ Getting Selecting a single column, which yields a :class:`Series`, equivalent to ``df.A``: -.. 
ipython:: python +:: - df['A'] + >>> df['A'] + 2013-01-01 0.411902 + 2013-01-02 -0.721910 + 2013-01-03 0.421083 + 2013-01-04 -1.329158 + 2013-01-05 0.689205 + 2013-01-06 0.426947 + Freq: D, Name: A, dtype: float64 Selecting via ``[]``, which slices the rows: -.. ipython:: python - :okwarning: +:: - df[0:3] - df['20130102':'20130104'] + >>> df[0:3] + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + >>> df['20130102':'20130104'] + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 Selection by label ~~~~~~~~~~~~~~~~~~ For getting a cross section using a label: -.. ipython:: python +:: - df.loc['20130101'] + >>> df.loc['20130101'] + A 0.411902 + B 1.709468 + C -0.213158 + D 0.821644 + Name: 2013-01-01 00:00:00, dtype: float64 Selecting on a multi-axis by label: -.. ipython:: python +:: - df.loc[:, ['A', 'B']] + >>> df.loc[:, ['A', 'B']] + A B + 2013-01-01 0.411902 1.709468 + 2013-01-02 -0.721910 -1.677311 + 2013-01-03 0.421083 -0.750191 + 2013-01-04 -1.329158 1.274036 + 2013-01-05 0.689205 -1.501951 + 2013-01-06 0.426947 -0.469598 Showing label slicing, both endpoints are *included*: -.. ipython:: python - :okwarning: +:: - df.loc['20130102':'20130104', ['A', 'B']] + >>> df.loc['20130102':'20130104', ['A', 'B']] + A B + 2013-01-02 -0.721910 -1.677311 + 2013-01-03 0.421083 -0.750191 + 2013-01-04 -1.329158 1.274036 Reduction in the dimensions of the returned object: -.. ipython:: python +:: - df.loc['20130102', ['A', 'B']] + >>> df.loc['20130102', ['A', 'B']] + A -0.721910 + B -1.677311 + Name: 2013-01-02 00:00:00, dtype: float64 For getting a scalar value: -.. ipython:: python +:: - df.loc['20130101', 'A'] + >>> df.loc['20130101', 'A'] + 0.41190169091385387 For getting fast access to a scalar (equivalent to the prior method): -.. ipython:: python +:: - df.at['20130101', 'A'] + >>> df.at['20130101', 'A'] + 0.41190169091385387 Selection by position ~~~~~~~~~~~~~~~~~~~~~ Select via the position of the passed integers: -.. ipython:: python +:: - df.iloc[3] + >>> df.iloc[3] + A -1.329158 + B 1.274036 + C 2.442691 + D -0.409725 + Name: 2013-01-04 00:00:00, dtype: float64 By integer slices, acting similar to python: -.. ipython:: python +:: - df.iloc[3:5, 0:2] + >>> df.iloc[3:5, 0:2] + A B + 2013-01-04 -1.329158 1.274036 + 2013-01-05 0.689205 -1.501951 By lists of integer position locations, similar to the python style: -.. ipython:: python +:: - df.iloc[[1, 2, 4], [0, 2]] + >>> df.iloc[[1, 2, 4], [0, 2]] + A C + 2013-01-02 -0.721910 -1.570986 + 2013-01-03 0.421083 0.269751 + 2013-01-05 0.689205 0.363000 For slicing rows explicitly: -.. ipython:: python +:: - df.iloc[1:3, :] + >>> df.iloc[1:3, :] + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 For slicing columns explicitly: -.. ipython:: python +:: - df.iloc[:, 1:3] + >>> df.iloc[:, 1:3] + B C + 2013-01-01 1.709468 -0.213158 + 2013-01-02 -1.677311 -1.570986 + 2013-01-03 -0.750191 0.269751 + 2013-01-04 1.274036 2.442691 + 2013-01-05 -1.501951 0.363000 + 2013-01-06 -0.469598 -1.295293 For getting a value explicitly: -.. ipython:: python +:: - df.iloc[1, 1] + >>> df.iloc[1, 1] + -1.6773111933012679 For getting fast access to a scalar (equivalent to the prior method): -.. 
ipython:: python +:: - df.iat[1, 1] + >>> df.iat[1, 1] + -1.6773111933012679 Boolean indexing ~~~~~~~~~~~~~~~~ Using a single column's values to select data. -.. ipython:: python +:: - df[df['A'] > 0] + >>> df[df['A'] > 0] + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Selecting values from a DataFrame where a boolean condition is met. -.. ipython:: python +:: - df[df > 0] + >>> df[df > 0] + A B C D + 2013-01-01 0.411902 1.709468 NaN 0.821644 + 2013-01-02 NaN NaN NaN NaN + 2013-01-03 0.421083 NaN 0.269751 NaN + 2013-01-04 NaN 1.274036 2.442691 NaN + 2013-01-05 0.689205 NaN 0.363000 0.401498 + 2013-01-06 0.426947 NaN NaN NaN Operations @@ -257,26 +402,52 @@ Operations in general *exclude* missing data. Performing a descriptive statistic: -.. ipython:: python +:: - df.mean() + >>> df.mean() + A -0.016988 + B -0.235924 + C -0.000666 + D -0.673834 + dtype: float64 Same operation on the other axis: -.. ipython:: python +:: - df.mean(1) + >>> df.mean(1) + 2013-01-01 0.682464 + 2013-01-02 -1.148044 + 2013-01-03 -0.714661 + 2013-01-04 0.494461 + 2013-01-05 -0.012062 + 2013-01-06 -0.693277 + Freq: D, dtype: float64 Operating with objects that have different dimensionality and need alignment. In addition, :code:`xorbits.pandas` automatically broadcasts along the specified dimension. -.. ipython:: python - - s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) - s - df.sub(s, axis='index') +:: + + >>> s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) + >>> s + 2013-01-01 NaN + 2013-01-02 NaN + 2013-01-03 1.0 + 2013-01-04 3.0 + 2013-01-05 5.0 + 2013-01-06 NaN + Freq: D, dtype: float64 + >>> df.sub(s, axis='index') + A B C D + 2013-01-01 NaN NaN NaN NaN + 2013-01-02 NaN NaN NaN NaN + 2013-01-03 -0.578917 -1.750191 -0.730249 -3.799289 + 2013-01-04 -4.329158 -1.725964 -0.557309 -3.409725 + 2013-01-05 -4.310795 -6.501951 -4.637000 -4.598502 + 2013-01-06 NaN NaN NaN NaN Apply @@ -284,9 +455,14 @@ Apply Applying functions to the data: -.. ipython:: python +:: - df.apply(lambda x: x.max() - x.min()) + >>> df.apply(lambda x: x.max() - x.min()) + A 2.018363 + B 3.386779 + C 4.013678 + D 3.620933 + dtype: float64 String Methods ~~~~~~~~~~~~~~ @@ -297,10 +473,20 @@ code snippet below. Note that pattern-matching in `str` generally uses `regular expressions `__ by default (and in some cases always uses them). -.. ipython:: python - - s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) - s.str.lower() +:: + + >>> s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) + >>> s.str.lower() + 0 a + 1 b + 2 c + 3 aaba + 4 baca + 5 NaN + 6 caba + 7 dog + 8 cat + dtype: object Merge ----- @@ -315,15 +501,37 @@ operations. Concatenating :code:`xorbits.pandas` objects together with :func:`concat`: -.. 
ipython:: python - - df = pd.DataFrame(np.random.randn(10, 4)) - df - - # break it into pieces - pieces = [df[:3], df[3:7], df[7:]] - - pd.concat(pieces) +:: + + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df + 0 1 2 3 + 0 -0.495508 0.903802 2.152979 1.098698 + 1 -0.327001 -0.586382 1.999350 -1.056401 + 2 0.341923 -0.024582 0.439198 0.662602 + 3 -1.896886 0.181549 0.119640 -1.426697 + 4 -2.407668 -0.780552 -1.301063 0.510010 + 5 -0.350738 -0.147771 -0.566869 -2.414299 + 6 -1.994935 -0.486425 -0.531758 1.624540 + 7 -0.358207 -0.884470 1.257721 0.587503 + 8 -0.945414 -1.055967 1.334790 0.817954 + 9 1.116094 -0.664818 -0.298791 0.042105 + + >>> # break it into pieces + >>> pieces = [df[:3], df[3:7], df[7:]] + + >>> pd.concat(pieces) + 0 1 2 3 + 0 -0.495508 0.903802 2.152979 1.098698 + 1 -0.327001 -0.586382 1.999350 -1.056401 + 2 0.341923 -0.024582 0.439198 0.662602 + 3 -1.896886 0.181549 0.119640 -1.426697 + 4 -2.407668 -0.780552 -1.301063 0.510010 + 5 -0.350738 -0.147771 -0.566869 -2.414299 + 6 -1.994935 -0.486425 -0.531758 1.624540 + 7 -0.358207 -0.884470 1.257721 0.587503 + 8 -0.945414 -1.055967 1.334790 0.817954 + 9 1.116094 -0.664818 -0.298791 0.042105 .. note:: Adding a column to a :class:`DataFrame` is relatively fast. However, adding @@ -336,23 +544,44 @@ Join SQL style merges. -.. ipython:: python - - left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) - right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) - left - right - pd.merge(left, right, on='key') +:: + + >>> left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) + >>> right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) + >>> left + key lval + 0 foo 1 + 1 foo 2 + >>> right + key rval + 0 foo 4 + 1 foo 5 + >>> pd.merge(left, right, on='key') + key lval rval + 0 foo 1 4 + 1 foo 1 5 + 2 foo 2 4 + 3 foo 2 5 Another example that can be given is: -.. ipython:: python +:: + + >>> left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]}) + >>> right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]}) + >>> left + key lval + 0 foo 1 + 1 bar 2 + >>> right + key rval + 0 foo 4 + 1 bar 5 + >>> pd.merge(left, right, on='key') + key lval rval + 0 foo 1 4 + 1 bar 2 5 - left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]}) - right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]}) - left - right - pd.merge(left, right, on='key') Grouping -------- @@ -365,63 +594,83 @@ following steps: - **Combining** the results into a data structure -.. ipython:: python - - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - df +:: + + >>> df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + >>> df + A B C D + 0 foo one -0.473456 1.016378 + 1 bar one 0.373591 0.480215 + 2 foo two -0.538622 -0.490436 + 3 bar three -1.833243 -1.471246 + 4 foo two -0.083388 1.389476 + 5 bar two 0.874384 2.006862 + 6 foo one -0.968538 -1.703000 + 7 foo three -1.840837 0.066493 Grouping and then applying the :meth:`~xorbits.pandas.groupby.DataFrameGroupBy.sum` function to the resulting groups. -.. 
ipython:: python
-    :okwarning:
+::
 
-    df.groupby('A').sum()
+    >>> df.groupby('A').sum()
+                         B         C         D
+    A
+    bar        onethreetwo -0.585268  1.015831
+    foo  onetwotwoonethree -3.904840  0.278910
 
 Grouping by multiple columns forms a hierarchical index, and again we can
 apply the `sum` function.
 
-.. ipython:: python
+::
 
-    df.groupby(['A', 'B']).sum()
+    >>> df.groupby(['A', 'B']).sum()
+                      C         D
+    A   B
+    bar one    0.373591  0.480215
+        three -1.833243 -1.471246
+        two    0.874384  2.006862
+    foo one   -1.441994 -0.686622
+        three -1.840837  0.066493
+        two   -0.622010  0.899039
 
 Plotting
 --------
 
 We use the standard convention for referencing the matplotlib API:
 
-.. ipython:: python
+::
 
-    import matplotlib.pyplot as plt
-    plt.close('all')
+    >>> import matplotlib.pyplot as plt
+    >>> plt.close('all')
 
-.. ipython:: python
+::
 
-    ts = pd.Series(np.random.randn(1000),
-                   index=pd.date_range('1/1/2000', periods=1000))
-    ts = ts.cumsum()
+    >>> ts = pd.Series(np.random.randn(1000),
+               index=pd.date_range('1/1/2000', periods=1000))
+    >>> ts = ts.cumsum()
 
-    @savefig series_plot_basic.png
-    ts.plot()
+    >>> ts.plot()
 
 On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all
 of the columns with labels:
 
-.. ipython:: python
+::
 
-    df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
-                      columns=['A', 'B', 'C', 'D'])
-    df = df.cumsum()
+    >>> df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
+               columns=['A', 'B', 'C', 'D'])
+    >>> df = df.cumsum()
 
-    plt.figure()
-    df.plot()
-    @savefig frame_plot_basic.png
-    plt.legend(loc='best')
+    >>> plt.figure()
+    >>> df.plot()
+    >>> plt.legend(loc='best')
 
 Getting data in/out
 -------------------
@@ -431,19 +680,35 @@ CSV
 
 Writing to a csv file.
 
-.. ipython:: python
+::
 
-    df.to_csv('foo.csv')
+    >>> df.to_csv('foo.csv')
+    Empty DataFrame
+    Columns: []
+    Index: []
 
 Reading from a csv file.
 
-.. ipython:: python
-
-    pd.read_csv('foo.csv')
-
-.. ipython:: python
-    :suppress:
-
-    import os
-    os.remove('foo.csv')
-    xorbits.shutdown()
+::
+
+    >>> pd.read_csv('foo.csv')
+        Unnamed: 0         A          B         C          D
+    0   2000-01-01  0.385646   1.201584  -1.701511  -0.693112
+    1   2000-01-02  0.331648  -0.203431  -1.030354  -0.045550
+    2   2000-01-03  0.112350   0.024239  -0.690759  -1.354678
+    3   2000-01-04 -0.492772  -1.407550   0.535260  -0.030373
+    4   2000-01-05 -0.557673   0.116826   2.127525  -0.835155
+    ..         ...       ...        ...        ...        ...
+    995 2002-09-22  6.795263  15.514409  -8.909048 -43.613612
+    996 2002-09-23  5.241447  15.386009  -9.248272 -43.035980
+    997 2002-09-24  2.541217  14.514584  -9.051257 -43.824801
+    998 2002-09-25  1.450811  14.913616  -9.681888 -42.579596
+    999 2002-09-26  1.895067  16.139412  -8.192430 -42.140289
+
+    [1000 rows x 5 columns]
+
+::
+
+    >>> import os
+    >>> os.remove('foo.csv')
+    >>> xorbits.shutdown()
diff --git a/doc/source/libraries/xorbits_inference/index.rst b/doc/source/libraries/xorbits_inference/index.rst
index 2d7e9c574..a0fea37b0 100644
--- a/doc/source/libraries/xorbits_inference/index.rst
+++ b/doc/source/libraries/xorbits_inference/index.rst
@@ -1,8 +1,8 @@
 .. _xorbits_inference_index:
 
-=============
+=================
 Xorbits Inference
-=============
+=================
 
 .. toctree::
    :maxdepth: 2
diff --git a/doc/source/reference/datasets/index.rst b/doc/source/reference/datasets/index.rst
index 8af993ba7..75dcb7d57 100644
--- a/doc/source/reference/datasets/index.rst
+++ b/doc/source/reference/datasets/index.rst
@@ -1,6 +1,6 @@
-==========
+============
 Datasets API
-==========
+============
 
 .. 
toctree:: :maxdepth: 2 diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index ea8194ccb..7833a82a9 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -8,8 +8,6 @@ API Reference :maxdepth: 2 xorbits/index - pandas/index - numpy/index datasets/index xgboost/index lightgbm/index diff --git a/doc/source/reference/lightgbm/learning.rst b/doc/source/reference/lightgbm/learning.rst index f2c0b00da..f363b72e7 100644 --- a/doc/source/reference/lightgbm/learning.rst +++ b/doc/source/reference/lightgbm/learning.rst @@ -1,4 +1,4 @@ -.. _api.learning_api: +.. _api.lightgbm_learning_api: ============ Learning API diff --git a/doc/source/reference/lightgbm/sklearn.rst b/doc/source/reference/lightgbm/sklearn.rst index 04528dcb6..9fc36acf0 100644 --- a/doc/source/reference/lightgbm/sklearn.rst +++ b/doc/source/reference/lightgbm/sklearn.rst @@ -1,4 +1,4 @@ -.. _api.sklearn: +.. _api.lightgbm_sklearn: ================ Scikit-Learn API @@ -78,7 +78,6 @@ Attributes LGBMRanker.load_model LGBMRanker.predict LGBMRanker.predict_proba - LGBMRanker.score LGBMRanker.set_params LGBMRanker.to_local diff --git a/doc/source/reference/xgboost/learning_api.rst b/doc/source/reference/xgboost/learning_api.rst index 966e75d28..b04cc4b44 100644 --- a/doc/source/reference/xgboost/learning_api.rst +++ b/doc/source/reference/xgboost/learning_api.rst @@ -1,4 +1,4 @@ -.. _api.learning_api: +.. _api.xgboost_learning_api: ============ Learning API diff --git a/doc/source/reference/xgboost/sklearn.rst b/doc/source/reference/xgboost/sklearn.rst index 12ae679d5..1e18e8372 100644 --- a/doc/source/reference/xgboost/sklearn.rst +++ b/doc/source/reference/xgboost/sklearn.rst @@ -1,4 +1,4 @@ -.. _api.sklearn: +.. _api.xgboost_sklearn: ================ Scikit-Learn API diff --git a/doc/source/user_guide/deployment_cluster.rst b/doc/source/user_guide/deployment_cluster.rst index 6c78577ab..c8527cc92 100644 --- a/doc/source/user_guide/deployment_cluster.rst +++ b/doc/source/user_guide/deployment_cluster.rst @@ -70,7 +70,7 @@ You can open a web browser and type ``http://:=1.0 pytest-asyncio>=0.14.0 pytest-mock>=3.11.1 - sphinx>=3.0.0,<5.0.0 + sphinx pydata-sphinx-theme>=0.3.0 sphinx-intl>=0.9.9 flake8>=3.8.0 @@ -73,12 +73,9 @@ dev = matplotlib datasets doc = - ipython>=6.5.0 - sphinx>=3.0.0,<5.0.0 + sphinx pydata-sphinx-theme>=0.3.0 sphinx-intl>=0.9.9 - xgboost>=1.3.0 - lightgbm>=3.3.5 matplotlib datasets extra =