From 99842503a503be940545b0ab6e3e855aec1b741d Mon Sep 17 00:00:00 2001 From: Hui Chao Date: Fri, 6 Sep 2024 16:55:27 +0800 Subject: [PATCH] DOC: update doc content && update sphinx to latest version (#809) --- doc/source/conf.py | 9 +- doc/source/getting_started/index.rst | 3 +- doc/source/index.rst | 3 - doc/source/libraries/index.rst | 2 - doc/source/libraries/xorbits_data/pandas.rst | 581 +++++++++++++----- .../libraries/xorbits_inference/index.rst | 4 +- doc/source/reference/datasets/index.rst | 4 +- doc/source/reference/index.rst | 2 - doc/source/reference/lightgbm/learning.rst | 2 +- doc/source/reference/lightgbm/sklearn.rst | 3 +- doc/source/reference/xgboost/learning_api.rst | 2 +- doc/source/reference/xgboost/sklearn.rst | 2 +- doc/source/user_guide/deployment_cluster.rst | 2 +- doc/source/user_guide/deployment_slurm.rst | 2 +- doc/source/user_guide/loading_data.rst | 12 +- python/setup.cfg | 7 +- 16 files changed, 445 insertions(+), 195 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 1a219f884..c2c48d9da 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -21,13 +21,13 @@ "sphinx.ext.githubpages", "sphinx.ext.autosummary", "sphinx.ext.napoleon", - "IPython.sphinxext.ipython_directive", - "IPython.sphinxext.ipython_console_highlighting", + # "IPython.sphinxext.ipython_directive", + # "IPython.sphinxext.ipython_console_highlighting", "matplotlib.sphinxext.plot_directive", ] templates_path = ["_templates"] -exclude_patterns = [] +exclude_patterns = ['reference/numpy/*', 'reference/pandas/*'] # i18n locale_dirs = ["locale/"] # path is example but recommended. @@ -50,9 +50,6 @@ # see https://stackoverflow.com/a/73497480 if "zh_cn" not in tags.tags.keys(): # en - html_theme_options["external_links"] = [ - {"name": "xorbits.io", "url": "https://xorbits.io"}, - ] html_theme_options["icon_links"] = [ { "name": "GitHub", diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 4945f046b..b62b9f89b 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -8,8 +8,7 @@ Getting Started Use Xorbits to scale ML/DS workloads on your laptop or your own cluster. * Load and process datasets: :ref:`10min_pandas` -* Scale machine learning workloads: coming soon -* Deploy your own model: coming soon +* Scale machine learning workloads: :ref:`xorbits_train_index` * Deploy to the cloud: :ref:`deployment` .. toctree:: diff --git a/doc/source/index.rst b/doc/source/index.rst index 65c5b3115..63913a4b2 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -44,8 +44,6 @@ Getting involved +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | **Platform** | **Purpose** | +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ - | `Discourse Forum `_ | Asking usage questions and discussing development. | - +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | `Github Issues `_ | Reporting bugs and filing feature requests. | +--------------------------------------------------------------------------------------------------+----------------------------------------------------+ | `Slack `_ | Collaborating with other Xorbits users. 
| @@ -75,6 +73,5 @@ Getting involved getting_started/index libraries/index user_guide/index - deployment/index reference/index development/index diff --git a/doc/source/libraries/index.rst b/doc/source/libraries/index.rst index e2843f9c1..81fa5b7e0 100644 --- a/doc/source/libraries/index.rst +++ b/doc/source/libraries/index.rst @@ -6,5 +6,3 @@ Libraries xorbits_data/index xorbits_train/index - xorbits_tune/index - xorbits_inference/index diff --git a/doc/source/libraries/xorbits_data/pandas.rst b/doc/source/libraries/xorbits_data/pandas.rst index c8e2ff33e..3935b3d57 100644 --- a/doc/source/libraries/xorbits_data/pandas.rst +++ b/doc/source/libraries/xorbits_data/pandas.rst @@ -10,49 +10,76 @@ This is a short introduction to :code:`xorbits.pandas` which is originated from Customarily, we import and init as follows: -.. ipython:: python +:: - import xorbits - import xorbits.numpy as np - import xorbits.pandas as pd - xorbits.init() + >>> import xorbits + >>> import xorbits.numpy as np + >>> import xorbits.pandas as pd + >>> xorbits.init() Object creation --------------- Creating a :class:`Series` by passing a list of values, letting it create a default integer index: -.. ipython:: python - :okwarning: +:: - s = pd.Series([1, 3, 5, np.nan, 6, 8]) - s + >>> s = pd.Series([1, 3, 5, np.nan, 6, 8]) + >>> s + 0 1.0 + 1 3.0 + 2 5.0 + 3 NaN + 4 6.0 + 5 8.0 + dtype: float64 Creating a :class:`DataFrame` by passing an array, with a datetime index and labeled columns: -.. ipython:: python - - dates = pd.date_range('20130101', periods=6) - dates - df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) - df +:: + + >>> dates = pd.date_range('20130101', periods=6) + >>> dates + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + >>> df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) + >>> df + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Creating a :class:`DataFrame` by passing a dict of objects that can be converted to series-like. -.. ipython:: python +:: - df2 = pd.DataFrame({'A': 1., - 'B': pd.Timestamp('20130102'), - 'C': pd.Series(1, index=list(range(4)), dtype='float32'), - 'D': np.array([3] * 4, dtype='int32'), - 'E': 'foo'}) - df2 + >>> df2 = pd.DataFrame({'A': 1., + 'B': pd.Timestamp('20130102'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), + 'D': np.array([3] * 4, dtype='int32'), + 'E': 'foo'}) + >>> df2 + A B C D E + 0 1.0 2013-01-02 1.0 3 foo + 1 1.0 2013-01-02 1.0 3 foo + 2 1.0 2013-01-02 1.0 3 foo + 3 1.0 2013-01-02 1.0 3 foo The columns of the resulting :class:`DataFrame` have different dtypes. -.. ipython:: python +:: - df2.dtypes + >>> df2.dtypes + A float64 + B datetime64[s] + C float32 + D int32 + E object + dtype: object Viewing data @@ -60,17 +87,31 @@ Viewing data Here is how to view the top and bottom rows of the frame: -.. 
ipython:: python - - df.head() - df.tail(3) +:: + + >>> df.head() + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + >>> df.tail(3) + A B C D + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Display the index, columns: -.. ipython:: python +:: - df.index - df.columns + >>> df.index + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + >>> df.columns + Index(['A', 'B', 'C', 'D'], dtype='object') :meth:`DataFrame.to_numpy` gives a ndarray representation of the underlying data. Note that this @@ -84,16 +125,27 @@ value to a Python object. For ``df``, our :class:`DataFrame` of all floating-point values, :meth:`DataFrame.to_numpy` is fast and doesn't require copying data. -.. ipython:: python +:: - df.to_numpy() + >>> df.to_numpy() + array([[ 0.41190169, 1.70946816, -0.21315821, 0.82164367], + [-0.72191001, -1.67731119, -1.57098611, -0.62196894], + [ 0.42108334, -0.75019064, 0.26975121, -2.79928919], + [-1.32915794, 1.2740364 , 2.44269141, -0.40972548], + [ 0.68920499, -1.50195139, 0.36299995, 0.40149762], + [ 0.42694729, -0.46959787, -1.29529258, -1.43516459]]) For ``df2``, the :class:`DataFrame` with multiple dtypes, :meth:`DataFrame.to_numpy` is relatively expensive. -.. ipython:: python +:: - df2.to_numpy() + >>> df2.to_numpy() + array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo'], + [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'foo']], + dtype=object) .. note:: @@ -102,21 +154,44 @@ expensive. :func:`~DataFrame.describe` shows a quick statistic summary of your data: -.. ipython:: python +:: - df.describe() + >>> df.describe() + A B C D + count 6.000000 6.000000 6.000000 6.000000 + mean -0.016988 -0.235924 -0.000666 -0.673834 + std 0.811215 1.418734 1.439617 1.308619 + min -1.329158 -1.677311 -1.570986 -2.799289 + 25% -0.438457 -1.314011 -1.024759 -1.231866 + 50% 0.416493 -0.609894 0.028296 -0.515847 + 75% 0.425481 0.838128 0.339688 0.198692 + max 0.689205 1.709468 2.442691 0.821644 Sorting by an axis: -.. ipython:: python +:: - df.sort_index(axis=1, ascending=False) + >>> df.sort_index(axis=1, ascending=False) + D C B A + 2013-01-01 0.821644 -0.213158 1.709468 0.411902 + 2013-01-02 -0.621969 -1.570986 -1.677311 -0.721910 + 2013-01-03 -2.799289 0.269751 -0.750191 0.421083 + 2013-01-04 -0.409725 2.442691 1.274036 -1.329158 + 2013-01-05 0.401498 0.363000 -1.501951 0.689205 + 2013-01-06 -1.435165 -1.295293 -0.469598 0.426947 Sorting by values: -.. ipython:: python +:: - df.sort_values(by='B') + >>> df.sort_values(by='B') + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 Selection --------- @@ -134,117 +209,187 @@ Getting Selecting a single column, which yields a :class:`Series`, equivalent to ``df.A``: -.. 
ipython:: python +:: - df['A'] + >>> df['A'] + 2013-01-01 0.411902 + 2013-01-02 -0.721910 + 2013-01-03 0.421083 + 2013-01-04 -1.329158 + 2013-01-05 0.689205 + 2013-01-06 0.426947 + Freq: D, Name: A, dtype: float64 Selecting via ``[]``, which slices the rows: -.. ipython:: python - :okwarning: +:: - df[0:3] - df['20130102':'20130104'] + >>> df[0:3] + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + >>> df['20130102':'20130104'] + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-04 -1.329158 1.274036 2.442691 -0.409725 Selection by label ~~~~~~~~~~~~~~~~~~ For getting a cross section using a label: -.. ipython:: python +:: - df.loc['20130101'] + >>> df.loc['20130101'] + A 0.411902 + B 1.709468 + C -0.213158 + D 0.821644 + Name: 2013-01-01 00:00:00, dtype: float64 Selecting on a multi-axis by label: -.. ipython:: python +:: - df.loc[:, ['A', 'B']] + >>> df.loc[:, ['A', 'B']] + A B + 2013-01-01 0.411902 1.709468 + 2013-01-02 -0.721910 -1.677311 + 2013-01-03 0.421083 -0.750191 + 2013-01-04 -1.329158 1.274036 + 2013-01-05 0.689205 -1.501951 + 2013-01-06 0.426947 -0.469598 Showing label slicing, both endpoints are *included*: -.. ipython:: python - :okwarning: +:: - df.loc['20130102':'20130104', ['A', 'B']] + >>> df.loc['20130102':'20130104', ['A', 'B']] + A B + 2013-01-02 -0.721910 -1.677311 + 2013-01-03 0.421083 -0.750191 + 2013-01-04 -1.329158 1.274036 Reduction in the dimensions of the returned object: -.. ipython:: python +:: - df.loc['20130102', ['A', 'B']] + >>> df.loc['20130102', ['A', 'B']] + A -0.721910 + B -1.677311 + Name: 2013-01-02 00:00:00, dtype: float64 For getting a scalar value: -.. ipython:: python +:: - df.loc['20130101', 'A'] + >>> df.loc['20130101', 'A'] + 0.41190169091385387 For getting fast access to a scalar (equivalent to the prior method): -.. ipython:: python +:: - df.at['20130101', 'A'] + >>> df.at['20130101', 'A'] + 0.41190169091385387 Selection by position ~~~~~~~~~~~~~~~~~~~~~ Select via the position of the passed integers: -.. ipython:: python +:: - df.iloc[3] + >>> df.iloc[3] + A -1.329158 + B 1.274036 + C 2.442691 + D -0.409725 + Name: 2013-01-04 00:00:00, dtype: float64 By integer slices, acting similar to python: -.. ipython:: python +:: - df.iloc[3:5, 0:2] + >>> df.iloc[3:5, 0:2] + A B + 2013-01-04 -1.329158 1.274036 + 2013-01-05 0.689205 -1.501951 By lists of integer position locations, similar to the python style: -.. ipython:: python +:: - df.iloc[[1, 2, 4], [0, 2]] + >>> df.iloc[[1, 2, 4], [0, 2]] + A C + 2013-01-02 -0.721910 -1.570986 + 2013-01-03 0.421083 0.269751 + 2013-01-05 0.689205 0.363000 For slicing rows explicitly: -.. ipython:: python +:: - df.iloc[1:3, :] + >>> df.iloc[1:3, :] + A B C D + 2013-01-02 -0.721910 -1.677311 -1.570986 -0.621969 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 For slicing columns explicitly: -.. ipython:: python +:: - df.iloc[:, 1:3] + >>> df.iloc[:, 1:3] + B C + 2013-01-01 1.709468 -0.213158 + 2013-01-02 -1.677311 -1.570986 + 2013-01-03 -0.750191 0.269751 + 2013-01-04 1.274036 2.442691 + 2013-01-05 -1.501951 0.363000 + 2013-01-06 -0.469598 -1.295293 For getting a value explicitly: -.. ipython:: python +:: - df.iloc[1, 1] + >>> df.iloc[1, 1] + -1.6773111933012679 For getting fast access to a scalar (equivalent to the prior method): -.. 
ipython:: python +:: - df.iat[1, 1] + >>> df.iat[1, 1] + -1.6773111933012679 Boolean indexing ~~~~~~~~~~~~~~~~ Using a single column's values to select data. -.. ipython:: python +:: - df[df['A'] > 0] + >>> df[df['A'] > 0] + A B C D + 2013-01-01 0.411902 1.709468 -0.213158 0.821644 + 2013-01-03 0.421083 -0.750191 0.269751 -2.799289 + 2013-01-05 0.689205 -1.501951 0.363000 0.401498 + 2013-01-06 0.426947 -0.469598 -1.295293 -1.435165 Selecting values from a DataFrame where a boolean condition is met. -.. ipython:: python +:: - df[df > 0] + >>> df[df > 0] + A B C D + 2013-01-01 0.411902 1.709468 NaN 0.821644 + 2013-01-02 NaN NaN NaN NaN + 2013-01-03 0.421083 NaN 0.269751 NaN + 2013-01-04 NaN 1.274036 2.442691 NaN + 2013-01-05 0.689205 NaN 0.363000 0.401498 + 2013-01-06 0.426947 NaN NaN NaN Operations @@ -257,26 +402,52 @@ Operations in general *exclude* missing data. Performing a descriptive statistic: -.. ipython:: python +:: - df.mean() + >>> df.mean() + A -0.016988 + B -0.235924 + C -0.000666 + D -0.673834 + dtype: float64 Same operation on the other axis: -.. ipython:: python +:: - df.mean(1) + >>> df.mean(1) + 2013-01-01 0.682464 + 2013-01-02 -1.148044 + 2013-01-03 -0.714661 + 2013-01-04 0.494461 + 2013-01-05 -0.012062 + 2013-01-06 -0.693277 + Freq: D, dtype: float64 Operating with objects that have different dimensionality and need alignment. In addition, :code:`xorbits.pandas` automatically broadcasts along the specified dimension. -.. ipython:: python - - s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) - s - df.sub(s, axis='index') +:: + + >>> s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) + >>> s + 2013-01-01 NaN + 2013-01-02 NaN + 2013-01-03 1.0 + 2013-01-04 3.0 + 2013-01-05 5.0 + 2013-01-06 NaN + Freq: D, dtype: float64 + >>> df.sub(s, axis='index') + A B C D + 2013-01-01 NaN NaN NaN NaN + 2013-01-02 NaN NaN NaN NaN + 2013-01-03 -0.578917 -1.750191 -0.730249 -3.799289 + 2013-01-04 -4.329158 -1.725964 -0.557309 -3.409725 + 2013-01-05 -4.310795 -6.501951 -4.637000 -4.598502 + 2013-01-06 NaN NaN NaN NaN Apply @@ -284,9 +455,14 @@ Apply Applying functions to the data: -.. ipython:: python +:: - df.apply(lambda x: x.max() - x.min()) + >>> df.apply(lambda x: x.max() - x.min()) + A 2.018363 + B 3.386779 + C 4.013678 + D 3.620933 + dtype: float64 String Methods ~~~~~~~~~~~~~~ @@ -297,10 +473,20 @@ code snippet below. Note that pattern-matching in `str` generally uses `regular expressions `__ by default (and in some cases always uses them). -.. ipython:: python - - s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) - s.str.lower() +:: + + >>> s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) + >>> s.str.lower() + 0 a + 1 b + 2 c + 3 aaba + 4 baca + 5 NaN + 6 caba + 7 dog + 8 cat + dtype: object Merge ----- @@ -315,15 +501,37 @@ operations. Concatenating :code:`xorbits.pandas` objects together with :func:`concat`: -.. 
ipython:: python - - df = pd.DataFrame(np.random.randn(10, 4)) - df - - # break it into pieces - pieces = [df[:3], df[3:7], df[7:]] - - pd.concat(pieces) +:: + + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df + 0 1 2 3 + 0 -0.495508 0.903802 2.152979 1.098698 + 1 -0.327001 -0.586382 1.999350 -1.056401 + 2 0.341923 -0.024582 0.439198 0.662602 + 3 -1.896886 0.181549 0.119640 -1.426697 + 4 -2.407668 -0.780552 -1.301063 0.510010 + 5 -0.350738 -0.147771 -0.566869 -2.414299 + 6 -1.994935 -0.486425 -0.531758 1.624540 + 7 -0.358207 -0.884470 1.257721 0.587503 + 8 -0.945414 -1.055967 1.334790 0.817954 + 9 1.116094 -0.664818 -0.298791 0.042105 + + >>> # break it into pieces + >>> pieces = [df[:3], df[3:7], df[7:]] + + >>> pd.concat(pieces) + 0 1 2 3 + 0 -0.495508 0.903802 2.152979 1.098698 + 1 -0.327001 -0.586382 1.999350 -1.056401 + 2 0.341923 -0.024582 0.439198 0.662602 + 3 -1.896886 0.181549 0.119640 -1.426697 + 4 -2.407668 -0.780552 -1.301063 0.510010 + 5 -0.350738 -0.147771 -0.566869 -2.414299 + 6 -1.994935 -0.486425 -0.531758 1.624540 + 7 -0.358207 -0.884470 1.257721 0.587503 + 8 -0.945414 -1.055967 1.334790 0.817954 + 9 1.116094 -0.664818 -0.298791 0.042105 .. note:: Adding a column to a :class:`DataFrame` is relatively fast. However, adding @@ -336,23 +544,44 @@ Join SQL style merges. -.. ipython:: python - - left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) - right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) - left - right - pd.merge(left, right, on='key') +:: + + >>> left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) + >>> right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) + >>> left + key lval + 0 foo 1 + 1 foo 2 + >>> right + key rval + 0 foo 4 + 1 foo 5 + >>> pd.merge(left, right, on='key') + key lval rval + 0 foo 1 4 + 1 foo 1 5 + 2 foo 2 4 + 3 foo 2 5 Another example that can be given is: -.. ipython:: python +:: + + >>> left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]}) + >>> right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]}) + >>> left + key lval + 0 foo 1 + 1 bar 2 + >>> right + key rval + 0 foo 4 + 1 bar 5 + >>> pd.merge(left, right, on='key') + key lval rval + 0 foo 1 4 + 1 bar 2 5 - left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]}) - right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]}) - left - right - pd.merge(left, right, on='key') Grouping -------- @@ -365,63 +594,83 @@ following steps: - **Combining** the results into a data structure -.. ipython:: python - - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - df +:: + + >>> df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + >>> df + A B C D + 0 foo one -0.473456 1.016378 + 1 bar one 0.373591 0.480215 + 2 foo two -0.538622 -0.490436 + 3 bar three -1.833243 -1.471246 + 4 foo two -0.083388 1.389476 + 5 bar two 0.874384 2.006862 + 6 foo one -0.968538 -1.703000 + 7 foo three -1.840837 0.066493 Grouping and then applying the :meth:`~xorbits.pandas.groupby.DataFrameGroupBy.sum` function to the resulting groups. -.. 
ipython:: python
-    :okwarning:
+::
 
-    df.groupby('A').sum()
+    >>> df.groupby('A').sum()
+                         B         C         D
+    A
+    bar        onethreetwo -0.585268  1.015831
+    foo  onetwotwoonethree -3.904840  0.278910
 
 Grouping by multiple columns forms a hierarchical index, and again we can
 apply the `sum` function.
 
-.. ipython:: python
+::
 
-    df.groupby(['A', 'B']).sum()
+    >>> df.groupby(['A', 'B']).sum()
+                      C         D
+    A   B
+    bar one    0.373591  0.480215
+        three -1.833243 -1.471246
+        two    0.874384  2.006862
+    foo one   -1.441994 -0.686622
+        three -1.840837  0.066493
+        two   -0.622010  0.899039
 
 Plotting
 --------
 
 We use the standard convention for referencing the matplotlib API:
 
-.. ipython:: python
+::
 
-    import matplotlib.pyplot as plt
-    plt.close('all')
+    >>> import matplotlib.pyplot as plt
+    >>> plt.close('all')
 
-.. ipython:: python
+::
 
-    ts = pd.Series(np.random.randn(1000),
-                   index=pd.date_range('1/1/2000', periods=1000))
-    ts = ts.cumsum()
+    >>> ts = pd.Series(np.random.randn(1000),
+               index=pd.date_range('1/1/2000', periods=1000))
+    >>> ts = ts.cumsum()
 
-    @savefig series_plot_basic.png
-    ts.plot()
+    >>> ts.plot()
 
 On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all
 of the columns with labels:
 
-.. ipython:: python
+::
 
-    df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
-                      columns=['A', 'B', 'C', 'D'])
-    df = df.cumsum()
+    >>> df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
+               columns=['A', 'B', 'C', 'D'])
+    >>> df = df.cumsum()
 
-    plt.figure()
-    df.plot()
-    @savefig frame_plot_basic.png
-    plt.legend(loc='best')
+    >>> plt.figure()
+    >>> df.plot()
+    >>> plt.legend(loc='best')
 
 Getting data in/out
 -------------------
@@ -431,19 +680,35 @@ CSV
 
 Writing to a csv file.
 
-.. ipython:: python
+::
 
-    df.to_csv('foo.csv')
+    >>> df.to_csv('foo.csv')
+    Empty DataFrame
+    Columns: []
+    Index: []
 
 Reading from a csv file.
 
-.. ipython:: python
-
-    pd.read_csv('foo.csv')
-
-.. ipython:: python
-    :suppress:
-
-    import os
-    os.remove('foo.csv')
-    xorbits.shutdown()
+::
+
+    >>> pd.read_csv('foo.csv')
+        Unnamed: 0         A          B         C          D
+    0   2000-01-01  0.385646   1.201584  -1.701511  -0.693112
+    1   2000-01-02  0.331648  -0.203431  -1.030354  -0.045550
+    2   2000-01-03  0.112350   0.024239  -0.690759  -1.354678
+    3   2000-01-04 -0.492772  -1.407550   0.535260  -0.030373
+    4   2000-01-05 -0.557673   0.116826   2.127525  -0.835155
+    ..         ...       ...        ...        ...        ...
+    995 2002-09-22  6.795263  15.514409  -8.909048 -43.613612
+    996 2002-09-23  5.241447  15.386009  -9.248272 -43.035980
+    997 2002-09-24  2.541217  14.514584  -9.051257 -43.824801
+    998 2002-09-25  1.450811  14.913616  -9.681888 -42.579596
+    999 2002-09-26  1.895067  16.139412  -8.192430 -42.140289
+
+    [1000 rows x 5 columns]
+
+::
+
+    >>> import os
+    >>> os.remove('foo.csv')
+    >>> xorbits.shutdown()
diff --git a/doc/source/libraries/xorbits_inference/index.rst b/doc/source/libraries/xorbits_inference/index.rst
index 2d7e9c574..a0fea37b0 100644
--- a/doc/source/libraries/xorbits_inference/index.rst
+++ b/doc/source/libraries/xorbits_inference/index.rst
@@ -1,8 +1,8 @@
 .. _xorbits_inference_index:
 
-=============
+=================
 Xorbits Inference
-=============
+=================
 
 .. toctree::
    :maxdepth: 2
diff --git a/doc/source/reference/datasets/index.rst b/doc/source/reference/datasets/index.rst
index 8af993ba7..75dcb7d57 100644
--- a/doc/source/reference/datasets/index.rst
+++ b/doc/source/reference/datasets/index.rst
@@ -1,6 +1,6 @@
-==========
+============
 Datasets API
-==========
+============
 
 .. 
toctree:: :maxdepth: 2 diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index ea8194ccb..7833a82a9 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -8,8 +8,6 @@ API Reference :maxdepth: 2 xorbits/index - pandas/index - numpy/index datasets/index xgboost/index lightgbm/index diff --git a/doc/source/reference/lightgbm/learning.rst b/doc/source/reference/lightgbm/learning.rst index f2c0b00da..f363b72e7 100644 --- a/doc/source/reference/lightgbm/learning.rst +++ b/doc/source/reference/lightgbm/learning.rst @@ -1,4 +1,4 @@ -.. _api.learning_api: +.. _api.lightgbm_learning_api: ============ Learning API diff --git a/doc/source/reference/lightgbm/sklearn.rst b/doc/source/reference/lightgbm/sklearn.rst index 04528dcb6..9fc36acf0 100644 --- a/doc/source/reference/lightgbm/sklearn.rst +++ b/doc/source/reference/lightgbm/sklearn.rst @@ -1,4 +1,4 @@ -.. _api.sklearn: +.. _api.lightgbm_sklearn: ================ Scikit-Learn API @@ -78,7 +78,6 @@ Attributes LGBMRanker.load_model LGBMRanker.predict LGBMRanker.predict_proba - LGBMRanker.score LGBMRanker.set_params LGBMRanker.to_local diff --git a/doc/source/reference/xgboost/learning_api.rst b/doc/source/reference/xgboost/learning_api.rst index 966e75d28..b04cc4b44 100644 --- a/doc/source/reference/xgboost/learning_api.rst +++ b/doc/source/reference/xgboost/learning_api.rst @@ -1,4 +1,4 @@ -.. _api.learning_api: +.. _api.xgboost_learning_api: ============ Learning API diff --git a/doc/source/reference/xgboost/sklearn.rst b/doc/source/reference/xgboost/sklearn.rst index 12ae679d5..1e18e8372 100644 --- a/doc/source/reference/xgboost/sklearn.rst +++ b/doc/source/reference/xgboost/sklearn.rst @@ -1,4 +1,4 @@ -.. _api.sklearn: +.. _api.xgboost_sklearn: ================ Scikit-Learn API diff --git a/doc/source/user_guide/deployment_cluster.rst b/doc/source/user_guide/deployment_cluster.rst index 6c78577ab..c8527cc92 100644 --- a/doc/source/user_guide/deployment_cluster.rst +++ b/doc/source/user_guide/deployment_cluster.rst @@ -70,7 +70,7 @@ You can open a web browser and type ``http://:=1.0 pytest-asyncio>=0.14.0 pytest-mock>=3.11.1 - sphinx>=3.0.0,<5.0.0 + sphinx pydata-sphinx-theme>=0.3.0 sphinx-intl>=0.9.9 flake8>=3.8.0 @@ -73,12 +73,9 @@ dev = matplotlib datasets doc = - ipython>=6.5.0 - sphinx>=3.0.0,<5.0.0 + sphinx pydata-sphinx-theme>=0.3.0 sphinx-intl>=0.9.9 - xgboost>=1.3.0 - lightgbm>=3.3.5 matplotlib datasets extra =