diff --git a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst b/docs/cudf/source/_templates/autosummary/class.rst similarity index 50% rename from docs/cudf/source/_templates/autosummary/class_without_autosummary.rst rename to docs/cudf/source/_templates/autosummary/class.rst index 6676c672b20..a16cd0d7305 100644 --- a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst +++ b/docs/cudf/source/_templates/autosummary/class.rst @@ -4,3 +4,6 @@ .. currentmodule:: {{ module }} .. autoclass:: {{ objname }} + +.. + Don't include the methods or attributes sections, numpydoc adds them for us instead. diff --git a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst deleted file mode 100644 index a9c9bd2b650..00000000000 --- a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst +++ /dev/null @@ -1,33 +0,0 @@ -{% extends "!autosummary/class.rst" %} - -{% block methods %} -{% if methods %} - -.. - HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. - .. autosummary:: - :toctree: - {% for item in all_methods %} - {%- if not item.startswith('_') or item in ['__call__'] %} - {{ name }}.{{ item }} - {%- endif -%} - {%- endfor %} - -{% endif %} -{% endblock %} - -{% block attributes %} -{% if attributes %} - -.. - HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. - .. 
autosummary:: - :toctree: - {% for item in all_attributes %} - {%- if not item.startswith('_') %} - {{ name }}.{{ item }} - {%- endif -%} - {%- endfor %} - -{% endif %} -{% endblock %} diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 642996f9379..a8f3edf5a04 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -7,7 +7,6 @@ Constructor ~~~~~~~~~~~ .. autosummary:: :toctree: api/ - :template: autosummary/class_with_autosummary.rst DataFrame diff --git a/docs/cudf/source/api_docs/extension_dtypes.rst b/docs/cudf/source/api_docs/extension_dtypes.rst index b470df4aa00..daccb01b737 100644 --- a/docs/cudf/source/api_docs/extension_dtypes.rst +++ b/docs/cudf/source/api_docs/extension_dtypes.rst @@ -10,7 +10,6 @@ cudf.CategoricalDtype ===================== .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst CategoricalDtype @@ -41,7 +40,6 @@ cudf.Decimal32Dtype =================== .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst Decimal32Dtype @@ -70,7 +68,6 @@ cudf.Decimal64Dtype =================== .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst Decimal64Dtype @@ -99,7 +96,6 @@ cudf.Decimal128Dtype ==================== .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst Decimal128Dtype @@ -128,7 +124,6 @@ cudf.ListDtype ============== .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst ListDtype @@ -154,7 +149,6 @@ cudf.StructDtype ================ .. 
autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst StructDtype diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index a6a23d189e9..013eaf29a56 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -12,7 +12,6 @@ used before calling these methods directly.** .. autosummary:: :toctree: api/ - :template: autosummary/class_with_autosummary.rst Index @@ -162,9 +161,13 @@ Numeric Index ------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst RangeIndex + RangeIndex.start + RangeIndex.stop + RangeIndex.step + RangeIndex.to_numpy + RangeIndex.to_arrow Int64Index UInt64Index Float64Index @@ -175,7 +178,6 @@ CategoricalIndex ---------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst CategoricalIndex @@ -200,7 +202,6 @@ IntervalIndex ------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst IntervalIndex @@ -219,7 +220,6 @@ MultiIndex ---------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst MultiIndex @@ -250,6 +250,7 @@ MultiIndex components MultiIndex.to_frame MultiIndex.droplevel + MultiIndex.swaplevel MultiIndex selecting ~~~~~~~~~~~~~~~~~~~~ @@ -265,7 +266,6 @@ DatetimeIndex ------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst DatetimeIndex @@ -299,6 +299,7 @@ Time-specific operations DatetimeIndex.round DatetimeIndex.ceil DatetimeIndex.floor + DatetimeIndex.tz_convert DatetimeIndex.tz_localize Conversion @@ -313,7 +314,6 @@ TimedeltaIndex -------------- .. 
autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst TimedeltaIndex diff --git a/docs/cudf/source/api_docs/io.rst b/docs/cudf/source/api_docs/io.rst index 05c0cc82e62..417970715f8 100644 --- a/docs/cudf/source/api_docs/io.rst +++ b/docs/cudf/source/api_docs/io.rst @@ -36,8 +36,6 @@ Parquet read_parquet DataFrame.to_parquet cudf.io.parquet.read_parquet_metadata - :template: autosummary/class_with_autosummary.rst - cudf.io.parquet.ParquetDatasetWriter cudf.io.parquet.ParquetDatasetWriter.close cudf.io.parquet.ParquetDatasetWriter.write_table diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 8bab649f079..ebfc1e3f5d1 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -7,7 +7,6 @@ Constructor ----------- .. autosummary:: :toctree: api/ - :template: autosummary/class_with_autosummary.rst Series diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst index 80d77ebcde2..cd240fe4db4 100644 --- a/docs/cudf/source/api_docs/subword_tokenize.rst +++ b/docs/cudf/source/api_docs/subword_tokenize.rst @@ -7,7 +7,6 @@ Constructor ~~~~~~~~~~~ .. 
autosummary:: :toctree: api/ - :template: autosummary/class_with_autosummary.rst SubwordTokenizer SubwordTokenizer.__call__ diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index f9982c69e1b..03b1bb7039b 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -22,6 +22,7 @@ from docutils.nodes import Text from sphinx.addnodes import pending_xref +# -- Custom Extensions ---------------------------------------------------- sys.path.append(os.path.abspath("./_ext")) # -- General configuration ------------------------------------------------ @@ -52,9 +53,6 @@ copybutton_prompt_text = ">>> " autosummary_generate = True -ipython_mplbackend = "str" - -html_use_modindex = True # Enable automatic generation of systematic, namespaced labels for sections myst_heading_anchors = 2 @@ -100,9 +98,6 @@ # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - html_theme_options = { "external_links": [], # https://github.com/pydata/pydata-sphinx-theme/issues/1220 @@ -209,14 +204,12 @@ # Config numpydoc numpydoc_show_inherited_class_members = { - "cudf.core.dtypes.CategoricalDtype": False, - "cudf.core.dtypes.Decimal32Dtype": False, - "cudf.core.dtypes.Decimal64Dtype": False, - "cudf.core.dtypes.Decimal128Dtype": False, - "cudf.core.dtypes.ListDtype": False, - "cudf.core.dtypes.StructDtype": False, + # option_context inherits undocumented members from the parent class "cudf.option_context": False, } + +# Rely on toctrees generated from autosummary on each of the pages we define +# rather than the autosummaries on the numpydoc auto-generated class pages. 
numpydoc_class_members_toctree = False numpydoc_attributes_as_param_list = False @@ -229,8 +222,6 @@ "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), } -_internal_names_to_ignore = {"cudf.core.column.string.StringColumn"} - def resolve_aliases(app, doctree): pending_xrefs = doctree.traverse(condition=pending_xref) @@ -254,26 +245,7 @@ def ignore_internal_references(app, env, node, contnode): # use `cudf.Index` node["reftarget"] = "cudf.Index" return contnode - elif name is not None and name in _internal_names_to_ignore: - node["reftarget"] = "" - return contnode - - -def process_class_docstrings(app, what, name, obj, options, lines): - """ - For those classes for which we use :: - :template: autosummary/class_without_autosummary.rst - the documented attributes/methods have to be listed in the class - docstring. However, if one of those lists is empty, we use 'None', - which then generates warnings in sphinx / ugly html output. - This "autodoc-process-docstring" event connector removes that part - from the processed docstring. - """ - if what == "class": - if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}: - - cut_index = lines.index('.. 
rubric:: Attributes') - lines[:] = lines[:cut_index] + return None nitpick_ignore = [ @@ -289,4 +261,3 @@ def setup(app): app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer") app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) - app.connect("autodoc-process-docstring", process_class_docstrings) diff --git a/docs/cudf/source/developer_guide/documentation.md b/docs/cudf/source/developer_guide/documentation.md index 187934cd274..26557de917a 100644 --- a/docs/cudf/source/developer_guide/documentation.md +++ b/docs/cudf/source/developer_guide/documentation.md @@ -121,6 +121,35 @@ while still matching the pandas layout as closely as possible. When adding a new API, developers simply have to add the API to the appropriate page. Adding the name of the function to the appropriate autosummary list is sufficient for it to be documented. +### Documenting classes + +Python classes and the Sphinx plugins used in RAPIDS interact in nontrivial ways. +`autosummary`'s default page generated for a class uses [`autodoc`](https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html) to automatically detect and document all methods of a class. +That means that in addition to the manually created `autosummary` pages where class methods are grouped into sections of related features, there is another page for each class where all the methods of that class are automatically summarized in a table for quick access. +However, we also use the [`numpydoc`](https://numpydoc.readthedocs.io/) extension, which offers the same feature. +We use both in order to match the contents and style of the pandas documentation as closely as possible. + +pandas is also particular about what information is included in a class's documentation. 
+While the documentation pages for the major user-facing classes like `DataFrame`, `Series`, and `Index` contain all APIs, less visible classes or subclasses (such as subclasses of `Index`) only include the methods that are specific to those subclasses. +For example, {py:class}`cudf.CategoricalIndex` only includes `codes` and `categories` on its page, not the entire set of `Index` functionality. + +To accommodate these requirements, we take the following approach: +1. The default `autosummary` template for classes is overridden with a [simpler template that does not generate method or attribute documentation](https://github.com/rapidsai/cudf/blob/main/docs/cudf/source/_templates/autosummary/class.rst). In other words, we disable `autosummary`'s generation of Methods and Attributes lists. +2. We rely on `numpydoc` entirely for the classes that need their entire APIs listed (`DataFrame`/`Series`/etc.). `numpydoc` will automatically populate Methods and Attributes sections if (and only if) they are not already defined in the class's docstring. +3. For classes that should only include a subset of APIs, we include those explicitly in the class's documentation. When those lists exist, `numpydoc` will not override them. If either the Methods or Attributes section should be empty, that section must still be included but should simply contain "None". For example, the class documentation for `CategoricalIndex` could include something like the following: + +``` + Attributes + ---------- + codes + categories + + Methods + ------- + None + +``` + ## Comparing to pandas cuDF aims to provide a pandas-like experience. diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 4acdc2431f8..a83c1f7b3c9 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -144,6 +144,16 @@ class CategoricalDtype(_BaseDtype): when used in operations that combine categoricals, e.g. 
astype, and will resolve to False if there is no existing ordered to maintain. + Attributes + ---------- + categories + ordered + + Methods + ------- + from_pandas + to_pandas + Examples -------- >>> import cudf @@ -320,6 +330,16 @@ class ListDtype(_BaseDtype): element_type : object A dtype with which represents the element types in the list. + Attributes + ---------- + element_type + leaf_type + + Methods + ------- + from_arrow + to_arrow + Examples -------- >>> import cudf @@ -496,6 +516,16 @@ class StructDtype(_BaseDtype): A mapping of field names to dtypes, the dtypes can themselves be of ``StructDtype`` too. + Attributes + ---------- + fields + itemsize + + Methods + ------- + from_arrow + to_arrow + Examples -------- >>> import cudf @@ -649,6 +679,17 @@ def itemsize(self): scale : int, optional The scale of the dtype. See Notes below. + Attributes + ---------- + precision + scale + itemsize + + Methods + ------- + to_arrow + from_arrow + Notes ----- When the scale is positive: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 618d2eb4553..44a1620da8a 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -156,9 +156,16 @@ class RangeIndex(BaseIndex, BinaryOperand): copy : bool, default False Unused, accepted for homogeneity with other index types. - Returns + Attributes + ---------- + start + stop + step + + Methods ------- - RangeIndex + to_numpy + to_arrow Examples -------- @@ -1637,6 +1644,14 @@ class Int8Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Int8Index @@ -1662,6 +1677,14 @@ class Int16Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Int16Index @@ -1687,6 +1710,14 @@ class Int32Index(NumericIndex): name : object Name to be stored in the index. 
+ Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Int32Index @@ -1712,6 +1743,14 @@ class Int64Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Int64Index @@ -1737,6 +1776,14 @@ class UInt8Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- UInt8Index @@ -1762,6 +1809,14 @@ class UInt16Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- UInt16Index @@ -1787,6 +1842,14 @@ class UInt32Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- UInt32Index @@ -1812,6 +1875,14 @@ class UInt64Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- UInt64Index @@ -1837,6 +1908,14 @@ class Float32Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Float32Index @@ -1868,6 +1947,14 @@ class Float64Index(NumericIndex): name : object Name to be stored in the index. + Attributes + ---------- + None + + Methods + ------- + None + Returns ------- Float64Index @@ -1908,6 +1995,32 @@ class DatetimeIndex(GenericIndex): If True parse dates in data with the year first order. This is not yet supported + Attributes + ---------- + year + month + day + hour + minute + second + microsecond + nanosecond + date + time + dayofyear + day_of_year + weekday + quarter + freq + + Methods + ------- + ceil + floor + round + tz_convert + tz_localize + Returns ------- DatetimeIndex @@ -2567,6 +2680,19 @@ class TimedeltaIndex(GenericIndex): name : object Name to be stored in the index. 
+ Attributes + ---------- + days + seconds + microseconds + nanoseconds + components + inferred_freq + + Methods + ------- + None + Returns ------- TimedeltaIndex @@ -2711,6 +2837,15 @@ class CategoricalIndex(GenericIndex): name : object, optional Name to be stored in the index. + Attributes + ---------- + codes + categories + + Methods + ------- + equals + Returns ------- CategoricalIndex @@ -2969,6 +3104,15 @@ class IntervalIndex(GenericIndex): name : object, optional Name to be stored in the index. + Attributes + ---------- + values + + Methods + ------- + from_breaks + get_loc + Returns ------- IntervalIndex diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 3c0e29a96be..9285a21f696 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -68,6 +68,33 @@ class MultiIndex(Frame, BaseIndex, NotIterable): Check that the levels/codes are consistent and valid. Not yet supported + Attributes + ---------- + names + nlevels + dtypes + levels + codes + + Methods + ------- + from_arrays + from_tuples + from_product + from_frame + set_levels + set_codes + to_frame + to_flat_index + sortlevel + droplevel + swaplevel + reorder_levels + remove_unused_levels + get_level_values + get_loc + drop + Returns ------- MultiIndex