diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eb3458d2c9..8b8fda0c15 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -311,7 +311,7 @@ make pytest make e2e-tests ``` -#### Linting tests (`isort`, `black`, `pylint`, `flake8` and `mypy`) +#### Linting tests (`ruff` linter and formatter, and `mypy`) ```bash make lint diff --git a/Makefile b/Makefile index 9536ca63cc..864537c6b1 100644 --- a/Makefile +++ b/Makefile @@ -27,18 +27,15 @@ e2e-tests: lint: format-fix lint-check format-fix: - isort package/kedro_viz package/tests package/features - black package/kedro_viz package/tests package/features + ruff check --fix + ruff format format-check: - isort --check package/kedro_viz package/tests package/features - black --check package/kedro_viz package/tests package/features + ruff check + ruff format --check lint-check: - pylint --rcfile=package/.pylintrc -j 0 package/kedro_viz - pylint --rcfile=package/.pylintrc -j 0 --disable=protected-access,missing-docstring,redefined-outer-name,invalid-name,too-few-public-methods,no-member,unused-argument,duplicate-code,abstract-class-instantiated package/tests - pylint --rcfile=package/.pylintrc -j 0 --disable=missing-docstring,no-name-in-module,unused-argument package/features - flake8 --config=package/.flake8 package + ruff check mypy --config-file=package/mypy.ini package/kedro_viz package/features mypy --disable-error-code abstract --config-file=package/mypy.ini package/tests diff --git a/RELEASE.md b/RELEASE.md index 66d6a51a24..aaed5e9a5f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -18,6 +18,7 @@ Please follow the established format: - Fix unserializable parameters value (#2122) - Display full dataset type with library prefix in metadata panel (#2136) - Enable SQLite WAL mode for Azure ML to fix database locking issues (#2131) +- Replace `flake8`, `isort`, `pylint` and `black` by `ruff` (#2149) # Release 10.0.0 diff --git a/demo-project/.pre-commit-config.yaml b/demo-project/.pre-commit-config.yaml index 9213629835..cbf9699872 100644 --- a/demo-project/.pre-commit-config.yaml +++ b/demo-project/.pre-commit-config.yaml @@ -14,31 +14,11 @@ repos: - id: check-json # Checks json files for parseable syntax. - id: check-case-conflict # Check for files that would conflict in case-insensitive filesystems - id: check-merge-conflict # Check for files that contain merge conflict strings. - - id: debug-statements # Check for debugger imports and py37+ `breakpoint()` calls in python source. 
- id: requirements-txt-fixer # Sorts entries in requirements.txt - - id: flake8 - args: - - "--max-line-length=100" - - "--max-complexity=18" - - "--max-complexity=18" - - "--select=B,C,E,F,W,T4,B9" - - "--ignore=E203,E266,E501,W503" - - repo: local + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.7.0 hooks: - - id: isort - name: "Sort imports" - language: system - types: [file, python] - entry: isort - - id: black - name: "Black" - language: system - types: [file, python] - entry: black - - id: kedro lint - name: "Kedro lint" - language: python_venv - types: [file, python] - entry: kedro lint - stages: [commit] + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/demo-project/docs/source/conf.py b/demo-project/docs/source/conf.py index 4220ef0a6b..25f0f26269 100644 --- a/demo-project/docs/source/conf.py +++ b/demo-project/docs/source/conf.py @@ -22,6 +22,7 @@ from kedro.framework.cli.utils import find_stylesheets from recommonmark.transform import AutoStructify + from demo_project import __version__ as release # -- Project information ----------------------------------------------------- diff --git a/demo-project/pyproject.toml b/demo-project/pyproject.toml index 0be0a5449b..76b0de5fcf 100644 --- a/demo-project/pyproject.toml +++ b/demo-project/pyproject.toml @@ -3,14 +3,6 @@ package_name = "demo_project" project_name = "modular-spaceflights" kedro_init_version = "0.19.0" -[tool.isort] -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -use_parentheses = true -line_length = 88 -known_third_party = "kedro" - [tool.pytest.ini_options] addopts = """ --cov-report term-missing \ diff --git a/demo-project/ruff.toml b/demo-project/ruff.toml new file mode 100644 index 0000000000..ca826c2941 --- /dev/null +++ b/demo-project/ruff.toml @@ -0,0 +1,5 @@ +extend = "../ruff.toml" + +[lint.isort] +known-first-party = ["demo_project"] +known-third-party = ["kedro"] diff --git a/demo-project/setup.cfg b/demo-project/setup.cfg deleted file mode 100644 index 63ea673001..0000000000 --- a/demo-project/setup.cfg +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length=88 -extend-ignore=E203 diff --git a/demo-project/src/demo_project/__init__.py b/demo-project/src/demo_project/__init__.py index 8ed604c689..72074eaac8 100644 --- a/demo-project/src/demo_project/__init__.py +++ b/demo-project/src/demo_project/__init__.py @@ -1,4 +1,3 @@ -"""demo-project -""" +"""demo-project""" __version__ = "0.1" diff --git a/demo-project/src/demo_project/__main__.py b/demo-project/src/demo_project/__main__.py index e9c8eea9f3..9b25b41ce3 100644 --- a/demo-project/src/demo_project/__main__.py +++ b/demo-project/src/demo_project/__main__.py @@ -1,6 +1,7 @@ """demo-project file for ensuring the package is executable as `demo-project` and `python -m demo_project` """ + from pathlib import Path from kedro.framework.project import configure_project diff --git a/demo-project/src/demo_project/hooks.py b/demo-project/src/demo_project/hooks.py index c2bb3ef03e..3f4b474986 100644 --- a/demo-project/src/demo_project/hooks.py +++ b/demo-project/src/demo_project/hooks.py @@ -1,4 +1,5 @@ """Project hooks.""" + import logging import time from typing import Any diff --git a/demo-project/src/demo_project/pipeline_registry.py b/demo-project/src/demo_project/pipeline_registry.py index 659a3701a3..49fa22539d 100644 --- a/demo-project/src/demo_project/pipeline_registry.py +++ b/demo-project/src/demo_project/pipeline_registry.py @@ -1,4 +1,5 @@ """Project pipelines.""" + from typing import Dict from 
kedro.pipeline import Pipeline @@ -8,6 +9,7 @@ from demo_project.pipelines import modelling as mod from demo_project.pipelines import reporting as rep + def register_pipelines() -> Dict[str, Pipeline]: """Register the project's pipelines. @@ -24,7 +26,7 @@ def register_pipelines() -> Dict[str, Pipeline]: ) reporting_pipeline = rep.create_pipeline() - + return { "__default__": ( ingestion_pipeline diff --git a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py index 1acbdf9531..49904271bb 100755 --- a/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py +++ b/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py @@ -27,21 +27,21 @@ def create_pipeline(**kwargs) -> Pipeline: func=apply_types_to_companies, inputs="companies", outputs="int_typed_companies", - name='apply_types_to_companies', - tags='companies' + name="apply_types_to_companies", + tags="companies", ), node( func=apply_types_to_shuttles, inputs="shuttles", outputs="int_typed_shuttles@pandas1", - name='apply_types_to_shuttles', - tags='shuttles' + name="apply_types_to_shuttles", + tags="shuttles", ), node( func=apply_types_to_reviews, inputs=["reviews", "params:typing.reviews.columns_as_floats"], outputs="int_typed_reviews", - name='apply_types_to_reviews' + name="apply_types_to_reviews", ), node( func=aggregate_company_data, diff --git a/demo-project/src/demo_project/pipelines/feature_engineering/pipeline.py b/demo-project/src/demo_project/pipelines/feature_engineering/pipeline.py index f4706df75c..fe8f87e275 100644 --- a/demo-project/src/demo_project/pipelines/feature_engineering/pipeline.py +++ b/demo-project/src/demo_project/pipelines/feature_engineering/pipeline.py @@ -3,7 +3,6 @@ generated using Kedro 0.18.1 """ - from kedro.pipeline import Pipeline, node from kedro.pipeline.modular_pipeline import pipeline diff --git a/demo-project/src/demo_project/pipelines/modelling/nodes.py b/demo-project/src/demo_project/pipelines/modelling/nodes.py index 94382c0ec4..a56b974cee 100755 --- a/demo-project/src/demo_project/pipelines/modelling/nodes.py +++ b/demo-project/src/demo_project/pipelines/modelling/nodes.py @@ -85,15 +85,11 @@ def evaluate_model( """ y_pred = regressor.predict(X_test) score = r2_score(y_test, y_pred) - a2_score = random.randint(0,100)*0.1 - b2_score = random.randint(0,100)*0.1 + a2_score = random.randint(0, 100) * 0.1 + b2_score = random.randint(0, 100) * 0.1 logger = logging.getLogger(__name__) logger.info( f"Model has a coefficient R^2 of {score:.3f} on test data using a " f"regressor of type '{type(regressor)}'" ) - return { - "r2_score": score, - "a2_score":a2_score, - "b2_score":b2_score - } + return {"r2_score": score, "a2_score": a2_score, "b2_score": b2_score} diff --git a/demo-project/src/demo_project/pipelines/modelling/pipeline.py b/demo-project/src/demo_project/pipelines/modelling/pipeline.py index 808a4e77e2..1244720b11 100755 --- a/demo-project/src/demo_project/pipelines/modelling/pipeline.py +++ b/demo-project/src/demo_project/pipelines/modelling/pipeline.py @@ -22,13 +22,13 @@ def new_train_eval_template() -> Pipeline: func=train_model, inputs=["X_train", "y_train", "params:dummy_model_options"], outputs=["regressor", "experiment_params"], - tags="train" + tags="train", ), node( func=evaluate_model, inputs=["regressor", "X_test", "y_test"], outputs="r2_score", - tags="evaluate" + tags="evaluate", ), ] ) @@ -83,7 +83,7 @@ def create_pipeline(model_types: List[str]) -> Pipeline: pipeline( 
pipe=new_train_eval_template(), parameters={"dummy_model_options": f"model_options.{model_type}"}, - inputs={k: k for k in test_train_refs}, + inputs={k: k for k in test_train_refs}, namespace=model_type, ) for model_type in model_types diff --git a/demo-project/src/demo_project/pipelines/reporting/image_utils.py b/demo-project/src/demo_project/pipelines/reporting/image_utils.py index 701d1d1b21..00334dd9fb 100644 --- a/demo-project/src/demo_project/pipelines/reporting/image_utils.py +++ b/demo-project/src/demo_project/pipelines/reporting/image_utils.py @@ -22,7 +22,6 @@ def __init__(self, _df: pd.DataFrame, x: int = 500, y: int = 200): self._populate(_df) def _draw_grid(self): - width, height = self.image.size row_step = (height - self.border * 2) / (self.rows) col_step = (width - self.border * 2) / (self.cols) diff --git a/demo-project/src/demo_project/pipelines/reporting/nodes.py b/demo-project/src/demo_project/pipelines/reporting/nodes.py index cd4796ceb1..dbcc8033a2 100644 --- a/demo-project/src/demo_project/pipelines/reporting/nodes.py +++ b/demo-project/src/demo_project/pipelines/reporting/nodes.py @@ -2,6 +2,9 @@ This is a boilerplate pipeline 'reporting' generated using Kedro 0.18.1 """ + +from typing import Dict + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -9,7 +12,7 @@ import plotly.express as px import seaborn as sn from plotly import graph_objects as go -from typing import Dict + from .image_utils import DrawTable diff --git a/demo-project/src/demo_project/pipelines/reporting/pipeline.py b/demo-project/src/demo_project/pipelines/reporting/pipeline.py index 4b6eb4e6de..e7db18ef57 100644 --- a/demo-project/src/demo_project/pipelines/reporting/pipeline.py +++ b/demo-project/src/demo_project/pipelines/reporting/pipeline.py @@ -8,10 +8,10 @@ from demo_project.pipelines.reporting.nodes import ( create_feature_importance_plot, create_matplotlib_chart, + get_top_shuttles_data, make_cancel_policy_bar_chart, make_price_analysis_image, make_price_histogram, - get_top_shuttles_data, ) diff --git a/demo-project/src/demo_project/requirements.in b/demo-project/src/demo_project/requirements.in index dff89fa8d9..3d69566188 100644 --- a/demo-project/src/demo_project/requirements.in +++ b/demo-project/src/demo_project/requirements.in @@ -1,7 +1,4 @@ -black~=22.0 -flake8>=3.7.9, <4.0 ipython~=7.0 -isort~=5.0 jupyter~=1.0 jupyter_client>=5.1, <7.0 jupyterlab~=3.0 @@ -16,4 +13,5 @@ wheel>=0.35, <0.37 pillow~=9.0 matplotlib==3.5.0 pre-commit~=1.17 +ruff==0.7.0 seaborn>=0.13.0 diff --git a/demo-project/src/tests/test_run.py b/demo-project/src/tests/test_run.py index cedc6bcd49..0097339aad 100644 --- a/demo-project/src/tests/test_run.py +++ b/demo-project/src/tests/test_run.py @@ -7,6 +7,7 @@ To run the tests, run ``kedro test``. 
""" + from pathlib import Path import pytest diff --git a/package/.flake8 b/package/.flake8 deleted file mode 100644 index 9e226baf4e..0000000000 --- a/package/.flake8 +++ /dev/null @@ -1,7 +0,0 @@ -# copied from black - -[flake8] -ignore = E203,E231,E266,E501,F401,W503 -max-line-length = 88 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 diff --git a/package/.isort.cfg b/package/.isort.cfg deleted file mode 100644 index 78ae47d50a..0000000000 --- a/package/.isort.cfg +++ /dev/null @@ -1,9 +0,0 @@ -# copied from black - -[settings] -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -use_parentheses=True -line_length=88 -known_third_party=behave diff --git a/package/.pylintrc b/package/.pylintrc deleted file mode 100644 index bbf5e49a40..0000000000 --- a/package/.pylintrc +++ /dev/null @@ -1,391 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist=pydantic - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. -jobs=1 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins=pylint.extensions.docparams,pylint_pydantic - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use"--disable=all --enable=classes -# --disable=W" -disable=ungrouped-imports,attribute-defined-outside-init,too-many-arguments,duplicate-code,too-many-positional-arguments,fixme - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=useless-suppression - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. 
This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio).You can also give a reporter class, eg -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - - -[BASIC] - -# Regular expression matching correct argument names -argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - - -# Regular expression matching correct attribute names -attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Bad variable names which should always be refused, separated by a comma -bad-names=foo,bar,baz,toto,tutu,tata - -# Regular expression matching correct class attribute names -class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ - -# Regular expression matching correct class names -class-rgx=[A-Z_][a-zA-Z0-9]+$ - - -# Regular expression matching correct constant names -const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Regular expression matching correct function names -function-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - -# Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_,id - -# Include a hint for the correct naming format with invalid-name -include-naming-hint=no - -# Regular expression matching correct inline iteration names -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - -# Regular expression matching correct method names -method-rgx=(([a-z][a-z0-9_]{2,60})|(_[a-z0-9_]*))$ - -# Regular expression matching correct module names -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -property-classes=abc.abstractproperty - -# Regular expression matching correct variable names -variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. 
-single-line-if-stmt=no - - -[LOGGING] - -# Logging modules to check that the string format arguments are in logging -# function parameter format -logging-modules=logging - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME,XXX,TODO - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=20 - - -[SPELLING] - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules=orjson - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid to define new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. 
-allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_,_cb - -# A regular expression matching the name of dummy variables (i.e. expectedly -# not used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,future.builtins - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__,__new__,setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[DESIGN] - -# Maximum number of arguments for function / method -max-args=12 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in a if statement -max-bool-expr=5 - -# Maximum number of branch for function / method body -max-branches=12 - -# Maximum number of locals for function / method body -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body -max-returns=6 - -# Maximum number of statements in function / method body -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=1 - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled) -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled) -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled) -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. 
Defaults to -# "Exception" -overgeneral-exceptions=builtins.Exception diff --git a/package/features/environment.py b/package/features/environment.py index b0b17df598..38fb8e0e61 100644 --- a/package/features/environment.py +++ b/package/features/environment.py @@ -97,7 +97,7 @@ def _setup_context_with_venv(context, venv_dir): return context -def after_scenario(context, scenario): +def after_scenario(context, scenario): # noqa: ARG001 for path in _PATHS_TO_REMOVE: # ignore errors when attempting to remove already removed directories shutil.rmtree(path, ignore_errors=True) diff --git a/package/features/steps/cli_steps.py b/package/features/steps/cli_steps.py index 769cb08d64..3f9deb1304 100644 --- a/package/features/steps/cli_steps.py +++ b/package/features/steps/cli_steps.py @@ -166,7 +166,7 @@ def check_kedroviz_up(context): while time() < end_by: try: data_json = requests.get("http://localhost:4141/api/main").json() - except Exception: + except Exception: # noqa: BLE001 sleep(2.0) continue else: @@ -191,7 +191,7 @@ def get_main_api_response(context): response = requests.get("http://localhost:4141/api/main") context.response = response.json() assert response.status_code == 200 - except Exception: + except Exception: # noqa: BLE001 sleep(2.0) continue else: diff --git a/package/features/viz.feature b/package/features/viz.feature index d3c01e2f7f..dfe78dd5db 100644 --- a/package/features/viz.feature +++ b/package/features/viz.feature @@ -24,6 +24,12 @@ Feature: Viz plugin in new project When I execute the kedro viz run command Then kedro-viz should start successfully + Scenario: Execute viz lite with the earliest Kedro version that it supports + Given I have installed kedro version "0.18.3" + And I have run a non-interactive kedro new with pandas-iris starter + When I execute the kedro viz run command with lite option + Then kedro-viz should start successfully + Scenario: Execute viz lite with latest Kedro Given I have installed kedro version "latest" And I have run a non-interactive kedro new with spaceflights-pandas starter diff --git a/package/kedro_viz/__init__.py b/package/kedro_viz/__init__.py index 6053ab4eac..806f783d93 100644 --- a/package/kedro_viz/__init__.py +++ b/package/kedro_viz/__init__.py @@ -1,4 +1,5 @@ """Kedro plugin for visualising a Kedro pipeline""" + import sys import warnings diff --git a/package/kedro_viz/api/apps.py b/package/kedro_viz/api/apps.py index aef4d44715..e188ab1911 100644 --- a/package/kedro_viz/api/apps.py +++ b/package/kedro_viz/api/apps.py @@ -1,6 +1,7 @@ """`kedro_viz.api.app` defines the FastAPI app to serve Kedro data in a RESTful API. This data could either come from a real Kedro project or a file. 
""" + import json import os import time @@ -14,7 +15,7 @@ from jinja2 import Environment, FileSystemLoader from kedro_viz import __version__ -from kedro_viz.api.rest.responses import EnhancedORJSONResponse +from kedro_viz.api.rest.responses.utils import EnhancedORJSONResponse from kedro_viz.integrations.kedro import telemetry as kedro_telemetry from .graphql.router import router as graphql_router @@ -42,7 +43,7 @@ def _create_base_api_app() -> FastAPI: @app.middleware("http") async def set_secure_headers(request, call_next): response = await call_next(request) - secure_headers.framework.fastapi(response) # pylint: disable=no-member + secure_headers.framework.fastapi(response) return response return app diff --git a/package/kedro_viz/api/graphql/router.py b/package/kedro_viz/api/graphql/router.py index eb0b257ef7..803a5b7527 100644 --- a/package/kedro_viz/api/graphql/router.py +++ b/package/kedro_viz/api/graphql/router.py @@ -1,4 +1,5 @@ """`kedro_viz.api.graphql.router` defines GraphQL routes.""" + # mypy: ignore-errors from fastapi import APIRouter from strawberry.asgi import GraphQL diff --git a/package/kedro_viz/api/graphql/schema.py b/package/kedro_viz/api/graphql/schema.py index bb0fb5b552..24632b57b4 100644 --- a/package/kedro_viz/api/graphql/schema.py +++ b/package/kedro_viz/api/graphql/schema.py @@ -1,5 +1,4 @@ """`kedro_viz.api.graphql.schema` defines the GraphQL schema: queries and mutations.""" -# pylint: disable=missing-function-docstring,missing-class-docstring from __future__ import annotations @@ -77,7 +76,6 @@ def run_tracking_data( group: TrackingDatasetGroup, show_diff: Optional[bool] = True, ) -> List[TrackingDataset]: - # pylint: disable=line-too-long tracking_dataset_models = data_access_manager.tracking_datasets.get_tracking_datasets_by_group_by_run_ids( run_ids, group ) @@ -110,7 +108,6 @@ def run_metrics_data(self, limit: Optional[int] = 25) -> MetricPlotDataset: ] group = TrackingDatasetGroup.METRIC - # pylint: disable=line-too-long metric_dataset_models = data_access_manager.tracking_datasets.get_tracking_datasets_by_group_by_run_ids( run_ids, group ) diff --git a/package/kedro_viz/api/graphql/types.py b/package/kedro_viz/api/graphql/types.py index 86848d7e6e..d5ec8ad527 100644 --- a/package/kedro_viz/api/graphql/types.py +++ b/package/kedro_viz/api/graphql/types.py @@ -1,6 +1,5 @@ """`kedro_viz.api.graphql.types` defines strawberry types.""" -# pylint: disable=too-few-public-methods,missing-class-docstring from __future__ import annotations import sys diff --git a/package/kedro_viz/api/rest/responses.py b/package/kedro_viz/api/rest/responses.py deleted file mode 100644 index 2f59d33b16..0000000000 --- a/package/kedro_viz/api/rest/responses.py +++ /dev/null @@ -1,495 +0,0 @@ -"""`kedro_viz.api.rest.responses` defines REST response types.""" - -# pylint: disable=missing-class-docstring,invalid-name -import abc -import json -import logging -from typing import Any, Dict, List, Optional, Union - -import orjson -from fastapi.encoders import jsonable_encoder -from fastapi.responses import JSONResponse, ORJSONResponse -from pydantic import BaseModel, ConfigDict - -from kedro_viz.api.rest.utils import get_package_compatibilities -from kedro_viz.data_access import data_access_manager -from kedro_viz.models.flowchart import ( - DataNode, - DataNodeMetadata, - ParametersNodeMetadata, - TaskNode, - TaskNodeMetadata, - TranscodedDataNode, - TranscodedDataNodeMetadata, -) -from kedro_viz.models.metadata import Metadata, PackageCompatibility - -logger = 
logging.getLogger(__name__) - - -class APIErrorMessage(BaseModel): - message: str - - -class BaseAPIResponse(BaseModel, abc.ABC): - model_config = ConfigDict(from_attributes=True) - - -class BaseGraphNodeAPIResponse(BaseAPIResponse): - id: str - name: str - tags: List[str] - pipelines: List[str] - type: str - - # If a node is a ModularPipeline node, this value will be None, hence Optional. - modular_pipelines: Optional[List[str]] = None - - -class TaskNodeAPIResponse(BaseGraphNodeAPIResponse): - parameters: Dict - model_config = ConfigDict( - json_schema_extra={ - "example": { - "id": "6ab908b8", - "name": "split_data_node", - "tags": [], - "pipelines": ["__default__", "ds"], - "modular_pipelines": [], - "type": "task", - "parameters": { - "test_size": 0.2, - "random_state": 3, - "features": [ - "engines", - "passenger_capacity", - "crew", - "d_check_complete", - "moon_clearance_complete", - "iata_approved", - "company_rating", - "review_scores_rating", - ], - }, - } - } - ) - - -class DataNodeAPIResponse(BaseGraphNodeAPIResponse): - layer: Optional[str] = None - dataset_type: Optional[str] = None - stats: Optional[Dict] = None - model_config = ConfigDict( - json_schema_extra={ - "example": { - "id": "d7b83b05", - "name": "master_table", - "tags": [], - "pipelines": ["__default__", "dp", "ds"], - "modular_pipelines": [], - "type": "data", - "layer": "primary", - "dataset_type": "kedro_datasets.pandas.csv_dataset.CSVDataset", - "stats": {"rows": 10, "columns": 2, "file_size": 2300}, - } - } - ) - - -NodeAPIResponse = Union[ - TaskNodeAPIResponse, - DataNodeAPIResponse, -] - - -class TaskNodeMetadataAPIResponse(BaseAPIResponse): - code: Optional[str] = None - filepath: Optional[str] = None - parameters: Optional[Dict] = None - inputs: List[str] - outputs: List[str] - run_command: Optional[str] = None - model_config = ConfigDict( - json_schema_extra={ - "example": { - "code": "def split_data(data: pd.DataFrame, parameters: Dict) -> Tuple:", - "filepath": "proj/src/new_kedro_project/pipelines/data_science/nodes.py", - "parameters": {"test_size": 0.2}, - "inputs": ["params:input1", "input2"], - "outputs": ["output1"], - "run_command": "kedro run --to-nodes=split_data", - } - } - ) - - -class DataNodeMetadataAPIResponse(BaseAPIResponse): - filepath: Optional[str] = None - type: str - run_command: Optional[str] = None - preview: Optional[Union[Dict, str]] = None - preview_type: Optional[str] = None - stats: Optional[Dict] = None - model_config = ConfigDict( - json_schema_extra={ - "example": { - "filepath": "/my-kedro-project/data/03_primary/master_table.csv", - "type": "kedro_datasets.pandas.csv_dataset.CSVDataset", - "run_command": "kedro run --to-outputs=master_table", - } - } - ) - - -class TranscodedDataNodeMetadataAPIReponse(BaseAPIResponse): - filepath: Optional[str] = None - original_type: str - transcoded_types: List[str] - run_command: Optional[str] = None - stats: Optional[Dict] = None - - -class ParametersNodeMetadataAPIResponse(BaseAPIResponse): - parameters: Dict - model_config = ConfigDict( - json_schema_extra={ - "example": { - "parameters": { - "test_size": 0.2, - "random_state": 3, - "features": [ - "engines", - "passenger_capacity", - "crew", - "d_check_complete", - "moon_clearance_complete", - "iata_approved", - "company_rating", - "review_scores_rating", - ], - } - } - } - ) - - -NodeMetadataAPIResponse = Union[ - TaskNodeMetadataAPIResponse, - DataNodeMetadataAPIResponse, - TranscodedDataNodeMetadataAPIReponse, - ParametersNodeMetadataAPIResponse, -] - - -class 
GraphEdgeAPIResponse(BaseAPIResponse): - source: str - target: str - - -class NamedEntityAPIResponse(BaseAPIResponse): - """Model an API field that has an ID and a name. - For example, used for representing modular pipelines and pipelines in the API response. - """ - - id: str - name: Optional[str] = None - - -class ModularPipelineChildAPIResponse(BaseAPIResponse): - """Model a child in a modular pipeline's children field in the API response.""" - - id: str - type: str - - -class ModularPipelinesTreeNodeAPIResponse(BaseAPIResponse): - """Model a node in the tree representation of modular pipelines in the API response.""" - - id: str - name: str - inputs: List[str] - outputs: List[str] - children: List[ModularPipelineChildAPIResponse] - - -# Represent the modular pipelines in the API response as a tree. -# The root node is always designated with the __root__ key. -# Example: -# { -# "__root__": { -# "id": "__root__", -# "name": "Root", -# "inputs": [], -# "outputs": [], -# "children": [ -# {"id": "d577578a", "type": "parameters"}, -# {"id": "data_science", "type": "modularPipeline"}, -# {"id": "f1f1425b", "type": "parameters"}, -# {"id": "data_engineering", "type": "modularPipeline"}, -# ], -# }, -# "data_engineering": { -# "id": "data_engineering", -# "name": "Data Engineering", -# "inputs": ["d577578a"], -# "outputs": [], -# "children": [], -# }, -# "data_science": { -# "id": "data_science", -# "name": "Data Science", -# "inputs": ["f1f1425b"], -# "outputs": [], -# "children": [], -# }, -# } -# } -ModularPipelinesTreeAPIResponse = Dict[str, ModularPipelinesTreeNodeAPIResponse] - - -class GraphAPIResponse(BaseAPIResponse): - nodes: List[NodeAPIResponse] - edges: List[GraphEdgeAPIResponse] - layers: List[str] - tags: List[NamedEntityAPIResponse] - pipelines: List[NamedEntityAPIResponse] - modular_pipelines: ModularPipelinesTreeAPIResponse - selected_pipeline: str - - -class MetadataAPIResponse(BaseAPIResponse): - has_missing_dependencies: bool = False - package_compatibilities: List[PackageCompatibility] = [] - model_config = ConfigDict( - json_schema_extra={ - "has_missing_dependencies": False, - "package_compatibilities": [ - { - "package_name": "fsspec", - "package_version": "2024.6.1", - "is_compatible": True, - }, - { - "package_name": "kedro-datasets", - "package_version": "4.0.0", - "is_compatible": True, - }, - ], - } - ) - - -class EnhancedORJSONResponse(ORJSONResponse): - @staticmethod - def encode_to_human_readable(content: Any) -> bytes: - """A method to encode the given content to JSON, with the - proper formatting to write a human-readable file. - - Returns: - A bytes object containing the JSON to write. 
- - """ - return orjson.dumps( - content, - option=orjson.OPT_INDENT_2 - | orjson.OPT_NON_STR_KEYS - | orjson.OPT_SERIALIZE_NUMPY, - ) - - -def get_default_response() -> GraphAPIResponse: - """Default response for `/api/main`.""" - default_selected_pipeline_id = ( - data_access_manager.get_default_selected_pipeline().id - ) - - modular_pipelines_tree = ( - data_access_manager.create_modular_pipelines_tree_for_registered_pipeline( - default_selected_pipeline_id - ) - ) - - return GraphAPIResponse( - nodes=data_access_manager.get_nodes_for_registered_pipeline( - default_selected_pipeline_id - ), - edges=data_access_manager.get_edges_for_registered_pipeline( - default_selected_pipeline_id - ), - tags=data_access_manager.tags.as_list(), - layers=data_access_manager.get_sorted_layers_for_registered_pipeline( - default_selected_pipeline_id - ), - pipelines=data_access_manager.registered_pipelines.as_list(), - modular_pipelines=modular_pipelines_tree, - selected_pipeline=default_selected_pipeline_id, - ) - - -def get_node_metadata_response(node_id: str): - """API response for `/api/nodes/node_id`.""" - node = data_access_manager.nodes.get_node_by_id(node_id) - if not node: - return JSONResponse(status_code=404, content={"message": "Invalid node ID"}) - - if not node.has_metadata(): - return JSONResponse(content={}) - - if isinstance(node, TaskNode): - return TaskNodeMetadata(task_node=node) - - if isinstance(node, DataNode): - return DataNodeMetadata(data_node=node) - - if isinstance(node, TranscodedDataNode): - return TranscodedDataNodeMetadata(transcoded_data_node=node) - - return ParametersNodeMetadata(parameters_node=node) - - -def get_selected_pipeline_response(registered_pipeline_id: str): - """API response for `/api/pipeline/pipeline_id`.""" - if not data_access_manager.registered_pipelines.has_pipeline( - registered_pipeline_id - ): - return JSONResponse(status_code=404, content={"message": "Invalid pipeline ID"}) - - modular_pipelines_tree = ( - data_access_manager.create_modular_pipelines_tree_for_registered_pipeline( - registered_pipeline_id - ) - ) - - return GraphAPIResponse( - nodes=data_access_manager.get_nodes_for_registered_pipeline( - registered_pipeline_id - ), - edges=data_access_manager.get_edges_for_registered_pipeline( - registered_pipeline_id - ), - tags=data_access_manager.tags.as_list(), - layers=data_access_manager.get_sorted_layers_for_registered_pipeline( - registered_pipeline_id - ), - pipelines=data_access_manager.registered_pipelines.as_list(), - selected_pipeline=registered_pipeline_id, - modular_pipelines=modular_pipelines_tree, - ) - - -def get_metadata_response(): - """API response for `/api/metadata`.""" - package_compatibilities = get_package_compatibilities() - Metadata.set_package_compatibilities(package_compatibilities) - return Metadata() - - -def get_encoded_response(response: Any) -> bytes: - """Encodes and enhances the default response using human-readable format.""" - jsonable_response = jsonable_encoder(response) - encoded_response = EnhancedORJSONResponse.encode_to_human_readable( - jsonable_response - ) - - return encoded_response - - -def write_api_response_to_fs(file_path: str, response: Any, remote_fs: Any): - """Get encoded responses and writes it to a file""" - encoded_response = get_encoded_response(response) - - with remote_fs.open(file_path, "wb") as file: - file.write(encoded_response) - - -def get_kedro_project_json_data(): - """Decodes the default response and returns the Kedro project JSON data. 
- This will be used in VSCode extension to get current Kedro project data.""" - encoded_response = get_encoded_response(get_default_response()) - - try: - response_str = encoded_response.decode("utf-8") - json_data = json.loads(response_str) - except UnicodeDecodeError as exc: # pragma: no cover - json_data = None - logger.error("Failed to decode response string. Error: %s", str(exc)) - except json.JSONDecodeError as exc: # pragma: no cover - json_data = None - logger.error("Failed to parse JSON data. Error: %s", str(exc)) - - return json_data - - -def save_api_main_response_to_fs(main_path: str, remote_fs: Any): - """Saves API /main response to a directory.""" - try: - write_api_response_to_fs(main_path, get_default_response(), remote_fs) - except Exception as exc: # pragma: no cover - logger.exception("Failed to save default response. Error: %s", str(exc)) - raise exc - - -def save_api_node_response_to_fs( - nodes_path: str, remote_fs: Any, is_all_previews_enabled: bool -): - """Saves API /nodes/{node} response to a directory.""" - # Set if preview is enabled/disabled for all data nodes - DataNodeMetadata.set_is_all_previews_enabled(is_all_previews_enabled) - - for nodeId in data_access_manager.nodes.get_node_ids(): - try: - write_api_response_to_fs( - f"{nodes_path}/{nodeId}", get_node_metadata_response(nodeId), remote_fs - ) - except Exception as exc: # pragma: no cover - logger.exception( - "Failed to save node data for node ID %s. Error: %s", nodeId, str(exc) - ) - raise exc - - -def save_api_pipeline_response_to_fs(pipelines_path: str, remote_fs: Any): - """Saves API /pipelines/{pipeline} response to a directory.""" - for pipelineId in data_access_manager.registered_pipelines.get_pipeline_ids(): - try: - write_api_response_to_fs( - f"{pipelines_path}/{pipelineId}", - get_selected_pipeline_response(pipelineId), - remote_fs, - ) - except Exception as exc: # pragma: no cover - logger.exception( - "Failed to save pipeline data for pipeline ID %s. Error: %s", - pipelineId, - str(exc), - ) - raise exc - - -def save_api_responses_to_fs(path: str, remote_fs: Any, is_all_previews_enabled: bool): - """Saves all Kedro Viz API responses to a directory.""" - try: - logger.debug( - """Saving/Uploading api files to %s""", - path, - ) - - main_path = f"{path}/api/main" - nodes_path = f"{path}/api/nodes" - pipelines_path = f"{path}/api/pipelines" - - if "file" in remote_fs.protocol: - remote_fs.makedirs(path, exist_ok=True) - remote_fs.makedirs(nodes_path, exist_ok=True) - remote_fs.makedirs(pipelines_path, exist_ok=True) - - save_api_main_response_to_fs(main_path, remote_fs) - save_api_node_response_to_fs(nodes_path, remote_fs, is_all_previews_enabled) - save_api_pipeline_response_to_fs(pipelines_path, remote_fs) - - except Exception as exc: # pragma: no cover - logger.exception( - "An error occurred while preparing data for saving. 
Error: %s", str(exc) - ) - raise exc diff --git a/package/kedro_viz/api/rest/responses/__init__.py b/package/kedro_viz/api/rest/responses/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/package/kedro_viz/api/rest/responses/base.py b/package/kedro_viz/api/rest/responses/base.py new file mode 100755 index 0000000000..99fe66e85c --- /dev/null +++ b/package/kedro_viz/api/rest/responses/base.py @@ -0,0 +1,28 @@ +"""`kedro_viz.api.rest.responses.base` contains base +response classes and utility functions for the REST endpoints""" + +import abc +import logging + +from pydantic import BaseModel, ConfigDict + +logger = logging.getLogger(__name__) + + +class APINotFoundResponse(BaseModel): + """ + APINotFoundResponse is a Pydantic model representing a response for an API not found error. + + Attributes: + message (str): A message describing the error. + """ + + message: str + + +class BaseAPIResponse(BaseModel, abc.ABC): + """ + BaseAPIResponse is an abstract base class for API responses. + """ + + model_config = ConfigDict(from_attributes=True) diff --git a/package/kedro_viz/api/rest/responses/metadata.py b/package/kedro_viz/api/rest/responses/metadata.py new file mode 100755 index 0000000000..0222d261a1 --- /dev/null +++ b/package/kedro_viz/api/rest/responses/metadata.py @@ -0,0 +1,47 @@ +"""`kedro_viz.api.rest.responses.metadata` contains response classes +and utility functions for the `/metadata` REST endpoint""" + +from typing import List + +from pydantic import ConfigDict + +from kedro_viz.api.rest.responses.base import BaseAPIResponse +from kedro_viz.api.rest.utils import get_package_compatibilities +from kedro_viz.models.metadata import Metadata, PackageCompatibility + + +class MetadataAPIResponse(BaseAPIResponse): + """ + MetadataAPIResponse is a subclass of BaseAPIResponse that represents the response structure for metadata API. + + Attributes: + has_missing_dependencies (bool): Indicates if there are any missing dependencies. Defaults to False. + package_compatibilities (List[PackageCompatibility]): A list of package compatibility information. Defaults to an empty list. 
+ """ + + has_missing_dependencies: bool = False + package_compatibilities: List[PackageCompatibility] = [] + model_config = ConfigDict( + json_schema_extra={ + "has_missing_dependencies": False, + "package_compatibilities": [ + { + "package_name": "fsspec", + "package_version": "2024.6.1", + "is_compatible": True, + }, + { + "package_name": "kedro-datasets", + "package_version": "4.0.0", + "is_compatible": True, + }, + ], + } + ) + + +def get_metadata_response(): + """API response for `/api/metadata`.""" + package_compatibilities = get_package_compatibilities() + Metadata.set_package_compatibilities(package_compatibilities) + return Metadata() diff --git a/package/kedro_viz/api/rest/responses/nodes.py b/package/kedro_viz/api/rest/responses/nodes.py new file mode 100644 index 0000000000..f6df0c53ce --- /dev/null +++ b/package/kedro_viz/api/rest/responses/nodes.py @@ -0,0 +1,162 @@ +"""`kedro_viz.api.rest.responses.nodes` contains response classes +and utility functions for the `/nodes/*` REST endpoints""" + +import logging +from typing import Dict, List, Optional, Union + +from fastapi.responses import JSONResponse +from pydantic import ConfigDict + +from kedro_viz.api.rest.responses.base import BaseAPIResponse +from kedro_viz.data_access import data_access_manager +from kedro_viz.models.flowchart.node_metadata import ( + DataNodeMetadata, + ParametersNodeMetadata, + TaskNodeMetadata, + TranscodedDataNodeMetadata, +) +from kedro_viz.models.flowchart.nodes import DataNode, TaskNode, TranscodedDataNode + +logger = logging.getLogger(__name__) + + +class TaskNodeMetadataAPIResponse(BaseAPIResponse): + """ + TaskNodeMetadataAPIResponse is a data model for representing the metadata of a task node in the Kedro visualization API. + + Attributes: + code (Optional[str]): The code snippet of the task node. + filepath (Optional[str]): The file path where the task node is defined. + parameters (Optional[Dict]): The parameters used by the task node. + inputs (List[str]): The list of input data for the task node. + outputs (List[str]): The list of output data from the task node. + run_command (Optional[str]): The command to run the task node. + """ + + code: Optional[str] = None + filepath: Optional[str] = None + parameters: Optional[Dict] = None + inputs: List[str] + outputs: List[str] + run_command: Optional[str] = None + model_config = ConfigDict( + json_schema_extra={ + "example": { + "code": "def split_data(data: pd.DataFrame, parameters: Dict) -> Tuple:", + "filepath": "proj/src/new_kedro_project/pipelines/data_science/nodes.py", + "parameters": {"test_size": 0.2}, + "inputs": ["params:input1", "input2"], + "outputs": ["output1"], + "run_command": "kedro run --to-nodes=split_data", + } + } + ) + + +class DataNodeMetadataAPIResponse(BaseAPIResponse): + """ + DataNodeMetadataAPIResponse is a class that represents the metadata response for a data node in the Kedro visualization API. + + Attributes: + filepath (Optional[str]): The file path of the data node. + type (str): The type of the data node. + run_command (Optional[str]): The command to run the data node. + preview (Optional[Union[Dict, str]]): A preview of the data node's content. + preview_type (Optional[str]): The type of the preview. + stats (Optional[Dict]): Statistics related to the data node. 
+ """ + + filepath: Optional[str] = None + type: str + run_command: Optional[str] = None + preview: Optional[Union[Dict, str]] = None + preview_type: Optional[str] = None + stats: Optional[Dict] = None + model_config = ConfigDict( + json_schema_extra={ + "example": { + "filepath": "/my-kedro-project/data/03_primary/master_table.csv", + "type": "kedro_datasets.pandas.csv_dataset.CSVDataset", + "run_command": "kedro run --to-outputs=master_table", + } + } + ) + + +class TranscodedDataNodeMetadataAPIReponse(BaseAPIResponse): + """ + TranscodedDataNodeMetadataAPIReponse represents the metadata response for a transcoded data node. + + Attributes: + filepath (Optional[str]): The file path of the transcoded data node. + original_type (str): The original type of the data node. + transcoded_types (List[str]): A list of types to which the data node has been transcoded. + run_command (Optional[str]): The command used to run the transcoding process. + stats (Optional[Dict]): Statistics related to the transcoded data node. + """ + + filepath: Optional[str] = None + original_type: str + transcoded_types: List[str] + run_command: Optional[str] = None + stats: Optional[Dict] = None + + +class ParametersNodeMetadataAPIResponse(BaseAPIResponse): + """ + ParametersNodeMetadataAPIResponse is a subclass of BaseAPIResponse that represents the metadata response for parameters nodes. + + Attributes: + parameters (Dict): A dictionary containing the parameters. + """ + + parameters: Dict + model_config = ConfigDict( + json_schema_extra={ + "example": { + "parameters": { + "test_size": 0.2, + "random_state": 3, + "features": [ + "engines", + "passenger_capacity", + "crew", + "d_check_complete", + "moon_clearance_complete", + "iata_approved", + "company_rating", + "review_scores_rating", + ], + } + } + } + ) + + +NodeMetadataAPIResponse = Union[ + TaskNodeMetadataAPIResponse, + DataNodeMetadataAPIResponse, + TranscodedDataNodeMetadataAPIReponse, + ParametersNodeMetadataAPIResponse, +] + + +def get_node_metadata_response(node_id: str): + """API response for `/api/nodes/node_id`.""" + node = data_access_manager.nodes.get_node_by_id(node_id) + if not node: + return JSONResponse(status_code=404, content={"message": "Invalid node ID"}) + + if not node.has_metadata(): + return JSONResponse(content={}) + + if isinstance(node, TaskNode): + return TaskNodeMetadata(task_node=node) + + if isinstance(node, DataNode): + return DataNodeMetadata(data_node=node) + + if isinstance(node, TranscodedDataNode): + return TranscodedDataNodeMetadata(transcoded_data_node=node) + + return ParametersNodeMetadata(parameters_node=node) diff --git a/package/kedro_viz/api/rest/responses/pipelines.py b/package/kedro_viz/api/rest/responses/pipelines.py new file mode 100644 index 0000000000..c5c096b8e5 --- /dev/null +++ b/package/kedro_viz/api/rest/responses/pipelines.py @@ -0,0 +1,256 @@ +"""`kedro_viz.api.rest.responses.pipelines` contains response classes +and utility functions for the `/main` and `/pipelines/* REST endpoints""" + +import json +import logging +from typing import Dict, List, Optional, Union + +from fastapi.responses import JSONResponse +from pydantic import ConfigDict + +from kedro_viz.api.rest.responses.base import BaseAPIResponse +from kedro_viz.api.rest.responses.utils import get_encoded_response +from kedro_viz.data_access import data_access_manager + +logger = logging.getLogger(__name__) + + +class BaseGraphNodeAPIResponse(BaseAPIResponse): + """ + BaseGraphNodeAPIResponse is a data model for representing the response of a 
graph node in the API. + + Attributes: + id (str): The unique identifier of the graph node. + name (str): The name of the graph node. + tags (List[str]): A list of tags associated with the graph node. + pipelines (List[str]): A list of pipelines that the graph node belongs to. + type (str): The type of the graph node. + modular_pipelines (Optional[List[str]]): A list of modular pipelines associated with the graph node. + This value will be None if the node is a ModularPipeline node. + """ + + id: str + name: str + tags: List[str] + pipelines: List[str] + type: str + + # If a node is a ModularPipeline node, this value will be None, hence Optional. + modular_pipelines: Optional[List[str]] = None + + +class TaskNodeAPIResponse(BaseGraphNodeAPIResponse): + """ + TaskNodeAPIResponse is a subclass of BaseGraphNodeAPIResponse that represents the response for a task node in the API. + + Attributes: + parameters (Dict): A dictionary containing the parameters for the task node. + """ + + parameters: Dict + model_config = ConfigDict( + json_schema_extra={ + "example": { + "id": "6ab908b8", + "name": "split_data_node", + "tags": [], + "pipelines": ["__default__", "ds"], + "modular_pipelines": [], + "type": "task", + "parameters": { + "test_size": 0.2, + "random_state": 3, + "features": [ + "engines", + "passenger_capacity", + "crew", + "d_check_complete", + "moon_clearance_complete", + "iata_approved", + "company_rating", + "review_scores_rating", + ], + }, + } + } + ) + + +class DataNodeAPIResponse(BaseGraphNodeAPIResponse): + """ + DataNodeAPIResponse is a subclass of BaseGraphNodeAPIResponse that represents the response model for a data node in the API. + + Attributes: + layer (Optional[str]): The layer to which the data node belongs. Default is None. + dataset_type (Optional[str]): The type of dataset. Default is None. + stats (Optional[Dict]): Statistics related to the dataset, such as number of rows, columns, and file size. Default is None. + """ + + layer: Optional[str] = None + dataset_type: Optional[str] = None + stats: Optional[Dict] = None + model_config = ConfigDict( + json_schema_extra={ + "example": { + "id": "d7b83b05", + "name": "master_table", + "tags": [], + "pipelines": ["__default__", "dp", "ds"], + "modular_pipelines": [], + "type": "data", + "layer": "primary", + "dataset_type": "kedro_datasets.pandas.csv_dataset.CSVDataset", + "stats": {"rows": 10, "columns": 2, "file_size": 2300}, + } + } + ) + + +NodeAPIResponse = Union[ + TaskNodeAPIResponse, + DataNodeAPIResponse, +] + + +class GraphEdgeAPIResponse(BaseAPIResponse): + """ + GraphEdgeAPIResponse represents the response model for an edge in the graph. + + Attributes: + source (str): The source node id for the edge. + target (str): The target node id for the edge. + """ + + source: str + target: str + + +class NamedEntityAPIResponse(BaseAPIResponse): + """Model an API field that has an ID and a name. + For example, used for representing modular pipelines and pipelines in the API response. 
+ """ + + id: str + name: Optional[str] = None + + +class ModularPipelineChildAPIResponse(BaseAPIResponse): + """Model a child in a modular pipeline's children field in the API response.""" + + id: str + type: str + + +class ModularPipelinesTreeNodeAPIResponse(BaseAPIResponse): + """Model a node in the tree representation of modular pipelines in the API response.""" + + id: str + name: str + inputs: List[str] + outputs: List[str] + children: List[ModularPipelineChildAPIResponse] + + +# Represent the modular pipelines in the API response as a tree. +# The root node is always designated with the __root__ key. +# Example: +# { +# "__root__": { +# "id": "__root__", +# "name": "Root", +# "inputs": [], +# "outputs": [], +# "children": [ +# {"id": "d577578a", "type": "parameters"}, +# {"id": "data_science", "type": "modularPipeline"}, +# {"id": "f1f1425b", "type": "parameters"}, +# {"id": "data_engineering", "type": "modularPipeline"}, +# ], +# }, +# "data_engineering": { +# "id": "data_engineering", +# "name": "Data Engineering", +# "inputs": ["d577578a"], +# "outputs": [], +# "children": [], +# }, +# "data_science": { +# "id": "data_science", +# "name": "Data Science", +# "inputs": ["f1f1425b"], +# "outputs": [], +# "children": [], +# }, +# } +# } +ModularPipelinesTreeAPIResponse = Dict[str, ModularPipelinesTreeNodeAPIResponse] + + +class GraphAPIResponse(BaseAPIResponse): + """ + GraphAPIResponse is a data model for the response of the graph API. + + Attributes: + nodes (List[NodeAPIResponse]): A list of nodes in the graph. + edges (List[GraphEdgeAPIResponse]): A list of edges connecting the nodes in the graph. + layers (List[str]): A list of layers in the graph. + tags (List[NamedEntityAPIResponse]): A list of tags associated with the graph entities. + pipelines (List[NamedEntityAPIResponse]): A list of pipelines in the graph. + modular_pipelines (ModularPipelinesTreeAPIResponse): A tree structure representing modular pipelines. + selected_pipeline (str): The identifier of the selected pipeline. + """ + + nodes: List[NodeAPIResponse] + edges: List[GraphEdgeAPIResponse] + layers: List[str] + tags: List[NamedEntityAPIResponse] + pipelines: List[NamedEntityAPIResponse] + modular_pipelines: ModularPipelinesTreeAPIResponse + selected_pipeline: str + + +def get_pipeline_response( + pipeline_id: Union[str, None] = None, +) -> Union[GraphAPIResponse, JSONResponse]: + """API response for `/api/pipelines/pipeline_id`.""" + if pipeline_id is None: + pipeline_id = data_access_manager.get_default_selected_pipeline().id + + if not data_access_manager.registered_pipelines.has_pipeline(pipeline_id): + return JSONResponse(status_code=404, content={"message": "Invalid pipeline ID"}) + + modular_pipelines_tree = ( + data_access_manager.create_modular_pipelines_tree_for_registered_pipeline( + pipeline_id + ) + ) + + return GraphAPIResponse( + nodes=data_access_manager.get_nodes_for_registered_pipeline(pipeline_id), + edges=data_access_manager.get_edges_for_registered_pipeline(pipeline_id), + tags=data_access_manager.tags.as_list(), + layers=data_access_manager.get_sorted_layers_for_registered_pipeline( + pipeline_id + ), + pipelines=data_access_manager.registered_pipelines.as_list(), + modular_pipelines=modular_pipelines_tree, + selected_pipeline=pipeline_id, + ) + + +def get_kedro_project_json_data(): + """Decodes the default response and returns the Kedro project JSON data. 
+    This will be used in the VSCode extension to get the current Kedro project data."""
+    encoded_response = get_encoded_response(get_pipeline_response())
+
+    try:
+        response_str = encoded_response.decode("utf-8")
+        json_data = json.loads(response_str)
+    except UnicodeDecodeError as exc:  # pragma: no cover
+        json_data = None
+        logger.error("Failed to decode response string. Error: %s", str(exc))
+    except json.JSONDecodeError as exc:  # pragma: no cover
+        json_data = None
+        logger.error("Failed to parse JSON data. Error: %s", str(exc))
+
+    return json_data
diff --git a/package/kedro_viz/api/rest/responses/save_responses.py b/package/kedro_viz/api/rest/responses/save_responses.py
new file mode 100644
index 0000000000..bcdd335534
--- /dev/null
+++ b/package/kedro_viz/api/rest/responses/save_responses.py
@@ -0,0 +1,97 @@
+"""`kedro_viz.api.rest.responses.save_responses` contains response classes
+and utility functions for writing and saving REST endpoint responses to the file system"""
+
+import logging
+from typing import Any
+
+from kedro_viz.api.rest.responses.nodes import get_node_metadata_response
+from kedro_viz.api.rest.responses.pipelines import get_pipeline_response
+from kedro_viz.api.rest.responses.utils import get_encoded_response
+from kedro_viz.data_access import data_access_manager
+from kedro_viz.models.flowchart.node_metadata import DataNodeMetadata
+
+logger = logging.getLogger(__name__)
+
+
+def save_api_responses_to_fs(path: str, remote_fs: Any, is_all_previews_enabled: bool):
+    """Saves all Kedro Viz API responses to a directory."""
+    try:
+        logger.debug(
+            "Saving/Uploading API files to %s",
+            path,
+        )
+
+        main_path = f"{path}/api/main"
+        nodes_path = f"{path}/api/nodes"
+        pipelines_path = f"{path}/api/pipelines"
+
+        if "file" in remote_fs.protocol:
+            remote_fs.makedirs(path, exist_ok=True)
+            remote_fs.makedirs(nodes_path, exist_ok=True)
+            remote_fs.makedirs(pipelines_path, exist_ok=True)
+
+        save_api_main_response_to_fs(main_path, remote_fs)
+        save_api_node_response_to_fs(nodes_path, remote_fs, is_all_previews_enabled)
+        save_api_pipeline_response_to_fs(pipelines_path, remote_fs)
+
+    except Exception as exc:  # pragma: no cover
+        logger.exception(
+            "An error occurred while preparing data for saving. Error: %s", str(exc)
+        )
+        raise exc
+
+
+def save_api_main_response_to_fs(main_path: str, remote_fs: Any):
+    """Saves API /main response to a directory."""
+    try:
+        write_api_response_to_fs(main_path, get_pipeline_response(), remote_fs)
+    except Exception as exc:  # pragma: no cover
+        logger.exception("Failed to save default response. Error: %s", str(exc))
+        raise exc
+
+
+def save_api_pipeline_response_to_fs(pipelines_path: str, remote_fs: Any):
+    """Saves API /pipelines/{pipeline} response to a directory."""
+    for pipeline_id in data_access_manager.registered_pipelines.get_pipeline_ids():
+        try:
+            write_api_response_to_fs(
+                f"{pipelines_path}/{pipeline_id}",
+                get_pipeline_response(pipeline_id),
+                remote_fs,
+            )
+        except Exception as exc:  # pragma: no cover
+            logger.exception(
+                "Failed to save pipeline data for pipeline ID %s. Error: %s",
+                pipeline_id,
+                str(exc),
+            )
+            raise exc
+
+
+def save_api_node_response_to_fs(
+    nodes_path: str, remote_fs: Any, is_all_previews_enabled: bool
+):
+    """Saves API /nodes/{node} response to a directory."""
+    # Set if preview is enabled/disabled for all data nodes
+    DataNodeMetadata.set_is_all_previews_enabled(is_all_previews_enabled)
+
+    for node_id in data_access_manager.nodes.get_node_ids():
+        try:
+            write_api_response_to_fs(
+                f"{nodes_path}/{node_id}",
+                get_node_metadata_response(node_id),
+                remote_fs,
+            )
+        except Exception as exc:  # pragma: no cover
+            logger.exception(
+                "Failed to save node data for node ID %s. Error: %s", node_id, str(exc)
+            )
+            raise exc
+
+
+def write_api_response_to_fs(file_path: str, response: Any, remote_fs: Any):
+    """Gets the encoded response and writes it to a file."""
+    encoded_response = get_encoded_response(response)
+
+    with remote_fs.open(file_path, "wb") as file:
+        file.write(encoded_response)
diff --git a/package/kedro_viz/api/rest/responses/utils.py b/package/kedro_viz/api/rest/responses/utils.py
new file mode 100644
index 0000000000..38bae09460
--- /dev/null
+++ b/package/kedro_viz/api/rest/responses/utils.py
@@ -0,0 +1,44 @@
+"""`kedro_viz.api.rest.responses.utils` contains utility
+response classes and functions for the REST endpoints"""
+
+import logging
+from typing import Any
+
+import orjson
+from fastapi.encoders import jsonable_encoder
+from fastapi.responses import ORJSONResponse
+
+logger = logging.getLogger(__name__)
+
+
+class EnhancedORJSONResponse(ORJSONResponse):
+    """
+    EnhancedORJSONResponse is a subclass of ORJSONResponse that provides
+    additional functionality for encoding content to a human-readable JSON format.
+    """
+
+    @staticmethod
+    def encode_to_human_readable(content: Any) -> bytes:
+        """A method to encode the given content to JSON, with the
+        proper formatting to write a human-readable file.
+
+        Returns:
+            A bytes object containing the JSON to write.
+ + """ + return orjson.dumps( + content, + option=orjson.OPT_INDENT_2 + | orjson.OPT_NON_STR_KEYS + | orjson.OPT_SERIALIZE_NUMPY, + ) + + +def get_encoded_response(response: Any) -> bytes: + """Encodes and enhances the default response using human-readable format.""" + jsonable_response = jsonable_encoder(response) + encoded_response = EnhancedORJSONResponse.encode_to_human_readable( + jsonable_response + ) + + return encoded_response diff --git a/package/kedro_viz/api/rest/router.py b/package/kedro_viz/api/rest/router.py index 3cd6a18e9f..2a743239fb 100644 --- a/package/kedro_viz/api/rest/router.py +++ b/package/kedro_viz/api/rest/router.py @@ -1,41 +1,36 @@ """`kedro_viz.api.rest.router` defines REST routes and handling logic.""" -# pylint: disable=missing-function-docstring, broad-exception-caught import logging from fastapi import APIRouter from fastapi.responses import JSONResponse from kedro_viz.api.rest.requests import DeployerConfiguration -from kedro_viz.integrations.deployment.deployer_factory import DeployerFactory - -from .responses import ( - APIErrorMessage, - GraphAPIResponse, +from kedro_viz.api.rest.responses.base import APINotFoundResponse +from kedro_viz.api.rest.responses.metadata import ( MetadataAPIResponse, - NodeMetadataAPIResponse, - get_default_response, get_metadata_response, +) +from kedro_viz.api.rest.responses.nodes import ( + NodeMetadataAPIResponse, get_node_metadata_response, - get_selected_pipeline_response, ) - -try: - from azure.core.exceptions import ServiceRequestError -except ImportError: # pragma: no cover - ServiceRequestError = None # type: ignore +from kedro_viz.api.rest.responses.pipelines import ( + GraphAPIResponse, + get_pipeline_response, +) logger = logging.getLogger(__name__) router = APIRouter( prefix="/api", - responses={404: {"model": APIErrorMessage}}, + responses={404: {"model": APINotFoundResponse}}, ) @router.get("/main", response_model=GraphAPIResponse) async def main(): - return get_default_response() + return get_pipeline_response() @router.get( @@ -52,11 +47,18 @@ async def get_single_node_metadata(node_id: str): response_model=GraphAPIResponse, ) async def get_single_pipeline_data(registered_pipeline_id: str): - return get_selected_pipeline_response(registered_pipeline_id) + return get_pipeline_response(registered_pipeline_id) @router.post("/deploy") async def deploy_kedro_viz(input_values: DeployerConfiguration): + from kedro_viz.integrations.deployment.deployer_factory import DeployerFactory + + try: + from azure.core.exceptions import ServiceRequestError + except ImportError: # pragma: no cover + ServiceRequestError = None # type: ignore + try: deployer = DeployerFactory.create_deployer( input_values.platform, input_values.endpoint, input_values.bucket_name @@ -74,7 +76,6 @@ async def deploy_kedro_viz(input_values: DeployerConfiguration): status_code=401, content={"message": "Please provide valid credentials"} ) except ( - # pylint: disable=catching-non-exception (FileNotFoundError, ServiceRequestError) if ServiceRequestError is not None else FileNotFoundError diff --git a/package/kedro_viz/data_access/__init__.py b/package/kedro_viz/data_access/__init__.py index 2dd525fd7b..c5f408f9ef 100644 --- a/package/kedro_viz/data_access/__init__.py +++ b/package/kedro_viz/data_access/__init__.py @@ -1,4 +1,5 @@ """`kedro_viz.data_access` provides an interface to save and load data for viz backend.""" + from .managers import DataAccessManager data_access_manager = DataAccessManager() diff --git 
a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 40e00ebe55..f7e572a497 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -1,28 +1,33 @@ """`kedro_viz.data_access.managers` defines data access managers.""" -# pylint: disable=too-many-instance-attributes,protected-access import logging from collections import defaultdict from typing import Dict, List, Set, Union -import networkx as nx from kedro.io import DataCatalog -from kedro.io.core import DatasetError + +try: + # kedro 0.18.11 onwards + from kedro.io.core import DatasetError +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import DataSetError as DatasetError # type: ignore + from kedro.pipeline import Pipeline as KedroPipeline from kedro.pipeline.node import Node as KedroNode from sqlalchemy.orm import sessionmaker from kedro_viz.constants import DEFAULT_REGISTERED_PIPELINE_ID, ROOT_MODULAR_PIPELINE_ID from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline +from kedro_viz.models.flowchart.nodes import ( DataNode, - GraphEdge, GraphNode, - GraphNodeType, ModularPipelineChild, ModularPipelineNode, ParametersNode, - RegisteredPipeline, TaskNode, TranscodedDataNode, ) @@ -87,8 +92,7 @@ def resolve_dataset_factory_patterns( for dataset_name in datasets: try: catalog._get_dataset(dataset_name, suggest=False) - # pylint: disable=broad-except - except Exception: # pragma: no cover + except Exception: # noqa: BLE001 # pragma: no cover continue def add_catalog(self, catalog: DataCatalog, pipelines: Dict[str, KedroPipeline]): @@ -230,7 +234,6 @@ def add_node( self.tags.add_tags(task_node.tags) return task_node - # pylint: disable=too-many-positional-arguments def add_node_input( self, registered_pipeline_id: str, @@ -392,9 +395,9 @@ def add_parameters_to_task_node( if parameters_node.is_all_parameters(): task_node.parameters = parameters_node.parameter_value else: - task_node.parameters[ - parameters_node.parameter_name - ] = parameters_node.parameter_value + task_node.parameters[parameters_node.parameter_name] = ( + parameters_node.parameter_value + ) def get_default_selected_pipeline(self) -> RegisteredPipeline: """Return the default selected pipeline ID to display on first page load. @@ -466,8 +469,7 @@ def get_sorted_layers_for_registered_pipeline( self.get_node_dependencies_for_registered_pipeline(registered_pipeline_id), ) - # pylint: disable=too-many-locals,too-many-branches - def create_modular_pipelines_tree_for_registered_pipeline( + def create_modular_pipelines_tree_for_registered_pipeline( # noqa: PLR0912 self, registered_pipeline_id: str = DEFAULT_REGISTERED_PIPELINE_ID ) -> Dict[str, ModularPipelineNode]: """Create the modular pipelines tree for a specific registered pipeline. @@ -546,6 +548,8 @@ def create_modular_pipelines_tree_for_registered_pipeline( # so no need to check non modular pipeline nodes. 
# # We leverage networkx to help with graph traversal + import networkx as nx + digraph = nx.DiGraph() for edge in edges: digraph.add_edge(edge.source, edge.target) diff --git a/package/kedro_viz/data_access/repositories/__init__.py b/package/kedro_viz/data_access/repositories/__init__.py index d1210cb981..6c0d3842c6 100644 --- a/package/kedro_viz/data_access/repositories/__init__.py +++ b/package/kedro_viz/data_access/repositories/__init__.py @@ -1,5 +1,6 @@ """`kedro_viz.data_access.repositories` defines repositories to centralise access to application data.""" + from .catalog import CatalogRepository from .graph import GraphEdgesRepository, GraphNodesRepository from .modular_pipelines import ModularPipelinesRepository diff --git a/package/kedro_viz/data_access/repositories/catalog.py b/package/kedro_viz/data_access/repositories/catalog.py index 38d9a6772d..d136c498e8 100644 --- a/package/kedro_viz/data_access/repositories/catalog.py +++ b/package/kedro_viz/data_access/repositories/catalog.py @@ -1,7 +1,6 @@ """`kedro_viz.data_access.repositories.catalog` defines interface to centralise access to Kedro data catalog.""" -# pylint: disable=missing-class-docstring,missing-function-docstring,protected-access import logging from typing import TYPE_CHECKING, Dict, Optional @@ -52,8 +51,7 @@ def _validate_layers_for_transcoding(self, dataset_name, layer): ) @property - def layers_mapping(self): - # pylint: disable=too-many-branches + def layers_mapping(self): # noqa: PLR0912 """Return layer mapping: dataset_name -> layer it belongs to in the catalog From kedro-datasets 1.3.0 onwards, the 'layers' attribute is defined inside the 'metadata' under 'kedro-viz' plugin. @@ -83,8 +81,7 @@ def layers_mapping(self): # Temporary try/except block so the Kedro develop branch can work with Viz. 
try: datasets = self._catalog._data_sets - # pylint: disable=broad-exception-caught - except Exception: # pragma: no cover + except Exception: # noqa: BLE001 # pragma: no cover datasets = self._catalog._datasets # Support for Kedro 0.18.x diff --git a/package/kedro_viz/data_access/repositories/graph.py b/package/kedro_viz/data_access/repositories/graph.py index 90f734ec1d..bea6095bc9 100644 --- a/package/kedro_viz/data_access/repositories/graph.py +++ b/package/kedro_viz/data_access/repositories/graph.py @@ -1,9 +1,10 @@ """`kedro_viz.data_access.repositories.graph` defines interface to centralise access to graph objects.""" -# pylint: disable=missing-class-docstring,missing-function-docstring + from typing import Dict, Generator, List, Optional, Set -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.nodes import GraphNode class GraphNodesRepository: diff --git a/package/kedro_viz/data_access/repositories/modular_pipelines.py b/package/kedro_viz/data_access/repositories/modular_pipelines.py index 25b7645ff4..dc51df7f80 100644 --- a/package/kedro_viz/data_access/repositories/modular_pipelines.py +++ b/package/kedro_viz/data_access/repositories/modular_pipelines.py @@ -1,7 +1,6 @@ """`kedro_viz.data_access.repositories.modular_pipelines` defines repository to centralise access for modular pipelines data.""" - from collections import defaultdict from typing import Dict, List, Set, Tuple, Union @@ -9,9 +8,9 @@ from kedro.pipeline.node import Node as KedroNode from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.nodes import ( GraphNode, - GraphNodeType, ModularPipelineChild, ModularPipelineNode, ) diff --git a/package/kedro_viz/data_access/repositories/registered_pipelines.py b/package/kedro_viz/data_access/repositories/registered_pipelines.py index 16cdd98adf..1309548fac 100644 --- a/package/kedro_viz/data_access/repositories/registered_pipelines.py +++ b/package/kedro_viz/data_access/repositories/registered_pipelines.py @@ -1,10 +1,10 @@ """`kedro_viz.data_access.repositories.registered_pipelines` defines repository to centralise access to registered pipelines data.""" -# pylint: disable=missing-class-docstring,missing-function-docstring + from collections import OrderedDict, defaultdict from typing import Dict, List, Optional, Set -from kedro_viz.models.flowchart import RegisteredPipeline +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline class RegisteredPipelinesRepository: diff --git a/package/kedro_viz/data_access/repositories/runs.py b/package/kedro_viz/data_access/repositories/runs.py index 453cb244c6..c2e5b76282 100644 --- a/package/kedro_viz/data_access/repositories/runs.py +++ b/package/kedro_viz/data_access/repositories/runs.py @@ -1,6 +1,6 @@ """`kedro_viz.data_access.repositories.runs` defines repository to centralise access to runs data from the session store.""" -# pylint: disable=missing-class-docstring,missing-function-docstring + import logging from functools import wraps from typing import Callable, Dict, Iterable, List, Optional @@ -19,7 +19,7 @@ def check_db_session(method: Callable) -> Callable: @wraps(method) def func(self: "RunsRepository", *method_args, **method_kwargs): - if not self._db_session_class: # pylint: disable=protected-access + if not self._db_session_class: return None return method(self, 
*method_args, **method_kwargs) diff --git a/package/kedro_viz/data_access/repositories/tags.py b/package/kedro_viz/data_access/repositories/tags.py index eae5c68bb0..a7bd33e31f 100644 --- a/package/kedro_viz/data_access/repositories/tags.py +++ b/package/kedro_viz/data_access/repositories/tags.py @@ -1,9 +1,9 @@ """`kedro_viz.data_access.repositories.tags` defines repository to centralise access to tags data.""" -# pylint: disable=missing-class-docstring,missing-function-docstring + from typing import Iterable, List, Set -from kedro_viz.models.flowchart import Tag +from kedro_viz.models.flowchart.named_entities import Tag class TagsRepository: diff --git a/package/kedro_viz/data_access/repositories/tracking_datasets.py b/package/kedro_viz/data_access/repositories/tracking_datasets.py index d8d06cb9a0..911bc439a7 100644 --- a/package/kedro_viz/data_access/repositories/tracking_datasets.py +++ b/package/kedro_viz/data_access/repositories/tracking_datasets.py @@ -1,6 +1,6 @@ """`kedro_viz.data_access.repositories.tracking_datasets` defines an interface to centralise access to datasets used in experiment tracking.""" -# pylint: disable=missing-class-docstring,missing-function-docstring,protected-access + from collections import defaultdict from typing import TYPE_CHECKING, Dict, List @@ -17,7 +17,7 @@ from kedro.io import AbstractVersionedDataset except ImportError: # older versions - from kedro.io import ( # type: ignore # isort:skip + from kedro.io import ( # type: ignore AbstractVersionedDataSet as AbstractVersionedDataset, ) diff --git a/package/kedro_viz/integrations/deployment/azure_deployer.py b/package/kedro_viz/integrations/deployment/azure_deployer.py index a147902545..ad7130e5c8 100644 --- a/package/kedro_viz/integrations/deployment/azure_deployer.py +++ b/package/kedro_viz/integrations/deployment/azure_deployer.py @@ -1,5 +1,6 @@ """`kedro_viz.integrations.deployment.azure_deployer` defines deployment class for Azure Blob Storage""" + import glob import logging import mimetypes diff --git a/package/kedro_viz/integrations/deployment/base_deployer.py b/package/kedro_viz/integrations/deployment/base_deployer.py index 35b7fc1818..d0f0b2a7bf 100644 --- a/package/kedro_viz/integrations/deployment/base_deployer.py +++ b/package/kedro_viz/integrations/deployment/base_deployer.py @@ -12,7 +12,7 @@ from packaging.version import parse from kedro_viz import __version__ -from kedro_viz.api.rest.responses import save_api_responses_to_fs +from kedro_viz.api.rest.responses.save_responses import save_api_responses_to_fs from kedro_viz.integrations.kedro import telemetry as kedro_telemetry _HTML_DIR = Path(__file__).parent.parent.parent.absolute() / "html" diff --git a/package/kedro_viz/integrations/deployment/gcp_deployer.py b/package/kedro_viz/integrations/deployment/gcp_deployer.py index c02010b24f..3e9a6fae09 100644 --- a/package/kedro_viz/integrations/deployment/gcp_deployer.py +++ b/package/kedro_viz/integrations/deployment/gcp_deployer.py @@ -1,5 +1,6 @@ """`kedro_viz.integrations.deployment.gcp_deployer` defines deployment class for Google Cloud Storage Bucket""" + import glob import logging import mimetypes diff --git a/package/kedro_viz/integrations/kedro/abstract_dataset_lite.py b/package/kedro_viz/integrations/kedro/abstract_dataset_lite.py index 582130de00..f7317f4d18 100644 --- a/package/kedro_viz/integrations/kedro/abstract_dataset_lite.py +++ b/package/kedro_viz/integrations/kedro/abstract_dataset_lite.py @@ -5,7 +5,20 @@ import logging from typing import Any, Optional -from 
kedro.io.core import AbstractDataset, DatasetError +try: + # kedro 0.18.11 onwards + from kedro.io.core import DatasetError +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import DataSetError as DatasetError # type: ignore + +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore + from kedro_viz.integrations.utils import UnavailableDataset diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index aabc5b73a5..6232270368 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -3,8 +3,6 @@ load data from projects created in a range of Kedro versions. """ -# pylint: disable=protected-access - import json import logging import sys @@ -46,7 +44,7 @@ def _get_dataset_stats(project_path: Path) -> Dict: stats = json.load(stats_file) return stats - except Exception as exc: # pylint: disable=broad-exception-caught + except Exception as exc: # noqa: BLE001 logger.warning( "Unable to get dataset statistics from project path %s : %s", project_path, @@ -95,7 +93,14 @@ def _load_data_helper( # patch the AbstractDataset class for a custom # implementation to handle kedro.io.core.DatasetError if is_lite: - with patch("kedro.io.data_catalog.AbstractDataset", AbstractDatasetLite): + # kedro 0.18.12 onwards + if hasattr(sys.modules["kedro.io.data_catalog"], "AbstractDataset"): + abstract_ds_patch_target = "kedro.io.data_catalog.AbstractDataset" + else: # pragma: no cover + # older versions + abstract_ds_patch_target = "kedro.io.data_catalog.AbstractDataSet" + + with patch(abstract_ds_patch_target, AbstractDatasetLite): catalog = context.catalog else: catalog = context.catalog @@ -108,7 +113,6 @@ def _load_data_helper( return catalog, pipelines_dict, session_store, stats_dict -# pylint: disable=too-many-positional-arguments def load_data( project_path: Path, env: Optional[str] = None, diff --git a/package/kedro_viz/integrations/kedro/hooks.py b/package/kedro_viz/integrations/kedro/hooks.py index 97da89319b..3089e61f50 100644 --- a/package/kedro_viz/integrations/kedro/hooks.py +++ b/package/kedro_viz/integrations/kedro/hooks.py @@ -1,4 +1,3 @@ -# pylint: disable=broad-exception-caught, protected-access """`kedro_viz.integrations.kedro.hooks` defines hooks to add additional functionalities for a kedro run.""" @@ -108,7 +107,7 @@ def create_dataset_stats(self, dataset_name: str, data: Any): """ try: - import pandas as pd # pylint: disable=import-outside-toplevel + import pandas as pd stats_dataset_name = self.get_stats_dataset_name(dataset_name) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 9fe619fe5c..e3af8b42e6 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -48,8 +48,7 @@ def _is_module_importable(module_name: str) -> bool: except ValueError as val_exc: logger.debug("ValueError in resolving %s : %s", module_name, val_exc) return False - # pylint: disable=broad-except - except Exception as exc: # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover logger.debug( "An exception occurred while resolving %s : %s", module_name, exc ) @@ -262,8 +261,7 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], 
None]: ) if len(missing_dependencies) > 0: unresolved_imports[str(file_path)] = missing_dependencies - # pylint: disable=broad-except - except Exception as exc: # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover logger.error( "An error occurred in LiteParser while mocking dependencies : %s", exc, diff --git a/package/kedro_viz/integrations/kedro/sqlite_store.py b/package/kedro_viz/integrations/kedro/sqlite_store.py index 9b9b9e7309..8ba1a5ac9e 100644 --- a/package/kedro_viz/integrations/kedro/sqlite_store.py +++ b/package/kedro_viz/integrations/kedro/sqlite_store.py @@ -1,8 +1,6 @@ """kedro_viz.intergrations.kedro.sqlite_store is a child of BaseSessionStore which stores sessions data in the SQLite database""" -# pylint: disable=no-member, broad-exception-caught - import getpass import json import logging @@ -81,7 +79,7 @@ def _to_json(self) -> str: for key, value in self.data.items(): if key == "git": try: - import git # pylint: disable=import-outside-toplevel + import git branch = git.Repo(search_parent_directories=True).active_branch value["branch"] = branch.name diff --git a/package/kedro_viz/integrations/kedro/telemetry.py b/package/kedro_viz/integrations/kedro/telemetry.py index 2c57c41536..572da919d8 100644 --- a/package/kedro_viz/integrations/kedro/telemetry.py +++ b/package/kedro_viz/integrations/kedro/telemetry.py @@ -1,5 +1,4 @@ -"""`kedro_viz.integrations.kedro.telemetry` helps integrate Kedro-Viz with Kedro-Telemetry -""" +"""`kedro_viz.integrations.kedro.telemetry` helps integrate Kedro-Viz with Kedro-Telemetry""" from pathlib import Path from typing import Optional @@ -26,12 +25,11 @@ def get_heap_app_id(project_path: Path) -> Optional[str]: return None -# pylint: disable=broad-exception-caught def get_heap_identity() -> Optional[str]: # pragma: no cover """Reads a UUID from a configuration file or generates and saves a new one if not present.""" if not _IS_TELEMETRY_INSTALLED: return None try: return _get_or_create_uuid() - except Exception: # pragma: no cover + except Exception: # noqa: BLE001 # pragma: no cover return None diff --git a/package/kedro_viz/integrations/pypi/__init__.py b/package/kedro_viz/integrations/pypi/__init__.py index 06f97172d9..4383f24751 100644 --- a/package/kedro_viz/integrations/pypi/__init__.py +++ b/package/kedro_viz/integrations/pypi/__init__.py @@ -1,4 +1,5 @@ """`kedro_viz.integrations.pypi` provides an interface to integrate Kedro-Viz with PyPI.""" + import logging from typing import Optional, Union diff --git a/package/kedro_viz/integrations/utils.py b/package/kedro_viz/integrations/utils.py index 1875cd7a85..43c4fe6e30 100644 --- a/package/kedro_viz/integrations/utils.py +++ b/package/kedro_viz/integrations/utils.py @@ -4,7 +4,12 @@ from typing import Any, Union -from kedro.io.core import AbstractDataset +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore _EMPTY = object() diff --git a/package/kedro_viz/launchers/cli/build.py b/package/kedro_viz/launchers/cli/build.py index d506266019..6e54639782 100644 --- a/package/kedro_viz/launchers/cli/build.py +++ b/package/kedro_viz/launchers/cli/build.py @@ -1,6 +1,6 @@ """`kedro_viz.launchers.cli.build` provides a cli command to build a Kedro-Viz instance""" -# pylint: disable=import-outside-toplevel + import click from kedro_viz.launchers.cli.main import viz diff --git 
a/package/kedro_viz/launchers/cli/deploy.py b/package/kedro_viz/launchers/cli/deploy.py index 10bb31870f..87e9157033 100644 --- a/package/kedro_viz/launchers/cli/deploy.py +++ b/package/kedro_viz/launchers/cli/deploy.py @@ -1,10 +1,11 @@ """`kedro_viz.launchers.cli.deploy` provides a cli command to deploy a Kedro-Viz instance on cloud platforms""" -# pylint: disable=import-outside-toplevel + import click from kedro_viz.constants import SHAREABLEVIZ_SUPPORTED_PLATFORMS from kedro_viz.launchers.cli.main import viz +from kedro_viz.launchers.utils import display_cli_message @viz.command(context_settings={"help_option_names": ["-h", "--help"]}) @@ -39,10 +40,7 @@ ) def deploy(platform, endpoint, bucket_name, include_hooks, include_previews): """Deploy and host Kedro Viz on provided platform""" - from kedro_viz.launchers.cli.utils import ( - create_shareableviz_process, - display_cli_message, - ) + from kedro_viz.launchers.cli.utils import create_shareableviz_process if not platform or platform.lower() not in SHAREABLEVIZ_SUPPORTED_PLATFORMS: display_cli_message( diff --git a/package/kedro_viz/launchers/cli/lazy_default_group.py b/package/kedro_viz/launchers/cli/lazy_default_group.py index 861d023221..9e832d2b93 100644 --- a/package/kedro_viz/launchers/cli/lazy_default_group.py +++ b/package/kedro_viz/launchers/cli/lazy_default_group.py @@ -1,7 +1,6 @@ """`kedro_viz.launchers.cli.lazy_default_group` provides a custom mutli-command subclass for a lazy subcommand loader""" -# pylint: disable=import-outside-toplevel from typing import Any, Union import click @@ -30,7 +29,7 @@ def __init__( super().__init__(*args, **kwargs) - def list_commands(self, ctx: click.Context) -> list[str]: + def list_commands(self, ctx: click.Context) -> list[str]: # noqa: ARG002 return sorted(self.lazy_subcommands.keys()) def get_command( # type: ignore[override] diff --git a/package/kedro_viz/launchers/cli/main.py b/package/kedro_viz/launchers/cli/main.py index 0ccb1515e1..9d556ab6dd 100644 --- a/package/kedro_viz/launchers/cli/main.py +++ b/package/kedro_viz/launchers/cli/main.py @@ -6,7 +6,7 @@ @click.group(name="Kedro-Viz") -def viz_cli(): # pylint: disable=missing-function-docstring +def viz_cli(): pass @@ -22,5 +22,5 @@ def viz_cli(): # pylint: disable=missing-function-docstring default_if_no_args=True, ) @click.pass_context -def viz(ctx): # pylint: disable=unused-argument +def viz(ctx): """Visualise a Kedro pipeline using Kedro viz.""" diff --git a/package/kedro_viz/launchers/cli/run.py b/package/kedro_viz/launchers/cli/run.py index 97c9ab3dbc..e7dd08b408 100644 --- a/package/kedro_viz/launchers/cli/run.py +++ b/package/kedro_viz/launchers/cli/run.py @@ -83,7 +83,6 @@ is_flag=True, help="An experimental flag to open Kedro-Viz without Kedro project dependencies", ) -# pylint: disable=import-outside-toplevel, too-many-locals, too-many-positional-arguments def run( host, port, @@ -112,13 +111,13 @@ def run( get_latest_version, is_running_outdated_version, ) - from kedro_viz.launchers.cli.utils import display_cli_message from kedro_viz.launchers.utils import ( _PYPROJECT, _check_viz_up, _find_kedro_project, _start_browser, _wait_for, + display_cli_message, ) from kedro_viz.server import run_server diff --git a/package/kedro_viz/launchers/cli/utils.py b/package/kedro_viz/launchers/cli/utils.py index 290a0461c0..60e7403535 100644 --- a/package/kedro_viz/launchers/cli/utils.py +++ b/package/kedro_viz/launchers/cli/utils.py @@ -1,12 +1,11 @@ """`kedro_viz.launchers.cli.utils` provides utility functions for cli commands.""" 
-# pylint: disable=import-outside-toplevel + from pathlib import Path from time import sleep from typing import Union -import click - from kedro_viz.constants import VIZ_DEPLOY_TIME_LIMIT +from kedro_viz.launchers.utils import display_cli_message def create_shareableviz_process( @@ -96,25 +95,13 @@ def create_shareableviz_process( "you have write access to the current directory", "red", ) - # pylint: disable=broad-exception-caught - except Exception as exc: # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover display_cli_message(f"ERROR: Failed to build/deploy Kedro-Viz : {exc} ", "red") finally: viz_deploy_process.terminate() -def display_cli_message(msg, msg_color=None): - """Displays message for Kedro Viz build and deploy commands""" - click.echo( - click.style( - msg, - fg=msg_color, - ) - ) - - -# pylint: disable=too-many-positional-arguments def _load_and_deploy_viz( platform, is_all_previews_enabled, @@ -144,14 +131,12 @@ def _load_and_deploy_viz( deployer.deploy(is_all_previews_enabled) except ( - # pylint: disable=catching-non-exception (FileNotFoundError, ServiceRequestError) if ServiceRequestError is not None else FileNotFoundError ): # pragma: no cover exception_queue.put(Exception("The specified bucket does not exist")) - # pylint: disable=broad-exception-caught - except Exception as exc: # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover exception_queue.put(exc) finally: process_completed.value = 1 diff --git a/package/kedro_viz/launchers/jupyter.py b/package/kedro_viz/launchers/jupyter.py index f51f6ce7eb..22af9fb99a 100644 --- a/package/kedro_viz/launchers/jupyter.py +++ b/package/kedro_viz/launchers/jupyter.py @@ -75,7 +75,7 @@ def dbutils_get(attr): def _display_databricks_html(port: int): # pragma: no cover url = _make_databricks_url(port) - displayHTML = _get_databricks_object("displayHTML") # pylint: disable=invalid-name + displayHTML = _get_databricks_object("displayHTML") if displayHTML is not None: displayHTML(f"""Open Kedro-Viz""") else: @@ -92,9 +92,7 @@ def parse_args(args): # pragma: no cover return arg_dict -def run_viz( # pylint: disable=too-many-locals - args: str = "", local_ns: Dict[str, Any] = None -) -> None: +def run_viz(args: str = "", local_ns: Dict[str, Any] = None) -> None: """ Line magic function to start Kedro Viz with optional arguments. 
diff --git a/package/kedro_viz/launchers/utils.py b/package/kedro_viz/launchers/utils.py index c4b0076677..5c6bbae9e3 100644 --- a/package/kedro_viz/launchers/utils.py +++ b/package/kedro_viz/launchers/utils.py @@ -7,6 +7,7 @@ from time import sleep, time from typing import Any, Callable, Union +import click import requests logger = logging.getLogger(__name__) @@ -49,7 +50,7 @@ def _wait_for( while time() <= end: try: retval = func(**kwargs) - except Exception as err: # pylint: disable=broad-except + except Exception as err: # noqa: BLE001 if print_error: logger.error(err) else: @@ -103,8 +104,7 @@ def _is_project(project_path: Union[str, Path]) -> bool: try: return "[tool.kedro]" in metadata_file.read_text(encoding="utf-8") - # pylint: disable=broad-exception-caught - except Exception: + except Exception: # noqa: BLE001 return False @@ -114,3 +114,13 @@ def _find_kedro_project(current_dir: Path) -> Any: if _is_project(project_dir): return project_dir return None + + +def display_cli_message(msg, msg_color=None): + """Displays message for Kedro Viz build and deploy commands""" + click.echo( + click.style( + msg, + fg=msg_color, + ) + ) diff --git a/package/kedro_viz/models/experiment_tracking.py b/package/kedro_viz/models/experiment_tracking.py index d662a3fead..516b1d2a16 100644 --- a/package/kedro_viz/models/experiment_tracking.py +++ b/package/kedro_viz/models/experiment_tracking.py @@ -1,6 +1,6 @@ """kedro_viz.models.experiment_tracking` defines data models to represent run data and tracking datasets.""" -# pylint: disable=too-few-public-methods,protected-access,missing-function-docstring + import logging from dataclasses import dataclass, field from enum import Enum @@ -21,7 +21,7 @@ from kedro.io import AbstractVersionedDataset except ImportError: # older versions - from kedro.io import ( # type: ignore # isort:skip + from kedro.io import ( # type: ignore AbstractVersionedDataSet as AbstractVersionedDataset, ) @@ -112,7 +112,7 @@ def load_tracking_data(self, run_id: str): } else: self.runs[run_id] = self.dataset.preview() # type: ignore - except Exception as exc: # pylint: disable=broad-except # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover logger.warning( "'%s' with version '%s' could not be loaded. Full exception: %s: %s", self.dataset_name, diff --git a/package/kedro_viz/models/flowchart/__init__.py b/package/kedro_viz/models/flowchart/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/package/kedro_viz/models/flowchart/edge.py b/package/kedro_viz/models/flowchart/edge.py new file mode 100644 index 0000000000..439cafc782 --- /dev/null +++ b/package/kedro_viz/models/flowchart/edge.py @@ -0,0 +1,15 @@ +"""`kedro_viz.models.flowchart.edge` defines data models to represent Kedro edges in a viz graph.""" + +from pydantic import BaseModel + + +class GraphEdge(BaseModel, frozen=True): + """Represent an edge in the graph + + Args: + source (str): The id of the source node. + target (str): The id of the target node. 
+ """ + + source: str + target: str diff --git a/package/kedro_viz/models/flowchart/model_utils.py b/package/kedro_viz/models/flowchart/model_utils.py new file mode 100644 index 0000000000..f12e94b669 --- /dev/null +++ b/package/kedro_viz/models/flowchart/model_utils.py @@ -0,0 +1,45 @@ +"""`kedro_viz.models.flowchart.model_utils` defines utils for Kedro entities in a viz graph.""" + +import logging +from enum import Enum +from types import FunctionType +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + + +def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: + """ + Extract the file path from a dataset description dictionary. + """ + filepath = dataset_description.get("filepath") or dataset_description.get("path") + return str(filepath) if filepath else None + + +def _extract_wrapped_func(func: FunctionType) -> FunctionType: + """Extract a wrapped decorated function to inspect the source code if available. + Adapted from https://stackoverflow.com/a/43506509/1684058 + """ + if func.__closure__ is None: + return func + closure = (c.cell_contents for c in func.__closure__) + wrapped_func = next((c for c in closure if isinstance(c, FunctionType)), None) + # return the original function if it's not a decorated function + return func if wrapped_func is None else wrapped_func + + +# ============================================================================= +# Shared base classes and enumerations for model components +# ============================================================================= + + +class GraphNodeType(str, Enum): + """Represent all possible node types in the graph representation of a Kedro pipeline. + The type needs to inherit from str as well so FastAPI can serialise it. See: + https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations + """ + + TASK = "task" + DATA = "data" + PARAMETERS = "parameters" + MODULAR_PIPELINE = "modularPipeline" # CamelCase for frontend compatibility diff --git a/package/kedro_viz/models/flowchart/named_entities.py b/package/kedro_viz/models/flowchart/named_entities.py new file mode 100644 index 0000000000..65944c0764 --- /dev/null +++ b/package/kedro_viz/models/flowchart/named_entities.py @@ -0,0 +1,41 @@ +"""kedro_viz.models.flowchart.named_entities` defines data models for representing named entities +such as tags and registered pipelines within a Kedro visualization graph.""" + +from typing import Optional + +from pydantic import BaseModel, Field, ValidationInfo, field_validator + + +class NamedEntity(BaseModel): + """Represent a named entity (Tag/Registered Pipeline) in a Kedro project + Args: + id (str): Id of the registered pipeline + + Raises: + AssertionError: If id is not supplied during instantiation + """ + + id: str + name: Optional[str] = Field( + default=None, + validate_default=True, + description="The name of the entity", + ) + + @field_validator("name") + @classmethod + def set_name(cls, _, info: ValidationInfo): + """Ensures that the 'name' field is set to the value of 'id' if 'name' is not provided.""" + assert "id" in info.data + return info.data["id"] + + +class RegisteredPipeline(NamedEntity): + """Represent a registered pipeline in a Kedro project.""" + + +class Tag(NamedEntity): + """Represent a tag in a Kedro project.""" + + def __hash__(self) -> int: + return hash(self.id) diff --git a/package/kedro_viz/models/flowchart/node_metadata.py b/package/kedro_viz/models/flowchart/node_metadata.py new file mode 100644 index 0000000000..20940a9b3a --- 
/dev/null +++ b/package/kedro_viz/models/flowchart/node_metadata.py @@ -0,0 +1,406 @@ +""" +`kedro_viz.models.flowchart.node_metadata` defines data models to represent +Kedro metadata in a visualization graph. +""" + +import inspect +import logging +from abc import ABC +from pathlib import Path +from typing import ClassVar, Dict, List, Optional, Union, cast + +from kedro.pipeline.node import Node as KedroNode +from pydantic import BaseModel, Field, field_validator, model_validator + +try: + # kedro 0.18.12 onwards + from kedro.io.core import AbstractDataset +except ImportError: # pragma: no cover + # older versions + from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore + +from kedro_viz.models.utils import get_dataset_type + +from .model_utils import _extract_wrapped_func, _parse_filepath +from .nodes import DataNode, ParametersNode, TaskNode, TranscodedDataNode + +logger = logging.getLogger(__name__) + + +class GraphNodeMetadata(BaseModel, ABC): + """Represent a graph node's metadata.""" + + +class TaskNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a TaskNode. + + Args: + task_node (TaskNode): Task node to which this metadata belongs to. + + Raises: + AssertionError: If task_node is not supplied during instantiation. + """ + + task_node: TaskNode = Field(..., exclude=True) + + code: Optional[str] = Field( + default=None, + validate_default=True, + description="Source code of the node's function", + ) + + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="Path to the file where the node is defined", + ) + + parameters: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The parameters of the node, if available", + ) + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="The command to run the pipeline to this node", + ) + + inputs: Optional[List[str]] = Field( + default=None, validate_default=True, description="The inputs to the TaskNode" + ) + outputs: Optional[List[str]] = Field( + default=None, validate_default=True, description="The outputs from the TaskNode" + ) + + @model_validator(mode="before") + @classmethod + def check_task_node_exists(cls, values): + assert "task_node" in values + cls.set_task_and_kedro_node(values["task_node"]) + return values + + @classmethod + def set_task_and_kedro_node(cls, task_node): + cls.task_node = task_node + cls.kedro_node = cast(KedroNode, task_node.kedro_obj) + + @field_validator("code") + @classmethod + def set_code(cls, code): + # this is required to handle partial, curry functions + if inspect.isfunction(cls.kedro_node.func): + code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) + return code + + return None + + @field_validator("filepath") + @classmethod + def set_filepath(cls, filepath): + # this is required to handle partial, curry functions + if inspect.isfunction(cls.kedro_node.func): + code_full_path = ( + Path(inspect.getfile(cls.kedro_node.func)).expanduser().resolve() + ) + + try: + filepath = code_full_path.relative_to(Path.cwd().parent) + except ValueError: # pragma: no cover + # if the filepath can't be resolved relative to the current directory, + # e.g. either during tests or during launching development server + # outside of a Kedro project, simply return the fullpath to the file. 
+ filepath = code_full_path + + return str(filepath) + + return None + + @field_validator("parameters") + @classmethod + def set_parameters(cls, _): + return cls.task_node.parameters + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + return f"kedro run --to-nodes='{cls.kedro_node.name}'" + + @field_validator("inputs") + @classmethod + def set_inputs(cls, _): + return cls.kedro_node.inputs + + @field_validator("outputs") + @classmethod + def set_outputs(cls, _): + return cls.kedro_node.outputs + + +class DataNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a DataNode. + + Args: + data_node (DataNode): Data node to which this metadata belongs to. + + Attributes: + is_all_previews_enabled (bool): Class-level attribute to determine if + previews are enabled for all nodes. This can be configured via CLI + or UI to manage the preview settings. + + Raises: + AssertionError: If data_node is not supplied during instantiation. + """ + + data_node: DataNode = Field(..., exclude=True) + + is_all_previews_enabled: ClassVar[bool] = True + + type: Optional[str] = Field( + default=None, validate_default=True, description="The type of the data node" + ) + + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="The path to the actual data file for the underlying dataset", + ) + + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="Command to run the pipeline to this node", + ) + + preview: Optional[Union[Dict, str]] = Field( + default=None, + validate_default=True, + description="Preview data for the underlying datanode", + ) + + preview_type: Optional[str] = Field( + default=None, + validate_default=True, + description="Type of preview for the dataset", + ) + + stats: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The statistics for the data node.", + ) + + @model_validator(mode="before") + @classmethod + def check_data_node_exists(cls, values): + assert "data_node" in values + cls.set_data_node_and_dataset(values["data_node"]) + return values + + @classmethod + def set_is_all_previews_enabled(cls, value: bool): + cls.is_all_previews_enabled = value + + @classmethod + def set_data_node_and_dataset(cls, data_node): + cls.data_node = data_node + cls.dataset = cast(AbstractDataset, data_node.kedro_obj) + + # dataset.release clears the cache before loading to ensure that this issue + # does not arise: https://github.com/kedro-org/kedro-viz/pull/573. + cls.dataset.release() + + @field_validator("type") + @classmethod + def set_type(cls, _): + return cls.data_node.dataset_type + + @field_validator("filepath") + @classmethod + def set_filepath(cls, _): + dataset_description = cls.dataset._describe() + return _parse_filepath(dataset_description) + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + if not cls.data_node.is_free_input: + return f"kedro run --to-outputs={cls.data_node.name}" + return None + + @field_validator("preview") + @classmethod + def set_preview(cls, _): + if ( + not cls.data_node.is_preview_enabled() + or not hasattr(cls.dataset, "preview") + or not cls.is_all_previews_enabled + ): + return None + + try: + preview_args = ( + cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None + ) + if preview_args is None: + return cls.dataset.preview() + return cls.dataset.preview(**preview_args) + + except Exception as exc: # noqa: BLE001 + logger.warning( + "'%s' could not be previewed. 
Full exception: %s: %s", + cls.data_node.name, + type(exc).__name__, + exc, + ) + return None + + @field_validator("preview_type") + @classmethod + def set_preview_type(cls, _): + if ( + not cls.data_node.is_preview_enabled() + or not hasattr(cls.dataset, "preview") + or not cls.is_all_previews_enabled + ): + return None + + try: + preview_type_annotation = inspect.signature( + cls.dataset.preview + ).return_annotation + # Attempt to get the name attribute, if it exists. + # Otherwise, use str to handle the annotation directly. + preview_type_name = getattr( + preview_type_annotation, "__name__", str(preview_type_annotation) + ) + return preview_type_name + + except Exception as exc: # noqa: BLE001 # pragma: no cover + logger.warning( + "'%s' did not have preview type. Full exception: %s: %s", + cls.data_node.name, + type(exc).__name__, + exc, + ) + return None + + @field_validator("stats") + @classmethod + def set_stats(cls, _): + return cls.data_node.stats + + +class TranscodedDataNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a TranscodedDataNode. + Args: + transcoded_data_node: The transcoded data node to which this metadata belongs. + + Raises: + AssertionError: If `transcoded_data_node` is not supplied during instantiation. + """ + + transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) + + # Only available if the dataset has filepath set. + filepath: Optional[str] = Field( + default=None, + validate_default=True, + description="The path to the actual data file for the underlying dataset", + ) + + run_command: Optional[str] = Field( + default=None, + validate_default=True, + description="Command to run the pipeline to this node", + ) + original_type: Optional[str] = Field( + default=None, + validate_default=True, + description="The dataset type of the underlying transcoded data node original version", + ) + transcoded_types: Optional[List[str]] = Field( + default=None, + validate_default=True, + description="The list of all dataset types for the transcoded versions", + ) + + # Statistics for the underlying data node + stats: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The statistics for the transcoded data node metadata.", + ) + + @model_validator(mode="before") + @classmethod + def check_transcoded_data_node_exists(cls, values): + assert "transcoded_data_node" in values + cls.transcoded_data_node = values["transcoded_data_node"] + return values + + @field_validator("filepath") + @classmethod + def set_filepath(cls, _): + dataset_description = cls.transcoded_data_node.original_version._describe() + return _parse_filepath(dataset_description) + + @field_validator("run_command") + @classmethod + def set_run_command(cls, _): + if not cls.transcoded_data_node.is_free_input: + return f"kedro run --to-outputs={cls.transcoded_data_node.original_name}" + return None + + @field_validator("original_type") + @classmethod + def set_original_type(cls, _): + return get_dataset_type(cls.transcoded_data_node.original_version) + + @field_validator("transcoded_types") + @classmethod + def set_transcoded_types(cls, _): + return [ + get_dataset_type(transcoded_version) + for transcoded_version in cls.transcoded_data_node.transcoded_versions + ] + + @field_validator("stats") + @classmethod + def set_stats(cls, _): + return cls.transcoded_data_node.stats + + +class ParametersNodeMetadata(GraphNodeMetadata): + """Represent the metadata of a ParametersNode. 
+ + Args: + parameters_node (ParametersNode): The underlying parameters node + for the parameters metadata node. + + Raises: + AssertionError: If parameters_node is not supplied during instantiation. + """ + + parameters_node: ParametersNode = Field(..., exclude=True) + parameters: Optional[Dict] = Field( + default=None, + validate_default=True, + description="The parameters dictionary for the parameters metadata node", + ) + + @model_validator(mode="before") + @classmethod + def check_parameters_node_exists(cls, values): + assert "parameters_node" in values + cls.parameters_node = values["parameters_node"] + return values + + @field_validator("parameters") + @classmethod + def set_parameters(cls, _): + if cls.parameters_node.is_single_parameter(): + return { + cls.parameters_node.parameter_name: cls.parameters_node.parameter_value + } + return cls.parameters_node.parameter_value diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart/nodes.py similarity index 52% rename from package/kedro_viz/models/flowchart.py rename to package/kedro_viz/models/flowchart/nodes.py index 8828650a7e..0289fe1e1e 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart/nodes.py @@ -1,13 +1,8 @@ -"""`kedro_viz.models.flowchart` defines data models to represent Kedro entities in a viz graph.""" +"""`kedro_viz.models.flowchart.nodes` defines models to represent Kedro nodes in a viz graph.""" -# pylint: disable=protected-access, missing-function-docstring -import abc -import inspect import logging -from enum import Enum -from pathlib import Path -from types import FunctionType -from typing import Any, ClassVar, Dict, List, Optional, Set, Union, cast +from abc import ABC +from typing import Any, Dict, Optional, Set, Union, cast from fastapi.encoders import jsonable_encoder from kedro.pipeline.node import Node as KedroNode @@ -20,9 +15,6 @@ model_validator, ) -from kedro_viz.models.utils import get_dataset_type -from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding - try: # kedro 0.18.11 onwards from kedro.io.core import DatasetError @@ -36,75 +28,15 @@ # older versions from kedro.io.core import AbstractDataSet as AbstractDataset # type: ignore -logger = logging.getLogger(__name__) - - -def _parse_filepath(dataset_description: Dict[str, Any]) -> Optional[str]: - filepath = dataset_description.get("filepath") or dataset_description.get("path") - return str(filepath) if filepath else None - - -class NamedEntity(BaseModel): - """Represent a named entity (Tag/Registered Pipeline) in a Kedro project - Args: - id (str): Id of the registered pipeline - - Raises: - AssertionError: If id is not supplied during instantiation - """ - - id: str - name: Optional[str] = Field( - default=None, - validate_default=True, - description="The name of the registered pipeline", - ) - - @field_validator("name") - @classmethod - def set_name(cls, _, info: ValidationInfo): - assert "id" in info.data - return info.data["id"] - - -class RegisteredPipeline(NamedEntity): - """Represent a registered pipeline in a Kedro project""" - - -class GraphNodeType(str, Enum): - """Represent all possible node types in the graph representation of a Kedro pipeline. - The type needs to inherit from str as well so FastAPI can serialise it. 
See: - https://fastapi.tiangolo.com/tutorial/path-params/#working-with-python-enumerations - """ - - TASK = "task" - DATA = "data" - PARAMETERS = "parameters" - MODULAR_PIPELINE = ( - "modularPipeline" # camelCase so it can be referred directly to in the frontend - ) - - -class ModularPipelineChild(BaseModel, frozen=True): - """Represent a child of a modular pipeline. - - Args: - id (str): Id of the modular pipeline child - type (GraphNodeType): Type of modular pipeline child - """ - - id: str - type: GraphNodeType - +from kedro_viz.models.utils import get_dataset_type +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding -class Tag(NamedEntity): - """Represent a tag in a Kedro project""" +from .model_utils import GraphNodeType - def __hash__(self) -> int: - return hash(self.id) +logger = logging.getLogger(__name__) -class GraphNode(BaseModel, abc.ABC): +class GraphNode(BaseModel, ABC): """Represent a node in the graph representation of a Kedro pipeline. All node models except the metadata node models should inherit from this class @@ -165,7 +97,6 @@ def create_task_node( ) @classmethod - # pylint: disable=too-many-positional-arguments def create_data_node( cls, dataset_id: str, @@ -218,7 +149,6 @@ def create_data_node( ) @classmethod - # pylint: disable=too-many-positional-arguments def create_parameters_node( cls, dataset_id: str, @@ -284,8 +214,16 @@ def has_metadata(self) -> bool: return self.kedro_obj is not None -class GraphNodeMetadata(BaseModel, abc.ABC): - """Represent a graph node's metadata""" +class ModularPipelineChild(BaseModel, frozen=True): + """Represent a child of a modular pipeline. + + Args: + id (str): Id of the modular pipeline child + type (GraphNodeType): Type of modular pipeline child + """ + + id: str + type: GraphNodeType class TaskNode(GraphNode): @@ -320,155 +258,6 @@ def set_namespace(cls, _, info: ValidationInfo): return info.data["kedro_obj"].namespace -def _extract_wrapped_func(func: FunctionType) -> FunctionType: - """Extract a wrapped decorated function to inspect the source code if available. - Adapted from https://stackoverflow.com/a/43506509/1684058 - """ - if func.__closure__ is None: - return func - closure = (c.cell_contents for c in func.__closure__) - wrapped_func = next((c for c in closure if isinstance(c, FunctionType)), None) - # return the original function if it's not a decorated function - return func if wrapped_func is None else wrapped_func - - -class ModularPipelineNode(GraphNode): - """Represent a modular pipeline node in the graph""" - - # A modular pipeline doesn't belong to any other modular pipeline, - # in the same sense as other types of GraphNode do. - # Therefore it's default to None. - # The parent-child relationship between modular pipeline themselves is modelled explicitly. - modular_pipelines: Optional[Set[str]] = None - - # Model the modular pipelines tree using a child-references representation of a tree. - # See: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-child-references/ - # for more details. 
- # For example, if a node namespace is "uk.data_science", - # the "uk" modular pipeline node's children are ["uk.data_science"] - children: Set[ModularPipelineChild] = Field( - set(), description="The children for the modular pipeline node" - ) - - inputs: Set[str] = Field( - set(), description="The input datasets to the modular pipeline node" - ) - - outputs: Set[str] = Field( - set(), description="The output datasets from the modular pipeline node" - ) - - # The type for Modular Pipeline Node - type: str = GraphNodeType.MODULAR_PIPELINE.value - - -class TaskNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TaskNode - - Args: - task_node (TaskNode): Task node to which this metadata belongs to. - - Raises: - AssertionError: If task_node is not supplied during instantiation - """ - - task_node: TaskNode = Field(..., exclude=True) - - code: Optional[str] = Field( - default=None, - validate_default=True, - description="Source code of the node's function", - ) - - filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="Path to the file where the node is defined", - ) - - parameters: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The parameters of the node, if available", - ) - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="The command to run the pipeline to this node", - ) - - inputs: Optional[List[str]] = Field( - default=None, validate_default=True, description="The inputs to the TaskNode" - ) - outputs: Optional[List[str]] = Field( - default=None, validate_default=True, description="The outputs from the TaskNode" - ) - - @model_validator(mode="before") - @classmethod - def check_task_node_exists(cls, values): - assert "task_node" in values - cls.set_task_and_kedro_node(values["task_node"]) - return values - - @classmethod - def set_task_and_kedro_node(cls, task_node): - cls.task_node = task_node - cls.kedro_node = cast(KedroNode, task_node.kedro_obj) - - @field_validator("code") - @classmethod - def set_code(cls, code): - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) - return code - - return None - - @field_validator("filepath") - @classmethod - def set_filepath(cls, filepath): - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code_full_path = ( - Path(inspect.getfile(cls.kedro_node.func)).expanduser().resolve() - ) - - try: - filepath = code_full_path.relative_to(Path.cwd().parent) - except ValueError: # pragma: no cover - # if the filepath can't be resolved relative to the current directory, - # e.g. either during tests or during launching development server - # outside of a Kedro project, simply return the fullpath to the file. 
- filepath = code_full_path - - return str(filepath) - - return None - - @field_validator("parameters") - @classmethod - def set_parameters(cls, _): - return cls.task_node.parameters - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - return f"kedro run --to-nodes='{cls.kedro_node.name}'" - - @field_validator("inputs") - @classmethod - def set_inputs(cls, _): - return cls.kedro_node.inputs - - @field_validator("outputs") - @classmethod - def set_outputs(cls, _): - return cls.kedro_node.outputs - - -# pylint: disable=missing-function-docstring class DataNode(GraphNode): """Represent a graph node of type data @@ -584,241 +373,6 @@ def has_metadata(self) -> bool: return True -class DataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a DataNode - - Args: - data_node (DataNode): Data node to which this metadata belongs to. - - Attributes: - is_all_previews_enabled (bool): Class-level attribute to determine if - previews are enabled for all nodes. This can be configured via CLI - or UI to manage the preview settings. - - Raises: - AssertionError: If data_node is not supplied during instantiation - """ - - data_node: DataNode = Field(..., exclude=True) - - is_all_previews_enabled: ClassVar[bool] = True - - type: Optional[str] = Field( - default=None, validate_default=True, description="The type of the data node" - ) - - filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="The path to the actual data file for the underlying dataset", - ) - - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="Command to run the pipeline to this node", - ) - - preview: Optional[Union[Dict, str]] = Field( - default=None, - validate_default=True, - description="Preview data for the underlying datanode", - ) - - preview_type: Optional[str] = Field( - default=None, - validate_default=True, - description="Type of preview for the dataset", - ) - - stats: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The statistics for the data node.", - ) - - @model_validator(mode="before") - @classmethod - def check_data_node_exists(cls, values): - assert "data_node" in values - cls.set_data_node_and_dataset(values["data_node"]) - return values - - @classmethod - def set_is_all_previews_enabled(cls, value: bool): - cls.is_all_previews_enabled = value - - @classmethod - def set_data_node_and_dataset(cls, data_node): - cls.data_node = data_node - cls.dataset = cast(AbstractDataset, data_node.kedro_obj) - - # dataset.release clears the cache before loading to ensure that this issue - # does not arise: https://github.com/kedro-org/kedro-viz/pull/573. 
- cls.dataset.release() - - @field_validator("type") - @classmethod - def set_type(cls, _): - return cls.data_node.dataset_type - - @field_validator("filepath") - @classmethod - def set_filepath(cls, _): - dataset_description = cls.dataset._describe() - return _parse_filepath(dataset_description) - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - if not cls.data_node.is_free_input: - return f"kedro run --to-outputs={cls.data_node.name}" - return None - - @field_validator("preview") - @classmethod - def set_preview(cls, _): - if ( - not cls.data_node.is_preview_enabled() - or not hasattr(cls.dataset, "preview") - or not cls.is_all_previews_enabled - ): - return None - - try: - preview_args = ( - cls.data_node.get_preview_args() if cls.data_node.viz_metadata else None - ) - if preview_args is None: - return cls.dataset.preview() - return cls.dataset.preview(**preview_args) - - except Exception as exc: # pylint: disable=broad-except - logger.warning( - "'%s' could not be previewed. Full exception: %s: %s", - cls.data_node.name, - type(exc).__name__, - exc, - ) - return None - - @field_validator("preview_type") - @classmethod - def set_preview_type(cls, _): - if ( - not cls.data_node.is_preview_enabled() - or not hasattr(cls.dataset, "preview") - or not cls.is_all_previews_enabled - ): - return None - - try: - preview_type_annotation = inspect.signature( - cls.dataset.preview - ).return_annotation - # Attempt to get the name attribute, if it exists. - # Otherwise, use str to handle the annotation directly. - preview_type_name = getattr( - preview_type_annotation, "__name__", str(preview_type_annotation) - ) - return preview_type_name - - except Exception as exc: # pylint: disable=broad-except # pragma: no cover - logger.warning( - "'%s' did not have preview type. Full exception: %s: %s", - cls.data_node.name, - type(exc).__name__, - exc, - ) - return None - - @field_validator("stats") - @classmethod - def set_stats(cls, _): - return cls.data_node.stats - - -class TranscodedDataNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a TranscodedDataNode - Args: - transcoded_data_node (TranscodedDataNode): The underlying transcoded - data node to which this metadata belongs to. - - Raises: - AssertionError: If transcoded_data_node is not supplied during instantiation - """ - - transcoded_data_node: TranscodedDataNode = Field(..., exclude=True) - - # Only available if the dataset has filepath set. 
- filepath: Optional[str] = Field( - default=None, - validate_default=True, - description="The path to the actual data file for the underlying dataset", - ) - - run_command: Optional[str] = Field( - default=None, - validate_default=True, - description="Command to run the pipeline to this node", - ) - original_type: Optional[str] = Field( - default=None, - validate_default=True, - description="The dataset type of the underlying transcoded data node original version", - ) - transcoded_types: Optional[List[str]] = Field( - default=None, - validate_default=True, - description="The list of all dataset types for the transcoded versions", - ) - - # Statistics for the underlying data node - stats: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The statistics for the transcoded data node metadata.", - ) - - @model_validator(mode="before") - @classmethod - def check_transcoded_data_node_exists(cls, values): - assert "transcoded_data_node" in values - cls.transcoded_data_node = values["transcoded_data_node"] - return values - - @field_validator("filepath") - @classmethod - def set_filepath(cls, _): - dataset_description = cls.transcoded_data_node.original_version._describe() - return _parse_filepath(dataset_description) - - @field_validator("run_command") - @classmethod - def set_run_command(cls, _): - if not cls.transcoded_data_node.is_free_input: - return f"kedro run --to-outputs={cls.transcoded_data_node.original_name}" - return None - - @field_validator("original_type") - @classmethod - def set_original_type(cls, _): - return get_dataset_type(cls.transcoded_data_node.original_version) - - @field_validator("transcoded_types") - @classmethod - def set_transcoded_types(cls, _): - return [ - get_dataset_type(transcoded_version) - for transcoded_version in cls.transcoded_data_node.transcoded_versions - ] - - @field_validator("stats") - @classmethod - def set_stats(cls, _): - return cls.transcoded_data_node.stats - - class ParametersNode(GraphNode): """Represent a graph node of type parameters Args: @@ -877,8 +431,7 @@ def parameter_value(self) -> Any: "Cannot find parameter `%s` in the catalog.", self.parameter_name ) return None - # pylint: disable=broad-exception-caught - except Exception as exc: # pragma: no cover + except Exception as exc: # noqa: BLE001 # pragma: no cover logger.error( "An error occurred when loading parameter `%s` in the catalog :: %s", self.parameter_name, @@ -887,48 +440,31 @@ def parameter_value(self) -> Any: return None -class ParametersNodeMetadata(GraphNodeMetadata): - """Represent the metadata of a ParametersNode - - Args: - parameters_node (ParametersNode): The underlying parameters node - for the parameters metadata node. +class ModularPipelineNode(GraphNode): + """Represent a modular pipeline node in the graph""" - Raises: - AssertionError: If parameters_node is not supplied during instantiation - """ + # A modular pipeline doesn't belong to any other modular pipeline, + # in the same sense as other types of GraphNode do. + # Therefore, it's default to None. + # The parent-child relationship between modular pipeline themselves is modelled explicitly. + modular_pipelines: Optional[Set[str]] = None - parameters_node: ParametersNode = Field(..., exclude=True) - parameters: Optional[Dict] = Field( - default=None, - validate_default=True, - description="The parameters dictionary for the parameters metadata node", + # Model the modular pipelines tree using a child-references representation of a tree. 
+ # See: https://docs.mongodb.com/manual/tutorial/model-tree-structures-with-child-references/ + # for more details. + # For example, if a node namespace is "uk.data_science", + # the "uk" modular pipeline node's children are ["uk.data_science"] + children: Set[ModularPipelineChild] = Field( + set(), description="The children for the modular pipeline node" ) - @model_validator(mode="before") - @classmethod - def check_parameters_node_exists(cls, values): - assert "parameters_node" in values - cls.parameters_node = values["parameters_node"] - return values - - @field_validator("parameters") - @classmethod - def set_parameters(cls, _): - if cls.parameters_node.is_single_parameter(): - return { - cls.parameters_node.parameter_name: cls.parameters_node.parameter_value - } - return cls.parameters_node.parameter_value - - -class GraphEdge(BaseModel, frozen=True): - """Represent an edge in the graph + inputs: Set[str] = Field( + set(), description="The input datasets to the modular pipeline node" + ) - Args: - source (str): The id of the source node. - target (str): The id of the target node. - """ + outputs: Set[str] = Field( + set(), description="The output datasets from the modular pipeline node" + ) - source: str - target: str + # The type for Modular Pipeline Node + type: str = GraphNodeType.MODULAR_PIPELINE.value diff --git a/package/kedro_viz/models/metadata.py b/package/kedro_viz/models/metadata.py index debe1f04e3..6e73c104f1 100644 --- a/package/kedro_viz/models/metadata.py +++ b/package/kedro_viz/models/metadata.py @@ -1,6 +1,5 @@ """`kedro_viz.models.metadata` defines metadata for Kedro-Viz application.""" -# pylint: disable=missing-function-docstring from typing import ClassVar, List from pydantic import BaseModel, field_validator diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py index 37d31f2f19..251bb32b6b 100644 --- a/package/kedro_viz/server.py +++ b/package/kedro_viz/server.py @@ -8,13 +8,12 @@ from kedro.io import DataCatalog from kedro.pipeline import Pipeline -from kedro_viz.api.rest.responses import save_api_responses_to_fs from kedro_viz.constants import DEFAULT_HOST, DEFAULT_PORT from kedro_viz.data_access import DataAccessManager, data_access_manager from kedro_viz.database import make_db_session_factory from kedro_viz.integrations.kedro import data_loader as kedro_data_loader from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore -from kedro_viz.launchers.utils import _check_viz_up, _wait_for +from kedro_viz.launchers.utils import _check_viz_up, _wait_for, display_cli_message DEV_PORT = 4142 @@ -25,7 +24,7 @@ def populate_data( pipelines: Dict[str, Pipeline], session_store: BaseSessionStore, stats_dict: Dict, -): # pylint: disable=redefined-outer-name +): """Populate data repositories. Should be called once on application start if creating an api app from project. 
""" @@ -44,7 +43,6 @@ def populate_data( data_access_manager.add_pipelines(pipelines) -# pylint: disable=too-many-positional-arguments def load_and_populate_data( path: Path, env: Optional[str] = None, @@ -71,7 +69,6 @@ def load_and_populate_data( populate_data(data_access_manager, catalog, pipelines, session_store, stats_dict) -# pylint: disable=too-many-positional-arguments, too-many-locals def run_server( host: str = DEFAULT_HOST, port: int = DEFAULT_PORT, @@ -85,7 +82,7 @@ def run_server( package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, is_lite: bool = False, -): # pylint: disable=redefined-outer-name +): """Run a uvicorn server with a FastAPI app that either launches API response data from a file or from reading data from a real Kedro project. @@ -112,10 +109,10 @@ def run_server( # Importing below dependencies inside `run_server` to avoid ImportError # when calling `load_and_populate_data` from VSCode - import fsspec # pylint: disable=C0415 - import uvicorn # pylint: disable=C0415 + import fsspec + import uvicorn - from kedro_viz.api import apps # pylint: disable=C0415 + from kedro_viz.api import apps path = Path(project_path) if project_path else Path.cwd() @@ -126,6 +123,10 @@ def run_server( # [TODO: As we can do this with `kedro viz build`, # we need to shift this feature outside of kedro viz run] if save_file: + from kedro_viz.api.rest.responses.save_responses import ( + save_api_responses_to_fs, + ) + save_api_responses_to_fs(save_file, fsspec.filesystem("file"), True) app = apps.create_api_app_from_project(path, autoreload) @@ -172,13 +173,14 @@ def run_server( target=run_process, daemon=False, kwargs={**run_process_kwargs} ) - print("Starting Kedro Viz ...") + display_cli_message("Starting Kedro Viz ...", "green") viz_process.start() _wait_for(func=_check_viz_up, host=args.host, port=args.port) - print( + display_cli_message( "Kedro Viz started successfully. \n\n" - f"\u2728 Kedro Viz is running at \n http://{args.host}:{args.port}/" + f"\u2728 Kedro Viz is running at \n http://{args.host}:{args.port}/", + "green", ) diff --git a/package/kedro_viz/services/__init__.py b/package/kedro_viz/services/__init__.py index 81991d1b4d..b12ebc2051 100644 --- a/package/kedro_viz/services/__init__.py +++ b/package/kedro_viz/services/__init__.py @@ -1,2 +1,3 @@ """`kedro_viz.services` provides an additional business logic layer for the API.""" + from . 
import layers as layers_services diff --git a/package/kedro_viz/services/layers.py b/package/kedro_viz/services/layers.py index 4eab727e80..7cba369aa1 100644 --- a/package/kedro_viz/services/layers.py +++ b/package/kedro_viz/services/layers.py @@ -1,10 +1,11 @@ """`kedro_viz.services.layers` defines layers-related logic.""" + import logging from collections import defaultdict from graphlib import CycleError, TopologicalSorter from typing import Dict, List, Set -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.nodes import GraphNode logger = logging.getLogger(__name__) diff --git a/package/ruff.toml b/package/ruff.toml new file mode 100644 index 0000000000..c911fbf7ec --- /dev/null +++ b/package/ruff.toml @@ -0,0 +1,5 @@ +extend = "../ruff.toml" + +[lint.isort] +known-first-party = ["kedro_viz"] +known-third-party = ["kedro"] diff --git a/package/test_requirements.txt b/package/test_requirements.txt index 14741ab0ea..3260a24806 100644 --- a/package/test_requirements.txt +++ b/package/test_requirements.txt @@ -5,20 +5,16 @@ kedro-datasets[pandas.ParquetDataset, pandas.CSVDataset, pandas.ExcelDataset, pl kedro-telemetry>=0.1.1 # for testing telemetry integration bandit~=1.7 behave~=1.2 -black~=23.3 boto3~=1.34 -flake8~=7.1 -isort~=5.11 matplotlib~=3.9 mypy~=1.11 moto~=5.0.9 psutil==5.9.6 # same as Kedro for now -pylint~=3.2 -pylint-pydantic>=0.3.0 pytest~=8.3 pytest-asyncio~=0.21 pytest-mock~=3.14 pytest-cov~=5.0 +ruff==0.7.0 sqlalchemy-stubs~=0.4 strawberry-graphql[cli]>=0.99.0, <1.0 trufflehog~=2.2 diff --git a/package/tests/conftest.py b/package/tests/conftest.py index d63fca7fd3..5c1a300abb 100644 --- a/package/tests/conftest.py +++ b/package/tests/conftest.py @@ -21,7 +21,8 @@ ) from kedro_viz.integrations.kedro.hooks import DatasetStatsHook from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore -from kedro_viz.models.flowchart import DataNodeMetadata, GraphNode +from kedro_viz.models.flowchart.node_metadata import DataNodeMetadata +from kedro_viz.models.flowchart.nodes import GraphNode from kedro_viz.server import populate_data @@ -60,10 +61,10 @@ def example_stats_dict(): @pytest.fixture def example_pipelines(): def process_data(raw_data, train_test_split): - ... + pass def train_model(model_inputs, parameters): - ... + pass data_processing_pipeline = pipeline( [ @@ -420,10 +421,10 @@ def example_catalog(): @pytest.fixture def example_transcoded_pipelines(): def process_data(raw_data, train_test_split): - ... + pass def train_model(model_inputs, parameters): - ... 
+ pass data_processing_pipeline = pipeline( [ @@ -484,7 +485,12 @@ def example_api( example_stats_dict, ) mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager", new=data_access_manager + "kedro_viz.api.rest.responses.pipelines.data_access_manager", + new=data_access_manager, + ) + mocker.patch( + "kedro_viz.api.rest.responses.nodes.data_access_manager", + new=data_access_manager, ) yield api @@ -503,7 +509,12 @@ def example_api_no_default_pipeline( data_access_manager, example_catalog, example_pipelines, session_store, {} ) mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager", new=data_access_manager + "kedro_viz.api.rest.responses.pipelines.data_access_manager", + new=data_access_manager, + ) + mocker.patch( + "kedro_viz.api.rest.responses.nodes.data_access_manager", + new=data_access_manager, ) yield api @@ -533,7 +544,12 @@ def example_api_for_edge_case_pipelines( {}, ) mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager", new=data_access_manager + "kedro_viz.api.rest.responses.pipelines.data_access_manager", + new=data_access_manager, + ) + mocker.patch( + "kedro_viz.api.rest.responses.nodes.data_access_manager", + new=data_access_manager, ) yield api @@ -555,7 +571,12 @@ def example_transcoded_api( {}, ) mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager", new=data_access_manager + "kedro_viz.api.rest.responses.pipelines.data_access_manager", + new=data_access_manager, + ) + mocker.patch( + "kedro_viz.api.rest.responses.nodes.data_access_manager", + new=data_access_manager, ) yield api diff --git a/package/tests/test_api/test_graphql/test_queries.py b/package/tests/test_api/test_graphql/test_queries.py index 16cfd36ae4..05dcf6fcda 100644 --- a/package/tests/test_api/test_graphql/test_queries.py +++ b/package/tests/test_api/test_graphql/test_queries.py @@ -1,5 +1,3 @@ -# pylint:disable=line-too-long - import json import pytest diff --git a/package/tests/test_api/test_rest/test_responses.py b/package/tests/test_api/test_rest/test_responses.py deleted file mode 100644 index 3f75904404..0000000000 --- a/package/tests/test_api/test_rest/test_responses.py +++ /dev/null @@ -1,1063 +0,0 @@ -# pylint: disable=too-many-lines -import json -import operator -from pathlib import Path -from typing import Any, Dict, Iterable, List -from unittest import mock -from unittest.mock import Mock, call, patch - -import pytest -from fastapi.testclient import TestClient - -from kedro_viz.api import apps -from kedro_viz.api.rest.responses import ( - EnhancedORJSONResponse, - get_kedro_project_json_data, - get_metadata_response, - save_api_main_response_to_fs, - save_api_node_response_to_fs, - save_api_pipeline_response_to_fs, - save_api_responses_to_fs, - write_api_response_to_fs, -) -from kedro_viz.models.flowchart import TaskNode -from kedro_viz.models.metadata import Metadata - - -def _is_dict_list(collection: Any) -> bool: - if isinstance(collection, list): - return isinstance(collection[0], dict) if len(collection) > 0 else True - return False - - -def assert_dict_list_equal( - response: List[Dict], expected: List[Dict], sort_keys: Iterable[str] -): - """Assert two list of dictionaries with undeterministic order - to be equal by sorting them first based on a sort key. 
- """ - if len(response) == 0: - assert len(expected) == 0 - return - - assert sorted(response, key=operator.itemgetter(*sort_keys)) == sorted( - expected, key=operator.itemgetter(*sort_keys) - ) - - -def assert_nodes_equal(response_nodes, expected_nodes): - node_sort_keys = operator.itemgetter("id") - for response_node, expected_node in zip( - sorted(response_nodes, key=node_sort_keys), - sorted(expected_nodes, key=node_sort_keys), - ): - # since tags and pipelines are Sets, which are unordered, - # to assert them, we have to sort first - response_node_tags = response_node.pop("tags") - expected_node_tags = expected_node.pop("tags") - assert sorted(response_node_tags) == sorted(expected_node_tags) - - response_node_pipelines = response_node.pop("pipelines") - expected_node_pipelines = expected_node.pop("pipelines") - - assert sorted(response_node_pipelines) == sorted(expected_node_pipelines) - - # sort modular pipelines - if response_node["modular_pipelines"]: - response_node["modular_pipelines"].sort() - if expected_node["modular_pipelines"]: - expected_node["modular_pipelines"].sort() - - assert response_node == expected_node - - -def assert_modular_pipelines_tree_equal(response: Dict, expected: Dict): - """Assert if modular pipelines tree are equal.""" - # first assert that they have the same set of keys - assert sorted(response.keys()) == sorted(expected.keys()) - - # then compare the dictionary at each key recursively - for key in response: - if isinstance(response[key], dict): - assert_modular_pipelines_tree_equal(response[key], expected[key]) - elif _is_dict_list(response[key]): - assert_dict_list_equal(response[key], expected[key], sort_keys=("id",)) - elif isinstance(response[key], list): - assert sorted(response[key]) == sorted(expected[key]) - else: - assert response[key] == expected[key] - - -def assert_example_data(response_data): - """Assert graph response for the `example_pipelines` and `example_catalog` fixtures.""" - expected_edges = [ - {"source": "f2b25286", "target": "d5a8b994"}, - {"source": "782e4a43", "target": "0ecea0de"}, - {"source": "13399a82", "target": "782e4a43"}, - {"source": "f1f1425b", "target": "f2b25286"}, - {"source": "0ecea0de", "target": "f2b25286"}, - {"source": "f0ebef01", "target": "782e4a43"}, - {"source": "13399a82", "target": "uk.data_processing"}, - {"source": "uk.data_processing", "target": "0ecea0de"}, - {"source": "f0ebef01", "target": "uk.data_processing"}, - {"source": "f1f1425b", "target": "uk"}, - {"source": "13399a82", "target": "uk"}, - {"source": "f1f1425b", "target": "uk.data_science"}, - {"source": "f0ebef01", "target": "uk"}, - {"source": "uk.data_science", "target": "d5a8b994"}, - {"source": "0ecea0de", "target": "uk.data_science"}, - {"source": "uk", "target": "d5a8b994"}, - ] - assert_dict_list_equal( - response_data.pop("edges"), expected_edges, sort_keys=("source", "target") - ) - # compare nodes - expected_nodes = [ - { - "id": "782e4a43", - "name": "process_data", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": ["uk.data_processing"], - "type": "task", - "parameters": {"uk.data_processing.train_test_split": 0.1}, - }, - { - "id": "13399a82", - "name": "uk.data_processing.raw_data", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": ["uk", "uk.data_processing"], - "type": "data", - "layer": "raw", - "dataset_type": "pandas.csv_dataset.CSVDataset", - "stats": None, - }, - { - "id": "f0ebef01", - "name": 
"params:uk.data_processing.train_test_split", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": None, - "type": "parameters", - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "0ecea0de", - "name": "model_inputs", - "tags": ["train", "split"], - "pipelines": ["__default__", "data_science", "data_processing"], - "modular_pipelines": ["uk.data_science", "uk.data_processing"], - "type": "data", - "layer": "model_inputs", - "dataset_type": "pandas.csv_dataset.CSVDataset", - "stats": {"columns": 12, "rows": 29768}, - }, - { - "id": "f2b25286", - "name": "train_model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk.data_science"], - "type": "task", - "parameters": { - "train_test_split": 0.1, - "num_epochs": 1000, - }, - }, - { - "id": "f1f1425b", - "name": "parameters", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": None, - "type": "parameters", - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "d5a8b994", - "name": "uk.data_science.model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk", "uk.data_science"], - "type": "data", - "layer": None, - "dataset_type": "io.memory_dataset.MemoryDataset", - "stats": None, - }, - { - "id": "uk.data_processing", - "name": "uk.data_processing", - "tags": ["split"], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "uk.data_science", - "name": "uk.data_science", - "tags": ["train"], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "uk", - "name": "uk", - "tags": ["split", "train"], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - ] - assert_nodes_equal(response_data.pop("nodes"), expected_nodes) - - # compare modular pipelines - expected_modular_pipelines = { - "__root__": { - "id": "__root__", - "name": "__root__", - "inputs": [], - "outputs": [], - "children": [ - {"id": "d5a8b994", "type": "data"}, - {"id": "13399a82", "type": "data"}, - {"id": "f1f1425b", "type": "parameters"}, - {"id": "f0ebef01", "type": "parameters"}, - {"id": "uk", "type": "modularPipeline"}, - ], - }, - "uk": { - "id": "uk", - "name": "uk", - "inputs": ["f1f1425b", "f0ebef01", "13399a82"], - "outputs": ["d5a8b994"], - "children": [ - {"id": "uk.data_processing", "type": "modularPipeline"}, - {"id": "uk.data_science", "type": "modularPipeline"}, - {"id": "0ecea0de", "type": "data"}, - ], - }, - "uk.data_processing": { - "id": "uk.data_processing", - "name": "uk.data_processing", - "inputs": ["f0ebef01", "13399a82"], - "outputs": ["0ecea0de"], - "children": [{"id": "782e4a43", "type": "task"}], - }, - "uk.data_science": { - "id": "uk.data_science", - "name": "uk.data_science", - "inputs": ["0ecea0de", "f1f1425b"], - "outputs": ["d5a8b994"], - "children": [{"id": "f2b25286", "type": "task"}], - }, - } - assert_modular_pipelines_tree_equal( - response_data.pop("modular_pipelines"), expected_modular_pipelines - ) - - # compare the rest - assert response_data == { - "tags": [{"id": "split", "name": "split"}, {"id": "train", "name": "train"}], - "layers": ["raw", "model_inputs"], - "pipelines": [ - {"id": "__default__", 
"name": "__default__"}, - {"id": "data_science", "name": "data_science"}, - {"id": "data_processing", "name": "data_processing"}, - ], - "selected_pipeline": "__default__", - } - - -def assert_example_data_from_file(response_data): - """Assert graph response for the `example_pipelines` and `example_catalog` fixtures.""" - expected_edges = [ - {"source": "f2b25286", "target": "d5a8b994"}, - {"source": "782e4a43", "target": "0ecea0de"}, - {"source": "13399a82", "target": "782e4a43"}, - {"source": "f1f1425b", "target": "f2b25286"}, - {"source": "0ecea0de", "target": "f2b25286"}, - {"source": "f0ebef01", "target": "782e4a43"}, - {"source": "13399a82", "target": "uk.data_processing"}, - {"source": "uk.data_processing", "target": "0ecea0de"}, - {"source": "f0ebef01", "target": "uk.data_processing"}, - {"source": "f1f1425b", "target": "uk"}, - {"source": "13399a82", "target": "uk"}, - {"source": "f1f1425b", "target": "uk.data_science"}, - {"source": "f0ebef01", "target": "uk"}, - {"source": "uk.data_science", "target": "d5a8b994"}, - {"source": "0ecea0de", "target": "uk.data_science"}, - {"source": "uk", "target": "d5a8b994"}, - ] - assert_dict_list_equal( - response_data.pop("edges"), expected_edges, sort_keys=("source", "target") - ) - # compare nodes - expected_nodes = [ - { - "id": "782e4a43", - "name": "process_data", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": ["uk.data_processing"], - "type": "task", - "parameters": {"uk.data_processing.train_test_split": 0.1}, - }, - { - "id": "13399a82", - "name": "uk.data_processing.raw_data", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": ["uk", "uk.data_processing"], - "type": "data", - "layer": "raw", - "dataset_type": "pandas.csv_dataset.CSVDataset", - }, - { - "id": "f0ebef01", - "name": "params:uk.data_processing.train_test_split", - "tags": ["split"], - "pipelines": ["__default__", "data_processing"], - "modular_pipelines": None, - "type": "parameters", - "layer": None, - "dataset_type": None, - }, - { - "id": "0ecea0de", - "name": "model_inputs", - "tags": ["train", "split"], - "pipelines": ["__default__", "data_science", "data_processing"], - "modular_pipelines": None, - "type": "data", - "layer": "model_inputs", - "dataset_type": "pandas.csv_dataset.CSVDataset", - }, - { - "id": "f2b25286", - "name": "train_model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk.data_science"], - "type": "task", - "parameters": { - "train_test_split": 0.1, - "num_epochs": 1000, - }, - }, - { - "id": "f1f1425b", - "name": "parameters", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": None, - "type": "parameters", - "layer": None, - "dataset_type": None, - }, - { - "id": "d5a8b994", - "name": "uk.data_science.model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk", "uk.data_science"], - "type": "data", - "layer": None, - "dataset_type": "io.memory_dataset.MemoryDataset", - }, - { - "id": "uk.data_processing", - "name": "uk.data_processing", - "tags": [], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - }, - { - "id": "uk.data_science", - "name": "uk.data_science", - "tags": [], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - }, - { - "id": "uk", - "name": 
"uk", - "tags": [], - "pipelines": ["__default__"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - }, - ] - assert_nodes_equal(response_data.pop("nodes"), expected_nodes) - - # compare modular pipelines - expected_modular_pipelines = { - "__root__": { - "children": [ - {"id": "f1f1425b", "type": "parameters"}, - {"id": "uk", "type": "modularPipeline"}, - ], - "id": "__root__", - "inputs": [], - "name": "__root__", - "outputs": [], - }, - "uk": { - "children": [ - {"id": "uk.data_science", "type": "modularPipeline"}, - {"id": "uk.data_processing", "type": "modularPipeline"}, - ], - "id": "uk", - "inputs": ["f0ebef01", "13399a82", "f1f1425b"], - "name": "uk", - "outputs": ["d5a8b994"], - }, - "uk.data_processing": { - "children": [ - {"id": "13399a82", "type": "data"}, - {"id": "782e4a43", "type": "task"}, - ], - "id": "uk.data_processing", - "inputs": ["f0ebef01", "13399a82"], - "name": "uk.data_processing", - "outputs": ["0ecea0de"], - }, - "uk.data_science": { - "children": [ - {"id": "f2b25286", "type": "task"}, - {"id": "d5a8b994", "type": "data"}, - ], - "id": "uk.data_science", - "inputs": ["0ecea0de", "f1f1425b"], - "name": "uk.data_science", - "outputs": ["d5a8b994"], - }, - } - assert_modular_pipelines_tree_equal( - response_data.pop("modular_pipelines"), expected_modular_pipelines - ) - - # compare the rest - assert response_data == { - "tags": [{"id": "split", "name": "split"}, {"id": "train", "name": "train"}], - "layers": ["raw", "model_inputs"], - "pipelines": [ - {"id": "__default__", "name": "__default__"}, - {"id": "data_science", "name": "data_science"}, - {"id": "data_processing", "name": "data_processing"}, - ], - "selected_pipeline": "__default__", - } - - -def assert_example_transcoded_data(response_data): - """Assert graph response for the `example_transcoded_pipelines` - and `example_transcoded_catalog` fixtures.""" - expected_edges = [ - {"source": "f1f1425b", "target": "7e29e365"}, - {"source": "f0ebef01", "target": "58a383dd"}, - {"source": "7c58d8e6", "target": "58a383dd"}, - {"source": "58a383dd", "target": "0ecea0de"}, - {"source": "7e29e365", "target": "1d06a0d7"}, - {"source": "0ecea0de", "target": "7e29e365"}, - ] - assert_dict_list_equal( - response_data.pop("edges"), expected_edges, sort_keys=("source", "target") - ) - # compare nodes - expected_nodes = [ - { - "id": "58a383dd", - "name": "process_data", - "tags": ["split"], - "pipelines": ["data_processing", "__default__"], - "type": "task", - "modular_pipelines": None, - "parameters": {"uk.data_processing.train_test_split": 0.1}, - }, - { - "id": "7c58d8e6", - "name": "raw_data", - "tags": ["split"], - "pipelines": ["data_processing", "__default__"], - "type": "data", - "modular_pipelines": None, - "layer": None, - "dataset_type": "io.memory_dataset.MemoryDataset", - "stats": None, - }, - { - "id": "f0ebef01", - "name": "params:uk.data_processing.train_test_split", - "tags": ["split"], - "pipelines": ["data_processing", "__default__"], - "type": "parameters", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "0ecea0de", - "name": "model_inputs", - "tags": ["train", "split"], - "pipelines": ["data_processing", "__default__"], - "type": "data", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "7e29e365", - "name": "train_model", - "tags": ["train"], - "pipelines": ["data_processing", "__default__"], - "type": "task", - "modular_pipelines": None, 
- "parameters": {"train_test_split": 0.1, "num_epochs": 1000}, - }, - { - "id": "f1f1425b", - "name": "parameters", - "tags": ["train"], - "pipelines": ["data_processing", "__default__"], - "type": "parameters", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "1d06a0d7", - "name": "model", - "tags": ["train"], - "pipelines": ["data_processing", "__default__"], - "type": "data", - "modular_pipelines": None, - "layer": None, - "dataset_type": "io.memory_dataset.MemoryDataset", - "stats": None, - }, - ] - - assert_nodes_equal(response_data.pop("nodes"), expected_nodes) - - -class TestMainEndpoint: - """Test a viz API created from a Kedro project.""" - - def test_endpoint_main(self, client): - response = client.get("/api/main") - assert_example_data(response.json()) - - def test_endpoint_main_no_default_pipeline(self, example_api_no_default_pipeline): - client = TestClient(example_api_no_default_pipeline) - response = client.get("/api/main") - assert len(response.json()["nodes"]) == 6 - assert len(response.json()["edges"]) == 9 - assert response.json()["pipelines"] == [ - {"id": "data_science", "name": "data_science"}, - {"id": "data_processing", "name": "data_processing"}, - ] - - def test_endpoint_main_for_edge_case_pipelines( - self, - example_api_for_edge_case_pipelines, - expected_modular_pipeline_tree_for_edge_cases, - ): - client = TestClient(example_api_for_edge_case_pipelines) - response = client.get("/api/main") - actual_modular_pipelines_tree = response.json()["modular_pipelines"] - assert_modular_pipelines_tree_equal( - actual_modular_pipelines_tree, expected_modular_pipeline_tree_for_edge_cases - ) - - -class TestTranscodedDataset: - """Test a viz API created from a Kedro project.""" - - def test_endpoint_main(self, example_transcoded_api): - client = TestClient(example_transcoded_api) - response = client.get("/api/main") - assert response.status_code == 200 - assert_example_transcoded_data(response.json()) - - def test_transcoded_data_node_metadata(self, example_transcoded_api): - client = TestClient(example_transcoded_api) - response = client.get("/api/nodes/0ecea0de") - assert response.json() == { - "filepath": "model_inputs.csv", - "original_type": "pandas.csv_dataset.CSVDataset", - "transcoded_types": [ - "pandas.parquet_dataset.ParquetDataset", - ], - "run_command": "kedro run --to-outputs=model_inputs@pandas2", - } - - -class TestNodeMetadataEndpoint: - def test_node_not_exist(self, client): - response = client.get("/api/nodes/foo") - assert response.status_code == 404 - - def test_task_node_metadata(self, client): - response = client.get("/api/nodes/782e4a43") - metadata = response.json() - assert ( - metadata["code"].replace(" ", "") - == "defprocess_data(raw_data,train_test_split):\n...\n" - ) - assert metadata["parameters"] == {"uk.data_processing.train_test_split": 0.1} - assert metadata["inputs"] == [ - "uk.data_processing.raw_data", - "params:uk.data_processing.train_test_split", - ] - assert metadata["outputs"] == ["model_inputs"] - assert ( - metadata["run_command"] - == "kedro run --to-nodes='uk.data_processing.process_data'" - ) - assert str(Path("package/tests/conftest.py")) in metadata["filepath"] - - def test_data_node_metadata(self, client): - response = client.get("/api/nodes/0ecea0de") - assert response.json() == { - "filepath": "model_inputs.csv", - "type": "pandas.csv_dataset.CSVDataset", - "preview_type": "TablePreview", - "run_command": "kedro run --to-outputs=model_inputs", - "stats": {"columns": 
12, "rows": 29768}, - } - - def test_data_node_metadata_for_free_input(self, client): - response = client.get("/api/nodes/13399a82") - assert response.json() == { - "filepath": "raw_data.csv", - "preview_type": "TablePreview", - "type": "pandas.csv_dataset.CSVDataset", - } - - def test_parameters_node_metadata(self, client): - response = client.get("/api/nodes/f1f1425b") - assert response.json() == { - "parameters": {"train_test_split": 0.1, "num_epochs": 1000} - } - - def test_single_parameter_node_metadata(self, client): - response = client.get("/api/nodes/f0ebef01") - assert response.json() == { - "parameters": {"uk.data_processing.train_test_split": 0.1} - } - - def test_no_metadata(self, client): - with mock.patch.object(TaskNode, "has_metadata", return_value=False): - response = client.get("/api/nodes/782e4a43") - assert response.json() == {} - - -class TestSinglePipelineEndpoint: - def test_get_pipeline(self, client): - response = client.get("/api/pipelines/data_science") - assert response.status_code == 200 - response_data = response.json() - expected_edges = [ - {"source": "f2b25286", "target": "d5a8b994"}, - {"source": "f1f1425b", "target": "uk.data_science"}, - {"source": "f1f1425b", "target": "f2b25286"}, - {"source": "uk.data_science", "target": "d5a8b994"}, - {"source": "uk", "target": "d5a8b994"}, - {"source": "0ecea0de", "target": "uk"}, - {"source": "0ecea0de", "target": "uk.data_science"}, - {"source": "f1f1425b", "target": "uk"}, - {"source": "0ecea0de", "target": "f2b25286"}, - ] - assert_dict_list_equal( - response_data.pop("edges"), expected_edges, sort_keys=("source", "target") - ) - expected_nodes = [ - { - "id": "0ecea0de", - "name": "model_inputs", - "tags": ["train", "split"], - "pipelines": ["__default__", "data_science", "data_processing"], - "modular_pipelines": ["uk.data_science", "uk.data_processing"], - "type": "data", - "layer": "model_inputs", - "dataset_type": "pandas.csv_dataset.CSVDataset", - "stats": {"columns": 12, "rows": 29768}, - }, - { - "id": "f2b25286", - "name": "train_model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk.data_science"], - "type": "task", - "parameters": { - "train_test_split": 0.1, - "num_epochs": 1000, - }, - }, - { - "id": "f1f1425b", - "name": "parameters", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": None, - "type": "parameters", - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "d5a8b994", - "name": "uk.data_science.model", - "tags": ["train"], - "pipelines": ["__default__", "data_science"], - "modular_pipelines": ["uk", "uk.data_science"], - "type": "data", - "layer": None, - "dataset_type": "io.memory_dataset.MemoryDataset", - "stats": None, - }, - { - "id": "uk", - "name": "uk", - "tags": ["train"], - "pipelines": ["data_science"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - { - "id": "uk.data_science", - "name": "uk.data_science", - "tags": ["train"], - "pipelines": ["data_science"], - "type": "modularPipeline", - "modular_pipelines": None, - "layer": None, - "dataset_type": None, - "stats": None, - }, - ] - assert_nodes_equal(response_data.pop("nodes"), expected_nodes) - - expected_modular_pipelines = { - "__root__": { - "children": [ - {"id": "f1f1425b", "type": "parameters"}, - {"id": "0ecea0de", "type": "data"}, - {"id": "uk", "type": "modularPipeline"}, - {"id": "d5a8b994", "type": "data"}, - ], - "id": 
"__root__", - "inputs": [], - "name": "__root__", - "outputs": [], - }, - "uk": { - "children": [ - {"id": "uk.data_science", "type": "modularPipeline"}, - ], - "id": "uk", - "inputs": ["0ecea0de", "f1f1425b"], - "name": "uk", - "outputs": ["d5a8b994"], - }, - "uk.data_science": { - "children": [ - {"id": "f2b25286", "type": "task"}, - ], - "id": "uk.data_science", - "inputs": ["0ecea0de", "f1f1425b"], - "name": "uk.data_science", - "outputs": ["d5a8b994"], - }, - } - - assert_modular_pipelines_tree_equal( - response_data.pop("modular_pipelines"), - expected_modular_pipelines, - ) - - # Extract and sort the layers field - response_data_layers_sorted = sorted(response_data["layers"]) - expected_layers_sorted = sorted(["model_inputs", "raw"]) - assert response_data_layers_sorted == expected_layers_sorted - - # Remove the layers field from response_data for further comparison - response_data.pop("layers") - - # Expected response without the layers field - expected_response_without_layers = { - "tags": [ - {"id": "split", "name": "split"}, - {"id": "train", "name": "train"}, - ], - "pipelines": [ - {"id": "__default__", "name": "__default__"}, - {"id": "data_science", "name": "data_science"}, - {"id": "data_processing", "name": "data_processing"}, - ], - "selected_pipeline": "data_science", - } - assert response_data == expected_response_without_layers - - def test_get_non_existing_pipeline(self, client): - response = client.get("/api/pipelines/foo") - assert response.status_code == 404 - - -class TestAppMetadata: - def test_get_metadata_response(self, mocker): - mock_get_compat = mocker.patch( - "kedro_viz.api.rest.responses.get_package_compatibilities", - return_value="mocked_compatibilities", - ) - mock_set_compat = mocker.patch( - "kedro_viz.api.rest.responses.Metadata.set_package_compatibilities" - ) - - response = get_metadata_response() - - # Assert get_package_compatibilities was called - mock_get_compat.assert_called_once() - - # Assert set_package_compatibilities was called with the mocked compatibilities - mock_set_compat.assert_called_once_with("mocked_compatibilities") - - # Assert the function returns the Metadata instance - assert isinstance(response, Metadata) - - -class TestAPIAppFromFile: - def test_api_app_from_json_file_main_api(self): - filepath = str(Path(__file__).parent.parent) - api_app = apps.create_api_app_from_file(filepath) - client = TestClient(api_app) - response = client.get("/api/main") - assert_example_data_from_file(response.json()) - - def test_api_app_from_json_file_index(self): - filepath = str(Path(__file__).parent.parent) - api_app = apps.create_api_app_from_file(filepath) - client = TestClient(api_app) - response = client.get("/") - assert response.status_code == 200 - - -class TestEnhancedORJSONResponse: - @pytest.mark.parametrize( - "content, expected", - [ - ( - {"key1": "value1", "key2": "value2"}, - b'{\n "key1": "value1",\n "key2": "value2"\n}', - ), - (["item1", "item2"], b'[\n "item1",\n "item2"\n]'), - ], - ) - def test_encode_to_human_readable(self, content, expected): - result = EnhancedORJSONResponse.encode_to_human_readable(content) - assert result == expected - - @pytest.mark.parametrize( - "file_path, response, encoded_response", - [ - ( - "test_output.json", - {"key1": "value1", "key2": "value2"}, - b'{"key1": "value1", "key2": "value2"}', - ), - ], - ) - def test_write_api_response_to_fs( - self, file_path, response, encoded_response, mocker - ): - mock_encode_to_human_readable = mocker.patch( - 
"kedro_viz.api.rest.responses.EnhancedORJSONResponse.encode_to_human_readable", - return_value=encoded_response, - ) - with patch("fsspec.filesystem") as mock_filesystem: - mockremote_fs = mock_filesystem.return_value - mockremote_fs.open.return_value.__enter__.return_value = Mock() - write_api_response_to_fs(file_path, response, mockremote_fs) - mockremote_fs.open.assert_called_once_with(file_path, "wb") - mock_encode_to_human_readable.assert_called_once() - - def test_get_kedro_project_json_data(self, mocker): - expected_json_data = {"key": "value"} - encoded_response = json.dumps(expected_json_data).encode("utf-8") - - mock_get_default_response = mocker.patch( - "kedro_viz.api.rest.responses.get_default_response", - return_value={"key": "value"}, - ) - mock_get_encoded_response = mocker.patch( - "kedro_viz.api.rest.responses.get_encoded_response", - return_value=encoded_response, - ) - - json_data = get_kedro_project_json_data() - - mock_get_default_response.assert_called_once() - mock_get_encoded_response.assert_called_once_with( - mock_get_default_response.return_value - ) - assert json_data == expected_json_data - - def test_save_api_main_response_to_fs(self, mocker): - expected_default_response = {"test": "json"} - main_path = "/main" - - mock_get_default_response = mocker.patch( - "kedro_viz.api.rest.responses.get_default_response", - return_value=expected_default_response, - ) - mock_write_api_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.write_api_response_to_fs" - ) - - remote_fs = Mock() - - save_api_main_response_to_fs(main_path, remote_fs) - - mock_get_default_response.assert_called_once() - mock_write_api_response_to_fs.assert_called_once_with( - main_path, mock_get_default_response.return_value, remote_fs - ) - - def test_save_api_node_response_to_fs(self, mocker): - nodes_path = "/nodes" - nodeIds = ["01f456", "01f457"] - expected_metadata_response = {"test": "json"} - - mock_get_node_metadata_response = mocker.patch( - "kedro_viz.api.rest.responses.get_node_metadata_response", - return_value=expected_metadata_response, - ) - mock_write_api_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.write_api_response_to_fs" - ) - mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager.nodes.get_node_ids", - return_value=nodeIds, - ) - remote_fs = Mock() - - save_api_node_response_to_fs(nodes_path, remote_fs, False) - - assert mock_write_api_response_to_fs.call_count == len(nodeIds) - assert mock_get_node_metadata_response.call_count == len(nodeIds) - - expected_calls = [ - call( - f"{nodes_path}/{nodeId}", - mock_get_node_metadata_response.return_value, - remote_fs, - ) - for nodeId in nodeIds - ] - mock_write_api_response_to_fs.assert_has_calls(expected_calls, any_order=True) - - def test_save_api_pipeline_response_to_fs(self, mocker): - pipelines_path = "/pipelines" - pipelineIds = ["01f456", "01f457"] - expected_selected_pipeline_response = {"test": "json"} - - mock_get_selected_pipeline_response = mocker.patch( - "kedro_viz.api.rest.responses.get_selected_pipeline_response", - return_value=expected_selected_pipeline_response, - ) - mock_write_api_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.write_api_response_to_fs" - ) - - mocker.patch( - "kedro_viz.api.rest.responses.data_access_manager." 
- "registered_pipelines.get_pipeline_ids", - return_value=pipelineIds, - ) - - remote_fs = Mock() - - save_api_pipeline_response_to_fs(pipelines_path, remote_fs) - - assert mock_write_api_response_to_fs.call_count == len(pipelineIds) - assert mock_get_selected_pipeline_response.call_count == len(pipelineIds) - - expected_calls = [ - call( - f"{pipelines_path}/{pipelineId}", - mock_get_selected_pipeline_response.return_value, - remote_fs, - ) - for pipelineId in pipelineIds - ] - mock_write_api_response_to_fs.assert_has_calls(expected_calls, any_order=True) - - @pytest.mark.parametrize( - "file_path, protocol, is_all_previews_enabled", - [ - ("s3://shareableviz", "s3", True), - ("abfs://shareableviz", "abfs", False), - ("shareableviz", "file", True), - ], - ) - def test_save_api_responses_to_fs( - self, file_path, protocol, is_all_previews_enabled, mocker - ): - mock_api_main_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.save_api_main_response_to_fs" - ) - mock_api_node_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.save_api_node_response_to_fs" - ) - mock_api_pipeline_response_to_fs = mocker.patch( - "kedro_viz.api.rest.responses.save_api_pipeline_response_to_fs" - ) - - mock_filesystem = mocker.patch("fsspec.filesystem") - mock_filesystem.return_value.protocol = protocol - - save_api_responses_to_fs( - file_path, mock_filesystem.return_value, is_all_previews_enabled - ) - - mock_api_main_response_to_fs.assert_called_once_with( - f"{file_path}/api/main", mock_filesystem.return_value - ) - mock_api_node_response_to_fs.assert_called_once_with( - f"{file_path}/api/nodes", - mock_filesystem.return_value, - is_all_previews_enabled, - ) - mock_api_pipeline_response_to_fs.assert_called_once_with( - f"{file_path}/api/pipelines", mock_filesystem.return_value - ) diff --git a/package/tests/test_api/test_rest/test_responses/__init__.py b/package/tests/test_api/test_rest/test_responses/__init__.py new file mode 100755 index 0000000000..e69de29bb2 diff --git a/package/tests/test_api/test_rest/test_responses/assert_helpers.py b/package/tests/test_api/test_rest/test_responses/assert_helpers.py new file mode 100644 index 0000000000..a55ecd9b81 --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/assert_helpers.py @@ -0,0 +1,544 @@ +import operator +from typing import Any, Dict, Iterable, List + + +def _is_dict_list(collection: Any) -> bool: + if isinstance(collection, list): + return isinstance(collection[0], dict) if len(collection) > 0 else True + return False + + +def assert_modular_pipelines_tree_equal(response: Dict, expected: Dict): + """Assert if modular pipelines tree are equal.""" + # first assert that they have the same set of keys + assert sorted(response.keys()) == sorted(expected.keys()) + + # then compare the dictionary at each key recursively + for key in response: + if isinstance(response[key], dict): + assert_modular_pipelines_tree_equal(response[key], expected[key]) + elif _is_dict_list(response[key]): + assert_dict_list_equal(response[key], expected[key], sort_keys=("id",)) + elif isinstance(response[key], list): + assert sorted(response[key]) == sorted(expected[key]) + else: + assert response[key] == expected[key] + + +def assert_nodes_equal(response_nodes, expected_nodes): + node_sort_keys = operator.itemgetter("id") + for response_node, expected_node in zip( + sorted(response_nodes, key=node_sort_keys), + sorted(expected_nodes, key=node_sort_keys), + ): + # since tags and pipelines are Sets, which are unordered, + # to assert them, we 
have to sort first + response_node_tags = response_node.pop("tags") + expected_node_tags = expected_node.pop("tags") + assert sorted(response_node_tags) == sorted(expected_node_tags) + + response_node_pipelines = response_node.pop("pipelines") + expected_node_pipelines = expected_node.pop("pipelines") + + assert sorted(response_node_pipelines) == sorted(expected_node_pipelines) + + # sort modular pipelines + if response_node["modular_pipelines"]: + response_node["modular_pipelines"].sort() + if expected_node["modular_pipelines"]: + expected_node["modular_pipelines"].sort() + + assert response_node == expected_node + + +def assert_dict_list_equal( + response: List[Dict], expected: List[Dict], sort_keys: Iterable[str] +): + """Assert two list of dictionaries with undeterministic order + to be equal by sorting them first based on a sort key. + """ + if len(response) == 0: + assert len(expected) == 0 + return + + assert sorted(response, key=operator.itemgetter(*sort_keys)) == sorted( + expected, key=operator.itemgetter(*sort_keys) + ) + + +def assert_example_data(response_data): + """Assert graph response for the `example_pipelines` and `example_catalog` fixtures.""" + expected_edges = [ + {"source": "f2b25286", "target": "d5a8b994"}, + {"source": "782e4a43", "target": "0ecea0de"}, + {"source": "13399a82", "target": "782e4a43"}, + {"source": "f1f1425b", "target": "f2b25286"}, + {"source": "0ecea0de", "target": "f2b25286"}, + {"source": "f0ebef01", "target": "782e4a43"}, + {"source": "13399a82", "target": "uk.data_processing"}, + {"source": "uk.data_processing", "target": "0ecea0de"}, + {"source": "f0ebef01", "target": "uk.data_processing"}, + {"source": "f1f1425b", "target": "uk"}, + {"source": "13399a82", "target": "uk"}, + {"source": "f1f1425b", "target": "uk.data_science"}, + {"source": "f0ebef01", "target": "uk"}, + {"source": "uk.data_science", "target": "d5a8b994"}, + {"source": "0ecea0de", "target": "uk.data_science"}, + {"source": "uk", "target": "d5a8b994"}, + ] + assert_dict_list_equal( + response_data.pop("edges"), expected_edges, sort_keys=("source", "target") + ) + # compare nodes + expected_nodes = [ + { + "id": "782e4a43", + "name": "process_data", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": ["uk.data_processing"], + "type": "task", + "parameters": {"uk.data_processing.train_test_split": 0.1}, + }, + { + "id": "13399a82", + "name": "uk.data_processing.raw_data", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": ["uk", "uk.data_processing"], + "type": "data", + "layer": "raw", + "dataset_type": "pandas.csv_dataset.CSVDataset", + "stats": None, + }, + { + "id": "f0ebef01", + "name": "params:uk.data_processing.train_test_split", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": None, + "type": "parameters", + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "0ecea0de", + "name": "model_inputs", + "tags": ["train", "split"], + "pipelines": ["__default__", "data_science", "data_processing"], + "modular_pipelines": ["uk.data_science", "uk.data_processing"], + "type": "data", + "layer": "model_inputs", + "dataset_type": "pandas.csv_dataset.CSVDataset", + "stats": {"columns": 12, "rows": 29768}, + }, + { + "id": "f2b25286", + "name": "train_model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk.data_science"], + "type": "task", + "parameters": { + "train_test_split": 0.1, + 
"num_epochs": 1000, + }, + }, + { + "id": "f1f1425b", + "name": "parameters", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": None, + "type": "parameters", + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "d5a8b994", + "name": "uk.data_science.model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk", "uk.data_science"], + "type": "data", + "layer": None, + "dataset_type": "io.memory_dataset.MemoryDataset", + "stats": None, + }, + { + "id": "uk.data_processing", + "name": "uk.data_processing", + "tags": ["split"], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "uk.data_science", + "name": "uk.data_science", + "tags": ["train"], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "uk", + "name": "uk", + "tags": ["split", "train"], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + ] + assert_nodes_equal(response_data.pop("nodes"), expected_nodes) + + # compare modular pipelines + expected_modular_pipelines = { + "__root__": { + "id": "__root__", + "name": "__root__", + "inputs": [], + "outputs": [], + "children": [ + {"id": "d5a8b994", "type": "data"}, + {"id": "13399a82", "type": "data"}, + {"id": "f1f1425b", "type": "parameters"}, + {"id": "f0ebef01", "type": "parameters"}, + {"id": "uk", "type": "modularPipeline"}, + ], + }, + "uk": { + "id": "uk", + "name": "uk", + "inputs": ["f1f1425b", "f0ebef01", "13399a82"], + "outputs": ["d5a8b994"], + "children": [ + {"id": "uk.data_processing", "type": "modularPipeline"}, + {"id": "uk.data_science", "type": "modularPipeline"}, + {"id": "0ecea0de", "type": "data"}, + ], + }, + "uk.data_processing": { + "id": "uk.data_processing", + "name": "uk.data_processing", + "inputs": ["f0ebef01", "13399a82"], + "outputs": ["0ecea0de"], + "children": [{"id": "782e4a43", "type": "task"}], + }, + "uk.data_science": { + "id": "uk.data_science", + "name": "uk.data_science", + "inputs": ["0ecea0de", "f1f1425b"], + "outputs": ["d5a8b994"], + "children": [{"id": "f2b25286", "type": "task"}], + }, + } + assert_modular_pipelines_tree_equal( + response_data.pop("modular_pipelines"), expected_modular_pipelines + ) + + # compare the rest + assert response_data == { + "tags": [{"id": "split", "name": "split"}, {"id": "train", "name": "train"}], + "layers": ["raw", "model_inputs"], + "pipelines": [ + {"id": "__default__", "name": "__default__"}, + {"id": "data_science", "name": "data_science"}, + {"id": "data_processing", "name": "data_processing"}, + ], + "selected_pipeline": "__default__", + } + + +def assert_example_data_from_file(response_data): + """Assert graph response for the `example_pipelines` and `example_catalog` fixtures.""" + expected_edges = [ + {"source": "f2b25286", "target": "d5a8b994"}, + {"source": "782e4a43", "target": "0ecea0de"}, + {"source": "13399a82", "target": "782e4a43"}, + {"source": "f1f1425b", "target": "f2b25286"}, + {"source": "0ecea0de", "target": "f2b25286"}, + {"source": "f0ebef01", "target": "782e4a43"}, + {"source": "13399a82", "target": "uk.data_processing"}, + {"source": "uk.data_processing", "target": "0ecea0de"}, + {"source": "f0ebef01", "target": "uk.data_processing"}, + {"source": 
"f1f1425b", "target": "uk"}, + {"source": "13399a82", "target": "uk"}, + {"source": "f1f1425b", "target": "uk.data_science"}, + {"source": "f0ebef01", "target": "uk"}, + {"source": "uk.data_science", "target": "d5a8b994"}, + {"source": "0ecea0de", "target": "uk.data_science"}, + {"source": "uk", "target": "d5a8b994"}, + ] + assert_dict_list_equal( + response_data.pop("edges"), expected_edges, sort_keys=("source", "target") + ) + # compare nodes + expected_nodes = [ + { + "id": "782e4a43", + "name": "process_data", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": ["uk.data_processing"], + "type": "task", + "parameters": {"uk.data_processing.train_test_split": 0.1}, + }, + { + "id": "13399a82", + "name": "uk.data_processing.raw_data", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": ["uk", "uk.data_processing"], + "type": "data", + "layer": "raw", + "dataset_type": "pandas.csv_dataset.CSVDataset", + }, + { + "id": "f0ebef01", + "name": "params:uk.data_processing.train_test_split", + "tags": ["split"], + "pipelines": ["__default__", "data_processing"], + "modular_pipelines": None, + "type": "parameters", + "layer": None, + "dataset_type": None, + }, + { + "id": "0ecea0de", + "name": "model_inputs", + "tags": ["train", "split"], + "pipelines": ["__default__", "data_science", "data_processing"], + "modular_pipelines": None, + "type": "data", + "layer": "model_inputs", + "dataset_type": "pandas.csv_dataset.CSVDataset", + }, + { + "id": "f2b25286", + "name": "train_model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk.data_science"], + "type": "task", + "parameters": { + "train_test_split": 0.1, + "num_epochs": 1000, + }, + }, + { + "id": "f1f1425b", + "name": "parameters", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": None, + "type": "parameters", + "layer": None, + "dataset_type": None, + }, + { + "id": "d5a8b994", + "name": "uk.data_science.model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk", "uk.data_science"], + "type": "data", + "layer": None, + "dataset_type": "io.memory_dataset.MemoryDataset", + }, + { + "id": "uk.data_processing", + "name": "uk.data_processing", + "tags": [], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + }, + { + "id": "uk.data_science", + "name": "uk.data_science", + "tags": [], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + }, + { + "id": "uk", + "name": "uk", + "tags": [], + "pipelines": ["__default__"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + }, + ] + assert_nodes_equal(response_data.pop("nodes"), expected_nodes) + + # compare modular pipelines + expected_modular_pipelines = { + "__root__": { + "children": [ + {"id": "f1f1425b", "type": "parameters"}, + {"id": "uk", "type": "modularPipeline"}, + ], + "id": "__root__", + "inputs": [], + "name": "__root__", + "outputs": [], + }, + "uk": { + "children": [ + {"id": "uk.data_science", "type": "modularPipeline"}, + {"id": "uk.data_processing", "type": "modularPipeline"}, + ], + "id": "uk", + "inputs": ["f0ebef01", "13399a82", "f1f1425b"], + "name": "uk", + "outputs": ["d5a8b994"], + }, + "uk.data_processing": { + "children": [ + {"id": "13399a82", "type": 
"data"}, + {"id": "782e4a43", "type": "task"}, + ], + "id": "uk.data_processing", + "inputs": ["f0ebef01", "13399a82"], + "name": "uk.data_processing", + "outputs": ["0ecea0de"], + }, + "uk.data_science": { + "children": [ + {"id": "f2b25286", "type": "task"}, + {"id": "d5a8b994", "type": "data"}, + ], + "id": "uk.data_science", + "inputs": ["0ecea0de", "f1f1425b"], + "name": "uk.data_science", + "outputs": ["d5a8b994"], + }, + } + assert_modular_pipelines_tree_equal( + response_data.pop("modular_pipelines"), expected_modular_pipelines + ) + + # compare the rest + assert response_data == { + "tags": [{"id": "split", "name": "split"}, {"id": "train", "name": "train"}], + "layers": ["raw", "model_inputs"], + "pipelines": [ + {"id": "__default__", "name": "__default__"}, + {"id": "data_science", "name": "data_science"}, + {"id": "data_processing", "name": "data_processing"}, + ], + "selected_pipeline": "__default__", + } + + +def assert_example_transcoded_data(response_data): + """Assert graph response for the `example_transcoded_pipelines` + and `example_transcoded_catalog` fixtures.""" + expected_edges = [ + {"source": "f1f1425b", "target": "7e29e365"}, + {"source": "f0ebef01", "target": "58a383dd"}, + {"source": "7c58d8e6", "target": "58a383dd"}, + {"source": "58a383dd", "target": "0ecea0de"}, + {"source": "7e29e365", "target": "1d06a0d7"}, + {"source": "0ecea0de", "target": "7e29e365"}, + ] + assert_dict_list_equal( + response_data.pop("edges"), expected_edges, sort_keys=("source", "target") + ) + # compare nodes + expected_nodes = [ + { + "id": "58a383dd", + "name": "process_data", + "tags": ["split"], + "pipelines": ["data_processing", "__default__"], + "type": "task", + "modular_pipelines": None, + "parameters": {"uk.data_processing.train_test_split": 0.1}, + }, + { + "id": "7c58d8e6", + "name": "raw_data", + "tags": ["split"], + "pipelines": ["data_processing", "__default__"], + "type": "data", + "modular_pipelines": None, + "layer": None, + "dataset_type": "io.memory_dataset.MemoryDataset", + "stats": None, + }, + { + "id": "f0ebef01", + "name": "params:uk.data_processing.train_test_split", + "tags": ["split"], + "pipelines": ["data_processing", "__default__"], + "type": "parameters", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "0ecea0de", + "name": "model_inputs", + "tags": ["train", "split"], + "pipelines": ["data_processing", "__default__"], + "type": "data", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "7e29e365", + "name": "train_model", + "tags": ["train"], + "pipelines": ["data_processing", "__default__"], + "type": "task", + "modular_pipelines": None, + "parameters": {"train_test_split": 0.1, "num_epochs": 1000}, + }, + { + "id": "f1f1425b", + "name": "parameters", + "tags": ["train"], + "pipelines": ["data_processing", "__default__"], + "type": "parameters", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "1d06a0d7", + "name": "model", + "tags": ["train"], + "pipelines": ["data_processing", "__default__"], + "type": "data", + "modular_pipelines": None, + "layer": None, + "dataset_type": "io.memory_dataset.MemoryDataset", + "stats": None, + }, + ] + + assert_nodes_equal(response_data.pop("nodes"), expected_nodes) diff --git a/package/tests/test_api/test_rest/test_responses/test_base.py b/package/tests/test_api/test_rest/test_responses/test_base.py new file mode 100755 index 0000000000..d487fc542d --- 
/dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_base.py @@ -0,0 +1,10 @@ +from kedro_viz.api.rest.responses.base import APINotFoundResponse + + +def test_api_not_found_response_valid_message(): + response = APINotFoundResponse(message="Resource not found") + assert response.message == "Resource not found" + + # Test that the model is serializable to a dictionary + serialized_response = response.model_dump() + assert serialized_response == {"message": "Resource not found"} diff --git a/package/tests/test_api/test_rest/test_responses/test_metadata.py b/package/tests/test_api/test_rest/test_responses/test_metadata.py new file mode 100755 index 0000000000..c6e8dd6d12 --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_metadata.py @@ -0,0 +1,24 @@ +from kedro_viz.api.rest.responses.metadata import get_metadata_response +from kedro_viz.models.metadata import Metadata + + +class TestAppMetadata: + def test_get_metadata_response(self, mocker): + mock_get_compat = mocker.patch( + "kedro_viz.api.rest.responses.metadata.get_package_compatibilities", + return_value="mocked_compatibilities", + ) + mock_set_compat = mocker.patch( + "kedro_viz.api.rest.responses.metadata.Metadata.set_package_compatibilities" + ) + + response = get_metadata_response() + + # Assert get_package_compatibilities was called + mock_get_compat.assert_called_once() + + # Assert set_package_compatibilities was called with the mocked compatibilities + mock_set_compat.assert_called_once_with("mocked_compatibilities") + + # Assert the function returns the Metadata instance + assert isinstance(response, Metadata) diff --git a/package/tests/test_api/test_rest/test_responses/test_nodes.py b/package/tests/test_api/test_rest/test_responses/test_nodes.py new file mode 100644 index 0000000000..6ee2008826 --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_nodes.py @@ -0,0 +1,91 @@ +from pathlib import Path +from unittest import mock + +from fastapi.testclient import TestClient + +from kedro_viz.models.flowchart.nodes import TaskNode +from tests.test_api.test_rest.test_responses.assert_helpers import ( + assert_example_transcoded_data, +) + + +class TestTranscodedDataset: + """Test a viz API created from a Kedro project.""" + + def test_endpoint_main(self, example_transcoded_api): + client = TestClient(example_transcoded_api) + response = client.get("/api/main") + assert response.status_code == 200 + assert_example_transcoded_data(response.json()) + + def test_transcoded_data_node_metadata(self, example_transcoded_api): + client = TestClient(example_transcoded_api) + response = client.get("/api/nodes/0ecea0de") + assert response.json() == { + "filepath": "model_inputs.csv", + "original_type": "pandas.csv_dataset.CSVDataset", + "transcoded_types": [ + "pandas.parquet_dataset.ParquetDataset", + ], + "run_command": "kedro run --to-outputs=model_inputs@pandas2", + } + + +class TestNodeMetadataEndpoint: + def test_node_not_exist(self, client): + response = client.get("/api/nodes/foo") + assert response.status_code == 404 + + def test_task_node_metadata(self, client): + response = client.get("/api/nodes/782e4a43") + metadata = response.json() + assert ( + metadata["code"].replace(" ", "") + == "defprocess_data(raw_data,train_test_split):\npass\n" + ) + assert metadata["parameters"] == {"uk.data_processing.train_test_split": 0.1} + assert metadata["inputs"] == [ + "uk.data_processing.raw_data", + "params:uk.data_processing.train_test_split", + ] + assert metadata["outputs"] == 
["model_inputs"] + assert ( + metadata["run_command"] + == "kedro run --to-nodes='uk.data_processing.process_data'" + ) + assert str(Path("package/tests/conftest.py")) in metadata["filepath"] + + def test_data_node_metadata(self, client): + response = client.get("/api/nodes/0ecea0de") + assert response.json() == { + "filepath": "model_inputs.csv", + "type": "pandas.csv_dataset.CSVDataset", + "preview_type": "TablePreview", + "run_command": "kedro run --to-outputs=model_inputs", + "stats": {"columns": 12, "rows": 29768}, + } + + def test_data_node_metadata_for_free_input(self, client): + response = client.get("/api/nodes/13399a82") + assert response.json() == { + "filepath": "raw_data.csv", + "preview_type": "TablePreview", + "type": "pandas.csv_dataset.CSVDataset", + } + + def test_parameters_node_metadata(self, client): + response = client.get("/api/nodes/f1f1425b") + assert response.json() == { + "parameters": {"train_test_split": 0.1, "num_epochs": 1000} + } + + def test_single_parameter_node_metadata(self, client): + response = client.get("/api/nodes/f0ebef01") + assert response.json() == { + "parameters": {"uk.data_processing.train_test_split": 0.1} + } + + def test_no_metadata(self, client): + with mock.patch.object(TaskNode, "has_metadata", return_value=False): + response = client.get("/api/nodes/782e4a43") + assert response.json() == {} diff --git a/package/tests/test_api/test_rest/test_responses/test_pipelines.py b/package/tests/test_api/test_rest/test_responses/test_pipelines.py new file mode 100755 index 0000000000..4b933e33e2 --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_pipelines.py @@ -0,0 +1,241 @@ +import json +from pathlib import Path + +from fastapi.testclient import TestClient + +from kedro_viz.api import apps +from kedro_viz.api.rest.responses.pipelines import get_kedro_project_json_data +from tests.test_api.test_rest.test_responses.assert_helpers import ( + assert_dict_list_equal, + assert_example_data, + assert_example_data_from_file, + assert_modular_pipelines_tree_equal, + assert_nodes_equal, +) + + +class TestMainEndpoint: + """Test a viz API created from a Kedro project.""" + + def test_endpoint_main(self, client, mocker, data_access_manager): + mocker.patch( + "kedro_viz.api.rest.responses.nodes.data_access_manager", + new=data_access_manager, + ) + response = client.get("/api/main") + assert_example_data(response.json()) + + def test_endpoint_main_no_default_pipeline(self, example_api_no_default_pipeline): + client = TestClient(example_api_no_default_pipeline) + response = client.get("/api/main") + assert len(response.json()["nodes"]) == 6 + assert len(response.json()["edges"]) == 9 + assert response.json()["pipelines"] == [ + {"id": "data_science", "name": "data_science"}, + {"id": "data_processing", "name": "data_processing"}, + ] + + def test_endpoint_main_for_edge_case_pipelines( + self, + example_api_for_edge_case_pipelines, + expected_modular_pipeline_tree_for_edge_cases, + ): + client = TestClient(example_api_for_edge_case_pipelines) + response = client.get("/api/main") + actual_modular_pipelines_tree = response.json()["modular_pipelines"] + assert_modular_pipelines_tree_equal( + actual_modular_pipelines_tree, expected_modular_pipeline_tree_for_edge_cases + ) + + def test_get_kedro_project_json_data(self, mocker): + expected_json_data = {"key": "value"} + encoded_response = json.dumps(expected_json_data).encode("utf-8") + + mock_get_default_response = mocker.patch( + 
"kedro_viz.api.rest.responses.pipelines.get_pipeline_response", + return_value={"key": "value"}, + ) + mock_get_encoded_response = mocker.patch( + "kedro_viz.api.rest.responses.pipelines.get_encoded_response", + return_value=encoded_response, + ) + + json_data = get_kedro_project_json_data() + + mock_get_default_response.assert_called_once() + mock_get_encoded_response.assert_called_once_with( + mock_get_default_response.return_value + ) + assert json_data == expected_json_data + + +class TestSinglePipelineEndpoint: + def test_get_pipeline(self, client): + response = client.get("/api/pipelines/data_science") + assert response.status_code == 200 + response_data = response.json() + expected_edges = [ + {"source": "f2b25286", "target": "d5a8b994"}, + {"source": "f1f1425b", "target": "uk.data_science"}, + {"source": "f1f1425b", "target": "f2b25286"}, + {"source": "uk.data_science", "target": "d5a8b994"}, + {"source": "uk", "target": "d5a8b994"}, + {"source": "0ecea0de", "target": "uk"}, + {"source": "0ecea0de", "target": "uk.data_science"}, + {"source": "f1f1425b", "target": "uk"}, + {"source": "0ecea0de", "target": "f2b25286"}, + ] + assert_dict_list_equal( + response_data.pop("edges"), expected_edges, sort_keys=("source", "target") + ) + expected_nodes = [ + { + "id": "0ecea0de", + "name": "model_inputs", + "tags": ["train", "split"], + "pipelines": ["__default__", "data_science", "data_processing"], + "modular_pipelines": ["uk.data_science", "uk.data_processing"], + "type": "data", + "layer": "model_inputs", + "dataset_type": "pandas.csv_dataset.CSVDataset", + "stats": {"columns": 12, "rows": 29768}, + }, + { + "id": "f2b25286", + "name": "train_model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk.data_science"], + "type": "task", + "parameters": { + "train_test_split": 0.1, + "num_epochs": 1000, + }, + }, + { + "id": "f1f1425b", + "name": "parameters", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": None, + "type": "parameters", + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "d5a8b994", + "name": "uk.data_science.model", + "tags": ["train"], + "pipelines": ["__default__", "data_science"], + "modular_pipelines": ["uk", "uk.data_science"], + "type": "data", + "layer": None, + "dataset_type": "io.memory_dataset.MemoryDataset", + "stats": None, + }, + { + "id": "uk", + "name": "uk", + "tags": ["train"], + "pipelines": ["data_science"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + { + "id": "uk.data_science", + "name": "uk.data_science", + "tags": ["train"], + "pipelines": ["data_science"], + "type": "modularPipeline", + "modular_pipelines": None, + "layer": None, + "dataset_type": None, + "stats": None, + }, + ] + assert_nodes_equal(response_data.pop("nodes"), expected_nodes) + + expected_modular_pipelines = { + "__root__": { + "children": [ + {"id": "f1f1425b", "type": "parameters"}, + {"id": "0ecea0de", "type": "data"}, + {"id": "uk", "type": "modularPipeline"}, + {"id": "d5a8b994", "type": "data"}, + ], + "id": "__root__", + "inputs": [], + "name": "__root__", + "outputs": [], + }, + "uk": { + "children": [ + {"id": "uk.data_science", "type": "modularPipeline"}, + ], + "id": "uk", + "inputs": ["0ecea0de", "f1f1425b"], + "name": "uk", + "outputs": ["d5a8b994"], + }, + "uk.data_science": { + "children": [ + {"id": "f2b25286", "type": "task"}, + ], + "id": "uk.data_science", + "inputs": 
["0ecea0de", "f1f1425b"], + "name": "uk.data_science", + "outputs": ["d5a8b994"], + }, + } + + assert_modular_pipelines_tree_equal( + response_data.pop("modular_pipelines"), + expected_modular_pipelines, + ) + + # Extract and sort the layers field + response_data_layers_sorted = sorted(response_data["layers"]) + expected_layers_sorted = sorted(["model_inputs", "raw"]) + assert response_data_layers_sorted == expected_layers_sorted + + # Remove the layers field from response_data for further comparison + response_data.pop("layers") + + # Expected response without the layers field + expected_response_without_layers = { + "tags": [ + {"id": "split", "name": "split"}, + {"id": "train", "name": "train"}, + ], + "pipelines": [ + {"id": "__default__", "name": "__default__"}, + {"id": "data_science", "name": "data_science"}, + {"id": "data_processing", "name": "data_processing"}, + ], + "selected_pipeline": "data_science", + } + assert response_data == expected_response_without_layers + + def test_get_non_existing_pipeline(self, client): + response = client.get("/api/pipelines/foo") + assert response.status_code == 404 + + +class TestAPIAppFromFile: + def test_api_app_from_json_file_main_api(self): + filepath = str(Path(__file__).parent.parent.parent) + api_app = apps.create_api_app_from_file(filepath) + client = TestClient(api_app) + response = client.get("/api/main") + assert_example_data_from_file(response.json()) + + def test_api_app_from_json_file_index(self): + filepath = str(Path(__file__).parent.parent.parent) + api_app = apps.create_api_app_from_file(filepath) + client = TestClient(api_app) + response = client.get("/") + assert response.status_code == 200 diff --git a/package/tests/test_api/test_rest/test_responses/test_save_responses.py b/package/tests/test_api/test_rest/test_responses/test_save_responses.py new file mode 100644 index 0000000000..828fe26269 --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_save_responses.py @@ -0,0 +1,168 @@ +from unittest import mock +from unittest.mock import Mock, call, patch + +import pytest + +from kedro_viz.api.rest.responses.save_responses import ( + save_api_main_response_to_fs, + save_api_node_response_to_fs, + save_api_pipeline_response_to_fs, + save_api_responses_to_fs, + write_api_response_to_fs, +) + + +class TestSaveAPIResponse: + @pytest.mark.parametrize( + "file_path, protocol, is_all_previews_enabled", + [ + ("s3://shareableviz", "s3", True), + ("abfs://shareableviz", "abfs", False), + ("shareableviz", "file", True), + ], + ) + def test_save_api_responses_to_fs( + self, file_path, protocol, is_all_previews_enabled, mocker + ): + mock_api_main_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.save_api_main_response_to_fs" + ) + mock_api_node_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.save_api_node_response_to_fs" + ) + mock_api_pipeline_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.save_api_pipeline_response_to_fs" + ) + + mock_filesystem = mocker.patch("fsspec.filesystem") + mock_filesystem.return_value.protocol = protocol + + save_api_responses_to_fs( + file_path, mock_filesystem.return_value, is_all_previews_enabled + ) + + mock_api_main_response_to_fs.assert_called_once_with( + f"{file_path}/api/main", mock_filesystem.return_value + ) + mock_api_node_response_to_fs.assert_called_once_with( + f"{file_path}/api/nodes", + mock_filesystem.return_value, + is_all_previews_enabled, + ) + 
mock_api_pipeline_response_to_fs.assert_called_once_with( + f"{file_path}/api/pipelines", mock_filesystem.return_value + ) + + def test_save_api_main_response_to_fs(self, mocker): + expected_default_response = {"test": "json"} + main_path = "/main" + + mock_get_default_response = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.get_pipeline_response", + return_value=expected_default_response, + ) + mock_write_api_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.write_api_response_to_fs" + ) + + remote_fs = Mock() + + save_api_main_response_to_fs(main_path, remote_fs) + + mock_get_default_response.assert_called_once() + mock_write_api_response_to_fs.assert_called_once_with( + main_path, mock_get_default_response.return_value, remote_fs + ) + + def test_save_api_pipeline_response_to_fs(self, mocker): + pipelines_path = "/pipelines" + pipelineIds = ["01f456", "01f457"] + expected_selected_pipeline_response = {"test": "json"} + + mock_get_selected_pipeline_response = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.get_pipeline_response", + return_value=expected_selected_pipeline_response, + ) + mock_write_api_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.write_api_response_to_fs" + ) + + mocker.patch( + "kedro_viz.api.rest.responses.save_responses.data_access_manager." + "registered_pipelines.get_pipeline_ids", + return_value=pipelineIds, + ) + + remote_fs = Mock() + + save_api_pipeline_response_to_fs(pipelines_path, remote_fs) + + assert mock_write_api_response_to_fs.call_count == len(pipelineIds) + assert mock_get_selected_pipeline_response.call_count == len(pipelineIds) + + expected_calls = [ + call( + f"{pipelines_path}/{pipelineId}", + mock_get_selected_pipeline_response.return_value, + remote_fs, + ) + for pipelineId in pipelineIds + ] + mock_write_api_response_to_fs.assert_has_calls(expected_calls, any_order=True) + + def test_save_api_node_response_to_fs(self, mocker): + nodes_path = "/nodes" + nodeIds = ["01f456", "01f457"] + expected_metadata_response = {"test": "json"} + + mock_get_node_metadata_response = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.get_node_metadata_response", + return_value=expected_metadata_response, + ) + mock_write_api_response_to_fs = mocker.patch( + "kedro_viz.api.rest.responses.save_responses.write_api_response_to_fs" + ) + mocker.patch( + "kedro_viz.api.rest.responses.save_responses.data_access_manager.nodes.get_node_ids", + return_value=nodeIds, + ) + remote_fs = mock.Mock() + + save_api_node_response_to_fs(nodes_path, remote_fs, False) + + assert mock_write_api_response_to_fs.call_count == len(nodeIds) + assert mock_get_node_metadata_response.call_count == len(nodeIds) + + expected_calls = [ + mock.call( + f"{nodes_path}/{nodeId}", + mock_get_node_metadata_response.return_value, + remote_fs, + ) + for nodeId in nodeIds + ] + mock_write_api_response_to_fs.assert_has_calls(expected_calls, any_order=True) + + @pytest.mark.parametrize( + "file_path, response, encoded_response", + [ + ( + "test_output.json", + {"key1": "value1", "key2": "value2"}, + b'{"key1": "value1", "key2": "value2"}', + ), + ], + ) + def test_write_api_response_to_fs( + self, file_path, response, encoded_response, mocker + ): + mock_encode_to_human_readable = mocker.patch( + "kedro_viz.api.rest.responses.utils.EnhancedORJSONResponse.encode_to_human_readable", + return_value=encoded_response, + ) + with patch("fsspec.filesystem") as mock_filesystem: + mockremote_fs = 
mock_filesystem.return_value + mockremote_fs.open.return_value.__enter__.return_value = Mock() + write_api_response_to_fs(file_path, response, mockremote_fs) + mockremote_fs.open.assert_called_once_with(file_path, "wb") + mock_encode_to_human_readable.assert_called_once() diff --git a/package/tests/test_api/test_rest/test_responses/test_utils.py b/package/tests/test_api/test_rest/test_responses/test_utils.py new file mode 100644 index 0000000000..cad8607e2b --- /dev/null +++ b/package/tests/test_api/test_rest/test_responses/test_utils.py @@ -0,0 +1,43 @@ +import pytest + +from kedro_viz.api.rest.responses.utils import ( + EnhancedORJSONResponse, + get_encoded_response, +) + + +class TestEnhancedORJSONResponse: + @pytest.mark.parametrize( + "content, expected", + [ + ( + {"key1": "value1", "key2": "value2"}, + b'{\n "key1": "value1",\n "key2": "value2"\n}', + ), + (["item1", "item2"], b'[\n "item1",\n "item2"\n]'), + ], + ) + def test_encode_to_human_readable(self, content, expected): + result = EnhancedORJSONResponse.encode_to_human_readable(content) + assert result == expected + + +def test_get_encoded_response(mocker): + mock_jsonable_encoder = mocker.patch( + "kedro_viz.api.rest.responses.utils.jsonable_encoder" + ) + mock_encode_to_human_readable = mocker.patch( + "kedro_viz.api.rest.responses.utils.EnhancedORJSONResponse.encode_to_human_readable" + ) + + mock_response = {"key": "value"} + mock_jsonable_encoder.return_value = mock_response + mock_encoded_response = b"encoded-response" + mock_encode_to_human_readable.return_value = mock_encoded_response + + result = get_encoded_response(mock_response) + + # Assertions + mock_jsonable_encoder.assert_called_once_with(mock_response) + mock_encode_to_human_readable.assert_called_once_with(mock_response) + assert result == mock_encoded_response diff --git a/package/tests/test_api/test_rest/test_router.py b/package/tests/test_api/test_rest/test_router.py index d84f1ce0f2..523043d96d 100644 --- a/package/tests/test_api/test_rest/test_router.py +++ b/package/tests/test_api/test_rest/test_router.py @@ -21,7 +21,7 @@ def test_deploy_kedro_viz( client, platform, endpoint, bucket_name, is_all_previews_enabled, mocker ): mocker.patch( - "kedro_viz.api.rest.router.DeployerFactory.create_deployer", + "kedro_viz.integrations.deployment.deployer_factory.DeployerFactory.create_deployer", return_value=MockDeployer(platform, endpoint, bucket_name), ) response = client.post( diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index 66bd08f1e9..abb8df9be5 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -15,11 +15,11 @@ ModularPipelinesRepository, ) from kedro_viz.integrations.utils import UnavailableDataset -from kedro_viz.models.flowchart import ( +from kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.named_entities import Tag +from kedro_viz.models.flowchart.nodes import ( DataNode, - GraphEdge, ParametersNode, - Tag, TaskNode, TranscodedDataNode, ) diff --git a/package/tests/test_data_access/test_repositories/test_graph.py b/package/tests/test_data_access/test_repositories/test_graph.py index c45232ebd1..51f8684368 100644 --- a/package/tests/test_data_access/test_repositories/test_graph.py +++ b/package/tests/test_data_access/test_repositories/test_graph.py @@ -4,7 +4,8 @@ GraphEdgesRepository, GraphNodesRepository, ) -from kedro_viz.models.flowchart import GraphEdge, GraphNode +from 
kedro_viz.models.flowchart.edge import GraphEdge +from kedro_viz.models.flowchart.nodes import GraphNode class TestGraphNodeRepository: diff --git a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py index 5b5a5e783b..ef6058ca8b 100644 --- a/package/tests/test_data_access/test_repositories/test_modular_pipelines.py +++ b/package/tests/test_data_access/test_repositories/test_modular_pipelines.py @@ -6,11 +6,8 @@ from kedro_viz.constants import ROOT_MODULAR_PIPELINE_ID from kedro_viz.data_access.repositories import ModularPipelinesRepository -from kedro_viz.models.flowchart import ( - GraphNodeType, - ModularPipelineChild, - ModularPipelineNode, -) +from kedro_viz.models.flowchart.model_utils import GraphNodeType +from kedro_viz.models.flowchart.nodes import ModularPipelineChild, ModularPipelineNode @pytest.fixture diff --git a/package/tests/test_import.py b/package/tests/test_import.py index e67a60c380..e9e918c6c1 100644 --- a/package/tests/test_import.py +++ b/package/tests/test_import.py @@ -12,8 +12,7 @@ def test_import_kedro_viz_with_no_official_support_emits_warning(mocker): kedro_viz.__loader__.exec_module(kedro_viz) assert len(record) == 1 - assert ( - """Please be advised that Kedro Viz is not yet fully - compatible with the Python version you are currently using.""" - in record[0].message.args[0] - ) + assert """Please be advised that Kedro Viz is not yet fully + compatible with the Python version you are currently using.""" in record[ + 0 + ].message.args[0] diff --git a/package/tests/test_integrations/test_sqlite_store.py b/package/tests/test_integrations/test_sqlite_store.py index ec14c68730..4f0cb6a00b 100644 --- a/package/tests/test_integrations/test_sqlite_store.py +++ b/package/tests/test_integrations/test_sqlite_store.py @@ -1,6 +1,3 @@ -# We need to disable pylint because of this issue - -# https://github.com/pylint-dev/pylint/issues/8138 -# pylint: disable=E1102 import json import os from pathlib import Path diff --git a/package/tests/test_models/test_flowchart/__init__.py b/package/tests/test_models/test_flowchart/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/package/tests/test_models/test_flowchart.py b/package/tests/test_models/test_flowchart/test_node_metadata.py similarity index 55% rename from package/tests/test_models/test_flowchart.py rename to package/tests/test_models/test_flowchart/test_node_metadata.py index 01238f286d..f8ebd4f8ec 100644 --- a/package/tests/test_models/test_flowchart.py +++ b/package/tests/test_models/test_flowchart/test_node_metadata.py @@ -1,7 +1,6 @@ from functools import partial from pathlib import Path from textwrap import dedent -from unittest.mock import call, patch import pytest from kedro.io import MemoryDataset @@ -9,18 +8,13 @@ from kedro_datasets.pandas import CSVDataset, ParquetDataset from kedro_datasets.partitions.partitioned_dataset import PartitionedDataset -from kedro_viz.models.flowchart import ( - DataNode, +from kedro_viz.models.flowchart.node_metadata import ( DataNodeMetadata, - GraphNode, - ParametersNode, ParametersNodeMetadata, - RegisteredPipeline, - TaskNode, TaskNodeMetadata, - TranscodedDataNode, TranscodedDataNodeMetadata, ) +from kedro_viz.models.flowchart.nodes import GraphNode def identity(x): @@ -56,264 +50,6 @@ def full_func(a, b, c, x): partial_func = partial(full_func, 3, 1, 4) -class TestGraphNodeCreation: - @pytest.mark.parametrize( - "namespace,expected_modular_pipelines", - 
[ - (None, set()), - ( - "uk.data_science.model_training", - set( - [ - "uk", - "uk.data_science", - "uk.data_science.model_training", - ] - ), - ), - ], - ) - def test_create_task_node(self, namespace, expected_modular_pipelines): - kedro_node = node( - identity, - inputs="x", - outputs="y", - name="identity_node", - tags={"tag"}, - namespace=namespace, - ) - task_node = GraphNode.create_task_node( - kedro_node, "identity_node", expected_modular_pipelines - ) - assert isinstance(task_node, TaskNode) - assert task_node.kedro_obj is kedro_node - assert task_node.name == "identity_node" - assert task_node.tags == {"tag"} - assert task_node.pipelines == set() - assert task_node.modular_pipelines == expected_modular_pipelines - assert task_node.namespace == namespace - - @pytest.mark.parametrize( - "dataset_name, expected_modular_pipelines", - [ - ("dataset", set()), - ( - "uk.data_science.model_training.dataset", - set( - [ - "uk", - "uk.data_science", - "uk.data_science.model_training", - ] - ), - ), - ], - ) - def test_create_data_node(self, dataset_name, expected_modular_pipelines): - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = GraphNode.create_data_node( - dataset_id=dataset_name, - dataset_name=dataset_name, - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 5, "file_size": 1024}, - modular_pipelines=set(expected_modular_pipelines), - ) - assert isinstance(data_node, DataNode) - assert data_node.kedro_obj is kedro_dataset - assert data_node.id == dataset_name - assert data_node.name == dataset_name - assert data_node.layer == "raw" - assert data_node.tags == set() - assert data_node.pipelines == set() - assert data_node.modular_pipelines == expected_modular_pipelines - assert data_node.stats["rows"] == 10 - assert data_node.stats["columns"] == 5 - assert data_node.stats["file_size"] == 1024 - - @pytest.mark.parametrize( - "transcoded_dataset_name, original_name", - [ - ("dataset@pandas2", "dataset"), - ( - "uk.data_science.model_training.dataset@pandas2", - "uk.data_science.model_training.dataset", - ), - ], - ) - def test_create_transcoded_data_node(self, transcoded_dataset_name, original_name): - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = GraphNode.create_data_node( - dataset_id=original_name, - dataset_name=transcoded_dataset_name, - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 2, "file_size": 1048}, - modular_pipelines=set(), - ) - assert isinstance(data_node, TranscodedDataNode) - assert data_node.id == original_name - assert data_node.name == original_name - assert data_node.layer == "raw" - assert data_node.tags == set() - assert data_node.pipelines == set() - assert data_node.stats["rows"] == 10 - assert data_node.stats["columns"] == 2 - assert data_node.stats["file_size"] == 1048 - - def test_create_parameters_all_parameters(self): - parameters_dataset = MemoryDataset( - data={"test_split_ratio": 0.3, "num_epochs": 1000} - ) - parameters_node = GraphNode.create_parameters_node( - dataset_id="parameters", - dataset_name="parameters", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert parameters_node.id == "parameters" - assert parameters_node.is_all_parameters() - assert not parameters_node.is_single_parameter() - assert parameters_node.parameter_value == { - "test_split_ratio": 0.3, - "num_epochs": 1000, - } - 
assert not parameters_node.modular_pipelines - - @pytest.mark.parametrize( - "dataset_name,expected_modular_pipelines", - [ - ("params:test_split_ratio", set()), - ( - "params:uk.data_science.model_training.test_split_ratio", - set(["uk", "uk.data_science", "uk.data_science.model_training"]), - ), - ], - ) - def test_create_parameters_node_single_parameter( - self, dataset_name, expected_modular_pipelines - ): - parameters_dataset = MemoryDataset(data=0.3) - parameters_node = GraphNode.create_parameters_node( - dataset_id=dataset_name, - dataset_name=dataset_name, - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=expected_modular_pipelines, - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert not parameters_node.is_all_parameters() - assert parameters_node.is_single_parameter() - assert parameters_node.parameter_value == 0.3 - assert parameters_node.modular_pipelines == expected_modular_pipelines - - def test_create_single_parameter_with_complex_type(self): - parameters_dataset = MemoryDataset(data=object()) - parameters_node = GraphNode.create_parameters_node( - dataset_id="params:test_split_ratio", - dataset_name="params:test_split_ratio", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert not parameters_node.is_all_parameters() - assert parameters_node.is_single_parameter() - assert isinstance(parameters_node.parameter_value, str) - - def test_create_all_parameters_with_complex_type(self): - mock_object = object() - parameters_dataset = MemoryDataset( - data={ - "test_split_ratio": 0.3, - "num_epochs": 1000, - "complex_param": mock_object, - } - ) - parameters_node = GraphNode.create_parameters_node( - dataset_id="parameters", - dataset_name="parameters", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.kedro_obj is parameters_dataset - assert parameters_node.id == "parameters" - assert parameters_node.is_all_parameters() - assert not parameters_node.is_single_parameter() - assert isinstance(parameters_node.parameter_value, str) - - def test_create_non_existing_parameter_node(self): - """Test the case where ``parameters`` is equal to None""" - parameters_node = GraphNode.create_parameters_node( - dataset_id="non_existing", - dataset_name="non_existing", - layer=None, - tags=set(), - parameters=None, - modular_pipelines=set(), - ) - assert isinstance(parameters_node, ParametersNode) - assert parameters_node.parameter_value is None - - @patch("logging.Logger.warning") - def test_create_non_existing_parameter_node_empty_dataset(self, patched_warning): - """Test the case where ``parameters`` is equal to a MemoryDataset with no data""" - parameters_dataset = MemoryDataset() - parameters_node = GraphNode.create_parameters_node( - dataset_id="non_existing", - dataset_name="non_existing", - layer=None, - tags=set(), - parameters=parameters_dataset, - modular_pipelines=set(), - ) - assert parameters_node.parameter_value is None - patched_warning.assert_has_calls( - [call("Cannot find parameter `%s` in the catalog.", "non_existing")] - ) - - -class TestGraphNodePipelines: - def test_registered_pipeline_name(self): - pipeline = RegisteredPipeline(id="__default__") - assert pipeline.name == "__default__" - - def 
test_modular_pipeline_name(self): - pipeline = GraphNode.create_modular_pipeline_node("data_engineering") - assert pipeline.name == "data_engineering" - - def test_add_node_to_pipeline(self): - default_pipeline = RegisteredPipeline(id="__default__") - another_pipeline = RegisteredPipeline(id="testing") - kedro_dataset = CSVDataset(filepath="foo.csv") - data_node = GraphNode.create_data_node( - dataset_id="dataset@transcoded", - dataset_name="dataset@transcoded", - layer="raw", - tags=set(), - dataset=kedro_dataset, - stats={"rows": 10, "columns": 2, "file_size": 1048}, - modular_pipelines=set(), - ) - assert data_node.pipelines == set() - data_node.add_pipeline(default_pipeline.id) - assert data_node.belongs_to_pipeline(default_pipeline.id) - assert not data_node.belongs_to_pipeline(another_pipeline.id) - - class TestGraphNodeMetadata: @pytest.mark.parametrize( "dataset,has_metadata", [(MemoryDataset(data=1), True), (None, False)] diff --git a/package/tests/test_models/test_flowchart/test_nodes.py b/package/tests/test_models/test_flowchart/test_nodes.py new file mode 100644 index 0000000000..2d7a59d338 --- /dev/null +++ b/package/tests/test_models/test_flowchart/test_nodes.py @@ -0,0 +1,248 @@ +from unittest.mock import call, patch + +import pytest +from kedro.io import MemoryDataset +from kedro.pipeline.node import node +from kedro_datasets.pandas import CSVDataset + +from kedro_viz.models.flowchart.nodes import ( + DataNode, + GraphNode, + ParametersNode, + TaskNode, + TranscodedDataNode, +) + + +def identity(x): + return x + + +class TestGraphNodeCreation: + @pytest.mark.parametrize( + "namespace,expected_modular_pipelines", + [ + (None, set()), + ( + "uk.data_science.model_training", + set( + [ + "uk", + "uk.data_science", + "uk.data_science.model_training", + ] + ), + ), + ], + ) + def test_create_task_node(self, namespace, expected_modular_pipelines): + kedro_node = node( + identity, + inputs="x", + outputs="y", + name="identity_node", + tags={"tag"}, + namespace=namespace, + ) + task_node = GraphNode.create_task_node( + kedro_node, "identity_node", expected_modular_pipelines + ) + assert isinstance(task_node, TaskNode) + assert task_node.kedro_obj is kedro_node + assert task_node.name == "identity_node" + assert task_node.tags == {"tag"} + assert task_node.pipelines == set() + assert task_node.modular_pipelines == expected_modular_pipelines + assert task_node.namespace == namespace + + @pytest.mark.parametrize( + "dataset_name, expected_modular_pipelines", + [ + ("dataset", set()), + ( + "uk.data_science.model_training.dataset", + set( + [ + "uk", + "uk.data_science", + "uk.data_science.model_training", + ] + ), + ), + ], + ) + def test_create_data_node(self, dataset_name, expected_modular_pipelines): + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id=dataset_name, + dataset_name=dataset_name, + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 5, "file_size": 1024}, + modular_pipelines=set(expected_modular_pipelines), + ) + assert isinstance(data_node, DataNode) + assert data_node.kedro_obj is kedro_dataset + assert data_node.id == dataset_name + assert data_node.name == dataset_name + assert data_node.layer == "raw" + assert data_node.tags == set() + assert data_node.pipelines == set() + assert data_node.modular_pipelines == expected_modular_pipelines + assert data_node.stats["rows"] == 10 + assert data_node.stats["columns"] == 5 + assert data_node.stats["file_size"] == 1024 + + 
@pytest.mark.parametrize( + "transcoded_dataset_name, original_name", + [ + ("dataset@pandas2", "dataset"), + ( + "uk.data_science.model_training.dataset@pandas2", + "uk.data_science.model_training.dataset", + ), + ], + ) + def test_create_transcoded_data_node(self, transcoded_dataset_name, original_name): + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id=original_name, + dataset_name=transcoded_dataset_name, + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 2, "file_size": 1048}, + modular_pipelines=set(), + ) + assert isinstance(data_node, TranscodedDataNode) + assert data_node.id == original_name + assert data_node.name == original_name + assert data_node.layer == "raw" + assert data_node.tags == set() + assert data_node.pipelines == set() + assert data_node.stats["rows"] == 10 + assert data_node.stats["columns"] == 2 + assert data_node.stats["file_size"] == 1048 + + def test_create_parameters_all_parameters(self): + parameters_dataset = MemoryDataset( + data={"test_split_ratio": 0.3, "num_epochs": 1000} + ) + parameters_node = GraphNode.create_parameters_node( + dataset_id="parameters", + dataset_name="parameters", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert parameters_node.id == "parameters" + assert parameters_node.is_all_parameters() + assert not parameters_node.is_single_parameter() + assert parameters_node.parameter_value == { + "test_split_ratio": 0.3, + "num_epochs": 1000, + } + assert not parameters_node.modular_pipelines + + @pytest.mark.parametrize( + "dataset_name,expected_modular_pipelines", + [ + ("params:test_split_ratio", set()), + ( + "params:uk.data_science.model_training.test_split_ratio", + set(["uk", "uk.data_science", "uk.data_science.model_training"]), + ), + ], + ) + def test_create_parameters_node_single_parameter( + self, dataset_name, expected_modular_pipelines + ): + parameters_dataset = MemoryDataset(data=0.3) + parameters_node = GraphNode.create_parameters_node( + dataset_id=dataset_name, + dataset_name=dataset_name, + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=expected_modular_pipelines, + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert not parameters_node.is_all_parameters() + assert parameters_node.is_single_parameter() + assert parameters_node.parameter_value == 0.3 + assert parameters_node.modular_pipelines == expected_modular_pipelines + + def test_create_single_parameter_with_complex_type(self): + parameters_dataset = MemoryDataset(data=object()) + parameters_node = GraphNode.create_parameters_node( + dataset_id="params:test_split_ratio", + dataset_name="params:test_split_ratio", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert not parameters_node.is_all_parameters() + assert parameters_node.is_single_parameter() + assert isinstance(parameters_node.parameter_value, str) + + def test_create_all_parameters_with_complex_type(self): + mock_object = object() + parameters_dataset = MemoryDataset( + data={ + "test_split_ratio": 0.3, + "num_epochs": 1000, + "complex_param": mock_object, + } + ) + parameters_node = 
GraphNode.create_parameters_node( + dataset_id="parameters", + dataset_name="parameters", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.kedro_obj is parameters_dataset + assert parameters_node.id == "parameters" + assert parameters_node.is_all_parameters() + assert not parameters_node.is_single_parameter() + assert isinstance(parameters_node.parameter_value, str) + + def test_create_non_existing_parameter_node(self): + """Test the case where ``parameters`` is equal to None""" + parameters_node = GraphNode.create_parameters_node( + dataset_id="non_existing", + dataset_name="non_existing", + layer=None, + tags=set(), + parameters=None, + modular_pipelines=set(), + ) + assert isinstance(parameters_node, ParametersNode) + assert parameters_node.parameter_value is None + + @patch("logging.Logger.warning") + def test_create_non_existing_parameter_node_empty_dataset(self, patched_warning): + """Test the case where ``parameters`` is equal to a MemoryDataset with no data""" + parameters_dataset = MemoryDataset() + parameters_node = GraphNode.create_parameters_node( + dataset_id="non_existing", + dataset_name="non_existing", + layer=None, + tags=set(), + parameters=parameters_dataset, + modular_pipelines=set(), + ) + assert parameters_node.parameter_value is None + patched_warning.assert_has_calls( + [call("Cannot find parameter `%s` in the catalog.", "non_existing")] + ) diff --git a/package/tests/test_models/test_flowchart/test_pipeline.py b/package/tests/test_models/test_flowchart/test_pipeline.py new file mode 100644 index 0000000000..520aff01d9 --- /dev/null +++ b/package/tests/test_models/test_flowchart/test_pipeline.py @@ -0,0 +1,32 @@ +from kedro_datasets.pandas import CSVDataset + +from kedro_viz.models.flowchart.named_entities import RegisteredPipeline +from kedro_viz.models.flowchart.nodes import GraphNode + + +class TestGraphNodePipelines: + def test_registered_pipeline_name(self): + pipeline = RegisteredPipeline(id="__default__") + assert pipeline.name == "__default__" + + def test_modular_pipeline_name(self): + pipeline = GraphNode.create_modular_pipeline_node("data_engineering") + assert pipeline.name == "data_engineering" + + def test_add_node_to_pipeline(self): + default_pipeline = RegisteredPipeline(id="__default__") + another_pipeline = RegisteredPipeline(id="testing") + kedro_dataset = CSVDataset(filepath="foo.csv") + data_node = GraphNode.create_data_node( + dataset_id="dataset@transcoded", + dataset_name="dataset@transcoded", + layer="raw", + tags=set(), + dataset=kedro_dataset, + stats={"rows": 10, "columns": 2, "file_size": 1048}, + modular_pipelines=set(), + ) + assert data_node.pipelines == set() + data_node.add_pipeline(default_pipeline.id) + assert data_node.belongs_to_pipeline(default_pipeline.id) + assert not data_node.belongs_to_pipeline(another_pipeline.id) diff --git a/package/tests/test_server.py b/package/tests/test_server.py index 33fe6f2e1b..2169e9d4da 100644 --- a/package/tests/test_server.py +++ b/package/tests/test_server.py @@ -151,7 +151,7 @@ def test_load_file( def test_save_file(self, tmp_path, mocker): mock_filesystem = mocker.patch("fsspec.filesystem") save_api_responses_to_fs_mock = mocker.patch( - "kedro_viz.server.save_api_responses_to_fs" + "kedro_viz.api.rest.responses.save_responses.save_api_responses_to_fs" ) save_file = tmp_path / "save.json" run_server(save_file=save_file) diff --git 
a/package/tests/test_services/test_layers.py b/package/tests/test_services/test_layers.py index 80d76fae5a..c949a9f98b 100644 --- a/package/tests/test_services/test_layers.py +++ b/package/tests/test_services/test_layers.py @@ -1,6 +1,6 @@ import pytest -from kedro_viz.models.flowchart import GraphNode +from kedro_viz.models.flowchart.nodes import GraphNode from kedro_viz.services.layers import sort_layers diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000000..166d54a4a7 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,86 @@ +target-version = "py39" + +include = [ + "package/kedro_viz/*.py", + "package/tests/*.py", + "package/features/*.py", + "demo-project/*.py", +] + +[lint] +select = [ + "I", # Isort + "B", # Bugbear + "BLE", # Blind exceptions + "PL", # Pylint + "C90", # Mccabe complexity + "E", # Pycodestyle errors + "F", # Pyflakes + "W", # Pycodestyle warnings + "N", # PEP8-compliant object names + "SLF", # Private members access + "D101", # Class docstrings + "D103", # Function docstrings + "ARG", # Unused arguments + "T10", # Debug statements +] +ignore = [ + "E203", + "E231", + "E266", + "E501", + "F401", + "B030", # Except handler is something other than exception class + "C405", # Inconsistent definition of literal collections + "PLR2004", # Magic values in comparisons +] + +[lint.per-file-ignores] +"*/cli_steps.py" = ["B011"] # assert False instead of AssertionError +"*/base_deployer.py" = ["B024"] # ABCs without abstract methods +"package/kedro_viz/__init__.py" = ["B028"] # Risky usage of positional arguments +"package/tests/test_integrations/test_sqlite_store.py" = ["C401"] # Unnecessary generators +"package/kedro_viz/data_access/repositories/tags.py" = ["C413", "D101", "D103"] +"package/kedro_viz/data_access/repositories/catalog.py" = ["PLW2901", "SLF", "D"] +"package/features/steps/sh_run.py" = ["PLW1510"] # `subprocess.run` without explicit `check` argument +"*/tests/*.py" = ["SLF", "D", "ARG"] +"package/kedro_viz/models/experiment_tracking.py" = ["SLF"] +"package/kedro_viz/models/flowchart/nodes.py" = ["SLF"] +"package/kedro_viz/models/flowchart/node_metadata.py" = ["SLF"] +"package/kedro_viz/integrations/kedro/hooks.py" = ["SLF", "BLE"] +"package/kedro_viz/integrations/kedro/sqlite_store.py" = ["BLE"] +"package/kedro_viz/integrations/kedro/data_loader.py" = ["SLF"] +"package/kedro_viz/data_access/managers.py" = ["SLF"] +"package/kedro_viz/data_access/repositories/tracking_datasets.py" = ["SLF", "D"] +"package/kedro_viz/launchers/cli/main.py" = ["D"] +"package/kedro_viz/api/rest/router.py" = ["D"] +"package/features/steps/cli_steps.py" = ["D"] +"package/features/environment.py" = ["D"] +"package/kedro_viz/api/graphql/schema.py" = ["D"] +"package/kedro_viz/data_access/repositories/registered_pipelines.py" = ["D"] +"package/kedro_viz/api/rest/responses.py" = ["D"] +"package/kedro_viz/api/graphql/types.py" = ["D"] +"package/kedro_viz/data_access/repositories/graph.py" = ["D"] +"package/kedro_viz/data_access/repositories/runs.py" = ["D"] +"demo-project/*.py" = ["D", "ARG", "PLW0603"] # Allow unused arguments in node functions for them to generate constant outputs, but mimic the DAG and technically have inputs. 
+ +[lint.mccabe] +max-complexity = 18 + +[lint.pylint] +max-args = 12 + +[lint.pep8-naming] +extend-ignore-names = [ + "ServiceRequestError", + "mock_DeployerFactory", + "Session", + "WaitForException", + "displayHTML", + "nodeId*", + "pipelineId*", + "*_None_*", + "X_test", + "X_train", + "X", +] diff --git a/src/selectors/tags.js b/src/selectors/tags.js index d2a55869d7..7f040c7c4d 100644 --- a/src/selectors/tags.js +++ b/src/selectors/tags.js @@ -1,5 +1,6 @@ import { createSelector } from 'reselect'; import { getPipelineTagIDs } from './pipeline'; +import { prettifyName } from '../utils'; const getNodeTags = (state) => state.node.tags; const getTagName = (state) => state.tag.name; @@ -14,7 +15,7 @@ export const getTagData = createSelector( (tagIDs, tagName, tagActive, tagEnabled) => tagIDs.sort().map((id) => ({ id, - name: tagName[id], + name: tagName[id] || prettifyName(id), active: Boolean(tagActive[id]), enabled: Boolean(tagEnabled[id]), })) diff --git a/src/utils/index.js b/src/utils/index.js index 2b3e1b9061..83804e0a6c 100644 --- a/src/utils/index.js +++ b/src/utils/index.js @@ -121,6 +121,9 @@ export const stripNamespace = (str) => { * @returns {String} The string with or without replaced values */ export const prettifyName = (str) => { + if (!str) { + return ''; + } const replacedString = str .replace(/-/g, ' ') .replace(/_/g, ' ')