From b52df064fb9be42227d1d4efd3c2e9187807ccd7 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 25 Jan 2021 08:46:24 +0000 Subject: [PATCH 1/3] add fugue_notebook --- Makefile | 8 ++ Untitled.ipynb | 58 ++++++++++++++ fugue_notebook/__init__.py | 0 fugue_notebook/env.py | 129 +++++++++++++++++++++++++++++++ fugue_notebook/jupyter_config.py | 4 + setup.py | 6 +- 6 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 Untitled.ipynb create mode 100644 fugue_notebook/__init__.py create mode 100644 fugue_notebook/env.py create mode 100644 fugue_notebook/jupyter_config.py diff --git a/Makefile b/Makefile index ea2d184..228f4dc 100644 --- a/Makefile +++ b/Makefile @@ -24,3 +24,11 @@ package: test: python3 -bb -m pytest tests/ + +jupyter: + pip install . + rm -rf /root/.jupyter/jupyter_config.py + rm -rf /root/.ipython/profile_default/startup/ + mkdir -p /root/.ipython/profile_default/startup/ + cp fugue_notebook/jupyter_config.py /root/.ipython/profile_default/startup/ + jupyter notebook --port=8888 --ip=0.0.0.0 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password='' diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..cf015b7 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,58 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "alive-injury", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ArrayDataFrame\n", + "a:int\n", + "-----\n", + "0 \n", + "Total count: 1\n", + "\n" + ] + } + ], + "source": [ + "%%fsql native {\"x\":2, \"fugue.pre\":10}\n", + "CREATE [[0]] SCHEMA a:int\n", + "PRINT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "close-louisiana", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/fugue_notebook/__init__.py b/fugue_notebook/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fugue_notebook/env.py b/fugue_notebook/env.py new file mode 100644 index 0000000..c441c02 --- /dev/null +++ b/fugue_notebook/env.py @@ -0,0 +1,129 @@ +# flake8: noqa +import html +import json +from typing import Any, List, Dict + +import fugue_sql +import pandas as pd +from fugue import ( + NativeExecutionEngine, + make_execution_engine, + register_execution_engine, +) +from fugue.extensions._builtins.outputters import Show +from IPython.core.magic import register_cell_magic +from IPython.display import HTML, Javascript, display +from triad import Schema, ParamDict +from triad.utils.convert import get_caller_global_local_vars + + +_HIGHLIGHT_JS = r""" +require(["codemirror/lib/codemirror"]); +function set(str) { + var obj = {}, words = str.split(" "); + for (var i = 0; i < words.length; ++i) obj[words[i]] = true; + return obj; + } +var fugue_keywords = "fill hash rand even presort persist broadcast params process output outtransform rowcount concurrency prepartition zip print title save append parquet csv json single checkpoint weak strong deterministic yield connect sample seed"; +CodeMirror.defineMIME("text/x-mssql", { + name: "sql", + keywords: set(fugue_keywords + " add after all alter analyze and anti archive array as asc at between bucket buckets by cache cascade case cast change clear cluster clustered codegen collection column columns comment commit compact compactions compute concatenate cost create cross cube current current_date current_timestamp database databases datata dbproperties defined delete delimited deny desc describe dfs directories distinct distribute drop else end escaped except exchange exists explain export extended external false fields fileformat first following for format formatted from full function functions global grant group grouping having if ignore import in index indexes inner inpath inputformat insert intersect interval into is items join keys last lateral lazy left like limit lines list load local location lock locks logical macro map minus msck natural no not null nulls of on optimize option options or order out outer outputformat over overwrite partition partitioned partitions percent preceding principals purge range recordreader recordwriter recover reduce refresh regexp rename repair replace reset restrict revoke right rlike role roles rollback rollup row rows schema schemas select semi separated serde serdeproperties set sets show skewed sort sorted start statistics stored stratify struct table tables tablesample tblproperties temp temporary terminated then to touch transaction transactions transform true truncate unarchive unbounded uncache union unlock unset use using values view when where window with"), + builtin: set("tinyint smallint int bigint boolean float double string binary timestamp decimal array map struct uniontype delimited serde sequencefile textfile rcfile inputformat outputformat"), + atoms: set("false true null unknown"), + operatorChars: /^[*\/+\-%<>!=&|^\/#@?~]/, + dateSQL: set("datetime date time timestamp"), + support: set("ODBCdotTable doubleQuote binaryNumber hexNumber commentSlashSlash commentHash") + }); +require(['notebook/js/codecell'], function(codecell) { + codecell.CodeCell.options_default.highlight_modes['magic_text/x-mssql'] = {'reg':[/%%fsql/]} ; + Jupyter.notebook.events.one('kernel_ready.Kernel', function(){ + Jupyter.notebook.get_cells().map(function(cell){ + if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ; + }); + }); +""" + +_FUGUE_NOTEBOOK_PRE_CONF = ParamDict() +_FUGUE_NOTEBOOK_POST_CONF = ParamDict() + + +@register_cell_magic("fsql") +def fsql(line: str, cell: str) -> None: + _, lc = get_caller_global_local_vars(start=-2, end=-2) + line = line.strip() + p = line.find("{") + if p >= 0: + engine = line[:p].strip() + conf = json.loads(line[p:]) + else: + parts = line.split(" ", 1) + engine = parts[0] + conf = ParamDict(None if len(parts) == 1 else lc[parts[1]]) + cf = dict(_FUGUE_NOTEBOOK_PRE_CONF) + cf.update(conf) + for k, v in _FUGUE_NOTEBOOK_POST_CONF.items(): + if k in cf and cf[k] != v: + raise ValueError( + f"{k} must be {v}, but you set to {cf[k]}, you may unset it" + ) + cf[k] = v + cf.update(_FUGUE_NOTEBOOK_POST_CONF) + fugue_sql.fsql(cell).run(make_execution_engine(engine, cf)) + + +class NotebookSetup(object): + def get_pre_conf(self) -> Dict[str, Any]: + return {"fugue.pre": 1} + + def get_post_conf(self) -> Dict[str, Any]: + return {"fugue.post": 2} + + def pretty_print( + self, + schema: Schema, + head_rows: List[List[Any]], + title: Any, + rows: int, + count: int, + ): + components: List[Any] = [] + if title is not None: + components.append(HTML(f"

{html.escape(title)}

")) + pdf = pd.DataFrame(head_rows, columns=list(schema.names)) + components.append(pdf) + if count >= 0: + components.append(HTML(f"total count: {count}")) + components.append(HTML(f"schema: {schema}")) + display(*components) + + def register_execution_engines(self): + register_execution_engine( + "native", lambda conf, **kwargs: NativeExecutionEngine(conf=conf) + ) + try: + import pyspark # noqa: F401 + from fugue_spark import SparkExecutionEngine + + register_execution_engine( + "spark", lambda conf, **kwargs: SparkExecutionEngine(conf=conf) + ) + except ImportError: + pass + try: + import dask.dataframe # noqa: F401 + from fugue_dask import DaskExecutionEngine + + register_execution_engine( + "dask", lambda conf, **kwargs: DaskExecutionEngine(conf=conf) + ) + except ImportError: + pass + + def run(self) -> Any: + _FUGUE_NOTEBOOK_PRE_CONF.clear() + _FUGUE_NOTEBOOK_PRE_CONF.update(self.get_pre_conf()) + _FUGUE_NOTEBOOK_POST_CONF.clear() + _FUGUE_NOTEBOOK_POST_CONF.update(self.get_post_conf()) + self.register_execution_engines() + Show.set_hook(self.pretty_print) + return Javascript(_HIGHLIGHT_JS) diff --git a/fugue_notebook/jupyter_config.py b/fugue_notebook/jupyter_config.py new file mode 100644 index 0000000..b04cfbe --- /dev/null +++ b/fugue_notebook/jupyter_config.py @@ -0,0 +1,4 @@ +from fugue_notebook.env import NotebookSetup + +_FUGUE_NOTEBOOK_SETUP = NotebookSetup() +_FUGUE_NOTEBOOK_SETUP.run() diff --git a/setup.py b/setup.py index 724e980..1fb3723 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,11 @@ def get_version() -> str: keywords="fugue incubator experiment", url="http://github.com/fugue-project/fugue-incubator", install_requires=["fugue>=0.5.0", "scikit-learn", "matplotlib"], - extras_require={"hyperopt": ["hyperopt"], "all": ["hyperopt"]}, + extras_require={ + "hyperopt": ["hyperopt"], + "notebook": ["notebook", "jupyterlab"], + "all": ["hyperopt", "notebook", "jupyterlab"], + }, classifiers=[ # "3 - Alpha", "4 - Beta" or "5 - Production/Stable" "Development Status :: 3 - Alpha", From 458d65138abdad97e467f8b514743ed7f4e641d3 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 25 Jan 2021 17:48:55 +0000 Subject: [PATCH 2/3] update --- Untitled.ipynb | 5 ++++- fugue_notebook/env.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Untitled.ipynb b/Untitled.ipynb index cf015b7..b4fca22 100644 --- a/Untitled.ipynb +++ b/Untitled.ipynb @@ -31,7 +31,10 @@ "id": "close-louisiana", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "%%ffsql\n", + "sdf\n" + ] } ], "metadata": { diff --git a/fugue_notebook/env.py b/fugue_notebook/env.py index c441c02..c2caff2 100644 --- a/fugue_notebook/env.py +++ b/fugue_notebook/env.py @@ -35,7 +35,7 @@ support: set("ODBCdotTable doubleQuote binaryNumber hexNumber commentSlashSlash commentHash") }); require(['notebook/js/codecell'], function(codecell) { - codecell.CodeCell.options_default.highlight_modes['magic_text/x-mssql'] = {'reg':[/%%fsql/]} ; + codecell.CodeCell.options_default.highlight_modes['magic_text/x-mssql'] = {'reg':[/%%ffsql/]} ; Jupyter.notebook.events.one('kernel_ready.Kernel', function(){ Jupyter.notebook.get_cells().map(function(cell){ if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ; From ac9c8428282c95183c1063400107bc490cb475d0 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 25 Jan 2021 17:49:44 +0000 Subject: [PATCH 3/3] update --- fugue_notebook/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fugue_notebook/env.py b/fugue_notebook/env.py index c2caff2..c441c02 100644 --- a/fugue_notebook/env.py +++ b/fugue_notebook/env.py @@ -35,7 +35,7 @@ support: set("ODBCdotTable doubleQuote binaryNumber hexNumber commentSlashSlash commentHash") }); require(['notebook/js/codecell'], function(codecell) { - codecell.CodeCell.options_default.highlight_modes['magic_text/x-mssql'] = {'reg':[/%%ffsql/]} ; + codecell.CodeCell.options_default.highlight_modes['magic_text/x-mssql'] = {'reg':[/%%fsql/]} ; Jupyter.notebook.events.one('kernel_ready.Kernel', function(){ Jupyter.notebook.get_cells().map(function(cell){ if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;