From a02eb7bc55eb363454245545ce2be27a82b03223 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Nov 2020 22:15:06 +0100 Subject: [PATCH 01/22] owcsvimport: Add some comments --- Orange/widgets/data/owcsvimport.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 5891a05a1b4..fa53275a66f 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -672,7 +672,12 @@ def __init__(self, *args, **kwargs): self.__watcher = None # type: Optional[qconcurrent.FutureWatcher] self.controlArea.layout().setSpacing(-1) # reset spacing + grid = QGridLayout() + + ############# + # File select + ############# grid.addWidget(QLabel("File:", self), 0, 0, 1, 1) self.import_items_model = VarPathItemModel(self) @@ -724,6 +729,11 @@ def __init__(self, *args, **kwargs): self.summary_text.viewport().setAutoFillBackground(False) box.layout().addWidget(self.summary_text) + self.info.set_output_summary(self.info.NoOutput) + + ######### + # Buttons + ######### button_box = QDialogButtonBox( orientation=Qt.Horizontal, standardButtons=QDialogButtonBox.Cancel | QDialogButtonBox.Retry From 239c9ecb64f816e93ca021effea0df2fbbd7939e Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Sat, 7 Nov 2020 00:50:05 +0100 Subject: [PATCH 02/22] owcsvimport: Add domain editor Some work still required in maintaining consistency between import options and domain editor - establishing a singular deduplication mechanism --- Orange/widgets/data/owcsvimport.py | 60 +++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index fa53275a66f..88a1b9a417c 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -20,6 +20,7 @@ import lzma import bz2 import zipfile +from itertools import chain from xml.sax.saxutils import escape from functools import 
singledispatch @@ -51,6 +52,7 @@ from pandas.api import types as pdtypes import Orange.data +from Orange.data import Table from Orange.misc.collections import natural_sorted from Orange.widgets import widget, gui, settings @@ -59,6 +61,7 @@ textimport, concurrent as qconcurrent, unique_everseen, enum_get, qname ) from Orange.widgets.utils.combobox import ItemStyledComboBox +from Orange.widgets.utils.domaineditor import DomainEditor from Orange.widgets.utils.pathutils import ( PathItem, VarPath, AbsPath, samepath, prettyfypath, isprefixed, ) @@ -637,6 +640,12 @@ class Error(widget.OWWidget.Error): "might be binary" ) + class Warning(widget.OWWidget.Warning): + performance_warning = widget.Msg( + "Categorical variables with >100 values may decrease performance.") + renamed_vars = widget.Msg("Some variables have been renamed " + "to avoid duplicates.\n{}") + #: Paths and options of files accessed in a 'session' _session_items = settings.Setting( [], schema_only=True) # type: List[Tuple[str, dict]] @@ -671,9 +680,10 @@ def __init__(self, *args, **kwargs): self.__executor = qconcurrent.ThreadExecutor() self.__watcher = None # type: Optional[qconcurrent.FutureWatcher] - self.controlArea.layout().setSpacing(-1) # reset spacing + self.data = None grid = QGridLayout() + self.controlArea.layout().setSpacing(-1) # reset spacing ############# # File select @@ -731,6 +741,16 @@ def __init__(self, *args, **kwargs): self.info.set_output_summary(self.info.NoOutput) + ############### + # Domain editor + ############### + box = gui.widgetBox(self.controlArea, "Columns (click to edit)") + self.domain_editor = DomainEditor(self) + self.editor_model = self.domain_editor.model() + box.layout().addWidget(self.domain_editor) + + self.editor_model.dataChanged.connect(self.__handle_domain_edit) + ######### # Buttons ######### @@ -1153,6 +1173,7 @@ def __set_running_state(self): self.summary_text.setText( "
Loading: {}
".format(prettyfypath(path)) ) + self.domain_editor.setEnabled(False) def __clear_running_state(self, ): self.progressBarFinished() @@ -1160,6 +1181,7 @@ def __clear_running_state(self, ): self.setBlocking(False) self.cancel_button.setEnabled(False) self.load_button.setText("Reload") + self.domain_editor.setEnabled(True) def __set_error_state(self, err): self.Error.clear() @@ -1220,14 +1242,50 @@ def __handle_result(self, f): if df is not None: table = pandas_to_table(df) + domain = table.domain + self._inspect_discrete_variables(domain) filename = self.current_item().path() table.name = os.path.splitext(os.path.split(filename)[-1])[0] else: table = None + domain = None + + self.data = table + self.domain_editor.set_domain(domain) + self.Outputs.data_frame.send(df) self.Outputs.data.send(table) + self._update_status_messages(table) + def __handle_domain_edit(self): + if self.data is None: + table = None + else: + domain, cols, renamed = \ + self.domain_editor.get_domain(self.data.domain, self.data, + deduplicate=True) + if not (domain.variables or domain.metas): + table = None + elif domain is self.data.domain: + table = self.data + else: + X, y, m = cols + table = Table.from_numpy(domain, X, y, m, self.data.W) + table.name = self.data.name + table.ids = np.array(self.data.ids) + table.attributes = getattr(self.data, 'attributes', {}) + self._inspect_discrete_variables(domain) + if renamed: + self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}") + self.Outputs.data.send(table) + self._update_status_messages(table) + + def _inspect_discrete_variables(self, domain): + for var in chain(domain.variables, domain.metas): + if var.is_discrete and len(var.values) > 100: + self.Warning.performance_warning() + def _update_status_messages(self, data): if data is None: return From bccefe252d52506bef0c1a1a75fac8ad92cf9fa5 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Tue, 5 Jan 2021 23:03:17 +0000 Subject: [PATCH 03/22] owcsvimport: Automatically browse relative --- 
Orange/widgets/data/owcsvimport.py | 35 +++++++----------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 88a1b9a417c..0f247273ebe 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -705,19 +705,6 @@ def __init__(self, *args, **kwargs): "…", icon=self.style().standardIcon(QStyle.SP_DirOpenIcon), toolTip="Browse filesystem", autoDefault=False, ) - # A button drop down menu with selection of explicit workflow dir - # relative import. This is only enabled when 'basedir' workflow env - # is set. XXX: Always use menu, disable Import relative... action? - self.browse_menu = menu = QMenu(self.browse_button) - ac = menu.addAction("Import any file…") - ac.triggered.connect(self.browse) - - ac = menu.addAction("Import relative to workflow file…") - ac.setToolTip("Import a file within the workflow file directory") - ac.triggered.connect(lambda: self.browse_relative("basedir")) - - if "basedir" in self._replacements(): - self.browse_button.setMenu(menu) self.browse_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) self.browse_button.clicked.connect(self.browse) @@ -796,7 +783,6 @@ def update_buttons(cbindex): def workflowEnvChanged(self, key, value, oldvalue): super().workflowEnvChanged(key, value, oldvalue) if key == "basedir": - self.browse_button.setMenu(self.browse_menu) self.import_items_model.setReplacementEnv(self._replacements()) @Slot(int) @@ -916,32 +902,27 @@ def _path_must_be_relative_mb(self, prefix: str) -> QMessageBox: mb.setAttribute(Qt.WA_DeleteOnClose) return mb - @Slot(str) - def browse_relative(self, prefixname): - path = self._replacements().get(prefixname) - self.browse(prefixname=prefixname, directory=path) - @Slot() def browse(self, prefixname=None, directory=None): """ Open a file dialog and select a user specified file. 
""" dlg = self._browse_dialog() - if directory is not None: + + if "basedir" in self._replacements(): + directory = self._replacements().get("basedir") dlg.setDirectory(directory) + else: + directory = "" status = dlg.exec() dlg.deleteLater() if status == QFileDialog.Accepted: selected_filter = dlg.selectedFileFormat() path = dlg.selectedFiles()[0] - if prefixname: - _prefixpath = self._replacements().get(prefixname, "") - if not isprefixed(_prefixpath, path): - mb = self._path_must_be_relative_mb(_prefixpath) - mb.show() - return - varpath = VarPath(prefixname, os.path.relpath(path, _prefixpath)) + assert os.path.isfile(path) + if directory and os.path.commonprefix([directory, path]) == directory: + varpath = VarPath("basedir", os.path.relpath(path, directory)) else: varpath = PathItem.AbsPath(path) From 2ea134ff974bbd87795fbd59240fc6cd101a1eff Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Tue, 3 Aug 2021 00:22:57 +0100 Subject: [PATCH 04/22] owcsvimport: Make info box as small as possible --- Orange/widgets/data/owcsvimport.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 0f247273ebe..6a610f29bc7 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -36,7 +36,7 @@ Qt, QFileInfo, QTimer, QSettings, QObject, QSize, QMimeDatabase, QMimeType ) from AnyQt.QtGui import ( - QStandardItem, QStandardItemModel, QPalette, QColor, QIcon + QStandardItem, QStandardItemModel, QPalette, QColor, QIcon, QTextOption ) from AnyQt.QtWidgets import ( QLabel, QComboBox, QPushButton, QDialog, QDialogButtonBox, QGridLayout, @@ -716,8 +716,9 @@ def __init__(self, *args, **kwargs): # Info text ########### box = gui.widgetBox(self.controlArea, "Info") + box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Maximum) self.summary_text = QTextBrowser( - verticalScrollBarPolicy=Qt.ScrollBarAsNeeded, + 
verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff, readOnly=True, ) self.summary_text.viewport().setBackgroundRole(QPalette.NoRole) @@ -725,6 +726,7 @@ def __init__(self, *args, **kwargs): self.summary_text.setMinimumHeight(self.fontMetrics().ascent() * 2 + 4) self.summary_text.viewport().setAutoFillBackground(False) box.layout().addWidget(self.summary_text) + QTimer.singleShot(0, lambda: self._set_summary_text(None)) self.info.set_output_summary(self.info.NoOutput) @@ -1139,7 +1141,7 @@ def cancel(self): self.__cancel_task() self.__clear_running_state() self.setStatusMessage("Cancelled") - self.summary_text.setText( + self._set_summary_text( "
Cancelled
Press 'Reload' to try again
" ) @@ -1151,7 +1153,7 @@ def __set_running_state(self): self.load_button.setText("Restart") path = self.current_item().path() self.Error.clear() - self.summary_text.setText( + self._set_summary_text( "
Loading: {}
".format(prettyfypath(path)) ) self.domain_editor.setEnabled(False) @@ -1189,11 +1191,11 @@ def __set_error_state(self, err): basename=escape(basename), err="".join(traceback.format_exception_only(type(err), err)) ) - self.summary_text.setText(text) + self._set_summary_text(text) def __clear_error_state(self): self.Error.error.clear() - self.summary_text.setText("") + self._set_summary_text("") def onDeleteWidget(self): """Reimplemented.""" @@ -1282,7 +1284,17 @@ def pluralize(seq): plural_2=pluralize(data.domain.attributes), n_meta=len(data.domain.metas), plural_3=pluralize(data.domain.metas)) - self.summary_text.setText(summary) + self._set_summary_text(summary) + + def _set_summary_text(self, text): + if not text: + text = '
Choose a file or input a URL above to get started.
' + self.summary_text.setText(text) + if self.summary_text.isVisible(): + height = self.summary_text.document().size().height() + else: + height = self.fontMetrics().height() + self.summary_text.setFixedHeight(height) def itemsFromSettings(self): # type: () -> List[Tuple[str, Options]] From 4d91b25bf270676286d2bce14ea5a73de14f0e48 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Wed, 6 Jan 2021 00:09:05 +0000 Subject: [PATCH 05/22] owcsvimport: Add table preview to mainArea, reorg controlArea --- Orange/widgets/data/owcsvimport.py | 178 +++++++++++++++++++++-------- 1 file changed, 130 insertions(+), 48 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 6a610f29bc7..e8be42e7ff5 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -20,6 +20,8 @@ import lzma import bz2 import zipfile + +from Orange.widgets.utils.itemdelegates import TableDataDelegate from itertools import chain from xml.sax.saxutils import escape @@ -40,8 +42,9 @@ ) from AnyQt.QtWidgets import ( QLabel, QComboBox, QPushButton, QDialog, QDialogButtonBox, QGridLayout, - QVBoxLayout, QSizePolicy, QStyle, QFileIconProvider, QFileDialog, - QApplication, QMessageBox, QTextBrowser, QMenu + QVBoxLayout, QSizePolicy, QFileIconProvider, QFileDialog, + QApplication, QMessageBox, QTextBrowser, + QStyle, QMenu, QHBoxLayout, QTableView, QHeaderView ) from AnyQt.QtCore import pyqtSlot as Slot, pyqtSignal as Signal @@ -56,6 +59,7 @@ from Orange.misc.collections import natural_sorted from Orange.widgets import widget, gui, settings +from Orange.widgets.data.owtable import DataTableView, RichTableModel, is_sortable, TableSliceProxy from Orange.widgets.utils.concurrent import PyOwned from Orange.widgets.utils import ( textimport, concurrent as qconcurrent, unique_everseen, enum_get, qname @@ -666,9 +670,9 @@ class Warning(widget.OWWidget.Warning): MaxHistorySize = 50 - want_main_area = False + want_main_area = True 
buttons_area_orientation = None - resizing_enabled = False + resizing_enabled = True def __init__(self, *args, **kwargs): super().__init__(self, *args, **kwargs) @@ -682,35 +686,67 @@ def __init__(self, *args, **kwargs): self.data = None - grid = QGridLayout() self.controlArea.layout().setSpacing(-1) # reset spacing ############# # File select ############# - grid.addWidget(QLabel("File:", self), 0, 0, 1, 1) + box = QHBoxLayout() + box.addWidget(QLabel("File:", self)) self.import_items_model = VarPathItemModel(self) self.import_items_model.setReplacementEnv(self._replacements()) self.recent_combo = ItemStyledComboBox( - self, objectName="recent-combo", toolTip="Recent files.", + self, objectName="recent-combo", sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon, - minimumContentsLength=16, placeholderText="Recent files…" + minimumContentsLength=16 ) self.recent_combo.setModel(self.import_items_model) self.recent_combo.activated.connect(self.activate_recent) self.recent_combo.setSizePolicy( QSizePolicy.MinimumExpanding, QSizePolicy.Fixed) + self.browse_button = QPushButton( "…", icon=self.style().standardIcon(QStyle.SP_DirOpenIcon), toolTip="Browse filesystem", autoDefault=False, ) + # A button drop down menu with selection of explicit workflow dir + # relative import. This is only enabled when 'basedir' workflow env + # is set. XXX: Always use menu, disable Import relative... action? 
+ self.browse_menu = menu = QMenu(self.browse_button) + ac = menu.addAction("Import any file…") + ac.triggered.connect(self.browse) + + ac = menu.addAction("Import relative to workflow file…") + ac.setToolTip("Import a file within the workflow file directory") + ac.triggered.connect(lambda: self.browse_relative("basedir")) + if "basedir" in self._replacements(): + self.browse_button.setMenu(menu) self.browse_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) self.browse_button.clicked.connect(self.browse) - grid.addWidget(self.recent_combo, 0, 1, 1, 1) - grid.addWidget(self.browse_button, 0, 2, 1, 1) - self.controlArea.layout().addLayout(grid) + + ######### + # Buttons + ######### + + self.import_options_button = QPushButton( + "Import Options", enabled=False, autoDefault=False, + clicked=self._activate_import_dialog + ) + + self.load_button = QPushButton( + "Load", enabled=False, default=True, + clicked=self.__committimer.start + ) + + box.addWidget(self.recent_combo) + box.addWidget(self.browse_button) + self.controlArea.layout().addLayout(box) + box = QHBoxLayout() + box.addWidget(self.import_options_button) + box.addWidget(self.load_button) + self.controlArea.layout().addLayout(box) ########### # Info text @@ -726,7 +762,7 @@ def __init__(self, *args, **kwargs): self.summary_text.setMinimumHeight(self.fontMetrics().ascent() * 2 + 4) self.summary_text.viewport().setAutoFillBackground(False) box.layout().addWidget(self.summary_text) - QTimer.singleShot(0, lambda: self._set_summary_text(None)) + self._set_summary_text(None) self.info.set_output_summary(self.info.NoOutput) @@ -735,46 +771,46 @@ def __init__(self, *args, **kwargs): ############### box = gui.widgetBox(self.controlArea, "Columns (click to edit)") self.domain_editor = DomainEditor(self) + self.domain_editor.setSizePolicy( + QSizePolicy.Expanding, QSizePolicy.Expanding + ) + + # TODO when File is removed, put these changes into domain editor + self.domain_editor.verticalHeader().hide() + 
self.domain_editor.setColumnHidden(3, True) + self.editor_model = self.domain_editor.model() box.layout().addWidget(self.domain_editor) self.editor_model.dataChanged.connect(self.__handle_domain_edit) - ######### - # Buttons - ######### - button_box = QDialogButtonBox( - orientation=Qt.Horizontal, - standardButtons=QDialogButtonBox.Cancel | QDialogButtonBox.Retry - ) - self.load_button = b = button_box.button(QDialogButtonBox.Retry) - b.setText("Load") - b.clicked.connect(self.__committimer.start) - b.setEnabled(False) - b.setDefault(True) + ############ + # Table view + ############ - self.cancel_button = b = button_box.button(QDialogButtonBox.Cancel) - b.clicked.connect(self.cancel) - b.setEnabled(False) - b.setAutoDefault(False) + # TODO consolidate the following 20 lines, they're copied from OWTable + self.table_view = view = DataTableView() + self.mainArea.layout().addWidget(view) - self.import_options_button = QPushButton( - "Import Options…", enabled=False, autoDefault=False, - clicked=self._activate_import_dialog - ) + view.setSortingEnabled(True) + view.setItemDelegate(TableDataDelegate(view)) + view.setSelectionBehavior(QTableView.SelectRows) - def update_buttons(cbindex): - self.import_options_button.setEnabled(cbindex != -1) - self.load_button.setEnabled(cbindex != -1) - self.recent_combo.currentIndexChanged.connect(update_buttons) + header = view.horizontalHeader() + header.setSectionsMovable(True) + header.setSectionsClickable(True) + header.setSortIndicatorShown(True) + header.setSortIndicator(-1, Qt.AscendingOrder) + + # QHeaderView does not 'reset' the model sort column, + # because there is no guaranty (requirement) that the + # models understand the -1 sort column. 
+ def sort_reset(index, order): + if view.model() is not None and index == -1: + view.model().sort(index, order) + + header.sortIndicatorChanged.connect(sort_reset) - button_box.addButton( - self.import_options_button, QDialogButtonBox.ActionRole - ) - button_box.setStyleSheet( - "button-layout: {:d};".format(QDialogButtonBox.MacLayout) - ) - self.controlArea.layout().addWidget(button_box) self.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Maximum) self._restoreState() @@ -1149,7 +1185,6 @@ def __set_running_state(self): self.progressBarInit() self.setBlocking(True) self.setStatusMessage("Running") - self.cancel_button.setEnabled(True) self.load_button.setText("Restart") path = self.current_item().path() self.Error.clear() @@ -1162,7 +1197,6 @@ def __clear_running_state(self, ): self.progressBarFinished() self.setStatusMessage("") self.setBlocking(False) - self.cancel_button.setEnabled(False) self.load_button.setText("Reload") self.domain_editor.setEnabled(True) @@ -1239,7 +1273,7 @@ def __handle_result(self, f): self.Outputs.data_frame.send(df) self.Outputs.data.send(table) - self._update_status_messages(table) + self._update_table(table) def __handle_domain_edit(self): if self.data is None: @@ -1262,17 +1296,65 @@ def __handle_domain_edit(self): if renamed: self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}") self.Outputs.data.send(table) - self._update_status_messages(table) + self._update_table(table) def _inspect_discrete_variables(self, domain): for var in chain(domain.variables, domain.metas): if var.is_discrete and len(var.values) > 100: self.Warning.performance_warning() - def _update_status_messages(self, data): + def _update_table(self, data): + # TODO consolidate, most of this is copied from OWTable + view = self.table_view if data is None: + view.setModel(None) return + datamodel = RichTableModel(data) + rowcount = data.approx_len() + view.setItemDelegate(TableDataDelegate(view)) + + # Enable/disable view sorting based on data's type + 
view.setSortingEnabled(is_sortable(data)) + header = view.horizontalHeader() + header.setSectionsClickable(is_sortable(data)) + header.setSortIndicatorShown(is_sortable(data)) + + view.setModel(datamodel) + + vheader = view.verticalHeader() + option = view.viewOptions() + size = view.style().sizeFromContents( + QStyle.CT_ItemViewItem, option, + QSize(20, 20), view) + + vheader.setDefaultSectionSize(size.height() + 2) + vheader.setMinimumSectionSize(5) + vheader.setSectionResizeMode(QHeaderView.Fixed) + + # Limit the number of rows displayed in the QTableView + # (workaround for QTBUG-18490 / QTBUG-28631) + maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2) + if rowcount > maxrows: + sliceproxy = TableSliceProxy( + parent=view, rowSlice=slice(0, maxrows)) + sliceproxy.setSourceModel(datamodel) + # First reset the view (without this the header view retains + # it's state - at this point invalid/broken) + view.setModel(None) + view.setModel(sliceproxy) + + assert view.model().rowCount() <= maxrows + assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0 + + model = view.model() + if isinstance(model, TableSliceProxy): + model = model.sourceModel() + + model.setRichHeaderFlags(RichTableModel.Name) + # self.set_corner_text(view, "") + + # update status messages def pluralize(seq): return "s" if len(seq) != 1 else "" From 58669f4af2b7f585c1a996f51558513f8c92dd17 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Tue, 3 Aug 2021 00:58:21 +0100 Subject: [PATCH 06/22] owcsvimport: Add reset/autocommit to domain editor --- Orange/widgets/data/owcsvimport.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index e8be42e7ff5..1f01a6f9470 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -73,6 +73,7 @@ from Orange.widgets.utils.settings import ( QSettings_readArray, QSettings_writeArray ) +from orangewidget.settings import 
Setting if typing.TYPE_CHECKING: # pylint: disable=invalid-name @@ -670,6 +671,8 @@ class Warning(widget.OWWidget.Warning): MaxHistorySize = 50 + auto_apply = Setting(True) + want_main_area = True buttons_area_orientation = None resizing_enabled = True @@ -784,6 +787,14 @@ def __init__(self, *args, **kwargs): self.editor_model.dataChanged.connect(self.__handle_domain_edit) + box = gui.hBox(box) + gui.button( + box, self, "Reset", callback=self.reset_domain_edit, + autoDefault=False + ) + gui.rubber(box) + gui.auto_apply(box, self, commit=self.apply_domain_edit) + ############ # Table view ############ @@ -1275,7 +1286,16 @@ def __handle_result(self, f): self._update_table(table) + def reset_domain_edit(self): + self.domain_editor.reset_domain() + self.apply_domain_edit() + def __handle_domain_edit(self): + self.Warning.clear() + self.apply_domain_edit() + + def apply_domain_edit(self): + # commit data to output if self.data is None: table = None else: From d900672d540f07dcba5fc21c6addb7eeafbfdf19 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Wed, 6 Jan 2021 00:53:15 +0000 Subject: [PATCH 07/22] textimport: Show header/skip row options in dataview --- Orange/widgets/utils/textimport.py | 55 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/Orange/widgets/utils/textimport.py b/Orange/widgets/utils/textimport.py index ffc82fbf06f..a3899d2627e 100644 --- a/Orange/widgets/utils/textimport.py +++ b/Orange/widgets/utils/textimport.py @@ -705,8 +705,7 @@ def __init__(self, *args, **kwargs): Qt.UserRole: ColumnType.Categorical}, {Qt.DisplayRole: "Text", Qt.UserRole: ColumnType.Text}, {Qt.DisplayRole: "Datetime", Qt.UserRole: ColumnType.Time}, - {Qt.AccessibleDescriptionRole: "separator"}, - {Qt.DisplayRole: "Ignore", + {Qt.DisplayRole: "Skip Column", Qt.UserRole: ColumnType.Skip, Qt.ToolTipRole: "The column will not be loaded"} ] @@ -1049,38 +1048,50 @@ def __on_column_type_edit_activated(self, idx): 
self.__setColumnType(columns, coltype) def __dataview_context_menu(self, pos): - pos = self.dataview.viewport().mapToGlobal(pos) cols = self.dataview.selectionModel().selectedColumns(0) cols = [idx.column() for idx in cols] - self.__run_type_columns_menu(pos, cols) + menu = self.__generate_type_columns_menu(cols) + menu.addSection("") + menu = self.__generate_vheader_context_menu(pos, menu) + pos = self.dataview.viewport().mapToGlobal(pos) + menu.popup(pos) def __hheader_context_menu(self, pos): - pos = self.dataview.horizontalHeader().mapToGlobal(pos) cols = self.dataview.selectionModel().selectedColumns(0) cols = [idx.column() for idx in cols] - self.__run_type_columns_menu(pos, cols) + menu = self.__generate_type_columns_menu(cols) + pos = self.dataview.horizontalHeader().mapToGlobal(pos) + menu.popup(pos) def __vheader_context_menu(self, pos): + menu = self.__generate_vheader_context_menu(pos) + pos = self.dataview.verticalHeader().mapToGlobal(pos) + menu.popup(pos) + + def __generate_vheader_context_menu(self, pos, m=None): header = self.dataview.verticalHeader() # type: QHeaderView - index = header.logicalIndexAt(pos) - pos = header.mapToGlobal(pos) model = header.model() # type: QAbstractTableModel + index = self.dataview.indexAt(pos).row() RowStateRole = TablePreviewModel.RowStateRole state = model.headerData(index, Qt.Vertical, RowStateRole) - m = QMenu(header) - skip_action = m.addAction("Skip") - skip_action.setCheckable(True) - skip_action.setChecked(state == TablePreview.Skipped) - m.addSection("") + + if m is None: + m = QMenu(self) + mark_header = m.addAction("Header") mark_header.setCheckable(True) mark_header.setChecked(state == TablePreview.Header) + skip_action = m.addAction("Skip Row") + skip_action.setCheckable(True) + skip_action.setChecked(state == TablePreview.Skipped) def update_row_state(action): # type: (QAction) -> None + if action not in (mark_header, skip_action): + return state = None - if action is mark_header: + if action is 
mark_header and action.isChecked(): state = TablePreview.Header if action.isChecked() else None elif action is skip_action: state = TablePreview.Skipped if action.isChecked() else None @@ -1088,16 +1099,17 @@ def update_row_state(action): self.dataview.setRowHints({index: state}) m.triggered.connect(update_row_state) - m.popup(pos) + return m - def __run_type_columns_menu(self, pos, columns): - # type: (QPoint, List[int]) -> None + def __generate_type_columns_menu(self, columns, menu=None): + # type: (List[int], Optional[QMenu]) -> None # Open a QMenu at pos for setting column types for column indices list # `columns` model = self.__previewmodel + if menu is None: + menu = QMenu(self) if model is None: - return - menu = QMenu(self) + return menu menu.setAttribute(Qt.WA_DeleteOnClose) coltypes = {model.headerData( i, Qt.Horizontal, TablePreviewModel.ColumnTypeRole) @@ -1126,11 +1138,12 @@ def __run_type_columns_menu(self, pos, columns): def update_types(action): newtype = action.data() - self.__setColumnType(columns, newtype) + if isinstance(newtype, ColumnType): + self.__setColumnType(columns, newtype) menu.triggered.connect(update_types) menu.triggered.connect(self.__update_column_type_edit) - menu.popup(pos, current_action) + return menu def __setColumnType(self, columns, coltype): # type: (List[int], ColumnType) -> None From ca12e6d543fada4ccf79ee5c6044ad1da6b01e0d Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Wed, 6 Jan 2021 00:56:38 +0000 Subject: [PATCH 08/22] textimport: Select column on horizontal header right-click --- Orange/widgets/utils/textimport.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Orange/widgets/utils/textimport.py b/Orange/widgets/utils/textimport.py index a3899d2627e..9e42cbf8f42 100644 --- a/Orange/widgets/utils/textimport.py +++ b/Orange/widgets/utils/textimport.py @@ -1057,6 +1057,8 @@ def __dataview_context_menu(self, pos): menu.popup(pos) def __hheader_context_menu(self, pos): + col = 
self.dataview.horizontalHeader().logicalIndexAt(pos) + self.dataview.selectColumn(col) cols = self.dataview.selectionModel().selectedColumns(0) cols = [idx.column() for idx in cols] menu = self.__generate_type_columns_menu(cols) From 4eb255f0f7c05bd5a1413fe14c9ec5e7f16d37c8 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Thu, 7 Jan 2021 02:36:13 +0000 Subject: [PATCH 09/22] test_owcsvimport: Test numeric->text warning --- Orange/widgets/data/tests/test_owcsvimport.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Orange/widgets/data/tests/test_owcsvimport.py b/Orange/widgets/data/tests/test_owcsvimport.py index 44adc1e4931..721e923d66b 100644 --- a/Orange/widgets/data/tests/test_owcsvimport.py +++ b/Orange/widgets/data/tests/test_owcsvimport.py @@ -359,6 +359,24 @@ def test_browse_for_missing_prefixed_parent(self): self.assertEqual(item[0], cur.varPath()) self.assertEqual(item[1].as_dict(), cur.options().as_dict()) + def test_unsafe_cast_warning(self): + dirname = os.path.dirname(__file__) + path = os.path.join(dirname, "data-regions.tab") + + w = self.create_widget( + owcsvimport.OWCSVFileImport, + stored_settings={ + "_session_items": [ + (path, self.data_regions_options.as_dict()) + ] + } + ) + w.activate_recent(0) + self.process_events(until=lambda: w.data is not None) + index = w.domain_editor.model().index(0, 1) + w.domain_editor.model().setData(index, 'text') + self.assertTrue(w.Warning.numeric_cast.is_shown()) + class TestImportDialog(GuiTest): @staticmethod From c7ebd84abbfed937b26279006b3d8d8d30e468d4 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Wed, 4 Aug 2021 23:46:47 +0100 Subject: [PATCH 10/22] owcsvimport: Use contexts --- Orange/widgets/data/owcsvimport.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 1f01a6f9470..45215cbc4c0 100644 --- a/Orange/widgets/data/owcsvimport.py +++ 
b/Orange/widgets/data/owcsvimport.py @@ -21,6 +21,7 @@ import bz2 import zipfile +from Orange.widgets.settings import PerfectDomainContextHandler from Orange.widgets.utils.itemdelegates import TableDataDelegate from itertools import chain @@ -73,7 +74,7 @@ from Orange.widgets.utils.settings import ( QSettings_readArray, QSettings_writeArray ) -from orangewidget.settings import Setting +from orangewidget.settings import Setting, ContextSetting, SettingProvider if typing.TYPE_CHECKING: # pylint: disable=invalid-name @@ -663,6 +664,11 @@ class Warning(widget.OWWidget.Warning): "filter": "" }) # type: Dict[str, str] + settingsHandler = PerfectDomainContextHandler( + match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL + ) + domain_editor = SettingProvider(DomainEditor) + # we added column type guessing to this widget, which breaks compatibility # with older saved workflows, where types not guessed differently, when # compatibility_mode=True widget have older guessing behaviour @@ -1133,6 +1139,7 @@ def commit(self): Any existing pending task is canceled. 
""" self.__committimer.stop() + self.closeContext() if self.__watcher is not None: self.__cancel_task() self.error() @@ -1279,14 +1286,16 @@ def __handle_result(self, f): domain = None self.data = table - self.domain_editor.set_domain(domain) + self.Warning.clear() + self.openContext(domain) + self.apply_domain_edit() self.Outputs.data_frame.send(df) - self.Outputs.data.send(table) - + self._update_table(table) def reset_domain_edit(self): + self.Warning.clear() self.domain_editor.reset_domain() self.apply_domain_edit() From 1c3dbcc19dcc57be1214f1b90a8da67667185f7d Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Wed, 4 Aug 2021 23:47:57 +0100 Subject: [PATCH 11/22] owcsvimport: Warn on unsafe cast --- Orange/widgets/data/owcsvimport.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 45215cbc4c0..83aa9b2fdf7 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -56,7 +56,7 @@ from pandas.api import types as pdtypes import Orange.data -from Orange.data import Table +from Orange.data import Table, ContinuousVariable, StringVariable from Orange.misc.collections import natural_sorted from Orange.widgets import widget, gui, settings @@ -66,7 +66,7 @@ textimport, concurrent as qconcurrent, unique_everseen, enum_get, qname ) from Orange.widgets.utils.combobox import ItemStyledComboBox -from Orange.widgets.utils.domaineditor import DomainEditor +from Orange.widgets.utils.domaineditor import DomainEditor, Column from Orange.widgets.utils.pathutils import ( PathItem, VarPath, AbsPath, samepath, prettyfypath, isprefixed, ) @@ -651,6 +651,11 @@ class Warning(widget.OWWidget.Warning): "Categorical variables with >100 values may decrease performance.") renamed_vars = widget.Msg("Some variables have been renamed " "to avoid duplicates.\n{}") + numeric_cast = widget.Msg('Loading data as numeric and changing it to text ' + 
'may result in altered values.\n' + 'For example, 001 turns into 1.\n' + 'Change the variable type to String in ' + 'the Import Options to avoid this.') #: Paths and options of files accessed in a 'session' _session_items = settings.Setting( @@ -1322,6 +1327,7 @@ def apply_domain_edit(self): table.ids = np.array(self.data.ids) table.attributes = getattr(self.data, 'attributes', {}) self._inspect_discrete_variables(domain) + self._inspect_unsafe_cast() if renamed: self.Warning.renamed_vars(f"Renamed: {', '.join(renamed)}") self.Outputs.data.send(table) @@ -1332,6 +1338,15 @@ def _inspect_discrete_variables(self, domain): if var.is_discrete and len(var.values) > 100: self.Warning.performance_warning() + def _inspect_unsafe_cast(self): + for orig_var, var in zip( + self.domain_editor.model().orig_variables, + self.domain_editor.model().variables + ): + if orig_var[Column.tpe] is ContinuousVariable and \ + var[Column.tpe] is StringVariable: + self.Warning.numeric_cast() + def _update_table(self, data): # TODO consolidate, most of this is copied from OWTable view = self.table_view From 202d8c7fe46b1f4ef19b4de9671b6a90aaa75eb2 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Thu, 5 Aug 2021 00:35:41 +0100 Subject: [PATCH 12/22] domaineditor: Fix combobox hack Before it would set the wrong choice sometimes, as the combobox would move under your cursor and be set again --- Orange/widgets/utils/domaineditor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/widgets/utils/domaineditor.py b/Orange/widgets/utils/domaineditor.py index d9f9b016540..d5688125ff9 100644 --- a/Orange/widgets/utils/domaineditor.py +++ b/Orange/widgets/utils/domaineditor.py @@ -156,7 +156,7 @@ def hidePopup(me): if me.popup_shown: self.view.model().setData( index, me.highlighted_text, Qt.EditRole) - self.popup_shown = False + me.popup_shown = False super().hidePopup() self.view.closeEditor(me, self.NoHint) From 9ac47c6f9e9005d55b65ffc9606b59965e3683b3 Mon Sep 17 00:00:00
2001 From: Rafael Irgolic Date: Thu, 5 Aug 2021 01:41:05 +0100 Subject: [PATCH 13/22] test_owcsvimport: Fixup tests --- Orange/widgets/data/tests/test_owcsvimport.py | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/Orange/widgets/data/tests/test_owcsvimport.py b/Orange/widgets/data/tests/test_owcsvimport.py index 721e923d66b..5ce85a295f0 100644 --- a/Orange/widgets/data/tests/test_owcsvimport.py +++ b/Orange/widgets/data/tests/test_owcsvimport.py @@ -76,7 +76,7 @@ def test_basic(self): data_regions_options = owcsvimport.Options( encoding="ascii", dialect=csv.excel_tab(), columntypes=[ - (range(0, 1), ColumnType.Categorical), + (range(0, 1), ColumnType.Numeric), (range(1, 2), ColumnType.Text), (range(2, 3), ColumnType.Categorical), ], rowspec=[ @@ -90,10 +90,10 @@ def test_basic(self): def _check_data_regions(self, table): self.assertEqual(len(table), 3) self.assertEqual(len(table), 3) - self.assertTrue(table.domain["id"].is_discrete) + self.assertTrue(table.domain["id"].is_continuous) self.assertTrue(table.domain["continent"].is_discrete) self.assertTrue(table.domain["state"].is_string) - assert_array_equal(table.X, [[0, 1], [1, 1], [2, 0]]) + assert_array_equal(table.X, [[1, 1], [2, 1], [3, 0]]) assert_array_equal(table.metas, np.array([["UK"], ["Russia"], ["Mexico"]], object)) @@ -253,34 +253,22 @@ def test_browse(self): cur = widget.current_item() self.assertIsNotNone(cur) self.assertTrue(samepath(cur.path(), path)) + self.assertIsInstance(cur.varPath(), PathItem.AbsPath) - def test_browse_prefix(self): + def test_browse_relative(self): widget = self.widget path = self.data_regions_path with self._browse_setup(widget, path): basedir = os.path.dirname(__file__) widget.workflowEnv = lambda: {"basedir": basedir} widget.workflowEnvChanged("basedir", basedir, "") - widget.browse_relative(prefixname="basedir") + widget.browse() cur = widget.current_item() self.assertIsNotNone(cur) self.assertTrue(samepath(cur.path(), path)) 
self.assertIsInstance(cur.varPath(), PathItem.VarPath) - def test_browse_prefix_parent(self): - widget = self.widget - path = self.data_regions_path - - with self._browse_setup(widget, path): - basedir = os.path.join(os.path.dirname(__file__), "bs") - widget.workflowEnv = lambda: {"basedir": basedir} - widget.workflowEnvChanged("basedir", basedir, "") - mb = widget._path_must_be_relative_mb = mock.Mock() - widget.browse_relative(prefixname="basedir") - mb.assert_called() - self.assertIsNone(widget.current_item()) - def test_browse_for_missing(self): missing = os.path.dirname(__file__) + "/this file does not exist.csv" widget = self.create_widget( @@ -375,6 +363,7 @@ def test_unsafe_cast_warning(self): self.process_events(until=lambda: w.data is not None) index = w.domain_editor.model().index(0, 1) w.domain_editor.model().setData(index, 'text') + w.apply_domain_edit() self.assertTrue(w.Warning.numeric_cast.is_shown()) From cd577d98fbe11dd100925dc1877db41229ebc4ec Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Thu, 5 Aug 2021 01:58:53 +0100 Subject: [PATCH 14/22] pylint --- Orange/widgets/data/owcsvimport.py | 32 +++++++++++++++--------------- Orange/widgets/utils/textimport.py | 6 ++---- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 83aa9b2fdf7..1fbf725d408 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -20,9 +20,6 @@ import lzma import bz2 import zipfile - -from Orange.widgets.settings import PerfectDomainContextHandler -from Orange.widgets.utils.itemdelegates import TableDataDelegate from itertools import chain from xml.sax.saxutils import escape @@ -39,10 +36,10 @@ Qt, QFileInfo, QTimer, QSettings, QObject, QSize, QMimeDatabase, QMimeType ) from AnyQt.QtGui import ( - QStandardItem, QStandardItemModel, QPalette, QColor, QIcon, QTextOption + QStandardItem, QStandardItemModel, QPalette, QColor, QIcon, ) from 
AnyQt.QtWidgets import ( - QLabel, QComboBox, QPushButton, QDialog, QDialogButtonBox, QGridLayout, + QLabel, QComboBox, QPushButton, QDialog, QDialogButtonBox, QVBoxLayout, QSizePolicy, QFileIconProvider, QFileDialog, QApplication, QMessageBox, QTextBrowser, QStyle, QMenu, QHBoxLayout, QTableView, QHeaderView @@ -55,12 +52,18 @@ from pandas.api import types as pdtypes +from orangewidget.settings import Setting, SettingProvider, widget_settings_dir + import Orange.data from Orange.data import Table, ContinuousVariable, StringVariable from Orange.misc.collections import natural_sorted -from Orange.widgets import widget, gui, settings +from Orange.widgets import widget, gui from Orange.widgets.data.owtable import DataTableView, RichTableModel, is_sortable, TableSliceProxy +from Orange.widgets.settings import PerfectDomainContextHandler +from Orange.widgets.utils.settings import ( + QSettings_readArray, QSettings_writeArray +) from Orange.widgets.utils.concurrent import PyOwned from Orange.widgets.utils import ( textimport, concurrent as qconcurrent, unique_everseen, enum_get, qname @@ -71,10 +74,7 @@ PathItem, VarPath, AbsPath, samepath, prettyfypath, isprefixed, ) from Orange.widgets.utils.overlay import OverlayWidget -from Orange.widgets.utils.settings import ( - QSettings_readArray, QSettings_writeArray -) -from orangewidget.settings import Setting, ContextSetting, SettingProvider +from Orange.widgets.utils.itemdelegates import TableDataDelegate if typing.TYPE_CHECKING: # pylint: disable=invalid-name @@ -658,13 +658,13 @@ class Warning(widget.OWWidget.Warning): 'the Import Options to avoid this.') #: Paths and options of files accessed in a 'session' - _session_items = settings.Setting( + _session_items = Setting( [], schema_only=True) # type: List[Tuple[str, dict]] - _session_items_v2 = settings.Setting( + _session_items_v2 = Setting( [], schema_only=True) # type: List[Tuple[Dict[str, str], dict]] #: Saved dialog state (last directory and selected filter) - 
dialog_state = settings.Setting({ + dialog_state = Setting({ "directory": "", "filter": "" }) # type: Dict[str, str] @@ -678,7 +678,7 @@ class Warning(widget.OWWidget.Warning): # with older saved workflows, where types not guessed differently, when # compatibility_mode=True widget have older guessing behaviour settings_version = 3 - compatibility_mode = settings.Setting(False, schema_only=True) + compatibility_mode = Setting(False, schema_only=True) MaxHistorySize = 50 @@ -963,7 +963,7 @@ def _path_must_be_relative_mb(self, prefix: str) -> QMessageBox: return mb @Slot() - def browse(self, prefixname=None, directory=None): + def browse(self): """ Open a file dialog and select a user specified file. """ @@ -1084,7 +1084,7 @@ def _local_settings(cls): # type: () -> QSettings """Return a QSettings instance with local persistent settings.""" filename = "{}.ini".format(qname(cls)) - fname = os.path.join(settings.widget_settings_dir(), filename) + fname = os.path.join(widget_settings_dir(), filename) return QSettings(fname, QSettings.IniFormat) def _add_recent(self, filename, options=None): diff --git a/Orange/widgets/utils/textimport.py b/Orange/widgets/utils/textimport.py index 9e42cbf8f42..a4d0ab0a52c 100644 --- a/Orange/widgets/utils/textimport.py +++ b/Orange/widgets/utils/textimport.py @@ -37,7 +37,7 @@ ) from AnyQt.QtCore import ( - Qt, QSize, QPoint, QRect, QRectF, QRegularExpression, QAbstractTableModel, + Qt, QSize, QRect, QRectF, QRegularExpression, QAbstractTableModel, QModelIndex, QItemSelectionModel, QTextBoundaryFinder, QTimer, QEvent ) from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot @@ -1123,7 +1123,6 @@ def __generate_type_columns_menu(self, columns, menu=None): current = None cb = self.column_type_edit_cb g = QActionGroup(menu) - current_action = None # 'Copy' the column types model into a menu for i in range(cb.count()): if cb.itemData(i, Qt.AccessibleDescriptionRole) == "separator": @@ -1135,7 +1134,6 @@ def 
__generate_type_columns_menu(self, columns, menu=None): ac.setCheckable(True) if ac.data() == current: ac.setChecked(True) - current_action = ac g.addAction(ac) def update_types(action): @@ -1736,7 +1734,7 @@ def format_exception_csv(err): def parse_datetime(text): global _to_datetime if _to_datetime is None: - from pandas import to_datetime as _to_datetime + from pandas import to_datetime as _to_datetime # pylint: disable=redefined-outer-name return _to_datetime(text) From a9e23ba4f00af9813c22b25c6b54a546babc0f86 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 17:18:14 +0100 Subject: [PATCH 15/22] owcsvimport: Use more delimiters --- Orange/widgets/data/owcsvimport.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 1fbf725d408..16e20dd4387 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -594,11 +594,10 @@ def default_options_for_mime_type( "text/tab-separated-values": (csv.excel_tab(), True) } dialect, header, encoding = csv.excel(), True, "utf-8" - delimiters = None try_encodings = ["utf-8", "utf-16", "iso8859-1"] + delimiters = [d[1] for d in CSVOptionsWidget.PresetDelimiters] if mime_type in defaults: dialect, header = defaults[mime_type] - delimiters = [dialect.delimiter] for encoding_ in try_encodings: try: From 28c688a3dcb766d1f77d7ff7e7317599bbf2b393 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 19:13:18 +0100 Subject: [PATCH 16/22] owcsvimport: Select file filter to use registered loader --- Orange/widgets/data/owcsvimport.py | 165 ++++++++++-------- Orange/widgets/data/tests/test_owcsvimport.py | 22 ++- 2 files changed, 114 insertions(+), 73 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 16e20dd4387..e29954cd68e 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -29,7 +29,7 @@ 
import typing from typing import ( List, Tuple, Dict, Optional, Any, Callable, Iterable, - Union, AnyStr, BinaryIO, Set, Type, Mapping, Sequence, NamedTuple + Union, AnyStr, BinaryIO, Set, Type, Mapping, Sequence ) from AnyQt.QtCore import ( @@ -52,15 +52,21 @@ from pandas.api import types as pdtypes +from orangewidget.utils.filedialogs import format_filter from orangewidget.settings import Setting, SettingProvider, widget_settings_dir +from Orange.util import log_warnings + import Orange.data -from Orange.data import Table, ContinuousVariable, StringVariable +from Orange.data import Table, ContinuousVariable, StringVariable, FileFormat +from Orange.data.io import class_from_qualified_name + from Orange.misc.collections import natural_sorted from Orange.widgets import widget, gui from Orange.widgets.data.owtable import DataTableView, RichTableModel, is_sortable, TableSliceProxy from Orange.widgets.settings import PerfectDomainContextHandler +from Orange.widgets.utils.textimport import CSVOptionsWidget from Orange.widgets.utils.settings import ( QSettings_readArray, QSettings_writeArray ) @@ -129,13 +135,15 @@ class Options: def __init__(self, encoding='utf-8', dialect=csv.excel(), columntypes: Iterable[Tuple[range, 'ColumnType']] = (), rowspec=((range(0, 1), RowSpec.Header),), - decimal_separator=".", group_separator="") -> None: + decimal_separator=".", group_separator="", + reader=None) -> None: self.encoding = encoding self.dialect = dialect self.columntypes = list(columntypes) # type: List[Tuple[range, ColumnType]] self.rowspec = list(rowspec) # type: List[Tuple[range, RowSpec]] self.decimal_separator = decimal_separator self.group_separator = group_separator + self.reader = reader def __eq__(self, other): """ @@ -177,6 +185,7 @@ def as_dict(self): "rowspec": Options.spec_as_encodable(self.rowspec), "decimal_separator": self.decimal_separator, "group_separator": self.group_separator, + "reader": self.reader, } @staticmethod @@ -202,10 +211,11 @@ def 
from_dict(mapping): rowspec = Options.spec_from_encodable(rowspec, RowSpec) decimal = mapping.get("decimal_separator", ".") group = mapping.get("group_separator", "") + reader = mapping.get("reader", None) return Options(encoding, dialect, colspec, rowspec, decimal_separator=decimal, - group_separator=group) + group_separator=group, reader=reader) @staticmethod def spec_as_encodable(spec): @@ -550,29 +560,23 @@ def move_item_to_index(model: QStandardItemModel, item: QStandardItem, index: in model.insertRow(index, [item]) -class FileFormat(NamedTuple): - mime_type: str - name: str - globs: Sequence[str] - - -FileFormats = [ - FileFormat("text/csv", "Text - comma separated", ("*.csv", "*")), - FileFormat("text/tab-separated-values", "Text - tab separated", ("*.tsv", "*")), - FileFormat("text/plain", "Text - all files", ("*.txt", "*")), -] - - class FileDialog(QFileDialog): - __formats: Sequence[FileFormat] = () - - @staticmethod - def filterStr(f: FileFormat) -> str: - return f"{f.name} ({', '.join(f.globs)})" + __filters: Sequence[FileFormat] = () + __file_formats: Sequence[str] = () def setFileFormats(self, formats: Sequence[FileFormat]): - filters = [FileDialog.filterStr(f) for f in formats] - self.__formats = tuple(formats) + file_formats = sorted(set(formats), key=lambda w: (w.PRIORITY, w.DESCRIPTION)) + self.__file_formats = file_formats + filters = [format_filter(f) for f in file_formats] + self.__filters = filters + + all_extensions = set() + for f in file_formats: + all_extensions.update(f.EXTENSIONS) + file_formats.insert(0, None) + filters.insert(0, "All readable files (*{})".format( + ' *'.join(sorted(all_extensions)))) + self.setNameFilters(filters) def fileFormats(self) -> Sequence[FileFormat]: @@ -580,24 +584,14 @@ def fileFormats(self) -> Sequence[FileFormat]: def selectedFileFormat(self) -> FileFormat: filter_ = self.selectedNameFilter() - index = index_where( - self.__formats, lambda f: FileDialog.filterStr(f) == filter_ - ) - return 
self.__formats[index] + index = self.__filters.index(filter_) + return self.__file_formats[index] -def default_options_for_mime_type( - path: str, mime_type: str -) -> Options: - defaults = { - "text/csv": (csv.excel(), True), - "text/tab-separated-values": (csv.excel_tab(), True) - } +def default_options_for_path(path: str) -> Options: dialect, header, encoding = csv.excel(), True, "utf-8" try_encodings = ["utf-8", "utf-16", "iso8859-1"] delimiters = [d[1] for d in CSVOptionsWidget.PresetDelimiters] - if mime_type in defaults: - dialect, header = defaults[mime_type] for encoding_ in try_encodings: try: @@ -636,6 +630,7 @@ class Outputs: ) class Error(widget.OWWidget.Error): + unknown = widget.Msg("Read error:\n{}") error = widget.Msg( "Unexpected error" ) @@ -646,6 +641,7 @@ class Error(widget.OWWidget.Error): ) class Warning(widget.OWWidget.Warning): + load_warning = widget.Msg("Read warning:\n{}") performance_warning = widget.Msg( "Categorical variables with >100 values may decrease performance.") renamed_vars = widget.Msg("Some variables have been renamed " @@ -921,8 +917,10 @@ def _browse_dialog(self): acceptMode=QFileDialog.AcceptOpen, fileMode=QFileDialog.ExistingFile ) - - dlg.setFileFormats(FileFormats) + readers = [f for f in FileFormat.formats + if getattr(f, 'read', None) + and getattr(f, "EXTENSIONS", None)] + dlg.setFileFormats(readers) state = self.dialog_state lastdir = state.get("directory", "") lastfilter = state.get("filter", "") @@ -977,42 +975,52 @@ def browse(self): status = dlg.exec() dlg.deleteLater() if status == QFileDialog.Accepted: - selected_filter = dlg.selectedFileFormat() + reader = dlg.selectedFileFormat() path = dlg.selectedFiles()[0] assert os.path.isfile(path) + if directory and os.path.commonprefix([directory, path]) == directory: varpath = VarPath("basedir", os.path.relpath(path, directory)) else: varpath = PathItem.AbsPath(path) - # pre-flight check; try to determine the nature of the file - mtype = _mime_type_for_path(path) - 
if not mtype.inherits("text/plain"): - mb = self._might_be_binary_mb(path) - if mb.exec() == QMessageBox.Cancel: - return - # initialize options based on selected format - options = default_options_for_mime_type( - path, selected_filter.mime_type, - ) - # Search for path in history. - # If found use the stored params to initialize the import dialog - items = self.itemsFromSettings() - idx = index_where(items, lambda t: samepath(t[0], path)) - if idx is not None: - _, options_ = items[idx] - if options_ is not None: - options = options_ - dlg = CSVImportDialog( - self, windowTitle="Import Options", sizeGripEnabled=True) - dlg.setWindowModality(Qt.WindowModal) - dlg.setPath(path) - dlg.setOptions(options) - status = dlg.exec() - dlg.deleteLater() - if status == QDialog.Accepted: - self.set_selected_file(path, dlg.options()) - self.current_item().setVarPath(varpath) + if reader is not None: + self._prepare_reader(reader, path, varpath) + else: + self._prepare_csv_import(path, varpath) + + def _prepare_reader(self, reader, path, varpath): + options = Options(reader=reader.qualified_name()) + self.set_selected_file(path, options) + self.current_item().setVarPath(varpath) + + def _prepare_csv_import(self, path, varpath): + # pre-flight check; try to determine the nature of the file + mtype = _mime_type_for_path(path) + if not mtype.inherits("text/plain"): + mb = self._might_be_binary_mb(path) + if mb.exec() == QMessageBox.Cancel: + return + # initialize options based on selected format + options = default_options_for_path(path) + # Search for path in history. 
+ # If found use the stored params to initialize the import dialog + items = self.itemsFromSettings() + idx = index_where(items, lambda t: samepath(t[0], path)) + if idx is not None: + _, options_ = items[idx] + if options_ is not None and options_.reader is None: + options = options_ + dlg = CSVImportDialog( + self, windowTitle="Import Options", sizeGripEnabled=True) + dlg.setWindowModality(Qt.WindowModal) + dlg.setPath(path) + dlg.setOptions(options) + status = dlg.exec() + dlg.deleteLater() + if status == QDialog.Accepted: + self.set_selected_file(path, dlg.options()) + self.current_item().setVarPath(varpath) def current_item(self): # type: () -> Optional[ImportItem] @@ -1156,6 +1164,11 @@ def commit(self): if not isinstance(opts, Options): return + if opts.reader: + reader = class_from_qualified_name(opts.reader) + self._use_reader(path, reader) + return + task = state = TaskState() state.future = ... state.watcher = qconcurrent.FutureWatcher() @@ -1176,6 +1189,20 @@ def progress_(i, j): self.__watcher = w self.__set_running_state() + def _use_reader(self, path, reader): + with log_warnings() as warnings: + try: + data = reader(path).read() + except Exception as ex: + _log.exception(ex) + return lambda x=ex: self.Error.unknown(str(x)) + if warnings: + self.Warning.load_warning(warnings[-1].message.args[0]) + self.__clear_running_state() + self.data = data + self.openContext(data.domain) + self.apply_domain_edit() + @Slot('qint64', 'qint64') def __set_read_progress(self, read, count): if count > 0: @@ -1296,8 +1323,6 @@ def __handle_result(self, f): self.Outputs.data_frame.send(df) - self._update_table(table) - def reset_domain_edit(self): self.Warning.clear() self.domain_editor.reset_domain() diff --git a/Orange/widgets/data/tests/test_owcsvimport.py b/Orange/widgets/data/tests/test_owcsvimport.py index 5ce85a295f0..0d210b1b231 100644 --- a/Orange/widgets/data/tests/test_owcsvimport.py +++ b/Orange/widgets/data/tests/test_owcsvimport.py @@ -17,11 +17,12 @@ from 
AnyQt.QtWidgets import QFileDialog from AnyQt.QtTest import QSignalSpy +from Orange.data.io import CSVReader from orangewidget.tests.utils import simulate from orangewidget.widget import OWBaseWidget from Orange.data import DiscreteVariable, TimeVariable, ContinuousVariable, \ - StringVariable + StringVariable, Table from Orange.tests import named_file from Orange.widgets.tests.base import WidgetTest, GuiTest from Orange.widgets.data import owcsvimport @@ -233,7 +234,7 @@ def test_backward_compatibility(self): @staticmethod @contextmanager - def _browse_setup(widget: OWCSVFileImport, path: str): + def _browse_setup(widget: OWCSVFileImport, path: str, reader=None): browse_dialog = widget._browse_dialog with mock.patch.object(widget, "_browse_dialog") as r: dlg = browse_dialog() @@ -242,7 +243,9 @@ def _browse_setup(widget: OWCSVFileImport, path: str): dlg.exec = lambda: QFileDialog.Accepted r.return_value = dlg with mock.patch.object(owcsvimport.CSVImportDialog, "exec", - lambda _: QFileDialog.Accepted): + lambda _: QFileDialog.Accepted), \ + mock.patch.object(owcsvimport.FileDialog, "selectedFileFormat", + lambda _: reader): yield def test_browse(self): @@ -366,6 +369,19 @@ def test_unsafe_cast_warning(self): w.apply_domain_edit() self.assertTrue(w.Warning.numeric_cast.is_shown()) + def load_dataset(self, path, reader=None): + with self._browse_setup(self.widget, path, reader): + self.widget.browse() + + def test_load_iris(self): + self.load_dataset(os.path.join( + os.path.dirname(__file__), '../../../datasets/iris.tab' + ), CSVReader) + self.process_events(until=lambda: self.widget.data is not None) + data = Table('iris') + self.assertEqual(self.widget.table_view.model().rowCount(), + len(data)) + class TestImportDialog(GuiTest): @staticmethod From 63e736dea65f9e3355732f7401f94dfbbca85e2f Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 20:29:20 +0100 Subject: [PATCH 17/22] io: Wrap redirect resolve in try/except This would fail with google 
sheets if it included #gid=? --- Orange/data/io.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Orange/data/io.py b/Orange/data/io.py index 1a952bcd0cf..a797aa9c72a 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -13,6 +13,7 @@ from os import path, remove from tempfile import NamedTemporaryFile +from urllib.error import HTTPError from urllib.parse import urlparse, urlsplit, urlunsplit, \ unquote as urlunquote, quote from urllib.request import urlopen, Request @@ -437,8 +438,11 @@ def read(self): def _resolve_redirects(self, url): # Resolve (potential) redirects to a final URL - with contextlib.closing(self.urlopen(url)) as response: - return response.url + try: + with contextlib.closing(self.urlopen(url)) as response: + return response.url + except HTTPError: + return url @classmethod def _trim(cls, url): From 53d2c238fc05fbe803d6256c3dd02845443901a3 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 20:44:48 +0100 Subject: [PATCH 18/22] io: Go for https over http --- Orange/data/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/data/io.py b/Orange/data/io.py index a797aa9c72a..9853ceb6857 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -406,7 +406,7 @@ class UrlReader(FileFormat): def __init__(self, filename): filename = filename.strip() if not urlparse(filename).scheme: - filename = 'http://' + filename + filename = 'https://' + filename filename = quote(filename, safe="/:") super().__init__(filename) From 0b5d127788d8a56f297c5bc2b6b6dfea108427b3 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 20:45:51 +0100 Subject: [PATCH 19/22] io: Fix google spreadsheet sheet id bug --- Orange/data/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Orange/data/io.py b/Orange/data/io.py index 9853ceb6857..3c7cfc17907 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -464,7 +464,7 @@ def _trim_googlesheet(url): match = 
re.match(r'(?:https?://)?(?:www\.)?' r'docs\.google\.com/spreadsheets/d/' r'(?P[-\w_]+)' - r'(?:/.*?gid=(?P\d+).*|.*)?', + r'(?:/.*?gid(=|%3D)(?P\d+).*|.*)?', url, re.IGNORECASE) try: workbook, sheet = match.group('workbook_id'), match.group('sheet_id') From dccf88cd12fa24a9d61c3b083141a83e537c9d9d Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 21:51:57 +0100 Subject: [PATCH 20/22] owcsvimport: Support spreadsheets and url --- Orange/widgets/data/owcsvimport.py | 179 +++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 21 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index e29954cd68e..ee0ded1b473 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -21,6 +21,7 @@ import bz2 import zipfile from itertools import chain +from urllib.parse import urlparse from xml.sax.saxutils import escape from functools import singledispatch @@ -42,7 +43,7 @@ QLabel, QComboBox, QPushButton, QDialog, QDialogButtonBox, QVBoxLayout, QSizePolicy, QFileIconProvider, QFileDialog, QApplication, QMessageBox, QTextBrowser, - QStyle, QMenu, QHBoxLayout, QTableView, QHeaderView + QStyle, QMenu, QHBoxLayout, QTableView, QHeaderView, QGridLayout, QCompleter ) from AnyQt.QtCore import pyqtSlot as Slot, pyqtSignal as Signal @@ -59,11 +60,12 @@ import Orange.data from Orange.data import Table, ContinuousVariable, StringVariable, FileFormat -from Orange.data.io import class_from_qualified_name +from Orange.data.io import class_from_qualified_name, CSVReader, UrlReader from Orange.misc.collections import natural_sorted from Orange.widgets import widget, gui +from Orange.widgets.data.owfile import NamedURLModel, LineEditSelectOnFocus from Orange.widgets.data.owtable import DataTableView, RichTableModel, is_sortable, TableSliceProxy from Orange.widgets.settings import PerfectDomainContextHandler from Orange.widgets.utils.textimport import CSVOptionsWidget @@ -136,7 +138,7 @@ def 
__init__(self, encoding='utf-8', dialect=csv.excel(), columntypes: Iterable[Tuple[range, 'ColumnType']] = (), rowspec=((range(0, 1), RowSpec.Header),), decimal_separator=".", group_separator="", - reader=None) -> None: + reader=None, sheet=None) -> None: self.encoding = encoding self.dialect = dialect self.columntypes = list(columntypes) # type: List[Tuple[range, ColumnType]] @@ -144,6 +146,7 @@ def __init__(self, encoding='utf-8', dialect=csv.excel(), self.decimal_separator = decimal_separator self.group_separator = group_separator self.reader = reader + self.sheet = sheet def __eq__(self, other): """ @@ -186,6 +189,7 @@ def as_dict(self): "decimal_separator": self.decimal_separator, "group_separator": self.group_separator, "reader": self.reader, + "sheet": self.sheet, } @staticmethod @@ -212,10 +216,12 @@ def from_dict(mapping): decimal = mapping.get("decimal_separator", ".") group = mapping.get("group_separator", "") reader = mapping.get("reader", None) + sheet = mapping.get("sheet", None) return Options(encoding, dialect, colspec, rowspec, decimal_separator=decimal, - group_separator=group, reader=reader) + group_separator=group, reader=reader, + sheet=sheet) @staticmethod def spec_as_encodable(spec): @@ -630,6 +636,7 @@ class Outputs: ) class Error(widget.OWWidget.Error): + sheet_error = widget.Msg("Error listing available sheets.") unknown = widget.Msg("Read error:\n{}") error = widget.Msg( "Unexpected error" @@ -652,6 +659,14 @@ class Warning(widget.OWWidget.Warning): 'Change the variable type to String in ' 'the Import Options to avoid this.') + class NoFileSelected: + pass + + LOCAL_FILE, URL = range(2) + source = Setting(LOCAL_FILE) + sheet_names = Setting({}) + recent_urls = Setting([]) + #: Paths and options of files accessed in a 'session' _session_items = Setting( [], schema_only=True) # type: List[Tuple[str, dict]] @@ -693,18 +708,31 @@ def __init__(self, *args, **kwargs): self.__executor = qconcurrent.ThreadExecutor() self.__watcher = None # type: 
Optional[qconcurrent.FutureWatcher] + self.reader = None self.data = None self.controlArea.layout().setSpacing(-1) # reset spacing + ######## + # Source + ######## + layout = QGridLayout() + layout.setSpacing(4) + gui.widgetBox(self.controlArea, orientation=layout, box='Source') + vbox = gui.radioButtons(None, self, "source", box=True, + callback=self._invalidate, addToLayout=False) + ############# # File select ############# - box = QHBoxLayout() - box.addWidget(QLabel("File:", self)) - self.import_items_model = VarPathItemModel(self) self.import_items_model.setReplacementEnv(self._replacements()) + + rb_button = gui.appendRadioButton(vbox, "File:", addToLayout=False) + layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter) + + # box = gui.hBox(None, addToLayout=False, margin=0) + # box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed) self.recent_combo = ItemStyledComboBox( self, objectName="recent-combo", sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon, @@ -714,6 +742,8 @@ def __init__(self, *args, **kwargs): self.recent_combo.activated.connect(self.activate_recent) self.recent_combo.setSizePolicy( QSizePolicy.MinimumExpanding, QSizePolicy.Fixed) + # box.layout().addWidget(self.recent_combo) + layout.addWidget(self.recent_combo, 0, 1) self.browse_button = QPushButton( "…", icon=self.style().standardIcon(QStyle.SP_DirOpenIcon), @@ -734,6 +764,52 @@ def __init__(self, *args, **kwargs): self.browse_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) self.browse_button.clicked.connect(self.browse) + layout.addWidget(self.browse_button, 0, 2) + + ############################### + # Sheet combo (on opening xlsx) + + self.sheet_box = gui.hBox(None, addToLayout=False, margin=0) + self.sheet_combo = QComboBox() + self.sheet_combo.activated[str].connect(self.select_sheet) + self.sheet_combo.setSizePolicy( + QSizePolicy.MinimumExpanding, QSizePolicy.Fixed) + self.sheet_label = QLabel() + self.sheet_label.setText('Sheet') + 
self.sheet_label.setSizePolicy( + QSizePolicy.MinimumExpanding, QSizePolicy.Fixed) + self.sheet_box.layout().addWidget( + self.sheet_label, Qt.AlignLeft) + self.sheet_box.layout().addWidget( + self.sheet_combo, Qt.AlignVCenter) + layout.addWidget(self.sheet_box, 1, 1) + self.sheet_box.hide() + + ##### + # URL + ##### + + rb_button = gui.appendRadioButton(vbox, "URL:", addToLayout=False) + layout.addWidget(rb_button, 2, 0, Qt.AlignVCenter) + + self.url_combo = url_combo = QComboBox() + url_model = NamedURLModel(self.sheet_names) + url_model.wrap(self.recent_urls) + url_combo.setLineEdit(LineEditSelectOnFocus()) + url_combo.setModel(url_model) + url_combo.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Fixed) + url_combo.setEditable(True) + url_combo.setInsertPolicy(url_combo.InsertAtTop) + url_edit = url_combo.lineEdit() + l, t, r, b = url_edit.getTextMargins() + url_edit.setTextMargins(l + 5, t, r, b) + layout.addWidget(url_combo, 2, 1, 1, 3) + url_combo.activated.connect(self._url_set) + # whit completer we set that combo box is case sensitive when + # matching the history + completer = QCompleter() + completer.setCaseSensitivity(Qt.CaseSensitive) + url_combo.setCompleter(completer) ######### # Buttons @@ -749,9 +825,6 @@ def __init__(self, *args, **kwargs): clicked=self.__committimer.start ) - box.addWidget(self.recent_combo) - box.addWidget(self.browse_button) - self.controlArea.layout().addLayout(box) box = QHBoxLayout() box.addWidget(self.import_options_button) box.addWidget(self.load_button) @@ -984,6 +1057,11 @@ def browse(self): else: varpath = PathItem.AbsPath(path) + if reader is None: + proposed_reader = FileFormat.get_reader(path) + if not isinstance(proposed_reader, CSVReader): + reader = proposed_reader + if reader is not None: self._prepare_reader(reader, path, varpath) else: @@ -995,12 +1073,6 @@ def _prepare_reader(self, reader, path, varpath): self.current_item().setVarPath(varpath) def _prepare_csv_import(self, path, varpath): - # pre-flight 
check; try to determine the nature of the file - mtype = _mime_type_for_path(path) - if not mtype.inherits("text/plain"): - mb = self._might_be_binary_mb(path) - if mb.exec() == QMessageBox.Cancel: - return # initialize options based on selected format options = default_options_for_path(path) # Search for path in history. @@ -1140,7 +1212,7 @@ def _invalidate(self): self.__committimer.start() if self.__watcher is not None: self.__cancel_task() - self.setBlocking(True) + # self.setBlocking(True) def commit(self): """ @@ -1156,6 +1228,15 @@ def commit(self): self.__cancel_task() self.error() + if self.source == self.URL: + url = self.url_combo.currentText().strip() + if url: + self.reader = UrlReader(url) + else: + self.reader = self.NoFileSelected + self._use_reader() + return + item = self.current_item() if item is None: return @@ -1166,7 +1247,8 @@ def commit(self): if opts.reader: reader = class_from_qualified_name(opts.reader) - self._use_reader(path, reader) + self.reader = reader(path) + self._use_reader() return task = state = TaskState() @@ -1189,20 +1271,75 @@ def progress_(i, j): self.__watcher = w self.__set_running_state() - def _use_reader(self, path, reader): + def _use_reader(self): + if self.reader is self.NoFileSelected: + self.Outputs.data.send(None) + return + + try: + self._update_sheet_combo() + except Exception: + self.Error.sheet_error() + return + + options = self.current_item().options() + if options.sheet: + self.reader.select_sheet(options.sheet) + with log_warnings() as warnings: try: - data = reader(path).read() + data = self.reader.read() except Exception as ex: _log.exception(ex) - return lambda x=ex: self.Error.unknown(str(x)) + self.Error.unknown(str(ex)) + return if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) + self.__clear_running_state() self.data = data self.openContext(data.domain) self.apply_domain_edit() + def _update_sheet_combo(self): + if len(self.reader.sheets) < 2: + self.sheet_box.hide() + 
self.reader.select_sheet(None) + return + + self.sheet_combo.clear() + self.sheet_combo.addItems(self.reader.sheets) + self._select_active_sheet() + self.sheet_box.show() + + def _select_active_sheet(self): + try: + idx = self.reader.sheets.index(self.reader.sheet) + self.sheet_combo.setCurrentIndex(idx) + except ValueError: + # Requested sheet does not exist in this file + self.reader.select_sheet(None) + self.sheet_combo.setCurrentIndex(0) + + def select_sheet(self): + opts = self.current_item().options() + opts.sheet = self.sheet_combo.currentText() + self.current_item().setOptions(opts) + self._invalidate() + + def _url_set(self): + url = self.url_combo.currentText() + pos = self.recent_urls.index(url) + url = url.strip() + + if not urlparse(url).scheme: + url = 'https://' + url + self.url_combo.setItemText(pos, url) + self.recent_urls[pos] = url + + self.source = self.URL + self._invalidate() + @Slot('qint64', 'qint64') def __set_read_progress(self, read, count): if count > 0: From 3a54a224d1c3e3121bda7168c7108fa2ccfb39b7 Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 22:31:59 +0100 Subject: [PATCH 21/22] owcsvimport: If sniff orange types or flags, use reader --- Orange/widgets/data/owcsvimport.py | 56 ++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index ee0ded1b473..683f30d24ee 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -61,6 +61,7 @@ import Orange.data from Orange.data import Table, ContinuousVariable, StringVariable, FileFormat from Orange.data.io import class_from_qualified_name, CSVReader, UrlReader +from Orange.data.io_base import DataTableMixin, _TableHeader from Orange.misc.collections import natural_sorted @@ -1047,34 +1048,51 @@ def browse(self): status = dlg.exec() dlg.deleteLater() - if status == QFileDialog.Accepted: - reader = dlg.selectedFileFormat() - path = 
dlg.selectedFiles()[0] - assert os.path.isfile(path) + if status != QFileDialog.Accepted: + return - if directory and os.path.commonprefix([directory, path]) == directory: - varpath = VarPath("basedir", os.path.relpath(path, directory)) - else: - varpath = PathItem.AbsPath(path) + reader = dlg.selectedFileFormat() + path = dlg.selectedFiles()[0] + assert os.path.isfile(path) - if reader is None: - proposed_reader = FileFormat.get_reader(path) - if not isinstance(proposed_reader, CSVReader): - reader = proposed_reader + if directory and os.path.commonprefix([directory, path]) == directory: + varpath = VarPath("basedir", os.path.relpath(path, directory)) + else: + varpath = PathItem.AbsPath(path) - if reader is not None: - self._prepare_reader(reader, path, varpath) - else: - self._prepare_csv_import(path, varpath) + options = default_options_for_path(path) + if not reader: + reader = self._choose_reader(path, options) + + if reader is not None: + self._prepare_reader(reader, path, varpath) + else: + self._prepare_csv_import(options, path, varpath) + + def _choose_reader(self, path, options): + try: + proposed_reader = FileFormat.get_reader(path) + if not isinstance(proposed_reader, CSVReader): + return proposed_reader + + sample = _open(path, 'rb') + wrapper = io.TextIOWrapper(sample, newline='') + rows = csv.reader(wrapper, dialect=options.dialect) + headers, data = DataTableMixin.parse_headers(rows) + header = _TableHeader(headers) + + if any(header.types) or any(header.flags): + return proposed_reader + except IOError: + pass + return None def _prepare_reader(self, reader, path, varpath): options = Options(reader=reader.qualified_name()) self.set_selected_file(path, options) self.current_item().setVarPath(varpath) - def _prepare_csv_import(self, path, varpath): - # initialize options based on selected format - options = default_options_for_path(path) + def _prepare_csv_import(self, options, path, varpath): # Search for path in history. 
# If found use the stored params to initialize the import dialog items = self.itemsFromSettings() From 797ab7b54318ffa8cbee6a1eee2427a0ae6e085b Mon Sep 17 00:00:00 2001 From: Rafael Irgolic Date: Fri, 6 Aug 2021 22:36:25 +0100 Subject: [PATCH 22/22] owcsvimport: Clear on invalidate --- Orange/widgets/data/owcsvimport.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 683f30d24ee..90dd06308ea 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -1232,6 +1232,17 @@ def _invalidate(self): self.__cancel_task() # self.setBlocking(True) + def _update_buttons(self): + cbindex = self.recent_combo.currentIndex() + self.load_button.setEnabled(cbindex != -1) + + import_enabled = (cbindex != -1 and + self.source == self.LOCAL_FILE and + self.reader is None) + self.import_options_button.setEnabled(import_enabled) + + self._update_sheet_combo() + def commit(self): """ Commit the current state and submit the load task for execution. @@ -1242,6 +1253,8 @@ def commit(self): """ self.__committimer.stop() self.closeContext() + self._set_summary_text(None) + self._update_table(None) if self.__watcher is not None: self.__cancel_task() self.error() @@ -1269,6 +1282,9 @@ def commit(self): self._use_reader() return + self.reader = None + self._update_buttons() + task = state = TaskState() state.future = ... 
state.watcher = qconcurrent.FutureWatcher() @@ -1294,12 +1310,6 @@ def _use_reader(self): self.Outputs.data.send(None) return - try: - self._update_sheet_combo() - except Exception: - self.Error.sheet_error() - return - options = self.current_item().options() if options.sheet: self.reader.select_sheet(options.sheet) @@ -1314,12 +1324,17 @@ def _use_reader(self): if warnings: self.Warning.load_warning(warnings[-1].message.args[0]) + self._update_buttons() self.__clear_running_state() self.data = data self.openContext(data.domain) self.apply_domain_edit() def _update_sheet_combo(self): + if not hasattr(self.reader, 'sheets'): + self.sheet_box.hide() + return + if len(self.reader.sheets) < 2: self.sheet_box.hide() self.reader.select_sheet(None)