Skip to content

Commit

Permalink
Merge pull request #79 from jaybythebay/hyperapi-subpackage
Browse files Browse the repository at this point in the history
Create subpackage for functionality reliant on tableauhyperapi.
  • Loading branch information
JustinGrilli authored May 6, 2024
2 parents 6277ec7 + c2d8dfc commit a6b4246
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 113 deletions.
1 change: 1 addition & 0 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install flake8 pytest
pip install .
pip install ./[hyper]
- name: Check linting with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@ A module and CLI Utility for managing Tableau objects, locally, and in Tableau O
### Installation

#### From pypi

##### Core Package
- `pip install tableau-utilities`

##### Hyper Subpackage
This extra package depends on `tableauhyperapi`, which is incompatible with Apple Silicon computers. See the [tableauhyperapi installation docs](https://tableau.github.io/hyper-db/docs/installation) for workarounds to use the package on Apple Silicon.

- `pip install tableau-utilities[hyper]`
- `pip install 'tableau-utilities[hyper]'` (if you're using zsh, make sure to wrap the package name in quotes)

#### Locally using pip
- `cd tableau-utilities`
- `pip install ./`
Expand Down Expand Up @@ -207,4 +215,4 @@ tableau_utilities --definitions_csv /Desktop/new_definitions.csv merge_config --

This project is actively maintained by the Data Platform team at [@hoverinc][hover-github-link].

[hover-github-link]: https://github.com/hoverinc
[hover-github-link]: https://github.com/hoverinc
11 changes: 7 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
long_description=readme,
long_description_content_type='text/markdown',
name="tableau_utilities",
version="2.1.21",
version="2.2.0",
requires_python=">=3.8",
packages=[
'tableau_utilities',
'tableau_utilities.general',
'tableau_utilities.tableau_file',
'tableau_utilities.tableau_server',
'tableau_utilities.scripts'
'tableau_utilities.hyper',
'tableau_utilities.scripts',
],
package_data={'tableau_utilities': ['tableau_file/*.yml']},
include_package_data=True,
Expand All @@ -27,10 +29,11 @@
'requests>=2.27.1,<3.0.0',
'pandas>=1.4.1,<2.0.0',
'tabulate>=0.8.9,<1.0.0',
'tableauhyperapi==0.0.18825'],
],
extras_require={"hyper": ['tableauhyperapi==0.0.18825']},
entry_points={
'console_scripts': [
'tableau_utilities = tableau_utilities.scripts.cli:main',
]
}
)
)
Empty file.
112 changes: 112 additions & 0 deletions tableau_utilities/hyper/hyper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
import shutil
from zipfile import ZipFile
from tableauhyperapi import HyperProcess, Connection, Telemetry, CreateMode, TableDefinition, TableName, SqlType

from tableau_utilities.tableau_file.tableau_file import TableauFileError, Datasource


def create_empty_hyper_extract(datasource: Datasource):
    """ Creates an empty extract (.hyper file) for the Tableau file.
        If the extract exists, it will be overwritten.

    The Tableau file is repackaged as a .tdsx archive holding the TDS file and the
    empty .hyper file, and the datasource's file attributes are updated to point at
    that archive. If anything fails, the original file is moved back into place.

    Args:
        datasource: The tableau_utilities Datasource class

    Raises:
        TableauFileError: If a metadata record has a type with no hyper column mapping,
            or if a .tdsx archive does not contain a .tds/.twb file.
    """
    # Maps the metadata local_type to a factory for the matching hyper SqlType
    sql_type_factories = {
        'integer': lambda record: SqlType.int(),
        'real': lambda record: SqlType.double(),
        'string': lambda record: SqlType.varchar(record.width or 1020),
        'boolean': lambda record: SqlType.bool(),
        'datetime': lambda record: SqlType.timestamp(),
        'date': lambda record: SqlType.date(),
    }
    # Get columns from the metadata. This is done before any file is touched,
    # so that an unsupported type cannot leave the Tableau file in the temp folder.
    columns = dict()  # Use a dict to ensure no duplicate columns are referenced
    for metadata in datasource.connection.metadata_records:
        factory = sql_type_factories.get(metadata.local_type)
        if factory is None:
            raise TableauFileError(f'Got unexpected metadata type for hyper table: {metadata.local_type}')
        columns[metadata.remote_name] = TableDefinition.Column(metadata.remote_name, factory(metadata))

    # Get relevant paths, and create a temp folder and move the Tableau file into it
    original_path = datasource.file_path
    temp_folder = os.path.join(datasource.file_directory, f'__TEMP_{datasource.file_name}')
    extract_folder = os.path.join(temp_folder, 'Data', 'Extracts')
    hyper_basename = f'{datasource.file_name}.hyper'
    hyper_path = os.path.join(extract_folder, hyper_basename)
    # This path is stored inside the archive and in the datasource's dbname,
    # so it uses forward slashes regardless of the operating system.
    hyper_rel_path = f'Data/Extracts/{hyper_basename}'
    temp_path = os.path.join(temp_folder, datasource.file_basename)
    tdsx_basename = f'{datasource.file_name}.tdsx'
    tdsx_file_path = os.path.join(datasource.file_directory, tdsx_basename)
    # The new archive gets its own name inside the temp folder, so that the moved
    # original (which may itself be named <file_name>.tdsx) is never overwritten.
    new_tdsx_path = os.path.join(temp_folder, f'__NEW_{tdsx_basename}')
    os.makedirs(extract_folder, exist_ok=True)
    shutil.move(original_path, temp_path)
    try:
        if datasource.extension == 'tdsx':
            # Unzip the TDS file
            tds_path = None
            with ZipFile(temp_path) as z:
                for f in z.filelist:
                    ext = f.filename.split('.')[-1]
                    if ext in ['tds', 'twb']:
                        tds_path = z.extract(member=f, path=temp_folder)
            if tds_path is None:
                raise TableauFileError(f'No .tds or .twb file found in {datasource.file_basename}')
        else:
            tds_path = temp_path
        # Create an empty .hyper file based on the metadata of the Tableau file
        params = {"default_database_version": "2"}
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, parameters=params) as hyper:
            with Connection(hyper.endpoint, hyper_path, CreateMode.CREATE_AND_REPLACE) as connection:
                # Create an `Extract` table inside an `Extract` schema
                connection.catalog.create_schema('Extract')
                table = TableDefinition(TableName('Extract', 'Extract'), columns.values())
                connection.catalog.create_table(table)
        # Archive the extract with the TDS file
        with ZipFile(new_tdsx_path, 'w') as z:
            z.write(tds_path, arcname=os.path.basename(tds_path))
            z.write(hyper_path, arcname=hyper_rel_path)
        # Move the tdsx out of the temp_folder
        shutil.move(new_tdsx_path, tdsx_file_path)
    except BaseException:
        # Put the original Tableau file back where it was, then clean up and re-raise
        if os.path.exists(temp_path) and not os.path.exists(original_path):
            shutil.move(temp_path, original_path)
        shutil.rmtree(temp_folder, ignore_errors=True)
        raise
    # Update datasource extract to reference .hyper file
    if datasource.extract:
        datasource.extract.connection.class_name = 'hyper'
        datasource.extract.connection.authentication = 'auth-none'
        datasource.extract.connection.author_locale = 'en_US'
        datasource.extract.connection.extract_engine = None
        datasource.extract.connection.dbname = hyper_rel_path
    # Point the datasource at the new tdsx and delete temp_folder
    datasource.file_path = tdsx_file_path
    datasource.file_basename = tdsx_basename
    datasource.extension = 'tdsx'
    shutil.rmtree(temp_folder, ignore_errors=True)


# Alias so that `from tableau_utilities.hyper.hyper import empty_hyper_extract`
# (the name imported by tableau_utilities/scripts/datasource.py) resolves.
empty_hyper_extract = create_empty_hyper_extract


def filter_hyper_extract(datasource: Datasource, delete_condition):
    """ Filters the data in the extract (.hyper file) for the Tableau file.

    Nothing is done unless the datasource is a .tdsx with extract data.
    If anything fails, the original file is moved back into place.

    Args:
        datasource: The tableau_utilities Datasource class
        delete_condition (str): A condition string to add to the WHERE clause of data to delete.
            It is inserted into the SQL statement as-is, so it must come from a trusted source.

    Raises:
        TableauFileError: If the archive does not contain a .hyper file.
    """
    if datasource.extension != 'tdsx' or not datasource.has_extract_data:
        return None
    # Get relevant paths, and create a temp folder and move the Tableau file into it
    temp_folder = os.path.join(datasource.file_directory, f'__TEMP_{datasource.file_name}')
    temp_path = os.path.join(temp_folder, datasource.file_basename)
    # The rebuilt archive gets its own name, so the moved original stays intact until the end
    new_path = os.path.join(temp_folder, f'__NEW_{datasource.file_basename}')
    os.makedirs(temp_folder, exist_ok=True)
    shutil.move(datasource.file_path, temp_path)
    try:
        # Unzip the TDS file, remembering the archive name of each extracted file
        hyper_path = None
        unzipped_files = list()
        with ZipFile(temp_path) as z:
            for f in z.filelist:
                path = z.extract(member=f, path=temp_folder)
                unzipped_files.append((path, f.filename))
                if f.filename.split('.')[-1] == 'hyper':
                    hyper_path = path
        if hyper_path is None:
            raise TableauFileError(f'No .hyper file found in {datasource.file_basename}')
        # Update .hyper file based on the filter condition
        with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
            with Connection(hyper.endpoint, hyper_path, CreateMode.NONE) as connection:
                # NOTE: delete_condition is an arbitrary SQL fragment and cannot be parameterized;
                # never build it from untrusted input.
                connection.execute_command(f'DELETE FROM "Extract"."Extract" WHERE {delete_condition}')
        # Archive the extract with the TDS file, reusing the original archive names
        # rather than deriving them from the extracted paths
        with ZipFile(new_path, 'w') as z:
            for path, arcname in unzipped_files:
                z.write(path, arcname=arcname)
        # Move the tdsx out of the temp_folder
        shutil.move(new_path, datasource.file_path)
    except BaseException:
        # Put the original Tableau file back where it was, then clean up and re-raise
        if os.path.exists(temp_path) and not os.path.exists(datasource.file_path):
            shutil.move(temp_path, datasource.file_path)
        shutil.rmtree(temp_folder, ignore_errors=True)
        raise
    # Delete temp_folder
    shutil.rmtree(temp_folder, ignore_errors=True)
25 changes: 22 additions & 3 deletions tableau_utilities/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from argparse import RawTextHelpFormatter
import yaml
import pkg_resources
import pkgutil
import importlib.metadata

import tableau_utilities.tableau_server.tableau_server as ts

Expand Down Expand Up @@ -157,9 +159,9 @@
help='The datatype persona of the column. Required for adding a new column')
parser_datasource.add_argument('--desc', help='A Tableau column description')
parser_datasource.add_argument('--calculation', help='A Tableau calculation')
parser_datasource.add_argument('-E', '--empty_extract', action='store_true',
parser_datasource.add_argument('-ee', '--empty_extract', action='store_true',
help='Adds an empty extract to the Datasource if specified.')
parser_datasource.add_argument('-F', '--filter_extract',
parser_datasource.add_argument('-fe', '--filter_extract',
help='Deletes data from the extract based on the condition string provided. '
"""E.g. "CREATED_AT" < '1/1/2024'""")
parser_datasource.set_defaults(func=datasource)
Expand Down Expand Up @@ -262,6 +264,16 @@ def validate_args_command_merge_config(args):
parser.error(f'--merge_with {args.merge_with} requires --target_directory')


def validate_subpackage_hyper():
    """ Checks that the hyper subpackage is installed for functions that use it.

    When the tableauhyperapi distribution is not installed,
    the problem is reported through parser.error().
    """
    try:
        # Only the lookup matters here; the version itself is not needed
        importlib.metadata.version("tableauhyperapi")
    except importlib.metadata.PackageNotFoundError:
        parser.error(
            '--filter_extract and --empty_extract require the tableau_utilities[hyper] subpackage. See installation notes if you are on an Apple Silicon (Apple M1, Apple M2, ...)')


def tableau_authentication(args):
""" Creates the Tableau server authentication from a variety of methods for passing in credentials """
debug = args.debugging_logs
Expand Down Expand Up @@ -453,6 +465,10 @@ def main():
os.makedirs(tmp_folder, exist_ok=True)
os.chdir(tmp_folder)

needs_subpackage_hyper = (
args.command == 'datasource' and (args.empty_extract or args.filter_extract)
)

needs_tableau_server = (
(args.command == 'generate_config' and args.location == 'online')
or (args.command == 'merge_config' and args.location == 'online')
Expand All @@ -462,6 +478,9 @@ def main():
or args.command == 'server_operate'
)

if needs_subpackage_hyper:
validate_subpackage_hyper()

if needs_tableau_server:
server = tableau_authentication(args)
args.func(args, server)
Expand All @@ -471,4 +490,4 @@ def main():


if __name__ == '__main__':
main()
main()
5 changes: 3 additions & 2 deletions tableau_utilities/scripts/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from tableau_utilities.general.cli_styling import Symbol
from tableau_utilities.tableau_file.tableau_file import Datasource
from tableau_utilities.tableau_server.tableau_server import TableauServer
from tableau_utilities.hyper.hyper import empty_hyper_extract, filter_hyper_extract


def datasource(args, server=None):
Expand Down Expand Up @@ -73,13 +74,13 @@ def datasource(args, server=None):

# Add an empty .hyper file to the Datasource; Useful for publishing without data
if empty_extract:
ds.empty_extract()
empty_hyper_extract(ds)
print(f'{color.fg_green}Added empty .hyper extract for {datasource_path}{color.reset}')
# Otherwise, filter the extract if filter_extract string provided
elif filter_extract:
start = time()
print(f'{color.fg_cyan}...Filtering extract data...{color.reset}')
ds.filter_extract(filter_extract)
filter_hyper_extract(ds, filter_extract)
print(f'{color.fg_green}{symbol.success} (Done in {round(time() - start)} sec) '
f'Filtered extract data for {datasource_path}{color.reset}')

Expand Down
104 changes: 1 addition & 103 deletions tableau_utilities/tableau_file/tableau_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import shutil
import xmltodict
from tableauhyperapi import HyperProcess, Connection, Telemetry, CreateMode, TableDefinition, TableName, SqlType
from zipfile import ZipFile

import tableau_utilities.tableau_file.tableau_file_objects as tfo
Expand Down Expand Up @@ -302,107 +301,6 @@ def enforce_column(self, column, folder_name=None, remote_name=None):
if not found:
self.extract.connection.cols.append(extract_col)

def empty_extract(self):
""" Creates an empty extract (.hyper file) for the Tableau file.
If the extract exists, it will be overwritten. """
# Get relevant paths, and create a temp folder and move the Tableau file into it
temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
extract_folder = os.path.join(temp_folder, 'Data', 'Extracts')
hyper_rel_path = os.path.join('Data', 'Extracts', f'{self.file_name}.hyper')
temp_path = os.path.join(temp_folder, self.file_basename)
tdsx_basename = f'{self.file_name}.tdsx'
tdsx_path = os.path.join(temp_folder, tdsx_basename)
os.makedirs(extract_folder, exist_ok=True)
shutil.move(self.file_path, temp_path)
if self.extension == 'tdsx':
# Unzip the TDS file
with ZipFile(temp_path) as z:
for f in z.filelist:
ext = f.filename.split('.')[-1]
if ext in ['tds', 'twb']:
tds_path = z.extract(member=f, path=temp_folder)
else:
tds_path = temp_path
hyper_path = os.path.join(extract_folder, f'{self.file_name}.hyper')
params = {"default_database_version": "2"}
# Get columns from the metadata
columns = dict() # Use a dict to ensure no duplicate columns are referenced
for metadata in self.connection.metadata_records:
if metadata.local_type == 'integer':
column = TableDefinition.Column(metadata.remote_name, SqlType.int())
elif metadata.local_type == 'real':
column = TableDefinition.Column(metadata.remote_name, SqlType.double())
elif metadata.local_type == 'string':
column = TableDefinition.Column(metadata.remote_name, SqlType.varchar(metadata.width or 1020))
elif metadata.local_type == 'boolean':
column = TableDefinition.Column(metadata.remote_name, SqlType.bool())
elif metadata.local_type == 'datetime':
column = TableDefinition.Column(metadata.remote_name, SqlType.timestamp())
elif metadata.local_type == 'date':
column = TableDefinition.Column(metadata.remote_name, SqlType.date())
else:
raise TableauFileError(f'Got unexpected metadata type for hyper table: {metadata.local_type}')
columns[metadata.remote_name] = column
# Create an empty .hyper file based on the metadata of the Tableau file
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU, parameters=params) as hyper:
with Connection(hyper.endpoint, hyper_path, CreateMode.CREATE_AND_REPLACE) as connection:
# Create an `Extract` table inside an `Extract` schema
connection.catalog.create_schema('Extract')
table = TableDefinition(TableName('Extract', 'Extract'), columns.values())
connection.catalog.create_table(table)
# Archive the extract with the TDS file
with ZipFile(tdsx_path, 'w') as z:
z.write(tds_path, arcname=os.path.basename(tds_path))
z.write(hyper_path, arcname=hyper_rel_path)
# Update datasource extract to reference .hyper file
if self.extract:
self.extract.connection.class_name = 'hyper'
self.extract.connection.authentication = 'auth-none'
self.extract.connection.author_locale = 'en_US'
self.extract.connection.extract_engine = None
self.extract.connection.dbname = hyper_rel_path
# Move the tdsx out of the temp_folder and delete temp_folder
self.file_path = os.path.join(self.file_directory, tdsx_basename)
self.file_basename = tdsx_basename
self.extension = 'tdsx'
shutil.move(tdsx_path, self.file_path)
shutil.rmtree(temp_folder, ignore_errors=True)

def filter_extract(self, delete_condition: str):
""" Filters the data in the extract (.hyper file) for the Tableau file.
Args:
delete_condition (str): A condition string to add to the WHERE clause of data to delete.
"""
if self.extension != 'tdsx' or not self.has_extract_data:
return None
# Get relevant paths, and create a temp folder and move the Tableau file into it
temp_folder = os.path.join(self.file_directory, f'__TEMP_{self.file_name}')
temp_path = os.path.join(temp_folder, self.file_basename)
os.makedirs(temp_folder, exist_ok=True)
shutil.move(self.file_path, temp_path)
# Unzip the TDS file
unzipped_files = list()
with ZipFile(temp_path) as z:
for f in z.filelist:
ext = f.filename.split('.')[-1]
path = z.extract(member=f, path=temp_folder)
unzipped_files.append(path)
if ext == 'hyper':
hyper_path = path
# Update .hyper file based on the filter condition
with HyperProcess(Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint, hyper_path, CreateMode.NONE) as connection:
connection.execute_command(f'DELETE FROM "Extract"."Extract" WHERE {delete_condition}')
# Archive the extract with the TDS file
with ZipFile(temp_path, 'w') as z:
for file in unzipped_files:
arcname = file.split(temp_folder)[-1]
z.write(file, arcname=arcname)
# Move the tdsx out of the temp_folder and delete temp_folder
shutil.move(temp_path, self.file_path)
shutil.rmtree(temp_folder, ignore_errors=True)

def save(self):
""" Save all changes made to each section of the Datasource """
parent = self._root.find('.')
Expand Down Expand Up @@ -437,4 +335,4 @@ def save(self):
if unzip:
ds.unzip(unzip_all=unzip_all_files)

print(ds.columns.get('[USER_ID]'))
print(ds.columns.get('[USER_ID]'))
Loading

0 comments on commit a6b4246

Please sign in to comment.