Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Finalize testing for gcs filesystem #1400

Merged
merged 4 commits into from
May 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 48 additions & 31 deletions tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
ROOT_PREFIX = f"tf-io-root-{int(time.time())}/"

# This is the number of attributes each filesystem should return in `*_fs`.
NUM_ATR_FS = 7
NUM_ATR_FS = 6

S3_URI = "s3"
AZ_URI = "az"
Expand Down Expand Up @@ -127,7 +127,7 @@ def mkdirs(path):
path += "/"
write(path, b"")

yield S3_URI, path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
yield path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
monkeypatch.undo()


Expand Down Expand Up @@ -182,7 +182,7 @@ def mkdirs(path):
if path[-1] == "/":
write(path, b"")

yield AZ_URI, path_to, read, write, mkdirs, posixpath.join, (
yield path_to, read, write, mkdirs, posixpath.join, (
client,
container_name,
account,
Expand All @@ -196,13 +196,13 @@ def az_dsn_fs(az_fs):
yield [None] * NUM_ATR_FS
return

uri, _, read, write, mkdirs, join, fs_internal = az_fs
_, read, write, mkdirs, join, fs_internal = az_fs
_, container_name, account = fs_internal

def path_to_dsn(*args):
return f"{AZ_URI}://{account}.blob.core.windows.net/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}"

yield uri, path_to_dsn, read, write, mkdirs, join, fs_internal
yield path_to_dsn, read, write, mkdirs, join, fs_internal


@pytest.fixture(scope="module")
Expand All @@ -223,10 +223,9 @@ def write(*_):
def mkdirs(_):
pass

yield HTTPS_URI, path_to, read, write, mkdirs, posixpath.join, None
yield path_to, read, write, mkdirs, posixpath.join, None


# TODO(vnvo2409): some tests with `gcs` are failing.
@pytest.fixture(scope="module")
def gcs_fs():
if should_skip(GCS_URI):
Expand Down Expand Up @@ -275,29 +274,41 @@ def mkdirs(path):
path += "/"
write(path, b"")

yield GCS_URI, path_to, read, write, mkdirs, posixpath.join, None
yield path_to, read, write, mkdirs, posixpath.join, None
monkeypatch.undo()


@pytest.fixture
def fs(request, s3_fs, az_fs, az_dsn_fs, https_fs, gcs_fs):
uri, path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS
should_skip(request.param, check_only=False)
path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS
test_fs_uri = request.param
real_uri = test_fs_uri
should_skip(test_fs_uri, check_only=False)

if test_fs_uri == S3_URI:
path_to, read, write, mkdirs, join, internal = s3_fs
elif test_fs_uri == AZ_URI:
path_to, read, write, mkdirs, join, internal = az_fs
elif test_fs_uri == AZ_DSN_URI:
real_uri = AZ_URI
path_to, read, write, mkdirs, join, internal = az_dsn_fs
elif test_fs_uri == HTTPS_URI:
path_to, read, write, mkdirs, join, internal = https_fs
elif test_fs_uri == GCS_URI:
path_to, read, write, mkdirs, join, internal = gcs_fs

path_to_rand = None
test_patchs = request.getfixturevalue("patchs")
if (test_fs_uri, test_patchs) in fs.path_to_rand_cache:
path_to_rand = fs.path_to_rand_cache[(test_fs_uri, test_patchs)]
else:
path_to_rand = functools.partial(path_to, str(random.getrandbits(32)))
mkdirs(path_to_rand(""))
fs.path_to_rand_cache[(test_fs_uri, test_patchs)] = path_to_rand
yield real_uri, path_to_rand, read, write, mkdirs, join, internal

if request.param == S3_URI:
uri, path_to, read, write, mkdirs, join, internal = s3_fs
elif request.param == AZ_URI:
uri, path_to, read, write, mkdirs, join, internal = az_fs
elif request.param == AZ_DSN_URI:
uri, path_to, read, write, mkdirs, join, internal = az_dsn_fs
elif request.param == HTTPS_URI:
uri, path_to, read, write, mkdirs, join, internal = https_fs
elif request.param == GCS_URI:
uri, path_to, read, write, mkdirs, join, internal = gcs_fs

path_to_rand = functools.partial(path_to, str(random.getrandbits(32)))
mkdirs(path_to_rand(""))
yield uri, path_to_rand, read, write, mkdirs, join, internal
fs.path_to_rand_cache = {}


@pytest.mark.parametrize(
Expand Down Expand Up @@ -328,7 +339,9 @@ def test_io_read_file(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None)], indirect=["fs"]
"fs, patchs",
[(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None), (GCS_URI, None)],
indirect=["fs"],
)
def test_io_write_file(fs, patchs, monkeypatch):
_, path_to, read, _, _, _, _ = fs
Expand Down Expand Up @@ -459,7 +472,7 @@ def test_dataset_from_remote_filename(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_GFile_writable(fs, patchs, monkeypatch):
uri, path_to, read, _, _, _, _ = fs
Expand Down Expand Up @@ -488,7 +501,7 @@ def test_gfile_GFile_writable(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_isdir(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
Expand All @@ -506,10 +519,10 @@ def test_gfile_isdir(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_listdir(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
uri, path_to, _, write, mkdirs, join, _ = fs
mock_patchs(monkeypatch, patchs)

root_path = "test_gfile_listdir"
Expand All @@ -519,6 +532,10 @@ def test_gfile_listdir(fs, patchs, monkeypatch):
num_childs = 5
childrens = [None] * num_childs
childrens[0] = join(dname, "subdir")
# TODO(vnvo2409): `gs` filesystem requires `/` at the end of directory's path.
# Consider if we could change the behavior for matching the other filesystems.
if uri == GCS_URI:
childrens[0] += "/"
mkdirs(childrens[0])

body = b"123456789"
Expand All @@ -532,7 +549,7 @@ def test_gfile_listdir(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_makedirs(fs, patchs, monkeypatch):
_, path_to, _, write, _, join, _ = fs
Expand Down Expand Up @@ -569,7 +586,7 @@ def test_gfile_remove(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_rmtree(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
Expand Down Expand Up @@ -646,7 +663,7 @@ def test_gfile_rename(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_glob(fs, patchs, monkeypatch):
_, path_to, _, write, _, join, _ = fs
Expand Down
43 changes: 26 additions & 17 deletions tests/test_gcloud/testbench/testbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import hashlib
import json
import random
import re

field_match = re.compile(r"(?:(\w+)\((\w+(?:,\w+)*)\))|(\w+)")


def filter_fields_from_response(fields, response):
Expand All @@ -33,23 +36,29 @@ def filter_fields_from_response(fields, response):
if fields is None:
return json.dumps(response)
tmp = {}
for key in fields.split(","):
key.replace(" ", "")
parentheses_idx = key.find("(")
if parentheses_idx != -1:
main_key = key[:parentheses_idx]
child_key = key[parentheses_idx + 1 : -1]
if main_key in response:
children = response[main_key]
if isinstance(children, list):
tmp_list = []
for value in children:
tmp_list.append(value[child_key])
tmp[main_key] = tmp_list
elif isinstance(children, dict):
tmp[main_key] = children[child_key]
elif key in response:
tmp[key] = response[key]
fields.replace(" ", "")
for keys in field_match.findall(fields):
if keys[2]:
if keys[2] not in response:
continue
tmp[keys[2]] = response[keys[2]]
else:
if keys[0] not in response:
continue
childrens = response[keys[0]]
if isinstance(childrens, list):
tmp_list = []
for children in childrens:
child = {}
for child_key in keys[1].split(","):
child[child_key] = children[child_key]
tmp_list.append(child)
tmp[keys[0]] = tmp_list
elif isinstance(childrens, dict):
child = {}
for child_key in keys[1].split(","):
child[child_key] = children[child_key]
tmp[keys[0]] = child
return json.dumps(tmp)


Expand Down