Skip to content

Commit

Permalink
Finalize testing for gcs filesystem (#1400)
Browse files Browse the repository at this point in the history
* filesystem with the same config should share the same random prefix

* `fs` fixture returns `uri` instead of each `{filesystem}_fs`

* fix matching fields for gcs testbench

* add more tests for `gcs` filesystem
  • Loading branch information
vnghia authored May 6, 2021
1 parent 22f4103 commit e397b6b
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 48 deletions.
79 changes: 48 additions & 31 deletions tests/test_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
ROOT_PREFIX = f"tf-io-root-{int(time.time())}/"

# This is the number of attributes each filesystem should return in `*_fs`.
NUM_ATR_FS = 7
NUM_ATR_FS = 6

S3_URI = "s3"
AZ_URI = "az"
Expand Down Expand Up @@ -127,7 +127,7 @@ def mkdirs(path):
path += "/"
write(path, b"")

yield S3_URI, path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
yield path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
monkeypatch.undo()


Expand Down Expand Up @@ -182,7 +182,7 @@ def mkdirs(path):
if path[-1] == "/":
write(path, b"")

yield AZ_URI, path_to, read, write, mkdirs, posixpath.join, (
yield path_to, read, write, mkdirs, posixpath.join, (
client,
container_name,
account,
Expand All @@ -196,13 +196,13 @@ def az_dsn_fs(az_fs):
yield [None] * NUM_ATR_FS
return

uri, _, read, write, mkdirs, join, fs_internal = az_fs
_, read, write, mkdirs, join, fs_internal = az_fs
_, container_name, account = fs_internal

def path_to_dsn(*args):
return f"{AZ_URI}://{account}.blob.core.windows.net/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}"

yield uri, path_to_dsn, read, write, mkdirs, join, fs_internal
yield path_to_dsn, read, write, mkdirs, join, fs_internal


@pytest.fixture(scope="module")
Expand All @@ -223,10 +223,9 @@ def write(*_):
def mkdirs(_):
pass

yield HTTPS_URI, path_to, read, write, mkdirs, posixpath.join, None
yield path_to, read, write, mkdirs, posixpath.join, None


# TODO(vnvo2409): some tests with `gcs` are failing.
@pytest.fixture(scope="module")
def gcs_fs():
if should_skip(GCS_URI):
Expand Down Expand Up @@ -275,29 +274,41 @@ def mkdirs(path):
path += "/"
write(path, b"")

yield GCS_URI, path_to, read, write, mkdirs, posixpath.join, None
yield path_to, read, write, mkdirs, posixpath.join, None
monkeypatch.undo()


@pytest.fixture
def fs(request, s3_fs, az_fs, az_dsn_fs, https_fs, gcs_fs):
uri, path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS
should_skip(request.param, check_only=False)
path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS
test_fs_uri = request.param
real_uri = test_fs_uri
should_skip(test_fs_uri, check_only=False)

if test_fs_uri == S3_URI:
path_to, read, write, mkdirs, join, internal = s3_fs
elif test_fs_uri == AZ_URI:
path_to, read, write, mkdirs, join, internal = az_fs
elif test_fs_uri == AZ_DSN_URI:
real_uri = AZ_URI
path_to, read, write, mkdirs, join, internal = az_dsn_fs
elif test_fs_uri == HTTPS_URI:
path_to, read, write, mkdirs, join, internal = https_fs
elif test_fs_uri == GCS_URI:
path_to, read, write, mkdirs, join, internal = gcs_fs

path_to_rand = None
test_patchs = request.getfixturevalue("patchs")
if (test_fs_uri, test_patchs) in fs.path_to_rand_cache:
path_to_rand = fs.path_to_rand_cache[(test_fs_uri, test_patchs)]
else:
path_to_rand = functools.partial(path_to, str(random.getrandbits(32)))
mkdirs(path_to_rand(""))
fs.path_to_rand_cache[(test_fs_uri, test_patchs)] = path_to_rand
yield real_uri, path_to_rand, read, write, mkdirs, join, internal

if request.param == S3_URI:
uri, path_to, read, write, mkdirs, join, internal = s3_fs
elif request.param == AZ_URI:
uri, path_to, read, write, mkdirs, join, internal = az_fs
elif request.param == AZ_DSN_URI:
uri, path_to, read, write, mkdirs, join, internal = az_dsn_fs
elif request.param == HTTPS_URI:
uri, path_to, read, write, mkdirs, join, internal = https_fs
elif request.param == GCS_URI:
uri, path_to, read, write, mkdirs, join, internal = gcs_fs

path_to_rand = functools.partial(path_to, str(random.getrandbits(32)))
mkdirs(path_to_rand(""))
yield uri, path_to_rand, read, write, mkdirs, join, internal
fs.path_to_rand_cache = {}


@pytest.mark.parametrize(
Expand Down Expand Up @@ -328,7 +339,9 @@ def test_io_read_file(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None)], indirect=["fs"]
"fs, patchs",
[(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None), (GCS_URI, None)],
indirect=["fs"],
)
def test_io_write_file(fs, patchs, monkeypatch):
_, path_to, read, _, _, _, _ = fs
Expand Down Expand Up @@ -459,7 +472,7 @@ def test_dataset_from_remote_filename(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_GFile_writable(fs, patchs, monkeypatch):
uri, path_to, read, _, _, _, _ = fs
Expand Down Expand Up @@ -488,7 +501,7 @@ def test_gfile_GFile_writable(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_isdir(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
Expand All @@ -506,10 +519,10 @@ def test_gfile_isdir(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_listdir(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
uri, path_to, _, write, mkdirs, join, _ = fs
mock_patchs(monkeypatch, patchs)

root_path = "test_gfile_listdir"
Expand All @@ -519,6 +532,10 @@ def test_gfile_listdir(fs, patchs, monkeypatch):
num_childs = 5
childrens = [None] * num_childs
childrens[0] = join(dname, "subdir")
# TODO(vnvo2409): `gs` filesystem requires `/` at the end of directory's path.
# Consider if we could change the behavior for matching the other filesystems.
if uri == GCS_URI:
childrens[0] += "/"
mkdirs(childrens[0])

body = b"123456789"
Expand All @@ -532,7 +549,7 @@ def test_gfile_listdir(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_makedirs(fs, patchs, monkeypatch):
_, path_to, _, write, _, join, _ = fs
Expand Down Expand Up @@ -569,7 +586,7 @@ def test_gfile_remove(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_rmtree(fs, patchs, monkeypatch):
_, path_to, _, write, mkdirs, join, _ = fs
Expand Down Expand Up @@ -646,7 +663,7 @@ def test_gfile_rename(fs, patchs, monkeypatch):


@pytest.mark.parametrize(
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
)
def test_gfile_glob(fs, patchs, monkeypatch):
_, path_to, _, write, _, join, _ = fs
Expand Down
43 changes: 26 additions & 17 deletions tests/test_gcloud/testbench/testbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import hashlib
import json
import random
import re

field_match = re.compile(r"(?:(\w+)\((\w+(?:,\w+)*)\))|(\w+)")


def filter_fields_from_response(fields, response):
Expand All @@ -33,23 +36,29 @@ def filter_fields_from_response(fields, response):
if fields is None:
return json.dumps(response)
tmp = {}
for key in fields.split(","):
key.replace(" ", "")
parentheses_idx = key.find("(")
if parentheses_idx != -1:
main_key = key[:parentheses_idx]
child_key = key[parentheses_idx + 1 : -1]
if main_key in response:
children = response[main_key]
if isinstance(children, list):
tmp_list = []
for value in children:
tmp_list.append(value[child_key])
tmp[main_key] = tmp_list
elif isinstance(children, dict):
tmp[main_key] = children[child_key]
elif key in response:
tmp[key] = response[key]
fields.replace(" ", "")
for keys in field_match.findall(fields):
if keys[2]:
if keys[2] not in response:
continue
tmp[keys[2]] = response[keys[2]]
else:
if keys[0] not in response:
continue
childrens = response[keys[0]]
if isinstance(childrens, list):
tmp_list = []
for children in childrens:
child = {}
for child_key in keys[1].split(","):
child[child_key] = children[child_key]
tmp_list.append(child)
tmp[keys[0]] = tmp_list
elif isinstance(childrens, dict):
child = {}
for child_key in keys[1].split(","):
child[child_key] = children[child_key]
tmp[keys[0]] = child
return json.dumps(tmp)


Expand Down

0 comments on commit e397b6b

Please sign in to comment.