Skip to content

Commit

Permalink
Avoid ignoring files visible in Hive
Browse files Browse the repository at this point in the history
Ignore in Hive only the files whose own name, or the name of one of
their ancestor directories, begins with a `.` or `_` character.
  • Loading branch information
dprophet authored and findinpath committed Apr 11, 2023
1 parent c9309ab commit 05657f7
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ static boolean isHiddenOrWithinHiddenParentDirectory(Path path, String prefix)
{
String pathString = path.toUri().getPath();
checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix);
return containsHiddenPathPartAfterIndex(pathString, prefix.length() + 1);
return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1);
}

@VisibleForTesting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ public void testRelativeHiddenPathDetection()
String root = new Path("file:///root-path").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, ".hidden/child"), root));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, "_hidden.txt"), root));
String rootWithSlash = new Path("file:///root-path/").toUri().getPath();
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, ".hidden/child"), rootWithSlash));
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, "_hidden.txt"), rootWithSlash));
String rootWithinHidden = new Path("file:///root/.hidden/listing-root").toUri().getPath();
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithinHidden, "file.txt"), rootWithinHidden));
String rootHiddenEnding = new Path("file:///root/hidden-ending_").toUri().getPath();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.hive;

import com.google.inject.Inject;
import io.trino.tempto.ProductTest;
import io.trino.tempto.assertions.QueryAssert;
import io.trino.tempto.hadoop.hdfs.HdfsClient;
import org.assertj.core.api.Assertions;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

import static io.trino.tempto.assertions.QueryAssert.Row.row;
import static io.trino.tempto.assertions.QueryAssert.assertThat;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.tests.product.hive.util.TableLocationUtils.getTablePath;
import static io.trino.tests.product.utils.QueryExecutors.onHive;
import static io.trino.tests.product.utils.QueryExecutors.onTrino;

/**
 * Product tests checking which table data files are treated as hidden
 * when a table is queried through both Trino and Hive.
 *
 * <p>Each test creates a table, renames its data files directly on HDFS and
 * then verifies that both engines return the same, expected result.
 */
public class TestHiveHiddenFiles
        extends ProductTest
{
    @Inject
    private HdfsClient hdfsClient;

    /**
     * Verifies that data files renamed to start with {@code .} or {@code _}
     * are no longer read by Trino or by Hive.
     *
     * @throws Exception if a query or an HDFS operation fails
     */
    @Test
    public void testSelectFromTableContainingHiddenFiles()
            throws Exception
    {
        String tableName = "test_table_hidden_files" + randomNameSuffix();
        onTrino().executeQuery("CREATE TABLE " + tableName + " (col integer)");

        // The table exists from here on, so drop it in `finally`: a failing assertion
        // or HDFS call must not leave the table behind in the test environment.
        try {
            // Two separate inserts; the test relies on them yielding two data files (asserted below)
            onTrino().executeQuery("INSERT INTO " + tableName + " VALUES 1");
            onTrino().executeQuery("INSERT INTO " + tableName + " VALUES 2");

            List<QueryAssert.Row> tableRows = List.of(row(1), row(2));
            assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
            assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);

            String tableLocation = getTablePath(tableName);
            // Rename the table files to Hive hidden files (prefixed by `.` or `_` characters)
            List<String> tableFiles = hdfsClient.listDirectory(tableLocation);
            Assertions.assertThat(tableFiles).hasSize(2);
            renameFile(tableLocation, tableFiles.get(0), '.' + tableFiles.get(0));
            renameFile(tableLocation, tableFiles.get(1), '_' + tableFiles.get(1));

            // Both data files are hidden now, so neither engine should see any rows
            assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).hasNoRows();
            assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).hasNoRows();
        }
        finally {
            onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
        }
    }

    /**
     * Verifies that data files whose names merely contain an underscore
     * (here: prefixed with {@code f_}) remain visible to Trino and to Hive.
     *
     * @throws Exception if a query or an HDFS operation fails
     */
    @Test
    public void testSelectFromTableContainingFilenamesWithUnderscore()
            throws Exception
    {
        String tableName = "test_table_visible_underscore_files" + randomNameSuffix();
        onTrino().executeQuery("CREATE TABLE " + tableName + " AS SELECT 1 AS col");

        // The table exists from here on, so drop it in `finally`: a failing assertion
        // or HDFS call must not leave the table behind in the test environment.
        try {
            List<QueryAssert.Row> tableRows = List.of(row(1));
            assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
            assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);

            String tableLocation = getTablePath(tableName);
            // Prefix the table files with `f_` which should still keep them visible to Hive
            for (String filename : hdfsClient.listDirectory(tableLocation)) {
                // As long as the file is not hidden (starting with `.` or `_`), it should not be ignored by Hive
                renameFile(tableLocation, filename, "f_" + filename);
            }

            assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
            assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
        }
        finally {
            onTrino().executeQuery("DROP TABLE " + tableName);
        }
    }

    /**
     * Renames a file within a directory by copying its content to the new
     * name and then deleting the original.
     *
     * <p>The whole file content is buffered in memory, which is acceptable for
     * the tiny data files these tests produce.
     *
     * @param directoryLocation location of the directory containing the file
     * @param filename current name of the file inside {@code directoryLocation}
     * @param newFilename name the file should have afterwards
     * @throws IOException declared for closing the in-memory buffer
     */
    private void renameFile(String directoryLocation, String filename, String newFilename)
            throws IOException
    {
        String sourceLocation = directoryLocation + "/" + filename;
        String targetLocation = directoryLocation + "/" + newFilename;
        try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
            hdfsClient.loadFile(sourceLocation, bos);
            hdfsClient.saveFile(targetLocation, new ByteArrayInputStream(bos.toByteArray()));
            // Delete the original only after the copy has been written
            hdfsClient.delete(sourceLocation);
        }
    }
}

0 comments on commit 05657f7

Please sign in to comment.