diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index ff27783ad76..7b411c9a1a4 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -13,12 +13,12 @@ Hadoop catalog is a fileset catalog that using Hadoop Compatible File System (HC the storage location of the fileset. Currently, it supports local filesystem and HDFS. For object storage like S3, GCS, and Azure Blob Storage, you can put the hadoop object store jar like hadoop-aws into the `$GRAVITINO_HOME/catalogs/hadoop/libs` directory to enable the support. -Gravitino itself haven't yest tested the object storage support, so if you have any issue, +Gravitino itself hasn't yet tested the object storage support, so if you have any issue, please create an [issue](https://github.com/datastrato/gravitino/issues). -Note that the Hadoop catalog is built against Hadoop 3, it should be compatible with both Hadoop -2.x and 3.x, since we don't leverage any new features in Hadoop 3. If there's any compatibility -issue, please create an [issue](https://github.com/datastrato/gravitino/issues). +Note that Gravitino uses Hadoop 3 dependencies to build Hadoop catalog. Theoretically, it should be +compatible with both Hadoop 2.x and 3.x, since Gravitino doesn't leverage any new features in +Hadoop 3. If there's any compatibility issue, please create an [issue](https://github.com/datastrato/gravitino/issues). ## Catalog @@ -56,7 +56,7 @@ Refer to [Schema operation](./manage-fileset-metadata-using-gravitino.md#schema- ### Fileset properties -No. +None. 
### Fileset operations diff --git a/docs/manage-fileset-metadata-using-gravitino.md b/docs/manage-fileset-metadata-using-gravitino.md index 1b4a506890b..c46e9861e56 100644 --- a/docs/manage-fileset-metadata-using-gravitino.md +++ b/docs/manage-fileset-metadata-using-gravitino.md @@ -60,19 +60,16 @@ curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ ```java GravitinoClient gravitinoClient = GravitinoClient .builder("http://127.0.0.1:8090") + .withMetalake("metalake") .build(); -// Assuming you have just created a metalake named `metalake` -GravitinoMetaLake gravitinoMetaLake = - gravitinoClient.loadMetalake(NameIdentifier.of("metalake")); - Map properties = ImmutableMap.builder() .put("location", "file:/tmp/root") // Property "location" is optional, if specified all the managed fileset without // specifying storage location will be stored under this location. .build(); -Catalog catalog = gravitinoMetaLake.createCatalog( +Catalog catalog = gravitinoClient.createCatalog( NameIdentifier.of("metalake", "catalog"), Type.FILESET, "hadoop", // provider, We support only support "hadoop". @@ -154,14 +151,11 @@ curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ ```java GravitinoClient gravitinoClient = GravitinoClient .builder("http://127.0.0.1:8090") + .withMetalake("metalake") .build(); -// Assuming you have just created a metalake named `metalake` -GravitinoMetaLake gravitinoMetaLake = - gravitinoClient.loadMetalake(NameIdentifier.of("metalake")); - // Assuming you have just created a Hadoop catalog named `catalog` -Catalog catalog = gravitinoMetaLake.loadCatalog(NameIdentifier.of("metalake", "catalog")); +Catalog catalog = gravitinoClient.loadCatalog(NameIdentifier.of("metalake", "catalog")); SupportsSchemas supportsSchemas = catalog.asSchemas(); @@ -224,7 +218,7 @@ same. 
### Create a fileset You can create a fileset by sending a `POST` request to the `/api/metalakes/{metalake_name} -/catalogs/{catalog_name}/schemas/{schema_name}/files` endpoint or just use the Gravitino Java +/catalogs/{catalog_name}/schemas/{schema_name}/filesets` endpoint or just use the Gravitino Java client. The following is an example of creating a fileset: @@ -249,13 +243,10 @@ curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ ```java GravitinoClient gravitinoClient = GravitinoClient .builder("http://127.0.0.1:8090") + .withMetalake("metalake") .build(); -// Assuming you have just created a metalake named `metalake` -GravitinoMetaLake gravitinoMetaLake = - gravitinoClient.loadMetalake(NameIdentifier.of("metalake")); - -Catalog catalog = gravitinoMetaLake.loadCatalog(NameIdentifier.of("metalake", "catalog")); +Catalog catalog = gravitinoClient.loadCatalog(NameIdentifier.of("metalake", "catalog")); FilesetCatalog filesetCatalog = catalog.asFilesetCatalog(); Map propertiesMap = ImmutableMap.builder() @@ -279,7 +270,7 @@ Currently, Gravitino supports two **types** of the fileset: - `MANAGED`: The storage location of the fileset is managed by Gravitino, when specified as `MANAGED`, the physical location of the fileset will be deleted when this fileset is dropped. - `EXTERNAL`: The storage location of the fileset is managed by users, when specified as - `EXTERNAL`, the physical location of the fileset will not be deleted when this fileset is + `EXTERNAL`, the physical location of the fileset will **not** be deleted when this fileset is dropped. **storageLocation** @@ -299,8 +290,8 @@ For `MANAGED` fileset, the storage location is: 5. When both catalog property `location` and schema property `location` are not specified, user should specify the `storageLocation` in the fileset creation. -For `EXTERNAL` fileset, the storage location should be specified by user when creating the -fileset via `storageLocation`. 
+For `EXTERNAL` fileset, users must specify `storageLocation` during fileset creation; +otherwise, Gravitino will throw an exception. ### Alter a fileset @@ -333,7 +324,7 @@ curl -X PUT -H "Accept: application/vnd.gravitino.v1+json" \ ```java // ... // Assuming you have just created a Fileset catalog named `catalog` -Catalog catalog = gravitinoMetaLake.loadCatalog(NameIdentifier.of("metalake", "catalog")); +Catalog catalog = gravitinoClient.loadCatalog(NameIdentifier.of("metalake", "catalog")); FilesetCatalog filesetCatalog = catalog.asFilesetCatalog(); @@ -375,7 +366,7 @@ http://localhost:8090/api/metalakes/metalake/catalogs/catalog/schemas/schema/fil ```java // ... // Assuming you have just created a Fileset catalog named `catalog` -Catalog catalog = gravitinoMetaLake.loadCatalog(NameIdentifier.of("metalake", "catalog")); +Catalog catalog = gravitinoClient.loadCatalog(NameIdentifier.of("metalake", "catalog")); FilesetCatalog filesetCatalog = catalog.asFilesetCatalog(); @@ -410,7 +401,7 @@ http://localhost:8090/api/metalakes/metalake/catalogs/catalog/schemas/schema/fil ```java // ... -Catalog catalog = gravitinoMetaLake.loadCatalog(NameIdentifier.of("metalake", "catalog")); +Catalog catalog = gravitinoClient.loadCatalog(NameIdentifier.of("metalake", "catalog")); FilesetCatalog filesetCatalog = catalog.asFilesetCatalog(); NameIdentifier[] identifiers =