Skip to content

Commit

Permalink
YCDOCS-18586: Data Proc renaming (#3)
Browse files Browse the repository at this point in the history
* Update README.md

* Update data-proc-for-spark-jobs.tf
  • Loading branch information
lanieuszko authored Oct 31, 2024
1 parent 8814891 commit 36c9cb1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 17 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Запуск и управление приложениями для Spark и PySpark в сервисе Yandex Data Proc
# Запуск и управление приложениями для Spark и PySpark в сервисе Yandex Data Processing

В кластере [Yandex Data Proc](https://yandex.cloud/ru/docs/data-proc) вы можете запустить Spark- и PySpark-задания с помощью инструментов:
В кластере [Yandex Data Processing](https://yandex.cloud/ru/docs/data-proc) вы можете запустить Spark- и PySpark-задания с помощью инструментов:

* [Spark Shell](https://spark.apache.org/docs/latest/quick-start) (командная оболочка для языков программирования Scala и Python). Расчеты запускаются не с помощью скрипта, а построчно.
* [Spark-submit](https://spark.apache.org/docs/latest/submitting-applications.html#submitting-applications). Скрипт сохраняет результаты расчета в HDFS.
* [CLI Yandex Cloud](https://yandex.cloud/ru/docs/cli/). Команды CLI позволяют сохранить результаты расчета не только в HDFS, но и в бакете [Yandex Object Storage](https://yandex.cloud/ru/docs/storage).

Подготовка инфраструктуры для Yandex Data Proc через Terraform описана в [практическом руководстве](https://yandex.cloud/ru/docs/data-proc/tutorials/run-spark-job), необходимый для настройки конфигурационный файл [data-proc-for-spark-jobs.tf](data-proc-for-spark-jobs.tf) расположен в этом репозитории.
Подготовка инфраструктуры для Yandex Data Processing через Terraform описана в [практическом руководстве](https://yandex.cloud/ru/docs/data-proc/tutorials/run-spark-job), необходимый для настройки конфигурационный файл [data-proc-for-spark-jobs.tf](data-proc-for-spark-jobs.tf) расположен в этом репозитории.
28 changes: 14 additions & 14 deletions data-proc-for-spark-jobs.tf
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
# Infrastructure for Yandex Data Proc cluster with NAT gateway
# Infrastructure for a Yandex Data Processing cluster with a NAT gateway
#
# RU: https://cloud.yandex.ru/docs/data-proc/tutorials/configure-network
# EN: https://cloud.yandex.com/en-ru/docs/data-proc/tutorials/configure-network

# Specify the following settings:
# User-editable settings, referenced elsewhere in this file as local.<name>.
locals {
folder_id = "" # Cloud folder ID, the same as for the provider
dp_ssh_key = "" # Absolute path to the SSH public key for the Data Proc cluster. Example: "~/.ssh/key.pub"
dp_ssh_key = "" # Absolute path to the SSH public key for the Yandex Data Processing cluster. Example: "~/.ssh/key.pub"
# NOTE(review): the example "~/.ssh/key.pub" is home-relative, not absolute — confirm which form the consumer of dp_ssh_key expects.

# The following settings are predefined. Change them only if necessary.
# NOTE(review): "data-proc_network" uses an underscore while every other predefined name is hyphenated — confirm this is intentional.
network_name = "data-proc_network" # Name of the network
nat_name = "nat-gateway" # Name of the NAT gateway
routing_table_name = "data-proc-routing-table" # Name of the routing table
subnet_name = "data-proc-subnet-a" # Name of the subnet
security_group_name = "data-proc-security-group" # Name of the security group
data_proc_sa_name = "data-proc-sa" # Name of the service account to manage the Data Proc cluster
data_proc_sa_name = "data-proc-sa" # Name of the service account to manage the Yandex Data Processing cluster
bucket_name = "data-proc-bucket" # Set a unique bucket name
data_proc_cluster_name = "data-proc-cluster" # Name of the Data Proc cluster
data_proc_version = "2.0" # Version of the Data Proc cluster
data_proc_cluster_name = "data-proc-cluster" # Name of the Yandex Data Processing cluster
data_proc_version = "2.0" # Version of the Yandex Data Processing cluster
}

# VPC network for the cluster. Its ID is referenced as network_id by the
# routing table, subnet, and security group resources in this file.
resource "yandex_vpc_network" "data-proc-network" {
description = "Network for the Data Proc cluster"
description = "Network for the Yandex Data Processing cluster"
name = local.network_name # Predefined in locals
}

# NAT gateway for Data Proc
# NAT gateway for Yandex Data Processing
resource "yandex_vpc_gateway" "nat-gateway" {
name = local.nat_name # Predefined in locals as "nat-gateway"
# Empty block, no settings are passed.
# NOTE(review): presumably this selects the shared egress (NAT) gateway type — confirm against the provider documentation.
shared_egress_gateway {}
}

# Routing table for Data Proc
# Routing table for Yandex Data Processing
resource "yandex_vpc_route_table" "data-proc-routing-table" {
name = local.routing_table_name
network_id = yandex_vpc_network.data-proc-network.id
Expand All @@ -43,7 +43,7 @@ resource "yandex_vpc_route_table" "data-proc-routing-table" {
}

resource "yandex_vpc_subnet" "data-proc-subnet" {
description = "Subnet for the Data Proc cluster"
description = "Subnet for the Yandex Data Processing cluster"
name = local.subnet_name
network_id = yandex_vpc_network.data-proc-network.id
v4_cidr_blocks = ["192.168.1.0/24"]
Expand All @@ -52,7 +52,7 @@ resource "yandex_vpc_subnet" "data-proc-subnet" {
}

resource "yandex_vpc_security_group" "data-proc-security-group" {
description = "Security group for the Data Proc cluster"
description = "Security group for the Yandex Data Processing cluster"
name = local.security_group_name
network_id = yandex_vpc_network.data-proc-network.id

Expand Down Expand Up @@ -100,21 +100,21 @@ resource "yandex_iam_service_account" "data-proc-sa" {
name = local.data_proc_sa_name
}

# Assign the "dataproc.agent" role to the Data Proc service account
# Assign the "dataproc.agent" role to the Yandex Data Processing service account
resource "yandex_resourcemanager_folder_iam_member" "sa-dataproc-agent" {
folder_id = local.folder_id # Folder ID from locals; empty by default and must be filled in by the user
role = "dataproc.agent"
# The member is the data-proc-sa service account, identified by its ID.
member = "serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"
}

# Assign the "dataproc.provisioner" role to the Data Proc service account
# Assign the "dataproc.provisioner" role to the Yandex Data Processing service account
resource "yandex_resourcemanager_folder_iam_member" "sa-dataproc-provisioner" {
folder_id = local.folder_id # Folder ID from locals; empty by default and must be filled in by the user
role = "dataproc.provisioner"
# The member is the data-proc-sa service account, identified by its ID.
member = "serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"
}

# Assign the "storage.admin" role to the Data Proc service account
# Assign the "storage.admin" role to the Yandex Data Processing service account
resource "yandex_resourcemanager_folder_iam_member" "sa-storage-admin" {
folder_id = local.folder_id
role = "storage.admin"
Expand All @@ -139,7 +139,7 @@ resource "yandex_storage_bucket" "data-proc-bucket" {
}

resource "yandex_dataproc_cluster" "data-proc-cluster" {
description = "Yandex Data Proc cluster"
description = "Yandex Data Processing cluster"
name = local.data_proc_cluster_name
service_account_id = yandex_iam_service_account.data-proc-sa.id
zone_id = "ru-central1-a"
Expand Down

0 comments on commit 36c9cb1

Please sign in to comment.