Skip to content

Commit

Permalink
Add spark exec
Browse files Browse the repository at this point in the history
  • Loading branch information
lukemartinlogan committed Mar 30, 2024
1 parent 11f5a15 commit cdfe467
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions jarvis_util/shell/spark_exec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
This module provides methods to execute a process in parallel using the
Message Passing Interface (MPI). This module assumes MPI is installed
on the system. This class is intended to be called from Exec,
not by general users.
"""

from jarvis_util.jutil_manager import JutilManager
from jarvis_util.shell.local_exec import LocalExec
from .exec_info import ExecInfo, ExecType
from abc import abstractmethod


class SparkExec(LocalExec):
def __init__(self, cmd, master_host, master_port,
driver_mem='1g', executor_mem='1g', scratch='/tmp', replication=1,
exec_info=None):
master_url = f'spark://{master_host}:{master_port}'
sparkcmd = [
'spark-submit',
f'--master {master_url}',
f'--driver-memory {driver_mem}',
f'--executor-memory {executor_mem}',
f'--conf spark.speculation=false',
f'--conf spark.storage.replication={replication}',
f'--conf spark.local.dir={scratch}',
cmd
]
sparkcmd = ' '.join(cmd)
super().__init__(sparkcmd, exec_info)

0 comments on commit cdfe467

Please sign in to comment.