diff --git a/config/devstack.cfg b/config/devstack.cfg
index 3a3c6ee00c..afb769340f 100644
--- a/config/devstack.cfg
+++ b/config/devstack.cfg
@@ -120,3 +120,9 @@ api_root_url = http://localhost:8000/api/courses/v1/courses/
 
 [course-blocks]
 api_root_url = http://localhost:8000/api/courses/v1/blocks/
+
+[spark]
+driver-memory=3g
+executor-memory=3g
+executor-cores=1
+num-executors=1
\ No newline at end of file
diff --git a/edx/analytics/tasks/common/spark.py b/edx/analytics/tasks/common/spark.py
index 60fc89ba45..fcca7285f1 100644
--- a/edx/analytics/tasks/common/spark.py
+++ b/edx/analytics/tasks/common/spark.py
@@ -206,8 +206,26 @@ class SparkJobTask(OverwriteOutputMixin, PySparkTask):
     _hive_context = None
     _tmp_dir = None
 
-    driver_memory = '2g'
-    executor_memory = '3g'
+    driver_memory = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'driver-memory'},
+        description='Memory for spark driver',
+        significant=False,
+    )
+    executor_memory = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'executor-memory'},
+        description='Memory for each executor',
+        significant=False,
+    )
+    executor_cores = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'executor-cores'},
+        description='No. of cores for each executor',
+        significant=False,
+    )
+    num_executors = luigi.Parameter(
+        config_path={'section': 'spark', 'name': 'num-executors'},
+        description='Total no. of executors for a job',
+        significant=False,
+    )
     always_log_stderr = False  # log stderr if spark fails, True for verbose log
 
     def init_spark(self, sc):
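A minimal sketch (not part of this diff) of what the change above does: with `config_path`, luigi falls back to the matching entry in the `[spark]` section of the active config file (e.g. the `devstack.cfg` hunk above) when a parameter is not supplied, and a command-line flag still takes precedence. `ExampleTask` and its module are hypothetical names for illustration only.

```python
import luigi


class ExampleTask(luigi.Task):
    # With no explicit value, luigi reads [spark] driver-memory from the
    # loaded config file; with the devstack.cfg above this yields '3g'.
    driver_memory = luigi.Parameter(
        config_path={'section': 'spark', 'name': 'driver-memory'},
        description='Memory for spark driver',
        significant=False,
    )

    def run(self):
        print(self.driver_memory)


# A command-line value overrides the config-file fallback, e.g.:
#   luigi --module example ExampleTask --driver-memory 4g --local-scheduler
```

Because the parameters are declared `significant=False`, the cluster-sizing values do not feed into the task's identity, so the same job run with different memory or executor settings is still treated as the same task instance.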