Skip to content

Commit

Permalink
Merge pull request GoogleCloudPlatform#3946 from gadgilrajeev:gadgilr…
Browse files Browse the repository at this point in the history
…ajeev/hadoop-blocksize-variable

PiperOrigin-RevId: 511315170
  • Loading branch information
copybara-github committed Feb 21, 2023
2 parents 4f5b863 + f7bf196 commit cb95d65
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
</property>
<property>
<name>dfs.block.size</name>
<!-- 128 MB -->
<value>134217728</value>
<value>{{ block_size }}</value>
</property>
<!-- TODO(pclay): consider dfs.client.read.shortcircuit (requires native libs) -->
<!-- https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html -->
Expand Down
12 changes: 10 additions & 2 deletions perfkitbenchmarker/linux_packages/hadoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,14 @@
# Optional string flag (default None). Per its help text, setting it overrides
# the download URL that would otherwise come from HADOOP_URL_BASE.
_URL_OVERRIDE = flags.DEFINE_string(
    name='hadoop_bin_url',
    default=None,
    help='Specify to override url from HADOOP_URL_BASE.',
)

# HDFS block size in MiB (default 128); the rendering code multiplies it by
# 1024 * 1024 to get bytes for the config template.
# The lower bound rejects zero and negative values at flag-parsing time: the
# rendering code visible in this file assigns `block_size` only when this flag
# is truthy, so a value of 0 would leave `block_size` unbound when the template
# context is built, and a negative value would render a negative block size.
_BLOCKSIZE_OVERRIDE = flags.DEFINE_integer(
    'hadoop_hdfs_blocksize', 128,
    'Blocksize in MiB to be used by the HDFS filesystem. '
    'This is the chunksize in which the HDFS file will be divided into.',
    lower_bound=1)

# Config files/templates that the rendering code iterates over
# (`for file_name in DATA_FILES`).
# NOTE(review): both 'hadoop/hdfs-site.xml' and 'hadoop/hdfs-site.xml.j2' are
# listed below, and 'hadoop/mapred-site.xml.j2' appears twice. This looks like
# the removed and added sides of a diff rendered together; presumably only the
# '.j2' line is meant to remain -- confirm against the actual file.
DATA_FILES = [
'hadoop/core-site.xml.j2', 'hadoop/yarn-site.xml.j2',
'hadoop/hdfs-site.xml', 'hadoop/mapred-site.xml.j2',
'hadoop/hdfs-site.xml.j2', 'hadoop/mapred-site.xml.j2',
'hadoop/hadoop-env.sh.j2', 'hadoop/workers.j2'
]
START_HADOOP_SCRIPT = 'hadoop/start-hadoop.sh.j2'
Expand Down Expand Up @@ -200,6 +205,8 @@ def _RenderConfig(vm,
# This determines the number of reduce tasks in Terasort and is critical to
# scale with the cluster.
num_reduce_tasks = reduces_per_node * num_workers
if _BLOCKSIZE_OVERRIDE.value:
block_size = _BLOCKSIZE_OVERRIDE.value * 1024 * 1024

if vm.scratch_disks:
# TODO(pclay): support multiple scratch disks. A current suboptimal
Expand Down Expand Up @@ -231,7 +238,8 @@ def _RenderConfig(vm,
'num_reduce_tasks': num_reduce_tasks,
'aws_access_key': aws_access_key,
'aws_secret_key': aws_secret_key,
'optional_tools': optional_tools
'optional_tools': optional_tools,
'block_size': block_size
}

for file_name in DATA_FILES:
Expand Down

0 comments on commit cb95d65

Please sign in to comment.