Rework model/batch size configuration again #14

Status: Open. Wants to merge 30 commits into base branch `dev`.

Changes from all 30 commits:
- a3eb3c0: update benchmarks repo (jeremybobbin, Jul 4, 2020)
- f442879: report average - not total (jeremybobbin, Jul 4, 2020)
- 33dd6fc: make adjustments for reworked report.sh script (jeremybobbin, Jul 4, 2020)
- 3a3e9a5: commentary (Apr 17, 2020)
- c21198f: report gigabytes instead of gibibytes (Apr 20, 2020)
- dada317: Change GPU VRAM's(given in MiB) divisor to 1024(to GiB) (jeremybobbin, Jun 12, 2020)
- 1680bed: Panic when GPU_RAM is undefined or doesn't have a configuration (jeremybobbin, Jun 12, 2020)
- 0b1521e: Use lambda repo for benchmarks (chuanli11, Jun 11, 2020)
- 94fb779: Update README.md (chuanli11, Jun 12, 2020)
- 48bd745: run only python3 (jeremybobbin, Jun 24, 2020)
- c00df0e: Add amd support to tf2 (chuanli11, Jun 12, 2020)
- 043088d: calculate optimal batchsize (jeremybobbin, Jul 3, 2020)
- c68f645: fmt (jeremybobbin, Jul 3, 2020)
- 5f9ba51: consider precision in batchsize calculation (jeremybobbin, Jul 3, 2020)
- 8fecc5c: expose fn to calculate batch size instead of manipulating directly (jeremybobbin, Jul 3, 2020)
- f8ff897: move batch_size function to main script (jeremybobbin, Jul 3, 2020)
- b44486e: Pass the name of the config file as a parameter (jeremybobbin, Jul 4, 2020)
- 0387e05: add fp32 resnet50 config (jeremybobbin, Jul 4, 2020)
- a305b75: assert that GPUs are available before running benchmarks (jeremybobbin, Jul 3, 2020)
- 5ac3702: write benchmark entries to log.csv (jeremybobbin, Jul 3, 2020)
- 1503085: add timestamp (jeremybobbin, Jul 3, 2020)
- d05d847: better CPU name (jeremybobbin, Jul 3, 2020)
- 950d5f2: don't allow different GPU models to be run (jeremybobbin, Jul 3, 2020)
- f544630: refactor GPU homogeny check & setting of GPU_NAME (jeremybobbin, Jul 3, 2020)
- 8de9c6f: only swap whitespace in CPU_NAME for log dir (jeremybobbin, Jul 3, 2020)
- f89cfe5: add CPU_NAME to csv (jeremybobbin, Jul 3, 2020)
- 7378643: log motherboard name (jeremybobbin, Jul 3, 2020)
- bf6b04f: adjust Tensorflow version in README (jeremybobbin, Jul 4, 2020)
- 077d165: add options to benchmark - adjust readme accordingly (jeremybobbin, Jul 4, 2020)
- 02e4d1d: wrap min to max GPU seq loop around main loop (jeremybobbin, Jul 4, 2020)
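Several of the commits above (043088d "calculate optimal batchsize", 5f9ba51 "consider precision in batchsize calculation", 8fecc5c "expose fn to calculate batch size") derive the batch size from per-GPU VRAM and numeric precision. The PR's actual formula is not visible in this page, so the sketch below only illustrates the shape of such a calculation; the function name and the per-GiB constants are invented:

```shell
#!/bin/sh
# Illustrative only: pick a batch size from VRAM (in MiB, as nvidia-smi
# reports it) and precision. The per-GiB constants are made up.
batch_size() {
  vram_mib=$1
  precision=$2

  # Mirrors commit 1680bed: fail loudly when GPU RAM is unknown.
  [ -n "$vram_mib" ] || { echo "error: GPU_RAM is undefined" >&2; return 1; }

  gib=$((vram_mib / 1024))   # MiB -> GiB, as in commit dada317

  # Mirrors commit 5f9ba51: fp16 samples take roughly half the memory,
  # so twice as many fit per GiB.
  case $precision in
    fp16) per_gib=16 ;;
    fp32) per_gib=8 ;;
    *) echo "error: unknown precision: $precision" >&2; return 1 ;;
  esac

  echo $((gib * per_gib))
}

batch_size 11019 fp32   # an 11 GiB 2080 Ti -> prints 80 with these constants
```

The point of exposing this as a function (commit 8fecc5c) is that callers ask for a batch size rather than having the script mutate configuration directly.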
3 changes: 2 additions & 1 deletion .gitmodules
```diff
@@ -1,3 +1,4 @@
 [submodule "benchmarks"]
 	path = benchmarks
-	url = http://github.com/lambdal/benchmarks
+	url = https://github.com/tensorflow/benchmarks.git
+	branch = master
```
50 changes: 41 additions & 9 deletions README.md
```diff
@@ -5,7 +5,7 @@ This is the code used for a few of the blog posts on: https://lambdalabs.com/blo

 Environment:
 - OS: Ubuntu 18.04
-- TensorFlow version: 1.14.0
+- TensorFlow version: 1.15.3
 - CUDA Version 10.0
 - CUDNN Version 7.6.2
```
````diff
@@ -19,15 +19,47 @@ git clone https://github.com/lambdal/lambda-tensorflow-benchmark.git
 #### Step Two: Run benchmark with thermal profile

 ```
-TF_XLA_FLAGS=--tf_xla_auto_jit=2 ./batch_benchmark.sh min_num_gpus max_num_gpus num_runs num_batches_per_run thermal_sampling_frequency
+./benchmark.sh -l <min_num_gpus> -h <max_num_gpus> -n <num_runs> -b <num_batches_per_run> -t <thermal_sampling_frequency>
 python display_thermal.py path-to-thermal.log --thermal_threshold

-# example of benchmarking 4 2080_Ti (all used), 1 run, 200 batches per run, measuring thermal every 2 second. 2080_Ti throttles at 89 C.
-TF_XLA_FLAGS=--tf_xla_auto_jit=2 ./batch_benchmark.sh 4 4 1 200 2
-python display_thermal.py i9-7920X-GeForce_RTX_2080_Ti.logs/resnet152-syn-replicated-fp32-4gpus-32-1-thermal.log --thermal_threshold 89
+# example of benchmarking 4 2080_Ti (all used), 1 run, 100 batches per run, measuring thermals every 2 seconds. The 2080_Ti throttles at 89 C.
+./benchmark.sh -l 4 -h 4 -n 1 -b 100 -t 2 -c config_resnet50_replicated_fp32_train_syn
+python display_thermal.py path-to-thermal/1 --thermal_threshold 89

 ```

+#### AMD
+
+Follow the guidance [here](https://github.com/ROCmSoftwarePlatform/tensorflow-upstream)
+
+```
+alias drun='sudo docker run \
+      -it \
+      --network=host \
+      --device=/dev/kfd \
+      --device=/dev/dri \
+      --ipc=host \
+      --shm-size 16G \
+      --group-add video \
+      --cap-add=SYS_PTRACE \
+      --security-opt seccomp=unconfined \
+      -v $HOME/dockerx:/dockerx'
+
+drun rocm/tensorflow:latest
+
+apt install rocm-libs hipcub miopen-hip
+pip3 install --user tensorflow-rocm --upgrade
+pip3 install tensorflow
+
+cd /home/dockerx
+git clone https://github.com/lambdal/lambda-tensorflow-benchmark.git --recursive
+git checkout tf2
+git submodule update --init --recursive
+
+./benchmark.sh -l 1 -h 1 -n 1 -b 100 -t 2 -c config_resnet50_replicated_fp32_train_syn
+```
+
 #### Note

 Use large num_batches_per_run for a thorough test.
````
````diff
@@ -38,23 +70,23 @@ Use large num_batches_per_run for a thorough test.
 * Input proper gpu_indices (a comma separated list, default 0) and num_iterations (default 10)
 ```
 cd lambda-tensorflow-benchmark
-./benchmark.sh gpu_indices num_iterations
+./benchmark.sh -i <gpu_indices> -n <num_iterations>
 ```

 #### Step Three: Report results

 * Check the repo directory for folder \<cpu>-\<gpu>.logs (generated by benchmark.sh)
 * Use the same num_iterations and gpu_indices for both benchmarking and reporting.
 ```
-./report.sh <cpu>-<gpu>.logs num_iterations gpu_indices
+./report.sh <cpu>-<gpu>.logs
 ```

 #### Batch process:

 ```
-TF_XLA_FLAGS=--tf_xla_auto_jit=2 ./batch_benchmark.sh min_num_gpus max_num_gpus num_iterations
+TF_XLA_FLAGS=--tf_xla_auto_jit=2 ./benchmark.sh -l <min_num_gpus> -h <max_num_gpus> -n <num_iterations>

-./batch_report.sh <cpu>-<gpu>.logs min_num_gpus max_num_gpus num_iterations
+./report.sh <cpu>-<gpu>.logs

 ./gether.sh
 ```
````
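Beyond the workflow changes shown in the README diff, commits 5ac3702, 1503085, and f89cfe5 in this PR add per-run CSV logging (benchmark entries, a timestamp, and CPU_NAME). A hedged sketch of what such logging might look like; the column set and function name here are illustrative guesses, not the PR's actual schema:

```shell
#!/bin/sh
# Illustrative only: append one benchmark entry to log.csv.
log_entry() {
  csv=$1 cpu=$2 gpu=$3 config=$4 throughput=$5
  # Write the header once, then append one row per benchmark run.
  [ -f "$csv" ] || echo "timestamp,cpu_name,gpu_name,config,throughput" > "$csv"
  printf '%s,%s,%s,%s,%s\n' "$(date +%s)" "$cpu" "$gpu" "$config" "$throughput" >> "$csv"
}

log_entry log.csv i9-7920X GeForce_RTX_2080_Ti config_resnet50_replicated_fp32_train_syn 314.2
```

An append-only CSV like this lets `gether.sh` (or any spreadsheet) aggregate results across machines without parsing the per-run log directories.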
25 changes: 0 additions & 25 deletions batch_benchmark.sh

This file was deleted.

26 changes: 0 additions & 26 deletions batch_report.sh

This file was deleted.
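Finally, commits 950d5f2 and f544630 make the benchmark refuse to run across mixed GPU models and factor that check out of the main loop while setting GPU_NAME. A minimal sketch of such a homogeneity check; the function name and structure are illustrative, not the PR's code:

```shell
#!/bin/sh
# Illustrative only: fail unless every reported GPU model matches,
# then record the single model name in GPU_NAME.
check_gpu_homogeneity() {
  # Args: GPU model names, e.g. from:
  #   nvidia-smi --query-gpu=name --format=csv,noheader
  distinct=$(printf '%s\n' "$@" | sort -u | wc -l)
  if [ "$distinct" -ne 1 ]; then
    echo "error: mixed GPU models detected; refusing to run" >&2
    return 1
  fi
  GPU_NAME=$1
}
```

Refusing mixed models keeps the log directory name `<cpu>-<gpu>.logs` and the per-model batch-size choice unambiguous.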
