-
Notifications
You must be signed in to change notification settings - Fork 11
/
runfft
executable file
·200 lines (175 loc) · 5.51 KB
/
runfft
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env bash
#
# Script for running the Splash2 FFT benchmark in gem5 SE mode with a Garnet2.0
# network
#
# Usage: ./runfft n_cpus n_rows concentration_factor topology_py_file M cpu_clock
#
# n_cpus: number of cpu's
# n_rows: number of rows in the topology structure
# concentration_factor: number of cpu's per router
# topology_py_file: name of the .py-file in configs/topologies/
# M: problem size = 2^M data points, must be an even number
# The base problem size for an up-to-64 processor
# machine is 65,536 complex data points (M=16)
# cpu_clock: clock frequency for all cpu's
# default: 1GHz
# Defaults:
PROBLEM_SIZE=18 # Problem size M (2^M data points)
HIDEWARNERROR=0 # Bool: 1 sends gem5 warnings and errors (stderr) to /dev/null
HIDESTDOUT=0 # Bool: 1 sends gem5 stdout to /dev/null
NVCS=4 # Number of virtual channels (VC) per virtual network
NBUFFERS_PER_DATA_VC=8 # Number of buffers per data VC
NBUFFERS_PER_CTRL_VC=1 # Number of buffers per control VC
LINKWIDTHBITS=512 # Width in bits for all links inside the network
DEADLOCKTHRESHOLD=5000000 # Network-level deadlock threshold
CONCENTRATION_FACTOR=4 # Number of CPUs per router
NCPUS=16 # Number of CPUs
NDIRS=-1 # Number of directory controllers, must be <= NCPUS;
# set to -1 to use NCPUS / CONCENTRATION_FACTOR (one per router), capped at 256.
NROWS=4 # Number of rows in the topology structure
TOPOLOGY=Mesh_XY # Name of the topology .py file in configs/topologies/
ROUTINGALGORITHM=0 # Routing_algorithm: routing algorithm in network,
# implemented in src/mem/ruby/network/garnet2.0/RoutingUnit.cc:
# 0: Weight-based table (shortest path)
# 1: XY (for Mesh)
# 2: Random (custom)
# 3: Adaptive (not implemented)
# Get parameter arguments
#
# Positional arguments (only the first one is mandatory):
#   $1 n_cpus                -> NCPUS (unsigned integer; anything else prints help)
#   $2 n_rows                -> NROWS
#   $3 concentration_factor  -> CONCENTRATION_FACTOR
#   $4 topology_py_file      -> TOPOLOGY
#   $5 M                     -> PROBLEM_SIZE
#   $6 cpu_clock             -> CLOCK (1GHz when omitted)
# Omitted optional arguments keep the values assigned in the Defaults section.
if [[ $# -gt 0 && $1 =~ ^[0-9]+$ ]]; then
  NCPUS=$1
else
  # Print help info: a fixed line range of this script. "$0" is quoted so the
  # help still works when the script path contains spaces.
  sed -n "3,36p" "$0"
  exit
fi

# n_rows and concentration_factor are used as divisors and in integer
# comparisons further down, so reject anything but a positive integer here
# instead of failing later with an arithmetic error.
if [[ $# -gt 1 ]]; then
  if [[ ! $2 =~ ^[1-9][0-9]*$ ]]; then
    printf 'n_rows must be a positive integer, got "%s"\n' "$2" >&2
    exit 1
  fi
  NROWS=$2
fi
if [[ $# -gt 2 ]]; then
  if [[ ! $3 =~ ^[1-9][0-9]*$ ]]; then
    printf 'concentration_factor must be a positive integer, got "%s"\n' "$3" >&2
    exit 1
  fi
  CONCENTRATION_FACTOR=$3
fi
if [[ $# -gt 3 ]]; then
  TOPOLOGY=$4
fi
# No else-branch: when M is omitted, PROBLEM_SIZE keeps the value from the
# Defaults section rather than being reset to a second hard-coded copy.
if [[ $# -gt 4 ]]; then
  PROBLEM_SIZE=$5
fi
PROBLEM_SIZE_STRING="--psize=${PROBLEM_SIZE}"
CLOCK=1GHz
if [[ $# -gt 5 ]]; then
  CLOCK=$6
fi
# Set the number of directory controllers, with a maximum of 256.
# NDIRS == -1 means "derive it": one directory controller per router,
# i.e. NCPUS divided by the number of CPUs per router.
if [[ "$NDIRS" -eq -1 ]]; then
  NDIRS=$((NCPUS / CONCENTRATION_FACTOR))
fi
if [[ "$NDIRS" -gt 256 ]]; then
  NDIRS=256
fi
# To avoid deadlock in HierarchicalRing topologies with NROWS>4,
# limit NDIRS to NROWS and limit NCPUS to 128
if [[ "$TOPOLOGY" == "HierarchicalRing" ]]; then
  if [[ "$NCPUS" -gt 128 ]]; then
    # This is an error path: report on stderr and exit non-zero so that
    # calling scripts do not mistake the aborted run for a successful one.
    echo "HierarchicalRing is limited to 128 cores. Exiting..." >&2
    exit 1
  fi
  if [[ "$NROWS" -gt 4 ]]; then
    NDIRS=$NROWS
  fi
fi
# Escape VCs: the flag is set only for the Ring topology; every other
# topology gets an empty string.
# NOTE(review): the gem5 command line assembled later in this script does not
# reference USE_ESCAPE_VC - confirm whether the flag is meant to be passed.
case "$TOPOLOGY" in
  Ring)
    USE_ESCAPE_VC="--escapevc"
    ;;
  *)
    USE_ESCAPE_VC=""
    ;;
esac
# Derive the column count and the concentration-related strings.
# CONCENTRATION_FACTOR enters as a number (CPUs per router) and leaves as the
# ready-made gem5 flag (or empty), because the command line built later
# expands it verbatim. CONCENTRATION_STRING becomes part of the output dir name.
cpus_per_router=$CONCENTRATION_FACTOR
if [ "$cpus_per_router" -gt 1 ]; then
  # Several CPUs share one router, so the grid needs fewer columns.
  NCOLS=$(($NCPUS / $NROWS / $cpus_per_router))
  CONCENTRATION_STRING="-${cpus_per_router}-cpus_per_router"
  CONCENTRATION_FACTOR="--concentration-factor=${cpus_per_router}"
else
  # One CPU per router: plain rows x columns layout, no extra flag or suffix.
  NCOLS=$(($NCPUS / $NROWS))
  CONCENTRATION_STRING=""
  CONCENTRATION_FACTOR=""
fi
# Translate the numeric routing-algorithm id into a readable name.
# Suffixing the output dir name with it is currently disabled: RALGNAME is
# computed here, but the OUTDIR name built below does not include it.
routing_names=(
  weighted_table_routing
  mesh_xy_routing
  random_routing
  adaptive_routing
)
if [[ "$ROUTINGALGORITHM" =~ ^[0-3]$ ]]; then
  RALGNAME=${routing_names[$ROUTINGALGORITHM]}
else
  RALGNAME=unknown_routing
fi
# Base output directory name, encoding the main run parameters.
OUTDIR="m5out/FFT-M${PROBLEM_SIZE}-${TOPOLOGY}-${NCPUS}core-${NROWS}x${NCOLS}${CONCENTRATION_STRING}-${CLOCK}"
# NOTE(review): LATENCY_FILE is derived from the base name, before any "-N"
# suffix is appended below, and is not referenced elsewhere in the visible
# script - confirm whether it is still needed.
LATENCY_FILE="${OUTDIR}-latency.txt"
# Generate output dir name: if the base directory already exists, use the
# first free "<base>-2" .. "<base>-99" instead so earlier results are kept.
if [[ -d "$OUTDIR" ]]; then
  MAXOUTDIRS=99
  outdir_is_fresh=0
  for ((i = 2; i <= MAXOUTDIRS; i++)); do
    NOUTDIR="${OUTDIR}-${i}"
    if [[ ! -d "$NOUTDIR" ]]; then
      OUTDIR=$NOUTDIR
      outdir_is_fresh=1
      break
    fi
  done
  if [[ "$outdir_is_fresh" -eq 0 ]]; then
    # All suffixes are taken: the run proceeds in the existing base directory
    # (as before), but no longer silently.
    echo "Warning: ${OUTDIR} and its -2..-${MAXOUTDIRS} variants all exist; reusing ${OUTDIR}" >&2
  fi
fi
# Without an output directory the simulation cannot store results; stop here.
mkdir -p "$OUTDIR" || exit 1
# Turn the two boolean switches into the gem5 options they stand for.
# Each variable is overwritten in place: 1 becomes the option string, anything
# else becomes empty, so the command line built later can expand it verbatim.

# Redirect stdout to /dev/null
stdout_opts=""
if [ "$HIDESTDOUT" -eq 1 ]; then
  stdout_opts="-r --stdout-file=/dev/null"
fi
HIDESTDOUT=$stdout_opts

# Redirect stderr to /dev/null
stderr_opts=""
if [ "$HIDEWARNERROR" -eq 1 ]; then
  stderr_opts="-e --stderr-file=/dev/null"
fi
HIDEWARNERROR=$stderr_opts
# Set environment variable for recognizing simulation type in gem5 source files
export GEM5SIMTYPE=GarnetStandalone

# Assemble the gem5 command line as a single space-separated string.
# It is expanded unquoted when executed, so every value must be free of
# whitespace, and empty values (e.g. no concentration flag) simply vanish.

# gem5 binary with its own options, output dir and the benchmark config script
RUNCMD="./build/X86_MESI_Two_Level/gem5.fast -v $HIDEWARNERROR $HIDESTDOUT"
RUNCMD+=" -d $OUTDIR configs/example/fft_benchmark.py -n $NCPUS"
RUNCMD+=" $PROBLEM_SIZE_STRING"
# Processors, directory controllers and clocks
RUNCMD+=" --num-cpus=$NCPUS"
RUNCMD+=" --num-dirs=$NDIRS"
RUNCMD+=" --cpu-type=DerivO3CPU"
RUNCMD+=" --cpu-clock=$CLOCK"
RUNCMD+=" --ruby-clock=$CLOCK"
# Memory and cache hierarchy
RUNCMD+=" --mem-size=1GB"
RUNCMD+=" --l1d_size=32kB"
RUNCMD+=" --l1i_size=32kB"
RUNCMD+=" --l1d_assoc=4"
RUNCMD+=" --l1i_assoc=4"
RUNCMD+=" --num-l2caches=$NCPUS"
RUNCMD+=" --l2_size=1024kB"
RUNCMD+=" --l2_assoc=8"
# Ruby memory system and Garnet network
RUNCMD+=" --ruby"
RUNCMD+=" --network=garnet2.0"
RUNCMD+=" --topology=$TOPOLOGY"
RUNCMD+=" --mesh-rows=$NROWS"
RUNCMD+=" --routing-algorithm=$ROUTINGALGORITHM"
RUNCMD+=" --link-width-bits=$LINKWIDTHBITS"
RUNCMD+=" --vcs-per-vnet=$NVCS"
RUNCMD+=" --buffers-per-data-vc=$NBUFFERS_PER_DATA_VC"
RUNCMD+=" --buffers-per-ctrl-vc=$NBUFFERS_PER_CTRL_VC"
RUNCMD+=" --garnet-deadlock-threshold=$DEADLOCKTHRESHOLD"
# Either empty or the complete --concentration-factor=N flag
RUNCMD+=" $CONCENTRATION_FACTOR"
#######################################
# Run the given command with its arguments exactly as passed.
# Despite the name, no redirection is applied here.
# Arguments: $@ - command followed by its arguments
# Returns:   the exit status of the executed command
#######################################
redirect_cmd() {
  local cmd_status
  "$@"
  cmd_status=$?
  return "$cmd_status"
}
# Run the simulation. RUNCMD is a flat string, so it is expanded unquoted on
# purpose: word splitting turns it into the command and its arguments.
# shellcheck disable=SC2086
redirect_cmd $RUNCMD
sim_status=$?
if [[ "$sim_status" -eq 0 ]]; then
  # Copy network-related stats to network_stats.txt
  python grepnetworkstats.py "$OUTDIR"
  # Generate topology.png from topology.tex
  ./tex2png "$OUTDIR" > /dev/null
  # Calculate power and area with DSENT
  #./rundsent "$OUTDIR"
else
  # Best-effort cleanup: rmdir only removes the directory if the failed run
  # left it empty, so its own errors are deliberately discarded.
  rmdir "$OUTDIR" &> /dev/null
  # Hand the simulator's failure status to the caller instead of rmdir's.
  exit "$sim_status"
fi