forked from etmc/tmLQCD
-
Notifications
You must be signed in to change notification settings - Fork 2
/
configure.in
910 lines (821 loc) · 28.2 KB
/
configure.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
#
# Process this file with autoconf to produce a configure script
#
dnl Package identity, generated config header and source-tree sanity file.
dnl NOTE(review): the bug-report address in AC_INIT looks like an e-mail
dnl obfuscation artifact; confirm the intended address.
AC_PREREQ(2.59)
AC_INIT(tmLQCD, 5.1.6, [email protected])
AC_CONFIG_HEADER(config.h)
AC_CONFIG_SRCDIR([hmc_tm.c])
AC_CANONICAL_HOST()
dnl Install below $HOME unless --prefix is given.
AC_PREFIX_DEFAULT($HOME)
AC_ARG_PROGRAM
dnl On Cray hosts the cache variable ac_cv_c_bigendian is preset to yes.
dnl NOTE(review): presumably this pre-seeds an endianness test; no
dnl AC_C_BIGENDIAN call is visible in this file.
if test "$host_vendor" = "cray"; then
ac_cv_c_bigendian=yes
fi
dnl C compiler (C99 mode) and basic language feature checks.
AC_PROG_CC
AC_PROG_CC_C99
dnl AC_PROG_CC_STDC
AC_C_CONST
AC_C_INLINE
AC_C_RESTRICT
dnl FLIBS (Fortran runtime link flags) and the maths library are appended to LIBS.
AC_F77_LIBRARY_LDFLAGS
AC_CHECK_TOOL(AR, ar, [ar])
LIBS="$LIBS $FLIBS -lm"
AC_PROG_LEX
dnl AC_PROG_LEX sets $LEX to ":" if neither lex nor flex are found!
if test "$LEX" = ":"; then
AC_MSG_ERROR([(F)LEX is required for building read_input.c. Please install it and run configure again.])
fi
AC_PROG_MAKE_SET
AC_PROG_RANLIB
dnl CCDEP becomes gcc when a gcc program is found in PATH, otherwise $CC.
AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
#(endian="", AC_DEFINE(LITTLE_ENDIAN,1,The endian of the architechture))
# AC_PROG_FC([ifort gfortran])
# AC_FC_FUNC(testfunc, )
dnl The escaped \${...} references are written literally into LDFLAGS, so they
dnl are expanded later by whatever consumes LDFLAGS, not by configure.
LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
dnl Defaults: link with the C compiler; sub-directories that get a Makefile.
CCLD=${CC}
USESUBDIRS="buffers cu io solver linalg"
dnl Fixed-width integer support.
dnl With stdint.h present the three uintN_t types below are mandatory and
dnl configure aborts if one of them is missing. Without stdint.h the sizes of
dnl the plain unsigned types are recorded instead, so that
dnl lime_fixed_types.h can pick suitable replacements.
AC_CHECK_HEADERS([stdint.h],
  [AC_CHECK_TYPES([uint16_t, uint32_t, uint64_t],
     [],
     [AC_MSG_ERROR([stdint.h found but either uint16_t, uint32_t or uint64_t not found]) ])],
  [AC_CHECK_SIZEOF([unsigned char])
   AC_CHECK_SIZEOF([unsigned short])
   AC_CHECK_SIZEOF([unsigned int])
   AC_CHECK_SIZEOF([unsigned long])
   AC_CHECK_SIZEOF([unsigned long long])])
dnl Location of the lime library: --with-limedir=dir, default ./lime.
dnl ${lime_dir}/lib/ is added to the linker search path and configure stops
dnl unless liblime provides limeReaderNextRecord.
AC_MSG_CHECKING([where to find lime])
AC_ARG_WITH([limedir],
  AS_HELP_STRING([--with-limedir[=dir]], [search lime in dir [default=./lime]]),
  [lime_dir=$withval],
  [lime_dir="./lime"])
AC_MSG_RESULT([$lime_dir])
LDFLAGS="$LDFLAGS -L${lime_dir}/lib/"
AC_CHECK_LIB([lime], [limeReaderNextRecord],
  [],
  [AC_MSG_ERROR([library liblime is missing or needed function is not available])])
#LIBS="$LIBS $FLIBS -lm"
dnl Optional lemon support, requested with --with-lemondir=dir.
dnl Sets LEMON_AVAILABLE to 1 when lemon is requested and to 0 otherwise.
dnl --without-lemondir (equivalently --with-lemondir=no) now means "do not use
dnl lemon" instead of being taken as a directory called "no", and the result
dnl of the check is reported through AC_MSG_RESULT so it also reaches
dnl config.log.
AC_MSG_CHECKING([whether we want to use lemon])
AC_ARG_WITH([lemondir],
  AS_HELP_STRING([--with-lemondir[=dir]], [use lemon, to be found in dir]),
  [lemon_request=$withval],
  [lemon_request=no])
if test "x$lemon_request" = xno; then
  AC_MSG_RESULT([no])
  LEMON_AVAILABLE=0
else
  AC_MSG_RESULT([yes])
  LEMON_AVAILABLE=1
  lemon_dir=$lemon_request
  LDFLAGS="$LDFLAGS -L${lemon_dir}/lib"
  dnl Default success action: define HAVE_LIBLEMON and prepend -llemon to LIBS.
  AC_CHECK_LIB([lemon],
    [lemonReaderNextRecord],
    [],
    [AC_MSG_ERROR([library liblemon was not found])])
fi
dnl --enable-indexindepgeom (off by default): defines _INDEX_INDEP_GEOM.
AC_MSG_CHECKING([whether we use the general geometry])
AC_ARG_ENABLE([indexindepgeom],
  AS_HELP_STRING([--enable-indexindepgeom], [enable Index independent addressing [default=no]]),
  [enable_iig=$enableval],
  [enable_iig=no])
AS_IF([test $enable_iig = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([_INDEX_INDEP_GEOM], [1], [Index independent addressing])],
  [AC_MSG_RESULT([no])])
dnl --enable-mpi (on by default): defines MPI.
AC_MSG_CHECKING([whether we want to use MPI])
AC_ARG_ENABLE([mpi],
  AS_HELP_STRING([--enable-mpi], [enable use of mpi [default=yes]]),
  [enable_mpi=$enableval],
  [enable_mpi=yes])
AS_IF([test $enable_mpi = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([MPI], [1], [Compile with MPI support])],
  [AC_MSG_RESULT([no])])
dnl --enable-qpx (off by default): defines BGQ and presets the optimisation
dnl flags OPTARGS and SOPTARGS with the XLC options listed below.
AC_MSG_CHECKING([whether to use QPX intrinsics])
AC_ARG_ENABLE([qpx],
  AS_HELP_STRING([--enable-qpx], [enable use of qpx intrinsics [default=no]]),
  [enable_qpx=$enableval],
  [enable_qpx=no])
AS_IF([test $enable_qpx = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([BGQ], [1], [Compile with QPX intrinsics])
   AC_MSG_NOTICE([Compiling with QPX intrinsics on BGQ, enabling compiler optimizations for XLC.])
   OPTARGS="-O3 -qstrict=all -qtune=qp -qarch=qp -qmaxmem=-1"
   SOPTARGS="$OPTARGS"],
  [AC_MSG_RESULT([no])])
dnl --enable-spi (off by default): defines SPI and sets SPI_FILES to
dnl DirectPut; SPI_FILES is empty when SPI is not used.
AC_MSG_CHECKING([whether to use IBM BG/Q SPI for communications])
AC_ARG_ENABLE([spi],
  AS_HELP_STRING([--enable-spi], [enable use of SPI [default=no]]),
  [enable_spi=$enableval],
  [enable_spi=no])
AS_IF([test $enable_spi = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([SPI], [1], [Compile with SPI for communications])
   SPI_FILES="DirectPut"],
  [AC_MSG_RESULT([no])
   SPI_FILES=""])
dnl OpenMP support, switched with --enable-omp (on by default).
dnl When enabled this defines OMP, requires omp.h, runs AC_OPENMP and appends
dnl the OpenMP flags to CFLAGS, CPPFLAGS and LDFLAGS.
AC_MSG_CHECKING(whether we want to use OpenMP)
AC_ARG_ENABLE(omp,
AS_HELP_STRING([--enable-omp], [enable use of OpenMP [default=yes]]),
enable_omp=$enableval, enable_omp=yes)
if test $enable_omp = yes; then
AC_MSG_RESULT(yes)
AC_DEFINE(OMP,1,Compile with OpenMP support)
dnl Configuration stops here if the OpenMP header cannot be found.
AC_CHECK_HEADERS([omp.h],,[AC_MSG_ERROR([Cannot find OpenMP headers!])])
AC_OPENMP
# -- AC_OPENMP provides a compiler-dependent OPENMP_CFLAGS so we can set it here
# on the BG/Q with XLC we force a special set of options for OpenMP support
dnl enable_qpx comes from the --enable-qpx handling earlier in this file.
if test $enable_qpx = yes; then
AC_MSG_NOTICE([Using OpenMP with XLC on BG/Q. Compiling with "-qsmp=omp:noauto:schedule=static".])
CFLAGS="$CFLAGS -qsmp=omp:noauto:schedule=static"
CPPFLAGS="$CPPFLAGS -qsmp=omp:noauto:schedule=static"
LDFLAGS="$LDFLAGS -qsmp=omp:noauto:schedule=static"
else
dnl All other compilers: use whatever AC_OPENMP detected.
CFLAGS="$CFLAGS $OPENMP_CFLAGS"
CPPFLAGS="$CPPFLAGS $OPENMP_CFLAGS"
LDFLAGS="$LDFLAGS $OPENMP_CFLAGS"
fi
else
AC_MSG_RESULT(no)
fi
dnl FFTW support via --enable-fftw.
dnl   yes        : define HAVE_FFTW and link with -lfftw3
dnl   no         : nothing (default)
dnl   other value: taken as an installation prefix; in addition to the above,
dnl                <prefix>/lib64 is searched for libraries and
dnl                <prefix>/include for headers
fftw_lib=/usr
AC_MSG_CHECKING([whether we want to use FFTW])
AC_ARG_ENABLE([fftw],
  AS_HELP_STRING([--enable-fftw], [enable use of fftw [default=no]]),
  [enable_fftw=$enableval],
  [enable_fftw=no])
case $enable_fftw in
  yes)
    AC_MSG_RESULT([yes])
    AC_DEFINE([HAVE_FFTW], [1], [Compile with FFTW support])
    LIBS="-lfftw3 ${LIBS}"
    ;;
  no)
    AC_MSG_RESULT([no])
    ;;
  *)
    AC_MSG_RESULT([yes])
    AC_DEFINE([HAVE_FFTW], [1], [Compile with FFTW support])
    fftw_lib=${enable_fftw}
    LDFLAGS="$LDFLAGS -L${fftw_lib}/lib64"
    LIBS="-lfftw3 ${LIBS}"
    INCLUDES="-I${fftw_lib}/include ${INCLUDES}"
    ;;
esac
dnl MPI-only options; the whole section is skipped without MPI.
if test $enable_mpi = yes; then
  dnl --with-mpidimension selects which PARALLEL* macro is defined.
  dnl Numeric values 1-4 and the letter forms T, XT, XYT, XYZT name the same
  dnl four layouts; X, XY and XYZ select layouts without the t direction.
  dnl Any other value aborts configure.
  AC_MSG_CHECKING([which parallelisation to use for MPI])
  AC_ARG_WITH([mpidimension],
    AS_HELP_STRING([--with-mpidimension[=n]], [use n dimensional parallelisation [default=1]]),
    [withmpidimension=$withval],
    [withmpidimension=1])
  case $withmpidimension in
    1)
      AC_MSG_RESULT(n=1 [t])
      AC_DEFINE(PARALLELT,1,One dimensional parallelisation)
      ;;
    2)
      AC_MSG_RESULT(n=2 [xt])
      AC_DEFINE(PARALLELXT,1,Two dimensional parallelisation)
      ;;
    3)
      AC_MSG_RESULT(n=3 [xyt])
      AC_DEFINE(PARALLELXYT,1,Three dimensional parallelisation)
      ;;
    4)
      AC_MSG_RESULT(n=4 [xyzt])
      AC_DEFINE(PARALLELXYZT,1,Four dimensional parallelisation)
      ;;
    X)
      AC_MSG_RESULT(n=1 [x])
      AC_DEFINE(PARALLELX,1, X parallelisation)
      ;;
    XY)
      AC_MSG_RESULT(n=2 [xy])
      AC_DEFINE(PARALLELXY,1, XY parallelisation)
      ;;
    XYZ)
      AC_MSG_RESULT(n=3 [xyz])
      AC_DEFINE(PARALLELXYZ,1, XYZ parallelisation)
      ;;
    T)
      AC_MSG_RESULT(n=1 [t])
      AC_DEFINE(PARALLELT,1, T parallelisation)
      ;;
    XT)
      AC_MSG_RESULT(n=2 [xt])
      AC_DEFINE(PARALLELXT,1, XT parallelisation)
      ;;
    XYT)
      AC_MSG_RESULT(n=3 [xyt])
      AC_DEFINE(PARALLELXYT,1, XYT parallelisation)
      ;;
    XYZT)
      AC_MSG_RESULT(n=4 [xyzt])
      AC_DEFINE(PARALLELXYZT,1, XYZT parallelisation)
      ;;
    *)
      AC_MSG_RESULT(unknown)
      AC_MSG_ERROR([Only t, xt, xyt, xyzt, x, xy, xyz parallelisation available])
      ;;
  esac

  dnl --with-persistentmpi (off by default): defines _PERSISTENT.
  AC_MSG_CHECKING([whether we shall use persistent MPI calls for halfspinor])
  AC_ARG_WITH([persistentmpi],
    AS_HELP_STRING([--with-persistentmpi], [use persistent MPI calls for halfspinor [default=no]]),
    [withpersistent=$withval],
    [withpersistent=no])
  AS_IF([test $withpersistent = yes],
    [AC_MSG_RESULT([yes])
     AC_DEFINE([_PERSISTENT], [1], [use persistent MPI calls for halfspinor])],
    [AC_MSG_RESULT([no])])

  dnl --with-nonblockingmpi (on by default): defines _NON_BLOCKING.
  AC_MSG_CHECKING([whether we shall use non-blocking MPI calls])
  AC_ARG_WITH([nonblockingmpi],
    AS_HELP_STRING([--with-nonblockingmpi], [use non-blocking MPI calls for spinor and gauge [default=yes]]),
    [withnonblock=$withval],
    [withnonblock=yes])
  AS_IF([test $withnonblock = yes],
    [AC_MSG_RESULT([yes])
     AC_DEFINE([_NON_BLOCKING], [1], [use non-blocking MPI calls for spinor ang gauge])],
    [AC_MSG_RESULT([no])])
fi
dnl --with-fixedvolume (off by default): defines FIXEDVOLUME and has
dnl configure generate fixed_volume.h from its template.
AC_MSG_CHECKING([whether we want to fix volume at compiletime])
AC_ARG_WITH([fixedvolume],
  AS_HELP_STRING([--with-fixedvolume], [fix volume at compiletime [default=no]]),
  [with_fixvol=$withval],
  [with_fixvol=no])
AS_IF([test $with_fixvol = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([FIXEDVOLUME], [1], [Fixed volume at compiletime])
   AC_CONFIG_FILES([fixed_volume.h])],
  [AC_MSG_RESULT([no])])
dnl --with-kojakinst (off by default): the compiler command is prefixed
dnl with kinst-pomp.
AC_MSG_CHECKING([whether we want to use KOJAK instrumentalisation])
AC_ARG_WITH([kojakinst],
  AS_HELP_STRING([--with-kojakinst], [instrumentalise for KOJAK [default=no]]),
  [with_kojakinst=$withval],
  [with_kojakinst=no])
AS_IF([test $with_kojakinst = yes],
  [AC_MSG_RESULT([yes])
   CC="kinst-pomp ${CC}"],
  [AC_MSG_RESULT([no])])
dnl lapack/blas are mandatory: --with-lapack=no (or --without-lapack) aborts.
dnl   yes (default): define HAVE_LAPACK, library is searched for later
dnl   other value  : taken as linker arguments and put in front of LIBS;
dnl                  with_lapack is then normalised to yes
AC_MSG_CHECKING([whether we want to use lapack and blas])
AC_ARG_WITH([lapack],
  AS_HELP_STRING([--with-lapack], [enable use of lapack [default=yes]]),
  [with_lapack=$withval],
  [with_lapack=yes])
case "$with_lapack" in
  yes)
    AC_MSG_RESULT([yes])
    LAPACKLIB=
    AC_DEFINE([HAVE_LAPACK], [1], [lapack available])
    ;;
  no)
    AC_MSG_RESULT([no])
    AC_MSG_ERROR([lapack is needed! Will stop here.])
    ;;
  *)
    AC_MSG_RESULT([yes])
    LIBS="$withval $LIBS"
    with_lapack=yes
    AC_DEFINE([HAVE_LAPACK], [1], [lapack available])
    ;;
esac
dnl MPI programs can in general not be started directly, so configure must
dnl not try to execute test binaries: pretend to cross compile.
dnl Cray hosts are exempt from this.
if test $enable_mpi = yes && test "$host_vendor" != "cray"; then
  cross_compiling=yes
fi
dnl for the case of other configure scripts
dnl AC_CONFIG_SUBDIRS( rng )
dnl check for clock_gettime and set correct library flag if one is required
dnl (as done by AC_CHECK_LIB)
dnl First try the libraries already in LIBS; only if that fails look in librt.
AC_CHECK_FUNCS(clock_gettime, [], [AC_CHECK_LIB(rt, clock_gettime)])
dnl with glibc and c99 mode the timespec required for clock_gettime is only
dnl available in POSIX 199309L compatibility mode when clock_gettime is in librt
dnl ac_cv_lib_rt_clock_gettime is the result variable of the AC_CHECK_LIB call
dnl above. It is only yes when the function was found in librt, in which case
dnl HAVE_CLOCK_GETTIME is defined by hand and the POSIX level is added to CFLAGS.
if test "$ac_cv_lib_rt_clock_gettime" = "yes"; then
AC_DEFINE(HAVE_CLOCK_GETTIME,1)
CFLAGS="$CFLAGS -D_POSIX_C_SOURCE=199309L"
AC_MSG_NOTICE([Instructing the compiler to use POSIX 199309L])
fi
dnl Checks for lapack and defines proper name mangling scheme for
dnl linking with f77 code
dnl AC_F77_FUNC stores the Fortran-mangled spelling of zheev in the shell
dnl variable zheev.
AC_F77_FUNC(zheev)
dnl An unchanged name is taken to mean that no underscore is appended, which
dnl is recorded as NOF77_.
if test "$zheev" = "zheev"; then
AC_DEFINE(NOF77_,1,Fortran has no extra _)
fi
dnl The mangled symbol must be available, either from the libraries already
dnl in LIBS or from -llapack; otherwise configure aborts.
AC_SEARCH_LIBS([$zheev],[lapack], [], [AC_MSG_ERROR([Cannot find lapack])])
dnl Checks for header files.
dnl Each header found defines the corresponding HAVE_<HEADER>_H macro.
AC_HEADER_STDC
AC_CHECK_HEADERS([float.h libintl.h limits.h stdint.h stdlib.h string.h strings.h sys/time.h unistd.h endian.h])
dnl getopt.h is only probed; no action is attached to the result.
AC_CHECK_HEADER( getopt.h, [])
dnl Checks for typedefs, structures, and compiler characteristics.
dnl NOTE(review): AC_C_CONST was already invoked near the top of this file.
AC_C_CONST
AC_TYPE_OFF_T
AC_TYPE_SIZE_T
AC_HEADER_TIME
dnl Checks for library functions.
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
AC_FUNC_MALLOC
AC_TYPE_SIGNAL
AC_CHECK_FUNCS([gettimeofday pow sqrt])
dnl Variables that are substituted into the generated files (@NAME@).
AC_SUBST(OPTARGS)
AC_SUBST(SOPTARGS)
AC_SUBST(INCLUDES)
AC_SUBST(AUTOCONF)
AC_SUBST(SOLVEROUT)
AC_SUBST(CCDEP)
AC_SUBST(CCLD)
AC_SUBST(DEPFLAGS)
AC_SUBST(DEBUG_FLAG)
AC_SUBST(PROFILE_FLAG)
AC_SUBST(XCHANGELIB)
AC_SUBST(XCHANGEDIR)
AC_SUBST(XLIB)
AC_SUBST([LEMON_AVAILABLE])
AC_SUBST(SPI_FILES)
dnl Header search path. The escaped \$(HOME) and \${abs_top_*} references are
dnl written literally and expanded by make; lime_dir is expanded right here.
INCLUDES="$INCLUDES -I\$(HOME)/include/ -I. -I\${abs_top_builddir}/ -I\${abs_top_srcdir}/ -I${lime_dir}/include/"
dnl Only add the lemon header directory when a lemon directory was configured;
dnl with lemon_dir unset this used to add the bogus path -I/include/.
if test -n "$lemon_dir"; then
  INCLUDES="$INCLUDES -I${lemon_dir}/include/"
fi
dnl Array alignment, chosen with --with-alignment=n (default: auto).
dnl ALIGN_BASE receives a mask (0x00, 0x0f, 0x1f, 0x3f or 0x7f) and ALIGN the
dnl matching aligned attribute. "no" is accepted as a synonym for 0.
dnl In auto mode nothing is aligned yet and withautoalign=0 records that;
dnl later sections of this file may raise it.
dnl NOTE(review): the messages speak of bits, while the value is passed
dnl unchanged to the aligned attribute - confirm the intended unit.
AC_MSG_CHECKING([what alignment we want for arrays])
AC_ARG_WITH(alignment,
  [AS_HELP_STRING([--with-alignment[=n]], [align arrays to 0, 16, 32, 64 or 128 bits [default=auto]])],
  [withalign=$withval],
  [withalign=auto])
test "$withalign" = "no" && withalign=0
case "$withalign" in
  0)
    AC_MSG_RESULT(none)
    AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
    AC_DEFINE(ALIGN, [])
    ;;
  16)
    AC_MSG_RESULT(16 bits)
    AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
    AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
    ;;
  32)
    AC_MSG_RESULT(32 bits)
    AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
    AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
    ;;
  64)
    AC_MSG_RESULT(64 bits)
    AC_DEFINE(ALIGN_BASE, 0x3f, [Align base])
    AC_DEFINE(ALIGN, [__attribute__ ((aligned (64)))])
    ;;
  128)
    AC_MSG_RESULT(128 bits)
    AC_DEFINE(ALIGN_BASE, 0x7f, [Align base])
    AC_DEFINE(ALIGN, [__attribute__ ((aligned (128)))])
    ;;
  auto)
    withautoalign=0
    AC_MSG_RESULT(auto)
    AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
    AC_DEFINE(ALIGN, [], [])
    ;;
  *)
    AC_MSG_RESULT(Unusable value for array alignment)
    AC_MSG_ERROR([Only alignment to 0, 16, 32, 64 or 128 bits, or auto alignment available])
    ;;
esac
dnl Optional x86 instruction-set extensions, only offered on i686/x86_64:
dnl --enable-p4, --enable-opteron, --enable-sse2 and --enable-sse3.
dnl Each extension needs a minimum array alignment (64 for P4, 32 for the
dnl others). With --with-alignment=auto the alignment is raised as needed and
dnl the new value is remembered in withautoalign; with an explicit alignment
dnl that is too small configure aborts.
dnl Changes against the previous version: withautoalign now always holds the
dnl plain alignment value (it was set to 0x1f and 0x32 in two places), the
dnl SSE3 error message names SSE3, and enable_sse2/enable_sse3 get a default
dnl on other CPUs because later parts of this file test them.
if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
  AC_MSG_CHECKING(whether we want to use P4 instructions)
  AC_ARG_ENABLE(p4,
    AS_HELP_STRING([--enable-p4],[enable use of P4 instructions [default=no]]),
    enable_p4=$enableval, enable_p4=no)
  if test "$enable_p4" = yes; then
    AC_MSG_RESULT(yes)
    AC_DEFINE(P4,1,Use Pentium4 instructions)
    if test "$withalign" = auto; then
      if test "$withautoalign" = 0 || test "$withautoalign" = 16 || test "$withautoalign" = 32; then
        AC_MSG_RESULT(changing array alignment to 64 bits for P4 instructions)
        AC_DEFINE(ALIGN_BASE, 0x3f, [Align base])
        AC_DEFINE(ALIGN, [__attribute__ ((aligned (64)))])
        withautoalign=64
      fi
    elif test "$withalign" = 0 || test "$withalign" = 16 || test "$withalign" = 32; then
      AC_MSG_ERROR([alignment incompatible with P4 instructions (64 bits required)!])
    fi
  else
    AC_MSG_RESULT(no)
  fi

  AC_MSG_CHECKING(whether we want to use Opteron instructions)
  AC_ARG_ENABLE(opteron,
    AS_HELP_STRING([--enable-opteron], [enable use of Opteron instructions [default=no]]),
    enable_opteron=$enableval, enable_opteron=no)
  if test "$enable_opteron" = yes; then
    AC_MSG_RESULT(yes)
    AC_DEFINE(OPTERON,1,Use Opteron instructions)
    if test "$withalign" = auto; then
      if test "$withautoalign" = 0 || test "$withautoalign" = 16; then
        AC_MSG_RESULT(changing array alignment to 32 bits for Opteron instructions)
        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
        withautoalign=32
      fi
    elif test "$withalign" = 0 || test "$withalign" = 16; then
      AC_MSG_ERROR([alignment incompatible with Opteron instructions (32 bits required)!])
    fi
  else
    AC_MSG_RESULT(no)
  fi

  dnl The SSE2/SSE3 preprocessor macros themselves are defined further down,
  dnl in the compiler specific section; here only the alignment is handled.
  AC_MSG_CHECKING(whether we want to use SSE2 instructions)
  AC_ARG_ENABLE(sse2,
    AS_HELP_STRING([--enable-sse2], [enable use of SSE2 instructions [default=no]]),
    enable_sse2=$enableval, enable_sse2=no)
  if test "$enable_sse2" = yes; then
    AC_MSG_RESULT(yes)
    if test "$withalign" = auto; then
      if test "$withautoalign" = 0 || test "$withautoalign" = 16; then
        AC_MSG_WARN(changing array alignment to 32 bits for SSE2 instructions)
        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
        withautoalign=32
      fi
    elif test "$withalign" = 0 || test "$withalign" = 16; then
      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (32 bits required)!])
    fi
  else
    AC_MSG_RESULT(no)
  fi

  AC_MSG_CHECKING(whether we want to use SSE3 instructions)
  AC_ARG_ENABLE(sse3,
    AS_HELP_STRING([--enable-sse3], [enable use of SSE3 instructions [default=no]]),
    enable_sse3=$enableval, enable_sse3=no)
  if test "$enable_sse3" = yes; then
    AC_MSG_RESULT(yes)
    if test "$withalign" = auto; then
      if test "$withautoalign" = 0 || test "$withautoalign" = 16; then
        AC_MSG_RESULT(changing array alignment to 32 bits for SSE3 instructions)
        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
        withautoalign=32
      fi
    elif test "$withalign" = 0 || test "$withalign" = 16; then
      AC_MSG_ERROR([alignment incompatible with SSE3 instructions (32 bits required)!])
    fi
  else
    AC_MSG_RESULT(no)
  fi
else
  dnl Not an x86 host: the options above are not declared. Give the two
  dnl switches that are tested later a value unless the user supplied one.
  test -n "$enable_sse2" || enable_sse2=no
  test -n "$enable_sse3" || enable_sse3=no
fi
dnl --with-gprof (off by default) fills PROFILE_FLAG with the profiling
dnl options; IBM powerpc hosts additionally get -qfullpath.
dnl Without gprof PROFILE_FLAG is cleared.
AC_MSG_CHECKING([whether we want to use gprof as profiler])
AC_ARG_WITH([gprof],
  AS_HELP_STRING([--with-gprof], [use of gprof profiler [default=no]]),
  [enable_gprof=$withval],
  [enable_gprof=no])
AS_IF([test $enable_gprof = yes],
  [AC_MSG_RESULT([yes])
   AS_IF([test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm"],
     [PROFILE_FLAG="-pg -qfullpath -g"],
     [PROFILE_FLAG="-pg -g"])],
  [AC_MSG_RESULT([no])
   PROFILE_FLAG=])
dnl --with-bgldram (on by default): defines _USE_BGLDRAM.
AC_MSG_CHECKING([whether we shall use rts dram window])
AC_ARG_WITH([bgldram],
  AS_HELP_STRING([--with-bgldram], [use BGL dram window (BGL only!) [default=yes]]),
  [with_bgldram=$withval],
  [with_bgldram=yes])
AS_IF([test $with_bgldram = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([_USE_BGLDRAM], [1], [use BGL dram window])],
  [AC_MSG_RESULT([no])])
dnl Now we have to set all Flags and compiler properly
dnl The compiler is identified by searching the output of $CC -V:
dnl   XLCGREP - lines mentioning xlc (any case); if there are any, XLC is set
dnl             to yes and the XLC macro is defined
dnl   PGCC    - lines containing pgcc
dnl   ICC     - lines mentioning intel (any case)
dnl PGCC and ICC hold the matching lines and stay empty without a match.
XLCGREP=`$CC -V 2>&1 | grep -i xlc`
if test "$XLCGREP" != ""; then
XLC="yes"
AC_DEFINE(XLC,1,Are we using the IBM xlc compiler?)
fi
PGCC=`$CC -V 2>&1 | grep pgcc`
ICC=`$CC -V 2>&1 | grep -i intel`
dnl Platform and compiler specific settings. Exactly one branch is taken,
dnl selected by host_cpu / host_vendor / host_os, and it fills in
dnl DEPFLAGS (dependency generation), OPTARGS/SOPTARGS (optimisation),
dnl DEBUG_FLAG and, where needed, CCDEP, CCLD, LIBS, LDFLAGS and CPPFLAGS.
dnl
dnl Changes against the previous version:
dnl  - the powerpc64, Blue Gene/L and Blue Gene/P branches assigned DEBUGFLAG,
dnl    a variable that is never substituted; they now set DEBUG_FLAG like the
dnl    other branches, so the value reaches the generated files
dnl  - the Blue Gene branches append to CPPFLAGS instead of overwriting it,
dnl    so flags from the user or from the OpenMP section are kept
dnl
dnl First branch: PCs (i686 and x86_64)
if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
  if test "$GCC" = yes && test "$ICC" = ""; then
    dnl the GNU compiler
    DEPFLAGS="-MM"
    CFLAGS="$CFLAGS -pedantic -Wall"
    OPTARGS='-O'
    SOPTARGS='-O'
    dnl SSE3 takes precedence over SSE2; on x86_64 both switch the
    dnl floating point unit to 387 mode.
    if test "$enable_sse3" = yes; then
      echo Using SSE3 and SSE2 macros!
      AC_DEFINE(SSE3,1,Compile with SSE3 support)
      DEPFLAGS="$DEPFLAGS -DSSE3"
      if test "$host_cpu" = "x86_64"; then
        CFLAGS="$CFLAGS -mfpmath=387"
      fi
    elif test "$enable_sse2" = yes; then
      DEPFLAGS="$DEPFLAGS -DSSE2"
      AC_DEFINE(SSE2,1,Compile with SSE2 support)
      if test "$host_cpu" = "x86_64"; then
        CFLAGS="$CFLAGS -mfpmath=387"
      fi
    fi
    if test "$host_cpu" = "x86_64"; then
      AC_DEFINE(_x86_64,1,x86 64 Bit architecture)
    fi
    dnl With MPI the dependencies are generated with plain gcc instead of $CC.
    CCDEP="$CC"
    if test "$enable_mpi" = yes; then
      CCDEP="gcc"
    fi
    DEBUG_FLAG="-g"
  else
    dnl other compilers
    if test "$PGCC" != ""; then
      dnl Portland Group compiler
      DEPFLAGS="-M"
      echo "We are using the Portland Group C compiler!"
      OPTARGS="-O2"
      SOPTARGS="-O2"
      DEBUG_FLAG="-g"
      PROFILE_FLAG="-p -g"
      CCDEP="$CC"
    elif test "$ICC" != ""; then
      dnl Intel compiler
      echo "We are using the Intel C compiler!"
      DEPFLAGS="-M"
      OPTARGS="-O3"
      SOPTARGS="-O3"
      DEBUG_FLAG="-g"
      PROFILE_FLAG="-p -g"
      CCDEP="$CC"
    else
      dnl unknown compiler: conservative defaults
      DEPFLAGS="-M"
      CFLAGS="$CFLAGS -O"
      DEBUG_FLAG="-g"
      CCDEP="$CC"
    fi
  fi
# The MareNostrum: powerpc on a linux system
# this will also evaluate to "true" on BG/Q with XLC
elif test "$host_cpu" = "powerpc64" && test "$host_vendor" = "unknown" && test "$host_os" = "linux-gnu"; then
  DEBUG_FLAG="-g"
  if test "$XLC" = "yes"; then
    CFLAGS="-qsrcmsg $CFLAGS"
    DEBUG_FLAG="$DEBUG_FLAG -qfullpath"
  fi
  dnl OPTARGS may already carry the QPX options; -O3 is appended to them.
  OPTARGS="$OPTARGS -O3"
  SOPTARGS="$OPTARGS"
  if test "$CCDEP" = "gcc"; then
    DEPFLAGS="-MM"
  else
    DEPFLAGS="-M"
  fi
#The BLue Gene/L
elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "blrts"; then
  dnl The dram window needs blrts_gcc as linker driver together with the
  dnl linker script elf32ppcblrts.x in the build directory.
  if test "$with_bgldram" = yes; then
    if (test -e /bgl/local/bin/blrts_gcc); then
      BLRTSGCC=/bgl/local/bin/blrts_gcc
    elif (test -e /bgl/BlueLight/ppcfloor/blrts-gnu/bin/powerpc-bgl-blrts-gnu-gcc); then
      BLRTSGCC=/bgl/BlueLight/ppcfloor/blrts-gnu/bin/powerpc-bgl-blrts-gnu-gcc
    else
      AC_MSG_ERROR([Sorry, don't know where to find blrts_gcc, see README.bgl!])
    fi
    CCLD="$BLRTSGCC -Xlinker --script=./elf32ppcblrts.x"
    if test ! -s ./elf32ppcblrts.x; then
      AC_MSG_ERROR([Sorry, elf32ppcblrts.x is missing, see README.bgl!])
    fi
  fi
  DEBUG_FLAG="-g"
  OPTARGS="-O3"
  SOPTARGS="-O3"
  AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
  dnl BGL code needs an alignment of at least 16.
  if test "$withalign" = auto; then
    if test "$withautoalign" = 0; then
      AC_MSG_RESULT(changing array alignment to 16 bits for BGL instructions)
      AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
      withautoalign=16
    fi
  elif test "$withalign" = 0; then
    AC_MSG_ERROR([alignment incompatible with BGL instructions (16 bits required)!])
  fi
  if test "$XLC" = "yes"; then
    CFLAGS="-qsrcmsg $CFLAGS"
    OPTARGS="$OPTARGS -qarch=440d -qtune=440"
    SOPTARGS="$SOPTARGS -qarch=440d -qtune=440"
    DEBUG_FLAG="$DEBUG_FLAG -qfullpath"
    # OPTARGS="-qhot" leads to wrong code
  fi
  LIBS="-lmpich.rts -lfmpich.rts -lmsglayer.rts -lrts.rts -ldevices.rts $LIBS"
  LDFLAGS="$LDFLAGS -L/bgl/BlueLight/ppcfloor/bglsys/lib"
  if test "$with_lapack" = yes; then
    LIBS="-lesslbg -llapack.rts -lesslbg -lxlf90 -lxlfmath -lxl -lxlopt $LIBS"
    LDFLAGS="$LDFLAGS -L/opt/ibmcmp/xlf/bg/10.1/blrts_lib -L/bgl/local/lib/ -L/opt/ibmmath/lib/"
  fi
  if test "$CCDEP" = "gcc"; then
    DEPFLAGS="-MM"
  else
    DEPFLAGS="-M"
  fi
  CPPFLAGS="$CPPFLAGS -I/bgl/BlueLight/ppcfloor/bglsys/include"
  INCLUDES="$INCLUDES -I/bgl/BlueLight/ppcfloor/bglsys/include/"
#The BLue Gene/P
elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "bprts"; then
  CFLAGS="$CFLAGS"
  DEBUG_FLAG="-g"
  OPTARGS="-O3"
  SOPTARGS="-O3"
  dnl Both BGL and BGP are defined on Blue Gene/P.
  AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
  AC_DEFINE(BGP,1,[Optimize for Blue Gene/P])
  dnl BGP code needs an alignment of at least 16.
  if test "$withalign" = auto; then
    if test "$withautoalign" = 0; then
      AC_MSG_RESULT(changing array alignment to 16 bits for BGP instructions)
      AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
      withautoalign=16
    fi
  elif test "$withalign" = 0; then
    AC_MSG_ERROR([alignment incompatible with BGP instructions (16 bits required)!])
  fi
  if test "$XLC" = "yes"; then
    CFLAGS="-qsrcmsg $CFLAGS"
    OPTARGS="$OPTARGS -qarch=450d -qtune=450"
    SOPTARGS="$SOPTARGS -qarch=450d -qtune=450"
    DEBUG_FLAG="$DEBUG_FLAG -qfullpath"
    # OPTARGS="-qhot" leads to wrong code
  fi
  # LIBS="-lxlf90_r -lxlomp_ser -lxl -lxlopt -lxlfmath -ldl -lrt -lpthread $LIBS"
  # LDFLAGS="$LDFLAGS -L/bgsys/local/lib/ -L/opt/ibmcmp/xlf/bg/11.1/lib -L/bgsys/drivers/ppcfloor/comm/"
  # if test $with_lapack = yes; then
  # LIBS="-lesslbg -llapack -lesslbg $LIBS"
  # LDFLAGS="$LDFLAGS -L/opt/ibmmath/lib/"
  # fi
  if test "$CCDEP" = "gcc"; then
    DEPFLAGS="-MM"
  else
    DEPFLAGS="-M"
  fi
  CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor/arch/include/ -I/bgsys/drivers/ppcfloor/comm/include"
  INCLUDES="$INCLUDES -I/bgsys/local/include/ -I/bgsys/drivers/ppcfloor/arch/include/ -I/bgsys/drivers/ppcfloor/comm/include"
# The IBM Power PC
elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm"; then
  CFLAGS="$CFLAGS -q64 -qsrcmsg"
  LDFLAGS="$LDFLAGS -q64"
  OPTARGS="-O3 -qhot"
  SOPTARGS="-O3 -qhot"
  DEBUG_FLAG="-qfullpath -g"
  if test "$CCDEP" = "gcc"; then
    DEPFLAGS="-MM"
  else
    DEPFLAGS="-M"
  fi
# The CRAY
elif test "$host_vendor" = "cray"; then
  echo
  echo "Hey, we are on a cray, you should take some time for this..."
  echo "get yourself a coffee or so!"
  echo
  CFLAGS="$CFLAGS -dp"
  AC_DEFINE(CRAY,1,We are on a CRAY)
  OPTARGS="-O3"
  SOPTARGS="-O3"
  DEBUG_FLAG="-g"
  CCDEP="$CC"
  DEPFLAGS="-M"
else
  dnl Any other platform: no optimisation flags, dependency generation with
  dnl gcc when it is available.
  AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
  if test "$CCDEP" = "gcc"; then
    DEPFLAGS="-MM"
  else
    DEPFLAGS="-M"
  fi
  OPTARGS=
  SOPTARGS=
fi
dnl --enable-optimize (on by default). Only an explicit "no" has an effect:
dnl it clears the optimisation flags collected so far.
AC_MSG_CHECKING([whether we want to switch on optimisation])
AC_ARG_ENABLE([optimize],
  AS_HELP_STRING([--enable-optimize], [enable optimisation [default=yes]]),
  [enable_optimize=$enableval],
  [enable_optimize=yes])
AS_IF([test $enable_optimize = no],
  [AC_MSG_RESULT([no])
   OPTARGS=
   SOPTARGS=],
  [AC_MSG_RESULT([yes])])
dnl --enable-gaugecopy (on by default): defines _GAUGE_COPY.
AC_MSG_CHECKING([whether we want to use a copy of the gauge field])
AC_ARG_ENABLE([gaugecopy],
  AS_HELP_STRING([--enable-gaugecopy], [enable use of a copy of the gauge field [default=yes]]),
  [enable_gaugecopy=$enableval],
  [enable_gaugecopy=yes])
AS_IF([test $enable_gaugecopy = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([_GAUGE_COPY], [1], [Construct an extra copy of the gauge fields])],
  [AC_MSG_RESULT([no])])
dnl --enable-halfspinor (on by default): defines _USE_HALFSPINOR.
dnl The halfspinor operator forces the gauge copy on, so _GAUGE_COPY is
dnl defined here if --disable-gaugecopy was given. enable_gaugecopy is updated
dnl as well, so that later tests of that variable see the setting that is
dnl really in effect (it used to stay at "no").
AC_MSG_CHECKING(whether we want to use a Dirac Op. with halfspinor exchange)
AC_ARG_ENABLE(halfspinor,
  AS_HELP_STRING([--enable-halfspinor], [use a Dirac Op. with halfspinor exchange [default=yes]]),
  enable_halfspinor=$enableval, enable_halfspinor=yes)
if test "$enable_halfspinor" = yes; then
  AC_MSG_RESULT(yes)
  AC_DEFINE(_USE_HALFSPINOR,1,Exchange only a halfspinor in the Dirac Operator)
  if test "$enable_gaugecopy" = no; then
    AC_MSG_WARN([switching on gaugecopy for Dirac operator with halfspinor!])
    AC_DEFINE(_GAUGE_COPY,1,Construct an extra copy of the gauge fields)
    enable_gaugecopy=yes
  fi
else
  AC_MSG_RESULT(no)
fi
dnl --enable-shmem (off by default): defines _USE_SHMEM and links with -lsma.
AC_MSG_CHECKING([whether we want to use shmem API])
AC_ARG_ENABLE([shmem],
  AS_HELP_STRING([--enable-shmem],[use shmem API [default=no]]),
  [enable_shmem=$enableval],
  [enable_shmem=no])
AS_IF([test $enable_shmem = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([_USE_SHMEM], [1], [Use shmem API])
   LIBS="$LIBS -lsma"],
  [AC_MSG_RESULT([no])])
dnl --enable-tsplitpar (off by default): defines _USE_TSPLITPAR.
AC_MSG_CHECKING([whether we want to use timeslice-splitted communications])
AC_ARG_ENABLE([tsplitpar],
  AS_HELP_STRING([--enable-tsplitpar],[enable timeslice-splitted communications [default=no]]),
  [enable_tsp=$enableval],
  [enable_tsp=no])
AS_IF([test $enable_tsp = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([_USE_TSPLITPAR], [1], [timeslice-splitted communications])],
  [AC_MSG_RESULT([no])])
dnl --enable-laph (off by default): defines WITHLAPH.
AC_MSG_CHECKING([whether we want to compute the LapH eigenvalues])
AC_ARG_ENABLE([laph],
  AS_HELP_STRING([--enable-laph], [enable computation of LapH eigensystem [default=no]]),
  [enable_laph=$enableval],
  [enable_laph=no])
AS_IF([test $enable_laph = yes],
  [AC_MSG_RESULT([yes])
   AC_DEFINE([WITHLAPH], [1], [LapH eigensystem])],
  [AC_MSG_RESULT([no])])
dnl CUDA GPU support, requested with --enable-gpu (off by default).
dnl When enabled:
dnl   - HAVE_GPU is defined, nvcc is used as CUDA compiler and the GPU
dnl     directory is added to the list of sub-directories
dnl   - --with-cuda=dir names the CUDA library directory
dnl   - --with-cudacompileargs=string sets the nvcc options (appended to
dnl     GPUCFLAGS)
dnl   - with MPI, nvcc is told to use mpicc, and only the t-direction
dnl     parallelisation is accepted
dnl Changes against the previous version: --with-mpidimension=T is accepted
dnl in addition to 1 (both select PARALLELT), the redundant inner tests of
dnl usegpu are gone, and shell variables are quoted in the tests.
AC_MSG_CHECKING(whether we want to use CUDA GPU)
AC_ARG_ENABLE(gpu,
  AS_HELP_STRING([--enable-gpu],[use GPU [default=no]]),
  usegpu=$enableval, usegpu=no)
if test "$usegpu" = yes; then
  AC_MSG_RESULT(yes)
  AC_DEFINE(HAVE_GPU,1,Using CUDA GPU)
  NVCC="nvcc"
  USESUBDIRS="$USESUBDIRS GPU"
  GPUDIR="GPU"
  LIBS="$LIBS -lcuda -lcudart -lcublas"

  AC_MSG_CHECKING([where to search for CUDA libs])
  AC_ARG_WITH(cuda,
    AS_HELP_STRING([--with-cuda[=dir]], [use CUDA GPU with lib dir [default=/usr/local/cuda/lib]]),
    cuda_dir=$withval, cuda_dir="/usr/local/cuda/lib")
  AC_MSG_RESULT($cuda_dir)
  LDFLAGS="$LDFLAGS -L$cuda_dir"

  AC_MSG_CHECKING([CUDA compile args])
  AC_ARG_WITH(cudacompileargs,
    AS_HELP_STRING([--with-cudacompileargs[=string]], [use CUDA compile args [default="--gpu-architecture sm_13 --use_fast_math -O3"]]),
    cuda_compileargs=$withval, cuda_compileargs="--gpu-architecture sm_13 --use_fast_math -O3")
  AC_MSG_RESULT($cuda_compileargs)
  GPUCFLAGS="$GPUCFLAGS $cuda_compileargs"

  if test "$enable_mpi" = yes; then
    GPUMPICOMPILER="--compiler-bindir mpicc"
    dnl 1 and T both stand for the t-direction parallelisation.
    if test "$withmpidimension" != 1 && test "$withmpidimension" != T; then
      AC_MSG_ERROR([ERROR! The GPU Code is only parallelized in t-direction so far!])
    fi
  else
    GPUMPICOMPILER=""
  fi
else
  AC_MSG_RESULT(no)
  NVCC=""
fi
AC_SUBST(USESUBDIRS)
AC_SUBST(NVCC)
AC_SUBST(GPUDIR)
AC_SUBST(GPUCFLAGS)
AC_SUBST(GPUMPICOMPILER)
dnl Cross-checks between the options selected above (only relevant with MPI).
dnl The first violated rule, in the order listed, stops configure.
dnl Changes against the previous version: the check now reports a result
dnl (ok or failed) instead of leaving the "checking ..." line open, the
dnl message no longer reads "checking checking", and all variables are quoted
dnl so that an unset one (for example enable_sse2 on hosts where that option
dnl is not offered) cannot break the test command.
AC_MSG_CHECKING([consistency of the selected options])
consistency_error=
if test "$enable_mpi" = yes; then
  if test "$enable_iig" = yes && test "$withpersistent" = yes; then
    consistency_error="ERROR! indexindepgeom is not compatible with persistent communications"
  elif test "$enable_iig" = yes && test "$enable_shmem" = yes; then
    consistency_error="ERROR! indexindepgeom is not compatible with shmem API"
  elif test "$enable_tsp" = yes && test "$enable_iig" = no; then
    consistency_error="ERROR! tsplitpar needs indexindepgeom"
  elif test "$enable_tsp" = yes && test "$enable_sse2" != yes; then
    consistency_error="ERROR! tsplitpar needs at least SSE2"
  elif test "$enable_tsp" = yes && test "$enable_gaugecopy" != yes; then
    consistency_error="ERROR! tsplitpar needs gaugecopy"
  elif test "$enable_laph" = yes && test "$enable_tsp" != yes; then
    consistency_error="ERROR! laph needs tsplitpar"
  fi
fi
if test -n "$consistency_error"; then
  AC_MSG_RESULT([failed])
  AC_MSG_ERROR([$consistency_error])
fi
AC_MSG_RESULT([ok])
dnl Make sure the output directories exist in the build tree. Parents are
dnl listed before their children (tests before tests/regressions).
for needed_dir in lib test tests tests/regressions
do
  if test ! -e $needed_dir; then
    mkdir $needed_dir
  fi
done
dnl The project libraries go in front of everything collected in LIBS so far.
dnl NOTE(review): -lhmc appears twice, presumably to resolve mutual
dnl dependencies between the static libraries - confirm.
LIBS="-lhmc -lsolver -llinalg -lhmc -lio $LIBS"
AUTOCONF=autoconf
dnl One Makefile is generated per entry of USESUBDIRS, plus the top-level one.
for i in $USESUBDIRS
do
make_files="$make_files $i/Makefile"
done
AC_CONFIG_FILES([Makefile $make_files])
AC_OUTPUT