diff --git a/README.md b/README.md
index 2a7309a4..a135449d 100644
--- a/README.md
+++ b/README.md
@@ -334,6 +334,7 @@ You can set an environment variable `SKETCHLIB_INSTALL` to affect `python setup.
 - Empty: uses cmake
 - `conda`: sets library location to the conda environment, and uses `src/Makefile` (used to be used in conda-forge recipe)
 - `azure`: Uses `src/Makefile`
+- `jlees`: Uses `src/Makefile_fedora38`
 
 ### cmake
 Now requires v3.19. If nvcc version is 11.0 or higher, sm8.6 with device link time optimisation will be used.
diff --git a/setup.py b/setup.py
index da41f68e..08a6b033 100755
--- a/setup.py
+++ b/setup.py
@@ -88,6 +88,10 @@ def build_extension(self, ext):
         elif target == 'azure':
             subprocess.check_call(['make', 'python'], cwd=ext.sourcedir + '/src', env=env)
             subprocess.check_call(['make', 'install_python', 'PYTHON_LIB_PATH=' + extdir], cwd=ext.sourcedir + '/src', env=env)
+        elif target == 'jlees':
+            # Pass -f to both steps so the install runs the same makefile that did the build
+            subprocess.check_call(['make', '-f', 'Makefile_fedora38', 'python'], cwd=ext.sourcedir + '/src', env=env)
+            subprocess.check_call(['make', '-f', 'Makefile_fedora38', 'install_python', 'PYTHON_LIB_PATH=' + extdir], cwd=ext.sourcedir + '/src', env=env)
         else:
             subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
             subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
diff --git a/src/Makefile_fedora38 b/src/Makefile_fedora38
new file mode 100644
index 00000000..25c9ff98
--- /dev/null
+++ b/src/Makefile_fedora38
@@ -0,0 +1,131 @@
+# Makefile variant with hard-coded Linuxbrew gcc-11 and CUDA 12.3 locations (see paths below).
+# CXX must be the C++ driver (g++), not gcc: $(LINK.cpp) links with $(CXX), and
+# only the g++ driver adds libstdc++ to the link line.
+CXX=g++-11
+CC=gcc-11
+CFLAGS+=-Wall -Wextra -fPIC
+CXXFLAGS+=-Wall -Wextra -std=c++14 -fopenmp -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -fPIC
+ifdef DEBUG
+  CXXFLAGS+= -O0 -g
+  CUDAFLAGS = -g -G
+else ifdef PROFILE
+  CXXFLAGS+= -O2 -g -flto -fno-fat-lto-objects -fvisibility=hidden
+  CUDAFLAGS = -O2 -pg -lineinfo
+else
+  CXXFLAGS+= -march=native -O3 -flto -fno-fat-lto-objects -fvisibility=hidden
+  CFLAGS+= -march=native -O3 -flto -fno-fat-lto-objects -fvisibility=hidden
+endif
+
+UNAME_S := $(shell uname -s)
+LIBLOC = ${CONDA_PREFIX}
+LDLIBS = -lz -lhdf5_cpp -lhdf5 -lopenblas -lgomp
+ifeq ($(UNAME_S),Linux)
+  CXXFLAGS+= -m64
+  ifdef PROFILE
+    CXXFLAGS+= -Wl,--compress-debug-sections=none
+  endif
+  LDLIBS+= -lpthread -lgfortran -lm -ldl -lrt
+  LDFLAGS=-Wl,-as-needed
+endif
+ifeq ($(UNAME_S),Darwin)
+  LDLIBS+= -pthread
+endif
+
+CPPFLAGS+=-I"/home/linuxbrew/.linuxbrew/include" -I"." -I"../vendor/highfive/include" -I$(LIBLOC)/include -I$(LIBLOC)/include/eigen3
+LDFLAGS+= -L$(LIBLOC)/lib -L"/home/linuxbrew/.linuxbrew/lib" -L/usr/local/cuda-12.3/lib64
+CUDA_LDLIBS=-lcudadevrt -lcudart_static $(LDLIBS)
+
+CUDA_LDFLAGS =-L$(LIBLOC)/lib -L${CUDA_HOME}/targets/x86_64-linux/lib/stubs -L${CUDA_HOME}/targets/x86_64-linux/lib
+CUDAFLAGS +=-ccbin /home/linuxbrew/.linuxbrew/bin/g++-11 -Xcompiler -fPIC --cudart static --relocatable-device-code=true --expt-relaxed-constexpr -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75
+ifdef GPU
+  CXXFLAGS += -DGPU_AVAILABLE
+  CUDAFLAGS += -gencode arch=compute_86,code=sm_86
+  CUDA_LDFLAGS += -L/usr/local/cuda-12.3/lib64
+endif
+
+# Expanded once (:=) so python3-config is not re-run on every reference.
+PYTHON_LIB := pp_sketchlib$(shell python3-config --extension-suffix)
+
+# python specific options
+python: CPPFLAGS += -DGPU_AVAILABLE -DPYTHON_EXT -DNDEBUG -Dpp_sketchlib_EXPORTS $(shell python3 -m pybind11 --includes)
+
+PROGRAMS=sketch_test matrix_test read_test gpu_dist_test
+
+SKETCH_OBJS=dist/dist.o dist/matrix_ops.o reference.o sketch/seqio.o sketch/sketch.o database/database.o sketch/countmin.o api.o dist/linear_regression.o random/rng.o random/random_match.o random/kmeans/KMeansRexCore.o random/kmeans/mersenneTwister2002.o
+GPU_SKETCH_OBJS=gpu/gpu_api.o
+CUDA_OBJS=gpu/dist.cu.o gpu/sketch.cu.o gpu/device_reads.cu.o gpu/gpu_countmin.cu.o gpu/device_memory.cu.o
+
+# web specific options
+web: CXX = em++
+# optimised compile options
+# NB turn exceptions back on for testing
+# NB `--closure 1` can be used to reduce size of js file (this minifies variable names!)
+web: CXXFLAGS = -O3 -s ASSERTIONS=1 \
+  -DNOEXCEPT \
+  -DJSON_NOEXCEPTION \
+  -s DISABLE_EXCEPTION_CATCHING=1 \
+  -fno-exceptions \
+  -flto --bind -s STRICT=1 \
+  -s ALLOW_MEMORY_GROWTH=1 \
+  -s USE_ZLIB=1 \
+  -s MODULARIZE=1 \
+  -s "EXPORTED_FUNCTIONS=['_malloc']" \
+  -s 'EXPORTED_RUNTIME_METHODS=["FS"]' \
+  -s EXPORT_NAME=WebSketch \
+  -Wall -Wextra -std=c++14
+web: CPPFLAGS += -DWEB_SKETCH
+web: LDFLAGS = -lnodefs.js -lworkerfs.js
+
+WEB_OUT=web/web_sketch
+WEB_OBJS=${WEB_OUT}.js ${WEB_OUT}.html ${WEB_OUT}.wasm
+
+web: web/web_sketch.o sketch/seqio.o sketch/sketch.o sketch/countmin.o
+	$(LINK.cpp) $^ -o ${WEB_OUT}.js
+	sed -i.old '1s;^;\/* eslint-disable *\/;' ${WEB_OUT}.js
+
+all: $(PROGRAMS)
+
+clean:
+	$(RM) $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) $(WEB_OBJS) *.o *.so version.h ~* $(PROGRAMS)
+
+install: all
+	install -d $(BINDIR)
+	install $(PROGRAMS) $(BINDIR)
+
+sketch_test: $(SKETCH_OBJS) test/main.o
+	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ -o $@ $(LDLIBS)
+
+matrix_test: $(SKETCH_OBJS) test/matrix_test.o
+	$(LINK.cpp) $^ -o $@ $(LDLIBS)
+
+read_test: $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) test/read_test.o
+	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o device_link.o -Xnvlink $(CUDA_LDLIBS)
+	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ device_link.o -o $@ $(CUDA_LDLIBS)
+
+gpu_dist_test: $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) test/gpu_dist_test.o
+	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o device_link.o -Xnvlink $(CUDA_LDLIBS)
+	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ device_link.o -o $@ $(CUDA_LDLIBS)
+
+version.h:
+	cat sketch/*.cpp sketch/*.hpp gpu/sketch.cu | openssl sha1 | awk '{print "#define SKETCH_VERSION \"" $$2 "\""}' > version.h
+
+database/database.o: version.h
+
+web/web_sketch.o: version.h
+
+python: $(PYTHON_LIB)
+
+$(PYTHON_LIB): $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) sketchlib_bindings.o
+	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o device_link.o -Xnvlink $(CUDA_LDLIBS)
+	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) -shared $^ device_link.o -o $(PYTHON_LIB) $(CUDA_LDLIBS)
+
+install_python: python
+	install -d $(PYTHON_LIB_PATH)
+	install $(PYTHON_LIB) $(PYTHON_LIB_PATH)
+
+# One rule for every CUDA object. Each object lists its .cu source as a
+# prerequisite, so editing a source file triggers a recompile of that object.
+$(CUDA_OBJS): gpu/%.cu.o: gpu/%.cu
+	nvcc $(CUDAFLAGS) $(CPPFLAGS) -DGPU_AVAILABLE -x cu -c $< -o $@
+
+.PHONY: all clean install python install_python web