diff --git a/README.md b/README.md
index f6d3f9b0..57683733 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,52 @@ Then [Python scripts](https://github.com/projectchrono/DEM-Engine/tree/pyDEME_de
If you use _pyDEME_ in conjunction with PyChrono, you should `import pyDEME` first, then PyChrono.
+### Persistent Jitify startup cache
+
+_DEME_ uses Jitify/NVRTC to compile CUDA kernels at solver initialization time.
+By default, _DEME_ keeps the upstream Jitify header-loading behavior.
+
+For workloads that start many fresh _DEME_ processes, users can opt in to a
+persistent header-source cache. Set `DEME_PERSISTENT_JITIFY_CACHE` to a
+writable file path where _DEME_ should create and reuse the cache file; the file
+does not need to exist before the first run. To let _DEME_ pick a simple
+per-user temporary cache path instead, set it to `1`, `true`, `on`, or `yes`.
+
+Automatic path:
+
+```
+export DEME_PERSISTENT_JITIFY_CACHE=1
+```
+
+Explicit path:
+
+```
+export DEME_PERSISTENT_JITIFY_CACHE="$HOME/.cache/deme/jitify_header_cache.bin"
+mkdir -p "$(dirname "$DEME_PERSISTENT_JITIFY_CACHE")"
+```
+
+The persistent cache follows the CUDA/toolchain setup, not the simulation
+setup. Changing material properties, geometry, particle counts, or timesteps
+does not invalidate it. If CUDA versions, include paths, or compiler options
+change, _DEME_ ignores the old cache file and fills it again during that run.
+To use the default Jitify behavior, unset `DEME_PERSISTENT_JITIFY_CACHE` or set
+it to `0`, `false`, `off`, or `no`. When the automatic path is selected on
+Linux/WSL, _DEME_ stores the cache at `/tmp/deme_jitify_header_cache_$USER.bin`.
+
+Pros:
+- Can significantly reduce startup time for workflows that launch many fresh
+ _DEME_ processes with the same CUDA/toolchain configuration.
+- Keeps the default behavior unchanged unless the environment variable is set.
+- Safely falls back to rediscovering headers when the CUDA/toolchain setup
+ changes.
+
+Cons:
+- The first run still has to discover and save the header sources.
+- Users must choose a writable cache location and manage it like other local
+ build/runtime caches.
+- If CUDA headers or include paths change, the old cache contents are no longer
+ useful; deleting the cache file forces a clean rebuild of the cache.
+
Compilation
You can also build C++ _DEME_ from source. It allows for potentially more performance and more tailoring.
diff --git a/src/DEM/kT.cpp b/src/DEM/kT.cpp
index c56815df..ad0481f3 100644
--- a/src/DEM/kT.cpp
+++ b/src/DEM/kT.cpp
@@ -889,16 +889,17 @@ void DEMKinematicThread::jitifyKernels(const std::unordered_map(std::move(JitHelper::buildProgram(
"DEMBinTriangleKernels", JitHelper::KERNEL_DIR / "DEMBinTriangleKernels.cu", Subs, JitifyOptions)));
- }
- // Then sphere--triangle contact detection-related kernels
- {
sphTri_contact_kernels = std::make_shared(std::move(JitHelper::buildProgram(
"DEMContactKernels_SphereTriangle", JitHelper::KERNEL_DIR / "DEMContactKernels_SphereTriangle.cu", Subs,
JitifyOptions)));
+ } else {
+ bin_triangle_kernels.reset();
+ sphTri_contact_kernels.reset();
}
// Then contact history mapping kernels
{
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index c01c748e..10999e39 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -14,9 +14,14 @@ configure_file(
)
-message(STATUS "${core_message} Extracting NVIDIA Jitify header...")
+set(DEMEJitifyHeader "${ProjectIncludeSource}/jitify/jitify.hpp")
+if(NOT EXISTS "${DEMEJitifyHeader}")
+ set(DEMEJitifyHeader "${NVIDIAJitifyPath}/jitify.hpp")
+endif()
+
+message(STATUS "${core_message} Extracting Jitify header...")
configure_file(
- ${NVIDIAJitifyPath}/jitify.hpp
+ ${DEMEJitifyHeader}
${CMAKE_BINARY_DIR}/src/jitify/jitify.hpp
COPYONLY
)
@@ -101,7 +106,7 @@ install(
# Install Third-party Headers
install(
FILES
- "${NVIDIAJitifyPath}/jitify.hpp"
+ "${DEMEJitifyHeader}"
DESTINATION
${CMAKE_INSTALL_INCLUDEDIR}/jitify
)
@@ -201,4 +206,3 @@ set_target_properties(
# Make sure this target is generated **AFTER** the build-tree version
add_dependencies(DEMERuntimeDataHelper_install DEMERuntimeDataHelper)
-
diff --git a/src/jitify/jitify.hpp b/src/jitify/jitify.hpp
new file mode 100644
index 00000000..a0744fc1
--- /dev/null
+++ b/src/jitify/jitify.hpp
@@ -0,0 +1,4839 @@
+/*
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of NVIDIA CORPORATION nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ -----------
+ Jitify 0.9
+ -----------
+ A C++ library for easy integration of CUDA runtime compilation into
+ existing codes.
+
+ --------------
+ How to compile
+ --------------
+ Compiler dependencies: <jitify.hpp>, -std=c++11
+ Linker dependencies: dl cuda nvrtc
+
+ --------------------------------------
+ Embedding source files into executable
+ --------------------------------------
+ g++ ... -ldl -rdynamic -DJITIFY_ENABLE_EMBEDDED_FILES=1
+ -Wl,-b,binary,my_kernel.cu,include/my_header.cuh,-b,default nvcc ... -ldl
+ -Xcompiler "-rdynamic
+ -Wl\,-b\,binary\,my_kernel.cu\,include/my_header.cuh\,-b\,default"
+ JITIFY_INCLUDE_EMBEDDED_FILE(my_kernel_cu);
+ JITIFY_INCLUDE_EMBEDDED_FILE(include_my_header_cuh);
+
+ ----
+ TODO
+ ----
+ Extract valid compile options and pass the rest to cuModuleLoadDataEx
+ See if can have stringified headers automatically looked-up
+ by having stringify add them to a (static) global map.
+ The global map can be updated by creating a static class instance
+ whose constructor performs the registration.
+ Can then remove all headers from JitCache constructor in example code
+ See other TODOs in code
+*/
+
+/*! \file jitify.hpp
+ * \brief The Jitify library header
+ */
+
+/*! \mainpage Jitify - A C++ library that simplifies the use of NVRTC
+ * \p Use class jitify::JitCache to manage and launch JIT-compiled CUDA
+ * kernels.
+ *
+ * \p Use namespace jitify::reflection to reflect types and values into
+ * code-strings.
+ *
+ * \p Use JITIFY_INCLUDE_EMBEDDED_FILE() to declare files that have been
+ * embedded into the executable using the GCC linker.
+ *
+ * \p Use jitify::parallel_for and JITIFY_LAMBDA() to generate and launch
+ * simple kernels.
+ */
+
+#pragma once
+
+#ifndef JITIFY_THREAD_SAFE
+#define JITIFY_THREAD_SAFE 1
+#endif
+
+#if JITIFY_ENABLE_EMBEDDED_FILES
+#include
+#endif
+#include
+#include
+#include
+#include
+#include // For strtok_r etc.
+#include
+#include
+#include
+#include
+#include