#ifndef ALGOIM_SPARKSTACK_HPP #define ALGOIM_SPARKSTACK_HPP // algoim::SparkStack implements a fast, thread-safe, stack-based allocator, // similar in function to alloca() but with additional guarantees regarding // portability, alignment, and type consistency. #include #include "uvector.hpp" namespace algoim { template class xarray; template class SparkStack { static constexpr size_t capacity = 1u << 23; static constexpr int capacity_line = __LINE__ - 1; template static size_t alloc(T** ptr, size_t len, R... rest) { if (pos() + len > capacity) { std::cerr << "SparkStack: capacity=" << capacity << " and pos=" << pos() << " insufficient for request len=" << len << '\n'; std::cerr << " consider increasing const 'capacity', defined on line " << capacity_line << " in file " << __FILE__ << '\n'; throw std::bad_alloc(); } // std::cout << "Before alloc, the **ptr pointing to nullptr is not nullptr: " << ptr << std::endl; *ptr = base() + pos(); // std::cout << "pos before += len:" << pos() << std::endl; pos() += len; // std::cout << "pos after += len:" << pos() << std::endl; // std::cout << "base: " << base() << std::endl; // std::cout << "ptr: " << ptr << std::endl; // std::cout << "*ptr: " << *ptr << std::endl; // std::cout << "==========" << std::endl; if constexpr (sizeof...(rest) == 0) return len; else return len + alloc(rest...); } static T* base() { static thread_local std::vector buff(capacity); return buff.data(); } static ptrdiff_t& pos() { static thread_local ptrdiff_t pos_ = 0; return pos_; }; size_t len_; SparkStack(const SparkStack&) = delete; SparkStack(SparkStack&&) = delete; SparkStack& operator=(const SparkStack&) = delete; SparkStack& operator=(SparkStack&&) = delete; public: // With parameters x0, n0, x1, n1, x2, n2, ..., allocate n0 elements and assign to x0, etc. template explicit SparkStack(T** ptr, size_t len, R&&... rest) { len_ = alloc(ptr, len, rest...); } // With parameters value, x0, n0, x1, n1, x2, n2, ..., allocate n0 elements and assign to x0, ..., // and assign the given value to all n0*n1*n2*... values allocated template explicit SparkStack(T value, T** ptr, size_t len, R&&... rest) { T* start = base() + pos(); len_ = alloc(ptr, len, rest...); for (int i = 0; i < len_; ++i) *(start + i) = value; } // For each i, allocate ext(i) elements and assign to ptr(i) template explicit SparkStack(uvector& ptr, const uvector& ext) { len_ = 0; for (int i = 0; i < N; ++i) len_ += alloc(&ptr(i), ext(i)); } // Allocate enough elements for one or more xarray's having pre-set extent template explicit SparkStack(xarray&... a) { len_ = (alloc(&a.data_, a.size()) + ...); } template explicit SparkStack(std::vector>& as) { len_ = 0; for (auto& a : as) len_ += alloc(&a.data_, a.size()); } // Release memory when the SparkStack object goes out of scope ~SparkStack() { pos() -= len_; // std::cout << "Here!" << std::endl; } }; #define algoim_CONCAT2(x, y) x##y #define algoim_CONCAT(x, y) algoim_CONCAT2(x, y) #define algoim_spark_alloc(T, ...) SparkStack algoim_CONCAT(spark_alloc_var_, __LINE__)(__VA_ARGS__) #define algoim_spark_alloc_def(T, val, ...) SparkStack algoim_CONCAT(spark_alloc_var_, __LINE__)(val, __VA_ARGS__) #define algoim_spark_alloc_vec(T, ptr, ext) SparkStack algoim_CONCAT(spark_alloc_var_, __LINE__)(ptr, ext) #define algoim_spark_alloc_heap(T, ...) new SparkStack(__VA_ARGS__) #define algoim_spark_release_heap(SparkStackPtr) delete SparkStackPtr void algoimSparkAllocHeapVector(std::vector*>& sparkStackPtrs, std::vector>& tensors) { for (int i = 0; i < tensors.size(); ++i) { sparkStackPtrs.push_back(algoim_spark_alloc_heap(real, tensors[i])); } } } // namespace algoim #endif