#ifndef OPENJIJ_SYSTEM_GPU_CHIMERA_CUDA_INDEX_HPP__
#define OPENJIJ_SYSTEM_GPU_CHIMERA_CUDA_INDEX_HPP__

#include <cuda_runtime.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
namespace openjij {
namespace system {

// Chimera graph information: unit-cell grid size and number of trotter slices.
struct ChimeraInfo {
  std::size_t rows;     // number of unit-cell rows
  std::size_t cols;     // number of unit-cell columns
  std::size_t trotters; // number of trotter slices

  // number of spins in a single Chimera unit cell
  constexpr static std::size_t chimera_unitsize = 8;
};
namespace chimera_cuda {
// global x coordinate: offset within a row (unit-cell column c, intra-cell index i)
__host__ __device__ __forceinline__ std::uint64_t
glIdx_x(ChimeraInfo info, std::uint64_t r, std::uint64_t c, std::uint64_t i,
        std::uint64_t t) {
  assert(r < info.rows);
  assert(c < info.cols);
  assert(i < info.chimera_unitsize);
  assert(t < info.trotters);
  return info.chimera_unitsize * c + i;
}
// global y coordinate: unit-cell row
__host__ __device__ __forceinline__ std::uint64_t
glIdx_y(ChimeraInfo info, std::uint64_t r, std::uint64_t c, std::uint64_t i,
        std::uint64_t t) {
  assert(r < info.rows);
  assert(c < info.cols);
  assert(i < info.chimera_unitsize);
  assert(t < info.trotters);
  return r;
}
// global z coordinate: trotter slice
__host__ __device__ __forceinline__ std::uint64_t
glIdx_z(ChimeraInfo info, std::uint64_t r, std::uint64_t c, std::uint64_t i,
        std::uint64_t t) {
  assert(r < info.rows);
  assert(c < info.cols);
  assert(i < info.chimera_unitsize);
  assert(t < info.trotters);
  return t;
}
// global linear index: trotter-major, then row, then column, then intra-cell spin
__host__ __device__ __forceinline__ std::uint64_t
glIdx(ChimeraInfo info, std::uint64_t r, std::uint64_t c, std::uint64_t i,
      std::uint64_t t) {
  return (info.chimera_unitsize * info.cols * info.rows) *
             glIdx_z(info, r, c, i, t) +
         (info.chimera_unitsize * info.cols) * glIdx_y(info, r, c, i, t) +
         glIdx_x(info, r, c, i, t);
}
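// Usage sketch (not part of the original header): glIdx flattens the
// (t, r, c, i) coordinates into a single offset, so a device-side spin array
// laid out trotter-major can be read directly. The helper name and the float
// spin type below are illustrative assumptions, not part of the OpenJij API.
// For example, with rows = cols = 2 and a single trotter slice,
// glIdx(info, 1, 0, 3, 0) = (8*2*2)*0 + (8*2)*1 + (8*0 + 3) = 19.
__host__ __device__ __forceinline__ float
read_spin_example(const float *spins, ChimeraInfo info, std::uint64_t r,
                  std::uint64_t c, std::uint64_t i, std::uint64_t t) {
  return spins[glIdx(info, r, c, i, t)];
}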
// block-local linear index inside a tile of block_trot x block_row x block_col
// unit cells (same trotter-major ordering as glIdx, restricted to one tile)
template <std::size_t block_row, std::size_t block_col, std::size_t block_trot>
__host__ __device__ __forceinline__ std::uint64_t
bkIdx(ChimeraInfo info, std::uint64_t b_r, std::uint64_t b_c, std::uint64_t i,
      std::uint64_t b_t) {
  return (info.chimera_unitsize * block_col * block_row) * b_t +
         (info.chimera_unitsize * block_col) * b_r +
         (info.chimera_unitsize) * b_c + i;
}
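// Worked example (illustrative, not from the original source): inside a
// 2 x 2 x 1 tile of unit cells,
//   bkIdx<2, 2, 1>(info, /*b_r=*/1, /*b_c=*/0, /*i=*/3, /*b_t=*/0)
//     = (8*2*2)*0 + (8*2)*1 + 8*0 + 3 = 19,
// i.e. the trotter slice is outermost, then the row, then the column, then
// the intra-cell spin, exactly mirroring glIdx within the tile.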
// block-local index into a tile padded with a one-cell halo in every
// direction; coordinates are shifted by one so that -1 (or the block size)
// addresses the halo cells
template <std::size_t block_row, std::size_t block_col, std::size_t block_trot>
__host__ __device__ __forceinline__ std::uint64_t
bkIdx_ext(ChimeraInfo info, std::int64_t b_r, std::int64_t b_c, std::int64_t i,
          std::int64_t b_t) {
  return bkIdx<block_row + 2, block_col + 2, block_trot + 2>(
      info, b_r + 1, b_c + 1, i, b_t + 1);
}
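// Usage sketch (hypothetical kernel, not part of the original header): a
// thread block that caches its own unit cells plus a one-cell halo in shared
// memory can address the cache with bkIdx_ext, using coordinate -1 or the
// block size to reach values copied from neighbouring blocks. The kernel name
// and the float spin type are assumptions made for illustration only.
template <std::size_t block_row, std::size_t block_col, std::size_t block_trot>
__global__ void halo_cache_example(const float *spins, ChimeraInfo info) {
  constexpr std::size_t unit = ChimeraInfo::chimera_unitsize;
  // logical shape: (block_trot + 2) x (block_row + 2) x (block_col + 2) cells
  __shared__ float cache[(block_trot + 2) * (block_row + 2) * (block_col + 2) *
                         unit];
  // ... cooperative loads from global memory into cache would go here ...
  __syncthreads();
  // spin 0 of this block's first unit cell:
  float centre =
      cache[bkIdx_ext<block_row, block_col, block_trot>(info, 0, 0, 0, 0)];
  // same spin in the halo cell one row above (owned by the neighbouring block):
  float north =
      cache[bkIdx_ext<block_row, block_col, block_trot>(info, -1, 0, 0, 0)];
  (void)centre;
  (void)north;
  (void)spins;
}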
// global linear index for a single trotter slice (t = 0)
__host__ __device__ __forceinline__ std::uint64_t
glIdx(ChimeraInfo info, std::uint64_t r, std::uint64_t c, std::uint64_t i) {
  return glIdx(info, r, c, i, 0);
}
// intra-cell spin index recovered from the (x, y, z) coordinates
__host__ __device__ __forceinline__ std::uint64_t
idx_i(ChimeraInfo info, std::uint64_t x, std::uint64_t y, std::uint64_t z) {
  return x % info.chimera_unitsize;
}
// unit-cell column recovered from the (x, y, z) coordinates
__host__ __device__ __forceinline__ std::uint64_t
idx_c(ChimeraInfo info, std::uint64_t x, std::uint64_t y, std::uint64_t z) {
  return x / info.chimera_unitsize;
}
// unit-cell row recovered from the (x, y, z) coordinates
__host__ __device__ __forceinline__ std::uint64_t
idx_r(ChimeraInfo info, std::uint64_t x, std::uint64_t y, std::uint64_t z) {
  return y;
}
// trotter slice recovered from the (x, y, z) coordinates
__host__ __device__ __forceinline__ std::uint64_t
idx_t(ChimeraInfo info, std::uint64_t x, std::uint64_t y, std::uint64_t z) {
  return z;
}
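// Round-trip sketch (illustrative, not part of the original header): the
// idx_* helpers invert the glIdx_* coordinate mapping, so recovering the
// unit-cell coordinates from x = glIdx_x(...), y = glIdx_y(...),
// z = glIdx_z(...) reproduces the original (r, c, i, t).
__host__ __device__ __forceinline__ bool
coordinates_round_trip_example(ChimeraInfo info, std::uint64_t r,
                               std::uint64_t c, std::uint64_t i,
                               std::uint64_t t) {
  const std::uint64_t x = glIdx_x(info, r, c, i, t);
  const std::uint64_t y = glIdx_y(info, r, c, i, t);
  const std::uint64_t z = glIdx_z(info, r, c, i, t);
  return idx_r(info, x, y, z) == r && idx_c(info, x, y, z) == c &&
         idx_i(info, x, y, z) == i && idx_t(info, x, y, z) == t;
}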