Directory: | src/ |
---|---|
File: | src/xt_gpu.c |
Date: | 2024-11-08 09:02:52 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 18 | 30 | 60.0% |
Branches: | 2 | 8 | 25.0% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * @file xt_gpu.c | ||
3 | * | ||
4 | * @copyright Copyright (C) 2022 Jörg Behrens <behrens@dkrz.de> | ||
5 | * Moritz Hanke <hanke@dkrz.de> | ||
6 | * Thomas Jahns <jahns@dkrz.de> | ||
7 | * | ||
8 | * @author Jörg Behrens <behrens@dkrz.de> | ||
9 | * Moritz Hanke <hanke@dkrz.de> | ||
10 | * Thomas Jahns <jahns@dkrz.de> | ||
11 | */ | ||
12 | /* | ||
13 | * Keywords: | ||
14 | * Maintainer: Jörg Behrens <behrens@dkrz.de> | ||
15 | * Moritz Hanke <hanke@dkrz.de> | ||
16 | * Thomas Jahns <jahns@dkrz.de> | ||
17 | * URL: https://dkrz-sw.gitlab-pages.dkrz.de/yaxt/ | ||
18 | * | ||
19 | * Redistribution and use in source and binary forms, with or without | ||
20 | * modification, are permitted provided that the following conditions are | ||
21 | * met: | ||
22 | * | ||
23 | * Redistributions of source code must retain the above copyright notice, | ||
24 | * this list of conditions and the following disclaimer. | ||
25 | * | ||
26 | * Redistributions in binary form must reproduce the above copyright | ||
27 | * notice, this list of conditions and the following disclaimer in the | ||
28 | * documentation and/or other materials provided with the distribution. | ||
29 | * | ||
30 | * Neither the name of the DKRZ GmbH nor the names of its contributors | ||
31 | * may be used to endorse or promote products derived from this software | ||
32 | * without specific prior written permission. | ||
33 | * | ||
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
35 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
36 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
37 | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
38 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
39 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
40 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
41 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
42 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
43 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
44 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
45 | */ | ||
46 | #ifdef HAVE_CONFIG_H | ||
47 | #include "config.h" | ||
48 | #endif | ||
49 | |||
50 | #include <string.h> | ||
51 | |||
52 | #include "core/core.h" | ||
53 | #include "core/ppm_xfuncs.h" | ||
54 | #include "xt_gpu.h" | ||
55 | |||
56 | |||
57 | static const char filename[] = "xt_gpu.c"; | ||
58 | |||
59 | #ifdef HAVE_CUDA | ||
60 | # include "xt_cuda.h" | ||
61 | #endif | ||
62 | #ifdef _OPENACC | ||
63 | # include "openacc.h" | ||
64 | #endif | ||
65 | |||
66 | static void * default_malloc(size_t alloc_size, enum xt_memtype memtype); | ||
67 | static void default_free(void * ptr, enum xt_memtype memtype); | ||
68 | static void default_memcpy( | ||
69 | void * dst, void const * src, size_t buffer_size, | ||
70 | enum xt_memtype dst_memtype, enum xt_memtype src_memtype); | ||
71 | static enum xt_memtype default_get_memtype(const void *ptr); | ||
72 | static int default_instr_push(char const *); | ||
73 | static int default_instr_pop(void); | ||
74 | |||
75 | static struct xt_gpu_vtable vtable = { | ||
76 | .Malloc = default_malloc, | ||
77 | .Free = default_free, | ||
78 | .Memcpy = default_memcpy, | ||
79 | .Get_memtype = default_get_memtype, | ||
80 | .Instr_push = default_instr_push, | ||
81 | .Instr_pop = default_instr_pop, | ||
82 | }; | ||
83 | |||
84 | #ifdef HAVE_CUDA | ||
85 | static const struct xt_gpu_vtable *cuda_vtable; | ||
86 | #endif | ||
87 | |||
88 | #ifdef _OPENACC | ||
89 | /* better move to its own file? */ | ||
90 | static void *xt_acc_malloc( | ||
91 | size_t alloc_size, enum xt_memtype memtype) { | ||
92 | |||
93 | switch (memtype) { | ||
94 | default: | ||
95 | Xt_abort( | ||
96 | Xt_default_comm, | ||
97 | "ERROR(xt_cuda_malloc): unsupported memory type", | ||
98 | filename, __LINE__); | ||
99 | return NULL; | ||
100 | case (XT_MEMTYPE_HOST): | ||
101 | return xmalloc(alloc_size); | ||
102 | case (XT_MEMTYPE_DEVICE): | ||
103 | return acc_malloc(alloc_size + (alloc_size == 0)); | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static void xt_acc_free(void *ptr, enum xt_memtype memtype) { | ||
108 | |||
109 | switch (memtype) { | ||
110 | default: | ||
111 | Xt_abort( | ||
112 | Xt_default_comm, | ||
113 | "ERROR(xt_cuda_free): unsupported memory type", | ||
114 | filename, __LINE__); | ||
115 | break; | ||
116 | case (XT_MEMTYPE_HOST): | ||
117 | free(ptr); | ||
118 | break; | ||
119 | case (XT_MEMTYPE_DEVICE): | ||
120 | acc_free(ptr); | ||
121 | break; | ||
122 | } | ||
123 | } | ||
124 | #endif | ||
125 | |||
/**
 * Choose the implementations used by the xt_gpu_* entry points.
 *
 * Without HAVE_CUDA and _OPENACC this function changes nothing and the
 * host-only defaults stay in place.
 */
void xt_gpu_init(void) {

#ifdef HAVE_CUDA
  /* take over the whole CUDA table, but only if xt_cuda_init returned one */
  cuda_vtable = xt_cuda_init();
  if (cuda_vtable) {
    vtable = *cuda_vtable;
  }
#endif
#ifdef _OPENACC
  /* better make this configurable? */
  /* done after the CUDA step, so these two entries win if both are enabled */
  vtable.Free = xt_acc_free;
  vtable.Malloc = xt_acc_malloc;
#endif
}
138 | |||
/* Stringify the argument exactly as written (no macro expansion first). */
#define STR(s) #s

/*
 * Report an error through Xt_abort unless `type` is XT_MEMTYPE_HOST.
 *
 * The message names the function the macro is used in. __func__ is a
 * predefined identifier, not a macro or a string literal, so it can neither
 * be stringified nor concatenated with literals at preprocessing time
 * (STR(__func__) produces the text "__func__"). The message is therefore
 * assembled at run time. The buffer is sized from the three parts it is
 * built from, so the strcpy/strcat sequence cannot overflow it.
 */
#define CHECK_FOR_HOST_MEM(type)                                          \
  do {                                                                    \
    if ((type) != XT_MEMTYPE_HOST) {                                      \
      static const char chk_prefix_[] = "ERROR(";                         \
      static const char chk_suffix_[] = "): unsupported memory type";     \
      char chk_msg_[sizeof chk_prefix_ + sizeof __func__                  \
                    + sizeof chk_suffix_];                                \
      strcpy(chk_msg_, chk_prefix_);                                      \
      strcat(chk_msg_, __func__);                                         \
      strcat(chk_msg_, chk_suffix_);                                      \
      Xt_abort(Xt_default_comm, chk_msg_, filename, __LINE__);            \
    }                                                                     \
  } while (0)
149 | |||
150 | 6393 | static void * default_malloc(size_t alloc_size, enum xt_memtype memtype) { | |
151 | |||
152 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6393 times.
|
6393 | CHECK_FOR_HOST_MEM(memtype); |
153 | |||
154 | 6393 | return xmalloc(alloc_size); | |
155 | } | ||
156 | |||
157 | 6386 | static void default_free(void * ptr, enum xt_memtype memtype) { | |
158 | |||
159 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6386 times.
|
6386 | CHECK_FOR_HOST_MEM(memtype); |
160 | |||
161 | 6386 | free(ptr); | |
162 | 6386 | } | |
163 | |||
164 | ✗ | static void default_memcpy( | |
165 | void * dst, void const * src, size_t buffer_size, | ||
166 | enum xt_memtype dst_memtype, enum xt_memtype src_memtype) { | ||
167 | |||
168 | ✗ | CHECK_FOR_HOST_MEM(src_memtype); | |
169 | ✗ | CHECK_FOR_HOST_MEM(dst_memtype); | |
170 | |||
171 | ✗ | memcpy(dst, src, buffer_size); | |
172 | } | ||
173 | |||
174 | 2124 | static enum xt_memtype default_get_memtype(const void *ptr) | |
175 | { | ||
176 | (void)ptr; | ||
177 | 2124 | return XT_MEMTYPE_HOST; | |
178 | } | ||
179 | |||
/* Fallback behind vtable.Instr_push: ignores the name and returns 0. */
static int default_instr_push(char const * XT_UNUSED(name))
{
  return 0;
}
/* Fallback behind vtable.Instr_pop: does nothing and returns 0. */
static int default_instr_pop(void)
{
  return 0;
}
182 | |||
183 | 6393 | void * xt_gpu_malloc(size_t alloc_size, enum xt_memtype memtype) { | |
184 | 6393 | return vtable.Malloc(alloc_size, memtype); | |
185 | } | ||
186 | |||
187 | 6386 | void xt_gpu_free(void * ptr, enum xt_memtype memtype) { | |
188 | 6386 | vtable.Free(ptr, memtype); | |
189 | 6386 | } | |
190 | |||
191 | ✗ | void xt_gpu_memcpy( | |
192 | void * dst, void const * src, size_t buffer_size, | ||
193 | enum xt_memtype dst_memtype, enum xt_memtype src_memtype) { | ||
194 | ✗ | vtable.Memcpy(dst, src, buffer_size, dst_memtype, src_memtype); | |
195 | } | ||
196 | |||
197 | 2124 | enum xt_memtype xt_gpu_get_memtype(const void *ptr) { | |
198 | 2124 | return vtable.Get_memtype(ptr); | |
199 | } | ||
200 | |||
201 | ✗ | int xt_gpu_instr_push(char const * name) { | |
202 | ✗ | return vtable.Instr_push(name); | |
203 | } | ||
204 | |||
205 | ✗ | int xt_gpu_instr_pop(void) { | |
206 | ✗ | return vtable.Instr_pop(); | |
207 | } | ||
208 | |||
209 | /* | ||
210 | * Local Variables: | ||
211 | * c-basic-offset: 2 | ||
212 | * coding: utf-8 | ||
213 | * indent-tabs-mode: nil | ||
214 | * show-trailing-whitespace: t | ||
215 | * require-trailing-newline: t | ||
216 | * End: | ||
217 | */ | ||
218 |