gpu_array_tile.h
#ifndef GPU_ARRAY_TILE_H
#define GPU_ARRAY_TILE_H
#include <isl/aff_type.h>
#include <isl/map_type.h>
#include <isl/val.h>
/* The fields stride and shift only contain valid information
 * if shift != NULL.
 * If so, they express that the current index is such that if you add shift,
 * then the result is always a multiple of stride.
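 * For example, if the accessed indices are of the form 4 i + 1,
 * then stride is 4 and shift is 3, since (4 i + 1) + 3 = 4 (i + 1)
 * is always a multiple of 4.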
 * Let D represent the initial tile->depth dimensions of the computed schedule.
 * The spaces of "lb" and "shift" are of the form
 *
 *	D -> [b]
 */
struct gpu_array_bound {
	isl_val *size;	/* size of the tile in this dimension */
	isl_aff *lb;	/* lower bound, as a function of D */
	isl_val *stride;
	isl_aff *shift;
};
/* A tile of an outer array.
 *
 * requires_unroll is set if the schedule dimensions that are mapped
 * to threads need to be unrolled for this (private) tile to be used.
 *
 * "depth" reflects the number of schedule dimensions that affect the tile.
 * The copying into and/or out of the tile is performed at that depth.
 *
 * n is the dimension of the array.
 * bound is an array of size "n" representing the lower bound
 * and size for each index.
 *
 * tiling maps a tile in the global array to the corresponding
 * shared/private memory tile and is of the form
 *
 *	{ [D[i] -> A[a]] -> T[(a + shift(i))/stride - lb(i)] }
 *
 * where D represents the initial "depth" dimensions
 * of the computed schedule.
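 *
 * For example, for a one-dimensional array with depth 1,
 * lb(i) = 32 i, size 32, stride 1 and shift 0, the tiling is
 *
 *	{ [D[i] -> A[a]] -> T[a - 32 i] }
 *
 * mapping each block of 32 consecutive elements of A
 * to a tile T of 32 elements.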
 */
struct gpu_array_tile {
	isl_ctx *ctx;	/* isl context to which the tile belongs */
	int requires_unroll;
	int depth;
	int n;
	struct gpu_array_bound *bound;
	isl_multi_aff *tiling;
};
/* Allocate a tile for an array of dimension "n_index". */
struct gpu_array_tile *gpu_array_tile_create(isl_ctx *ctx, int n_index);
/* Free "tile" and return NULL. */
struct gpu_array_tile *gpu_array_tile_free(struct gpu_array_tile *tile);
/* Return the size of "tile" in number of elements. */
__isl_give isl_val *gpu_array_tile_size(struct gpu_array_tile *tile);
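
/* A minimal usage sketch (illustrative only; error handling is elided
 * and "ctx" is assumed to be a valid isl_ctx):
 *
 *	struct gpu_array_tile *tile;
 *	isl_val *size;
 *
 *	tile = gpu_array_tile_create(ctx, 2);
 *	... fill in tile->bound[0], tile->bound[1] and tile->tiling ...
 *	size = gpu_array_tile_size(tile);
 *	isl_val_free(size);
 *	tile = gpu_array_tile_free(tile);
 */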
#endif