- Notifications
You must be signed in to change notification settings - Fork 75
/
Copy pathkernel.lua
63 lines (53 loc) · 1.45 KB
/
kernel.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
-- Banner printed once, when this chunk is executed by the Lua interpreter.
print("lua locked and loaded")
-- C-style printf: builds the text with string.format and hands it to
-- io.write. No trailing newline is appended; callers put "\n" in the format.
function printf(...)
  local text = string.format(...)
  io.write(text)
end
-- Per-type size table keyed by type name, used by kernel_params to derive
-- lanes_per_thread. The values look like byte widths; confirm against the
-- C++ side before relying on that.
type_sizes = {
  ["char"]   = 1,
  ["short"]  = 2,
  ["int"]    = 4,
  ["long"]   = 8,
  ["float"]  = 4,
  ["double"] = 8,
}
-- Returns true when the given type name is "float" or "double",
-- and false for anything else.
function is_float(type_name)
  if type_name == "float" then
    return true
  end
  return type_name == "double"
end
-- Computes the tuning parameters for one kernel template instantiation.
--
-- This function has your kernel's special sauce. It runs each time the
-- kernel function template is instantiated, and key has fields
--   int    sm
--   string type
-- describing the template parameters.
--
-- This file is not distributed with the resulting executable. It and the
-- Lua interpreter are used only at compile-time. However, the luacir.hxx
-- Circle/Lua bindings work at both compile-time (inside the interpreter)
-- and runtime.
--
-- Returns a fresh table with fields
--   bytes_per_lane    (integer)
--   lanes_per_thread  (integer)
--   flags             (array of strings, possibly empty)
--
-- Raises an error if key.type is neither "short" nor "float" and has no
-- entry in type_sizes.
function kernel_params(key)
  printf(" **Lua gets key { %d, %s }\n", key.sm, key.type)

  -- Declared local so each call builds its own table and nothing leaks into
  -- (or is clobbered in) the interpreter's global environment.
  local params = { flags = { } }

  if is_float(key.type) and key.sm > 52 then
    params.flags[1] = "fast_math"
  end

  if key.type == "short" then
    params.bytes_per_lane = 8
    if key.sm < 50 then
      params.lanes_per_thread = 2
    else
      params.lanes_per_thread = 4
    end

  elseif key.type == "float" then
    params.bytes_per_lane = 16
    if key.sm < 50 then
      params.lanes_per_thread = 4
    else
      params.lanes_per_thread = 8
    end
    params.flags[#params.flags + 1] = "ftz"

  else
    -- Every remaining type is sized through type_sizes. Fail with a clear
    -- message instead of an arithmetic-on-nil error for unknown names.
    local size = type_sizes[key.type]
    if size == nil then
      error("kernel_params: unsupported type '" .. tostring(key.type) .. "'", 2)
    end

    params.bytes_per_lane = 24
    params.lanes_per_thread = 32 // size
    params.flags[#params.flags + 1] = "ldg"
  end

  return params
end