Skip to content

Commit 74155b0

Browse files
committed
softmax
1 parent 37257e4 commit 74155b0

File tree

5 files changed

+166
-13
lines changed

5 files changed

+166
-13
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ plugins {
33
}
44

55
group 'com.github.TannerLow'
6-
version '0.3'
6+
version '0.4'
77
description 'Matrix Library with GPU compatibility.'
88

99
repositories {

src/main/java/com/github/TannerLow/JavaMatrixMath/Matrix.java

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,38 @@ public Matrix relu() {
9191
return result;
9292
}
9393

94+
public Matrix softmax() {
95+
Matrix result = new Matrix(rows, cols);
96+
97+
float[] buffer = new float[rows];
98+
for(int row = 0; row < rows; row++) {
99+
int offset = row * cols;
100+
101+
// calculate the max values
102+
buffer[row] = -Float.MAX_VALUE;
103+
for(int i = 0; i < cols; i++) {
104+
float value = data[offset + i];
105+
if(value > buffer[row]) {
106+
buffer[row] = value;
107+
}
108+
}
109+
110+
// calculate the sums
111+
float sum = 0;
112+
float max = buffer[row];
113+
for(int i = 0; i < cols; i++) {
114+
sum += Math.exp(data[offset + i] - max);
115+
}
116+
117+
// calculate the softmax vectors
118+
for(int i = 0; i < cols; i++) {
119+
result.data[offset + i] = (float) (Math.exp(data[offset + i] - max) / sum);
120+
}
121+
}
122+
123+
return result;
124+
}
125+
94126
public static boolean isCompatibleWithGPU(GPU gpu) {
95127
return gpu.isInitialized() &&
96128
gpu.getKernel("Matrices::matrixMultiply") != null &&
@@ -222,21 +254,21 @@ public Matrix relu(GPU gpu) {
222254

223255
Matrix result = new Matrix(rows, cols);
224256

225-
Pointer pointerA = Pointer.to(data);
257+
Pointer pointerIn = Pointer.to(data);
226258
Pointer pointerOut = Pointer.to(result.data);
227259

228260
// Allocate the memory objects for the input- and output data
229-
cl_mem memoryA = clCreateBuffer(context,
261+
cl_mem memoryIn = clCreateBuffer(context,
230262
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
231-
Sizeof.cl_float * data.length, pointerA, null);
263+
Sizeof.cl_float * data.length, pointerIn, null);
232264
cl_mem memoryOut = clCreateBuffer(context,
233265
CL_MEM_READ_WRITE,
234266
Sizeof.cl_float * result.data.length, null, null);
235267

236268
// Set the arguments for the kernel
237269
int argNum = 0;
238270
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryOut));
239-
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryA));
271+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryIn));
240272
clSetKernelArg(kernel, argNum++, Sizeof.cl_uint, Pointer.to(new int[]{cols}));
241273

242274
// Set the work-item dimensions
@@ -251,7 +283,53 @@ public Matrix relu(GPU gpu) {
251283
clEnqueueReadBuffer(commandQueue, memoryOut, CL_TRUE, 0,
252284
result.data.length * Sizeof.cl_float, pointerOut, 0, null, null);
253285

254-
clReleaseMemObject(memoryA);
286+
clReleaseMemObject(memoryIn);
287+
clReleaseMemObject(memoryOut);
288+
289+
return result;
290+
}
291+
292+
public Matrix softmax(GPU gpu) {
293+
cl_context context = gpu.getContext();
294+
cl_command_queue commandQueue = gpu.getCommandQueue();
295+
cl_kernel kernel = gpu.getKernel("Matrices::softmax");
296+
297+
if(kernel == null) {
298+
return null;
299+
}
300+
301+
Matrix result = new Matrix(rows, cols);
302+
303+
Pointer pointerIn = Pointer.to(data);
304+
Pointer pointerOut = Pointer.to(result.data);
305+
306+
// Allocate the memory objects for the input- and output data
307+
cl_mem memoryIn = clCreateBuffer(context,
308+
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
309+
Sizeof.cl_float * data.length, pointerIn, null);
310+
cl_mem memoryOut = clCreateBuffer(context,
311+
CL_MEM_READ_WRITE,
312+
Sizeof.cl_float * result.data.length, null, null);
313+
314+
// Set the arguments for the kernel
315+
int argNum = 0;
316+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryOut));
317+
clSetKernelArg(kernel, argNum++, Sizeof.cl_mem, Pointer.to(memoryIn));
318+
clSetKernelArg(kernel, argNum++, Sizeof.cl_uint, Pointer.to(new int[]{cols}));
319+
320+
// Set the work-item dimensions
321+
long local_work_sizes[] = new long[]{1};
322+
long global_work_sizes[] = new long[]{rows};
323+
324+
// Execute the kernel
325+
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
326+
global_work_sizes, local_work_sizes, 0, null, null);
327+
328+
// Read the output data
329+
clEnqueueReadBuffer(commandQueue, memoryOut, CL_TRUE, 0,
330+
result.data.length * Sizeof.cl_float, pointerOut, 0, null, null);
331+
332+
clReleaseMemObject(memoryIn);
255333
clReleaseMemObject(memoryOut);
256334

257335
return result;

src/main/resources/kernels/Matrices.cl

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ addRowToRows(__global float* C,
6262
}
6363
}
6464

65-
// Add row to rows: C = ReLu(A).
65+
// ReLU: output = max(input, 0), element-wise.
6666
__kernel void
67-
relu(__global float* C,
68-
__global float* A,
67+
relu(__global float* output,
68+
__global float* input,
6969
const int rowSize)
7070
{
7171
int globalRow = get_global_id(0);
@@ -75,13 +75,46 @@ relu(__global float* C,
7575
{
7676
int index = globalRow * rowSize + i;
7777

78-
// C[i] = max(C[i], 0)
79-
float value = A[index];
78+
// output[index] = max(input[index], 0)
79+
float value = input[index];
8080
float newValue = 0;
8181
if(value > 0) {
8282
newValue = value;
8383
}
8484

85-
C[index] = newValue;
85+
output[index] = newValue;
8686
}
87-
}
87+
}
88+
89+
// Row-wise softmax:
//   output[r][i] = exp(input[r][i] - max_r) / sum_j exp(input[r][j] - max_r)
// Each work-item processes the row selected by get_global_id(0); a row is
// rowSize consecutive floats starting at globalRow * rowSize.
__kernel void softmax(__global float* output,
                      __global float* input,
                      const int rowSize)
{
    int globalRow = get_global_id(0);
    int offset = globalRow * rowSize;

    // Get the max value of the row. Start from the most negative finite
    // float so every finite input compares >= the starting value.
    // (Named rowMax rather than max to avoid shadowing the built-in max().)
    float rowMax = -FLT_MAX;
    for (int i = 0; i < rowSize; i++) {
        float value = input[offset + i];
        if (value > rowMax) {
            rowMax = value;
        }
    }

    // Calculate the sum of exponentials of the shifted input elements.
    // Subtracting rowMax keeps every exponent <= 0 so exp() cannot overflow.
    float sum = 0.0f;
    for (int i = 0; i < rowSize; i++) {
        sum += exp(input[offset + i] - rowMax);
    }

    // Calculate softmax for each element of the row.
    for (int i = 0; i < rowSize; i++) {
        int index = offset + i;
        output[index] = exp(input[index] - rowMax) / sum;
    }
}

src/test/java/com/github/TannerLow/JavaMatrixMath/CpuTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ public static void testAll() {
88
testMultiply();
99
testAddRowToRows();
1010
testRelu();
11+
testSoftmax();
1112
}
1213

1314
private static void testMultiply() {
@@ -70,4 +71,23 @@ private static void testRelu() {
7071
}
7172
}
7273
}
74+
75+
private static void testSoftmax() {
76+
float[] data = {1.1f,2.2f,0.2f,-1.7f};
77+
float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
78+
79+
Matrix m = new Matrix(1, 4, data);
80+
81+
Matrix result = m.softmax();
82+
83+
if(result.rows != m.rows || result.cols != m.cols) {
84+
throw new TestFailedException();
85+
}
86+
87+
for(int i = 0; i < result.data.length; i++) {
88+
if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
89+
throw new TestFailedException();
90+
}
91+
}
92+
}
7393
}

src/test/java/com/github/TannerLow/JavaMatrixMath/GpuTest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ public static void testAll() throws IOException {
1818
testMultiply();
1919
testAddRowToRows();
2020
testRelu();
21+
testSoftmax();
2122
}
2223
}
2324

@@ -34,6 +35,7 @@ private static void setup() throws IOException {
3435
gpu.loadKernel(programId, "Matrices", "matrixMultiply");
3536
gpu.loadKernel(programId, "Matrices", "addRowToRows");
3637
gpu.loadKernel(programId, "Matrices", "relu");
38+
gpu.loadKernel(programId, "Matrices", "softmax");
3739

3840
if(!gpu.isInitialized()) {
3941
throw new IllegalStateException("GPU in unexpected state.");
@@ -101,6 +103,26 @@ private static void testRelu() {
101103
}
102104
}
103105

106+
private static void testSoftmax() {
107+
float[] data = {1.1f,2.2f,0.2f,-1.7f};
108+
float[] expected = {0.223636f,0.671841f,0.090923f,0.013599f};
109+
110+
Matrix m = new Matrix(1, 4, data);
111+
112+
Matrix result = m.softmax(gpu);
113+
114+
if(result.rows != m.rows || result.cols != m.cols) {
115+
throw new TestFailedException();
116+
}
117+
118+
for(int i = 0; i < result.data.length; i++) {
119+
if(!TestMath.withinMariginOfError(expected[i], result.data[i], 0.0005f)) {
120+
System.out.println(expected[i] + " vs. " + result.data[i]);
121+
throw new TestFailedException();
122+
}
123+
}
124+
}
125+
104126
private static String readFromInternalFile(String filepath) {
105127
try(InputStream fileInputStream = InternalFile.getInstance().getFileInputStream(filepath)) {
106128
byte[] bytes = fileInputStream.readAllBytes();

0 commit comments

Comments
 (0)