newport_j

January 11th, 2010, 08:53 PM

#include <stdio.h>

#include <stdlib.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <time.h>

// Matrices will be stored in row major order

// M(row, col) = *(M.elements + row * M.stride + col)

// Row-major matrix descriptor. The struct is passed to kernels by value;
// `elements` must point to device memory when used from __device__ code.
typedef struct {

int width;   // number of columns in this (sub-)matrix

int height;  // number of rows in this (sub-)matrix

int stride;  // row pitch in elements of the UNDERLYING allocation
             // (>= width; lets a sub-matrix alias a larger parent)

float *elements;  // first element; M(r, c) = elements[r * stride + c]

} Matrix;

// Thread block size

#define BLOCK_SIZE 16

// Get a matrix element

// Read and return A(row, col), honoring A's row stride.
// Caller must guarantee 0 <= row < A.height and 0 <= col < A.width;
// no bounds check is performed here.
__device__ float GetElement(const Matrix A, int row, int col)
{
    const float *rowStart = A.elements + row * A.stride;
    return rowStart[col];
}

// Set a matrix element

// Store `value` into A(row, col), honoring A's row stride.
// Caller must guarantee the indices are in range; no bounds check here.
__device__ void SetElement(Matrix A, int row, int col, float value)
{
    float *rowStart = A.elements + row * A.stride;
    rowStart[col] = value;
}

// Get the BLOCK_SIZExBLOCK_SIZE sub-matrix Asub of A that is

// located col sub-matrices to the right and row sub-matrices down

// from the upper-left corner of A

// Return the BLOCK_SIZE x BLOCK_SIZE sub-matrix of A whose upper-left
// corner lies `row` tiles down and `col` tiles right of A's origin.
// The result ALIASES A's storage (no copy) and inherits A's stride,
// so indexing with that stride stays valid inside the tile.
__device__ Matrix GetSubMatrix(Matrix A, int row, int col)
{
    // Element offset of the tile's upper-left corner:
    // A.stride * BLOCK_SIZE * row + BLOCK_SIZE * col, factored.
    const int corner = BLOCK_SIZE * (row * A.stride + col);

    Matrix tile;
    tile.width    = BLOCK_SIZE;
    tile.height   = BLOCK_SIZE;
    tile.stride   = A.stride;
    tile.elements = A.elements + corner;
    return tile;
}

When I am in a subroutine in the GDB debugger and I ask it to print out BLOCK_SIZE, it says: no symbol "BLOCK_SIZE" in current context. Yet I define it globally at the top of the program, like this:

#define BLOCK_SIZE 16

So BLOCK_SIZE equals 16. Now, the calculations in __device__ Matrix GetSubMatrix(Matrix A, int row, int col) are all okay: everything that includes BLOCK_SIZE works out to what it should be if BLOCK_SIZE were 16. They would be

Asub.width = BLOCK_SIZE;

Asub.height = BLOCK_SIZE;

Asub.stride = A.stride;

Asub.elements = &A.elements[A.stride * BLOCK_SIZE * row + BLOCK_SIZE * col];

So how can it do those calculations if it does not know that BLOCK_SIZE is 16? At least, it says it does not know that — but the calculations say otherwise. (Note: #define is a preprocessor macro, so it normally leaves no debug symbol; the compiler substitutes 16 before compilation, which is why the code works even though GDB cannot see the name.)

Newport_j.

#include <stdlib.h>

#include <cuda.h>

#include <cuda_runtime.h>

#include <time.h>

// Matrices will be stored in row major order

// M(row, col) = *(M.elements + row * M.stride + col)

// Row-major matrix descriptor. The struct is passed to kernels by value;
// `elements` must point to device memory when used from __device__ code.
typedef struct {

int width;   // number of columns in this (sub-)matrix

int height;  // number of rows in this (sub-)matrix

int stride;  // row pitch in elements of the UNDERLYING allocation
             // (>= width; lets a sub-matrix alias a larger parent)

float *elements;  // first element; M(r, c) = elements[r * stride + c]

} Matrix;

// Thread block size

#define BLOCK_SIZE 16

// Get a matrix element

// Read and return A(row, col), honoring A's row stride.
// Caller must guarantee 0 <= row < A.height and 0 <= col < A.width;
// no bounds check is performed here.
__device__ float GetElement(const Matrix A, int row, int col)
{
    const float *rowStart = A.elements + row * A.stride;
    return rowStart[col];
}

// Set a matrix element

// Store `value` into A(row, col), honoring A's row stride.
// Caller must guarantee the indices are in range; no bounds check here.
__device__ void SetElement(Matrix A, int row, int col, float value)
{
    float *rowStart = A.elements + row * A.stride;
    rowStart[col] = value;
}

// Get the BLOCK_SIZExBLOCK_SIZE sub-matrix Asub of A that is

// located col sub-matrices to the right and row sub-matrices down

// from the upper-left corner of A

// Return the BLOCK_SIZE x BLOCK_SIZE sub-matrix of A whose upper-left
// corner lies `row` tiles down and `col` tiles right of A's origin.
// The result ALIASES A's storage (no copy) and inherits A's stride,
// so indexing with that stride stays valid inside the tile.
__device__ Matrix GetSubMatrix(Matrix A, int row, int col)
{
    // Element offset of the tile's upper-left corner:
    // A.stride * BLOCK_SIZE * row + BLOCK_SIZE * col, factored.
    const int corner = BLOCK_SIZE * (row * A.stride + col);

    Matrix tile;
    tile.width    = BLOCK_SIZE;
    tile.height   = BLOCK_SIZE;
    tile.stride   = A.stride;
    tile.elements = A.elements + corner;
    return tile;
}

When I am in a subroutine in the GDB debugger and I ask it to print out BLOCK_SIZE, it says: no symbol "BLOCK_SIZE" in current context. Yet I define it globally at the top of the program, like this:

#define BLOCK_SIZE 16

So BLOCK_SIZE equals 16. Now, the calculations in __device__ Matrix GetSubMatrix(Matrix A, int row, int col) are all okay: everything that includes BLOCK_SIZE works out to what it should be if BLOCK_SIZE were 16. They would be

Asub.width = BLOCK_SIZE;

Asub.height = BLOCK_SIZE;

Asub.stride = A.stride;

Asub.elements = &A.elements[A.stride * BLOCK_SIZE * row + BLOCK_SIZE * col];

So how can it do those calculations if it does not know that BLOCK_SIZE is 16? At least, it says it does not know that — but the calculations say otherwise. (Note: #define is a preprocessor macro, so it normally leaves no debug symbol; the compiler substitutes 16 before compilation, which is why the code works even though GDB cannot see the name.)

Newport_j.