typedef struct sparse_matrix sparse_matrix_t;
static struct timeval tv0;
gettimeofday ( &tv, NULL );
if ( tv0.tv_usec > tv.tv_usec )
tv.tv_usec = 1000000 - (tv0.tv_usec - tv.tv_usec);
tv.tv_usec -= tv0.tv_usec;
return ((double) tv.tv_sec + 1.0e-6 * tv.tv_usec);
( sparse_matrix_t* matrix,
memset ( matrix, 0x0, sizeof(*matrix) );
matrix->offset = (int*) malloc ( (nrows + 1) * sizeof(*matrix->offset) );
matrix->index = (int*) malloc ( nnz * sizeof(*matrix->index) );
matrix->value = (double*) malloc ( nnz * sizeof(*matrix->value) );
if ( (matrix->offset == NULL) ||
(matrix->index == NULL) ||
(matrix->value == NULL) )
"ERROR: failed to allocate a sparse matrix.\n" );
memset ( matrix->offset, 0x0,
(nrows + 1) * sizeof(*matrix->offset) );
void sparse_matrix_destroy
( sparse_matrix_t* matrix )
memset ( matrix, 0x0, sizeof(*matrix) );
( sparse_matrix_t* matrix,
FILE* file = fopen ( fname, "r" );
fprintf ( stderr, "ERROR: failed to open the file `%s\'.\n",
if ( fscanf( file, " %d %d %d", &nrows, &ncols, &nnz ) != 3 )
if ( (nrows < 0) || (ncols < 0) || (nnz < 0) )
sparse_matrix_init ( matrix, nrows, ncols, nnz );
for ( i = 0; i < nnz; i++ )
if ( fscanf( file, " %d %d %le", &irow, &jcol, &val ) != 3 )
if ( (irow < 1) || (irow > nrows) ||
(jcol < 1) || (jcol > ncols) )
matrix->offset[irow - 1]++;
for ( irow = n = 0; irow <= nrows; irow++ )
k = matrix->offset[irow];
matrix->offset[irow] = n;
file = fopen ( fname, "r" );
fprintf ( stderr, "ERROR: failed to open the file `%s\'.\n",
if ( fscanf( file, " %d %d %d", &nrows, &ncols, &nnz ) != 3 )
if ( (nrows != matrix->nrows) ||
(ncols != matrix->ncols) ||
(nnz != matrix->offset[nrows]) )
for ( i = 0; i < nnz; i++ )
if ( fscanf( file, " %d %d %le", &irow, &jcol, &val ) != 3 )
if ( (irow < 1) || (irow > nrows) ||
(jcol < 1) || (jcol > ncols) )
k = matrix->offset[irow - 1]++;
matrix->index[k] = jcol - 1;
for ( irow = n = 0; irow < nrows; irow++ )
k = matrix->offset[irow];
matrix->offset[irow] = n;
fprintf ( stderr, "ERROR: failed to read the file `%s\' "
for ( i = 0; i < len; i++ )
const sparse_matrix_t* mat,
const int nrows = mat->nrows;
#pragma omp parallel shared(mat, lhs, rhs)
#pragma omp for schedule(dynamic, chunksize)
for (int irow = 0; irow < nrows; irow++ )
#pragma omp simd reduction(+:sum)
for (int i = mat->offset[irow]; i < mat->offset[irow + 1]; i++ )
sum += mat->value[i] * rhs[mat->index[i]];
double t0_matvec, t0_read, t0_total, t1_matvec, t1_read, t1_total;
const char* fname = "matrix.dat";
printf ( "----- Matrix-vector multiplication: OpenMP -----\n" );
int max_num_threads = omp_get_max_threads();
omp_set_num_threads(max_num_threads);
printf("used number of threads: %d \n", max_num_threads);
printf ( "Reading matrix from `%s\' ...\n\n", fname );
sparse_matrix_read ( &matrix, fname );
printf ( " nrows : %d\n", matrix.nrows );
printf ( " ncols : %d\n", matrix.ncols );
printf ( " nnz : %d\n\n", matrix.offset[matrix.nrows] );
printf ( "Initialising the first vector ...\n" );
lhs = (double*) malloc ( matrix.nrows * sizeof(*lhs) );
rhs = (double*) malloc ( matrix.ncols * sizeof(*rhs) );
if ( (lhs == NULL) || (rhs == NULL) )
fprintf ( stderr, "ERROR: failed to allocate the vectors.\n" );
vector_init(rhs,matrix.ncols);
t1_read = wtime() - t0_read;
printf ( "Executing %d matrix-vector products ...\n", niter );
int chunksize = (matrix.nrows / (max_num_threads*10)) +1;
for ( iiter = 0; iiter < niter; iiter++ )
matvec ( lhs, &matrix, rhs, chunksize );
t1_matvec = wtime() - t0_matvec;
sparse_matrix_destroy ( &matrix );
t1_total = wtime() - t0_total;
printf ( "matrix read: \t\t\t done in %g seconds.\n", t1_read );
printf ( "matrix vector multiplication: \t done in %g seconds.\n", t1_matvec );
printf ( "total: \t\t\t\t done in %g seconds.\n", t1_total );