Examples - Compiling, linking, and running a simple matrix multiplication ATLAS program

This simple sample achieves a multiplication of two matrices, A and B. A and B have elements randomly generated with values between 0 and 1. The multiplication is achieved in the following ways: The resulting matrices C and D will contain the same elements.
Sample output produced by all executables across all platforms and architectures should look like this:
Matrix A has 3 rows and 6 columns:
0.566 0.974 0.202 0.941 0.294 0.427
0.580 0.539 0.772 0.248 0.832 0.848
0.080 0.533 0.434 0.163 0.576 0.416
Matrix B has 6 rows and 4 columns:
0.309 0.316 0.569 0.182
0.725 0.389 0.472 0.649
0.448 0.368 0.354 0.665
0.994 0.740 0.649 0.616
0.133 0.906 0.447 0.590
0.773 0.774 0.893 0.913
Matrix C has 3 rows and 4 columns:
2.276 1.926 1.978 2.013
1.928 2.270 2.148 2.387
1.165 1.356 1.185 1.469
Matrix D has 3 rows and 4 columns:
2.276 1.926 1.978 2.013
1.928 2.270 2.148 2.387
1.165 1.356 1.185 1.469

Pay attention to the fact that matrices C and D are congruent.

Also note that matrix data is organized or ordered in the Fortran way, namely columns major.

Sample 1

This program contains a C invocation of the Fortran BLAS function dgemm_ provided by the ATLAS framework.

Observation: In this sample, the invocation of dgemm_ has no previously declared prototype, hence the compiler might issue a warning message. Prototypes may be declared by including the atlas_f77 header files, but source files might not have these header files specified (i.e. old source code written prior to ATLAS).
Source code:
 #include <stdio.h>
 #include <time.h>
 #include <stdlib.h>
 
 void init(double* matrix, int row, int column)
 {
   for (int j = 0; j < column; j++){
     for (int i = 0; i < row; i++){
       matrix[j*row + i] = ((double)rand())/RAND_MAX;
     }
   }
 }
 
 void print(const char * name, const double* matrix, int row, int column)
 {
   printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
   for (int i = 0; i < row; i++){
     for (int j = 0; j < column; j++){
       printf("%.3f ", matrix[j*row + i]);
     }
     printf("\n");
   }
   printf("\n");
 }
 
 int main(int argc, char * argv[])
 {
   int rowsA, colsB, common;
   int i,j,k;
 
   if (argc != 4){
     printf("Using defaults\n");
     rowsA = 2; colsB = 4; common = 6;
   }
   else{
     rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
   }
 
   double A[rowsA * common]; double B[common * colsB];
   double C[rowsA * colsB]; double D[rowsA * colsB];
 
   char transA = 'N', transB = 'N';
   double one = 1.0, zero = 0.0;
 
   srand(time(NULL));
 
   init(A, rowsA, common); init(B, common, colsB);
 
   dgemm_(&transA, &transB, &rowsA, &colsB, &common, &one, A, 
          &rowsA, B, &common, &zero, C, &rowsA);
 
   for(i=0;i<colsB;i++){
     for(j=0;j<rowsA;j++){
       D[i*rowsA+j]=0;
       for(k=0;k<common;k++){
         D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
       }
     }
   }
 
   print("A", A, rowsA, common); print("B", B, common, colsB);
   print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
 
   return 0;
 }
To compile the program for ARCH(10):
xlc -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To compile the program for ARCH(11) and higher ARCHITECTURE levels:
xlc -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(10):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch10 
-latlas.arch10 -lf2c.arch10 -qfloat=ieee -o sample
To link the program for ARCH(11) and higher ARCHITECTURE levels:
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch11 
-latlas.arch11 -lf2c.arch11 -qfloat=ieee -o sample

Sample 2

This program contains a C++ invocation of the Fortran BLAS function dgemm_ provided by the ATLAS framework.

Observation: As opposed to sample 1, the compiler must be explicitly instructed that the function dgemm_ has C linkage and thus no mangling should be attempted. This can be achieved either as specified, or by including the appropriate header file with the extern "C" designation.
Source code:
 #include <stdio.h>
 #include <time.h>
 #include <stdlib.h>
 
 extern "C"
 {
   int dgemm_(char *, char *, int *, int *, int *, double *, double *, int *, 
              double *, int *, double *, double *, int *);
 }
 
 
 void init(double* matrix, int row, int column)
 {
   for (int j = 0; j < column; j++){
     for (int i = 0; i < row; i++){
       matrix[j*row + i] = ((double)rand())/RAND_MAX;
     }
   }
 }
 
 void print(const char * name, const double* matrix, int row, int column)
 {
   printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
   for (int i = 0; i < row; i++){
     for (int j = 0; j < column; j++){
       printf("%.3f ", matrix[j*row + i]);
     }
     printf("\n");
   }
   printf("\n");
 }
 
 int main(int argc, char * argv[])
 {
   int rowsA, colsB, common;
   int i,j,k;
 
   if (argc != 4){
     printf("Using defaults\n");
     rowsA = 2; colsB = 4; common = 6;
   }
   else{
     rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
   }
 
   double A[rowsA * common]; double B[common * colsB];
   double C[rowsA * colsB]; double D[rowsA * colsB];
 
   char transA = 'N', transB = 'N';
   double one = 1.0, zero = 0.0;
 
   srand(time(NULL));
 
   init(A, rowsA, common); init(B, common, colsB);
 
   dgemm_(&transA, &transB, &rowsA, &colsB, &common, &one, A, 
          &rowsA, B, &common, &zero, C, &rowsA);
 
   for(i=0;i<colsB;i++){
     for(j=0;j<rowsA;j++){
       D[i*rowsA+j]=0;
       for(k=0;k<common;k++){
         D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
       }
     }
   }
 
   print("A", A, rowsA, common); print("B", B, common, colsB);
   print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
 
   return 0;
 }
To compile the program for ARCH(10):
xlC -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To compile the program for ARCH(11) and higher ARCHITECTURE levels:
xlC -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(10):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch10 
-latlas.arch10 -lf2c.arch10 -qfloat=ieee -o sample
To link the program for ARCH(11) and higher ARCHITECTURE levels:
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lf77blas.arch11 
-latlas.arch11 -lf2c.arch11 -qfloat=ieee -o sample

Sample 3

This program contains a C invocation of the CBLAS function cblas_dgemm_ provided by the ATLAS framework.

Observation: Same result and functionality as if dgemm_ would be called, but this program uses the CBLAS version of the functions.
Source code:
 #include <time.h>
 #include <stdlib.h>
 #include <cblas.h>
 
 void init(double* matrix, int row, int column)
 {
   for (int j = 0; j < column; j++){
     for (int i = 0; i < row; i++){
       matrix[j*row + i] = ((double)rand())/RAND_MAX;
     }
   }
 }
 
 void print(const char * name, const double* matrix, int row, int column)
 {
   printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
   for (int i = 0; i < row; i++){
     for (int j = 0; j < column; j++){
       printf("%.3f ", matrix[j*row + i]);
     }
     printf("\n");
   }
   printf("\n");
 }
 
 int main(int argc, char * argv[])
 {
   int rowsA, colsB, common;
   int i,j,k;
 
   if (argc != 4){
     printf("Using defaults\n");
     rowsA = 2; colsB = 4; common = 6;
   }
   else{
     rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
   }
 
   double A[rowsA * common]; double B[common * colsB];
   double C[rowsA * colsB]; double D[rowsA * colsB];
 
   enum CBLAS_ORDER order = CblasColMajor;
   enum CBLAS_TRANSPOSE transA = CblasNoTrans;
   enum CBLAS_TRANSPOSE transB = CblasNoTrans;
 
   double one = 1.0, zero = 0.0;
 
   srand(time(NULL));
 
   init(A, rowsA, common); init(B, common, colsB);
 
   cblas_dgemm(order,transA,transB, rowsA, colsB, common ,1.0,A, 
               rowsA ,B, common ,0.0,C, rowsA);
 
   for(i=0;i<colsB;i++){
     for(j=0;j<rowsA;j++){
       D[i*rowsA+j]=0;
       for(k=0;k<common;k++){
         D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
       }
     }
   }
 
   print("A", A, rowsA, common); print("B", B, common, colsB);
   print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
 
   return 0;
 }
To compile the program for ARCH(10):
xlc -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To compile the program for ARCH(11) and higher ARCHITECTURE levels:
xlc -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.c
To link the program for ARCH(10):
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch10 
-latlas.arch10 -qfloat=ieee -o sample
To link the program for ARCH(11) and higher ARCHITECTURE levels:
xlc sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch11 
-latlas.arch11 -qfloat=ieee -o sample

Sample 4

This program contains a C++ invocation of the CBLAS function cblas_dgemm_ provided by the ATLAS framework.

Observation: Same result and functionality as if dgemm_ would be called, but this program uses the CBLAS version of the functions.
Source code:
 #include <time.h>
 #include <stdlib.h>
 
 extern "C"
 {
 #include <cblas.h>
 }
 
 void init(double* matrix, int row, int column)
 {
   for (int j = 0; j < column; j++){
     for (int i = 0; i < row; i++){
       matrix[j*row + i] = ((double)rand())/RAND_MAX;
     }
   }
 }
 
 void print(const char * name, const double* matrix, int row, int column)
 {
   printf("Matrix %s has %d rows and %d columns:\n", name, row, column);
   for (int i = 0; i < row; i++){
     for (int j = 0; j < column; j++){
       printf("%.3f ", matrix[j*row + i]);
     }
     printf("\n");
   }
   printf("\n");
 }
 
 int main(int argc, char * argv[])
 {
   int rowsA, colsB, common;
   int i,j,k;
 
   if (argc != 4){
     printf("Using defaults\n");
     rowsA = 2; colsB = 4; common = 6;
   }
   else{
     rowsA = atoi(argv[1]); colsB = atoi(argv[2]);common = atoi(argv[3]);
   }
 
   double A[rowsA * common]; double B[common * colsB];
   double C[rowsA * colsB]; double D[rowsA * colsB];
 
   enum CBLAS_ORDER order = CblasColMajor;
   enum CBLAS_TRANSPOSE transA = CblasNoTrans;
   enum CBLAS_TRANSPOSE transB = CblasNoTrans;
 
   double one = 1.0, zero = 0.0;
 
   srand(time(NULL));
 
   init(A, rowsA, common); init(B, common, colsB);
 
   cblas_dgemm(order,transA,transB, rowsA, colsB, common ,1.0,A, 
               rowsA ,B, common ,0.0,C, rowsA);
 
   for(i=0;i<colsB;i++){
     for(j=0;j<rowsA;j++){
       D[i*rowsA+j]=0;
       for(k=0;k<common;k++){
         D[i*rowsA+j]+=A[k*rowsA+j]*B[k+common*i];
       }
     }
   }
 
   print("A", A, rowsA, common); print("B", B, common, colsB);
   print("C", C, rowsA, colsB); print("D", D, rowsA, colsB);
 
   return 0;
 }
To compile the program for ARCH(10):
xlC -c -qfloat=ieee -qround=n -qarch=10 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To compile the program for ARCH(11) and higher ARCHITECTURE levels:
xlC -c -qfloat=ieee -qround=n -qarch=11 -qtarget=zosv2r1 -I 
/usr/lpp/cbclib/include/atlas -qfloat=ieee -o sample.o sample.C
To link the program for ARCH(10):
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch10 
-latlas.arch10 -qfloat=ieee -o sample
To link the program for ARCH(11) and higher ARCHITECTURE levels:
xlC sample.o -L /usr/lpp/cbclib/lib/atlas -lcblas.arch11 
-latlas.arch11 -qfloat=ieee -o sample