Hi all,

I implemented fast matrix multiplication in GroovyLab using OpenBLAS as follows:

final public Matrix multiply( double[][] that)  {
   double [] flmThis = Mat.oneDTransposeDoubleArray(d);   // construct a FloatMatrix for the receiver
   double [] flmThat = Mat.oneDTransposeDoubleArray(that);  // construct a FloatMatrix for the argument
   int Arows = Nrows(); int Acolumns = Ncols();
   int Ccolumns = that[0].length;
   double [] result = new double[Arows*Ccolumns];
   double alpha=1.0;
   double beta=0.0;
   int lda = Arows;
   int ldb = Acolumns;
   int ldc = Arows;
   // perform the multiplication using openblas
   cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, Arows, Ccolumns,  Acolumns, alpha, flmThis, lda, flmThat, ldb, beta,
         result, ldc);
   double [][] rd = new double[Arows][Ccolumns];
   int cnt = 0;
   int r = 0;
   int c = 0;
   while (c < Ccolumns) {
      r=0;
      while (r < Arows ) {
         rd[r][c] = result[cnt];
         cnt++;
         r++;
      }
      c++;
   }
   return new Matrix(rd);
}

public Matrix multiply( Matrix that)  {
    return multiply(that.d);
}

However, calling it on Matrix objects is slow:

n=2000
x=rand(n,n)
tic()
xx=x*x   // Matrix * Matrix, slow however the method that calls OpenBLAS is called
tm=toc()   // delays a lot!!

In contrast, double[][] * Matrix is very fast:

n=2000
x=rand(n,n)
tic()
xx=x.d * x   // double[][] * Matrix, is very fast !!
tm=toc()    // very fast, uses OpenBLAS

What could be happening here?

Best regards,
Stergios