001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.linear;
019    
020    import java.io.Serializable;
021    import java.util.Arrays;
022    
023    import org.apache.commons.math.MathRuntimeException;
024    import org.apache.commons.math.linear.MatrixVisitorException;
025    import org.apache.commons.math.exception.util.LocalizedFormats;
026    import org.apache.commons.math.util.FastMath;
027    
028    /**
029     * Cache-friendly implementation of RealMatrix using a flat arrays to store
030     * square blocks of the matrix.
031     * <p>
032     * This implementation is specially designed to be cache-friendly. Square blocks are
033     * stored as small arrays and allow efficient traversal of data both in row major direction
034     * and columns major direction, one block at a time. This greatly increases performances
035     * for algorithms that use crossed directions loops like multiplication or transposition.
036     * </p>
037     * <p>
038     * The size of square blocks is a static parameter. It may be tuned according to the cache
039     * size of the target computer processor. As a rule of thumbs, it should be the largest
040     * value that allows three blocks to be simultaneously cached (this is necessary for example
041     * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
042     * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
043     * could be lowered to 36x36 for processors with 32k L1 cache.
044     * </p>
045     * <p>
046     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
047     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
048     * blocks are flattened in row major order in single dimension arrays which are therefore
049     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
050     * organized in row major order.
051     * </p>
052     * <p>
053     * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
054     * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
055     * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
056     * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
057     * holding the lower right 48x8 rectangle.
058     * </p>
059     * <p>
060     * The layout complexity overhead versus simple mapping of matrices to java
061     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
062     * to up to 3-fold improvements for matrices of moderate to large size.
063     * </p>
064     * @version $Revision: 1073158 $ $Date: 2011-02-21 22:46:52 +0100 (lun. 21 f??vr. 2011) $
065     * @since 2.0
066     */
067    public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
068    
069        /** Block size. */
070        public static final int BLOCK_SIZE = 52;
071    
072        /** Serializable version identifier */
073        private static final long serialVersionUID = 4991895511313664478L;
074    
075        /** Blocks of matrix entries. */
076        private final double blocks[][];
077    
078        /** Number of rows of the matrix. */
079        private final int rows;
080    
081        /** Number of columns of the matrix. */
082        private final int columns;
083    
084        /** Number of block rows of the matrix. */
085        private final int blockRows;
086    
087        /** Number of block columns of the matrix. */
088        private final int blockColumns;
089    
090        /**
091         * Create a new matrix with the supplied row and column dimensions.
092         *
093         * @param rows  the number of rows in the new matrix
094         * @param columns  the number of columns in the new matrix
095         * @throws IllegalArgumentException if row or column dimension is not
096         *  positive
097         */
098        public BlockRealMatrix(final int rows, final int columns)
099            throws IllegalArgumentException {
100    
101            super(rows, columns);
102            this.rows    = rows;
103            this.columns = columns;
104    
105            // number of blocks
106            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108    
109            // allocate storage blocks, taking care of smaller ones at right and bottom
110            blocks = createBlocksLayout(rows, columns);
111    
112        }
113    
114        /**
115         * Create a new dense matrix copying entries from raw layout data.
116         * <p>The input array <em>must</em> already be in raw layout.</p>
117         * <p>Calling this constructor is equivalent to call:
118         * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
119         *                                   toBlocksLayout(rawData), false);</pre>
120         * </p>
121         * @param rawData data for new matrix, in raw layout
122         *
123         * @exception IllegalArgumentException if <code>blockData</code> shape is
124         * inconsistent with block layout
125         * @see #BlockRealMatrix(int, int, double[][], boolean)
126         */
127        public BlockRealMatrix(final double[][] rawData)
128            throws IllegalArgumentException {
129            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130        }
131    
132        /**
133         * Create a new dense matrix copying entries from block layout data.
134         * <p>The input array <em>must</em> already be in blocks layout.</p>
135         * @param rows  the number of rows in the new matrix
136         * @param columns  the number of columns in the new matrix
137         * @param blockData data for new matrix
138         * @param copyArray if true, the input array will be copied, otherwise
139         * it will be referenced
140         *
141         * @exception IllegalArgumentException if <code>blockData</code> shape is
142         * inconsistent with block layout
143         * @see #createBlocksLayout(int, int)
144         * @see #toBlocksLayout(double[][])
145         * @see #BlockRealMatrix(double[][])
146         */
147        public BlockRealMatrix(final int rows, final int columns,
148                               final double[][] blockData, final boolean copyArray)
149            throws IllegalArgumentException {
150    
151            super(rows, columns);
152            this.rows    = rows;
153            this.columns = columns;
154    
155            // number of blocks
156            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158    
159            if (copyArray) {
160                // allocate storage blocks, taking care of smaller ones at right and bottom
161                blocks = new double[blockRows * blockColumns][];
162            } else {
163                // reference existing array
164                blocks = blockData;
165            }
166    
167            int index = 0;
168            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169                final int iHeight = blockHeight(iBlock);
170                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                        throw MathRuntimeException.createIllegalArgumentException(
173                                LocalizedFormats.WRONG_BLOCK_LENGTH,
174                                blockData[index].length, iHeight * blockWidth(jBlock));
175                    }
176                    if (copyArray) {
177                        blocks[index] = blockData[index].clone();
178                    }
179                }
180            }
181    
182        }
183    
184        /**
185         * Convert a data array from raw layout to blocks layout.
186         * <p>
187         * Raw layout is the straightforward layout where element at row i and
188         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189         * is the layout used in {@link BlockRealMatrix} instances, where the matrix
190         * is split in square blocks (except at right and bottom side where blocks may
191         * be rectangular to fit matrix size) and each block is stored in a flattened
192         * one-dimensional array.
193         * </p>
194         * <p>
195         * This method creates an array in blocks layout from an input array in raw layout.
196         * It can be used to provide the array argument of the {@link
197         * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
198         * </p>
199         * @param rawData data array in raw layout
200         * @return a new data array containing the same entries but in blocks layout
201         * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
202         *  (not all rows have the same length)
203         * @see #createBlocksLayout(int, int)
204         * @see #BlockRealMatrix(int, int, double[][], boolean)
205         */
206        public static double[][] toBlocksLayout(final double[][] rawData)
207            throws IllegalArgumentException {
208    
209            final int rows         = rawData.length;
210            final int columns      = rawData[0].length;
211            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
212            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
213    
214            // safety checks
215            for (int i = 0; i < rawData.length; ++i) {
216                final int length = rawData[i].length;
217                if (length != columns) {
218                    throw MathRuntimeException.createIllegalArgumentException(
219                            LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
220                            columns, length);
221                }
222            }
223    
224            // convert array
225            final double[][] blocks = new double[blockRows * blockColumns][];
226            int blockIndex = 0;
227            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
228                final int pStart  = iBlock * BLOCK_SIZE;
229                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
230                final int iHeight = pEnd - pStart;
231                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
232                    final int qStart = jBlock * BLOCK_SIZE;
233                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
234                    final int jWidth = qEnd - qStart;
235    
236                    // allocate new block
237                    final double[] block = new double[iHeight * jWidth];
238                    blocks[blockIndex] = block;
239    
240                    // copy data
241                    int index = 0;
242                    for (int p = pStart; p < pEnd; ++p) {
243                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
244                        index += jWidth;
245                    }
246    
247                    ++blockIndex;
248    
249                }
250            }
251    
252            return blocks;
253    
254        }
255    
256        /**
257         * Create a data array in blocks layout.
258         * <p>
259         * This method can be used to create the array argument of the {@link
260         * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
261         * </p>
262         * @param rows  the number of rows in the new matrix
263         * @param columns  the number of columns in the new matrix
264         * @return a new data array in blocks layout
265         * @see #toBlocksLayout(double[][])
266         * @see #BlockRealMatrix(int, int, double[][], boolean)
267         */
268        public static double[][] createBlocksLayout(final int rows, final int columns) {
269    
270            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
271            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
272    
273            final double[][] blocks = new double[blockRows * blockColumns][];
274            int blockIndex = 0;
275            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
276                final int pStart  = iBlock * BLOCK_SIZE;
277                final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
278                final int iHeight = pEnd - pStart;
279                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
280                    final int qStart = jBlock * BLOCK_SIZE;
281                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
282                    final int jWidth = qEnd - qStart;
283                    blocks[blockIndex] = new double[iHeight * jWidth];
284                    ++blockIndex;
285                }
286            }
287    
288            return blocks;
289    
290        }
291    
292        /** {@inheritDoc} */
293        @Override
294        public BlockRealMatrix createMatrix(final int rowDimension, final int columnDimension)
295            throws IllegalArgumentException {
296            return new BlockRealMatrix(rowDimension, columnDimension);
297        }
298    
299        /** {@inheritDoc} */
300        @Override
301        public BlockRealMatrix copy() {
302    
303            // create an empty matrix
304            BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
305    
306            // copy the blocks
307            for (int i = 0; i < blocks.length; ++i) {
308                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
309            }
310    
311            return copied;
312    
313        }
314    
315        /** {@inheritDoc} */
316        @Override
317        public BlockRealMatrix add(final RealMatrix m)
318            throws IllegalArgumentException {
319            try {
320                return add((BlockRealMatrix) m);
321            } catch (ClassCastException cce) {
322    
323                // safety check
324                MatrixUtils.checkAdditionCompatible(this, m);
325    
326                final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
327    
328                // perform addition block-wise, to ensure good cache behavior
329                int blockIndex = 0;
330                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
331                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
332    
333                        // perform addition on the current block
334                        final double[] outBlock = out.blocks[blockIndex];
335                        final double[] tBlock   = blocks[blockIndex];
336                        final int      pStart   = iBlock * BLOCK_SIZE;
337                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
338                        final int      qStart   = jBlock * BLOCK_SIZE;
339                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
340                        int k = 0;
341                        for (int p = pStart; p < pEnd; ++p) {
342                            for (int q = qStart; q < qEnd; ++q) {
343                                outBlock[k] = tBlock[k] + m.getEntry(p, q);
344                                ++k;
345                            }
346                        }
347    
348                        // go to next block
349                        ++blockIndex;
350    
351                    }
352                }
353    
354                return out;
355    
356            }
357        }
358    
359        /**
360         * Compute the sum of this and <code>m</code>.
361         *
362         * @param m    matrix to be added
363         * @return     this + m
364         * @throws  IllegalArgumentException if m is not the same size as this
365         */
366        public BlockRealMatrix add(final BlockRealMatrix m)
367            throws IllegalArgumentException {
368    
369            // safety check
370            MatrixUtils.checkAdditionCompatible(this, m);
371    
372            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
373    
374            // perform addition block-wise, to ensure good cache behavior
375            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
376                final double[] outBlock = out.blocks[blockIndex];
377                final double[] tBlock   = blocks[blockIndex];
378                final double[] mBlock   = m.blocks[blockIndex];
379                for (int k = 0; k < outBlock.length; ++k) {
380                    outBlock[k] = tBlock[k] + mBlock[k];
381                }
382            }
383    
384            return out;
385    
386        }
387    
388        /** {@inheritDoc} */
389        @Override
390        public BlockRealMatrix subtract(final RealMatrix m)
391            throws IllegalArgumentException {
392            try {
393                return subtract((BlockRealMatrix) m);
394            } catch (ClassCastException cce) {
395    
396                // safety check
397                MatrixUtils.checkSubtractionCompatible(this, m);
398    
399                final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
400    
401                // perform subtraction block-wise, to ensure good cache behavior
402                int blockIndex = 0;
403                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
404                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
405    
406                        // perform subtraction on the current block
407                        final double[] outBlock = out.blocks[blockIndex];
408                        final double[] tBlock   = blocks[blockIndex];
409                        final int      pStart   = iBlock * BLOCK_SIZE;
410                        final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
411                        final int      qStart   = jBlock * BLOCK_SIZE;
412                        final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
413                        int k = 0;
414                        for (int p = pStart; p < pEnd; ++p) {
415                            for (int q = qStart; q < qEnd; ++q) {
416                                outBlock[k] = tBlock[k] - m.getEntry(p, q);
417                                ++k;
418                            }
419                        }
420    
421                        // go to next block
422                        ++blockIndex;
423    
424                    }
425                }
426    
427                return out;
428    
429            }
430        }
431    
432        /**
433         * Compute this minus <code>m</code>.
434         *
435         * @param m    matrix to be subtracted
436         * @return     this - m
437         * @throws  IllegalArgumentException if m is not the same size as this
438         */
439        public BlockRealMatrix subtract(final BlockRealMatrix m)
440            throws IllegalArgumentException {
441    
442            // safety check
443            MatrixUtils.checkSubtractionCompatible(this, m);
444    
445            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
446    
447            // perform subtraction block-wise, to ensure good cache behavior
448            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
449                final double[] outBlock = out.blocks[blockIndex];
450                final double[] tBlock   = blocks[blockIndex];
451                final double[] mBlock   = m.blocks[blockIndex];
452                for (int k = 0; k < outBlock.length; ++k) {
453                    outBlock[k] = tBlock[k] - mBlock[k];
454                }
455            }
456    
457            return out;
458    
459        }
460    
461        /** {@inheritDoc} */
462        @Override
463        public BlockRealMatrix scalarAdd(final double d)
464            throws IllegalArgumentException {
465    
466            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
467    
468            // perform subtraction block-wise, to ensure good cache behavior
469            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
470                final double[] outBlock = out.blocks[blockIndex];
471                final double[] tBlock   = blocks[blockIndex];
472                for (int k = 0; k < outBlock.length; ++k) {
473                    outBlock[k] = tBlock[k] + d;
474                }
475            }
476    
477            return out;
478    
479        }
480    
481        /** {@inheritDoc} */
482        @Override
483        public RealMatrix scalarMultiply(final double d)
484            throws IllegalArgumentException {
485    
486            final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
487    
488            // perform subtraction block-wise, to ensure good cache behavior
489            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
490                final double[] outBlock = out.blocks[blockIndex];
491                final double[] tBlock   = blocks[blockIndex];
492                for (int k = 0; k < outBlock.length; ++k) {
493                    outBlock[k] = tBlock[k] * d;
494                }
495            }
496    
497            return out;
498    
499        }
500    
501        /** {@inheritDoc} */
502        @Override
503        public BlockRealMatrix multiply(final RealMatrix m)
504            throws IllegalArgumentException {
505            try {
506                return multiply((BlockRealMatrix) m);
507            } catch (ClassCastException cce) {
508    
509                // safety check
510                MatrixUtils.checkMultiplicationCompatible(this, m);
511    
512                final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
513    
514                // perform multiplication block-wise, to ensure good cache behavior
515                int blockIndex = 0;
516                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
517    
518                    final int pStart = iBlock * BLOCK_SIZE;
519                    final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
520    
521                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
522    
523                        final int qStart = jBlock * BLOCK_SIZE;
524                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
525    
526                        // select current block
527                        final double[] outBlock = out.blocks[blockIndex];
528    
529                        // perform multiplication on current block
530                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
531                            final int kWidth      = blockWidth(kBlock);
532                            final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
533                            final int rStart      = kBlock * BLOCK_SIZE;
534                            int k = 0;
535                            for (int p = pStart; p < pEnd; ++p) {
536                                final int lStart = (p - pStart) * kWidth;
537                                final int lEnd   = lStart + kWidth;
538                                for (int q = qStart; q < qEnd; ++q) {
539                                    double sum = 0;
540                                    int r = rStart;
541                                    for (int l = lStart; l < lEnd; ++l) {
542                                        sum += tBlock[l] * m.getEntry(r, q);
543                                        ++r;
544                                    }
545                                    outBlock[k] += sum;
546                                    ++k;
547                                }
548                            }
549                        }
550    
551                        // go to next block
552                        ++blockIndex;
553    
554                    }
555                }
556    
557                return out;
558    
559            }
560        }
561    
562        /**
563         * Returns the result of postmultiplying this by m.
564         *
565         * @param m    matrix to postmultiply by
566         * @return     this * m
567         * @throws     IllegalArgumentException
568         *             if columnDimension(this) != rowDimension(m)
569         */
570        public BlockRealMatrix multiply(BlockRealMatrix m) throws IllegalArgumentException {
571    
572            // safety check
573            MatrixUtils.checkMultiplicationCompatible(this, m);
574    
575            final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
576    
577            // perform multiplication block-wise, to ensure good cache behavior
578            int blockIndex = 0;
579            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
580    
581                final int pStart = iBlock * BLOCK_SIZE;
582                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
583    
584                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
585                    final int jWidth = out.blockWidth(jBlock);
586                    final int jWidth2 = jWidth  + jWidth;
587                    final int jWidth3 = jWidth2 + jWidth;
588                    final int jWidth4 = jWidth3 + jWidth;
589    
590                    // select current block
591                    final double[] outBlock = out.blocks[blockIndex];
592    
593                    // perform multiplication on current block
594                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
595                        final int kWidth = blockWidth(kBlock);
596                        final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
597                        final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
598                        int k = 0;
599                        for (int p = pStart; p < pEnd; ++p) {
600                            final int lStart = (p - pStart) * kWidth;
601                            final int lEnd   = lStart + kWidth;
602                            for (int nStart = 0; nStart < jWidth; ++nStart) {
603                                double sum = 0;
604                                int l = lStart;
605                                int n = nStart;
606                                while (l < lEnd - 3) {
607                                    sum += tBlock[l] * mBlock[n] +
608                                           tBlock[l + 1] * mBlock[n + jWidth] +
609                                           tBlock[l + 2] * mBlock[n + jWidth2] +
610                                           tBlock[l + 3] * mBlock[n + jWidth3];
611                                    l += 4;
612                                    n += jWidth4;
613                                }
614                                while (l < lEnd) {
615                                    sum += tBlock[l++] * mBlock[n];
616                                    n += jWidth;
617                                }
618                                outBlock[k] += sum;
619                                ++k;
620                            }
621                        }
622                    }
623    
624                    // go to next block
625                    ++blockIndex;
626    
627                }
628            }
629    
630            return out;
631    
632        }
633    
634        /** {@inheritDoc} */
635        @Override
636        public double[][] getData() {
637    
638            final double[][] data = new double[getRowDimension()][getColumnDimension()];
639            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
640    
641            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
642                final int pStart = iBlock * BLOCK_SIZE;
643                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
644                int regularPos   = 0;
645                int lastPos      = 0;
646                for (int p = pStart; p < pEnd; ++p) {
647                    final double[] dataP = data[p];
648                    int blockIndex = iBlock * blockColumns;
649                    int dataPos    = 0;
650                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
651                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
652                        dataPos += BLOCK_SIZE;
653                    }
654                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
655                    regularPos += BLOCK_SIZE;
656                    lastPos    += lastColumns;
657                }
658            }
659    
660            return data;
661    
662        }
663    
664        /** {@inheritDoc} */
665        @Override
666        public double getNorm() {
667            final double[] colSums = new double[BLOCK_SIZE];
668            double maxColSum = 0;
669            for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
670                final int jWidth = blockWidth(jBlock);
671                Arrays.fill(colSums, 0, jWidth, 0.0);
672                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
673                    final int iHeight = blockHeight(iBlock);
674                    final double[] block = blocks[iBlock * blockColumns + jBlock];
675                    for (int j = 0; j < jWidth; ++j) {
676                        double sum = 0;
677                        for (int i = 0; i < iHeight; ++i) {
678                            sum += FastMath.abs(block[i * jWidth + j]);
679                        }
680                        colSums[j] += sum;
681                    }
682                }
683                for (int j = 0; j < jWidth; ++j) {
684                    maxColSum = FastMath.max(maxColSum, colSums[j]);
685                }
686            }
687            return maxColSum;
688        }
689    
690        /** {@inheritDoc} */
691        @Override
692        public double getFrobeniusNorm() {
693            double sum2 = 0;
694            for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
695                for (final double entry : blocks[blockIndex]) {
696                    sum2 += entry * entry;
697                }
698            }
699            return FastMath.sqrt(sum2);
700        }
701    
702        /** {@inheritDoc} */
703        @Override
704        public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
705                                       final int startColumn, final int endColumn)
706            throws MatrixIndexException {
707    
708            // safety checks
709            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
710    
711            // create the output matrix
712            final BlockRealMatrix out =
713                new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
714    
715            // compute blocks shifts
716            final int blockStartRow    = startRow    / BLOCK_SIZE;
717            final int rowsShift        = startRow    % BLOCK_SIZE;
718            final int blockStartColumn = startColumn / BLOCK_SIZE;
719            final int columnsShift     = startColumn % BLOCK_SIZE;
720    
721            // perform extraction block-wise, to ensure good cache behavior
722            int pBlock = blockStartRow;
723            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
724                final int iHeight = out.blockHeight(iBlock);
725                int qBlock = blockStartColumn;
726                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
727                    final int jWidth = out.blockWidth(jBlock);
728    
729                    // handle one block of the output matrix
730                    final int      outIndex = iBlock * out.blockColumns + jBlock;
731                    final double[] outBlock = out.blocks[outIndex];
732                    final int      index    = pBlock * blockColumns + qBlock;
733                    final int      width    = blockWidth(qBlock);
734    
735                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
736                    final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
737                    if (heightExcess > 0) {
738                        // the submatrix block spans on two blocks rows from the original matrix
739                        if (widthExcess > 0) {
740                            // the submatrix block spans on two blocks columns from the original matrix
741                            final int width2 = blockWidth(qBlock + 1);
742                            copyBlockPart(blocks[index], width,
743                                          rowsShift, BLOCK_SIZE,
744                                          columnsShift, BLOCK_SIZE,
745                                          outBlock, jWidth, 0, 0);
746                            copyBlockPart(blocks[index + 1], width2,
747                                          rowsShift, BLOCK_SIZE,
748                                          0, widthExcess,
749                                          outBlock, jWidth, 0, jWidth - widthExcess);
750                            copyBlockPart(blocks[index + blockColumns], width,
751                                          0, heightExcess,
752                                          columnsShift, BLOCK_SIZE,
753                                          outBlock, jWidth, iHeight - heightExcess, 0);
754                            copyBlockPart(blocks[index + blockColumns + 1], width2,
755                                          0, heightExcess,
756                                          0, widthExcess,
757                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
758                        } else {
759                            // the submatrix block spans on one block column from the original matrix
760                            copyBlockPart(blocks[index], width,
761                                          rowsShift, BLOCK_SIZE,
762                                          columnsShift, jWidth + columnsShift,
763                                          outBlock, jWidth, 0, 0);
764                            copyBlockPart(blocks[index + blockColumns], width,
765                                          0, heightExcess,
766                                          columnsShift, jWidth + columnsShift,
767                                          outBlock, jWidth, iHeight - heightExcess, 0);
768                        }
769                    } else {
770                        // the submatrix block spans on one block row from the original matrix
771                        if (widthExcess > 0) {
772                            // the submatrix block spans on two blocks columns from the original matrix
773                            final int width2 = blockWidth(qBlock + 1);
774                            copyBlockPart(blocks[index], width,
775                                          rowsShift, iHeight + rowsShift,
776                                          columnsShift, BLOCK_SIZE,
777                                          outBlock, jWidth, 0, 0);
778                            copyBlockPart(blocks[index + 1], width2,
779                                          rowsShift, iHeight + rowsShift,
780                                          0, widthExcess,
781                                          outBlock, jWidth, 0, jWidth - widthExcess);
782                        } else {
783                            // the submatrix block spans on one block column from the original matrix
784                            copyBlockPart(blocks[index], width,
785                                          rowsShift, iHeight + rowsShift,
786                                          columnsShift, jWidth + columnsShift,
787                                          outBlock, jWidth, 0, 0);
788                        }
789                   }
790    
791                    ++qBlock;
792    
793                }
794    
795                ++pBlock;
796    
797            }
798    
799            return out;
800    
801        }
802    
803        /**
804         * Copy a part of a block into another one
805         * <p>This method can be called only when the specified part fits in both
806         * blocks, no verification is done here.</p>
807         * @param srcBlock source block
808         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
809         * @param srcStartRow start row in the source block
810         * @param srcEndRow end row (exclusive) in the source block
811         * @param srcStartColumn start column in the source block
812         * @param srcEndColumn end column (exclusive) in the source block
813         * @param dstBlock destination block
814         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
815         * @param dstStartRow start row in the destination block
816         * @param dstStartColumn start column in the destination block
817         */
818        private void copyBlockPart(final double[] srcBlock, final int srcWidth,
819                                   final int srcStartRow, final int srcEndRow,
820                                   final int srcStartColumn, final int srcEndColumn,
821                                   final double[] dstBlock, final int dstWidth,
822                                   final int dstStartRow, final int dstStartColumn) {
823            final int length = srcEndColumn - srcStartColumn;
824            int srcPos = srcStartRow * srcWidth + srcStartColumn;
825            int dstPos = dstStartRow * dstWidth + dstStartColumn;
826            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
827                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
828                srcPos += srcWidth;
829                dstPos += dstWidth;
830            }
831        }
832    
833        /** {@inheritDoc} */
834        @Override
835        public void setSubMatrix(final double[][] subMatrix, final int row, final int column)
836            throws MatrixIndexException {
837    
838            // safety checks
839            final int refLength = subMatrix[0].length;
840            if (refLength < 1) {
841                throw MathRuntimeException.createIllegalArgumentException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
842            }
843            final int endRow    = row + subMatrix.length - 1;
844            final int endColumn = column + refLength - 1;
845            MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
846            for (final double[] subRow : subMatrix) {
847                if (subRow.length != refLength) {
848                    throw MathRuntimeException.createIllegalArgumentException(
849                            LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
850                            refLength, subRow.length);
851                }
852            }
853    
854            // compute blocks bounds
855            final int blockStartRow    = row / BLOCK_SIZE;
856            final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
857            final int blockStartColumn = column / BLOCK_SIZE;
858            final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
859    
860            // perform copy block-wise, to ensure good cache behavior
861            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
862                final int iHeight  = blockHeight(iBlock);
863                final int firstRow = iBlock * BLOCK_SIZE;
864                final int iStart   = FastMath.max(row,    firstRow);
865                final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
866    
867                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
868                    final int jWidth      = blockWidth(jBlock);
869                    final int firstColumn = jBlock * BLOCK_SIZE;
870                    final int jStart      = FastMath.max(column,    firstColumn);
871                    final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
872                    final int jLength     = jEnd - jStart;
873    
874                    // handle one block, row by row
875                    final double[] block = blocks[iBlock * blockColumns + jBlock];
876                    for (int i = iStart; i < iEnd; ++i) {
877                        System.arraycopy(subMatrix[i - row], jStart - column,
878                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
879                                         jLength);
880                    }
881    
882                }
883            }
884        }
885    
886        /** {@inheritDoc} */
887        @Override
888        public BlockRealMatrix getRowMatrix(final int row)
889            throws MatrixIndexException {
890    
891            MatrixUtils.checkRowIndex(this, row);
892            final BlockRealMatrix out = new BlockRealMatrix(1, columns);
893    
894            // perform copy block-wise, to ensure good cache behavior
895            final int iBlock  = row / BLOCK_SIZE;
896            final int iRow    = row - iBlock * BLOCK_SIZE;
897            int outBlockIndex = 0;
898            int outIndex      = 0;
899            double[] outBlock = out.blocks[outBlockIndex];
900            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
901                final int jWidth     = blockWidth(jBlock);
902                final double[] block = blocks[iBlock * blockColumns + jBlock];
903                final int available  = outBlock.length - outIndex;
904                if (jWidth > available) {
905                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
906                    outBlock = out.blocks[++outBlockIndex];
907                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
908                    outIndex = jWidth - available;
909                } else {
910                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
911                    outIndex += jWidth;
912                }
913            }
914    
915            return out;
916    
917        }
918    
919        /** {@inheritDoc} */
920        @Override
921        public void setRowMatrix(final int row, final RealMatrix matrix)
922            throws MatrixIndexException, InvalidMatrixException {
923            try {
924                setRowMatrix(row, (BlockRealMatrix) matrix);
925            } catch (ClassCastException cce) {
926                super.setRowMatrix(row, matrix);
927            }
928        }
929    
930        /**
931         * Sets the entries in row number <code>row</code>
932         * as a row matrix.  Row indices start at 0.
933         *
934         * @param row the row to be set
935         * @param matrix row matrix (must have one row and the same number of columns
936         * as the instance)
937         * @throws MatrixIndexException if the specified row index is invalid
938         * @throws InvalidMatrixException if the matrix dimensions do not match one
939         * instance row
940         */
941        public void setRowMatrix(final int row, final BlockRealMatrix matrix)
942            throws MatrixIndexException, InvalidMatrixException {
943    
944            MatrixUtils.checkRowIndex(this, row);
945            final int nCols = getColumnDimension();
946            if ((matrix.getRowDimension() != 1) ||
947                (matrix.getColumnDimension() != nCols)) {
948                throw new InvalidMatrixException(
949                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
950                        matrix.getRowDimension(), matrix.getColumnDimension(),
951                        1, nCols);
952            }
953    
954            // perform copy block-wise, to ensure good cache behavior
955            final int iBlock = row / BLOCK_SIZE;
956            final int iRow   = row - iBlock * BLOCK_SIZE;
957            int mBlockIndex  = 0;
958            int mIndex       = 0;
959            double[] mBlock  = matrix.blocks[mBlockIndex];
960            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
961                final int jWidth     = blockWidth(jBlock);
962                final double[] block = blocks[iBlock * blockColumns + jBlock];
963                final int available  = mBlock.length - mIndex;
964                if (jWidth > available) {
965                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
966                    mBlock = matrix.blocks[++mBlockIndex];
967                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
968                    mIndex = jWidth - available;
969                } else {
970                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
971                    mIndex += jWidth;
972               }
973            }
974    
975        }
976    
977        /** {@inheritDoc} */
978        @Override
979        public BlockRealMatrix getColumnMatrix(final int column)
980            throws MatrixIndexException {
981    
982            MatrixUtils.checkColumnIndex(this, column);
983            final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
984    
985            // perform copy block-wise, to ensure good cache behavior
986            final int jBlock  = column / BLOCK_SIZE;
987            final int jColumn = column - jBlock * BLOCK_SIZE;
988            final int jWidth  = blockWidth(jBlock);
989            int outBlockIndex = 0;
990            int outIndex      = 0;
991            double[] outBlock = out.blocks[outBlockIndex];
992            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
993                final int iHeight = blockHeight(iBlock);
994                final double[] block = blocks[iBlock * blockColumns + jBlock];
995                for (int i = 0; i < iHeight; ++i) {
996                    if (outIndex >= outBlock.length) {
997                        outBlock = out.blocks[++outBlockIndex];
998                        outIndex = 0;
999                    }
1000                    outBlock[outIndex++] = block[i * jWidth + jColumn];
1001                }
1002            }
1003    
1004            return out;
1005    
1006        }
1007    
1008        /** {@inheritDoc} */
1009        @Override
1010        public void setColumnMatrix(final int column, final RealMatrix matrix)
1011            throws MatrixIndexException, InvalidMatrixException {
1012            try {
1013                setColumnMatrix(column, (BlockRealMatrix) matrix);
1014            } catch (ClassCastException cce) {
1015                super.setColumnMatrix(column, matrix);
1016            }
1017        }
1018    
1019        /**
1020         * Sets the entries in column number <code>column</code>
1021         * as a column matrix.  Column indices start at 0.
1022         *
1023         * @param column the column to be set
1024         * @param matrix column matrix (must have one column and the same number of rows
1025         * as the instance)
1026         * @throws MatrixIndexException if the specified column index is invalid
1027         * @throws InvalidMatrixException if the matrix dimensions do not match one
1028         * instance column
1029         */
1030        void setColumnMatrix(final int column, final BlockRealMatrix matrix)
1031            throws MatrixIndexException, InvalidMatrixException {
1032    
1033            MatrixUtils.checkColumnIndex(this, column);
1034            final int nRows = getRowDimension();
1035            if ((matrix.getRowDimension() != nRows) ||
1036                (matrix.getColumnDimension() != 1)) {
1037                throw new InvalidMatrixException(
1038                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1039                        matrix.getRowDimension(), matrix.getColumnDimension(),
1040                        nRows, 1);
1041            }
1042    
1043            // perform copy block-wise, to ensure good cache behavior
1044            final int jBlock  = column / BLOCK_SIZE;
1045            final int jColumn = column - jBlock * BLOCK_SIZE;
1046            final int jWidth  = blockWidth(jBlock);
1047            int mBlockIndex = 0;
1048            int mIndex      = 0;
1049            double[] mBlock = matrix.blocks[mBlockIndex];
1050            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1051                final int iHeight = blockHeight(iBlock);
1052                final double[] block = blocks[iBlock * blockColumns + jBlock];
1053                for (int i = 0; i < iHeight; ++i) {
1054                    if (mIndex >= mBlock.length) {
1055                        mBlock = matrix.blocks[++mBlockIndex];
1056                        mIndex = 0;
1057                    }
1058                    block[i * jWidth + jColumn] = mBlock[mIndex++];
1059                }
1060            }
1061    
1062        }
1063    
1064        /** {@inheritDoc} */
1065        @Override
1066        public RealVector getRowVector(final int row)
1067            throws MatrixIndexException {
1068    
1069            MatrixUtils.checkRowIndex(this, row);
1070            final double[] outData = new double[columns];
1071    
1072            // perform copy block-wise, to ensure good cache behavior
1073            final int iBlock  = row / BLOCK_SIZE;
1074            final int iRow    = row - iBlock * BLOCK_SIZE;
1075            int outIndex      = 0;
1076            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1077                final int jWidth     = blockWidth(jBlock);
1078                final double[] block = blocks[iBlock * blockColumns + jBlock];
1079                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1080                outIndex += jWidth;
1081            }
1082    
1083            return new ArrayRealVector(outData, false);
1084    
1085        }
1086    
1087        /** {@inheritDoc} */
1088        @Override
1089        public void setRowVector(final int row, final RealVector vector)
1090            throws MatrixIndexException, InvalidMatrixException {
1091            try {
1092                setRow(row, ((ArrayRealVector) vector).getDataRef());
1093            } catch (ClassCastException cce) {
1094                super.setRowVector(row, vector);
1095            }
1096        }
1097    
1098        /** {@inheritDoc} */
1099        @Override
1100        public RealVector getColumnVector(final int column)
1101            throws MatrixIndexException {
1102    
1103            MatrixUtils.checkColumnIndex(this, column);
1104            final double[] outData = new double[rows];
1105    
1106            // perform copy block-wise, to ensure good cache behavior
1107            final int jBlock  = column / BLOCK_SIZE;
1108            final int jColumn = column - jBlock * BLOCK_SIZE;
1109            final int jWidth  = blockWidth(jBlock);
1110            int outIndex      = 0;
1111            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1112                final int iHeight = blockHeight(iBlock);
1113                final double[] block = blocks[iBlock * blockColumns + jBlock];
1114                for (int i = 0; i < iHeight; ++i) {
1115                    outData[outIndex++] = block[i * jWidth + jColumn];
1116                }
1117            }
1118    
1119            return new ArrayRealVector(outData, false);
1120    
1121        }
1122    
1123        /** {@inheritDoc} */
1124        @Override
1125        public void setColumnVector(final int column, final RealVector vector)
1126            throws MatrixIndexException, InvalidMatrixException {
1127            try {
1128                setColumn(column, ((ArrayRealVector) vector).getDataRef());
1129            } catch (ClassCastException cce) {
1130                super.setColumnVector(column, vector);
1131            }
1132        }
1133    
1134        /** {@inheritDoc} */
1135        @Override
1136        public double[] getRow(final int row)
1137            throws MatrixIndexException {
1138    
1139            MatrixUtils.checkRowIndex(this, row);
1140            final double[] out = new double[columns];
1141    
1142            // perform copy block-wise, to ensure good cache behavior
1143            final int iBlock  = row / BLOCK_SIZE;
1144            final int iRow    = row - iBlock * BLOCK_SIZE;
1145            int outIndex      = 0;
1146            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1147                final int jWidth     = blockWidth(jBlock);
1148                final double[] block = blocks[iBlock * blockColumns + jBlock];
1149                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1150                outIndex += jWidth;
1151            }
1152    
1153            return out;
1154    
1155        }
1156    
1157        /** {@inheritDoc} */
1158        @Override
1159        public void setRow(final int row, final double[] array)
1160            throws MatrixIndexException, InvalidMatrixException {
1161    
1162            MatrixUtils.checkRowIndex(this, row);
1163            final int nCols = getColumnDimension();
1164            if (array.length != nCols) {
1165                throw new InvalidMatrixException(
1166                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1167                        1, array.length, 1, nCols);
1168            }
1169    
1170            // perform copy block-wise, to ensure good cache behavior
1171            final int iBlock  = row / BLOCK_SIZE;
1172            final int iRow    = row - iBlock * BLOCK_SIZE;
1173            int outIndex      = 0;
1174            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1175                final int jWidth     = blockWidth(jBlock);
1176                final double[] block = blocks[iBlock * blockColumns + jBlock];
1177                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1178                outIndex += jWidth;
1179            }
1180    
1181        }
1182    
1183        /** {@inheritDoc} */
1184        @Override
1185        public double[] getColumn(final int column)
1186            throws MatrixIndexException {
1187    
1188            MatrixUtils.checkColumnIndex(this, column);
1189            final double[] out = new double[rows];
1190    
1191            // perform copy block-wise, to ensure good cache behavior
1192            final int jBlock  = column / BLOCK_SIZE;
1193            final int jColumn = column - jBlock * BLOCK_SIZE;
1194            final int jWidth  = blockWidth(jBlock);
1195            int outIndex      = 0;
1196            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1197                final int iHeight = blockHeight(iBlock);
1198                final double[] block = blocks[iBlock * blockColumns + jBlock];
1199                for (int i = 0; i < iHeight; ++i) {
1200                    out[outIndex++] = block[i * jWidth + jColumn];
1201                }
1202            }
1203    
1204            return out;
1205    
1206        }
1207    
1208        /** {@inheritDoc} */
1209        @Override
1210        public void setColumn(final int column, final double[] array)
1211            throws MatrixIndexException, InvalidMatrixException {
1212    
1213            MatrixUtils.checkColumnIndex(this, column);
1214            final int nRows = getRowDimension();
1215            if (array.length != nRows) {
1216                throw new InvalidMatrixException(
1217                        LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1218                        array.length, 1, nRows, 1);
1219            }
1220    
1221            // perform copy block-wise, to ensure good cache behavior
1222            final int jBlock  = column / BLOCK_SIZE;
1223            final int jColumn = column - jBlock * BLOCK_SIZE;
1224            final int jWidth  = blockWidth(jBlock);
1225            int outIndex      = 0;
1226            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1227                final int iHeight = blockHeight(iBlock);
1228                final double[] block = blocks[iBlock * blockColumns + jBlock];
1229                for (int i = 0; i < iHeight; ++i) {
1230                    block[i * jWidth + jColumn] = array[outIndex++];
1231                }
1232            }
1233    
1234        }
1235    
1236        /** {@inheritDoc} */
1237        @Override
1238        public double getEntry(final int row, final int column)
1239            throws MatrixIndexException {
1240            try {
1241                final int iBlock = row    / BLOCK_SIZE;
1242                final int jBlock = column / BLOCK_SIZE;
1243                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1244                                   (column - jBlock * BLOCK_SIZE);
1245                return blocks[iBlock * blockColumns + jBlock][k];
1246            } catch (ArrayIndexOutOfBoundsException e) {
1247                throw new MatrixIndexException(
1248                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1249                        row, column, getRowDimension(), getColumnDimension());
1250            }
1251        }
1252    
1253        /** {@inheritDoc} */
1254        @Override
1255        public void setEntry(final int row, final int column, final double value)
1256            throws MatrixIndexException {
1257            try {
1258                final int iBlock = row    / BLOCK_SIZE;
1259                final int jBlock = column / BLOCK_SIZE;
1260                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1261                                   (column - jBlock * BLOCK_SIZE);
1262                blocks[iBlock * blockColumns + jBlock][k] = value;
1263            } catch (ArrayIndexOutOfBoundsException e) {
1264                throw new MatrixIndexException(
1265                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1266                        row, column, getRowDimension(), getColumnDimension());
1267            }
1268        }
1269    
1270        /** {@inheritDoc} */
1271        @Override
1272        public void addToEntry(final int row, final int column, final double increment)
1273            throws MatrixIndexException {
1274            try {
1275                final int iBlock = row    / BLOCK_SIZE;
1276                final int jBlock = column / BLOCK_SIZE;
1277                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1278                                   (column - jBlock * BLOCK_SIZE);
1279                blocks[iBlock * blockColumns + jBlock][k] += increment;
1280            } catch (ArrayIndexOutOfBoundsException e) {
1281                throw new MatrixIndexException(
1282                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1283                        row, column, getRowDimension(), getColumnDimension());
1284            }
1285        }
1286    
1287        /** {@inheritDoc} */
1288        @Override
1289        public void multiplyEntry(final int row, final int column, final double factor)
1290            throws MatrixIndexException {
1291            try {
1292                final int iBlock = row    / BLOCK_SIZE;
1293                final int jBlock = column / BLOCK_SIZE;
1294                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1295                                   (column - jBlock * BLOCK_SIZE);
1296                blocks[iBlock * blockColumns + jBlock][k] *= factor;
1297            } catch (ArrayIndexOutOfBoundsException e) {
1298                throw new MatrixIndexException(
1299                        LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1300                        row, column, getRowDimension(), getColumnDimension());
1301            }
1302        }
1303    
1304        /** {@inheritDoc} */
1305        @Override
1306        public BlockRealMatrix transpose() {
1307    
1308            final int nRows = getRowDimension();
1309            final int nCols = getColumnDimension();
1310            final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1311    
1312            // perform transpose block-wise, to ensure good cache behavior
1313            int blockIndex = 0;
1314            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1315                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1316    
1317                    // transpose current block
1318                    final double[] outBlock = out.blocks[blockIndex];
1319                    final double[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1320                    final int      pStart   = iBlock * BLOCK_SIZE;
1321                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1322                    final int      qStart   = jBlock * BLOCK_SIZE;
1323                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1324                    int k = 0;
1325                    for (int p = pStart; p < pEnd; ++p) {
1326                        final int lInc = pEnd - pStart;
1327                        int l = p - pStart;
1328                        for (int q = qStart; q < qEnd; ++q) {
1329                            outBlock[k] = tBlock[l];
1330                            ++k;
1331                            l+= lInc;
1332                        }
1333                    }
1334    
1335                    // go to next block
1336                    ++blockIndex;
1337    
1338                }
1339            }
1340    
1341            return out;
1342    
1343        }
1344    
1345        /** {@inheritDoc} */
1346        @Override
1347        public int getRowDimension() {
1348            return rows;
1349        }
1350    
1351        /** {@inheritDoc} */
1352        @Override
1353        public int getColumnDimension() {
1354            return columns;
1355        }
1356    
1357        /** {@inheritDoc} */
1358        @Override
1359        public double[] operate(final double[] v)
1360            throws IllegalArgumentException {
1361    
1362            if (v.length != columns) {
1363                throw MathRuntimeException.createIllegalArgumentException(
1364                        LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1365                        v.length, columns);
1366            }
1367            final double[] out = new double[rows];
1368    
1369            // perform multiplication block-wise, to ensure good cache behavior
1370            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1371                final int pStart = iBlock * BLOCK_SIZE;
1372                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1373                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1374                    final double[] block  = blocks[iBlock * blockColumns + jBlock];
1375                    final int      qStart = jBlock * BLOCK_SIZE;
1376                    final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1377                    int k = 0;
1378                    for (int p = pStart; p < pEnd; ++p) {
1379                        double sum = 0;
1380                        int q = qStart;
1381                        while (q < qEnd - 3) {
1382                            sum += block[k]     * v[q]     +
1383                                   block[k + 1] * v[q + 1] +
1384                                   block[k + 2] * v[q + 2] +
1385                                   block[k + 3] * v[q + 3];
1386                            k += 4;
1387                            q += 4;
1388                        }
1389                        while (q < qEnd) {
1390                            sum += block[k++] * v[q++];
1391                        }
1392                        out[p] += sum;
1393                    }
1394                }
1395            }
1396    
1397            return out;
1398    
1399        }
1400    
1401        /** {@inheritDoc} */
1402        @Override
1403        public double[] preMultiply(final double[] v)
1404            throws IllegalArgumentException {
1405    
1406            if (v.length != rows) {
1407                throw MathRuntimeException.createIllegalArgumentException(
1408                        LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1409                        v.length, rows);
1410            }
1411            final double[] out = new double[columns];
1412    
1413            // perform multiplication block-wise, to ensure good cache behavior
1414            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1415                final int jWidth  = blockWidth(jBlock);
1416                final int jWidth2 = jWidth  + jWidth;
1417                final int jWidth3 = jWidth2 + jWidth;
1418                final int jWidth4 = jWidth3 + jWidth;
1419                final int qStart = jBlock * BLOCK_SIZE;
1420                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1421                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1422                    final double[] block  = blocks[iBlock * blockColumns + jBlock];
1423                    final int      pStart = iBlock * BLOCK_SIZE;
1424                    final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1425                    for (int q = qStart; q < qEnd; ++q) {
1426                        int k = q - qStart;
1427                        double sum = 0;
1428                        int p = pStart;
1429                        while (p < pEnd - 3) {
1430                            sum += block[k]           * v[p]     +
1431                                   block[k + jWidth]  * v[p + 1] +
1432                                   block[k + jWidth2] * v[p + 2] +
1433                                   block[k + jWidth3] * v[p + 3];
1434                            k += jWidth4;
1435                            p += 4;
1436                        }
1437                        while (p < pEnd) {
1438                            sum += block[k] * v[p++];
1439                            k += jWidth;
1440                        }
1441                        out[q] += sum;
1442                    }
1443                }
1444            }
1445    
1446            return out;
1447    
1448        }
1449    
1450        /** {@inheritDoc} */
1451        @Override
1452        public double walkInRowOrder(final RealMatrixChangingVisitor visitor)
1453            throws MatrixVisitorException {
1454            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1455            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1456                final int pStart = iBlock * BLOCK_SIZE;
1457                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1458                for (int p = pStart; p < pEnd; ++p) {
1459                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1460                        final int jWidth = blockWidth(jBlock);
1461                        final int qStart = jBlock * BLOCK_SIZE;
1462                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1463                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1464                        int k = (p - pStart) * jWidth;
1465                        for (int q = qStart; q < qEnd; ++q) {
1466                            block[k] = visitor.visit(p, q, block[k]);
1467                            ++k;
1468                        }
1469                    }
1470                 }
1471            }
1472            return visitor.end();
1473        }
1474    
1475        /** {@inheritDoc} */
1476        @Override
1477        public double walkInRowOrder(final RealMatrixPreservingVisitor visitor)
1478            throws MatrixVisitorException {
1479            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1480            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1481                final int pStart = iBlock * BLOCK_SIZE;
1482                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1483                for (int p = pStart; p < pEnd; ++p) {
1484                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1485                        final int jWidth = blockWidth(jBlock);
1486                        final int qStart = jBlock * BLOCK_SIZE;
1487                        final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1488                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1489                        int k = (p - pStart) * jWidth;
1490                        for (int q = qStart; q < qEnd; ++q) {
1491                            visitor.visit(p, q, block[k]);
1492                            ++k;
1493                        }
1494                    }
1495                 }
1496            }
1497            return visitor.end();
1498        }
1499    
1500        /** {@inheritDoc} */
1501        @Override
1502        public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1503                                     final int startRow, final int endRow,
1504                                     final int startColumn, final int endColumn)
1505            throws MatrixIndexException, MatrixVisitorException {
1506            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1507            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1508            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1509                final int p0     = iBlock * BLOCK_SIZE;
1510                final int pStart = FastMath.max(startRow, p0);
1511                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1512                for (int p = pStart; p < pEnd; ++p) {
1513                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1514                        final int jWidth = blockWidth(jBlock);
1515                        final int q0     = jBlock * BLOCK_SIZE;
1516                        final int qStart = FastMath.max(startColumn, q0);
1517                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1518                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1519                        int k = (p - p0) * jWidth + qStart - q0;
1520                        for (int q = qStart; q < qEnd; ++q) {
1521                            block[k] = visitor.visit(p, q, block[k]);
1522                            ++k;
1523                        }
1524                    }
1525                 }
1526            }
1527            return visitor.end();
1528        }
1529    
1530        /** {@inheritDoc} */
1531        @Override
1532        public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1533                                     final int startRow, final int endRow,
1534                                     final int startColumn, final int endColumn)
1535            throws MatrixIndexException, MatrixVisitorException {
1536            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1537            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1538            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1539                final int p0     = iBlock * BLOCK_SIZE;
1540                final int pStart = FastMath.max(startRow, p0);
1541                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1542                for (int p = pStart; p < pEnd; ++p) {
1543                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1544                        final int jWidth = blockWidth(jBlock);
1545                        final int q0     = jBlock * BLOCK_SIZE;
1546                        final int qStart = FastMath.max(startColumn, q0);
1547                        final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1548                        final double[] block = blocks[iBlock * blockColumns + jBlock];
1549                        int k = (p - p0) * jWidth + qStart - q0;
1550                        for (int q = qStart; q < qEnd; ++q) {
1551                            visitor.visit(p, q, block[k]);
1552                            ++k;
1553                        }
1554                    }
1555                 }
1556            }
1557            return visitor.end();
1558        }
1559    
1560        /** {@inheritDoc} */
1561        @Override
1562        public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor)
1563            throws MatrixVisitorException {
1564            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1565            int blockIndex = 0;
1566            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1567                final int pStart = iBlock * BLOCK_SIZE;
1568                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1569                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1570                    final int qStart = jBlock * BLOCK_SIZE;
1571                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1572                    final double[] block = blocks[blockIndex];
1573                    int k = 0;
1574                    for (int p = pStart; p < pEnd; ++p) {
1575                        for (int q = qStart; q < qEnd; ++q) {
1576                            block[k] = visitor.visit(p, q, block[k]);
1577                            ++k;
1578                        }
1579                    }
1580                    ++blockIndex;
1581                }
1582            }
1583            return visitor.end();
1584        }
1585    
1586        /** {@inheritDoc} */
1587        @Override
1588        public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor)
1589            throws MatrixVisitorException {
1590            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1591            int blockIndex = 0;
1592            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1593                final int pStart = iBlock * BLOCK_SIZE;
1594                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1595                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1596                    final int qStart = jBlock * BLOCK_SIZE;
1597                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1598                    final double[] block = blocks[blockIndex];
1599                    int k = 0;
1600                    for (int p = pStart; p < pEnd; ++p) {
1601                        for (int q = qStart; q < qEnd; ++q) {
1602                            visitor.visit(p, q, block[k]);
1603                            ++k;
1604                        }
1605                    }
1606                    ++blockIndex;
1607                }
1608            }
1609            return visitor.end();
1610        }
1611    
1612        /** {@inheritDoc} */
1613        @Override
1614        public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1615                                           final int startRow, final int endRow,
1616                                           final int startColumn, final int endColumn)
1617            throws MatrixIndexException, MatrixVisitorException {
1618            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1619            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1620            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1621                final int p0     = iBlock * BLOCK_SIZE;
1622                final int pStart = FastMath.max(startRow, p0);
1623                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1624                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1625                    final int jWidth = blockWidth(jBlock);
1626                    final int q0     = jBlock * BLOCK_SIZE;
1627                    final int qStart = FastMath.max(startColumn, q0);
1628                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1629                    final double[] block = blocks[iBlock * blockColumns + jBlock];
1630                    for (int p = pStart; p < pEnd; ++p) {
1631                        int k = (p - p0) * jWidth + qStart - q0;
1632                        for (int q = qStart; q < qEnd; ++q) {
1633                            block[k] = visitor.visit(p, q, block[k]);
1634                            ++k;
1635                        }
1636                    }
1637                }
1638            }
1639            return visitor.end();
1640        }
1641    
1642        /** {@inheritDoc} */
1643        @Override
1644        public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1645                                           final int startRow, final int endRow,
1646                                           final int startColumn, final int endColumn)
1647            throws MatrixIndexException, MatrixVisitorException {
1648            MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1649            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1650            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1651                final int p0     = iBlock * BLOCK_SIZE;
1652                final int pStart = FastMath.max(startRow, p0);
1653                final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1654                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1655                    final int jWidth = blockWidth(jBlock);
1656                    final int q0     = jBlock * BLOCK_SIZE;
1657                    final int qStart = FastMath.max(startColumn, q0);
1658                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1659                    final double[] block = blocks[iBlock * blockColumns + jBlock];
1660                    for (int p = pStart; p < pEnd; ++p) {
1661                        int k = (p - p0) * jWidth + qStart - q0;
1662                        for (int q = qStart; q < qEnd; ++q) {
1663                            visitor.visit(p, q, block[k]);
1664                            ++k;
1665                        }
1666                    }
1667                }
1668            }
1669            return visitor.end();
1670        }
1671    
1672        /**
1673         * Get the height of a block.
1674         * @param blockRow row index (in block sense) of the block
1675         * @return height (number of rows) of the block
1676         */
1677        private int blockHeight(final int blockRow) {
1678            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1679        }
1680    
1681        /**
1682         * Get the width of a block.
1683         * @param blockColumn column index (in block sense) of the block
1684         * @return width (number of columns) of the block
1685         */
1686        private int blockWidth(final int blockColumn) {
1687            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1688        }
1689    
1690    }