From 6c4f7a41b66995a660964e5854f93ad5963618a3 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Tue, 28 Jul 2015 10:29:24 -0700 Subject: [PATCH 1/6] Add @since tag to mllib.linalg.* --- .../apache/spark/mllib/linalg/Matrices.scala | 86 +++++++++++++++++-- .../linalg/SingularValueDecomposition.scala | 1 + .../apache/spark/mllib/linalg/Vectors.scala | 62 +++++++++++++ .../linalg/distributed/BlockMatrix.scala | 43 ++++++++-- .../linalg/distributed/CoordinateMatrix.scala | 28 ++++-- .../distributed/DistributedMatrix.scala | 1 + .../linalg/distributed/IndexedRowMatrix.scala | 24 +++++- .../mllib/linalg/distributed/RowMatrix.scala | 25 +++++- 8 files changed, 246 insertions(+), 24 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index d82ba2456df1a..1ea27e2495782 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.types._ /** * Trait for a local matrix. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) sealed trait Matrix extends Serializable { @@ -40,10 +41,14 @@ sealed trait Matrix extends Serializable { /** Number of columns. */ def numCols: Int - /** Flag that keeps track whether the matrix is transposed or not. False by default. */ + /** Flag that keeps track whether the matrix is transposed or not. False by default. + * @since 1.3.0 + */ val isTransposed: Boolean = false - /** Converts to a dense array in column major. */ + /** Converts to a dense array in column major. + * @since 1.2.0 + */ def toArray: Array[Double] = { val newArray = new Array[Double](numRows * numCols) foreachActive { (i, j, v) => @@ -70,29 +75,39 @@ sealed trait Matrix extends Serializable { /** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. 
*/ def transpose: Matrix - /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ + /** Convenience method for `Matrix`-`DenseMatrix` multiplication. + * @since 1.2.0 + */ def multiply(y: DenseMatrix): DenseMatrix = { val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols) BLAS.gemm(1.0, this, y, 0.0, C) C } - /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */ + /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. + * @since 1.2.0 + */ def multiply(y: DenseVector): DenseVector = { multiply(y.asInstanceOf[Vector]) } - /** Convenience method for `Matrix`-`Vector` multiplication. */ + /** Convenience method for `Matrix`-`Vector` multiplication. + * @since 1.4.0 + */ def multiply(y: Vector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) BLAS.gemv(1.0, this, y, 0.0, output) output } - /** A human readable representation of the matrix */ + /** A human readable representation of the matrix + * @since 1.0.0 + */ override def toString: String = toBreeze.toString() - /** A human readable representation of the matrix with maximum lines and width */ + /** A human readable representation of the matrix with maximum lines and width + * @since 1.4.0 + */ def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth) /** Map the values of this matrix using a function. Generates a new matrix. Performs the @@ -229,6 +244,7 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] { * @param values matrix entries in column major if not transposed or in row major otherwise * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in * row major. 
+ * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) class DenseMatrix( @@ -254,16 +270,23 @@ class DenseMatrix( * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major + * @since 1.3.0 */ def this(numRows: Int, numCols: Int, values: Array[Double]) = this(numRows, numCols, values, false) + /** + * @since 1.4.0 + */ override def equals(o: Any): Boolean = o match { case m: DenseMatrix => m.numRows == numRows && m.numCols == numCols && Arrays.equals(toArray, m.toArray) case _ => false } + /** + * @since 1.4.0 + */ override def hashCode: Int = { com.google.common.base.Objects.hashCode(numRows : Integer, numCols: Integer, toArray) } @@ -279,6 +302,9 @@ class DenseMatrix( private[mllib] def apply(i: Int): Double = values(i) + /** + * @since 1.3.0 + */ override def apply(i: Int, j: Int): Double = values(index(i, j)) private[mllib] def index(i: Int, j: Int): Int = { @@ -289,6 +315,9 @@ class DenseMatrix( values(index(i, j)) = v } + /** + * @since 1.4.0 + */ override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f), @@ -304,6 +333,9 @@ class DenseMatrix( this } + /** + * @since 1.3.0 + */ override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed) private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = { @@ -341,6 +373,7 @@ class DenseMatrix( /** * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed * set to false. + * @since 1.3.0 */ def toSparse: SparseMatrix = { val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble @@ -368,6 +401,7 @@ class DenseMatrix( /** * Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]]. 
+ * @since 1.3.0 */ object DenseMatrix { @@ -376,6 +410,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros + * @since 1.2.0 */ def zeros(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -388,6 +423,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones + * @since 1.2.0 */ def ones(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -399,6 +435,7 @@ object DenseMatrix { * Generate an Identity Matrix in `DenseMatrix` format. * @param n number of rows and columns of the matrix * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.2.0 */ def eye(n: Int): DenseMatrix = { val identity = DenseMatrix.zeros(n, n) @@ -416,6 +453,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.2.0 */ def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -429,6 +467,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.2.0 */ def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -441,6 +480,7 @@ object DenseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values` * on the diagonal + * @since 1.2.0 */ def diag(vector: Vector): 
DenseMatrix = { val n = vector.size @@ -476,6 +516,7 @@ object DenseMatrix { * @param isTransposed whether the matrix is transposed. If true, the matrix can be considered * Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs, * and `rowIndices` behave as colIndices, and `values` are stored in row major. + * @since 1.2.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) class SparseMatrix( @@ -513,6 +554,7 @@ class SparseMatrix( * @param rowIndices the row index of the entry. They must be in strictly increasing * order for each column * @param values non-zero matrix entries in column major + * @since 1.3.0 */ def this( numRows: Int, @@ -530,6 +572,9 @@ class SparseMatrix( } } + /** + * @since 1.3.0 + */ override def apply(i: Int, j: Int): Double = { val ind = index(i, j) if (ind < 0) 0.0 else values(ind) @@ -553,6 +598,9 @@ class SparseMatrix( } } + /** + * @since 1.4.0 + */ override def copy: SparseMatrix = { new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone()) } @@ -570,6 +618,9 @@ class SparseMatrix( this } + /** + * @since 1.3.0 + */ override def transpose: SparseMatrix = new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed) @@ -603,6 +654,7 @@ class SparseMatrix( /** * Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed * set to false. + * @since 1.3.0 */ def toDense: DenseMatrix = { new DenseMatrix(numRows, numCols, toArray) @@ -616,6 +668,7 @@ class SparseMatrix( /** * Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]]. 
+ * @since 1.3.0 */ object SparseMatrix { @@ -627,6 +680,7 @@ object SparseMatrix { * @param numCols number of columns of the matrix * @param entries Array of (i, j, value) tuples * @return The corresponding `SparseMatrix` + * @since 1.3.0 */ def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = { val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1)) @@ -675,6 +729,7 @@ object SparseMatrix { * Generate an Identity Matrix in `SparseMatrix` format. * @param n number of rows and columns of the matrix * @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def speye(n: Int): SparseMatrix = { new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0)) @@ -744,6 +799,7 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.3.0 */ def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) @@ -757,6 +813,7 @@ object SparseMatrix { * @param density the desired density for the matrix * @param rng a random number generator * @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.3.0 */ def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = { val mat = genRandMatrix(numRows, numCols, density, rng) @@ -768,6 +825,7 @@ object SparseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero * `values` on the diagonal + * @since 1.3.0 */ def spdiag(vector: Vector): SparseMatrix = { val n = vector.size @@ -784,6 +842,7 @@ object SparseMatrix { /** * Factory methods for [[org.apache.spark.mllib.linalg.Matrix]]. 
+ * @since 1.0.0 */ object Matrices { @@ -793,6 +852,7 @@ object Matrices { * @param numRows number of rows * @param numCols number of columns * @param values matrix entries in column major + * @since 1.0.0 */ def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = { new DenseMatrix(numRows, numCols, values) @@ -806,6 +866,7 @@ object Matrices { * @param colPtrs the index corresponding to the start of a new column * @param rowIndices the row index of the entry * @param values non-zero matrix entries in column major + * @since 1.2.0 */ def sparse( numRows: Int, @@ -839,6 +900,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of zeros + * @since 1.3.0 */ def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols) @@ -847,6 +909,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of ones + * @since 1.3.0 */ def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols) @@ -854,6 +917,7 @@ object Matrices { * Generate a dense Identity Matrix in `Matrix` format. * @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def eye(n: Int): Matrix = DenseMatrix.eye(n) @@ -861,6 +925,7 @@ object Matrices { * Generate a sparse Identity Matrix in `Matrix` format. 
* @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal + * @since 1.3.0 */ def speye(n: Int): Matrix = SparseMatrix.speye(n) @@ -870,6 +935,7 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.2.0 */ def rand(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.rand(numRows, numCols, rng) @@ -881,6 +947,7 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1) + * @since 1.3.0 */ def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprand(numRows, numCols, density, rng) @@ -891,6 +958,7 @@ object Matrices { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.2.0 */ def randn(numRows: Int, numCols: Int, rng: Random): Matrix = DenseMatrix.randn(numRows, numCols, rng) @@ -902,6 +970,7 @@ object Matrices { * @param density the desired density for the matrix * @param rng a random number generator * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1) + * @since 1.3.0 */ def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = SparseMatrix.sprandn(numRows, numCols, density, rng) @@ -911,6 +980,7 @@ object Matrices { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `Matrix` with size `values.length` x `values.length` and `values` * on the diagonal + * @since 1.3.0 */ def diag(vector: Vector): Matrix = DenseMatrix.diag(vector) @@ -920,6 +990,7 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. 
* @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were horizontally concatenated + * @since 1.3.0 */ def horzcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { @@ -978,6 +1049,7 @@ object Matrices { * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned. * @param matrices array of matrices * @return a single `Matrix` composed of the matrices that were vertically concatenated + * @since 1.3.0 */ def vertcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala index 9669c364bad8f..ede99ed54a1e5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SingularValueDecomposition.scala @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: * Represents singular value decomposition (SVD) factors. + * @since 1.0.0 */ @Experimental case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 0cb28d78bec05..3bdc18451f071 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.types._ * Represents a numeric vector, whose index type is Int and value type is Double. * * Note: Users should not implement this interface. 
+ * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) sealed trait Vector extends Serializable { @@ -50,6 +51,9 @@ sealed trait Vector extends Serializable { */ def toArray: Array[Double] + /** + * @since 1.0.0 + */ override def equals(other: Any): Boolean = { other match { case v2: Vector => @@ -70,6 +74,7 @@ sealed trait Vector extends Serializable { /** * Returns a hash code value for the vector. The hash code is based on its size and its nonzeros * in the first 16 entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]]. + * @since 1.2.2 */ override def hashCode(): Int = { // This is a reference implementation. It calls return in foreachActive, which is slow. @@ -98,11 +103,13 @@ sealed trait Vector extends Serializable { /** * Gets the value of the ith element. * @param i index + * @since 1.0.0 */ def apply(i: Int): Double = toBreeze(i) /** * Makes a deep copy of this vector. + * @since 1.1.0 */ def copy: Vector = { throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.") @@ -140,6 +147,7 @@ sealed trait Vector extends Serializable { /** * Returns a vector in either dense or sparse format, whichever uses less storage. + * @since 1.4.0 */ def compressed: Vector = { val nnz = numNonzeros @@ -245,11 +253,13 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { * Factory methods for [[org.apache.spark.mllib.linalg.Vector]]. * We don't use the name `Vector` because Scala imports * [[scala.collection.immutable.Vector]] by default. + * @since 1.0.0 */ object Vectors { /** * Creates a dense vector from its values. + * @since 1.0.0 */ @varargs def dense(firstValue: Double, otherValues: Double*): Vector = @@ -258,6 +268,7 @@ object Vectors { // A dummy implicit is used to avoid signature collision with the one generated by @varargs. /** * Creates a dense vector from a double array. 
+ * @since 1.0.0 */ def dense(values: Array[Double]): Vector = new DenseVector(values) @@ -267,6 +278,7 @@ object Vectors { * @param size vector size. * @param indices index array, must be strictly increasing. * @param values value array, must have the same length as indices. + * @since 1.0.0 */ def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector = new SparseVector(size, indices, values) @@ -276,6 +288,7 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. + * @since 1.0.0 */ def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = { require(size > 0, "The size of the requested sparse vector must be greater than 0.") @@ -297,6 +310,7 @@ object Vectors { * * @param size vector size. * @param elements vector elements in (index, value) pairs. + * @since 1.0.0 */ def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = { sparse(size, elements.asScala.map { case (i, x) => @@ -309,6 +323,7 @@ object Vectors { * * @param size vector size * @return a zero vector + * @since 1.1.0 */ def zeros(size: Int): Vector = { new DenseVector(new Array[Double](size)) @@ -316,6 +331,7 @@ object Vectors { /** * Parses a string resulted from [[Vector.toString]] into a [[Vector]]. + * @since 1.1.0 */ def parse(s: String): Vector = { parseNumeric(NumericParser.parse(s)) @@ -359,6 +375,7 @@ object Vectors { * @param vector input vector. * @param p norm. * @return norm in L^p^ space. + * @since 1.2.0 */ def norm(vector: Vector, p: Double): Double = { require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " + @@ -411,6 +428,7 @@ object Vectors { * @param v1 first Vector. * @param v2 second Vector. * @return squared distance between two Vectors. 
+ * @since 1.3.0 */ def sqdist(v1: Vector, v2: Vector): Double = { require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" + @@ -524,6 +542,7 @@ object Vectors { /** * A dense vector represented by a value array. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) class DenseVector(val values: Array[Double]) extends Vector { @@ -553,6 +572,9 @@ class DenseVector(val values: Array[Double]) extends Vector { } } + /** + * @since 1.4.0 + */ override def hashCode(): Int = { var result: Int = 31 + size var i = 0 @@ -571,6 +593,9 @@ class DenseVector(val values: Array[Double]) extends Vector { override def numActives: Int = size + /** + * @since 1.4.0 + */ override def numNonzeros: Int = { // same as values.count(_ != 0.0) but faster var nnz = 0 @@ -582,6 +607,9 @@ class DenseVector(val values: Array[Double]) extends Vector { nnz } + /** + * @since 1.4.0 + */ override def toSparse: SparseVector = { val nnz = numNonzeros val ii = new Array[Int](nnz) @@ -597,6 +625,9 @@ class DenseVector(val values: Array[Double]) extends Vector { new SparseVector(size, ii, vv) } + /** + * @since 1.4.0 + */ override def argmax: Int = { if (size == 0) { -1 @@ -616,6 +647,9 @@ class DenseVector(val values: Array[Double]) extends Vector { } } +/** + * @since 1.3.0 + */ object DenseVector { /** Extracts the value array from a dense vector. */ def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values) @@ -627,6 +661,7 @@ object DenseVector { * @param size size of the vector. * @param indices index array, assume to be strictly increasing. * @param values value array, must have the same length as the index array. + * @since 1.0.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) class SparseVector( @@ -638,9 +673,15 @@ class SparseVector( s" indices match the dimension of the values. 
You provided ${indices.length} indices and " + s" ${values.length} values.") + /** + * @since 1.0.0 + */ override def toString: String = s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})" + /** + * @since 1.0.0 + */ override def toArray: Array[Double] = { val data = new Array[Double](size) var i = 0 @@ -652,6 +693,9 @@ class SparseVector( data } + /** + * @since 1.1.0 + */ override def copy: SparseVector = { new SparseVector(size, indices.clone(), values.clone()) } @@ -670,6 +714,9 @@ class SparseVector( } } + /** + * @since 1.2.2 + */ override def hashCode(): Int = { var result: Int = 31 + size val end = values.length @@ -692,8 +739,14 @@ class SparseVector( result } + /** + * @since 1.4.0 + */ override def numActives: Int = values.length + /** + * @since 1.4.0 + */ override def numNonzeros: Int = { var nnz = 0 values.foreach { v => @@ -704,6 +757,9 @@ class SparseVector( nnz } + /** + * @since 1.4.0 + */ override def toSparse: SparseVector = { val nnz = numNonzeros if (nnz == numActives) { @@ -723,6 +779,9 @@ class SparseVector( } } + /** + * @since 1.4.0 + */ override def argmax: Int = { if (size == 0) { -1 @@ -769,6 +828,9 @@ class SparseVector( } } +/** + * @since 1.3.0 + */ object SparseVector { def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] = Some((sv.size, sv.indices, sv.values)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 3323ae7b1fba0..cfb6680a18b34 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -128,6 +128,8 @@ private[mllib] object GridPartitioner { * the number of rows will be calculated when `numRows` is invoked. * @param nCols Number of columns of this matrix. 
If the supplied value is less than or equal to * zero, the number of columns will be calculated when `numCols` is invoked. + * @since 1.3.0 + * */ @Experimental class BlockMatrix( @@ -149,6 +151,9 @@ class BlockMatrix( * rows are not required to have the given number of rows * @param colsPerBlock Number of columns that make up each block. The blocks forming the final * columns are not required to have the given number of columns + * + * @since 1.3.0 + * */ def this( blocks: RDD[((Int, Int), Matrix)], @@ -157,11 +162,20 @@ class BlockMatrix( this(blocks, rowsPerBlock, colsPerBlock, 0L, 0L) } + /** + * @since 1.3.0 + * */ + override def numRows(): Long = { if (nRows <= 0L) estimateDim() nRows } + /** + * + * @since 1.3.0 + */ + override def numCols(): Long = { if (nCols <= 0L) estimateDim() nCols @@ -192,6 +206,7 @@ class BlockMatrix( /** * Validates the block matrix info against the matrix data (`blocks`) and throws an exception if * any error is found. + * @since 1.3.0 */ def validate(): Unit = { logDebug("Validating BlockMatrix...") @@ -228,19 +243,25 @@ class BlockMatrix( logDebug("BlockMatrix is valid!") } - /** Caches the underlying RDD. */ + /** Caches the underlying RDD. + * @since 1.3.0 + * */ def cache(): this.type = { blocks.cache() this } - /** Persists the underlying RDD with the specified storage level. */ + /** Persists the underlying RDD with the specified storage level. + * @since 1.3.0 + * */ def persist(storageLevel: StorageLevel): this.type = { blocks.persist(storageLevel) this } - /** Converts to CoordinateMatrix. */ + /** Converts to CoordinateMatrix. + * @since 1.3.0 + * */ def toCoordinateMatrix(): CoordinateMatrix = { val entryRDD = blocks.flatMap { case ((blockRowIndex, blockColIndex), mat) => val rowStart = blockRowIndex.toLong * rowsPerBlock @@ -254,7 +275,9 @@ class BlockMatrix( new CoordinateMatrix(entryRDD, numRows(), numCols()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. 
*/ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. + * @since 1.3.0 + * */ def toIndexedRowMatrix(): IndexedRowMatrix = { require(numCols() < Int.MaxValue, "The number of columns must be within the integer range. " + s"numCols: ${numCols()}") @@ -262,7 +285,9 @@ class BlockMatrix( toCoordinateMatrix().toIndexedRowMatrix() } - /** Collect the distributed matrix on the driver as a `DenseMatrix`. */ + /** Collect the distributed matrix on the driver as a `DenseMatrix`. + * @since 1.3.0 + * */ def toLocalMatrix(): Matrix = { require(numRows() < Int.MaxValue, "The number of rows of this matrix should be less than " + s"Int.MaxValue. Currently numRows: ${numRows()}") @@ -288,7 +313,10 @@ class BlockMatrix( } /** Transpose this `BlockMatrix`. Returns a new `BlockMatrix` instance sharing the - * same underlying data. Is a lazy operation. */ + * same underlying data. Is a lazy operation. + * @since 1.3.0 + * + * */ def transpose: BlockMatrix = { val transposedBlocks = blocks.map { case ((blockRowIndex, blockColIndex), mat) => ((blockColIndex, blockRowIndex), mat.transpose) @@ -307,6 +335,7 @@ class BlockMatrix( * instances of [[SparseMatrix]], the resulting sub matrix will also be a [[SparseMatrix]], even * if it is being added to a [[DenseMatrix]]. If two dense matrices are added, the output will * also be a [[DenseMatrix]]. + * @since 1.3.0 */ def add(other: BlockMatrix): BlockMatrix = { require(numRows() == other.numRows(), "Both matrices must have the same number of rows. " + @@ -340,6 +369,8 @@ class BlockMatrix( * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause * some performance issues until support for multiplying two sparse matrices is added. 
+ * + * @since 1.3.0 */ def multiply(other: BlockMatrix): BlockMatrix = { require(numCols() == other.numRows(), "The number of columns of A and the number of rows " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 078d1fac44443..2b751e45dd76c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} * @param i row index * @param j column index * @param value value of the entry + * @since 1.0.0 */ @Experimental case class MatrixEntry(i: Long, j: Long, value: Double) @@ -42,6 +43,7 @@ case class MatrixEntry(i: Long, j: Long, value: Double) * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the max column index plus one. + * @since 1.0.0 */ @Experimental class CoordinateMatrix( @@ -49,10 +51,14 @@ class CoordinateMatrix( private var nRows: Long, private var nCols: Long) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(entries: RDD[MatrixEntry]) = this(entries, 0L, 0L) - /** Gets or computes the number of columns. */ + /** Gets or computes the number of columns. + * @since 1.0.0 + * */ override def numCols(): Long = { if (nCols <= 0L) { computeSize() @@ -60,7 +66,9 @@ class CoordinateMatrix( nCols } - /** Gets or computes the number of rows. */ + /** Gets or computes the number of rows. 
+ * @since 1.0.0 + * */ override def numRows(): Long = { if (nRows <= 0L) { computeSize() @@ -68,12 +76,16 @@ class CoordinateMatrix( nRows } - /** Transposes this CoordinateMatrix. */ + /** Transposes this CoordinateMatrix. + * @since 1.3.0 + * */ def transpose(): CoordinateMatrix = { new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows()) } - /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */ + /** Converts to IndexedRowMatrix. The number of columns must be within the integer range. + * @since 1.0.0 + * */ def toIndexedRowMatrix(): IndexedRowMatrix = { val nl = numCols() if (nl > Int.MaxValue) { @@ -92,12 +104,15 @@ class CoordinateMatrix( /** * Converts to RowMatrix, dropping row indices after grouping by row index. * The number of columns must be within the integer range. + * @since 1.0.0 */ def toRowMatrix(): RowMatrix = { toIndexedRowMatrix().toRowMatrix() } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. + * @since 1.3.0 + * */ def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -109,6 +124,7 @@ class CoordinateMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. 
* @return a [[BlockMatrix]] + * @since 1.3.0 */ def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { require(rowsPerBlock > 0, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala index a0e26ce3bc465..98e90af84abac 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala @@ -21,6 +21,7 @@ import breeze.linalg.{DenseMatrix => BDM} /** * Represents a distributively stored matrix backed by one or more RDDs. + * @since 1.0.0 */ trait DistributedMatrix extends Serializable { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index 1c33b43ea7a8a..a09f88ce28e58 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -27,6 +27,7 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition /** * :: Experimental :: * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. + * @since 1.0.0 */ @Experimental case class IndexedRow(index: Long, vector: Vector) @@ -41,6 +42,7 @@ case class IndexedRow(index: Long, vector: Vector) * be determined by the max row index plus one. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. + * @since 1.0.0 */ @Experimental class IndexedRowMatrix( @@ -48,9 +50,15 @@ class IndexedRowMatrix( private var nRows: Long, private var nCols: Int) extends DistributedMatrix { - /** Alternative constructor leaving matrix dimensions to be determined automatically. 
*/ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(rows: RDD[IndexedRow]) = this(rows, 0L, 0) + /** + * + * @since 1.0.0 + */ override def numCols(): Long = { if (nCols <= 0) { // Calling `first` will throw an exception if `rows` is empty. @@ -59,6 +67,10 @@ class IndexedRowMatrix( nCols } + /** + * + * @since 1.0.0 + */ override def numRows(): Long = { if (nRows <= 0L) { // Reduce will throw an exception if `rows` is empty. @@ -70,12 +82,15 @@ class IndexedRowMatrix( /** * Drops row indices and converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.RowMatrix]]. + * @since 1.0.0 */ def toRowMatrix(): RowMatrix = { new RowMatrix(rows.map(_.vector), 0L, nCols) } - /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */ + /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. + * @since 1.3.0 + * */ def toBlockMatrix(): BlockMatrix = { toBlockMatrix(1024, 1024) } @@ -87,6 +102,7 @@ class IndexedRowMatrix( * @param colsPerBlock The number of columns of each block. The blocks at the right edge may have * a smaller value. Must be an integer value greater than 0. * @return a [[BlockMatrix]] + * @since 1.3.0 */ def toBlockMatrix(rowsPerBlock: Int, colsPerBlock: Int): BlockMatrix = { // TODO: This implementation may be optimized @@ -96,6 +112,7 @@ class IndexedRowMatrix( /** * Converts this matrix to a * [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]]. + * @since 1.3.0 */ def toCoordinateMatrix(): CoordinateMatrix = { val entries = rows.flatMap { row => @@ -132,6 +149,7 @@ class IndexedRowMatrix( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. 
* @return SingularValueDecomposition(U, s, V) + * @since 1.0.0 */ def computeSVD( k: Int, @@ -158,6 +176,7 @@ class IndexedRowMatrix( * * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return an IndexedRowMatrix representing the product, which preserves partitioning + * @since 1.0.0 */ def multiply(B: Matrix): IndexedRowMatrix = { val mat = toRowMatrix().multiply(B) @@ -169,6 +188,7 @@ class IndexedRowMatrix( /** * Computes the Gramian matrix `A^T A`. + * @since 1.0.0 */ def computeGramianMatrix(): Matrix = { toRowMatrix().computeGramianMatrix() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 1626da9c3d2ee..564f9a6f4c052 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -44,6 +44,7 @@ import org.apache.spark.storage.StorageLevel * be determined by the number of records in the RDD `rows`. * @param nCols number of columns. A non-positive value means unknown, and then the number of * columns will be determined by the size of the first row. + * @since 1.0.0 */ @Experimental class RowMatrix( @@ -51,10 +52,14 @@ class RowMatrix( private var nRows: Long, private var nCols: Int) extends DistributedMatrix with Logging { - /** Alternative constructor leaving matrix dimensions to be determined automatically. */ + /** Alternative constructor leaving matrix dimensions to be determined automatically. + * @since 1.0.0 + * */ def this(rows: RDD[Vector]) = this(rows, 0L, 0) - /** Gets or computes the number of columns. */ + /** Gets or computes the number of columns. + * @since 1.0.0 + * */ override def numCols(): Long = { if (nCols <= 0) { try { @@ -69,7 +74,9 @@ class RowMatrix( nCols } - /** Gets or computes the number of rows. */ + /** Gets or computes the number of rows. 
+ * @since 1.0.0 + * */ override def numRows(): Long = { if (nRows <= 0L) { nRows = rows.count() @@ -107,6 +114,7 @@ class RowMatrix( /** * Computes the Gramian matrix `A^T A`. + * @since 1.0.0 */ def computeGramianMatrix(): Matrix = { val n = numCols().toInt @@ -177,6 +185,7 @@ class RowMatrix( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. * @return SingularValueDecomposition(U, s, V). U = null if computeU = false. + * @since 1.0.0 */ def computeSVD( k: Int, @@ -317,6 +326,7 @@ class RowMatrix( /** * Computes the covariance matrix, treating each row as an observation. * @return a local dense matrix of size n x n + * @since 1.0.0 */ def computeCovariance(): Matrix = { val n = numCols().toInt @@ -370,6 +380,7 @@ class RowMatrix( * * @param k number of top principal components. * @return a matrix of size n-by-k, whose columns are principal components + * @since 1.0.0 */ def computePrincipalComponents(k: Int): Matrix = { val n = numCols().toInt @@ -388,6 +399,7 @@ class RowMatrix( /** * Computes column-wise summary statistics. + * @since 1.0.0 */ def computeColumnSummaryStatistics(): MultivariateStatisticalSummary = { val summary = rows.treeAggregate(new MultivariateOnlineSummarizer)( @@ -403,6 +415,7 @@ class RowMatrix( * @param B a local matrix whose number of rows must match the number of columns of this matrix * @return a [[org.apache.spark.mllib.linalg.distributed.RowMatrix]] representing the product, * which preserves partitioning + * @since 1.0.0 */ def multiply(B: Matrix): RowMatrix = { val n = numCols().toInt @@ -435,6 +448,7 @@ class RowMatrix( * * @return An n x n sparse upper-triangular matrix of cosine similarities between * columns of this matrix. + * @since 1.2.0 */ def columnSimilarities(): CoordinateMatrix = { columnSimilarities(0.0) @@ -478,6 +492,7 @@ class RowMatrix( * with the cost vs estimate quality trade-off described above. 
* @return An n x n sparse upper-triangular matrix of cosine similarities * between columns of this matrix. + * @since 1.2.0 */ def columnSimilarities(threshold: Double): CoordinateMatrix = { require(threshold >= 0, s"Threshold cannot be negative: $threshold") @@ -613,6 +628,10 @@ class RowMatrix( } @Experimental + +/** + * @since 1.0.0 + */ object RowMatrix { /** From 2e5ebd6a24ebc3a2a44c770459de7a973d4f1b86 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Tue, 28 Jul 2015 10:36:37 -0700 Subject: [PATCH 2/6] Add @since tag to mllib.linalg.* --- .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 564f9a6f4c052..207e79d829eae 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -627,11 +627,10 @@ class RowMatrix( } } -@Experimental - /** * @since 1.0.0 */ +@Experimental object RowMatrix { /** From 3be09e9366875bd311d9f586c48d1cc00a34d2b2 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Sat, 1 Aug 2015 15:39:07 -0400 Subject: [PATCH 3/6] Remove @since tag from sealed traits Vector and Matrix --- .../apache/spark/mllib/linalg/Matrices.scala | 29 +++++-------------- .../apache/spark/mllib/linalg/Vectors.scala | 8 ----- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 1ea27e2495782..e793842c0c480 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.types._ /** * Trait for a local matrix. 
- * @since 1.2.0 */ @SQLUserDefinedType(udt = classOf[MatrixUDT]) sealed trait Matrix extends Serializable { @@ -41,14 +40,10 @@ sealed trait Matrix extends Serializable { /** Number of columns. */ def numCols: Int - /** Flag that keeps track whether the matrix is transposed or not. False by default. - * @since 1.3.0 - */ + /** Flag that keeps track whether the matrix is transposed or not. False by default. */ val isTransposed: Boolean = false - /** Converts to a dense array in column major. - * @since 1.2.0 - */ + /** Converts to a dense array in column major. */ def toArray: Array[Double] = { val newArray = new Array[Double](numRows * numCols) foreachActive { (i, j, v) => @@ -75,39 +70,29 @@ sealed trait Matrix extends Serializable { /** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */ def transpose: Matrix - /** Convenience method for `Matrix`-`DenseMatrix` multiplication. - * @since 1.2.0 - */ + /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ def multiply(y: DenseMatrix): DenseMatrix = { val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols) BLAS.gemm(1.0, this, y, 0.0, C) C } - /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. - * @since 1.2.0 - */ + /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */ def multiply(y: DenseVector): DenseVector = { multiply(y.asInstanceOf[Vector]) } - /** Convenience method for `Matrix`-`Vector` multiplication. - * @since 1.4.0 - */ + /** Convenience method for `Matrix`-`Vector` multiplication. 
*/ def multiply(y: Vector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) BLAS.gemv(1.0, this, y, 0.0, output) output } - /** A human readable representation of the matrix - * @since 1.0.0 - */ + /** A human readable representation of the matrix */ override def toString: String = toBreeze.toString() - /** A human readable representation of the matrix with maximum lines and width - * @since 1.4.0 - */ + /** A human readable representation of the matrix with maximum lines and width */ def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth) /** Map the values of this matrix using a function. Generates a new matrix. Performs the diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 3bdc18451f071..c1ffb5afaf99e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -36,7 +36,6 @@ import org.apache.spark.sql.types._ * Represents a numeric vector, whose index type is Int and value type is Double. * * Note: Users should not implement this interface. - * @since 1.2.0 */ @SQLUserDefinedType(udt = classOf[VectorUDT]) sealed trait Vector extends Serializable { @@ -51,9 +50,6 @@ sealed trait Vector extends Serializable { */ def toArray: Array[Double] - /** - * @since 1.0.0 - */ override def equals(other: Any): Boolean = { other match { case v2: Vector => @@ -74,7 +70,6 @@ sealed trait Vector extends Serializable { /** * Returns a hash code value for the vector. The hash code is based on its size and its nonzeros * in the first 16 entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]]. - * @since 1.2.2 */ override def hashCode(): Int = { // This is a reference implementation. It calls return in foreachActive, which is slow. 
@@ -103,13 +98,11 @@ sealed trait Vector extends Serializable { /** * Gets the value of the ith element. * @param i index - * @since 1.0.0 */ def apply(i: Int): Double = toBreeze(i) /** * Makes a deep copy of this vector. - * @since 1.1.0 */ def copy: Vector = { throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.") @@ -147,7 +140,6 @@ sealed trait Vector extends Serializable { /** * Returns a vector in either dense or sparse format, whichever uses less storage. - * @since 1.4.0 */ def compressed: Vector = { val nnz = numNonzeros From 30128647396b4a3ff150f5ad8b25c1c3dc5a7ab0 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Mon, 3 Aug 2015 21:55:04 -0400 Subject: [PATCH 4/6] Fixed various tagging issues per review by @mechcoder --- .../apache/spark/mllib/linalg/Matrices.scala | 30 +++++++++++-------- .../apache/spark/mllib/linalg/Vectors.scala | 30 +++++++++++++------ 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index e793842c0c480..9d1c258c3184f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -260,18 +260,12 @@ class DenseMatrix( def this(numRows: Int, numCols: Int, values: Array[Double]) = this(numRows, numCols, values, false) - /** - * @since 1.4.0 - */ override def equals(o: Any): Boolean = o match { case m: DenseMatrix => m.numRows == numRows && m.numCols == numCols && Arrays.equals(toArray, m.toArray) case _ => false } - /** - * @since 1.4.0 - */ override def hashCode: Int = { com.google.common.base.Objects.hashCode(numRows : Integer, numCols: Integer, toArray) } @@ -351,8 +345,14 @@ class DenseMatrix( } } + /** + * @since 1.5.0 + */ override def numNonzeros: Int = values.count(_ != 0) + /** + * @since 1.5.0 + */ override def numActives: Int = values.length /** @@ -395,7 
+395,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros - * @since 1.2.0 + * @since 1.3.0 */ def zeros(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -408,7 +408,7 @@ object DenseMatrix { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones - * @since 1.2.0 + * @since 1.3.0 */ def ones(numRows: Int, numCols: Int): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -420,7 +420,7 @@ object DenseMatrix { * Generate an Identity Matrix in `DenseMatrix` format. * @param n number of rows and columns of the matrix * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.2.0 + * @since 1.3.0 */ def eye(n: Int): DenseMatrix = { val identity = DenseMatrix.zeros(n, n) @@ -438,7 +438,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1) - * @since 1.2.0 + * @since 1.3.0 */ def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -452,7 +452,7 @@ object DenseMatrix { * @param numCols number of columns of the matrix * @param rng a random number generator * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1) - * @since 1.2.0 + * @since 1.3.0 */ def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = { require(numRows.toLong * numCols <= Int.MaxValue, @@ -465,7 +465,7 @@ object DenseMatrix { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values` * on the diagonal - * @since 
1.2.0 + * @since 1.3.0 */ def diag(vector: Vector): DenseMatrix = { val n = vector.size @@ -645,8 +645,14 @@ class SparseMatrix( new DenseMatrix(numRows, numCols, toArray) } + /** + * @since 1.5.0 + */ override def numNonzeros: Int = values.count(_ != 0) + /** + * @since 1.5.0 + */ override def numActives: Int = values.length } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index c1ffb5afaf99e..05d3e5a82f705 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -367,7 +367,7 @@ object Vectors { * @param vector input vector. * @param p norm. * @return norm in L^p^ space. - * @since 1.2.0 + * @since 1.3.0 */ def norm(vector: Vector, p: Double): Double = { require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " + @@ -539,16 +539,31 @@ object Vectors { @SQLUserDefinedType(udt = classOf[VectorUDT]) class DenseVector(val values: Array[Double]) extends Vector { + /** + * @since 1.0.0 + */ override def size: Int = values.length + /** + * @since 1.0.0 + */ override def toString: String = values.mkString("[", ",", "]") + /** + * @since 1.0.0 + */ override def toArray: Array[Double] = values private[spark] override def toBreeze: BV[Double] = new BDV[Double](values) + /** + * @since 1.0.0 + */ override def apply(i: Int): Double = values(i) + /** + * @since 1.1.0 + */ override def copy: DenseVector = { new DenseVector(values.clone()) } @@ -564,9 +579,6 @@ class DenseVector(val values: Array[Double]) extends Vector { } } - /** - * @since 1.4.0 - */ override def hashCode(): Int = { var result: Int = 31 + size var i = 0 @@ -583,6 +595,9 @@ class DenseVector(val values: Array[Double]) extends Vector { result } + /** + * @since 1.4.0 + */ override def numActives: Int = size /** @@ -618,7 +633,7 @@ class DenseVector(val values: Array[Double]) extends 
Vector { } /** - * @since 1.4.0 + * @since 1.5.0 */ override def argmax: Int = { if (size == 0) { @@ -706,9 +721,6 @@ class SparseVector( } } - /** - * @since 1.2.2 - */ override def hashCode(): Int = { var result: Int = 31 + size val end = values.length @@ -772,7 +784,7 @@ class SparseVector( } /** - * @since 1.4.0 + * @since 1.5.0 */ override def argmax: Int = { if (size == 0) { From 09aad77dc5b389bb17bf275f7add16579d17c015 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Fri, 14 Aug 2015 14:01:45 -0400 Subject: [PATCH 5/6] Remove @since tag from toString and confirmed alternate constructor has tags --- .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 05d3e5a82f705..d502d04b8a9f6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -544,9 +544,6 @@ class DenseVector(val values: Array[Double]) extends Vector { */ override def size: Int = values.length - /** - * @since 1.0.0 - */ override def toString: String = values.mkString("[", ",", "]") /** @@ -680,9 +677,6 @@ class SparseVector( s" indices match the dimension of the values. 
You provided ${indices.length} indices and " + s" ${values.length} values.") - /** - * @since 1.0.0 - */ override def toString: String = s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})" From 68b3ed9c40700165deb81916310a2ebd34f76143 Mon Sep 17 00:00:00 2001 From: Sameer Abhyankar Date: Mon, 17 Aug 2015 08:48:10 -0400 Subject: [PATCH 6/6] Correct @since tag versions --- .../scala/org/apache/spark/mllib/linalg/Matrices.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 9d1c258c3184f..b80e619cad020 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -891,7 +891,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of zeros - * @since 1.3.0 + * @since 1.2.0 */ def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols) @@ -900,7 +900,7 @@ object Matrices { * @param numRows number of rows of the matrix * @param numCols number of columns of the matrix * @return `Matrix` with size `numRows` x `numCols` and values of ones - * @since 1.3.0 + * @since 1.2.0 */ def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols) @@ -908,7 +908,7 @@ object Matrices { * Generate a dense Identity Matrix in `Matrix` format. 
* @param n number of rows and columns of the matrix * @return `Matrix` with size `n` x `n` and values of ones on the diagonal - * @since 1.3.0 + * @since 1.2.0 */ def eye(n: Int): Matrix = DenseMatrix.eye(n) @@ -971,7 +971,7 @@ object Matrices { * @param vector a `Vector` that will form the values on the diagonal of the matrix * @return Square `Matrix` with size `values.length` x `values.length` and `values` * on the diagonal - * @since 1.3.0 + * @since 1.2.0 */ def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)