在下面的练习中,需要自行将 Spark 的 jar 包添加到项目依赖中。
1. Spark MLlib 底层的向量、矩阵运算使用了 Breeze 库。
ScalaNLP 是一套机器学习和数值计算的库,主要面向科学计算(SC)、机器学习(ML)和自然语言处理(NLP)。它包括三个库:Breeze、Epic 和 Puck。
Breeze:是机器学习和数值计算库,它是 Spark MLlib 的核心,包括线性代数、数值计算和优化,是一种通用、功能强大、有效的机器学习方法。
Epic:是一种高性能的统计句法分析器(parser)和结构化预测库
Puck :是一个快速GPU加速解析器
在使用Breeze 库时,需要导入相关包:
import breeze.linalg._
import breeze.numerics._
具体练习如下:
package leaning

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand

/**
 * Walk-through of the Breeze linear-algebra API: construction, element access,
 * element manipulation, numeric / reduction / boolean functions, linear-algebra
 * routines and rounding functions.
 *
 * Each example prints its result; the comment next to a statement records the
 * output observed by the original author (random examples differ on every run).
 *
 * Created by dy9776 on 2017/12/5.
 */
object Practise_breeze {

  /**
   * Runs every example in order and prints the results to standard output.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {

    // ---------------------------------------------------------------
    // Construction
    // ---------------------------------------------------------------

    // All-zero 3x2 matrix
    val matrix: DenseMatrix[Double] = DenseMatrix.zeros[Double](3, 2)
    println(matrix)
    // 0.0  0.0
    // 0.0  0.0
    // 0.0  0.0

    // All-zero vector
    val testVector: DenseVector[Double] = DenseVector.zeros[Double](2)
    println(testVector)

    // All-one vector
    val allOneVector = DenseVector.ones[Double](2)
    println(allOneVector)

    // Vector filled with a constant value
    val haveNumberFill = DenseVector.fill[Double](3, 2)
    println(haveNumberFill)

    // Range vectors (start, end, step) for Int, Double and Float
    val rangeNUm = DenseVector.range(1, 10, 2)   // DenseVector(1, 3, 5, 7, 9)
    val rangeNUmD = DenseVector.rangeD(1, 9, 2)  // DenseVector(1.0, 3.0, 5.0, 7.0)
    val rangeNUmF = DenseVector.rangeF(1, 7, 2)  // DenseVector(1.0, 3.0, 5.0)
    println(rangeNUm)
    println(rangeNUmD)
    println(rangeNUmF)

    // Identity matrix
    val unitMatrix = DenseMatrix.eye[Double](4)
    // println(unitMatrix)
    // 1.0  0.0  0.0  0.0
    // 0.0  1.0  0.0  0.0
    // 0.0  0.0  1.0  0.0
    // 0.0  0.0  0.0  1.0

    // Diagonal matrix built from a vector
    val doubleVecoter = diag(DenseVector(3.0, 4.0, 5.0))
    // println(doubleVecoter)
    // 3.0  0.0  0.0
    // 0.0  4.0  0.0
    // 0.0  0.0  5.0

    // Matrix from rows (one tuple per row)
    val byRowCreateMatrix = DenseMatrix((4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    // println(byRowCreateMatrix)
    // 4.0  5.0  6.0
    // 7.0  8.0  9.0

    // Vector from literal values.
    // The values are passed as varargs: wrapping them in an extra pair of
    // parentheses would build a one-element vector holding a Tuple6 instead.
    val denseCreateVector = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
    // println(denseCreateVector)
    // DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    // Vector transpose
    val vectorTranspostion = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0).t
    println(vectorTranspostion)
    // Transpose(DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

    // Vector from a function of the index
    val funCreateVector = DenseVector.tabulate(5)(i => i * i)
    println(funCreateVector) // DenseVector(0, 1, 4, 9, 16)
    val funCreateVector2 = DenseVector.tabulate(0 to 5)(i => i * i)
    println(funCreateVector2) // DenseVector(0, 1, 4, 9, 16, 25)

    // Matrix from a function of (row, column)
    val createFuncMatrix = DenseMatrix.tabulate(3, 4) { case (i, j) => i * i + j * j }
    // println(createFuncMatrix)
    // 0  1  4  9
    // 1  2  5  10
    // 4  5  8  13

    // Matrix from a flat array; the recorded output shows the array is read
    // column by column.
    val createFunctionMatrix =
      new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))
    // println(createFunctionMatrix)
    // 1.0  3.0
    // 4.0  6.0
    // 7.0  9.0

    // Random vectors drawn uniformly from [0, 1); values differ on every run
    val formZeroToOneRandomVector = DenseVector.rand(9, Rand.uniform)
    println(formZeroToOneRandomVector)
    // e.g. DenseVector(0.7978222133507369, 0.48978247271729325, 0.24943434133065834, ...)
    val formZeroToOneRandomVector2 = DenseVector.rand(9, Rand.uniform)
    println(formZeroToOneRandomVector2)

    // Random matrix, uniform distribution
    val formZeroToOneRandomMatrix = DenseMatrix.rand(3, 2, Rand.uniform)
    println(formZeroToOneRandomMatrix)
    // e.g.
    // 0.8036324612618653  0.538112087890035
    // 0.6864375371630702  0.3123993272549075
    // 0.9458628172312897  0.01137554621536796

    // Random matrix, Gaussian distribution (values are not confined to [0, 1))
    val formZeroToOneRandomMatrix2 = DenseMatrix.rand(3, 2, Rand.gaussian)
    println(formZeroToOneRandomMatrix2)
    // e.g.
    // 0.9510499901472648   0.287812938654061
    // -0.5266499883462216  0.9380426076781263
    // -0.3959295333472151  -0.9057610233257112

    // ---------------------------------------------------------------
    // Element access
    // ---------------------------------------------------------------

    val a = new DenseVector[Int](Array(1 to 20: _*))
    println(a) // DenseVector(1, 2, 3, ..., 20)

    // Single position
    println(a(0)) // 1

    // Sub-vector (inclusive / exclusive upper bound)
    println(a(1 to 4))    // DenseVector(2, 3, 4, 5)
    println(a(1 until 4)) // DenseVector(2, 3, 4)

    // From a start position to the end (-1 addresses the last element)
    println(a(1 to -1)) // DenseVector(2, 3, 4, ..., 20)

    // Sub-vector with an explicit step; a negative step walks backwards
    println(a(5 to 0 by -1)) // DenseVector(6, 5, 4, 3, 2, 1)

    // Last element
    println(a(-1)) // 20

    val m = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    println(m)
    // 1.0  2.0  3.0
    // 4.0  5.0  6.0

    // Single position (row, column)
    println(m(0, 1)) // 2.0

    // One whole column
    println(m(::, 1)) // DenseVector(2.0, 5.0)

    // ---------------------------------------------------------------
    // Element manipulation
    // ---------------------------------------------------------------

    // Reshape 2x3 -> 3x2
    val justAdjustMatrix = m.reshape(3, 2)
    println(justAdjustMatrix)
    // 1.0  5.0
    // 4.0  3.0
    // 2.0  6.0

    // Matrix -> vector, and back to a single-row matrix
    val toVector = m.toDenseVector
    println(toVector)               // DenseVector(1.0, 4.0, 2.0, 5.0, 3.0, 6.0)
    println(toVector.toDenseMatrix) // 1.0  4.0  2.0  5.0  3.0  6.0

    // Lower triangle. For the 2x3 input the recorded result is 2x2, i.e. only
    // the leading rows x rows square is used.
    println(lowerTriangular(m))
    // 1.0  0.0
    // 4.0  5.0

    // Upper triangle (same 2x2 truncation)
    println(upperTriangular(m))
    // 1.0  2.0
    // 0.0  5.0

    // Copy of the matrix (prints the full 2x3 matrix m)
    println(m.copy)

    // Diagonal elements. The result is (1.0, 5.0) rather than (1.0, 6.0) because
    // upperTriangular(m) is the 2x2 matrix shown above, whose diagonal is 1.0, 5.0.
    println(diag(upperTriangular(m))) // DenseVector(1.0, 5.0)

    // Assign a scalar to a slice; the expression evaluates to the updated slice
    println(a(1 to 4) := 5) // DenseVector(5, 5, 5, 5)

    // Assign a vector to a slice
    println(a(1 to 4) := DenseVector(1, 2, 3, 4)) // DenseVector(1, 2, 3, 4)
    println(m)

    // Assign to a sub-matrix. Row range 1 to 2 is out of bounds for a 2-row matrix:
    // println( m( 1 to 2, 1 to 2) := 0.0 )
    // Exception in thread "main" java.lang.IndexOutOfBoundsException:
    //   Row slice of Range(1, 2) was bigger than matrix rows of 2
    println("-==========m1================-")
    println(m(0 to 1, 1 to 2) := 0.0)
    println("-==========m================-")
    println(m)
    println("-==========m end================-")
    // -==========m1================-
    // 0.0  0.0
    // 0.0  0.0
    // -==========m================-
    // 1.0  0.0  0.0
    // 4.0  0.0  0.0
    // -==========m end================-
    // NOTE(review): the originally recorded output for m was
    // "0.0 0.0 3.0 / 0.0 0.0 6.0", which corresponds to columns 0 to 1; with
    // columns 1 to 2 (as coded) columns 1 and 2 are the ones zeroed - confirm by running.

    // Assign to one column
    val re = m(::, 2) := 5.0
    println(re.toDenseMatrix) // 5.0  5.0

    val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))

    // Vertical concatenation
    val verticalLike = DenseMatrix.vertcat(a1, a2)
    println(verticalLike)
    println("-==========================-")
    // 1.0   2.0   3.0
    // 4.0   5.0   6.0
    // 7.0   8.0   9.0
    // 10.0  11.0  12.0

    // Horizontal concatenation
    val twoMatrixConn = DenseMatrix.horzcat(a1, a2)
    println(twoMatrixConn)
    println("-==========================-")
    // 1.0  2.0  3.0  7.0   8.0   9.0
    // 4.0  5.0  6.0  10.0  11.0  12.0

    // Vector concatenation: vertcat yields a longer vector, horzcat a matrix
    val connnectVector1 = DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    val connnectVector2 = DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    println(connnectVector1) // DenseVector(20, 21, 22, 23, 24, 25)
    println(connnectVector2)
    // 20  23
    // 21  24
    // 22  25

    // ---------------------------------------------------------------
    // Numeric functions
    // ---------------------------------------------------------------

    // Element-wise addition
    println(a1 + a2)
    // 8.0   10.0  12.0
    // 14.0  16.0  18.0

    // Element-wise multiplication
    println(a1 :* a2)
    // 7.0   16.0  27.0
    // 40.0  55.0  72.0

    // Element-wise division
    println(a1 :/ a2)
    // 0.14285714285714285  0.25                 0.3333333333333333
    // 0.4                  0.45454545454545453  0.5

    // Element-wise comparison
    println(a1 :< a2)
    // true  true  true
    // true  true  true

    // Element-wise equality
    println(a1 :== a2)
    // false  false  false
    // false  false  false

    // In-place addition of a scalar (a1 itself is modified)
    println(a1 :+= 2.0)
    // 3.0  4.0  5.0
    // 6.0  7.0  8.0

    // In-place multiplication by a scalar (applied to the already shifted a1)
    println(a1 :*= 2.0)
    // 6.0   8.0   10.0
    // 12.0  14.0  16.0

    // Dot product
    val vectorDot = DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)
    println(vectorDot) // 10

    // Largest / smallest element and their positions
    println(max(a1))    // 16.0
    println(min(a1))    // 6.0
    println(argmax(a1)) // (1,2)
    println(argmin(a1)) // (0,0)

    // ---------------------------------------------------------------
    // Sum functions
    // ---------------------------------------------------------------

    val m1 = DenseMatrix(
      (1.0, 2.0, 3.0, 4.0),
      (5.0, 6.0, 7.0, 8.0),
      (9.0, 10.0, 11.0, 12.0))
    println(m1)
    // 1.0  2.0   3.0   4.0
    // 5.0  6.0   7.0   8.0
    // 9.0  10.0  11.0  12.0
    println("-==========================-")

    // Sum of all elements
    println(sum(m1)) // 78.0

    // Sum of every column
    println(sum(m1, Axis._0)) // 15.0  18.0  21.0  24.0

    // Sum of every row
    println(sum(m1, Axis._1)) // DenseVector(10.0, 26.0, 42.0)

    // Sum of the diagonal elements
    println(trace(lowerTriangular(m1))) // 18.0

    // Running (cumulative) sum
    val a3 = new DenseVector[Int](Array(10 to 20: _*))
    println(accumulate(a3))
    // DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)

    // ---------------------------------------------------------------
    // Boolean functions
    // ---------------------------------------------------------------

    val c = DenseVector(true, false, true)
    val d = DenseVector(false, true, true)

    // Element-wise AND / OR / NOT
    println(c :& d) // DenseVector(false, false, true)
    println(c :| d) // DenseVector(true, true, true)
    println(!c)     // DenseVector(false, true, false)

    val e = DenseVector[Int](-3, 0, 2)
    // Is any element non-zero?
    println(any(e)) // true
    // Are all elements non-zero?
    println(all(e)) // false

    // ---------------------------------------------------------------
    // Linear-algebra functions
    // ---------------------------------------------------------------

    val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

    // Linear solve: find X such that f * X = g
    println(f \ g)
    // -2.5  -2.5  -2.5
    // 4.0   4.0   4.0
    // -1.5  -1.5  -1.5

    // Transpose
    println(f.t)
    // 1.0  4.0  7.0
    // 2.0  5.0  8.0
    // 3.0  6.0  9.0

    // Determinant (effectively zero: f has rank 2, see rank(f) below)
    println(det(f)) // 6.661338147750939E-16

    // Inverse. Because f is (numerically) singular the result consists of huge
    // values and is not meaningful.
    println(inv(f))
    // -4.503599627370499E15  9.007199254740992E15    -4.503599627370495E15
    // 9.007199254740998E15   -1.8014398509481984E16  9.007199254740991E15
    // -4.503599627370498E15  9.007199254740992E15    -4.5035996273704955E15

    // Pseudo-inverse
    println(pinv(f))
    // -3.7720834019330525E14  7.544166803866101E14    -3.77208340193305E14
    // 7.544166803866094E14    -1.5088333607732208E15  7.544166803866108E14
    // -3.772083401933041E14   7.544166803866104E14    -3.772083401933055E14

    // Eigenvalues and eigenvectors
    println(eig(f))
    // Eig(DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),
    //     DenseVector(0.0, 0.0, 0.0),
    //     -0.23197068724628617  -0.7858302387420671   0.40824829046386363
    //     -0.5253220933012336   -0.08675133925662833  -0.816496580927726
    //     -0.8186734993561815   0.61232756022881      0.4082482904638625)

    // Singular value decomposition
    val svd.SVD(u, s, v) = svd(g)
    println(u)
    // -0.5773502691896255  -0.5773502691896257  -0.5773502691896256
    // -0.5773502691896256  -0.2113248654051871  0.7886751345948126
    // -0.5773502691896256  0.7886751345948129   -0.21132486540518708
    println("==============================")
    println(s) // DenseVector(3.0000000000000004, 0.0, 0.0)
    println("==============================")
    println(v)
    // -0.5773502691896256  -0.5773502691896257  -0.5773502691896256
    // 0.0                  -0.7071067811865474  0.7071067811865477
    // 0.816496580927726    -0.4082482904638629  -0.4082482904638628

    // Rank
    println(rank(f)) // 2

    // Number of elements / rows / columns
    println(f.size) // 9
    println(f.rows) // 3
    println(f.cols) // 3

    // ---------------------------------------------------------------
    // Rounding functions
    // ---------------------------------------------------------------

    val h = DenseVector(-1.2, 0.7, 2.3)

    // Round to nearest
    println(round(h)) // DenseVector[Long]: DenseVector(-1, 1, 2)

    // Smallest integer not less than the value
    println(ceil(h)) // DenseVector(-1.0, 1.0, 3.0)

    // Largest integer not greater than the value
    println(floor(h)) // DenseVector(-2.0, 0.0, 2.0)

    // Sign function
    println(signum(h)) // DenseVector(-1.0, 1.0, 1.0)

    // Absolute value
    println(abs(h)) // DenseVector(1.2, 0.7, 2.3)
  }
}