您好,登錄後才能下訂單哦!
通用文件處理:
import numpy as np


def loadDataSet(fileName, dotSplit):
    """Load a delimited numeric text file into feature and target matrices.

    Each non-blank line is split on ``dotSplit`` and every field is parsed
    as a float.  The last column is treated as the dependent variable and
    all preceding columns as features.  The column count is taken from the
    first non-blank line.

    Args:
        fileName: Path of the text file to read.
        dotSplit: Field separator handed to ``str.split`` (``None`` splits
            on runs of whitespace).

    Returns:
        A tuple ``(xMat, yMat)`` of ``np.matrix`` objects: ``xMat`` holds one
        row of features per data line, ``yMat`` is the matching column of
        target values.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer columns than the first line.
    """
    # Read the file once and close it deterministically; blank lines
    # (typically a trailing newline at end of file) are skipped.
    with open(fileName) as fr:
        rows = [line.split(dotSplit) for line in fr if line.strip()]

    numFeat = len(rows[0]) if rows else 1
    dataMat = []
    labelMat = []
    for curline in rows:
        # Read the label first so a short row fails loudly instead of
        # being silently truncated by the slice below.
        label = float(curline[numFeat - 1])
        dataMat.append([float(value) for value in curline[:numFeat - 1]])
        labelMat.append(label)

    # np.asmatrix is used because the np.mat alias no longer exists in
    # NumPy 2.x; it returns the same np.matrix type.
    xMat = np.asmatrix(dataMat)
    yMat = np.asmatrix(labelMat).T
    return xMat, yMat
這裡是嶺回歸的實現:
import numpy as np


def ridgeRegres(xMat, yMat, lam=0.2):
    """Solve the ridge-regression normal equations for one penalty value.

    Computes ``(X^T X + lam * I)^-1 X^T y``.  The inputs must be
    ``np.matrix`` objects, because ``*`` is used as a matrix product and the
    inverse is taken through the ``.I`` attribute.

    Args:
        xMat: Feature matrix with one row per sample.
        yMat: Column matrix of target values.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The weight column matrix, or ``None`` when the penalised matrix has
        a determinant of exactly zero and cannot be inverted.
    """
    xTx = xMat.T * xMat
    denom = xTx + np.eye(np.shape(xMat)[1]) * lam
    # Diagnostic output kept from the original: the number of samples.
    # The single-argument parenthesised form prints the same text on
    # Python 2 and Python 3.
    print(np.shape(xMat)[0])
    if np.linalg.det(denom) == 0.0:
        print("wrong")
        return None
    return denom.I * (xMat.T * yMat)


def normalizing(xMat, yMat):
    """Centre the targets and centre-and-scale the features.

    Args:
        xMat: Feature matrix with one row per sample.
        yMat: Column matrix of target values.

    Returns:
        A tuple ``(x, y)``: ``x`` is ``xMat`` with the column means
        subtracted and each column divided by its variance; ``y`` is
        ``yMat`` with its mean subtracted.
    """
    y = yMat - np.mean(yMat, 0)
    xMeans = np.mean(xMat, 0)
    # NOTE: the scale factor is the variance, not the standard deviation.
    # A constant column has zero variance and yields inf/nan values.
    xVar = np.var(xMat, 0)
    x = (xMat - xMeans) / xVar
    return x, y


def ridgeTest(xM, yM):
    """Compute ridge weights over a sweep of 30 penalty values.

    The data are first passed through ``normalizing``; the penalty for row
    ``i`` is ``exp(i - 10)``.

    Args:
        xM: Feature matrix with one row per sample.
        yM: Column matrix of target values.

    Returns:
        An ``ndarray`` of shape ``(30, n_features)``; row ``i`` holds the
        weights obtained with penalty ``exp(i - 10)``.
    """
    xMat, yMat = normalizing(xM, yM)
    numTestPts = 30
    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))
    # Diagnostic output kept from the original: the empty result table.
    print(wMat)
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat
向前逐步回歸:
import numpy as np


def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Array of observed values.
        yHatArr: Array of predicted values, broadcast-compatible with
            ``yArr``.

    Returns:
        The sum of the element-wise squared differences.  Pass plain arrays
        rather than ``np.matrix`` objects, for which ``**`` means a matrix
        power.
    """
    return ((yArr - yHatArr) ** 2).sum()


def stageWise(xM, yM, eps=0.01, numIt=100):
    """Run greedy forward stagewise regression.

    Starting from all-zero weights, every iteration tries moving each single
    weight by ``+eps`` and ``-eps``, and keeps the one move that gives the
    lowest residual sum of squares.  The data are used as given; no
    centring or scaling is applied here.

    Args:
        xM: Feature ``np.matrix`` with one row per sample (``*`` is used as
            a matrix product).
        yM: Target column ``np.matrix`` (its ``.A`` array view is read).
        eps: Step size applied to a single weight per iteration.
        numIt: Number of iterations.

    Returns:
        An ``ndarray`` of shape ``(numIt, n_features)``; row ``i`` holds the
        weights after iteration ``i``.
    """
    n = np.shape(xM)[1]
    returnMat = np.zeros((numIt, n))
    ws = np.zeros((n, 1))
    wsMax = ws.copy()
    for i in range(numIt):
        # Progress output kept from the original: the current weights.
        print(ws.T)
        # Reset the baseline every iteration so the best single-coordinate
        # move is always found.  A baseline carried over from earlier
        # candidates can reject every move and pin the weights at zero.
        lowestError = np.inf
        for j in range(n):
            for sign in (-1, 1):
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xM * wsTest
                rssE = rssError(yM.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。