# mahout-user mailing list archives

##### Site index · List index
Message view
Top
From Lee Ho Yeung <jobmatt...@gmail.com>
Subject which function and how to use to guess missing value in matrix like this svd?
Date Tue, 21 Jun 2016 05:20:07 GMT
```Is there an SVD function, like the one below, that can estimate missing
floating-point values in a matrix?
If there is such a function, how do I use it, and where is the result
stored?

import numpy as np
from scipy.sparse.linalg import svds
from functools import partial

def emsvd(Y, k=None, tol=1E-3, maxiter=None):
    """
    Approximate SVD on data with missing values via expectation-maximization

    Missing entries are first filled with their column means; each EM step
    then takes an SVD of the mean-centred filled-in matrix, overwrites the
    missing entries with the low-rank reconstruction and re-estimates the
    column means.  Observed entries are never modified and Y itself is not
    mutated.

    Inputs:
    -----------
    Y:          (nobs, ndim) data matrix, missing values denoted by NaN/Inf
    k:          number of singular values/vectors to find; if None a full
                (economy-size) np.linalg.svd is used, otherwise
                scipy.sparse.linalg.svds is called with this k
    tol:        convergence tolerance on the relative change in trace norm
                (sum of singular values) between successive EM steps
    maxiter:    maximum number of EM steps to perform (default: no limit)

    Returns:
    -----------
    Y_hat:      (nobs, ndim) reconstructed data matrix
    mu_hat:     (1, ndim) estimated column means for reconstructed data
    U, s, Vt:   singular values and vectors (see np.linalg.svd and
                scipy.sparse.linalg.svds for details)

    Raises:
    -----------
    ValueError: if any column of Y contains no finite (observed) value
    """

    if k is None:
        svdmethod = partial(np.linalg.svd, full_matrices=False)
    else:
        svdmethod = partial(svds, k=k)
    if maxiter is None:
        maxiter = np.inf

    Y = np.asarray(Y, dtype=float)
    valid = np.isfinite(Y)
    missing = ~valid

    # a column with no observed value has no mean to start from; fail
    # early instead of feeding NaNs into the SVD
    if not valid.any(axis=0).all():
        raise ValueError("every column of Y needs at least one finite value")

    # initialize the missing values to their respective column means.
    # Inf entries are masked to NaN first: nanmean only skips NaN, so an
    # Inf marker would otherwise turn the whole column mean into Inf/NaN.
    mu_hat = np.nanmean(np.where(valid, Y, np.nan), axis=0, keepdims=True)
    Y_hat = np.where(valid, Y, mu_hat)

    ii = 1
    v_prev = 0.0

    while True:

        # SVD on filled-in data
        U, s, Vt = svdmethod(Y_hat - mu_hat)

        # impute missing values (U * s scales the columns of U, which is
        # the same product as U.dot(np.diag(s)) without the dense diagonal)
        Y_hat[missing] = ((U * s).dot(Vt) + mu_hat)[missing]

        # update bias parameter
        mu_hat = Y_hat.mean(axis=0, keepdims=True)

        # test convergence using relative change in trace norm.  Written
        # as |v - v_prev| <= tol * v_prev so that there is no division by
        # zero (v_prev starts at 0, and v can be 0 for constant columns)
        # and so that a decrease in v is measured by its magnitude.
        v = s.sum()
        if ii >= maxiter or abs(v - v_prev) <= tol * v_prev:
            break
        ii += 1
        v_prev = v

    return Y_hat, mu_hat, U, s, Vt

```
Mime
• Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message