Match the columns of two MTX files

rcpp_mmutil_match_files(
  src_mtx,
  tgt_mtx,
  knn,
  RANK,
  TAKE_LN = TRUE,
  TAU = 1,
  COL_NORM = 10000,
  EM_ITER = 10L,
  EM_TOL = 1e-04,
  LU_ITER = 5L,
  KNN_BILINK = 10L,
  KNN_NNLIST = 10L,
  row_weight_file = "",
  NUM_THREADS = 1L,
  BLOCK_SIZE = 10000L
)

Arguments

src_mtx

source data file (MTX format)

tgt_mtx

target data file (MTX format)

knn

number of nearest neighbors (k)

RANK

SVD rank

TAKE_LN

whether to apply the log(1 + x) transformation (default: TRUE)

TAU

regularization parameter (default: 1)

COL_NORM

column normalization (default: 1e4)

EM_ITER

number of EM iterations for factorization (default: 10)

EM_TOL

EM convergence tolerance (default: 1e-4)

LU_ITER

number of LU iterations (default: 5)

KNN_BILINK

number of bidirectional links (default: 10)

KNN_NNLIST

number of nearest neighbor lists (default: 10)

row_weight_file

row-wise weight file (default: "")

NUM_THREADS

number of threads for multi-core processing (default: 1)

BLOCK_SIZE

number of columns per block (default: 10000)

Value

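a list of matched source indices, target indices, and distances (accessed in the example below as `src.index`, `tgt.index`, and `dist`)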

Examples

## Generate some data
rr <- rgamma(100, 1, 6) # 100 cells
mm <- matrix(rgamma(100 * 3, 1, 1), 100, 3)
dat <- mmutilR::rcpp_mmutil_simulate_poisson(mm, rr, "sim_test")
.matched <- mmutilR::rcpp_mmutil_match_files(dat$mtx, dat$mtx,
                                             knn=1, RANK=5)
## Do they match well?
mean(.matched$src.index == .matched$tgt.index)
#> [1] 0.9791667
summary(.matched$dist)
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> 3.460e-06 1.263e-04 5.302e-04 3.467e-03 1.750e-03 1.738e-01 
## clean up temp directory
unlink(list.files(pattern = "sim_test"))