Fits a decision tree on one data set and tests its performance on another.

cross_validate(
  train,
  test,
  cluster,
  genes_use = Seurat::VariableFeatures(train),
  warn.gene.removal = TRUE,
  ...
)

Arguments

train

a Seurat object to be used for training.

test

another Seurat object to be used for testing.

cluster

the cluster whose equivalence needs to be found.

genes_use

character vector specifying which genes to use for the classification; defaults to Seurat::VariableFeatures(train).

warn.gene.removal

logical indicating whether to warn the user when genes are removed because they are missing in one of the datasets. Defaults to TRUE.

...

additional arguments to be passed to ranger_importances.Seurat

Value

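a list containing (1) the tree fit (party_fit), (2) a summary table in the form of a confusion matrix (confusion_matrix), (3) the consensus rules of the tree (concensus_rules), (4) the ranger significance table (ranger_significance_table), and (5) the suggested genes for the gating (gating_genes).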

Examples

cross_validate(small_5050_mix, small_9901_mix, cluster = "0")
#> Warning: Only few negative importance values found, inaccurate p-values. Consider the 'altmann' approach. #> #> This can be done by setting the argument 'imp_method' to 'altmann', note that this method is extremely computationally intensive. #> #> This warning can be disabled by setting the argument `warn.imp.method` to `FALSE` #> #> For more information please refer to ?ranger::ranger
#> Warning: Some important genes were removed because they are not present in the test dataset. #> Removed genes: ASNS, CD3D, HEY1, ADA, XIST, CDKN2A, RPL26, TSC22D3, CA2, AIF1, GAL, MDK, MAP1A, PSMB9, TSTD1, FAM127B, ID2, DMKN, PSMB8, KRT18, HOXA9, ZNF503, MAP1B, PYGL, HSPA1B, CSRP2
#> $party_fit #> #> Model formula: #> ident ~ ARHGDIB + TMSB4X + MZB1 + SOX4 + FYB + CD1E + UBE2C + #> HIST1H1E + ITGA4 + CDK1 + ITM2A + CHI3L2 + HIST1H4C + MALAT1 + #> JUN + CXCR4 + DDIT4 + HIST1H2BK #> #> Fitted party: #> [1] root #> | [2] ARHGDIB <= 2.29381 #> | | [3] SOX4 <= 2.74241: not clus 0 (n = 90, err = 27.8%) #> | | [4] SOX4 > 2.74241: clus 0 (n = 8, err = 0.0%) #> | [5] ARHGDIB > 2.29381 #> | | [6] DDIT4 <= 2.13797: clus 0 (n = 148, err = 4.1%) #> | | [7] DDIT4 > 2.13797: clus 0 (n = 9, err = 44.4%) #> #> Number of inner nodes: 3 #> Number of terminal nodes: 4 #> #> $confusion_matrix #> cluster #> predicted 0 1 2 #> clus 0 263 98 8 #> not clus 0 0 0 15 #> #> $concensus_rules #> Cluster-clus 0: #> all elements: #> ARHGDIB + #> majority elements: #> DDIT4 - #> Cluster-not clus 0: #> all elements: #> ARHGDIB - #> SOX4 - #> #> $ranger_significance_table #> importance pvalue gene #> ASNS 6.1981030 0.000000000 ASNS #> ARHGDIB 4.6568825 0.000000000 ARHGDIB #> CD3D 4.5486561 0.000000000 CD3D #> TMSB4X 3.8594044 0.000000000 TMSB4X #> HEY1 3.4640647 0.000000000 HEY1 #> ADA 2.7639487 0.000000000 ADA #> MZB1 2.5351154 0.000000000 MZB1 #> XIST 2.1050942 0.000000000 XIST #> CDKN2A 2.0906554 0.000000000 CDKN2A #> RPL26 2.0897659 0.000000000 RPL26 #> SOX4 2.0034795 0.000000000 SOX4 #> TSC22D3 1.9670025 0.000000000 TSC22D3 #> CA2 1.9547972 0.000000000 CA2 #> FYB 1.6685485 0.000000000 FYB #> AIF1 1.5561841 0.000000000 AIF1 #> GAL 1.1628424 0.000000000 GAL #> MDK 1.0391597 0.000000000 MDK #> CD1E 0.9016758 0.000000000 CD1E #> UBE2C 0.7935225 0.009433962 UBE2C #> MAP1A 0.7925588 0.009433962 MAP1A #> PSMB9 0.6315636 0.009433962 PSMB9 #> TSTD1 0.5575521 0.009433962 TSTD1 #> HIST1H1E 0.5060435 0.009433962 HIST1H1E #> FAM127B 0.5018757 0.009433962 FAM127B #> ID2 0.4493514 0.009433962 ID2 #> DMKN 0.4488922 0.009433962 DMKN #> PSMB8 0.4441328 0.009433962 PSMB8 #> ITGA4 0.4201880 0.009433962 ITGA4 #> CDK1 0.4042143 0.009433962 CDK1 #> ITM2A 0.3883246 0.009433962 ITM2A #> KRT18 
0.3738098 0.009433962 KRT18 #> CHI3L2 0.3534820 0.009433962 CHI3L2 #> HIST1H4C 0.3438604 0.009433962 HIST1H4C #> HOXA9 0.3198131 0.009433962 HOXA9 #> ZNF503 0.3125776 0.009433962 ZNF503 #> MALAT1 0.3075915 0.009433962 MALAT1 #> JUN 0.3029134 0.009433962 JUN #> MAP1B 0.3000326 0.009433962 MAP1B #> PYGL 0.2815934 0.018867925 PYGL #> CXCR4 0.2759808 0.018867925 CXCR4 #> DDIT4 0.2183946 0.028301887 DDIT4 #> HSPA1B 0.2162486 0.028301887 HSPA1B #> HIST1H2BK 0.2082975 0.037735849 HIST1H2BK #> CSRP2 0.2003424 0.037735849 CSRP2 #> #> $gating_genes #> [1] "ARHGDIB" "SOX4" "DDIT4" #>
cross_validate(small_5050_mix, small_9901_mix, cluster = "ALL")
#> Warning: Only few negative importance values found, inaccurate p-values. Consider the 'altmann' approach. #> #> This can be done by setting the argument 'imp_method' to 'altmann', note that this method is extremely computationally intensive. #> #> This warning can be disabled by setting the argument `warn.imp.method` to `FALSE` #> #> For more information please refer to ?ranger::ranger
#> Warning: Some important genes were removed because they are not present in the test dataset. #> Removed genes: ASNS, CD3D, ADA, HEY1, TSC22D3, XIST, MAP1A, RPL26, CA2, AIF1, CDKN2A, FAM127B, GAL, CSRP2, PSMB8, TSTD1, MDK, HOXA9, ZNF503, DMKN, ID2, CTC1, IFI16, KRT18, PYGL, PSMB9, MAP1B, SPRED2, RNF138, LNP1
#> $party_fit #> #> Model formula: #> ident ~ TMSB4X + ARHGDIB + MZB1 + SOX4 + FYB + UBE2C + CD1E + #> ITM2A + HIST1H1E + HIST1H4C + DDIT4 + CHI3L2 + JUN + CDK1 + #> ITGA4 + CXCR4 + HIST1H1C + MYC #> #> Fitted party: #> [1] root #> | [2] ARHGDIB <= 2.29381 #> | | [3] SOX4 <= 2.74241: 1 (n = 90, err = 27.8%) #> | | [4] SOX4 > 2.74241: 0 (n = 8, err = 0.0%) #> | [5] ARHGDIB > 2.29381 #> | | [6] DDIT4 <= 2.13797: 0 (n = 148, err = 4.1%) #> | | [7] DDIT4 > 2.13797: 0 (n = 9, err = 44.4%) #> #> Number of inner nodes: 3 #> Number of terminal nodes: 4 #> #> $confusion_matrix #> cluster #> predicted 0 1 2 #> 0 263 98 8 #> 1 0 0 15 #> #> $concensus_rules #> Cluster-0: #> all elements: #> ARHGDIB + #> majority elements: #> DDIT4 - #> Cluster-1: #> all elements: #> ARHGDIB - #> SOX4 - #> #> $ranger_significance_table #> importance pvalue gene #> ASNS 5.0523069 0.00000000 ASNS #> CD3D 4.5759691 0.00000000 CD3D #> TMSB4X 4.3567501 0.00000000 TMSB4X #> ARHGDIB 4.3289627 0.00000000 ARHGDIB #> MZB1 3.5292376 0.00000000 MZB1 #> ADA 3.0016297 0.00000000 ADA #> HEY1 2.7798605 0.00000000 HEY1 #> TSC22D3 2.3415148 0.00000000 TSC22D3 #> XIST 2.0798456 0.00000000 XIST #> SOX4 1.8714852 0.00000000 SOX4 #> MAP1A 1.6867208 0.00000000 MAP1A #> RPL26 1.6366171 0.00000000 RPL26 #> CA2 1.4931733 0.00000000 CA2 #> AIF1 1.4917225 0.00000000 AIF1 #> FYB 1.4892930 0.00000000 FYB #> CDKN2A 1.4302791 0.00000000 CDKN2A #> UBE2C 1.1031831 0.00000000 UBE2C #> CD1E 0.9464429 0.00000000 CD1E #> FAM127B 0.9218146 0.00000000 FAM127B #> GAL 0.7591950 0.00000000 GAL #> ITM2A 0.6595954 0.00000000 ITM2A #> HIST1H1E 0.6520071 0.00000000 HIST1H1E #> CSRP2 0.5320566 0.00000000 CSRP2 #> HIST1H4C 0.4876504 0.01020408 HIST1H4C #> PSMB8 0.4840830 0.01020408 PSMB8 #> DDIT4 0.3981723 0.01020408 DDIT4 #> TSTD1 0.3894432 0.01020408 TSTD1 #> MDK 0.3768194 0.01020408 MDK #> HOXA9 0.3689784 0.01020408 HOXA9 #> ZNF503 0.3525577 0.01020408 ZNF503 #> CHI3L2 0.3054693 0.01020408 CHI3L2 #> DMKN 0.2965305 0.01020408 DMKN #> JUN 
0.2865324 0.01020408 JUN #> CDK1 0.2831493 0.01020408 CDK1 #> ID2 0.2830716 0.01020408 ID2 #> CTC1 0.2813576 0.01020408 CTC1 #> IFI16 0.2710781 0.01020408 IFI16 #> KRT18 0.2704436 0.01020408 KRT18 #> ITGA4 0.2689324 0.01020408 ITGA4 #> PYGL 0.2584941 0.01020408 PYGL #> PSMB9 0.2523764 0.01020408 PSMB9 #> MAP1B 0.2228242 0.04081633 MAP1B #> CXCR4 0.2227893 0.04081633 CXCR4 #> HIST1H1C 0.2195790 0.04081633 HIST1H1C #> MYC 0.2099993 0.04081633 MYC #> SPRED2 0.1998300 0.04081633 SPRED2 #> RNF138 0.1982636 0.04081633 RNF138 #> LNP1 0.1882561 0.04081633 LNP1 #> #> $gating_genes #> [1] "ARHGDIB" "SOX4" "DDIT4" #>