cross_validate.Rd
Fits a decision tree on one data set and tests its performance on another.
cross_validate( train, test, cluster, genes_use = Seurat::VariableFeatures(train), warn.gene.removal = TRUE, ... )
train | a Seurat object to be used for training. |
---|---|
test | another Seurat object to be used for testing. |
cluster | the cluster whose equivalence needs to be found. |
genes_use | character vector specifying which genes to use for the classification, defaults to Seurat::VariableFeatures(train) |
warn.gene.removal | logical indicating whether to warn the user when genes are removed because they are missing in one of the datasets. Defaults to TRUE |
... | additional arguments to be passed to ranger_importances.Seurat |
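a list containing (1) the tree fit (`party_fit`), (2) a summary table (`confusion_matrix`), (3) the consensus rules of the tree (`concensus_rules`), (4) the ranger significance table (`ranger_significance_table`), and (5) the suggested genes for the gating (`gating_genes`)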
cross_validate(small_5050_mix, small_9901_mix, cluster = "0")#> Warning: Only few negative importance values found, inaccurate p-values. Consider the 'altmann' approach. #> #> This can be done by setting the argument 'imp_method' to 'altmann', note that this method is extremely computationally intensive. #> #> This warning can be disabled by setting the argument `warn.imp.method` to `FALSE` #> #> For more information please refer to ?ranger::ranger#> Warning: Some important genes were removed because they are not present in the test dataset. #> Removed genes: ASNS, CD3D, HEY1, ADA, XIST, CDKN2A, RPL26, TSC22D3, CA2, AIF1, GAL, MDK, MAP1A, PSMB9, TSTD1, FAM127B, ID2, DMKN, PSMB8, KRT18, HOXA9, ZNF503, MAP1B, PYGL, HSPA1B, CSRP2#> $party_fit #> #> Model formula: #> ident ~ ARHGDIB + TMSB4X + MZB1 + SOX4 + FYB + CD1E + UBE2C + #> HIST1H1E + ITGA4 + CDK1 + ITM2A + CHI3L2 + HIST1H4C + MALAT1 + #> JUN + CXCR4 + DDIT4 + HIST1H2BK #> #> Fitted party: #> [1] root #> | [2] ARHGDIB <= 2.29381 #> | | [3] SOX4 <= 2.74241: not clus 0 (n = 90, err = 27.8%) #> | | [4] SOX4 > 2.74241: clus 0 (n = 8, err = 0.0%) #> | [5] ARHGDIB > 2.29381 #> | | [6] DDIT4 <= 2.13797: clus 0 (n = 148, err = 4.1%) #> | | [7] DDIT4 > 2.13797: clus 0 (n = 9, err = 44.4%) #> #> Number of inner nodes: 3 #> Number of terminal nodes: 4 #> #> $confusion_matrix #> cluster #> predicted 0 1 2 #> clus 0 263 98 8 #> not clus 0 0 0 15 #> #> $concensus_rules #> Cluster-clus 0: #> all elements: #> ARHGDIB + #> majority elements: #> DDIT4 - #> Cluster-not clus 0: #> all elements: #> ARHGDIB - #> SOX4 - #> #> $ranger_significance_table #> importance pvalue gene #> ASNS 6.1981030 0.000000000 ASNS #> ARHGDIB 4.6568825 0.000000000 ARHGDIB #> CD3D 4.5486561 0.000000000 CD3D #> TMSB4X 3.8594044 0.000000000 TMSB4X #> HEY1 3.4640647 0.000000000 HEY1 #> ADA 2.7639487 0.000000000 ADA #> MZB1 2.5351154 0.000000000 MZB1 #> XIST 2.1050942 0.000000000 XIST #> CDKN2A 2.0906554 0.000000000 CDKN2A #> RPL26 2.0897659 0.000000000 RPL26 
#> SOX4 2.0034795 0.000000000 SOX4 #> TSC22D3 1.9670025 0.000000000 TSC22D3 #> CA2 1.9547972 0.000000000 CA2 #> FYB 1.6685485 0.000000000 FYB #> AIF1 1.5561841 0.000000000 AIF1 #> GAL 1.1628424 0.000000000 GAL #> MDK 1.0391597 0.000000000 MDK #> CD1E 0.9016758 0.000000000 CD1E #> UBE2C 0.7935225 0.009433962 UBE2C #> MAP1A 0.7925588 0.009433962 MAP1A #> PSMB9 0.6315636 0.009433962 PSMB9 #> TSTD1 0.5575521 0.009433962 TSTD1 #> HIST1H1E 0.5060435 0.009433962 HIST1H1E #> FAM127B 0.5018757 0.009433962 FAM127B #> ID2 0.4493514 0.009433962 ID2 #> DMKN 0.4488922 0.009433962 DMKN #> PSMB8 0.4441328 0.009433962 PSMB8 #> ITGA4 0.4201880 0.009433962 ITGA4 #> CDK1 0.4042143 0.009433962 CDK1 #> ITM2A 0.3883246 0.009433962 ITM2A #> KRT18 0.3738098 0.009433962 KRT18 #> CHI3L2 0.3534820 0.009433962 CHI3L2 #> HIST1H4C 0.3438604 0.009433962 HIST1H4C #> HOXA9 0.3198131 0.009433962 HOXA9 #> ZNF503 0.3125776 0.009433962 ZNF503 #> MALAT1 0.3075915 0.009433962 MALAT1 #> JUN 0.3029134 0.009433962 JUN #> MAP1B 0.3000326 0.009433962 MAP1B #> PYGL 0.2815934 0.018867925 PYGL #> CXCR4 0.2759808 0.018867925 CXCR4 #> DDIT4 0.2183946 0.028301887 DDIT4 #> HSPA1B 0.2162486 0.028301887 HSPA1B #> HIST1H2BK 0.2082975 0.037735849 HIST1H2BK #> CSRP2 0.2003424 0.037735849 CSRP2 #> #> $gating_genes #> [1] "ARHGDIB" "SOX4" "DDIT4" #>cross_validate(small_5050_mix, small_9901_mix, cluster = "ALL")#> Warning: Only few negative importance values found, inaccurate p-values. Consider the 'altmann' approach. #> #> This can be done by setting the argument 'imp_method' to 'altmann', note that this method is extremely computationally intensive. #> #> This warning can be disabled by setting the argument `warn.imp.method` to `FALSE` #> #> For more information please refer to ?ranger::ranger#> Warning: Some important genes were removed because they are not present in the test dataset. 
#> Removed genes: ASNS, CD3D, ADA, HEY1, TSC22D3, XIST, MAP1A, RPL26, CA2, AIF1, CDKN2A, FAM127B, GAL, CSRP2, PSMB8, TSTD1, MDK, HOXA9, ZNF503, DMKN, ID2, CTC1, IFI16, KRT18, PYGL, PSMB9, MAP1B, SPRED2, RNF138, LNP1#> $party_fit #> #> Model formula: #> ident ~ TMSB4X + ARHGDIB + MZB1 + SOX4 + FYB + UBE2C + CD1E + #> ITM2A + HIST1H1E + HIST1H4C + DDIT4 + CHI3L2 + JUN + CDK1 + #> ITGA4 + CXCR4 + HIST1H1C + MYC #> #> Fitted party: #> [1] root #> | [2] ARHGDIB <= 2.29381 #> | | [3] SOX4 <= 2.74241: 1 (n = 90, err = 27.8%) #> | | [4] SOX4 > 2.74241: 0 (n = 8, err = 0.0%) #> | [5] ARHGDIB > 2.29381 #> | | [6] DDIT4 <= 2.13797: 0 (n = 148, err = 4.1%) #> | | [7] DDIT4 > 2.13797: 0 (n = 9, err = 44.4%) #> #> Number of inner nodes: 3 #> Number of terminal nodes: 4 #> #> $confusion_matrix #> cluster #> predicted 0 1 2 #> 0 263 98 8 #> 1 0 0 15 #> #> $concensus_rules #> Cluster-0: #> all elements: #> ARHGDIB + #> majority elements: #> DDIT4 - #> Cluster-1: #> all elements: #> ARHGDIB - #> SOX4 - #> #> $ranger_significance_table #> importance pvalue gene #> ASNS 5.0523069 0.00000000 ASNS #> CD3D 4.5759691 0.00000000 CD3D #> TMSB4X 4.3567501 0.00000000 TMSB4X #> ARHGDIB 4.3289627 0.00000000 ARHGDIB #> MZB1 3.5292376 0.00000000 MZB1 #> ADA 3.0016297 0.00000000 ADA #> HEY1 2.7798605 0.00000000 HEY1 #> TSC22D3 2.3415148 0.00000000 TSC22D3 #> XIST 2.0798456 0.00000000 XIST #> SOX4 1.8714852 0.00000000 SOX4 #> MAP1A 1.6867208 0.00000000 MAP1A #> RPL26 1.6366171 0.00000000 RPL26 #> CA2 1.4931733 0.00000000 CA2 #> AIF1 1.4917225 0.00000000 AIF1 #> FYB 1.4892930 0.00000000 FYB #> CDKN2A 1.4302791 0.00000000 CDKN2A #> UBE2C 1.1031831 0.00000000 UBE2C #> CD1E 0.9464429 0.00000000 CD1E #> FAM127B 0.9218146 0.00000000 FAM127B #> GAL 0.7591950 0.00000000 GAL #> ITM2A 0.6595954 0.00000000 ITM2A #> HIST1H1E 0.6520071 0.00000000 HIST1H1E #> CSRP2 0.5320566 0.00000000 CSRP2 #> HIST1H4C 0.4876504 0.01020408 HIST1H4C #> PSMB8 0.4840830 0.01020408 PSMB8 #> DDIT4 0.3981723 0.01020408 DDIT4 #> TSTD1 
0.3894432 0.01020408 TSTD1 #> MDK 0.3768194 0.01020408 MDK #> HOXA9 0.3689784 0.01020408 HOXA9 #> ZNF503 0.3525577 0.01020408 ZNF503 #> CHI3L2 0.3054693 0.01020408 CHI3L2 #> DMKN 0.2965305 0.01020408 DMKN #> JUN 0.2865324 0.01020408 JUN #> CDK1 0.2831493 0.01020408 CDK1 #> ID2 0.2830716 0.01020408 ID2 #> CTC1 0.2813576 0.01020408 CTC1 #> IFI16 0.2710781 0.01020408 IFI16 #> KRT18 0.2704436 0.01020408 KRT18 #> ITGA4 0.2689324 0.01020408 ITGA4 #> PYGL 0.2584941 0.01020408 PYGL #> PSMB9 0.2523764 0.01020408 PSMB9 #> MAP1B 0.2228242 0.04081633 MAP1B #> CXCR4 0.2227893 0.04081633 CXCR4 #> HIST1H1C 0.2195790 0.04081633 HIST1H1C #> MYC 0.2099993 0.04081633 MYC #> SPRED2 0.1998300 0.04081633 SPRED2 #> RNF138 0.1982636 0.04081633 RNF138 #> LNP1 0.1882561 0.04081633 LNP1 #> #> $gating_genes #> [1] "ARHGDIB" "SOX4" "DDIT4" #>