<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JC</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id>
      <journal-title>JMIR Cancer</journal-title>
      <issn pub-type="epub">2369-1999</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e74196</article-id>
      <article-id pub-id-type="pmid">41529257</article-id>
      <article-id pub-id-type="doi">10.2196/74196</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Explainable AI for Predicting Mortality Risk in Metastatic Cancer: Retrospective Cohort Study Using the Memorial Sloan Kettering-Metastatic Dataset</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Cahill</surname>
            <given-names>Naomi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Alahakoon</surname>
            <given-names>AMYD</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hounye</surname>
            <given-names>Alphonse Houssou</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Patel</surname>
            <given-names>Dhavalkumar</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nalela</surname>
            <given-names>Polycarp</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9632-4530</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Rao</surname>
            <given-names>Deepthi</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6821-0694</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rao</surname>
            <given-names>Praveen</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>The University of Missouri</institution>
            <addr-line>225 Naka Hall</addr-line>
            <addr-line>Columbia, MO</addr-line>
            <country>United States</country>
            <phone>1 9132317241</phone>
            <email>praveen.rao@missouri.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1859-0438</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>The University of Missouri</institution>
        <addr-line>Columbia, MO</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Praveen Rao <email>praveen.rao@missouri.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>13</day>
        <month>1</month>
        <year>2026</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e74196</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>3</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>5</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>11</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Polycarp Nalela, Deepthi Rao, Praveen Rao. Originally published in JMIR Cancer (https://cancer.jmir.org), 13.01.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on https://cancer.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://cancer.jmir.org/2026/1/e74196" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Metastatic cancer remains one of the leading causes of cancer-related mortality worldwide. Yet, the prediction of survivability in this population remains limited by heterogeneous clinical presentations and high-dimensional molecular features. Advances in machine learning (ML) provide an opportunity to integrate diverse patient- and tumor-level factors into explainable predictive ML models. Leveraging large real-world datasets and modern ML techniques can enable improved risk stratification and precision oncology.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to develop and interpret ML models for predicting overall survival in patients with metastatic cancer using the Memorial Sloan Kettering-Metastatic (MSK-MET) dataset and to identify key prognostic biomarkers through explainable artificial intelligence techniques.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We performed a retrospective analysis of the MSK-MET cohort, comprising 25,775 patients across 27 tumor types. After data cleaning and balancing, 20,338 patients were included. Overall survival was defined as deceased versus living at last follow-up. Five classifiers (extreme gradient boosting [XGBoost], logistic regression, random forest, decision tree, and naive Bayes) were trained using an 80/20 stratified split and optimized via grid search with 5-fold cross-validation. Model performance was assessed using accuracy, area under the curve (AUC), precision, recall, and <italic>F</italic><sub>1</sub>-score. Model explainability was achieved using Shapley additive explanations (SHAP). Survival analyses included Kaplan-Meier estimates, Cox proportional hazards models, and an XGBoost-Cox model for time-to-event prediction. The positive predictive value and negative predictive value were calculated at the Youden index–optimal threshold.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>XGBoost achieved the highest performance (accuracy=0.74; AUC=0.82), outperforming other classifiers. In survival analyses, the XGBoost-Cox model with a concordance index (C-index) of 0.70 exceeded the traditional Cox model (C-index=0.66). SHAP analysis and Cox models consistently identified metastatic site count, tumor mutational burden, fraction of genome altered, and the presence of distant liver and bone metastases as among the strongest prognostic factors, a pattern that held at both the pan-cancer level and recurrently across cancer-specific models. At the cancer-specific level, performance varied; prostate cancer achieved the highest predictive accuracy (AUC=0.88), while pancreatic cancer was notably more challenging (AUC=0.68). Kaplan-Meier analyses demonstrated marked survival separation between patients with and without metastases (80-month survival: approximately 0.80 vs 0.30). At the Youden-optimal threshold, positive predictive value and negative predictive value were approximately 70% and 80%, respectively, supporting clinical use for risk stratification.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Explainable ML models, particularly XGBoost combined with SHAP, can strongly predict survivability in metastatic cancers while highlighting clinically meaningful features. These findings support the use of ML-based tools for patient counseling, treatment planning, and integration into precision oncology workflows. Future work should include external validation on independent cohorts, integration with electronic health records via Fast Healthcare Interoperability Resources–based dashboards, and prospective clinician-in-the-loop evaluation to assess real-world use.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>explainable artificial intelligence</kwd>
        <kwd>machine learning in oncology</kwd>
        <kwd>metastatic cancer</kwd>
        <kwd>survivability prediction</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Cancer remains one of the foremost global health challenges, with 611,720 deaths projected in the United States alone for 2024, and metastatic disease accounting for the overwhelming majority of these fatalities [<xref ref-type="bibr" rid="ref1">1</xref>]. Metastasis is particularly vexing because disseminated tumor cells frequently acquire distinct genomic and phenotypic profiles that render them resistant to standard therapies. Contemporary oncology research, therefore, acknowledges intra- and intertumor heterogeneity as fundamental obstacles to curative treatment, therapeutic resistance, and accurate prognosis [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Reliable, early-stage prediction of patient survivability is pivotal not merely for counseling patients but also for tailoring aggressive interventions, prioritizing scarce health care resources, and designing adaptive clinical trials aimed at improving long-term outcomes.</p>
      <p>Historically, clinicians have relied on the tumor-node-metastasis staging system, Kaplan-Meier life tables, and the Cox proportional hazards (CPH) regression model to stratify survival risk. Although statistically rigorous, these techniques impose proportional hazards and linearity assumptions that rarely hold across the dynamic, nonlinear biology of metastatic cancers. When violated, CPH models can yield biased hazard ratios, suffer from time-dependent covariate effects, and perform poorly on censored or highly imbalanced datasets [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>The emergence of artificial intelligence (AI) and machine learning (ML) has enabled the development of sophisticated models that can uncover previously hidden patterns in heterogeneous clinical and multiomics datasets, thereby providing new insights into cancer biology, diagnosis, prognosis, and treatment outcomes. Over the past decade, ML models have repeatedly outperformed traditional statistical approaches. A deep-learning algorithm developed by Esteva et al [<xref ref-type="bibr" rid="ref5">5</xref>] achieved a sensitivity of 97% and a specificity of 78% in classifying skin lesions as benign or malignant, while Liu et al [<xref ref-type="bibr" rid="ref6">6</xref>] reported an area under the receiver operating characteristic curve (AUROC) of 0.94 for lung cancer risk prediction using computed tomography images. Similar gains have been demonstrated for breast cancer survival [<xref ref-type="bibr" rid="ref7">7</xref>], lymph node metastasis [<xref ref-type="bibr" rid="ref8">8</xref>], colorectal and soft tissue sarcoma outcomes [<xref ref-type="bibr" rid="ref9">9</xref>], lung cancer survival [<xref ref-type="bibr" rid="ref10">10</xref>], and prostate cancer prognosis [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>Among contemporary ML pipelines, gradient-boosted decision trees, particularly extreme gradient boosting (XGBoost) [<xref ref-type="bibr" rid="ref13">13</xref>], have emerged as a workhorse because they natively handle missing data, nonlinear feature interactions, and mixed data types. Recent examples include a non–small cell lung cancer microwave-ablation study where XGBoost achieved an area under the curve (AUC) of 0.89 [<xref ref-type="bibr" rid="ref14">14</xref>]; a 2025 <italic>Scientific Reports</italic> analysis integrating survival models for breast cancer recurrence (concordance index [C-index]=0.82) [<xref ref-type="bibr" rid="ref15">15</xref>]; a large colorectal cancer cohort where boosted trees yielded the highest 5-year survival accuracy [<xref ref-type="bibr" rid="ref16">16</xref>]; and a thyroid cancer study that constructed a 10-year overall survival nomogram using Surveillance, Epidemiology, and End Results data [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      <p>Biomarkers are critical for early detection, diagnosis, prognosis, and monitoring. Traditional biomarker-discovery approaches often suffer from low sensitivity, limited reproducibility, and dependence on prior biological hypotheses. ML circumvents many of these limitations by integrating diverse data types and identifying complex nonlinear relationships. Algorithms such as random forests [<xref ref-type="bibr" rid="ref18">18</xref>], support vector machines [<xref ref-type="bibr" rid="ref19">19</xref>], and neural networks [<xref ref-type="bibr" rid="ref20">20</xref>] have successfully identified biomarkers from gene expression [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], microRNA expression [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref27">27</xref>], DNA methylation [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>], and imaging modalities [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>].</p>
      <p>Despite this progress, significant gaps persist at the intersection of scale, interpretability, and clinical use. First, while large datasets like the Memorial Sloan Kettering-Metastatic (MSK-MET) cohort [<xref ref-type="bibr" rid="ref34">34</xref>] provide unprecedented scale, their analysis has largely relied on traditional statistics, failing to harness state-of-the-art ML for predictive modeling. Second, most ML survival-prediction studies focus on single tumor types, use modest sample sizes, or omit high-dimensional genomic features, limiting their generalizability to the pan-cancer reality of metastatic disease. Third, and most critically, interpretability remains a bottleneck; oncologists are understandably reluctant to incorporate opaque “black box” risk scores into clinical workflows. Explainable AI methods like Shapley additive explanations (SHAP) provide a mechanism for transparency. For instance, SHAP has revealed previously unknown drivers of prostate cancer mortality [<xref ref-type="bibr" rid="ref35">35</xref>]. Although explainable AI frameworks offer a solution, large-scale, pan-cancer implementations that jointly optimize predictive performance and model explainability remain scarce. Consequently, the field lacks an interpretable, cross-tumor framework capable of ranking metastasis-specific risk factors at a scale that reflects real-world heterogeneity.</p>
      <p>To address these critical gaps, we designed a methodological framework that moves beyond standard, single-model architectures. We leverage the MSK-MET dataset, a pan-cancer cohort of genomic and clinical data from 25,775 patients spanning 27 tumor types, as an ideal test bed for this purpose due to its scale and diversity. Our approach is conceived as a hierarchical and explainable pipeline specifically to tackle the challenges of data heterogeneity, the need for clinical trust, and biological discovery. It integrates rigorous pan-cancer benchmarking with targeted, tumor-specific submodels and crucially unifies ML classification with traditional survival analysis. This ensures that the predictive performance of our models is directly coupled with transparent, clinically actionable insights.</p>
      <p>Driven by this methodology, our work addresses persistent gaps at the intersection of scale, interpretability, and clinical use in metastatic cancer prognosis. Leveraging a large pan-cancer cohort that captures real-world heterogeneity, we focus on whether explainable ML approaches can yield clinically useful survivability predictions while providing transparent, biologically and clinically coherent insights across tumor types.</p>
      <p>The primary aim of this study is to develop and validate an interpretable ML framework for predicting overall survival in patients with metastatic cancer. We hypothesize that (1) ML models will achieve clinically useful discrimination and calibration for survivability prediction, (2) explainable AI techniques will identify a core set of prognostic biomarkers that are consistently important across diverse tumor types, and (3) integrating ML predictions with established survival analysis techniques will yield a transparent and clinically actionable tool for risk stratification.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates the steps followed to predict cancer survivability using explainable AI. Raw data were used to initially train the ML models, followed by SHAP analysis. Top features identified by SHAP were then further used in the survival analysis. The steps below detail how this was implemented. All analyses and visualizations were carried out in Python (version 3.12; Python Software Foundation) with relevant packages and libraries such as <italic>pandas</italic>, <italic>numpy</italic>, <italic>scikit-learn</italic>, and <italic>shap</italic>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the explainable machine learning (ML) pipeline for metastatic cancer survivability prediction. The Memorial Sloan Kettering-Metastatic (MSK-MET) cohort is preprocessed and balanced, then split into stratified training and test sets before training and tuning 5 candidate classifiers (extreme gradient boosting [XGBoost], random forest, logistic regression, decision tree, and naive Bayes). The best-performing XGBoost model is subsequently interrogated with Shapley additive explanations (SHAP) to identify key prognostic clinical and genomic features, which are then carried forward into downstream survival analyses (Kaplan-Meier curves, Cox proportional hazards models, and XGBoost-Cox) to generate time-to-event estimates and clinically interpretable risk stratification.</p>
          </caption>
          <graphic xlink:href="cancer_v12i1e74196_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>We first performed a thorough exploratory data analysis on the MSK-MET dataset, which contained information from 25,775 patients with cancer. Our exploratory data analysis process began with a comprehensive analysis of the dataset, including the distribution of different cancer types, stages, and other relevant features. This helped us gain a deeper understanding of the underlying patterns and structures, which informed subsequent preprocessing steps. We preprocessed the input dataset and dropped columns (such as patient ID) and rows having large proportions of missing data. The target variable had 2 classes, that is, living (positive class, coded as 1) and deceased (negative class, coded as 0). In the final stage of preprocessing, the data were balanced using the target classes. Using resampling, we down-sampled the majority class to match the size of the minority class (0). The final set contained 20,338 patients (10,169 living and 10,169 deceased) with 39 variables for each patient. In total, there were 27 cancer types (<xref ref-type="table" rid="table1">Table 1</xref>). The overall survival status was the target variable for prediction. Categorical variables were encoded using label encoding, and features were scaled using minimum-maximum scaling to ensure that variables with larger magnitudes did not unduly influence model outcomes. The resulting preprocessed data were then split into training and testing sets for further analysis.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Distribution of primary cancer types in the Memorial Sloan Kettering-Metastatic cohort. Frequency counts are reported for all cancer types represented in the dataset; the 5 largest groups (non–small cell lung, colorectal, breast, pancreatic, and prostate cancer) provided the primary strata for cancer-specific extreme gradient boosting models and downstream survival analyses.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="550"/>
            <col width="450"/>
            <thead>
              <tr valign="top">
                <td>Cancer type</td>
                <td>Frequency count, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Non–small cell lung cancer</td>
                <td>3790</td>
              </tr>
              <tr valign="top">
                <td>Colorectal cancer</td>
                <td>2696</td>
              </tr>
              <tr valign="top">
                <td>Breast cancer</td>
                <td>2043</td>
              </tr>
              <tr valign="top">
                <td>Pancreatic cancer</td>
                <td>1738</td>
              </tr>
              <tr valign="top">
                <td>Prostate cancer</td>
                <td>1596</td>
              </tr>
              <tr valign="top">
                <td>Endometrial cancer</td>
                <td>988</td>
              </tr>
              <tr valign="top">
                <td>Ovarian cancer</td>
                <td>923</td>
              </tr>
              <tr valign="top">
                <td>Melanoma</td>
                <td>882</td>
              </tr>
              <tr valign="top">
                <td>Bladder cancer</td>
                <td>870</td>
              </tr>
              <tr valign="top">
                <td>Hepatobiliary cancer</td>
                <td>790</td>
              </tr>
              <tr valign="top">
                <td>Esophagogastric cancer</td>
                <td>738</td>
              </tr>
              <tr valign="top">
                <td>Soft tissue sarcoma</td>
                <td>420</td>
              </tr>
              <tr valign="top">
                <td>Head and neck cancer</td>
                <td>362</td>
              </tr>
              <tr valign="top">
                <td>Thyroid cancer</td>
                <td>319</td>
              </tr>
              <tr valign="top">
                <td>Renal cell carcinoma</td>
                <td>318</td>
              </tr>
              <tr valign="top">
                <td>Gastrointestinal stromal tumor</td>
                <td>286</td>
              </tr>
              <tr valign="top">
                <td>Small cell lung cancer</td>
                <td>277</td>
              </tr>
              <tr valign="top">
                <td>Germ cell tumor</td>
                <td>241</td>
              </tr>
              <tr valign="top">
                <td>Mesothelioma</td>
                <td>219</td>
              </tr>
              <tr valign="top">
                <td>Appendiceal cancer</td>
                <td>160</td>
              </tr>
              <tr valign="top">
                <td>Uterine sarcoma</td>
                <td>133</td>
              </tr>
              <tr valign="top">
                <td>Salivary gland cancer</td>
                <td>123</td>
              </tr>
              <tr valign="top">
                <td>Gastrointestinal neuroendocrine tumor</td>
                <td>115</td>
              </tr>
              <tr valign="top">
                <td>Skin cancer (nonmelanoma)</td>
                <td>87</td>
              </tr>
              <tr valign="top">
                <td>Cervical cancer</td>
                <td>80</td>
              </tr>
              <tr valign="top">
                <td>Small bowel cancer</td>
                <td>76</td>
              </tr>
              <tr valign="top">
                <td>Anal cancer</td>
                <td>68</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Stratified Random Sampling of Training and Testing Sets</title>
        <p>We used a stratified random sampling approach to create the training and test sets. First, we randomized the complete dataset to eliminate any inherent order or sequence. Then, we implemented stratification to ensure that the distribution of specific cancer types or stages in our training and testing sets mirrored that of the entire dataset. This is paramount to avoid potential biases and to ensure that our models have a representative sample of the different cancer types and stages present in the entire dataset. Following stratification, we allocated 80% of the data (16,270 patient records) to the training set while reserving the remaining 20% (4068 patient records) for the test set. This approach provides a robust foundation for model development and validation, ensuring both broad and deep representation of the dataset in our training and testing phases.</p>
      </sec>
      <sec>
        <title>Selection and Screening of ML Models for Cancer Survival Prediction</title>
        <p>This study used 5 ML algorithms—XGBoost, naive Bayes, decision tree, logistic regression, and random forest—to predict cancer survival rates using the MSK-MET dataset. XGBoost was selected for its efficiency in handling sparse data and combining models to improve accuracy through ensemble learning. Naive Bayes, a simple classifier applying Bayes theorem, was chosen for its efficiency in high-dimensional datasets. The decision tree, known for its easy visualization and handling of nonlinear relationships, was included for its interpretability. Logistic regression was used for binary classification, predicting survival probabilities, while random forest, an ensemble method using multiple decision trees, was chosen for its accuracy and control over overfitting in large datasets.</p>
      </sec>
      <sec>
        <title>Hyperparameter Optimization Via Grid Search and Model Training</title>
        <p>Grid search with hyperparameter tuning was applied to all 5 ML models. For XGBoost, parameters “n_estimators” (50-1000), “max_depth” (1-10), and “learning_rate” (0.01-0.3) were adjusted to optimize the number of trees, tree depth, and learning speed. Naive Bayes was tuned by varying “alpha” (0.01-10.0), “binarize” (0.0, 0.5, 1.0), and “fit_prior” (True/False). The decision tree’s grid search adjusted “max_depth” (None-10), “min_samples_split” (2, 5, 10), “min_samples_leaf” (1, 2, 4), and “criterion” (“gini” or “entropy”). Logistic regression was optimized with “C” (0.001-1000), “penalty” (“l1,” “l2,” “elasticnet,” “none”), and “solver” (“newton-cg,” “lbfgs,” “liblinear,” “sag,” “saga”). Random forest explored “n_estimators” (50, 100, 200) and “max_features” (“auto,” “sqrt”).</p>
        <p>Using 5-fold cross-validation on the MSK-MET dataset, we trained the 5 classifiers to identify a robust, interpretable predictor and to derive a cohort-wide view of metastatic patterns.</p>
      </sec>
      <sec>
        <title>Model Evaluation</title>
        <p>After training and testing the ML models on the MSK-MET dataset, we assessed their performance using 2 key metrics: the classification report (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and AUROC (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The AUROC measures the model’s ability to distinguish between classes, with higher AUC indicating better prediction. A score of 1 represents perfect predictions, 0.5 indicates random guessing, and below 0.5 suggests worse than random predictions. These metrics provide a comprehensive evaluation, ensuring the models not only predict accurately but also effectively identify positive cancer cases. This approach helps in selecting the best model for predicting cancer survival, balancing the need to detect true cases while minimizing false diagnoses.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Receiver operating characteristic (ROC) curves for the 5 tuned classifiers on the held-out Memorial Sloan Kettering-Metastatic test set. Each curve shows the trade-off between sensitivity and 1 – specificity across decision thresholds, with corresponding area under the curve values summarized in Table 2. The extreme gradient boosting (XGBoost) model achieves the steepest ROC trajectory and highest area under the receiver operating characteristic curve (AUROC=0.82), indicating the strongest discrimination between surviving and deceased patients, while random forest and logistic regression (LogisticReg) form an intermediate tier, and decision tree and Bernoulli naive Bayes (BernoulliNB) exhibit comparatively weaker performance.</p>
          </caption>
          <graphic xlink:href="cancer_v12i1e74196_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>To characterize operating-point behavior, we computed threshold-dependent trade-offs among true positives (TP), false positives (FP), true negatives (TN), and false negatives (FN), along with sensitivity, specificity, precision, negative predictive value (NPV), <italic>F</italic><sub>1</sub>-score, balanced accuracy, Matthews correlation coefficient (MCC), Cohen κ, and accuracy, at 3 decision thresholds: a fixed 0.50, the Youden J optimum, and a clinically constrained point targeting sensitivity ≥0.85. Scalar metrics were accompanied by 95% CIs obtained via a 1000-sample stratified bootstrap.</p>
        <p>To obtain unbiased estimates and avoid tuning leakage, we used nested repeated cross-validation on the training data. The outer loop applied RepeatedStratifiedKFold (5 folds × 10 repeats); within each outer training split, an inner 5-fold GridSearchCV tuned hyperparameters per model. Inner-tuned models were then scored on their corresponding outer validation splits, yielding a matrix of outer cross-validation (CV) results. The model with the highest mean outer-CV performance was refit on the full training set using the selected hyperparameters and evaluated once on the held-out test set (reporting accuracy, AUROC, and a confusion matrix).</p>
        <p>For statistical comparison across the 5 models, we ran a Friedman omnibus test on the outer-CV score matrix. As a prespecified confirmatory analysis, we tested the directional hypothesis that random forest ≥ each baseline using 1-sided Wilcoxon signed-rank tests with Holm correction; we also reported effect sizes (mean and median paired differences) with 95% paired bootstrap CIs. For rank-based visualization and exploratory post hoc inference, we computed average ranks with a Nemenyi critical-difference diagram and performed 1-sided Wilcoxon tests comparing the top-ranked model to the remaining models (Holm-adjusted). Where paired per-example test predictions were available, we used the McNemar test to compare error patterns between 2 models.</p>
      </sec>
      <sec>
        <title>Cancer Survival Prediction With XGBoost</title>
        <p>XGBoost was therefore chosen for the rest of the analyses, involving both pan-cancer and cancer-specific models, metastatic cancer survival prediction, and survival analysis. To enhance transparency and clinical interpretability, we adopted a 2-tier XGBoost design. First, a unified XGBoost model was trained without the “Cancer Type” variable, leveraging clinical and demographic features to capture signals that generalize across diseases. Second, we trained cancer-specific XGBoost models for the 5 largest cancer groups identified in the cohort to capture within-disease interactions that can be diluted in a single global model. This combination provides both a cross-cancer perspective and disease-tailored insights that reflect differences in metastatic behavior and treatment context. In the final evaluation for both global and cancer-specific analysis, we focused only on measuring accuracy and AUC score.</p>
      </sec>
      <sec>
        <title>Model Interpretation and Explanation</title>
        <p>To enhance the understandability and transparency of our predictions, we used XGBoost and SHAP for model explainability. SHAP, based on game theory, provides a detailed and consistent measure of feature importance by computing each feature’s contribution to the prediction. SHAP values represent a feature’s responsibility for a change in the model output, ensuring local accuracy, missingness, and consistency. This method quantifies the impact of each feature on predictions and explains how the presence or absence of a feature affects the outcome. In our SHAP analysis, survival is the positive class. Beeswarm plots are particularly useful for visualizing SHAP values, showing features’ influence and variability in a nuanced manner.</p>
      </sec>
      <sec>
        <title>Survival Analysis</title>
        <p>Following the training of the XGBoost ML model and SHAP analysis, the most important features, such as metastatic site count, tumor mutational burden (TMB), and specific organ metastases (eg, liver, bone, and lung) influencing patient survival, were identified and used in the survival analysis. The primary goal was to examine the duration from cancer diagnosis to patient death, assessing how clinical and genomic variables impact survival times. We used Kaplan-Meier survival analysis, CPH modeling, log-rank tests for comparing survival distributions, and XGBoost survival analysis to deepen our understanding of patient outcomes. All of the original data was used in the survival analysis.</p>
      </sec>
      <sec>
        <title>Kaplan-Meier Survival Analysis</title>
        <p>The Kaplan-Meier estimator was used to evaluate survival probabilities over time across different patient subgroups. Patients were stratified based on key features identified from SHAP analysis. Survival curves were compared using the log-rank test to assess statistically significant differences between groups. A <italic>P</italic> value of &#60;.05 was considered statistically significant. The analysis was first performed on all the data to assess survival of patients with metastatic versus nonmetastatic disease and then on subgroups of the top 5 cancer types including non–small cell lung cancer, colorectal cancer, breast cancer, pancreatic cancer, and prostate cancer.</p>
      </sec>
      <sec>
        <title>CPH Model</title>
        <p>The CPH model was applied to assess the influence of multiple covariates on patient survival while controlling for potential confounders. Key covariates included metastatic site count, fraction of genome altered, TMB, and distant metastases in specific organs. The proportional hazards assumption was evaluated using Schoenfeld residuals, and any violations were addressed through stratification or inclusion of time-varying covariates. Hazard ratios with corresponding 95% CIs were reported to quantify risk associations.</p>
      </sec>
      <sec>
        <title>Log-Rank Test</title>
        <p>To further compare survival distributions between different patient cohorts, the log-rank test was applied. This test was used to determine whether survival differences observed between patient subgroups (eg, metastatic vs nonmetastatic) were statistically significant. The resulting <italic>P</italic> values guided the identification of meaningful clinical predictors. Furthermore, a plot was generated for the Kaplan-Meier survival curves with the overall survival (months) on the x-axis and survival probability on the y-axis.</p>
      </sec>
      <sec>
        <title>XGBoost Survival Analysis</title>
        <p>To capture complex, nonlinear relationships and interactions among variables, XGBoost survival analysis was implemented. This adaptation of XGBoost used a Cox-based loss function to accommodate censored survival data. Hyperparameter tuning was conducted using grid search, optimizing parameters such as “n_estimators,” “max_depth,” and “learning_rate.” The model’s C-index was used to evaluate predictive performance. SHAP values were also applied to the survival model to interpret feature importance and explore individual risk predictions.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study analyzed secondary, noninterventional data from the MSK-MET cohort obtained via a publicly accessible repository (cBioPortal for Cancer Genomics). All records used for modeling and statistical analysis were anonymous and deidentified prior to access; no direct identifiers (eg, names, street addresses, full dates of birth, medical record numbers) or indirect reidentification keys were available to the research team. Because only deidentified data were used and no contact with human participants occurred, the work was considered non–human participants research and did not require informed consent or additional institutional review.</p>
        <p>Data handling procedures followed best-practice privacy safeguards. Working datasets were stored on access-controlled systems. We did not attempt any record linkage or reidentification. To support responsible AI, model development incorporated transparent methods (eg, SHAP explanations) and prespecified subgroup evaluations to screen for potential performance disparities. All code and evaluation protocols are shared to enable reproducibility without exposing any protected information.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Model Performance</title>
        <p>The evaluation of the 5 distinct models on the MSK-MET dataset yielded a spectrum of performances. The overall classification performance for the 5 classifiers is summarized in <xref ref-type="table" rid="table2">Table 2</xref>. Receiver operating characteristic (ROC) curves are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, and precision-recall curves are shown in Figure S2A, calibration in Figure S2B, decision-curve analysis in Figure S2C, and threshold-dependent metrics in Figure S2D in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Discrimination and calibration of tuned classifiers on the same test set. Reported are area under the curve (AUC), area under the precision-recall curve (AUPRC), and Brier score, each with 95% CIs; the outcome prevalence was 0.50. Extreme gradient boosting (XGBoost; lr=0.01; depth=5; n=500) showed the strongest overall performance, with the highest AUC and AUPRC and the lowest Brier score, followed by random forest, while logistic regression, decision tree, and naive Bayes exhibited progressively lower discrimination and less favorable calibration.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>AUC (95% CI)</td>
                <td>AUPRC (95% CI)</td>
                <td>Brier score (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>XGBoost (lr=0.01; depth=5; n=500)</td>
                <td>0.82 (0.81-0.84)</td>
                <td>0.83 (0.81-0.85)</td>
                <td>0.17 (0.16-0.18)</td>
              </tr>
              <tr valign="top">
                <td>Random forest (n_estimators=200; max_features=auto)</td>
                <td>0.80 (0.78-0.81)</td>
                <td>0.81 (0.79-0.82)</td>
                <td>0.18 (0.18-0.19)</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression (C=10; l2; liblinear)</td>
                <td>0.79 (0.78-0.81)</td>
                <td>0.79 (0.77-0.81)</td>
                <td>0.19 (0.18-0.19)</td>
              </tr>
              <tr valign="top">
                <td>Decision tree (gini; depth=8; min_leaf=1; min_split=10)</td>
                <td>0.78 (0.77-0.80)</td>
                <td>0.78 (0.76-0.80)</td>
                <td>0.19 (0.18-0.20)</td>
              </tr>
              <tr valign="top">
                <td>Naive Bayes (α=10.0; binarize=0.5; fit_prior=False)</td>
                <td>0.78 (0.77-0.80)</td>
                <td>0.77 (0.75-0.79)</td>
                <td>0.21 (0.20-0.22)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>XGBoost demonstrated the strongest discrimination and probability quality. It achieved an AUC of 0.82 (95% bootstrap CI 0.81-0.84) and the highest area under the precision-recall curve (AUPRC) at approximately 0.83 against a baseline precision equal to the prevalence (0.50; <xref ref-type="table" rid="table2">Table 2</xref> and Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In accuracy terms, XGBoost reached 0.74 (3010/4068) and, for our summary counts, corresponded to 3335 out of 4068 test patients correctly stratified at the chosen operating point. Random forest (AUC=0.80; AUPRC=0.81) and logistic regression (AUC=0.79; AUPRC=0.79; accuracy=0.72; 2929/4068) formed a consistent middle tier, while decision tree and Bernoulli naive Bayes trailed slightly (both AUC=0.78; accuracy=0.72; 2929/4068; correctly stratified=3173/4068 for AUC-aligned counts; see <xref ref-type="table" rid="table3">Table 3</xref> and Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for detailed counts and metrics). These rankings were concordant across ROC and precision-recall analyses and remained stable across threshold-dependent operating points (fixed threshold=0.50, Youden J optimum, and a clinically constrained sensitivity ≥0.85; <xref rid="figure2" ref-type="fig">Figure 2</xref>; Figures S2A and S2D and Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>Calibration analyses reinforced this ordering. XGBoost produced the lowest Brier score (approximately 0.17; <xref ref-type="table" rid="table2">Table 2</xref>; Table S2 and Figure S2B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and a reliability curve that closely tracked the 45° line, with logistic regression comparably well-calibrated, whereas Bernoulli naive Bayes deviated most at the extremes (<xref ref-type="table" rid="table2">Table 2</xref>; Table S2 and Figure S2B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Compact comparison of 5 classifiers across 3 operating points, that is, default probability cutoff (0.50), Youden J (threshold maximizing sensitivity + specificity – 1), and a high-sensitivity setting (constrained to sensitivity ≥0.85). For each model-threshold pair, we report sensitivity, specificity, Matthews correlation coefficient (MCC), and accuracy; thresholds are applied to the predicted positive-class probability. This summary emphasizes decision-relevant trade-offs: balanced performance at Youden J and the specificity cost of prioritizing high sensitivity. In this cohort, extreme gradient boosting (XGBoost) yields the strongest balanced performance (highest MCC and accuracy) at Youden J while retaining the best specificity among the high-sensitivity operating points.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Model and operating point</td>
                <td colspan="2">Threshold</td>
                <td colspan="2">Sensitivity</td>
                <td colspan="2">Specificity</td>
                <td colspan="2">MCC</td>
                <td>Accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>Naive Bayes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Probability cutoff 0.5</td>
                <td>0.50</td>
                <td colspan="2">0.66</td>
                <td colspan="2">0.78</td>
                <td colspan="2">0.44</td>
                <td colspan="2">0.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Youden J</td>
                <td>0.50</td>
                <td colspan="2">0.65</td>
                <td colspan="2">0.78</td>
                <td colspan="2">0.44</td>
                <td colspan="2">0.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity ≥0.85</td>
                <td>0.15</td>
                <td colspan="2">0.85</td>
                <td colspan="2">0.52</td>
                <td colspan="2">0.39</td>
                <td colspan="2">0.69</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Decision tree</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Probability cutoff 0.5</td>
                <td>0.50</td>
                <td colspan="2">0.72</td>
                <td colspan="2">0.71</td>
                <td colspan="2">0.43</td>
                <td colspan="2">0.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Youden J</td>
                <td>0.56</td>
                <td colspan="2">0.64</td>
                <td colspan="2">0.80</td>
                <td colspan="2">0.45</td>
                <td colspan="2">0.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity ≥0.85</td>
                <td>0.32</td>
                <td colspan="2">0.86</td>
                <td colspan="2">0.49</td>
                <td colspan="2">0.37</td>
                <td colspan="2">0.67</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Logistic regression</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Probability cutoff 0.5</td>
                <td>0.50</td>
                <td colspan="2">0.73</td>
                <td colspan="2">0.73</td>
                <td colspan="2">0.45</td>
                <td colspan="2">0.73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Youden J</td>
                <td>0.53</td>
                <td colspan="2">0.69</td>
                <td colspan="2">0.77</td>
                <td colspan="2">0.46</td>
                <td colspan="2">0.73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity ≥0.85</td>
                <td>0.38</td>
                <td colspan="2">0.85</td>
                <td colspan="2">0.54</td>
                <td colspan="2">0.41</td>
                <td colspan="2">0.69</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Random forest</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Probability cutoff 0.5</td>
                <td>0.50</td>
                <td colspan="2">0.69</td>
                <td colspan="2">0.76</td>
                <td colspan="2">0.45</td>
                <td colspan="2">0.72</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Youden J</td>
                <td>0.49</td>
                <td colspan="2">0.70</td>
                <td colspan="2">0.75</td>
                <td colspan="2">0.46</td>
                <td colspan="2">0.73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity ≥0.85</td>
                <td>0.32</td>
                <td colspan="2">0.85</td>
                <td colspan="2">0.54</td>
                <td colspan="2">0.41</td>
                <td colspan="2">0.69</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>XGBoost</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Probability cutoff 0.5</td>
                <td>0.50</td>
                <td colspan="2">0.72</td>
                <td colspan="2">0.78</td>
                <td colspan="2">0.50</td>
                <td colspan="2">0.75</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Youden J</td>
                <td>0.50</td>
                <td colspan="2">0.72</td>
                <td colspan="2">0.78</td>
                <td colspan="2">0.50</td>
                <td colspan="2">0.75</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity ≥0.85</td>
                <td>0.33</td>
                <td colspan="2">0.85</td>
                <td colspan="2">0.58</td>
                <td colspan="2">0.45</td>
                <td colspan="2">0.72</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Threshold-dependent performance, summarized from confusion matrix–derived metrics at 3 operating points, made the error trade-offs explicit (<xref ref-type="table" rid="table3">Table 3</xref>; Table S3 and Figure S2D in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). At a fixed 0.50 threshold essentially indistinguishable from the Youden J optimum on this balanced test set, XGBoost balanced sensitivity and specificity most effectively (sensitivity=0.72; specificity=0.78; positive predictive value (PPV)=0.77; NPV=0.74; <italic>F</italic><sub>1</sub>-score=0.74; MCC=0.50; accuracy=0.75). Under a clinically constrained operating point prioritizing case-finding (sensitivity ≥0.85), XGBoost required a threshold of approximately 0.33 and achieved a sensitivity of 0.85 (specificity=0.58; PPV=0.67; NPV=0.80; <italic>F</italic><sub>1</sub>-score=0.75; MCC=0.45; accuracy=0.72). The corresponding counts at this setting were TP=1731, FP=846, TN=1188, and FN=303 (n=4068). Comparator models met the same sensitivity target with lower specificity and weaker composite indices; for example, random forest and logistic regression both settled near a specificity of 0.54 with MCC=0.41, while decision tree and Bernoulli naive Bayes lost additional specificity and MCC. These results indicate that, when sensitivity is held high, XGBoost preserves more TNs and maintains stronger global agreement (MCC, balanced accuracy).</p>
        <p>Decision-curve analysis supported the same ordering of clinical use across a broad range of threshold probabilities (approximately 0.15-0.70), with XGBoost yielding the highest net benefit, random forest next, and logistic regression close behind (Figure S2C and Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Together with the calibration findings, these analyses suggest that XGBoost not only separates cases from controls most effectively but also produces usable risk estimates for thresholding and shared decision-making.</p>
        <p>Furthermore, when the global XGBoost model was compared to cancer-specific XGBoost models for the top 5 cancer types (non–small cell lung, colorectal, breast, pancreatic, and prostate cancer; <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>), the prostate cancer model emerged as the most accurate, with an accuracy of 0.84 (268/319 test patients) and an AUC of 0.88 (281/319 test patients correctly stratified). Meanwhile, pancreatic cancer posted a lower AUC of 0.68 (236/348 test patients correctly stratified), reflecting greater challenges in classification for that subgroup.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Condensed Cox proportional hazards model for overall survival in the Memorial Sloan Kettering-Metastatic cohort. Hazard ratios (HRs), 95% CIs, and <italic>P</italic> values are reported for the most influential covariates identified in the global model, showing that metastatic status, higher metastatic site count, increased tumor mutational burden (TMB), greater fraction of genome altered, and distant metastases to liver, bone, and lung are all associated with significantly elevated mortality risk, whereas primary sample type is modestly protective.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="480"/>
            <col width="380"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Risk factor</td>
                <td>HR (95% CI)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Patient with metastatic disease</td>
                <td>2.18 (1.97-2.42)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Metastatic site count</td>
                <td>1.03 (1.02-1.04)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>TMB (nonsynonymous)</td>
                <td>1.00 (0.99-1.00)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Fraction of genome altered</td>
                <td>1.32 (1.19-1.46)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Sample type (primary=ref)</td>
                <td>0.87 (0.83-0.90)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Distant metastasis: liver</td>
                <td>1.81 (1.73-1.90)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Distant metastasis: bone</td>
                <td>1.43 (1.37-1.50)</td>
                <td>&lt;.001</td>
              </tr>
              <tr valign="top">
                <td>Distant metastasis: lung</td>
                <td>1.16 (1.11-1.22)</td>
                <td>&lt;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Comparison of classification performance between the unified or global extreme gradient boosting (XGBoost) model and cancer-specific XGBoost models. Overall accuracy and area under the curve (AUC) are reported for each classifier, showing that while the global model achieves strong baseline discrimination (accuracy=0.74; AUC=0.82), several cancer-specific models, particularly prostate and breast cancer, attain even higher AUCs (0.88 and 0.85, respectively), whereas the pancreatic cancer model lags behind, reflecting underlying heterogeneity in predictability across tumor types.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="480"/>
            <col width="380"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Classifier</td>
                <td>Accuracy score</td>
                <td>AUC score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Unified or global model</td>
                <td>0.74</td>
                <td>0.82</td>
              </tr>
              <tr valign="top">
                <td>Non–small cell lung model</td>
                <td>0.71</td>
                <td>0.79</td>
              </tr>
              <tr valign="top">
                <td>Colorectal cancer model</td>
                <td>0.73</td>
                <td>0.81</td>
              </tr>
              <tr valign="top">
                <td>Breast cancer model</td>
                <td>0.76</td>
                <td>0.85</td>
              </tr>
              <tr valign="top">
                <td>Pancreatic cancer model</td>
                <td>0.72</td>
                <td>0.68</td>
              </tr>
              <tr valign="top">
                <td>Prostate cancer model</td>
                <td>0.84</td>
                <td>0.88</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Explainability</title>
        <p>The SHAP analysis of the global XGBoost model established a clear hierarchy of feature importance, identifying metastatic site count, TMB, fraction of genome altered, and distant metastases to the liver and bone as the predominant prognostic factors (<xref rid="figure3" ref-type="fig">Figure 3</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The beeswarm plots for these top features demonstrated robust predictive power, with high and low feature values cleanly separating along the SHAP value axis, indicating a consistent and strong directional impact on model output (Figures S3A-S3E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Shapley additive explanations (SHAP) beeswarm plot for the global extreme gradient boosting (XGBoost) mortality classifier in the Memorial Sloan Kettering-Metastatic cohort. Features are ordered by mean absolute SHAP value, highlighting metastatic (Met) site count, tumor mutational burden (TMB), fraction of genome altered (FGA), and distant metastases to the liver and bone as the strongest drivers of model predictions. Each point represents a patient, with horizontal position indicating the direction and magnitude of impact on predicted mortality risk and color denoting low (blue) to high (red) feature values, illustrating how extreme values systematically shift risk estimates. Central nervous system is denoted by CNS.</p>
          </caption>
          <graphic xlink:href="cancer_v12i1e74196_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>This analysis was extended to cancer-specific models, revealing a critical dual perspective, that is, the core features identified in the global model recurrently ranked among the most important across individual cancer types, while disease-specific features also emerged. For instance (Table S6 and Figures S3A-S3E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), the global top features like metastatic burden and liver metastases remained highly influential in specific models such as colorectal and prostate cancer. Concurrently, the models identified context-specific predictors, such as distant metastasis in the lung for non–small cell lung cancer, sample type for breast cancer, and distant metastasis in the male genital organs for prostate cancer (<xref rid="figure3" ref-type="fig">Figure 3</xref>; Figures S3D and S3E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). This underscores that while a common set of pan-cancer drivers exists, the models successfully capture nuanced, disease-specific metastatic behaviors.</p>
        <p>The consistent importance of features like metastatic site count was further validated by the SHAP force plot (<xref rid="figure4" ref-type="fig">Figure 4</xref>), which visually confirmed that an increasing number of metastatic sites directly correlated with a higher model output for mortality risk, reinforcing the clinical and biological plausibility of the model’s predictions.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Shapley additive explanations force plot illustrating the impact of metastatic (Met) site count on predicted mortality risk for the first 1000 patients in the Memorial Sloan Kettering-Metastatic test set. Each horizontal bar represents an individual patient, with the baseline prediction shown at the center and shifts toward higher (right, red) or lower (left, blue) mortality risk driven by the number of metastatic sites. The consistent rightward push associated with increasing metastatic site count visually confirms its strong, monotonic contribution to higher predicted risk, reinforcing the biological and clinical plausibility of the extreme gradient boosting model’s behavior.</p>
          </caption>
          <graphic xlink:href="cancer_v12i1e74196_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Survival Analysis</title>
        <p>In the Kaplan-Meier analysis (<xref rid="figure5" ref-type="fig">Figure 5</xref> and Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), patients were stratified into “metastatic” and “nonmetastatic” groups to compare differences in overall survival. The survival probability of patients in the metastatic group was notably lower than that of the nonmetastatic group, as seen in the pronounced separation of their survival curves. By approximately 80 months, patients with metastatic disease exhibited a survival probability of 0.30 (3041/10,169 patients with metastatic disease) versus 0.80 (8135/10,169 patients without metastatic disease), underscoring the substantial impact of metastatic status on long-term survival outcomes.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Kaplan-Meier survival curves comparing overall survival in patients with vs without metastatic disease in the Memorial Sloan Kettering-Metastatic cohort. Patients with metastases demonstrate markedly lower survival probabilities and shorter median survival times than patients without metastases, underscoring the substantial impact of metastatic status on long-term outcomes.</p>
          </caption>
          <graphic xlink:href="cancer_v12i1e74196_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Subgroup survival results for the top 5 cancer types were as follows: in non–small cell lung cancer (n=4686), 43.75% (n=2050) of patients experienced an event and 82.5% (n=3866) presented with metastatic disease; the median overall survival was 17.7 (IQR 8.4-33.2) months. In colorectal cancer (n=3541), the event rate was 33.52% (n=1187), the metastatic proportion was 90.82% (n=3216), and the median survival was 17.5 (IQR 10-30) months.</p>
        <p>In breast cancer (n=2601), 35.64% (n=927) experienced an event and 76.47% (n=1989) had metastatic disease, with a median survival of 33 (IQR 14.4-43.4) months. In pancreatic cancer (n=1981), the event rate was 60.27% (n=1194), the metastatic proportion was 93.54% (n=1853), and the median survival was 11.6 (IQR 4.2-21.8) months. Finally, in prostate cancer (n=2166), 26.45% (n=573) experienced an event and 81.12% (n=1757) had metastatic disease, with a median survival of 21.6 (IQR 11.6-38.2) months (Figure S5A-S5E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>In the CPH model (<xref ref-type="table" rid="table4">Table 4</xref>), factors like metastatic site count, TMB, fraction of genome altered, and distant metastases displayed hazard ratios above 1.0, indicating an increased risk of mortality. These relationships attained statistical significance, with <italic>P</italic> values under the established threshold. The proportional hazards assumption was checked through Schoenfeld residuals, and only minor deviations were noted, which did not substantially affect the covariate estimates. The model’s C-index reached approximately 0.66, reflecting moderate predictive power in distinguishing survival outcomes among different patient subgroups.</p>
        <p>An XGBoost survival model, fitted with a Cox-based loss function, achieved a higher C-index (0.70) than the standard Cox model. In Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, the model’s important features are displayed, with distant metastasis, TMB, and fraction of genome altered, among others, listed as the most important features that influence the prediction.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Using a large, multitumor metastatic cohort (MSK-MET), we developed explainable ML models for survivability prediction and complemented them with time-to-event modeling. Among conventional classifiers, XGBoost delivered the best overall performance, achieving an accuracy of 0.74 and AUC=0.82 on the held-out test set. In parallel, an XGBoost-Cox variant improved time-to-event concordance over a standard Cox model (C-index=0.70 vs 0.66, with key hazard ratios from the Cox model). Model explanations (SHAP) and hazard-based analyses converged on clinically recognizable prognostic factors: the number of metastatic sites, TMB, fraction of genome altered, and the presence of liver and bone metastases, while cancer-specific performance varied in biologically plausible ways (eg, higher AUC in prostate and lower in pancreatic). At the Youden-optimal threshold, PPV was approximately 70% and NPV approximately 80%, indicating practical use for risk stratification in balanced decision contexts. We expand the discussion of these findings in the following subsections.</p>
      </sec>
      <sec>
        <title>Model Performance, Robustness, and Benchmarking</title>
        <p>Our evaluation moved beyond accuracy to provide a comprehensive assessment across discrimination, calibration, operating-point trade-offs, and clinical use. XGBoost consistently emerged as the most dependable model, with its incremental gains in AUC and AUPRC translating into more favorable confusion-matrix profiles at clinically relevant thresholds. For instance, when sensitivity is constrained to be high, XGBoost retains more specificity and a higher MCC, reducing FPs without sacrificing case finding. Its well-calibrated probability estimates are crucial for decision support, enabling rational threshold selection and clear communication of absolute risk.</p>
        <p>The model’s practical use is further evidenced by its performance across different decision contexts. For balanced decision-making, a default threshold near 0.50 (coinciding with the Youden J optimum) provides a sensible starting point. In triage-like scenarios demanding high sensitivity, a lower threshold around 0.33 yields sensitivity near 0.85 with tolerable specificity losses and an NPV around 0.80. Decision-curve analysis confirmed that XGBoost provides a larger net benefit across a wide band of threshold probabilities, suggesting robustness to varying clinical preferences.</p>
        <p>These performance results harmonize with model explanations and survival evidence. SHAP analyses validate that predictions are driven by biologically sensible covariates, while survival curves and hazard ratios show coherent, directionally consistent effects. This triangulation across discrimination, calibration, interpretability, and survival analysis adds credibility that the learned signal reflects underlying disease biology rather than being an artifact of the classifier or data split.</p>
      </sec>
      <sec>
        <title>Parameter Sensitivity and Model Robustness</title>
        <p>During extensive grid-search tuning, we found that XGBoost hyperparameters, especially tree depth, learning rate, and number of estimators, greatly influenced AUC and classification accuracy. Shallow trees underfit, while deeper ones improved performance but risked overfitting in smaller cancer-specific cohorts. Learning rates below 0.05 caused slow convergence, while overly high rates destabilized training. Despite this variability, certain predictive features, particularly metastatic burden and genomic alterations, remained consistently impactful, underscoring the robustness of our model. Future research could explore adaptive optimization techniques such as Bayesian optimization or reinforcement learning for enhanced generalizability.</p>
      </sec>
      <sec>
        <title>Is the Performance “Good Enough”?</title>
        <p>The discriminatory performance of prognostic models is central to their clinical use. In oncology, a C-index or AUC of at least 0.70 is generally regarded as the minimum threshold for clinical usefulness, while values exceeding 0.80 are considered strong and often necessary for clinical translation [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. These interpretive standards are rooted in established methodological work on ROC analysis, where AUC values between 0.7 and 0.8 are typically described as “acceptable” and those above 0.8 as “excellent” [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>Evidence from systematic reviews of head and neck cancer prognostic models demonstrates how these thresholds translate into practice. Philip et al [<xref ref-type="bibr" rid="ref38">38</xref>] reported that most radiomics-based prognostic models achieved C-indices below 0.70, underscoring their weak discriminatory power and limited clinical applicability. In contrast, externally validated models that approached or exceeded the 0.80 mark, as summarized by Dretzke et al [<xref ref-type="bibr" rid="ref39">39</xref>], were identified as more robust and clinically relevant, supporting their potential integration into patient management. These findings emphasize that models below the 0.70 threshold may remain academically interesting but lack sufficient reliability for routine use, whereas those meeting or surpassing 0.80 provide stronger grounds for clinical adoption.</p>
        <p>Comparisons with deep learning approaches further reinforce this interpretation. Gouthamchand et al [<xref ref-type="bibr" rid="ref40">40</xref>] found that the most competitive deep learning models in head and neck oncology consistently achieved AUCs of 0.80 or higher, placing them above many handcrafted radiomics models. This pattern suggests that 0.80 represents not merely an aspirational benchmark but an increasingly practical requirement for prognostic tools aiming to match or surpass the performance of advanced ML methods already being piloted in clinical contexts.</p>
        <p>Regulatory precedents also align with this threshold. Analyses of Food and Drug Administration (FDA)–cleared AI- and ML-enabled medical devices, such as the LumineticsCore software, indicate that most systems demonstrate discriminatory performance in the high 0.7 to 0.9 range, with pivotal studies for devices such as IDx-DR reporting AUCs above 0.80. Joshi et al [<xref ref-type="bibr" rid="ref41">41</xref>] provide a comprehensive overview of this landscape, confirming that AUCs in this range are characteristic of AI systems deemed safe and effective for clinical use. FDA guidance similarly emphasizes the importance of demonstrating robust discriminatory performance in supporting claims of clinical effectiveness [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>However, the thresholds discussed here should not be treated as rigid cutoffs. Clinical acceptability is not defined by a single number but by the context of the decision, the potential harms and benefits, and the baseline risk of the population. In high-stakes oncology decisions such as treatment intensification that carries significant morbidity, a well-calibrated model with an AUC of 0.78 but demonstrable net benefit at clinically relevant thresholds may be more valuable than a model with an AUC of 0.84 that is poorly calibrated or unstable. Thus, performance thresholds should be interpreted as contextual benchmarks rather than universal standards.</p>
        <p>It is also important to recognize that discrimination does not equal usefulness. AUC or C-index quantifies a model’s ability to rank patients by risk, but it does not measure whether the absolute risk estimates are accurate. For clinical practice, calibration is equally critical, that is, whether a patient predicted to have a 30% risk actually experiences that outcome about 30% of the time. Calibration-in-the-large, calibration slope, Brier score, and visual calibration plots provide this essential information. Moreover, calibration at clinically meaningful cut points (eg, deciles of predicted risk) is necessary to ensure predictions are trustworthy for patient counseling and decision-making.</p>
        <p>Finally, prognostic end points in oncology are often time-to-event outcomes. In such settings, the C-index is commonly used, but time-dependent AUCs provide more clinically interpretable information by specifying the prediction horizon (eg, 12-month or 36-month survival). This allows clinicians to understand how well the model performs over the actual time frames relevant to follow-up and treatment decisions. Where applicable, handling of censoring and competing risks should also be reported, particularly if the terminal outcome is mortality.</p>
        <p>Taken together, these considerations emphasize that while an AUC ≥0.80 is encouraging and aligns with many published benchmarks, true clinical use depends on calibration, decision-curve analysis, and performance at specific decision thresholds relevant to patient care.</p>
      </sec>
      <sec>
        <title>Clinically Actionable Metrics, Implications, and Use</title>
        <p>The performance metrics of our optimal XGBoost model are translated into clinically actionable insights through decision-curve analysis. At the threshold maximizing the Youden index, the model achieves a PPV of 70.3% and an NPV of 79.8%. This indicates that a high-risk prediction from the model would correspond to an actual terminal outcome in approximately 7 out of 10 cases, supporting its use in justifying intensified monitoring or treatment. Conversely, the high NPV means 8 out of 10 low-risk predictions correctly identify patients with a more favorable prognosis, providing a quantitative basis for discussions about de-escalating care and reducing treatment-related morbidity.</p>
        <p>Critically, the decision-curve analysis confirms the model’s practical use across a spectrum of clinical decision-making preferences. The analysis demonstrates that the XGBoost model provides a superior net benefit compared to both alternative models and the default strategies of treating all or no patients across a wide range of threshold probabilities. This indicates that using the model to guide decisions is clinically advantageous regardless of whether the clinician prioritizes avoiding FPs (overtreatment) or FNs (missed interventions). The point at which the net benefit of the model crosses the “treat all” strategy is particularly important, as it defines the minimum probability at which the model’s prediction becomes more useful than intervening in every case. The sustained positive net benefit of our model underscores its robustness and potential to improve patient outcomes by aligning interventions with individualized risk.</p>
        <p>The combination of strong predictive performance and model explainability supports several immediate clinical use cases: (1) patient triage and counseling via individualized risk summaries, (2) treatment planning and shared decision-making aided by feature-level rationales, and (3) integration into electronic health record (EHR)–embedded dashboards for longitudinal monitoring. The top predictive features are routinely available in most cancer centers, facilitating adoption with minimal workflow disruption.</p>
      </sec>
      <sec>
        <title>Model Explainability and Biological Plausibility</title>
        <p>Our model selection was guided by metrics that reflect clinical reality, prioritizing both accuracy and AUROC. The AUROC is especially critical in medical settings, as it captures the essential balance between TPs and FPs, where the cost of FNs (missed cases) is high [<xref ref-type="bibr" rid="ref43">43</xref>]. This evaluation consistently identified XGBoost as the top performer. Its superior ability to capture complex, nonlinear relationships within high-dimensional clinical and genomic data, as evidenced by its significant lead over other models, aligns with established literature on gradient boosting for cancer predictions [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. The model’s discriminative power is substantial, with an AUC of 0.82 representing an excess over chance of 0.32. This can be intuitively communicated as a “number needed to screen” of approximately 3-4 patients to identify 1 additional correct classification relative to chance, providing a tangible sense of clinical yield. This strong performance is operationalized at the Youden-optimal threshold, where PPV of approximately 70% and NPV of approximately 80% offer concrete use for risk stratification, effectively ruling in or ruling out high-risk status to guide downstream clinical actions.</p>
        <p>Crucially, this high performance is rendered transparent and trustworthy through SHAP-based interpretation. The model’s decisions are not black box outputs but are demonstrably driven by biologically plausible features that align with oncological principles. SHAP analysis consistently identified metastatic site count, liver and bone metastases, TMB, and fraction of genome altered as the top predictors; moreover, these findings are well-supported in the literature [<xref ref-type="bibr" rid="ref46">46</xref>]. This biological plausibility was further refined and validated in our cancer-specific models. For example, the prominence of lung metastases in non–small cell lung cancer and prostate cancer models, and the major role of sample type in the breast cancer model, demonstrate a nuanced understanding of disease-specific pathophysiology as pointed out in other studies [<xref ref-type="bibr" rid="ref47">47</xref>]. The distinct, right-skewed SHAP distribution for “metastatic count” and its direct correlation with risk in force plots further reinforced the importance of total lesion burden in driving high-risk predictions. This coherence between model explanations and established clinical knowledge across both pan-cancer and disease-specific contexts is fundamental for building clinician trust and facilitating the integration of this tool into point-of-care decision support.</p>
      </sec>
      <sec>
        <title>Survival Modeling and Risk Stratification</title>
        <sec>
          <title>Time-to-Event Insights</title>
          <p>Survival analysis confirmed classification results. Kaplan-Meier curves revealed steep survival drops in patients with versus without metastatic disease (0.3 vs 0.8 at 80 months). Traditional Cox models identified high hazard ratios for metastatic site count, TMB, and fraction of genome altered but struggled with their linearity assumptions (C-index=0.66). In contrast, XGBoost-based survival modeling better captured nonlinearity, achieving a higher C-index of 0.70.</p>
        </sec>
        <sec>
          <title>Subgroup Survival Analysis</title>
          <p>The 5 disease-specific results highlight substantial heterogeneity in survival outcomes across cancer types within the same analytic framework. Pancreatic cancer exhibited the shortest median survival alongside the highest event rate and metastatic proportion, underscoring its aggressive clinical course even within a predominantly metastatic cohort. By contrast, breast cancer showed the longest median survival (as visualized in the respective Kaplan-Meier curves) despite a sizable metastatic share, suggesting comparatively slower disease trajectories and greater effectiveness of available therapies in this subgroup. Non–small cell lung cancer and colorectal cancer shared similar median survivals near 17-18 months, though colorectal cancer carried the highest metastatic proportion among the five. This juxtaposition implies that crude metastatic prevalence alone does not fully account for survival differences, motivating disease-specific modeling of covariates and sites of spread. Prostate cancer combined the lowest event rate with a midrange metastatic proportion and an intermediate median survival, indicating a slower accumulation of events over time relative to the other cancers. Together, these patterns justify presenting separate Kaplan-Meier curves and parsimonious Cox summaries per cancer type, while enabling a consistent cross-cancer narrative that focuses on differences in event rates, metastatic burden, and median survival.</p>
        </sec>
      </sec>
      <sec>
        <title>Novelty and Comparison to Prior Work</title>
        <p>Although significant studies [<xref ref-type="bibr" rid="ref48">48</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] (Table S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) have been conducted in the realm of predicting cancer survivability, our work stands out by bringing new enhancements that significantly contribute to better prediction of cancer survivability, particularly by the thorough comparison of ML models, the strategic use of both global and cancer-specific models, in-depth model explainability using SHAP values, and detailed survival analysis.</p>
        <p>First, we begin by comparing 5 different ML models (XGBoost, naive Bayes, decision tree, logistic regression, and random forest), each rigorously tuned using exhaustive grid search for hyperparameters. This approach ensures that each model is thoroughly tested for the task of predicting cancer survivability, a detail that is often overlooked in existing literature. Many studies tend to focus on one or two models, without ensuring that the models are fully evaluated for comparison. For example, prior work done by Zhao et al [<xref ref-type="bibr" rid="ref48">48</xref>], Tapak et al [<xref ref-type="bibr" rid="ref49">49</xref>], and Nicolò et al [<xref ref-type="bibr" rid="ref50">50</xref>] evaluates models but lacks the thoroughness in hyperparameter tuning that our study provides. We believe that this rigorous approach enhances the reliability of our findings and provides a more comprehensive understanding of which model performs best under specific conditions.</p>
        <p>Moreover, we use a methodology that is designed to first use a global model to gain a general overview of the most important patterns and predictors for metastatic cancer survivability, followed by a deeper dive into cancer-specific models. This 2-tiered approach is critical because it allows us to identify broad patterns while also uncovering nuances that might be missed or misinterpreted in a global model. Many published studies, such as Kourou et al [<xref ref-type="bibr" rid="ref51">51</xref>] and Zhao et al [<xref ref-type="bibr" rid="ref48">48</xref>], predominantly dwell on global accuracy metrics without taking this crucial next step to explore more specific patterns within subgroups of the data. By contrast, our approach provides a dual perspective, that is, broad insights from the global model and detailed, cancer-specific insights that we believe are essential for advancing personalized medicine.</p>
        <p>The use of SHAP values in our study is particularly noteworthy. We did not just stop at model performance but delved deep into explainability, first for the global model and then for the cancer-specific models. This process allowed us to generate refined explainability that highlights not just which features are important, but how their importance varies across different types of cancer. The use of SHAP in both global and specific contexts is a novel approach that we believe adds substantial value to the study. While many studies, such as Maouche et al [<xref ref-type="bibr" rid="ref52">52</xref>], use SHAP or similar methods, they often do so at a surface level, without the comprehensive, model-specific analysis that we provide. This depth of analysis is crucial for understanding the true implications of the model’s predictions and for making informed clinical decisions.</p>
        <p>Finally, we conduct survival analysis after predicting cancer survival with explainable ML to translate model predictions into clinically meaningful insights about patient outcomes over time. While ML classifiers can distinguish between patients likely to survive or not, survival analysis provides a time-to-event perspective, capturing not just “if” but “when” an event such as death occurs. This temporal dimension allows researchers and clinicians to estimate hazard rates, median survival times, and differences between risk groups. Coupled with explainable ML (eg, SHAP values), survival analysis also helps validate whether the features driving predictions correspond to biologically and clinically relevant risk factors. Together, this integration strengthens both the predictive performance and interpretability of the model, supporting its potential use as a trustworthy decision-support tool in oncology practice.</p>
      </sec>
      <sec>
        <title>Limitations and Biases</title>
        <p>We acknowledge several limitations that must be considered alongside the model’s strong performance. First, generalizability may be constrained by inherent biases in the MSK-MET dataset, which is a single-institution cohort from a tertiary care center. The sample underrepresents socioeconomically disadvantaged and racially diverse populations, with non-White ethnicities comprising less than 15%, and genomic profiling was preferentially performed in advanced-stage cases, potentially inflating the importance of features like TMB. These biases may partly explain the performance disparities observed across cancer types. Furthermore, technical limitations exist; for instance, linear and naive Bayes baselines may have been disadvantaged by the use of label encoding for categorical variables.</p>
        <p>These factors underscore the necessity for several future steps before clinical deployment. External validation using diverse, multicenter cohorts with stratified sampling is crucial to improve equity, fairness, and generalizability. Prospective calibration should be rechecked under local prevalence shifts, with techniques like Platt scaling or isotonic regression applied, if necessary. Additionally, prospective benchmarking against oncologist-estimated prognoses and real-world deployment outcomes will be critical to establish clinical noninferiority or superiority. Finally, while this study provides a comprehensive classification and survival analysis, future work should focus on integrating these outputs; for example, by generating risk-stratified Kaplan-Meier curves at chosen thresholds to unify the decision framework across binary and time-to-event end points.</p>
      </sec>
      <sec>
        <title>Future Directions and Deployment Considerations</title>
        <sec>
          <title>Improving Rare Cancer Predictions</title>
          <p>Model performance was limited for rare cancers (eg, anal cancer: n=68; AUC=0.61) due to data scarcity. To address this, we suggest (1) using transfer learning to initialize models with global XGBoost weights, (2) using synthetic oversampling (eg, synthetic minority oversampling technique, adaptive synthetic sampling) during training, and (3) leveraging federated learning to aggregate data across institutions while preserving privacy. These techniques can bolster performance in underrepresented malignancies.</p>
        </sec>
        <sec>
          <title>EHR Integration and Clinical Deployment</title>
          <p>While the primary aim of this study was informatics-driven discovery, the robustness of our model supports a clear pathway for clinical translation through EHR integration. We envision a real-time, standards-based system where automated risk alerts are seamlessly integrated into clinical workflows, particularly within tumor-board discussions (<xref rid="figure6" ref-type="fig">Figure 6</xref>). By leveraging Fast Healthcare Interoperability Resources Representational State Transfer hook events, patient risk scores could be dynamically updated and surfaced directly within the EHR as cases are reviewed. To ensure both privacy and computational efficiency, the XGBoost model would be deployed as a containerized service within the hospital’s secure analytics infrastructure (eg, Epic Cogito), keeping protected health information on premises while using available hardware acceleration.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Electronic health record (EHR) integration and clinical deployment. Framework illustrating how patient risk scores and Shapley additive explanations (SHAP)–based explanations are integrated into the EHR and surfaced during tumor-board discussions to enable transparent, real-time clinical decision support. FHIR: Fast Healthcare Interoperability Resources; REST: Representational State Transfer; XGBoost: extreme gradient boosting.</p>
            </caption>
            <graphic xlink:href="cancer_v12i1e74196_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>A critical differentiator of this approach is the coupling of each risk alert with a SHAP-based explanation embedded directly into the oncology dashboard. This transforms the model from a “black box” into an “explain-and-act” tool, providing clinicians with immediate, interpretable rationale by highlighting the top clinical and genomic features contributing to an individual’s risk score. Alert protocols would be tiered and threshold-aware, directly translating the model’s predictive values into actionable clinical guidance. For instance, a low-risk classification (supported by an NPV of 79.8%) could justify lengthening follow-up intervals, while a high-risk flag (PPV of 70.3%) would prompt rapid biomarker reassessment and discussion of treatment intensification.</p>
          <p>An initial rollout would prioritize safety and reliability through a prospective pilot study, continuous performance monitoring for model drift, and an implementation that minimally disrupts existing workflows. The validation evidence and explainable framework presented in this study provide a solid foundation for a potential regulatory submission as a Class II clinical decision support tool, paving the way for a new generation of transparent, AI-augmented oncology care.</p>
        </sec>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>In this large-scale, pan-cancer study, we developed and validated an interpretable ML framework for predicting survivability in patients with metastatic cancer. By leveraging the comprehensive MSK-MET cohort, we demonstrated that an XGBoost classifier robustly predicts overall survival (AUC=0.82, accuracy=0.74), outperforming other conventional ML models. Crucially, the integration of SHAP explainability illuminated the model’s decision-making process, consistently identifying metastatic site count, TMB, fraction of genome altered, and the presence of liver and bone metastases as major prognostic features across diverse tumor types. This biological plausibility is fundamental for building clinical trust.</p>
        <p>Our 2-tiered modeling approach, combining a unified pan-cancer perspective with targeted, cancer-specific submodels, provided both broad generalizability and nuanced, disease-tailored insights. This was evidenced by the varying performance across cancer types, such as the high predictive accuracy for prostate cancer (AUC=0.88) contrasted with the greater challenges in pancreatic cancer (AUC=0.68). Furthermore, the survival analysis corroborated the classification findings, with the XGBoost-Cox model (C-index=0.70) capturing nonlinear relationships more effectively than the traditional Cox model (C-index=0.66), and Kaplan-Meier curves starkly illustrating the significant survival disadvantage associated with metastatic disease.</p>
        <p>The clinical utility of our model is underscored by its strong predictive values (PPV=70% and NPV=80% at the Youden-optimal threshold) and its demonstrated net benefit across a range of decision thresholds. By reconciling high performance with transparent, actionable explanations, this work provides a foundational framework for the next generation of clinical decision-support tools in oncology. Future efforts should focus on external validation in multicenter cohorts, prospective evaluation integrated within EHR systems, and addressing performance gaps in rare cancer subtypes to ensure equitable and widespread clinical adoption.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional tables and figures.</p>
        <media xlink:href="cancer_v12i1e74196_app1.docx" xlink:title="DOCX File, 2147 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AUPRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">C-index</term>
          <def>
            <p>concordance index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CPH</term>
          <def>
            <p>Cox proportional hazards</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CV</term>
          <def>
            <p>cross-validation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">FDA</term>
          <def>
            <p>US Food and Drug Administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MCC</term>
          <def>
            <p>Matthews correlation coefficient</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">MSK-MET</term>
          <def>
            <p>Memorial Sloan Kettering-Metastatic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">SHAP</term>
          <def>
            <p>Shapley additive explanations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">TMB</term>
          <def>
            <p>tumor mutational burden</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">TN</term>
          <def>
            <p>true negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank cBioPortal for Cancer Genomics for freely making available the data that were used in this study. We are also grateful to the FABRIC [<xref ref-type="bibr" rid="ref53">53</xref>] team for their support.</p>
      <p>No generative artificial intelligence tools were used. Responsibility for the final manuscript lies entirely with the authors.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>This work was supported by the National Science Foundation under Grant No. 2201583. The sponsor had no role in the study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets used in this study are available in the Memorial Sloan Kettering-Metastatic repository at cBioPortal [<xref ref-type="bibr" rid="ref54">54</xref>]. The code is available at GitHub [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Giaquinto</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Jemal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Cancer statistics, 2024</article-title>
          <source>CA Cancer J Clin</source>
          <year>2024</year>
          <volume>74</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.3322/caac.21820"/>
          </comment>
          <pub-id pub-id-type="doi">10.3322/caac.21820</pub-id>
          <pub-id pub-id-type="medline">38230766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>What are the applications of single-cell RNA sequencing in cancer research: a systematic review</article-title>
          <source>J Exp Clin Cancer Res</source>
          <year>2021</year>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jeccr.biomedcentral.com/articles/10.1186/s13046-021-01955-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13046-021-01955-1</pub-id>
          <pub-id pub-id-type="medline">33975628</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13046-021-01955-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8111731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marusyk</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Polyak</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Tumor heterogeneity: causes and consequences</article-title>
          <source>Biochim Biophys Acta</source>
          <year>2010</year>
          <volume>1805</volume>
          <issue>1</issue>
          <fpage>105</fpage>
          <lpage>117</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19931353"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.bbcan.2009.11.002</pub-id>
          <pub-id pub-id-type="pii">S0304-419X(09)00074-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC2814927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>What are the applications of single-cell RNA sequencing in cancer research: a systematic review</article-title>
          <source>J Exp Clin Cancer Res</source>
          <year>2021</year>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>163</fpage>
          <lpage>193</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jeccr.biomedcentral.com/articles/10.1186/s13046-021-01955-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13046-021-01955-1</pub-id>
          <pub-id pub-id-type="medline">33975628</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13046-021-01955-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8111731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kuprel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Novoa</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Swetter</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Blau</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Thrun</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Dermatologist-level classification of skin cancer with deep neural networks</article-title>
          <source>Nature</source>
          <year>2017</year>
          <volume>542</volume>
          <issue>7639</issue>
          <fpage>115</fpage>
          <lpage>118</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28117445"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nature21056</pub-id>
          <pub-id pub-id-type="medline">28117445</pub-id>
          <pub-id pub-id-type="pii">nature21056</pub-id>
          <pub-id pub-id-type="pmcid">PMC8382232</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Balagurunathan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stringfield</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Schabath</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Gillies</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Prediction of pathological nodal involvement by CT-based radiomic features of the primary tumor in patients with clinically node-negative peripheral lung adenocarcinomas</article-title>
          <source>Med Phys</source>
          <year>2018</year>
          <volume>45</volume>
          <issue>6</issue>
          <fpage>2518</fpage>
          <lpage>2526</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29624702"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mp.12901</pub-id>
          <pub-id pub-id-type="medline">29624702</pub-id>
          <pub-id pub-id-type="pmcid">PMC6161827</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montazeri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montazeri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montazeri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beigzadeh</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Machine learning models in breast cancer survival prediction</article-title>
          <source>Technol Health Care</source>
          <year>2016</year>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.3233/THC-151071</pub-id>
          <pub-id pub-id-type="medline">26409558</pub-id>
          <pub-id pub-id-type="pii">THC--1-THC1071</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dietrich</surname>
              <given-names>CF</given-names>
            </name>
          </person-group>
          <article-title>Lymph node metastasis prediction from primary breast cancer US images using deep learning</article-title>
          <source>Radiology</source>
          <year>2020</year>
          <volume>294</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.2019190372</pub-id>
          <pub-id pub-id-type="medline">31746687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foersch</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Eckstein</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Gach</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Woerl</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Geiger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Glasner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schelbert</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Porubsky</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kreft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hartmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agaimy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for diagnosis and survival prediction in soft tissue sarcoma</article-title>
          <source>Ann Oncol</source>
          <year>2021</year>
          <volume>32</volume>
          <issue>9</issue>
          <fpage>1178</fpage>
          <lpage>1187</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0923-7534(21)02055-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.annonc.2021.06.007</pub-id>
          <pub-id pub-id-type="medline">34139273</pub-id>
          <pub-id pub-id-type="pii">S0923-7534(21)02055-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doppalapudi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Badr</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Lung cancer survival period prediction and understanding: Deep learning approaches</article-title>
          <source>Int J Med Inform</source>
          <year>2021</year>
          <volume>148</volume>
          <fpage>104371</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2020.104371</pub-id>
          <pub-id pub-id-type="medline">33461009</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(20)31907-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>YD</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cross-modal prostate cancer segmentation via self-attention distillation</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2022</year>
          <volume>26</volume>
          <issue>11</issue>
          <fpage>5298</fpage>
          <lpage>5309</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2021.3127688</pub-id>
          <pub-id pub-id-type="medline">34767517</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Multi-parametric MRI-based radiomics models for predicting molecular subtype and androgen receptor expression in breast cancer</article-title>
          <source>Front Oncol</source>
          <year>2021</year>
          <volume>11</volume>
          <fpage>706733</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34490107"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2021.706733</pub-id>
          <pub-id pub-id-type="medline">34490107</pub-id>
          <pub-id pub-id-type="pmcid">PMC8416497</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guestrin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>XGBoost: a scalable tree boosting system</article-title>
          <year>2016</year>
          <conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '16)</conf-name>
          <conf-date>2016 Aug 13</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Association for Computing Machinery</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/2939672.2939785"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Machine learning models for predicting survival in lung cancer patients undergoing microwave ablation</article-title>
          <source>Front Med (Lausanne)</source>
          <year>2025</year>
          <volume>12</volume>
          <fpage>1561083</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fmed.2025.1561083"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fmed.2025.1561083</pub-id>
          <pub-id pub-id-type="medline">40400639</pub-id>
          <pub-id pub-id-type="pmcid">PMC12092218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noman</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Fadel</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Henedak</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Attia</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Essam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elmaasarawii</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fouad</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Eltasawi</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Atabany</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Leveraging survival analysis and machine learning for accurate prediction of breast cancer recurrence and metastasis</article-title>
          <source>Sci Rep</source>
          <year>2025</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>3728</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-025-87622-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-025-87622-3</pub-id>
          <pub-id pub-id-type="medline">39880868</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-025-87622-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC11779859</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buk Cardoso</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cunha Parro</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Verzinhasse Peres</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Curado</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Wünsch Filho</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Natasha Toporcov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Machine learning for predicting survival of colorectal cancer patients</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>8874</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-35649-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-35649-9</pub-id>
          <pub-id pub-id-type="medline">37264045</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-35649-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10235087</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ben Kridis</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Guven</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Dharmarajan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Editorial: advances in the treatment of hormonal receptor positive (HR+) breast cancer</article-title>
          <source>Front Oncol</source>
          <year>2024</year>
          <volume>14</volume>
          <fpage>1449566</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fonc.2024.1449566"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fonc.2024.1449566</pub-id>
          <pub-id pub-id-type="medline">39386197</pub-id>
          <pub-id pub-id-type="pmcid">PMC11461348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1023/A:1010933404324"/>
          </comment>
          <pub-id pub-id-type="doi">10.1023/a:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>297</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/BF00994018"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/bf00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>444</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/nature14539"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Machine learning-based integration develops an immune-derived lncRNA signature for improving outcomes in colorectal cancer</article-title>
          <source>Nat Commun</source>
          <year>2022</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>816</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-022-28421-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-022-28421-6</pub-id>
          <pub-id pub-id-type="medline">35145098</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-022-28421-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8831564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bergenstråhle</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stenbeck</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Abid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Borg</surname>
              <given-names>Å</given-names>
            </name>
            <name name-style="western">
              <surname>Maaskola</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lundeberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Integrating spatial gene expression and breast tumour morphology via deep learning</article-title>
          <source>Nat Biomed Eng</source>
          <year>2020</year>
          <volume>4</volume>
          <issue>8</issue>
          <fpage>827</fpage>
          <lpage>834</lpage>
          <pub-id pub-id-type="doi">10.1038/s41551-020-0578-x</pub-id>
          <pub-id pub-id-type="medline">32572199</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41551-020-0578-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pacheco</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Narrandes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Applications of support vector machine (SVM) learning in cancer genomics</article-title>
          <source>Cancer Genomics Proteomics</source>
          <year>2018</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29275361"/>
          </comment>
          <pub-id pub-id-type="doi">10.21873/cgp.20063</pub-id>
          <pub-id pub-id-type="medline">29275361</pub-id>
          <pub-id pub-id-type="pii">15/1/41</pub-id>
          <pub-id pub-id-type="pmcid">PMC5822181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Do</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen-Thi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hoang-The</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen-Thanh</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Identification of potential crucial genes and key pathways shared in inflammatory bowel disease and cervical cancer by machine learning and integrated bioinformatics</article-title>
          <source>Comput Biol Med</source>
          <year>2022</year>
          <volume>149</volume>
          <fpage>105996</fpage>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105996</pub-id>
          <pub-id pub-id-type="medline">36049413</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(22)00720-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stefanou</surname>
              <given-names>IK</given-names>
            </name>
            <name name-style="western">
              <surname>Dovrolis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gazouli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Theodorou</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zografos</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Toutouzas</surname>
              <given-names>KG</given-names>
            </name>
          </person-group>
          <article-title>miRNAs expression pattern and machine learning models elucidate risk for gastric GIST</article-title>
          <source>Cancer Biomark</source>
          <year>2022</year>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>237</fpage>
          <lpage>247</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.3233/CBM-210173?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.3233/CBM-210173</pub-id>
          <pub-id pub-id-type="medline">35213356</pub-id>
          <pub-id pub-id-type="pii">CBM210173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pawelka</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Laczmanska</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Karpinski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Supplitt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Witkiewicz</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Knychalski</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pelak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zebrowska</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Laczmanski</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Machine-learning-based analysis identifies miRNA expression profile for diagnosis and prediction of colorectal cancer: a preliminary study</article-title>
          <source>Cancer Genomics Proteomics</source>
          <year>2022</year>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>503</fpage>
          <lpage>511</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35732322"/>
          </comment>
          <pub-id pub-id-type="doi">10.21873/cgp.20336</pub-id>
          <pub-id pub-id-type="medline">35732322</pub-id>
          <pub-id pub-id-type="pii">19/4/503</pub-id>
          <pub-id pub-id-type="pmcid">PMC9247881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghobadi</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Emamzadeh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Afsaneh</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Exploration of mRNAs and miRNA classifiers for various ATLL cancer subtypes using machine learning</article-title>
          <source>BMC Cancer</source>
          <year>2022</year>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>433</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmccancer.biomedcentral.com/articles/10.1186/s12885-022-09540-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12885-022-09540-1</pub-id>
          <pub-id pub-id-type="medline">35449091</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12885-022-09540-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9026691</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Integrative analysis identifies potential DNA methylation biomarkers for pan-cancer diagnosis and prognosis</article-title>
          <source>Epigenetics</source>
          <year>2019</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>67</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.1080/15592294.2019.1568178?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/15592294.2019.1568178</pub-id>
          <pub-id pub-id-type="medline">30696380</pub-id>
          <pub-id pub-id-type="pmcid">PMC6380428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bing</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han-Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Ultrasensitive detection of circulating tumour DNA via deep methylation sequencing aided by machine learning</article-title>
          <source>Nat Biomed Eng</source>
          <year>2021</year>
          <volume>5</volume>
          <issue>6</issue>
          <fpage>586</fpage>
          <lpage>599</lpage>
          <pub-id pub-id-type="doi">10.1038/s41551-021-00746-5</pub-id>
          <pub-id pub-id-type="medline">34131323</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41551-021-00746-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avanzo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stancanello</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pirrone</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sartor</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Radiomics and deep learning in lung cancer</article-title>
          <source>Strahlenther Onkol</source>
          <year>2020</year>
          <volume>196</volume>
          <issue>10</issue>
          <fpage>879</fpage>
          <lpage>887</lpage>
          <pub-id pub-id-type="doi">10.1007/s00066-020-01625-9</pub-id>
          <pub-id pub-id-type="medline">32367456</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00066-020-01625-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Janssen</surname>
              <given-names>BV</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoef</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wesdorp</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Huiskens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>OJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marquering</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Stoker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kazemier</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Besselink</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>Imaging-based machine-learning models to predict clinical outcomes and identify biomarkers in pancreatic cancer: a scoping review</article-title>
          <source>Ann Surg</source>
          <year>2022</year>
          <volume>275</volume>
          <issue>3</issue>
          <fpage>560</fpage>
          <lpage>567</lpage>
          <pub-id pub-id-type="doi">10.1097/SLA.0000000000005349</pub-id>
          <pub-id pub-id-type="medline">34954758</pub-id>
          <pub-id pub-id-type="pii">00000658-202203000-00024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stanzione</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verde</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Romeo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Boccadifuoco</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mainenti</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Maurea</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Radiomics and machine learning applications in rectal cancer: current update and future perspectives</article-title>
          <source>World J Gastroenterol</source>
          <year>2021</year>
          <volume>27</volume>
          <issue>32</issue>
          <fpage>5306</fpage>
          <lpage>5321</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wjgnet.com/1007-9327/full/v27/i32/5306.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.3748/wjg.v27.i32.5306</pub-id>
          <pub-id pub-id-type="medline">34539134</pub-id>
          <pub-id pub-id-type="pmcid">PMC8409167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Development of a novel combined nomogram model integrating deep learning-pathomics, radiomics and immunoscore to predict postoperative outcome of colorectal cancer lung metastasis patients</article-title>
          <source>J Hematol Oncol</source>
          <year>2022</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jhoonline.biomedcentral.com/articles/10.1186/s13045-022-01225-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13045-022-01225-3</pub-id>
          <pub-id pub-id-type="medline">35073937</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13045-022-01225-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC8785554</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Luthra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>DiNatale</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Nandakumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walch</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chatila</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>Madupuri</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kundra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bielski</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Mastrogiacomo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Donoghue</surname>
              <given-names>MTA</given-names>
            </name>
            <name name-style="western">
              <surname>Boire</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chandarlapaty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ganesh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Harding</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Iacobuzio-Donahue</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Razavi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Reznik</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rudin</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Zamarin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abida</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Abou-Alfa</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Aghajanian</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cercek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Iyer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Janjigian</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Motzer</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>O'Reilly</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Postow</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Raj</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Riely</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Robson</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Safonov</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shoushtari</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Tap</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Varghese</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Voss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yaeger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zauderer</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Abu-Rustum</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Aguilar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bochner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hakimi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jarnagin</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Molena</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rios-Doria</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Russo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Strong</surname>
              <given-names>VE</given-names>
            </name>
            <name name-style="western">
              <surname>Chakravarty</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ellenson</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Gopalan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reis-Filho</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Weigelt</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ladanyi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>SP</given-names>
            </name>
            <name name-style="western">
              <surname>Massague</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zehir</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Solit</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Bakhoum</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez-Vega</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schultz</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Genomic characterization of metastatic patterns from prospective clinical sequencing of 25,000 patients</article-title>
          <source>Cell</source>
          <year>2022</year>
          <volume>185</volume>
          <issue>3</issue>
          <fpage>563</fpage>
          <lpage>575.e11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0092-8674(22)00003-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cell.2022.01.003</pub-id>
          <pub-id pub-id-type="medline">35120664</pub-id>
          <pub-id pub-id-type="pii">S0092-8674(22)00003-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC9147702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramírez-Mena</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Andrés-León</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez-Cubero</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Anguita-Ruiz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez-Gonzalez</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alcala-Fdez</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Explainable artificial intelligence to predict and identify prostate cancer tissue by gene expression</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2023</year>
          <volume>240</volume>
          <fpage>107719</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0169-2607(23)00385-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2023.107719</pub-id>
          <pub-id pub-id-type="medline">37453366</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(23)00385-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mandrekar</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>Receiver operating characteristic curve in diagnostic test assessment</article-title>
          <source>J Thorac Oncol</source>
          <year>2010</year>
          <volume>5</volume>
          <issue>9</issue>
          <fpage>1315</fpage>
          <lpage>1316</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1556-0864(15)30604-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/JTO.0b013e3181ec173d</pub-id>
          <pub-id pub-id-type="medline">20736804</pub-id>
          <pub-id pub-id-type="pii">S1556-0864(15)30604-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nahm</surname>
              <given-names>FS</given-names>
            </name>
          </person-group>
          <article-title>Receiver operating characteristic curve: overview and practical use for clinicians</article-title>
          <source>Korean J Anesthesiol</source>
          <year>2022</year>
          <volume>75</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35124947"/>
          </comment>
          <pub-id pub-id-type="doi">10.4097/kja.21209</pub-id>
          <pub-id pub-id-type="medline">35124947</pub-id>
          <pub-id pub-id-type="pii">kja.21209</pub-id>
          <pub-id pub-id-type="pmcid">PMC8831439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Philip</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McKiddie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nath</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A systematic review and meta-analysis of predictive and prognostic models for outcome prediction using positron emission tomography radiomics in head and neck squamous cell carcinoma patients</article-title>
          <source>Cancer Med</source>
          <year>2023</year>
          <volume>12</volume>
          <issue>15</issue>
          <fpage>16181</fpage>
          <lpage>16194</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37353996"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cam4.6278</pub-id>
          <pub-id pub-id-type="medline">37353996</pub-id>
          <pub-id pub-id-type="pmcid">PMC10469753</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dretzke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abou-Foul</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Albon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hillier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Scandrett</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mehanna</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nankivell</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Systematic review of prognostic models for predicting recurrence and survival in patients with treated oropharyngeal cancer</article-title>
          <source>BMJ Open</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>12</issue>
          <fpage>e090393</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=39638589"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2024-090393</pub-id>
          <pub-id pub-id-type="medline">39638589</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2024-090393</pub-id>
          <pub-id pub-id-type="pmcid">PMC11624838</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gouthamchand</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Fonseca</surname>
              <given-names>LAF</given-names>
            </name>
            <name name-style="western">
              <surname>Hoebers</surname>
              <given-names>FJP</given-names>
            </name>
            <name name-style="western">
              <surname>Fijten</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dekker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wee</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Prognostic modeling in head and neck cancer: deep learning or handcrafted radiomics?</article-title>
          <source>BJR Artif Intell</source>
          <year>2025</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>ubaf008</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1093/bjrai/ubaf008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bjrai/ubaf008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Araveeti</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Adhikari</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Garg</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bhandari</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>FDA-approved artificial intelligence and machine learning (AI/ML)-enabled medical devices: an updated landscape</article-title>
          <source>Electronics</source>
          <year>2024</year>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>498</fpage>
          <pub-id pub-id-type="doi">10.3390/electronics13030498</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <article-title>Artificial intelligence and machine learning (AI/ML)-enabled medical devices</article-title>
          <source>US Food and Drug Administration</source>
          <year>2025</year>
          <access-date>2025-11-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices">https://www.fda.gov/medical-devices/software-medical-device-samd/artificial-intelligence-and-machine-learning-aiml-enabled-medical-devices</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fawcett</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>An introduction to ROC analysis</article-title>
          <source>Pattern Recognit Lett</source>
          <year>2006</year>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>861</fpage>
          <lpage>874</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.patrec.2005.10.010"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolpert</surname>
              <given-names>DH</given-names>
            </name>
          </person-group>
          <article-title>Stacked generalization</article-title>
          <source>Neural Netw</source>
          <year>1992</year>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>241</fpage>
          <lpage>259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S0893-6080(05)80023-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/s0893-6080(05)80023-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuncheva</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Whitaker</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Measures of diversity in classifier ensembles and their relationship with the ensemble accuracy</article-title>
          <source>Mach Learn</source>
          <year>2003</year>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>181</fpage>
          <lpage>207</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1023/A:1022859003006"/>
          </comment>
          <pub-id pub-id-type="doi">10.1023/a:1022859003006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davoli</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Uno</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wooten</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Elledge</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Tumor aneuploidy correlates with markers of immune evasion and with reduced response to immunotherapy</article-title>
          <source>Science</source>
          <year>2017</year>
          <volume>355</volume>
          <issue>6322</issue>
          <fpage>eaaf8399</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28104840"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/science.aaf8399</pub-id>
          <pub-id pub-id-type="medline">28104840</pub-id>
          <pub-id pub-id-type="pii">355/6322/eaaf8399</pub-id>
          <pub-id pub-id-type="pmcid">PMC5592794</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsimberidou</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Targeted therapy in cancer</article-title>
          <source>Cancer Chemother Pharmacol</source>
          <year>2015</year>
          <volume>76</volume>
          <issue>6</issue>
          <fpage>1113</fpage>
          <lpage>1132</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26391154"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00280-015-2861-1</pub-id>
          <pub-id pub-id-type="medline">26391154</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00280-015-2861-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4998041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Vaida</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenstein</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Clary</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Predicting overall survival in patients with metastatic rectal cancer: a machine learning approach</article-title>
          <source>J Gastrointest Surg</source>
          <year>2020</year>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>1165</fpage>
          <lpage>1172</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31468331"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11605-019-04373-z</pub-id>
          <pub-id pub-id-type="medline">31468331</pub-id>
          <pub-id pub-id-type="pii">S1091-255X(23)01548-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7048666</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tapak</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shirmohammadi-Khorram</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Alafchi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hamidi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Poorolajal</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Prediction of survival and metastasis in breast cancer patients using machine learning classifiers</article-title>
          <source>Clin Epidemiol Glob Health</source>
          <year>2019</year>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>293</fpage>
          <lpage>299</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cegh.2018.10.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nicolò</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Périer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Prague</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bellera</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>MacGrogan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Saut</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Benzekry</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Machine learning and mechanistic modeling for prediction of metastatic relapse in early-stage breast cancer</article-title>
          <source>JCO Clin Cancer Inform</source>
          <year>2020</year>
          <volume>4</volume>
          <fpage>259</fpage>
          <lpage>274</lpage>
          <pub-id pub-id-type="doi">10.1200/CCI.19.00133</pub-id>
          <pub-id pub-id-type="medline">32213092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kourou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Exarchos</surname>
              <given-names>TP</given-names>
            </name>
            <name name-style="western">
              <surname>Exarchos</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Karamouzis</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Fotiadis</surname>
              <given-names>DI</given-names>
            </name>
          </person-group>
          <article-title>Machine learning applications in cancer prognosis and prediction</article-title>
          <source>Comput Struct Biotechnol J</source>
          <year>2015</year>
          <volume>13</volume>
          <fpage>8</fpage>
          <lpage>17</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2001-0370(14)00046-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.csbj.2014.11.005</pub-id>
          <pub-id pub-id-type="medline">25750696</pub-id>
          <pub-id pub-id-type="pii">S2001-0370(14)00046-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4348437</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maouche</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Terrissa</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Benmohammed</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zerhouni</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>An explainable AI approach for breast cancer metastasis prediction based on clinicopathological data</article-title>
          <source>IEEE Trans Biomed Eng</source>
          <year>2023</year>
          <volume>70</volume>
          <issue>12</issue>
          <fpage>3321</fpage>
          <lpage>3329</lpage>
          <pub-id pub-id-type="doi">10.1109/TBME.2023.3282840</pub-id>
          <pub-id pub-id-type="medline">37276094</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baldin</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nikolich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Griffioen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Monga</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ruth</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>FABRIC: a national-scale programmable experimental network infrastructure</article-title>
          <source>IEEE Internet Comput</source>
          <year>2019</year>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>38</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1109/MIC.2019.2958545</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <source>cBioPortal</source>
          <access-date>2026-01-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cbioportal.org/study/summary?id=msk_met_2021">https://www.cbioportal.org/study/summary?id=msk_met_2021</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <article-title>MU-Data-Science/GAF</article-title>
          <source>GitHub</source>
          <access-date>2026-01-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/MU-Data-Science/GAF/tree/main/metastatic">https://github.com/MU-Data-Science/GAF/tree/main/metastatic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
