<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e72665</article-id><article-id pub-id-type="doi">10.2196/72665</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Comparison of Machine Learning Models for Colon Cancer Survival: Predictive Modeling Approach</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Adatorwovor</surname><given-names>Reuben</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ogunsanya</surname><given-names>Motolani E</given-names></name><degrees>DPH, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Huang</surname><given-names>Bin</given-names></name><degrees>DrPH</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Charnigo</surname><given-names>Richard</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Abraham</surname><given-names>Olufunmilola</given-names></name><degrees>BPharm, MS, PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Biostatistics, College of Public Health, University of Kentucky</institution><addr-line>760 Rose street, Suite 208H</addr-line><addr-line>Lexington</addr-line><addr-line>KY</addr-line><country>United States</country></aff><aff id="aff2"><institution>TSET Health Promotion Research Center, Department of Family and Preventive Medicine, University of Oklahoma Health Sciences Center</institution><addr-line>Oklahoma City</addr-line><addr-line>OK</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Internal Medicine, College of Medicine, University of Kentucky</institution><addr-line>Lexington</addr-line><addr-line>KY</addr-line><country>United States</country></aff><aff id="aff4"><institution>Dr. 
Bing Zhang Department of Statistics, College of Arts and Sciences, University of Kentucky</institution><addr-line>Lexington</addr-line><addr-line>KY</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Pharmacy Practice and Science, College of Pharmacy, University of Kentucky</institution><addr-line>Lexington</addr-line><addr-line>KY</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Ahn</surname><given-names>Stephen</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Cao</surname><given-names>Xiaoyong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Reuben Adatorwovor, PhD, Department of Biostatistics, College of Public Health, University of Kentucky, 760 Rose street, Suite 208H, Lexington, KY, 40536, United States, 1 859-218-0959; <email>radatorwovor@uky.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>26</day><month>11</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e72665</elocation-id><history><date date-type="received"><day>09</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>31</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>31</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Reuben Adatorwovor, Motolani E Ogunsanya, Bin Huang, Richard Charnigo, Olufunmilola Abraham. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 26.11.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e72665"/><abstract><sec><title>Background</title><p>Colon cancer is a leading cause of cancer-related deaths worldwide, with survival influenced by risk factors, treatment type, and patient characteristics. Traditional statistical models, such as Kaplan-Meier curves, have been widely used to estimate survival probabilities. However, these models often have difficulty handling complex interactions, covariates, and nonlinear relationships between risk factors. Recently, machine learning (ML) techniques have emerged as promising tools for improving survival prediction by handling large covariates and capturing complex patterns.</p></sec><sec><title>Objective</title><p>This study compares several ML models to accurately estimate colon cancer survival by leveraging data from the Kentucky Cancer Registry. 
By identifying key risk factors, these analyses aim to improve risk stratification, treatment planning, and prognosis for overall colon cancer survival within subgroups.</p></sec><sec sec-type="methods"><title>Methods</title><p>We conducted a retrospective analysis of colon cancer cases diagnosed between 2010 and 2022 (n=33,825), using Kentucky Cancer Registry data linked to mortality records, with approval from the University of Kentucky Institutional Review Board (#63067). We compared multiple predictive modeling techniques, including Cox proportional hazards, accelerated failure time models, Extreme Gradient Boosting, random survival forests, least absolute shrinkage and selection operator (LASSO), and elastic net regression, to estimate survival probabilities. The Kaplan-Meier method provided baseline survival estimates, and multivariate models, including ML approaches, evaluated contributions of key risk factors. Model performance was compared across evaluation metrics such as the Brier score, concordance index, out-of-bag error, and Continuous Ranked Probability Score. Missing data were handled via multiple imputation, and leave-one-out cross-validation was applied to reduce overfitting.</p></sec><sec sec-type="results"><title>Results</title><p>The ML models identified key covariates influencing survival outcomes, such as age, treatment type, positive nodes, tumor stage, smoking, and comorbidities. In the overall model, patients who refused or received no treatment had a 3.24-fold higher risk of mortality compared to those who underwent surgery at primary and regional sites. Elevated mortality risk was also observed among smokers (24% higher than non-smokers) and Appalachian residents (7% higher than non-Appalachian residents). 
Our overall model achieved a concordance index of 0.8146, with strong discriminatory performance across subgroups, including early-age diagnosis (0.8175), late-age diagnosis (0.7841), Appalachia (0.8135), non-Appalachia (0.8126), White patients (0.8164), and Black patients (0.7881). The results highlight the strengths and limitations of each ML approach, with the random survival forest and LASSO models outperforming traditional methods such as the Cox model in prediction accuracy and model discrimination.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our study demonstrated the utility of ML in identifying risk factors associated with colon cancer survival, with positive lymph nodes, age at diagnosis, treatment received, clinical tumor size, tumor grade, smoking status, geographic region, and marital status emerging as dominant predictors across all statistical models. This comparative analysis offers valuable insights for clinical decision-making and prognosis, highlighting the potential of ML to identify risk factors specific to different subgroups, ultimately advancing personalized care for patients with colon cancer.</p></sec></abstract><kwd-group><kwd>colon cancer survival</kwd><kwd>colorectal cancer</kwd><kwd>Cox model</kwd><kwd>elastic net</kwd><kwd>LASSO</kwd><kwd>machine learning models</kwd><kwd>random survival forests</kwd><kwd>risk factors</kwd><kwd>survival estimation</kwd><kwd>least absolute shrinkage and selection operator</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Colon cancer, also known as colorectal cancer (CRC), is a leading cause of cancer-related deaths globally [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], typically developing from adenomatous polyps in the colon or rectum. 
While advancements in screening, early detection, and treatment have improved survival rates from about 50% in the 1970s to approximately 64% today [<xref ref-type="bibr" rid="ref3">3</xref>], CRC remains a significant public health concern because there are still many deaths and because of survival disparities influenced by stage at diagnosis, the presence of metastasis, and various individual and biological risk factors [<xref ref-type="bibr" rid="ref1">1</xref>]. Early-stage detection offers better survival outcomes (5 y survival rate of ~90% for localized cases vs ~14% for late-stage diagnoses) [<xref ref-type="bibr" rid="ref4">4</xref>], yet disparities persist across subgroups, particularly among non-Hispanic Black patients, driven by factors such as socioeconomic status and health care access [<xref ref-type="bibr" rid="ref3">3</xref>]. Understanding risk factors, both generally and specific to demographic and regional subgroups, is relevant to informing interventions, both broad-based and targeted, for better survival outcomes. Specifically, between 2016 and 2020, Kentucky&#x2019;s CRC mortality rate was 16.2 per 100,000, compared to the national rate of 13.1 [<xref ref-type="bibr" rid="ref5">5</xref>]. Thus, in Kentucky, survival rates are notably lower than the national average. The state&#x2019;s overall 5-year survival rate for colon cancer is approximately 61% [<xref ref-type="bibr" rid="ref3">3</xref>]. Indeed, Kentucky has one of the highest CRC mortality rates in the United States, which is partially due to high rates of smoking, obesity, and physical inactivity, all of which are modifiable risk factors [<xref ref-type="bibr" rid="ref4">4</xref>]. Limited access to health care and early screening programs, particularly in rural or Appalachian regions, contributes to delayed diagnoses and poorer survival outcomes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. 
Although Kentucky has implemented various cancer control initiatives, such as increasing access to screenings, these efforts have been less effective in rural areas, where health care resources remain scarce [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. As a result, more patients in Kentucky are diagnosed at later stages compared to those in other states, which significantly impacts their chances of survival [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Colon cancer is influenced by both nonmodifiable and modifiable risk factors. Age is one of the most significant nonmodifiable risk factors, with most cases diagnosed in individuals aged 60 years and above due to accumulated genetic mutations and cellular damage over time [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. A family history of CRC, particularly in first-degree relatives (parents, siblings, or children), implies a higher risk [<xref ref-type="bibr" rid="ref13">13</xref>], as do inherited genetic syndromes, such as familial adenomatous polyposis and Lynch syndrome, which involve mutations in mismatch repair genes [<xref ref-type="bibr" rid="ref14">14</xref>]. Among modifiable risk factors, smoking and alcohol use elevate CRC risk through DNA damage and carcinogenic metabolites like acetaldehyde [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], with combined use posing an even greater threat [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Physical inactivity, obesity, and chronic inflammatory conditions like Crohn disease and ulcerative colitis also heighten risk [<xref ref-type="bibr" rid="ref19">19</xref>]. 
Regular exercise is associated with reduced CRC risk, by enhancing gastrointestinal motility, reducing systemic inflammation, and supporting a healthy body weight [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Abdominal obesity increases CRC risk [<xref ref-type="bibr" rid="ref21">21</xref>] via visceral fat, which triggers chronic inflammation and hormone dysregulation (involving, eg, insulin and leptin) [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], particularly for proximal colon cancers [<xref ref-type="bibr" rid="ref23">23</xref>]. Dietary habits, hormonal factors (especially in women), and screening practices also impact CRC risk, with early detection through colonoscopy, sigmoidoscopy, or fecal occult blood tests significantly improving survival rates [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>The prognosis for patients with colon cancer largely depends on the cancer&#x2019;s stage at diagnosis, as defined by the TNM staging system [<xref ref-type="bibr" rid="ref26">26</xref>], which classifies cancer based on tumor size (T), lymph node involvement (N), and the presence of metastasis (M). Patients with stages I and II typically have localized disease, with no lymph node involvement and high survival rates (90%&#x2010;95%) when treated early [<xref ref-type="bibr" rid="ref27">27</xref>]. In stage III, cancer has spread to nearby lymph nodes (but not to distant organs), leading to moderate survival rates (40%&#x2010;70%), depending on treatment effectiveness [<xref ref-type="bibr" rid="ref27">27</xref>]. Stage IV involves distant metastasis, commonly to liver or lungs, resulting in significantly lower survival rates (10%&#x2010;15%) [<xref ref-type="bibr" rid="ref28">28</xref>]. 
Nonetheless, newer treatment approaches such as targeted therapies, immunotherapies, and palliative care have improved survival and quality of life for patients with metastatic disease [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. Colon cancer continues to be a major global health challenge, underscoring the importance of accurate survival estimation to guide clinical treatment decision-making and public health strategies [<xref ref-type="bibr" rid="ref32">32</xref>]. Survival outcomes are influenced by early detection, timely intervention, and modifiable lifestyle factors such as diet, exercise, and smoking cessation, although genetic predispositions and medical conditions like inflammatory bowel disease further complicate individual risk profiles. Early screening continues to be the most effective strategy for improving colon cancer survival outcomes.</p><p>In recent years, numerous studies [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] have applied machine learning (ML) techniques to predict outcomes in colon cancer, demonstrating promising performance in risk stratification and prognosis. However, many of these studies focus on a single model or dataset, limiting generalizability and offering little comparison across model types using standardized metrics [<xref ref-type="bibr" rid="ref35">35</xref>]. Additionally, interpretability and the role of geographically linked social determinants, such as residence in medically underserved regions like Appalachia, are often underexplored [<xref ref-type="bibr" rid="ref10">10</xref>]. This study addresses these gaps by systematically comparing traditional and modern ML approaches, including Cox proportional hazards, accelerated failure time (AFT) models, Extreme Gradient Boosting (XGBoost), random survival forests (RSFs), least absolute shrinkage and selection operator (LASSO), and elastic net regression, using a large, population-based dataset. 
By incorporating region-specific variables and evaluating models with consistent performance criteria, this study offers novel insights into both predictive accuracy and the complex risk factors influencing colon cancer survival.</p><p>Accurately predicting survival outcomes based on individual risk factors remains a major challenge [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>], despite significant progress in cancer surveillance and treatment. Understanding how these risk factors influence colon cancer survival is critical for enhancing the effectiveness of surveillance efforts and guiding targeted public health interventions. However, cancer survival surveillance using registry data is often complicated by incomplete or unreliable cause-of-death information, which can undermine the precision of public health interventions and cancer control strategies [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. In this study, we explore the influence of risk factors on prognostic outcomes in colon cancer survival using ML models. Specifically, we seek to answer 2 key questions: (1) how do different risk factors impact cancer survival across various population subgroups? (2) which risk factors emerge as the strongest predictors of survival outcomes within these subgroups?</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Source and Cohort Selection</title><p>The Kentucky Cancer Registry (KCR), a part of the Surveillance, Epidemiology, and End Results (SEER) Program and the National Program of Cancer Registries, provides comprehensive, population-based data on cancer incidence, survival, treatment, and patient demographic factors in Kentucky. KCR routinely links cancer incidence data to Kentucky state mortality data and National Death Index data to ensure updated mortality information. 
This study included all 33,825 noninstitutionalized adult patients (aged &#x2265;18 y) diagnosed with colon cancer during the study period. Patients with missing diagnosis, treatment, or staging data were excluded, ensuring data integrity.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This retrospective study utilized deidentified case-level colon cancer data from the KCR for the period January 1, 2010, to December 31, 2022. The study was approved by the University of Kentucky Institutional Review Board (#63067) following the completion of the data use agreement. Because the data were fully anonymized prior to analysis, informed consent was waived. Participant privacy and confidentiality were ensured through strict deidentification procedures, in compliance with institutional policies and federal regulations governing human participant research.</p></sec><sec id="s2-3"><title>Patient, Disease, and Treatment Factors</title><p>Potential risk factors were selected based on a review of existing literature on colon cancer survival and input from oncology and epidemiology experts on our research team. Clinically relevant variables extracted from the KCR included patient demographic and risk factors such as age at diagnosis, sex, race, ethnicity, marital status, smoking status, cigarette pack years, and geographical residence (Appalachian or non-Appalachian regions of Kentucky). Age at diagnosis was categorized into 2 groups: early onset (18&#x2010;60 y) and late onset (over 60 y). 
Due to the low representation of certain groups, patients who were neither White nor Black were categorized as &#x201C;Other.&#x201D; Likewise, ethnicity was classified into &#x201C;Not of Spanish or Latino origin&#x201D; and &#x201C;Other.&#x201D; Marital status was grouped into broader categories, with divorced and separated individuals combined into a single &#x201C;Divorced or Separated&#x201D; category, and those living with a partner or having unknown marital status grouped under &#x201C;Unknown.&#x201D;</p><p>Histological types [<xref ref-type="bibr" rid="ref25">25</xref>] not matching &#x201C;8140&#x201D; (Adenocarcinoma NOS), &#x201C;8210&#x201D; (Papillary Adenocarcinoma, Papillary subtype), &#x201C;8240&#x201D; (Tubulovillous Adenocarcinoma), and &#x201C;8480&#x201D; (Mucinous Adenocarcinoma) were categorized as &#x201C;Other/Unspecified.&#x201D; Treatment records were similarly streamlined based on the first course of treatment: patients reporting no treatment, unknown treatment, or refusal were grouped under &#x201C;No/Unknown or Refused Treatment.&#x201D; Those undergoing surgery for primary or regional sites were listed as &#x201C;Surgery (Primary and Regional),&#x201D; while individuals receiving multimodal treatment with surgery, chemotherapy, or radiation belonged to &#x201C;Surgery+(Chemotherapy and/or Radiation).&#x201D; Therapies not included in the categories described above were classified as &#x201C;Other.&#x201D;</p><p>Colon cancer staging is often performed using either the American Joint Committee on Cancer (AJCC) TNM system or the SEER Summary Stage system. The AJCC TNM staging is detailed, providing specific breakdowns of the tumor&#x2019;s size, regional lymph node involvement, and presence of distant metastasis [<xref ref-type="bibr" rid="ref42">42</xref>]. 
However, we chose to rely on the SEER 2018 Summary Stage, a simplified system that categorizes cancer&#x2019;s spread into broader groups: in situ (confined to the origin site), localized (confined to the organ of origin without spread), regional (spread to nearby tissues or lymph nodes), distant (spread to distant organs), and unknown (extent of spread cannot be determined). While the AJCC system offers a more granular, detailed assessment of cancer progression, the SEER Summary Stage aggregates these details into more general categories and is more consistent across years, making it useful for large-scale epidemiological studies and population-based cancer survival tracking [<xref ref-type="bibr" rid="ref43">43</xref>].</p><p>Insurance coverage was reclassified into 4 primary categories to streamline analysis. Patients with private or employer-based insurance were grouped under &#x201C;Private Insurance.&#x201D; Those covered by government-funded programs, including Medicare, Medicaid, TRICARE (Military), Workers&#x2019; Compensation, Veterans Affairs, and the Indian Health Service, were classified as &#x201C;Government-Related Programs.&#x201D; Patients who were uninsured or self-paid were categorized as &#x201C;Uninsured or Self-Pay,&#x201D; while individuals with nonspecific, unknown, or unreported insurance status were assigned to the &#x201C;Other/Unknown Payers&#x201D; group.</p></sec><sec id="s2-4"><title>Statistical Analysis</title><p>Descriptive analyses were conducted to summarize patient and clinical characteristics of the colon cancer cohort. Continuous variables, such as age, were reported as means, medians, standard deviations, minimum, and maximum values. Categorical variables, including sex, tumor stage, treatment status, and comorbidity indicators, were summarized using frequency counts and percentages. 
The descriptive analyses provided an overview of the colon cancer study cohort and served as the foundation for subsequent survival modeling. They also facilitated the identification of demographic and clinical patterns and covariate distributions relevant to prognosis and interpretation of survival differences across patient subgroups. The results are presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Distribution of colon cancer characteristics (continuous variables)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Risk factors</td><td align="left" valign="bottom">Frequency, n</td><td align="left" valign="bottom">Mean (SD)</td><td align="left" valign="bottom">Median (IQR)</td><td align="left" valign="bottom">Minimum</td><td align="left" valign="bottom">Maximum</td></tr></thead><tbody><tr><td align="left" valign="top">Age (y)</td><td align="left" valign="bottom">33,825</td><td align="left" valign="bottom">65.95 (13.74)</td><td align="left" valign="bottom">67.00</td><td align="left" valign="bottom">18.00</td><td align="left" valign="bottom">103.00</td></tr><tr><td align="left" valign="top">Cigarette pack years</td><td align="left" valign="bottom">18,602</td><td align="left" valign="bottom">16.76 (27.29)</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">300.00</td></tr><tr><td align="left" valign="top">Tumor size clinical (mm)</td><td align="left" valign="bottom">7094</td><td align="left" valign="bottom">49.04 (36.74)</td><td align="left" valign="bottom">46.00</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">989.00</td></tr><tr><td align="left" valign="top">CS<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> lymph nodes</td><td align="left" 
valign="bottom">19,932</td><td align="left" valign="bottom">72.72 (125.04)</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">800.00</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></fn><fn id="table1fn2"><p><sup>b</sup>CS: clinical stage.</p></fn></table-wrap-foot></table-wrap><p>Classical survival analysis methods, such as the Kaplan-Meier estimator [<xref ref-type="bibr" rid="ref44">44</xref>] and the Cox proportional hazards model [<xref ref-type="bibr" rid="ref45">45</xref>], have long been the standard approach for analyzing time-to-event data due to their interpretability, capacity for coping with right censoring, and well-established statistical foundations. Yet, they may not be well suited to high-dimensional data. The Kaplan-Meier estimator relies on stratification. If only a few strata are specified, then the results may be subject to confounding or may obscure interactions. If many strata are specified, then the results may be limited by small stratum sizes or may be subject to concerns about multiple comparisons. In contrast, ML methods like XGBoost, RSFs, LASSO, and elastic net Cox regression models [<xref ref-type="bibr" rid="ref1">1</xref>] offer greater flexibility. RSFs can capture interactions and nonlinear relationships without assuming proportional hazards, while elastic net provides variable selection and regularization in high-dimensional settings. AFT models, though parametric, can serve as a bridge between classical and ML approaches by directly modeling survival time.</p><p>We now outline our key statistical approaches for identifying risk factors associated with colon cancer survival (from time of diagnosis until death or right censoring) in Kentucky adults. 
We began by summarizing the colon cancer data from the KCR. We constructed nonparametric Kaplan-Meier curves [<xref ref-type="bibr" rid="ref44">44</xref>] to evaluate differences in survival distributions across subgroups and to explore associations between individual risk factors and survival. To assess the joint impact of risk factors, we used multivariable models, employing both classical and ML approaches. Classical methods included the Cox proportional hazards model [<xref ref-type="bibr" rid="ref45">45</xref>] and the AFT model [<xref ref-type="bibr" rid="ref46">46</xref>], which rely on specific assumptions such as proportional hazards (for Cox) or a prespecified survival time distribution (for AFT). We also used the ML methods of XGBoost [<xref ref-type="bibr" rid="ref47">47</xref>], RSF [<xref ref-type="bibr" rid="ref48">48</xref>], LASSO regression [<xref ref-type="bibr" rid="ref49">49</xref>], and elastic net [<xref ref-type="bibr" rid="ref50">50</xref>], which offer greater flexibility.</p><p>Subgroup analyses were also conducted to identify influential predictors of survival outcomes within strata based on race (White, Black, and other patient subgroups), time of diagnosis (early onset: 18&#x2010;60 y vs late onset: over 60 y), and geographic region (Appalachian vs non-Appalachian Kentucky). These stratified analyses allowed us to assess whether the impacts of key risk factors differed across demographic and regional subgroups so that subgroup-specific predictors of colon cancer survival might be identified.</p><p>The multivariable Cox proportional hazards model is a semiparametric statistical method used in cancer survival studies and allows for the simultaneous analysis of multiple risk factors, estimating the hazard of death while controlling for confounders. This approach is beneficial in identifying independent risk factors impacting survival, while adjusting for the effects of other covariates. 
Additionally, the model enables the investigation of differences between subgroups, providing a more comprehensive understanding of the factors affecting survival outcomes.</p><p>The AFT model is an alternative to the Cox proportional hazards model that is useful when the proportional hazards assumption is not met or when focusing on modeling time to an event rather than hazards. The AFT model assumes that covariates accelerate or decelerate survival time, which is useful in assessing how risk factors can shorten or extend a patient&#x2019;s survival time. Unlike the Cox model&#x2019;s focus on hazard ratios, the AFT model directly interprets time, offering clearer insights into absolute survival durations, an essential factor in understanding patient prognoses.</p><p>XGBoost is a powerful and scalable ML algorithm widely used for structured data. Built on the framework of gradient-boosted decision trees, it is highly effective in tasks such as classification, regression, and survival analysis. Its popularity stems from its ability to handle large datasets efficiently and reduce overfitting through regularization methods. Additionally, XGBoost supports parallel and distributed computing, which speeds up model training and accommodates missing data.</p><p>The RSF is a nonparametric ML technique that extends the traditional random forest algorithm to survival data. In our study, the RSF model was utilized to predict patient outcomes based on extensive risk factors. Its major advantage lies in its ability to capture interactions and nonlinear relationships between risk factors, dynamics that traditional models like Cox or AFT might miss. Moreover, RSF&#x2019;s capacity to handle censored data and missing values makes it robust for real-world clinical datasets. 
The model also aids in feature selection by identifying the strongest predictor variables, helping clinicians focus on key risk factors that drive survival outcomes.</p><p>The LASSO and the elastic net regularization techniques were utilized for the colon cancer survival data because the number of risk factors was relatively large. Regularization is a method of constraining or penalizing model parameters to prevent overfitting, leading to better model fit and generalization.</p></sec><sec id="s2-5"><title>LASSO Regression</title><p>LASSO is a regularized version of linear regression, where the model is penalized using the <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> norm (sum of the absolute values of the coefficients). The goal of using LASSO is to improve the prediction accuracy and interpretability of the model by forcing some of the coefficients that do not contribute to predicting survival to exactly 0. This is essentially equivalent to performing feature selection. 
The mathematical formulation of the LASSO is as follows:</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>g</mml:mi><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x03B2;</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mi>&#x03BB;</mml:mi><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B2;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here, <inline-formula><mml:math id="ieqn2"><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the observed target or censored time for colon cancer death for the <inline-formula><mml:math id="ieqn3"><mml:msup><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> individual, <inline-formula><mml:math id="ieqn4"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the vector of risk factors, <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the coefficient of the <inline-formula><mml:math id="ieqn6"><mml:msup><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> predictor or risk factor, <inline-formula><mml:math id="ieqn7"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> is the regularization parameter that controls the strength of the penalty, and <italic>p</italic> is the number of predictors.</p></sec><sec id="s2-6"><title>Elastic Net</title><p>Elastic net is a regularization technique that combines both <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> regularization (LASSO) and <inline-formula><mml:math id="ieqn9"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> regularization (Ridge). 
It was introduced to overcome some of the limitations of LASSO, particularly when dealing with highly correlated risk factors. Elastic net performs better in situations where there are many predictors, some of which may be correlated. The mathematical formulation of elastic net is as follows:</p><disp-formula id="equWL2"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>g</mml:mi><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mi>&#x03B2;</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup><mml:mi 
mathvariant="bold-italic">&#x03B2;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03BB;</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here, <inline-formula><mml:math id="ieqn10"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn11"><mml:msub><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> are the regularization hyperparameters corresponding to the <inline-formula><mml:math id="ieqn12"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn13"><mml:msub><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> penalties, respectively.</p><p>The LASSO procedure was chosen because we believe that only a subset of 
the predictors is most relevant for colon cancer survival prognosis. LASSO is particularly effective for sparse data, where many features are present, but only a few are significant, and its automatic feature selection enhances model interpretability. Elastic net was selected because the risk factors may be correlated with each other, and elastic net is better suited for handling multicollinearity.</p></sec><sec id="s2-7"><title>Model Performance</title><p>Model performance was evaluated using the Brier score (<xref ref-type="fig" rid="figure1">Figures 1</xref> and <xref ref-type="fig" rid="figure2">2</xref>), the standardized out-of-bag (OOB) Continuous Ranked Probability Score (CRPS), and the concordance index (C-index; see the <italic>Results</italic> section) to assess both calibration and discrimination across the overall cohort and predefined subgroups. The Brier score evaluates the accuracy of predicted probabilities by measuring the mean squared error between predictions and outcomes, with strengths in calibration and sharpness. The CRPS extends the Brier score to continuous outcomes and, when standardized and computed using OOB samples, offers an unbiased performance estimate that helps guard against overfitting. The C-index measures a model&#x2019;s ability to correctly rank outcomes, making it useful for survival or ranking tasks. Multiple imputation was used to address missing data, and leave-one-out cross-validation was employed to reduce overfitting and ensure robust performance estimates. All hypothesis tests were performed at the 5% significance level. All statistical analyses were conducted using R statistical software [<xref ref-type="bibr" rid="ref51">51</xref>] or Python [<xref ref-type="bibr" rid="ref52">52</xref>]. 
The reporting of this study conforms to STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) and TRIPOD+AI (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis Using Artificial Intelligence) guidelines [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Model performance based on Brier scores for the Cox model. Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e72665_fig01.png"/></fig><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Random survival forest (RSF) model fit assessed via Brier scores by subgroups. Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e72665_fig02.png"/></fig></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>The results from a total sample of 33,825 patients, including 15,838 who were alive and censored as of December 31, 2022, are shown below. The following risk factors for colon cancer survival were investigated: age at diagnosis, treatment, sex, race, ethnicity, marital status, smoking status, cigarette pack years, tumor grade, positive lymph nodes, clinical tumor size, geographic region (Appalachian status), histology classification or type, clinical stage (CS) lymph nodes, insurance, and SEER 2018 summary classification of the tumor. 
<xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> show the distribution of the risk factors, while <xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref> (Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) display or compare findings from the various methods.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Distribution of colon cancer characteristics (categorical variables)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Risk factors</td><td align="left" valign="bottom">Count, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sex</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">17,740 (53.0)</td></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">15,730 (47.0)</td></tr><tr><td align="left" valign="top" colspan="2">Race</td></tr><tr><td align="left" valign="top">&#x2003;White patients</td><td align="left" valign="top">30,854 (92.2)</td></tr><tr><td align="left" valign="top">&#x2003;Black patients</td><td align="left" valign="top">2262 (6.8)</td></tr><tr><td align="left" valign="top">&#x2003;Other patient subgroups</td><td align="left" valign="top">354 (1.1)</td></tr><tr><td align="left" valign="top" colspan="2">Ethnicity</td></tr><tr><td align="left" valign="top">&#x2003;Not Spanish or Latino</td><td align="left" valign="top">32,970 (98.5)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="left" valign="top">500 (1.5)</td></tr><tr><td align="left" valign="top" colspan="2">Smoking status</td></tr><tr><td align="left" valign="top">&#x2003;Non-smoker</td><td align="left" valign="top">11,551 (34.5)</td></tr><tr><td align="left" 
valign="top">&#x2003;Smoker</td><td align="left" valign="top">12,079 (36.1)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown or unspecified</td><td align="left" valign="top">9840 (29.4)</td></tr><tr><td align="left" valign="top" colspan="2">Marital status</td></tr><tr><td align="left" valign="top">&#x2003;Married</td><td align="left" valign="top">4268 (12.8)</td></tr><tr><td align="left" valign="top">&#x2003;Single or never married</td><td align="left" valign="top">17,905 (53.5)</td></tr><tr><td align="left" valign="top">&#x2003;Widowed</td><td align="left" valign="top">3969 (11.9)</td></tr><tr><td align="left" valign="top">&#x2003;Divorced or separated</td><td align="left" valign="top">6139 (18.3)</td></tr><tr><td align="left" valign="top">&#x2003;Living with partner or unknown or not reported</td><td align="left" valign="top">1142 (3.6)</td></tr><tr><td align="left" valign="top" colspan="2">Appalachia status</td></tr><tr><td align="left" valign="top">&#x2003;Non-Appalachia</td><td align="left" valign="top">23,126 (69.1)</td></tr><tr><td align="left" valign="top">&#x2003;Appalachia</td><td align="left" valign="top">10,344 (30.9)</td></tr><tr><td align="left" valign="top" colspan="2">Treatment group</td></tr><tr><td align="left" valign="top">&#x2003;No or unknown or refused treatment</td><td align="left" valign="top">3366 (10.1)</td></tr><tr><td align="left" valign="top">&#x2003;Surgery</td><td align="left" valign="top">23,510 (70.2)</td></tr><tr><td align="left" valign="top">&#x2003;Chemotherapy and radiation</td><td align="left" valign="top">1191 (3.6)</td></tr><tr><td align="left" valign="top">&#x2003;Chemotherapy or radiation with surgery</td><td align="left" valign="top">2986 (8.9)</td></tr><tr><td align="left" valign="top">&#x2003;Other therapies</td><td align="left" valign="top">2417 (7.2)</td></tr><tr><td align="left" valign="top" colspan="2">Insurance type</td></tr><tr><td align="left" valign="top">&#x2003;Private insurance</td><td align="left" 
valign="top">289 (0.9)</td></tr><tr><td align="left" valign="top">&#x2003;Government-related programs</td><td align="left" valign="top">21,563 (64.4)</td></tr><tr><td align="left" valign="top">&#x2003;Uninsured or self-pay</td><td align="left" valign="top">8293 (24.8)</td></tr><tr><td align="left" valign="top">&#x2003;Other programs</td><td align="left" valign="top">3325 (9.9)</td></tr><tr><td align="left" valign="top" colspan="2">Histology</td></tr><tr><td align="left" valign="top">&#x2003;Adenocarcinoma NOS<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">21,841 (65.3)</td></tr><tr><td align="left" valign="top">&#x2003;Papillary adenocarcinoma</td><td align="left" valign="top">2189 (6.5)</td></tr><tr><td align="left" valign="top">&#x2003;Tubulovillous adenocarcinoma</td><td align="left" valign="top">1126 (3.4)</td></tr><tr><td align="left" valign="top">&#x2003;Mucinous adenocarcinoma</td><td align="left" valign="top">2051 (6.1)</td></tr><tr><td align="left" valign="top">&#x2003;Other or unspecified</td><td align="left" valign="top">6263 (18.7)</td></tr><tr><td align="left" valign="top" colspan="2">Sentinel nodes</td></tr><tr><td align="left" valign="top">&#x2003;Negative sentinel node</td><td align="left" valign="top">13,975 (41.8)</td></tr><tr><td align="left" valign="top">&#x2003;Sentinel nodes positive</td><td align="left" valign="top">8183 (24.4)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown or other</td><td align="left" valign="top">11,312 (33.8)</td></tr><tr><td align="left" valign="bottom" colspan="2">SEER<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup> 2018 summary stage</td></tr><tr><td align="left" valign="top">&#x2003;In situ, localized</td><td align="left" valign="top">4911 (14.7)</td></tr><tr><td align="left" valign="top">&#x2003;Regional, early spread</td><td align="left" valign="top">1645 (4.9)</td></tr><tr><td align="left" valign="top">&#x2003;Regional, more extensive 
spread</td><td align="left" valign="top">1282 (3.8)</td></tr><tr><td align="left" valign="top">&#x2003;Distant spread or metastasis</td><td align="left" valign="top">1563 (4.7)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown or unspecified</td><td align="left" valign="top">24,069 (71.9)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></fn><fn id="table2fn2"><p><sup>b</sup>NOS: not otherwise specified.</p></fn><fn id="table2fn3"><p><sup>c</sup>SEER: Surveillance, Epidemiology, and End Results.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comparison of hazard ratios across racial, diagnosis age, and geographical subgroups using least absolute shrinkage and selection operator (LASSO) regression<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Risk factor</td><td align="left" valign="bottom">Overall</td><td align="left" valign="bottom">White patients</td><td align="left" valign="bottom">Black patients</td><td align="left" valign="bottom">Early age<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">Late age<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">Non-Appalachia</td><td align="left" valign="bottom">Appalachia</td></tr></thead><tbody><tr><td align="left" valign="top">Age (y)</td><td align="left" valign="top">1.035</td><td align="left" valign="top">1.035</td><td align="left" valign="top">1.033</td><td align="left" valign="top">NA<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">NA</td><td align="left" 
valign="top">1.036</td><td align="left" valign="top">1.032</td></tr><tr><td align="left" valign="top" colspan="8">Treatment (reference=no or unknown or refused)</td></tr><tr><td align="left" valign="top">&#x2003;Surgery (primary and regional)</td><td align="left" valign="top">0.309</td><td align="left" valign="top">0.304</td><td align="left" valign="top">0.321</td><td align="left" valign="top">0.297</td><td align="left" valign="top">0.280</td><td align="left" valign="top">0.327</td><td align="left" valign="top">0.289</td></tr><tr><td align="left" valign="top">&#x2003;Chemo and or radiation therapies</td><td align="left" valign="top">0.434</td><td align="left" valign="top">0.423</td><td align="left" valign="top">0.501</td><td align="left" valign="top">0.539</td><td align="left" valign="top">0.364</td><td align="left" valign="top">0.433</td><td align="left" valign="top">0.412</td></tr><tr><td align="left" valign="top">&#x2003;Surgery+(chemo/radiation)</td><td align="left" valign="top">0.264</td><td align="left" valign="top">0.260</td><td align="left" valign="top">0.286</td><td align="left" valign="top">0.269</td><td align="left" valign="top">0.202</td><td align="left" valign="top">0.277</td><td align="left" valign="top">0.246</td></tr><tr><td align="left" valign="top">&#x2003;Other therapies (immunotherapy, endoscopic, gene, etc)</td><td align="left" valign="top">0.622</td><td align="left" valign="top">0.610</td><td align="left" valign="top">0.68</td><td align="left" valign="top">0.653</td><td align="left" valign="top">0.430</td><td align="left" valign="top">0.680</td><td align="left" valign="top">0.558</td></tr><tr><td align="left" valign="top" colspan="8">Sex (reference=male)</td></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">0.865</td><td align="left" valign="top">0.859</td><td align="left" valign="top">0.897</td><td align="left" valign="top">0.929</td><td align="left" valign="top">0.853</td><td align="left" 
valign="top">0.855</td><td align="left" valign="top">0.901</td></tr><tr><td align="left" valign="top" colspan="8">Race (reference=White patients)</td></tr><tr><td align="left" valign="top">&#x2003;Black patients</td><td align="left" valign="top">1.087</td><td align="left" valign="top">NA</td><td align="left" valign="top">NA</td><td align="left" valign="top">1.176</td><td align="left" valign="top">1.027</td><td align="left" valign="top">1.069</td><td align="left" valign="top">1.127</td></tr><tr><td align="left" valign="top">&#x2003;Other patient subgroups</td><td align="left" valign="top">0.546</td><td align="left" valign="top">NA</td><td align="left" valign="top">NA</td><td align="left" valign="top">0.449</td><td align="left" valign="top">0.548</td><td align="left" valign="top">0.554</td><td align="left" valign="top">0.381</td></tr><tr><td align="left" valign="top" colspan="8">Ethnicity (reference=not Spanish or Latino)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="left" valign="top">0.960</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.866</td><td align="left" valign="top">0.796</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.978</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top" colspan="8">Marital status (reference=married)</td></tr><tr><td align="left" valign="top">&#x2003;Single (never married)</td><td align="left" valign="top">0.779</td><td align="left" valign="top">0.776</td><td align="left" valign="top">0.842</td><td align="left" valign="top">0.813</td><td align="left" valign="top">0.845</td><td align="left" valign="top">0.774</td><td align="left" valign="top">0.807</td></tr><tr><td align="left" valign="top">&#x2003;Widowed</td><td align="left" valign="top">0.985</td><td align="left" valign="top">0.981</td><td align="left" valign="top">1.007</td><td align="left" valign="top">1.002</td><td align="left" valign="top">1.000</td><td align="left" 
valign="top">1.000</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top">&#x2003;Divorced or separated</td><td align="left" valign="top">0.977</td><td align="left" valign="top">0.968</td><td align="left" valign="top">1.049</td><td align="left" valign="top">0.996</td><td align="left" valign="top">1.275</td><td align="left" valign="top">0.965</td><td align="left" valign="top">0.980</td></tr><tr><td align="left" valign="top">&#x2003;Living with partner or unknown or unreported</td><td align="left" valign="top">0.766</td><td align="left" valign="top">0.767</td><td align="left" valign="top">0.893</td><td align="left" valign="top">0.732</td><td align="left" valign="top">0.827</td><td align="left" valign="top">0.771</td><td align="left" valign="top">0.864</td></tr><tr><td align="left" valign="top" colspan="8">Smoking status (reference=non-smoker)</td></tr><tr><td align="left" valign="top">&#x2003;Smoker (cigarettes, e-cigarette, cigar)</td><td align="left" valign="top">1.244</td><td align="left" valign="top">1.234</td><td align="left" valign="top">1.178</td><td align="left" valign="top">1.234</td><td align="left" valign="top">1.177</td><td align="left" valign="top">1.268</td><td align="left" valign="top">1.163</td></tr><tr><td align="left" valign="top">&#x2003;Smoker (unknown)</td><td align="left" valign="top">0.872</td><td align="left" valign="top">0.890</td><td align="left" valign="top">0.784</td><td align="left" valign="top">0.871</td><td align="left" valign="top">0.848</td><td align="left" valign="top">0.848</td><td align="left" valign="top">0.869</td></tr><tr><td align="left" valign="top">&#x2003;Cigarette pack years</td><td align="left" valign="top">1.002</td><td align="left" valign="top">1.002</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.004</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.003</td></tr><tr><td align="left" valign="top" 
colspan="8">Tumor grade (reference=localized)</td></tr><tr><td align="left" valign="top">&#x2003;Regional by direct extension</td><td align="left" valign="top">1.149</td><td align="left" valign="top">1.003</td><td align="left" valign="top">1.126</td><td align="left" valign="top">1.273</td><td align="left" valign="top">1.156</td><td align="left" valign="top">1.031</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top">&#x2003;Regional to lymph nodes</td><td align="left" valign="top">1.545</td><td align="left" valign="top">1.338</td><td align="left" valign="top">1.219</td><td align="left" valign="top">1.955</td><td align="left" valign="top">1.504</td><td align="left" valign="top">1.407</td><td align="left" valign="top">1.218</td></tr><tr><td align="left" valign="top">&#x2003;Regional by both direct extension and regional lymph nodes</td><td align="left" valign="top">1.419</td><td align="left" valign="top">1.224</td><td align="left" valign="top">1.221</td><td align="left" valign="top">1.843</td><td align="left" valign="top">1.360</td><td align="left" valign="top">1.332</td><td align="left" valign="top">1.158</td></tr><tr><td align="left" valign="top">&#x2003;Unknown or unstageable</td><td align="left" valign="top">1.259</td><td align="left" valign="top">1.088</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.289</td><td align="left" valign="top">1.244</td><td align="left" valign="top">1.076</td><td align="left" valign="top">1.179</td></tr><tr><td align="left" valign="top" colspan="8">Positive nodes (reference=all sentinel nodes examined are negative)</td></tr><tr><td align="left" valign="top">&#x2003;Sentinel nodes are positive</td><td align="left" valign="top">1.538</td><td align="left" valign="top">1.555</td><td align="left" valign="top">1.668</td><td align="left" valign="top">1.838</td><td align="left" valign="top">1.502</td><td align="left" valign="top">1.559</td><td align="left" 
valign="top">1.586</td></tr><tr><td align="left" valign="top">&#x2003;Other or unknown</td><td align="left" valign="top">1.917</td><td align="left" valign="top">1.909</td><td align="left" valign="top">2.132</td><td align="left" valign="top">2.724</td><td align="left" valign="top">1.749</td><td align="left" valign="top">1.938</td><td align="left" valign="top">1.883</td></tr><tr><td align="left" valign="top">&#x2003;Tumor size</td><td align="left" valign="top">1.003</td><td align="left" valign="top">1.003</td><td align="left" valign="top">1.007</td><td align="left" valign="top">1.004</td><td align="left" valign="top">1.003</td><td align="left" valign="top">1.005</td><td align="left" valign="top">1.003</td></tr><tr><td align="left" valign="top">&#x2003;CS<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup> lymph nodes</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.001</td><td align="left" valign="top">1.001</td></tr><tr><td align="left" valign="top" colspan="8">Geographical region (reference=non-Appalachia)</td></tr><tr><td align="left" valign="top">&#x2003;Appalachia</td><td align="left" valign="top">1.073</td><td align="left" valign="top">1.077</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.053</td><td align="left" valign="top">1.012</td><td align="left" valign="top">NA</td><td align="left" valign="top">NA</td></tr><tr><td align="left" valign="top" colspan="8">Histology (reference=adenocarcinoma NOS<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup>)</td></tr><tr><td align="left" valign="top">&#x2003;Papillary adenocarcinoma</td><td align="left" valign="top">0.585</td><td align="left" valign="top">0.586</td><td align="left" valign="top">0.617</td><td align="left" valign="top">0.416</td><td align="left" valign="top">0.612</td><td 
align="left" valign="top">0.599</td><td align="left" valign="top">0.599</td></tr><tr><td align="left" valign="top">&#x2003;Tubulovillous adenocarcinoma</td><td align="left" valign="top">0.268</td><td align="left" valign="top">0.281</td><td align="left" valign="top">0.217</td><td align="left" valign="top">0.123</td><td align="left" valign="top">0.347</td><td align="left" valign="top">0.280</td><td align="left" valign="top">0.248</td></tr><tr><td align="left" valign="top">&#x2003;Mucinous adenocarcinoma</td><td align="left" valign="top">1.058</td><td align="left" valign="top">1.051</td><td align="left" valign="top">1.051</td><td align="left" valign="top">1.054</td><td align="left" valign="top">1.056</td><td align="left" valign="top">1.076</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top">&#x2003;Other or unspecified</td><td align="left" valign="top">0.791</td><td align="left" valign="top">0.811</td><td align="left" valign="top">0.695</td><td align="left" valign="top">0.592</td><td align="left" valign="top">0.852</td><td align="left" valign="top">0.825</td><td align="left" valign="top">0.793</td></tr><tr><td align="left" valign="top" colspan="8">Insurance (reference=private insurance)</td></tr><tr><td align="left" valign="top">&#x2003;Government-related programs</td><td align="left" valign="top">0.985</td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.953</td><td align="left" valign="top">1.091</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.981</td></tr><tr><td align="left" valign="top">&#x2003;Uninsured or self-pay</td><td align="left" valign="top">0.796</td><td align="left" valign="top">0.814</td><td align="left" valign="top">0.827</td><td align="left" valign="top">0.727</td><td align="left" valign="top">0.697</td><td align="left" valign="top">0.825</td><td align="left" valign="top">0.780</td></tr><tr><td align="left" 
valign="top">&#x2003;Other or unknown payers</td><td align="left" valign="top">1.227</td><td align="left" valign="top">1.278</td><td align="left" valign="top">1.036</td><td align="left" valign="top">1.211</td><td align="left" valign="top">1.385</td><td align="left" valign="top">1.304</td><td align="left" valign="top">1.181</td></tr><tr><td align="left" valign="top" colspan="8">SEER<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> 2018 summary (reference=in situ, localized)</td></tr><tr><td align="left" valign="top">&#x2003;Regional, early spread</td><td align="left" valign="top">1.581</td><td align="left" valign="top">1.548</td><td align="left" valign="top">1.190</td><td align="left" valign="top">1.698</td><td align="left" valign="top">1.585</td><td align="left" valign="top">1.595</td><td align="left" valign="top">1.364</td></tr><tr><td align="left" valign="top">&#x2003;Regional, more extensive spread</td><td align="left" valign="top">1.112</td><td align="left" valign="top">1.121</td><td align="left" valign="top">0.795</td><td align="left" valign="top">1.253</td><td align="left" valign="top">1.146</td><td align="left" valign="top">1.103</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top">&#x2003;Distant spread or metastasis</td><td align="left" valign="top">1.325</td><td align="left" valign="top">1.298</td><td align="left" valign="top">1.170</td><td align="left" valign="top">1.868</td><td align="left" valign="top">1.284</td><td align="left" valign="top">1.213</td><td align="left" valign="top">1.411</td></tr><tr><td align="left" valign="top">&#x2003;Unknown or unspecified</td><td align="left" valign="top">1.974</td><td align="left" valign="top">1.949</td><td align="left" valign="top">1.621</td><td align="left" valign="top">3.297</td><td align="left" valign="top">1.797</td><td align="left" valign="top">1.976</td><td align="left" valign="top">1.832</td></tr></tbody></table><table-wrap-foot><fn 
id="table3fn1"><p><sup>a</sup>Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></fn><fn id="table3fn2"><p><sup>b</sup>Early age: early-onset of colon cancer (18-60 y).</p></fn><fn id="table3fn3"><p><sup>c</sup>Late age: late-onset colon cancer (over 60 y).</p></fn><fn id="table3fn4"><p><sup>d</sup>NA: not applicable.</p></fn><fn id="table3fn5"><p><sup>e</sup>CS: clinical stage.</p></fn><fn id="table3fn6"><p><sup>f</sup>NOS: not otherwise specified.</p></fn><fn id="table3fn7"><p><sup>g</sup>SEER: Surveillance, Epidemiology, and End Results.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Comparison of concordance index (C-index) for population subgroups for the fitted models<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup>.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Subgroups</td><td align="left" valign="bottom" colspan="6">C-index for population subgroups models</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Cox</td><td align="left" valign="bottom">AFT<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="bottom">XGBoost<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="bottom">LASSO<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom">Elastic net</td><td align="left" valign="bottom">RSF<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Overall</td><td align="left" valign="top">0.7605</td><td align="left" valign="top">0.7606</td><td align="left" valign="top">0.7007</td><td align="left" valign="top">0.7544</td><td align="left" valign="top">0.7569</td><td align="left" 
valign="top">0.8141</td></tr><tr><td align="left" valign="top">Appalachia</td><td align="left" valign="top">0.7624</td><td align="left" valign="top">0.7626</td><td align="left" valign="top">0.6971</td><td align="left" valign="top">0.7607</td><td align="left" valign="top">0.7614</td><td align="left" valign="top">0.8131</td></tr><tr><td align="left" valign="top">Non-Appalachia</td><td align="left" valign="top">0.7827</td><td align="left" valign="top">0.7819</td><td align="left" valign="top">0.7103</td><td align="left" valign="top">0.7581</td><td align="left" valign="top">0.7564</td><td align="left" valign="top">0.8101</td></tr><tr><td align="left" valign="top">Early diagnosis</td><td align="left" valign="top">0.7754</td><td align="left" valign="top">0.7755</td><td align="left" valign="top">0.7210</td><td align="left" valign="top">0.6852</td><td align="left" valign="top">0.7310</td><td align="left" valign="top">0.8158</td></tr><tr><td align="left" valign="top">Late diagnosis</td><td align="left" valign="top">0.7417</td><td align="left" valign="top">0.7420</td><td align="left" valign="top">0.7030</td><td align="left" valign="top">0.7369</td><td align="left" valign="top">0.7409</td><td align="left" valign="top">0.7832</td></tr><tr><td align="left" valign="top">White patients</td><td align="left" valign="top">0.7634</td><td align="left" valign="top">0.7633</td><td align="left" valign="top">0.7419</td><td align="left" valign="top">0.7598</td><td align="left" valign="top">0.7595</td><td align="left" valign="top">0.8161</td></tr><tr><td align="left" valign="top">Black patients</td><td align="left" valign="top">0.8037</td><td align="left" valign="top">0.8041</td><td align="left" valign="top">0.6889</td><td align="left" valign="top">0.7449</td><td align="left" valign="top">0.7469</td><td align="left" valign="top">0.7849</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Analyses are based on a retrospective cohort study of patients with colon cancer 
using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></fn><fn id="table4fn2"><p><sup>b</sup>AFT: accelerated failure time.</p></fn><fn id="table4fn3"><p><sup>c</sup>XGBoost: Extreme Gradient Boosting.</p></fn><fn id="table4fn4"><p><sup>d</sup>LASSO: least absolute shrinkage and selection operator.</p></fn><fn id="table4fn5"><p><sup>e</sup>RSF: random survival forest.</p></fn></table-wrap-foot></table-wrap><p>The RSF model demonstrated strong overall discriminatory performance, achieving a C-index of 0.8146, indicating a high level of agreement between predicted and observed survival outcomes. Model evaluation across key subgroups showed similar robust performance: patients diagnosed early (0.8175), patients diagnosed late (0.7841), Appalachia (0.8135), non-Appalachia (0.8126), White patients (0.8164), and Black patients (0.7881). The slightly higher C-index in the early diagnosis and White patient subgroups suggests marginally better discrimination in these populations, while the modestly lower score in the late diagnosis and Black patient subgroups still reflects acceptable performance. These results support the generalizability and reliability of the RSF model across diverse demographic and clinical strata. Overall, the model demonstrated stable predictive accuracy and effective risk stratification for colon cancer survival.</p></sec><sec id="s3-2"><title>Survival and Prevalence of Colon Cancer Death</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows the annual incidence and mortality of colon cancer, highlighting temporal trends including a slight dip in 2020 likely due to the COVID-19 pandemic disruption in screening and diagnosis. This decline may reflect delayed detection rather than a true reduction in CRC disease burden. 
<xref ref-type="fig" rid="figure4">Figure 4</xref> shows Kaplan-Meier curves for CRC survival stratified by each risk factor, highlighting statistically significant differences between the distributions of at least 2 subgroups for each risk factor. Model accuracy for both the LASSO (<italic>&#x03BB;</italic>=0.01) and elastic net (<italic>&#x03BB;</italic>&#x2081;=0.02, <italic>&#x03BB;</italic>&#x2082;=0) procedures was evaluated using leave-one-out cross-validation to minimize overfitting and enhance predictive performance. Elastic net results closely mirrored those of the LASSO and are omitted for brevity. <xref ref-type="fig" rid="figure5">Figure 5</xref> illustrates the variable importance for the risk factors in the model. The plot highlights the order of importance, with the &#x201C;red&#x201D; boxplots indicating significant predictors of survival. These predictors are ranked based on their contribution to the model&#x2019;s ability to accurately assess survival outcomes.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>A comparison of new colorectal cancer (CRC) cases and deaths across the study period. Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e72665_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Kaplan-Meier survival curves comparing colorectal cancer (CRC) survival distributions by risk factor and time since diagnosis, with vertical dashes representing 2-, 5-, 10-, and 15-year survival probabilities. 
Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e72665_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>The ranking of risk factor importance overall and for each subgroup, as determined by the random survival forest model. Analyses are based on a retrospective cohort study of patients with colon cancer using data from the Kentucky Cancer Registry collected between January 1, 2010, and December 31, 2022. &#x201C;Overall&#x201D; is the overall model and displays the significant risk factors in a &#x201C;red&#x201D; box plot and the nonsignificant ones in a &#x201C;blue&#x201D; box plot. &#x201C;Early1&#x201D; and &#x201C;Early2&#x201D; represent individuals diagnosed at an early or late age, respectively. &#x201C;App1&#x201D; and &#x201C;App2&#x201D; correspond to results of individuals residing in non-Appalachia and Appalachia regions, respectively. &#x201C;White&#x201D; and &#x201C;Black&#x201D; show the results stratified by White and Black patient subgroups, respectively. Red and blue colored boxplots correspond to significant and nonsignificant risk factors, respectively.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e72665_fig05.png"/></fig><p><xref ref-type="table" rid="table4">Table 4</xref> shows the C-index values for the Cox, AFT, XGBoost, LASSO, elastic net, and RSF models across various subgroups. Among the ML models evaluated, RSF consistently achieved the highest C-index values, indicating superior discriminatory performance and a better overall fit to the survival data. 
This suggests that RSF is better able to capture complex, nonlinear relationships and interactions among the risk factors, which traditional models may overlook. While classical models like Cox and AFT performed reasonably well, their performance was generally outpaced by the more flexible ensemble-based RSF. These findings highlight the potential value of nonparametric ML approaches in improving predictive accuracy for survival outcomes.</p><p><xref ref-type="fig" rid="figure1">Figure 1</xref> displays the Brier scores by population subgroups for the Cox proportional hazards model. As shown, the Brier scores for the Cox model range between 0 and 0.25, indicating reasonably good predictive accuracy across the subgroups. Lower Brier scores reflect better performance in terms of both discrimination and calibration, suggesting that the Cox model fits the data relatively well. The consistency of low Brier scores across diverse subgroups further supports the model&#x2019;s robustness among the classical methods. These results highlight the suitability of the Cox model for risk prediction in these population subgroups.</p><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows the evaluation of the predictive models from RSF for the overall cohort and subgroups using the Brier score. The overall model showed consistent and stable performance at each point in time, while subgroup analyses indicated slightly higher Brier scores among Black patients, Appalachian residents, and those with late CRC diagnoses, suggesting modestly reduced calibration and predictive ability. 
However, the differences were not substantial, indicating no major bias in model performance across these groups.</p></sec><sec id="s3-3"><title>Model Results: Overall</title><p>In the Cox model, the most significant predictors of colon cancer survival, listed in order of importance (determined by <italic>P</italic> values), were positive nodes, treatment group, age, tumor size, CS lymph nodes, and tumor grade, with the remaining factors having no significant impact on survival (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, &#x201C;Overall&#x201D; column). The AFT model identified the same set of key predictors as the Cox model; sex was also significant in the AFT model, but the other predictors mentioned above were ranked in the same order for the AFT model (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, &#x201C;Overall&#x201D; column). However, in the RSF, the importance of risk factors differed slightly, with treatment group, positive nodes, age, clinical tumor size, CS lymph nodes, marital status, race, histology, insurance, cigarette pack years, tumor grade, smoking status, ethnicity, Appalachia, and sex following in descending order (<xref ref-type="fig" rid="figure5">Figure 5</xref>, overall panel). The effects of ethnicity, Appalachia, and sex were relatively small compared to those of other factors. The standardized OOB CRPS estimate was 0.155, with an OOB requested performance error of 0.254, indicating a very good fit of the model.</p><p><xref ref-type="table" rid="table3">Table 3</xref> presents the results for the LASSO model. For instance, the hazard ratio for overall age is 1.035, indicating that for each additional year of age, the hazard of death from colon cancer increased by 3.5%. 
Patients who underwent surgery (primary and regional) had a 69.1% lower hazard of death compared to those who refused treatment or had no treatment, representing a significant reduction in risk. The most effective treatment regimen was surgery combined with either chemotherapy or radiation therapy, which showed the greatest benefit compared to all other treatment options. Female patients had a 13.5% lower hazard of colon cancer death compared to male patients (compare <xref ref-type="fig" rid="figure4">Figure 4</xref> panel C). Black patients had an 8.7% higher risk of colon cancer death compared to White patients, even after adjusting for other risk factors in the model.</p></sec><sec id="s3-4"><title>Racial Subgroup</title><p>The significant risk factors for predicting colon cancer survival differ across racial groups, as identified by the AFT (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and the Cox (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) models. Among White patients, the most important predictors are treatment, positive nodes, age, tumor size, tumor grade, CS lymph nodes, sex, and histology. For Black patients, however, the key predictors shift slightly, with positive nodes, smoking status, insurance, age, histology, CS lymph nodes, and cigarette pack years emerging as the primary factors. These differences highlight the potential impact of both genetic and lifestyle factors on survival outcomes in different racial groups.</p><p>In the RSF model for the White patients, the risk factors were ranked in the following order of importance: treatment group, positive nodes, age, tumor size, marital status, histology, cigarette pack years, CS lymph nodes, and tumor grade. The standardized OOB CRPS was 0.149, with an OOB requested performance error of 0.241. The rest of the risk factors had minimal impact on survival predictions. 
For Black patients, the key predictors included positive nodes, treatment group, CS lymph nodes, insurance, age, cigarette pack years, tumor size, and smoking. The standardized OOB CRPS was 0.184, with an OOB requested performance error of 0.327. All other risk factors contributed minimally to survival outcomes (<xref ref-type="fig" rid="figure5">Figure 5</xref> panels White and Black patients). The above analysis was not repeated for the &#x201C;Other&#x201D; racial group due to limited data.</p></sec><sec id="s3-5"><title>Geographical Region</title><p>In both the AFT and Cox models, key risk factors for colon cancer survival vary between individuals living in the Appalachia region and those outside of it. For non-Appalachia residents, significant factors include treatment group, positive lymph nodes, age at diagnosis, tumor grade, marital status, and clinical tumor size (in the AFT and Cox models), with histology and CS lymph nodes also emerging as significant factors in the Cox model (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). In contrast, for Appalachia residents, survival is influenced by treatment group, age at diagnosis, positive lymph nodes, clinical tumor size, marital status, tumor grade, CS lymph node, and histology classification. The main differences between the 2 groups are the inclusion of CS lymph node and histology as significant factors for Appalachia residents, while clinical tumor size and smoking status are less relevant for non-Appalachia patients.</p><p>In the RSF model, the risk factors associated with colon cancer survival differ slightly between individuals living in Appalachia and those in non-Appalachia regions. 
For non-Appalachia residents (<xref ref-type="fig" rid="figure5">Figure 5</xref>, App1), the most influential factors, in order of importance, include treatment group, age at diagnosis, positive lymph nodes, marital status, CS lymph nodes, race, cigarette pack years, and smoking status. The impact of clinical tumor size was not quantified precisely but appeared to be high. Insurance, histology classification, and tumor grade also had high relevance for survival. The rest of the variables had a minimal impact. The standardized OOB CRPS was 0.151, and the OOB requested performance error was 0.244. In contrast, for patients in Appalachia (<xref ref-type="fig" rid="figure5">Figure 5</xref>, App2), the key significant risk factors were positive lymph nodes, age at diagnosis, treatment group, clinical tumor size, and smoking status. The standardized OOB CRPS was 0.191, and the OOB requested performance error was 0.353. The impact of cigarette pack years, histology, insurance, race, tumor grade, CS lymph nodes, and smoking status was not quantified precisely but appeared to be high. The primary differences lie in the order of importance, with treatment received and positive lymph nodes being important for both non-Appalachia and Appalachian residents, with factors such as marital status, insurance, and clinical tumor size being more prominent outside Appalachia.</p></sec><sec id="s3-6"><title>Early- Versus Late-Age Diagnosis</title><p>In both the Cox and AFT models, the risk factors for colon cancer survival differ slightly between early- and late-age diagnosis. For early diagnosis, the most significant risk factors across both models include positive nodes, treatment group, tumor grade, tumor size, histology, and race. In the Cox model, clinical tumor size is also an important factor, while the AFT model does not highlight this. For late diagnosis, both models identify treatment group, positive nodes, tumor size, marital status, and CS lymph node as key predictors. 
The Cox model ranked CS lymph node and tumor size higher than marital status, while the AFT model emphasizes CS lymph nodes and marital status with different weights (Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>In the RSF model, the key risk factors for patients diagnosed at an early age with colon cancer include positive nodes, treatment group, and tumor size. Other high-priority factors are tumor grade, race, histology, cigarette pack years, insurance, CS lymph nodes, and marital status, while the remaining factors have a minimal impact on survival (<xref ref-type="fig" rid="figure5">Figure 5</xref>, panel Early1). For patients diagnosed at an older age, the key risk factors shift slightly to include treatment group, positive nodes, marital status, clinical tumor size, insurance, cigarette pack years, CS lymph nodes, and histology. Tumor grade is also a high-priority factor. Race, smoking status, sex, ethnicity, and Appalachia status contribute minimally to survival outcomes (<xref ref-type="fig" rid="figure5">Figure 5</xref>, panel Early2).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Key Findings</title><p>The main findings of this study highlight that several individual and contextual risk factors significantly influence colon cancer survival. Across almost all models, positive sentinel lymph node, age at diagnosis, treatment received, clinical tumor size, tumor grade, smoking, geographic region, and marital status consistently emerged as dominant predictors. The highest risk of death (<xref ref-type="table" rid="table3">Table 3</xref>) was observed among individuals who received no treatment or refused treatment, compared to those who underwent any other form of treatment (primary and regional). 
Social determinants of health such as insurance (which serves as a proxy for poverty), Appalachia region, marital status, and modifiable risk factors such as smoking significantly impact survival. Not having insurance, smoking, living in the Appalachia region, and being married are associated with higher risk of dying. The comparison of classical and ML models demonstrated that ML approaches, particularly RSF and LASSO, offered improved predictive accuracy and variable importance insights compared to traditional methods. Consistent with prior studies, these results affirm that men have a higher risk of colon cancer death than women [<xref ref-type="bibr" rid="ref56">56</xref>-<xref ref-type="bibr" rid="ref58">58</xref>].</p><p>Overall, smokers had a higher hazard of mortality compared to non-smokers, a finding consistent with the results reported by Tsao et al [<xref ref-type="bibr" rid="ref59">59</xref>]. Additionally, Appalachian residents had higher hazard than non-Appalachians (<xref ref-type="table" rid="table3">Table 3</xref>). In non-Appalachian residents, the hazard of death was higher for those with distant metastasis compared to those with in situ localized cancer diagnosis. In contrast, Appalachian residents had a higher hazard of death with distant metastasis, suggesting an interaction between geography and cancer spread, which may be compared with the findings of Wang et al [<xref ref-type="bibr" rid="ref60">60</xref>].</p></sec><sec id="s4-2"><title>Model Comparisons</title><p>When comparing significant risk factors across racial groups in the Cox model, it was evident that while the type of treatment received was a significant predictor of survival for the White patients, it did not appear to be a useful predictor for the Black patients (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
This disparity may highlight the possibility that Black patients often seek treatment later in the disease course, potentially reducing the effectiveness of the treatment they receive. However, modifiable risk factors such as smoking, cigarette pack years, and insurance status were found to impact survival among Black patients compared to White patients. This suggests that addressing these factors could help narrow the gap in colon cancer survival between White and Black patients.</p><p>In the RSF model, differences emerged in how treatment and socioeconomic factors (like insurance and smoking history) influence survival outcomes across racial groups. Both groups shared predictors like positive nodes, clinical tumor size, and tumor grade, but White patients&#x2019; survival was more impacted by treatment received than positive node, while Black patients&#x2019; survival was more impacted by positive node than treatment received. Other important risk factors affecting survival include smoking and insurance, which showed differences compared with White patients. This contrast suggests that while biological factors (such as tumor characteristics) are universally relevant, socioeconomic and access disparities exacerbate survival gaps between racial groups. Overall, positive lymph nodes, treatment, age at diagnosis, CS lymph nodes, tumor size, tumor grade, and histology were important risk factors in both Appalachian and non-Appalachian areas of Kentucky. CS lymph nodes, however, were more influential within the non-Appalachian population compared to patients in Appalachia.</p><p>For the Cox model, while positive nodes, treatment group, and histology were consistently significant for both early and late diagnosis, tumor characteristics and social risk factors like marital status varied between the 2 groups (early vs late diagnosis). 
These findings suggest that we need to integrate critical risk factors into individualized prognostic models.</p></sec><sec id="s4-3"><title>Strengths</title><p>The key strength of this study is its comprehensive comparison of both classical and ML survival models and its use of large, population-based cancer registry data. By evaluating multiple modeling approaches including classical methods like Cox proportional hazards, AFT models, RSF, LASSO, and elastic net, our study offers valuable insights into the strengths and limitations of each method in predicting key risk factors associated with colon cancer survival. This side-by-side evaluation (<xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref>, Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, and <xref ref-type="fig" rid="figure1">Figures 1</xref><xref ref-type="fig" rid="figure2"/><xref ref-type="fig" rid="figure3"/><xref ref-type="fig" rid="figure4"/>-<xref ref-type="fig" rid="figure5">5</xref>) enhances our understanding of how different algorithms handle clinical and demographic risk factors, ultimately contributing to more accurate and interpretable survival predictions. The inclusion of subgroup analyses further strengthens the findings by highlighting how predictive performance and risk factors vary across different population subgroups.</p></sec><sec id="s4-4"><title>Limitations and Future Directions</title><p>First, a major limitation of this study is the lack of access to detailed, case-level data, which prevented us from analyzing several important modifiable risk factors such as alcohol consumption, obesity, familial associations or family history of CRC, inflammatory bowel disease, and dietary intake (including consumption of processed and red meats). These factors are well-established contributors to CRC risk and may also influence survival outcomes. 
The absence of these variables could lead to residual confounding and limits our ability to fully characterize the risk profile of patients. Future studies incorporating more granular lifestyle and clinical data would provide a more comprehensive understanding of survival determinants and allow for the evaluation of modifiable behavioral risk factors.</p></sec><sec id="s4-5"><title>Clinical Implication</title><p>The findings from our survival analysis models (Kaplan-Meier curve, Cox, AFT, XGBoost, RSF, LASSO, and elastic net) provide several actionable insights for both clinical practice and public health. Across all models, consistent predictors such as positive lymph node involvement, tumor size, tumor grade, and age were associated with both early and late diagnoses of CRC. Clinically, this supports the development of risk-stratified care pathways, where patients presenting with these characteristics can be prioritized for more intensive surveillance and earlier intervention. For example, individuals with larger or high-grade tumors may benefit from more frequent imaging or expedited treatment planning. Incorporating these factors into clinical decision-making tools could help optimize resource allocation and improve outcomes through early detection and treatment.</p><p>Beyond clinical implications, the models also highlight social and structural determinants of health that warrant attention. Race and insurance status, which were significant in the AFT and RSF models, suggest potential disparities in access to timely diagnosis. These findings can inform public health strategies such as targeted screening and outreach programs in underserved communities. The significance of marital status, particularly in late diagnosis, points to the role of social support in health care engagement, indicating a need to connect socially isolated individuals with navigators or support services. 
Additionally, the inclusion of cigarette pack-years and CS lymph node involvement as key factors further supports integrating lifestyle and clinical history into risk prediction frameworks, enhancing early identification of high-risk patients in both clinical and community settings. Together, these findings support the creation of risk-adapted screening, surveillance, and treatment pathways, as well as inform broader policy interventions aimed at improving access and reducing disparities in CRC outcomes.</p></sec><sec id="s4-6"><title>Colon Cancer Survival and Prognosis</title><p>Different subgroups may have varying risk profiles influencing colon cancer survival and prognosis, highlighting the limitations of a one-size-fits-all approach. Understanding subgroup-specific risk factors enables tailored interventions, screening, and treatment strategies for diverse populations. By leveraging results from classical and ML survival models including Cox, AFT, RSF, LASSO, and elastic net methods, clinicians and researchers can identify the most relevant predictors of patients&#x2019; survival outcome and develop individualized risk assessments. For example, the AFT model indicates that among early-onset patients, smokers experience death approximately 10% sooner than never-smokers, whereas in late-onset patients, this difference increases to 13%. Such insights highlight modifiable risk factors like smoking and suggest prioritization for targeted intervention in the higher-risk subgroups.</p><p>Colon cancer survival outcomes are shaped by both modifiable risk factors, such as smoking, insurance status, and geographic disparities, and non-modifiable factors, including age, genetics, and comorbidities. Early detection through screening remains the most effective strategy [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>], as it significantly improves survival by enabling timely and effective treatment. 
Integrating these findings across different subgroups supports more precise, personalized treatment and preventive strategies, ultimately reducing the burden of colon cancer at both individual and population levels.</p></sec><sec id="s4-7"><title>Conclusion</title><p>This study evaluated multiple survival analysis models, including the Cox proportional hazards model (Cox model), the AFT model, XGBoost, RSF, the LASSO regression, and elastic net. Each of these models has distinct strengths and assumptions, making them suitable for different aspects of our research questions. Both LASSO and elastic net are powerful regularization techniques that helped improve the model generalization, interpretability, and predictive accuracy.</p></sec></sec></body><back><ack><p>The authors also thank Jaclyn McDowell, DrPH, at the Kentucky Cancer Registry for her assistance in extracting the colon cancer data and Jerod Stapleton, PhD, for reading through this manuscript.</p></ack><notes><sec><title>Funding</title><p>Manuscript preparation was supported in part by the funds from the Department of Biostatistics at the University of Kentucky.</p></sec><sec><title>Data Availability</title><p>The dataset analyzed during this study is publicly available from the Kentucky Cancer Registry.</p></sec></notes><fn-group><fn fn-type="con"><p>RA conceptualized the study and led the study design, data collection, formal analysis, data interpretation, synthesis, and writing of the original draft and revisions. MEO assisted with drafting the introduction, contributed to interpretation of findings, and reviewed the manuscript. BH provided guidance on data extraction, analysis, interpretation, and reviewed the manuscript. RC contributed to the interpretation of results and manuscript review. OA contributed to conceptualization, provided critical feedback, ensured alignment with journal submission guidelines, and reviewed the manuscript. 
All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AFT</term><def><p>accelerated failure time</p></def></def-item><def-item><term id="abb2">AJCC</term><def><p>American Joint Committee on Cancer</p></def></def-item><def-item><term id="abb3">C-index</term><def><p>concordance index</p></def></def-item><def-item><term id="abb4">CRC</term><def><p>colorectal cancer</p></def></def-item><def-item><term id="abb5">CRPS</term><def><p>Continuous Ranked Probability Score</p></def></def-item><def-item><term id="abb6">CS</term><def><p>clinical stage</p></def></def-item><def-item><term id="abb7">KCR</term><def><p>Kentucky Cancer Registry</p></def></def-item><def-item><term id="abb8">LASSO</term><def><p>least absolute shrinkage and selection operator</p></def></def-item><def-item><term id="abb9">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb10">OOB</term><def><p>out-of-bag</p></def></def-item><def-item><term id="abb11">RSF</term><def><p>random survival forest</p></def></def-item><def-item><term id="abb12">SEER</term><def><p>Surveillance, Epidemiology, and End Results</p></def></def-item><def-item><term id="abb13">STROBE</term><def><p>Strengthening the Reporting of Observational Studies in Epidemiology</p></def></def-item><def-item><term id="abb14">TNM</term><def><p>tumor, nodes, and metastasis</p></def></def-item><def-item><term id="abb15">TRIPOD+AI</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis Using Artificial Intelligence</p></def></def-item><def-item><term id="abb16">XGBoost</term><def><p>Extreme Gradient Boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Xi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>P</given-names> </name></person-group><article-title>Global colorectal cancer burden in 2020 and projections to 2040</article-title><source>Transl Oncol</source><year>2021</year><month>10</month><volume>14</volume><issue>10</issue><fpage>101174</fpage><pub-id pub-id-type="doi">10.1016/j.tranon.2021.101174</pub-id><pub-id pub-id-type="medline">34243011</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gandomani</surname><given-names>HS</given-names> </name><name name-style="western"><surname>Yousefi</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Aghajani</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Colorectal cancer in the world: incidence, mortality and risk factors</article-title><source>Biomed Res Ther</source><year>2017</year><volume>4</volume><issue>10</issue><fpage>1656</fpage><lpage>1675</lpage><pub-id pub-id-type="doi">10.15419/bmrat.v4i10.372</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Colorectal cancer statistics</article-title><source>Centers for Disease Control and Prevention</source><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/colorectal-cancer/statistics/index.html">https://www.cdc.gov/colorectal-cancer/statistics/index.html</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>Kentucky cancer burden report</article-title><source>Kentucky Cancer Consortium</source><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.kycancerc.org/">https://www.kycancerc.org/</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Shaughnessy</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Sabharwal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>C</given-names> </name><name name-style="western"><surname>Garcia</surname><given-names>O</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>RC</given-names> </name></person-group><article-title>Disparities in colorectal cancer incidence and mortality in Appalachian Kentucky</article-title><source>J Clin Oncol</source><year>2025</year><month>06</month><volume>43</volume><issue>16_suppl</issue><pub-id pub-id-type="doi">10.1200/JCO.2025.43.16_suppl.e15673</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>Cancer facts &#x0026; figures 2020</article-title><source>American Cancer Society</source><year>2020</year><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.org/research/cancer-facts-statistics/all-cancer-facts-figures/cancer-facts-figures-2020.html">https://www.cancer.org/research/cancer-facts-statistics/all-cancer-facts-figures/cancer-facts-figures-2020.html</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kenamond</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Mourad</surname><given-names>WF</given-names> </name><name name-style="western"><surname>Randall</surname><given-names>ME</given-names> </name><name 
name-style="western"><surname>Kaushal</surname><given-names>A</given-names> </name></person-group><article-title>No oncology patient left behind: challenges and solutions in rural radiation oncology</article-title><source>Lancet Reg Health Am</source><year>2022</year><volume>13</volume><fpage>e100293</fpage><pub-id pub-id-type="doi">10.1016/j.lana.2022.100293</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Paskett</surname><given-names>ED</given-names> </name><name name-style="western"><surname>Fisher</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Lengerich</surname><given-names>EJ</given-names> </name><etal/></person-group><article-title>Disparities in underserved white populations: the case of cancer-related disparities in Appalachia</article-title><source>Oncologist</source><year>2011</year><volume>16</volume><issue>8</issue><fpage>1072</fpage><lpage>1081</lpage><pub-id pub-id-type="doi">10.1634/theoncologist.2011-0145</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wolbert</surname><given-names>T</given-names> </name><name name-style="western"><surname>Leigh</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Barry</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Later stage disease and earlier onset of rectal cancer: epidemiology and outcomes comparison of rectal cancer in a rural Appalachian area to state and national rates</article-title><source>Am Surg</source><year>2018</year><month>07</month><day>1</day><volume>84</volume><issue>7</issue><fpage>1229</fpage><lpage>1235</lpage><pub-id pub-id-type="medline">30064594</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lengerich</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Tucker</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Powell</surname><given-names>RK</given-names> </name><etal/></person-group><article-title>Cancer incidence in Kentucky, Pennsylvania, and West Virginia: disparities in Appalachia</article-title><source>J Rural Health</source><year>2005</year><volume>21</volume><issue>1</issue><fpage>39</fpage><lpage>47</lpage><pub-id pub-id-type="doi">10.1111/j.1748-0361.2005.tb00060.x</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>Colorectal cancer survival statistics</article-title><source>Cancer Research UK</source><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerresearchuk.org">https://www.cancerresearchuk.org</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names> </name></person-group><article-title>Cancer statistics, 2018</article-title><source>CA Cancer J Clin</source><year>2018</year><month>01</month><volume>68</volume><issue>1</issue><fpage>7</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.3322/caac.21442</pub-id><pub-id pub-id-type="medline">29313949</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lynch</surname><given-names>HT</given-names> </name><name name-style="western"><surname>de la 
Chapelle</surname><given-names>A</given-names> </name></person-group><article-title>Hereditary colorectal cancer</article-title><source>N Engl J Med</source><year>2015</year><volume>372</volume><issue>16</issue><fpage>1601</fpage><lpage>1613</lpage><pub-id pub-id-type="doi">10.1056/NEJMra1401510</pub-id><pub-id pub-id-type="medline">26352827</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giardiello</surname><given-names>FM</given-names> </name><name name-style="western"><surname>Allen</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Axilbund</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Guidelines on genetic evaluation and management of Lynch syndrome: a consensus statement by the US Multi-society Task Force on colorectal cancer</article-title><source>Am J Gastroenterol</source><year>2014</year><month>08</month><volume>109</volume><issue>8</issue><fpage>1159</fpage><lpage>1179</lpage><pub-id pub-id-type="doi">10.1038/ajg.2014.186</pub-id><pub-id pub-id-type="medline">25070057</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baena</surname><given-names>R</given-names> </name><name name-style="western"><surname>Salinas</surname><given-names>P</given-names> </name></person-group><article-title>Diet and colorectal cancer</article-title><source>Maturitas</source><year>2015</year><month>03</month><volume>80</volume><issue>3</issue><fpage>258</fpage><lpage>264</lpage><pub-id pub-id-type="doi">10.1016/j.maturitas.2014.12.017</pub-id><pub-id pub-id-type="medline">25619144</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><source>Red Meat and Processed Meat: IARC Monographs on the Evaluation of Carcinogenic 
Risks to Humans</source><year>2015</year><access-date>2025-02-13</access-date><volume>114</volume><publisher-name>International Agency for Research on Cancer</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://publications.iarc.who.int/Book-And-Report-Series/Iarc-Monographs-On-The-Identification-Of-Carcinogenic-Hazards-To-Humans/Red-Meat-And-Processed-Meat-2018">https://publications.iarc.who.int/Book-And-Report-Series/Iarc-Monographs-On-The-Identification-Of-Carcinogenic-Hazards-To-Humans/Red-Meat-And-Processed-Meat-2018</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Menezes</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Bergmann</surname><given-names>A</given-names> </name><name name-style="western"><surname>Thuler</surname><given-names>LCS</given-names> </name></person-group><article-title>Alcohol consumption and risk of cancer: a systematic literature review</article-title><source>Asian Pac J Cancer Prev</source><year>2013</year><volume>14</volume><issue>9</issue><fpage>4965</fpage><lpage>4972</lpage><pub-id pub-id-type="doi">10.7314/apjcp.2013.14.9.4965</pub-id><pub-id pub-id-type="medline">24175760</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mart&#x00ED;nez</surname><given-names>ME</given-names> </name><name name-style="western"><surname>McPherson</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Annegers</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Levin</surname><given-names>B</given-names> </name></person-group><article-title>Cigarette smoking and alcohol consumption as risk factors for colorectal adenomatous polyps</article-title><source>J Natl Cancer 
Inst</source><year>1995</year><month>02</month><day>15</day><volume>87</volume><issue>4</issue><fpage>274</fpage><lpage>279</lpage><pub-id pub-id-type="doi">10.1093/jnci/87.4.274</pub-id><pub-id pub-id-type="medline">7707418</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedenreich</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Shaw</surname><given-names>E</given-names> </name><name name-style="western"><surname>Neilson</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Brenner</surname><given-names>DR</given-names> </name></person-group><article-title>Epidemiology and biology of physical activity and cancer recurrence</article-title><source>J Mol Med (Berl)</source><year>2017</year><month>10</month><volume>95</volume><issue>10</issue><fpage>1029</fpage><lpage>1041</lpage><pub-id pub-id-type="doi">10.1007/s00109-017-1558-9</pub-id><pub-id pub-id-type="medline">28620703</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wiseman</surname><given-names>M</given-names> </name></person-group><article-title>The second World Cancer Research Fund/American Institute for Cancer Research expert report. 
Food, nutrition, physical activity, and the prevention of cancer: a global perspective</article-title><source>Proc Nutr Soc</source><year>2008</year><month>08</month><volume>67</volume><issue>3</issue><fpage>253</fpage><lpage>256</lpage><pub-id pub-id-type="doi">10.1017/S002966510800712X</pub-id><pub-id pub-id-type="medline">18452640</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Obesity and risk of colorectal cancer: a systematic review of prospective studies</article-title><source>PLoS ONE</source><year>2013</year><volume>8</volume><issue>1</issue><fpage>e53916</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0053916</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giovannucci</surname><given-names>E</given-names> </name></person-group><article-title>Nutrition, insulin, insulin-like growth factors and cancer</article-title><source>Horm Metab Res</source><year>2003</year><volume>35</volume><issue>11-12</issue><fpage>694</fpage><lpage>704</lpage><pub-id pub-id-type="doi">10.1055/s-2004-814147</pub-id><pub-id pub-id-type="medline">14710348</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gibson</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Park</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Robien</surname><given-names>K</given-names> 
</name><etal/></person-group><article-title>Body mass index and risk of second obesity-associated cancers after colorectal cancer: a pooled analysis of prospective cohort studies</article-title><source>J Clin Oncol</source><year>2014</year><month>12</month><day>10</day><volume>32</volume><issue>35</issue><fpage>4004</fpage><lpage>4011</lpage><pub-id pub-id-type="doi">10.1200/JCO.2014.56.8444</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kahi</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Imperiale</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Juliar</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Rex</surname><given-names>DK</given-names> </name></person-group><article-title>Effect of screening colonoscopy on colorectal cancer incidence and mortality</article-title><source>Clin Gastroenterol Hepatol</source><year>2009</year><month>07</month><volume>7</volume><issue>7</issue><fpage>770</fpage><lpage>775</lpage><pub-id pub-id-type="doi">10.1016/j.cgh.2008.12.030</pub-id><pub-id pub-id-type="medline">19268269</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Goding Sauer</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Colorectal cancer statistics, 2020</article-title><source>CA Cancer J Clin</source><year>2020</year><month>05</month><volume>70</volume><issue>3</issue><fpage>145</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.3322/caac.21601</pub-id></nlm-citation></ref><ref 
id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shia</surname><given-names>J</given-names> </name><name name-style="western"><surname>Klimstra</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Bagci</surname><given-names>P</given-names> </name><name name-style="western"><surname>Basturk</surname><given-names>O</given-names> </name><name name-style="western"><surname>Adsay</surname><given-names>NV</given-names> </name></person-group><article-title>TNM staging of colorectal carcinoma: issues and caveats</article-title><source>Semin Diagn Pathol</source><year>2012</year><month>08</month><volume>29</volume><issue>3</issue><fpage>142</fpage><lpage>153</lpage><pub-id pub-id-type="doi">10.1053/j.semdp.2012.02.001</pub-id><pub-id pub-id-type="medline">23062421</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Albadari</surname><given-names>N</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>W</given-names> </name></person-group><article-title>Deciphering treatment resistance in metastatic colorectal cancer: roles of drug transports, EGFR mutations, and HGF/c-MET signaling</article-title><source>Front Pharmacol</source><year>2024</year><volume>14</volume><fpage>1340401</fpage><pub-id pub-id-type="doi">10.3389/fphar.2023.1340401</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>SEER summary stage manual 2018</article-title><source>National Cancer Institute Surveillance, Epidemiology, and End Results Program</source><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://seer.cancer.gov/tools/ssm/2018-Summary-Stage-Manual.pdf">https://seer.cancer.gov/tools/ssm/2018-Summary-Stage-Manual.pdf</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rawla</surname><given-names>P</given-names> </name><name name-style="western"><surname>Barsouk</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hadjinicolaou</surname><given-names>AV</given-names> </name><name name-style="western"><surname>Barsouk</surname><given-names>A</given-names> </name></person-group><article-title>Immunotherapies and targeted therapies in the treatment of metastatic colorectal cancer</article-title><source>Med Sci (Basel)</source><year>2019</year><month>07</month><day>30</day><volume>7</volume><issue>8</issue><fpage>83</fpage><pub-id pub-id-type="doi">10.3390/medsci7080083</pub-id><pub-id pub-id-type="medline">31366129</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Biller</surname><given-names>LH</given-names> </name><name name-style="western"><surname>Schrag</surname><given-names>D</given-names> </name></person-group><article-title>Diagnosis and treatment of metastatic colorectal cancer: a review</article-title><source>JAMA</source><year>2021</year><month>02</month><day>16</day><volume>325</volume><issue>7</issue><fpage>669</fpage><lpage>685</lpage><pub-id pub-id-type="doi">10.1001/jama.2021.0106</pub-id><pub-id pub-id-type="medline">33591350</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Underwood</surname><given-names>PW</given-names> </name><name name-style="western"><surname>Ruff</surname><given-names>SM</given-names> 
</name><name name-style="western"><surname>Pawlik</surname><given-names>TM</given-names> </name></person-group><article-title>Update on targeted therapy and immunotherapy for metastatic colorectal cancer</article-title><source>Cells</source><year>2024</year><month>01</month><day>28</day><volume>13</volume><issue>3</issue><fpage>245</fpage><pub-id pub-id-type="doi">10.3390/cells13030245</pub-id><pub-id pub-id-type="medline">38334637</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Wagle</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names> </name></person-group><article-title>Cancer statistics, 2023</article-title><source>CA Cancer J Clin</source><year>2023</year><month>01</month><volume>73</volume><issue>1</issue><fpage>17</fpage><lpage>48</lpage><pub-id pub-id-type="doi">10.3322/caac.21763</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kourou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Karamouzis</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Fotiadis</surname><given-names>DI</given-names> </name></person-group><article-title>Machine learning applications in cancer prognosis and prediction</article-title><source>Comput Struct Biotechnol 
J</source><year>2015</year><volume>13</volume><fpage>8</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2014.11.005</pub-id><pub-id pub-id-type="medline">25750696</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alboaneen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Alqarni</surname><given-names>R</given-names> </name><name name-style="western"><surname>Alqahtani</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Predicting colorectal cancer using machine and deep learning algorithms: challenges and opportunities</article-title><source>Big Data Cogn Comput</source><year>2023</year><volume>7</volume><issue>2</issue><fpage>74</fpage><pub-id pub-id-type="doi">10.3390/bdcc7020074</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>CK</given-names> </name></person-group><article-title>Machine learning for survival analysis</article-title><source>ACM Comput Surv</source><year>2019</year><month>11</month><day>30</day><volume>51</volume><issue>6</issue><fpage>1</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1145/3214306</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kolla</surname><given-names>L</given-names> </name><name name-style="western"><surname>Parikh</surname><given-names>RB</given-names> </name></person-group><article-title>Uses and limitations of artificial intelligence for 
oncology</article-title><source>Cancer</source><year>2024</year><month>06</month><day>15</day><volume>130</volume><issue>12</issue><fpage>2101</fpage><lpage>2107</lpage><pub-id pub-id-type="doi">10.1002/cncr.35307</pub-id><pub-id pub-id-type="medline">38554271</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aparasu</surname><given-names>RR</given-names> </name></person-group><article-title>Application of machine learning in predicting survival outcomes involving real-world data: a scoping review</article-title><source>BMC Med Res Methodol</source><year>2023</year><month>11</month><day>13</day><volume>23</volume><issue>1</issue><fpage>268</fpage><pub-id pub-id-type="doi">10.1186/s12874-023-02078-1</pub-id><pub-id pub-id-type="medline">37957593</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Glare</surname><given-names>P</given-names> </name><name name-style="western"><surname>Virik</surname><given-names>K</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A systematic review of physicians&#x2019; survival predictions in terminally ill cancer patients</article-title><source>Ann Palliat Med</source><year>2003</year><volume>1</volume><issue>2</issue><fpage>141</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.21037/apm.2019.09.02</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Schaffar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rapiti</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rachet</surname><given-names>B</given-names> </name><name name-style="western"><surname>Woods</surname><given-names>L</given-names> </name></person-group><article-title>Accuracy of cause of death data routinely recorded in a population-based cancer registry: impact on cause-specific survival and validation using the Geneva Cancer Registry</article-title><source>BMC Cancer</source><year>2013</year><month>12</month><day>27</day><volume>13</volume><issue>1</issue><fpage>609</fpage><pub-id pub-id-type="doi">10.1186/1471-2407-13-609</pub-id><pub-id pub-id-type="medline">24373194</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brenner</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hakulinen</surname><given-names>T</given-names> </name></person-group><article-title>Implications of incomplete registration of deaths on long-term survival estimates from population-based cancer registries</article-title><source>Int J Cancer</source><year>2009</year><month>07</month><day>15</day><volume>125</volume><issue>2</issue><fpage>432</fpage><lpage>437</lpage><pub-id pub-id-type="doi">10.1002/ijc.24344</pub-id><pub-id pub-id-type="medline">19422045</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Howlader</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mariotto</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Woloshin</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Schwartz</surname><given-names>LM</given-names></name></person-group><article-title>Providing clinicians and patients with actual prognosis: SEER cancer survival statistics at the point of care</article-title><source>J Natl Cancer Inst Monogr</source><year>2014</year><month>08</month><volume>2014</volume><issue>49</issue><fpage>282</fpage><lpage>289</lpage><pub-id pub-id-type="doi">10.1093/jncimonographs/lgu022</pub-id><pub-id pub-id-type="medline">25417239</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Amin</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Edge</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Greene</surname><given-names>FL</given-names> </name><etal/></person-group><source>AJCC Cancer Staging Manual</source><year>2017</year><access-date>2025-11-17</access-date><edition>8</edition><publisher-name>Springer</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://link.springer.com/book/9783319406176">https://link.springer.com/book/9783319406176</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>SEER summary staging manual 2000</article-title><source>National Cancer Institute Surveillance, Epidemiology, and End Results Program</source><year>2001</year><access-date>2025-09-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://seer.cancer.gov/tools/ssm/ssm2000/">https://seer.cancer.gov/tools/ssm/ssm2000/</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaplan</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Meier</surname><given-names>P</given-names> 
</name></person-group><article-title>Nonparametric estimation from incomplete observations</article-title><source>J Am Stat Assoc</source><year>1958</year><month>06</month><volume>53</volume><issue>282</issue><fpage>457</fpage><lpage>481</lpage><pub-id pub-id-type="doi">10.1080/01621459.1958.10501452</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cox</surname><given-names>DR</given-names> </name></person-group><article-title>Regression models and life-tables</article-title><source>J R Stat Soc Series B Stat Methodol</source><year>1972</year><month>01</month><day>1</day><volume>34</volume><issue>2</issue><fpage>187</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1111/j.2517-6161.1972.tb00899.x</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wei</surname><given-names>LJ</given-names> </name></person-group><article-title>The accelerated failure time model: a useful alternative to the Cox regression model in survival analysis</article-title><source>Stat Med</source><year>1992</year><volume>11</volume><issue>14-15</issue><fpage>1871</fpage><lpage>1879</lpage><pub-id pub-id-type="doi">10.1002/sim.4780111409</pub-id><pub-id pub-id-type="medline">1480879</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>XGBoost: a scalable tree boosting system</article-title><source>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source><year>2016</year><publisher-name>Association for 
Computing Machinery</publisher-name><fpage>785</fpage><lpage>794</lpage><pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ishwaran</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kogalur</surname><given-names>UB</given-names> </name><name name-style="western"><surname>Blackstone</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Lauer</surname><given-names>MS</given-names> </name></person-group><article-title>Random survival forests</article-title><source>Ann Appl Stat</source><year>2008</year><volume>2</volume><issue>3</issue><fpage>841</fpage><lpage>860</lpage><pub-id pub-id-type="doi">10.1214/08-AOAS169</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tibshirani</surname><given-names>R</given-names> </name></person-group><article-title>Regression shrinkage and selection via the Lasso</article-title><source>J R Stat Soc Series B Stat Methodol</source><year>1996</year><month>01</month><day>1</day><volume>58</volume><issue>1</issue><fpage>267</fpage><lpage>288</lpage><pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name></person-group><article-title>Regularization and variable selection via the elastic net</article-title><source>J R Stat Soc Series B Stat 
Methodol</source><year>2005</year><month>04</month><day>1</day><volume>67</volume><issue>2</issue><fpage>301</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group><article-title>R: a language and environment for statistical computing</article-title><source>R Foundation for Statistical Computing</source><access-date>2025-11-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link></comment></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="web"><article-title>Python: version 3.11</article-title><source>Python Software Foundation</source><access-date>2025-09-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.python.org">https://www.python.org</ext-link></comment></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von Elm</surname><given-names>E</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Egger</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The Strengthening the Reporting of Observational Studies in Epidemiology (STROBE) Statement: guidelines for reporting observational studies</article-title><source>Int J Surg</source><year>2014</year><month>12</month><volume>12</volume><issue>12</issue><fpage>1495</fpage><lpage>1499</lpage><pub-id pub-id-type="doi">10.1016/j.ijsu.2014.07.013</pub-id><pub-id pub-id-type="medline">25046131</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Benchimol</surname><given-names>EI</given-names> </name><name name-style="western"><surname>Smeeth</surname><given-names>L</given-names> </name><name name-style="western"><surname>Guttmann</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The REporting of studies Conducted using Observational Routinely-collected health Data (RECORD) statement</article-title><source>PLoS Med</source><year>2015</year><month>10</month><volume>12</volume><issue>10</issue><fpage>e1001885</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1001885</pub-id><pub-id pub-id-type="medline">26440803</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Dhiman</surname><given-names>P</given-names> </name><etal/></person-group><article-title>TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title><source>BMJ</source><year>2024</year><month>04</month><day>16</day><volume>385</volume><fpage>e078378</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id><pub-id pub-id-type="medline">38626948</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singh</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Fraser</surname><given-names>GE</given-names> </name></person-group><article-title>Dietary risk factors for colon cancer in a low-risk population</article-title><source>Am J 
Epidemiol</source><year>1998</year><month>10</month><day>15</day><volume>148</volume><issue>8</issue><fpage>761</fpage><lpage>774</lpage><pub-id pub-id-type="doi">10.1093/oxfordjournals.aje.a009697</pub-id><pub-id pub-id-type="medline">9786231</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giovannucci</surname><given-names>E</given-names> </name></person-group><article-title>Obesity, gender, and colon cancer</article-title><source>Gut</source><year>2002</year><month>08</month><volume>51</volume><issue>2</issue><fpage>147</fpage><pub-id pub-id-type="doi">10.1136/gut.51.2.147</pub-id><pub-id pub-id-type="medline">12117867</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ning</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Giovannucci</surname><given-names>EL</given-names> </name></person-group><article-title>A quantitative analysis of body mass index and colorectal cancer: findings from 56 observational studies</article-title><source>Obes Rev</source><year>2010</year><month>01</month><volume>11</volume><issue>1</issue><fpage>19</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1111/j.1467-789X.2009.00613.x</pub-id><pub-id pub-id-type="medline">19538439</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tsoi</surname><given-names>KKF</given-names> </name><name name-style="western"><surname>Pau</surname><given-names>CYY</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>WKK</given-names> </name><name 
name-style="western"><surname>Chan</surname><given-names>FKL</given-names> </name><name name-style="western"><surname>Griffiths</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sung</surname><given-names>JJY</given-names> </name></person-group><article-title>Cigarette smoking and the risk of colorectal cancer: a meta-analysis of prospective cohort studies</article-title><source>Clin Gastroenterol Hepatol</source><year>2009</year><month>06</month><volume>7</volume><issue>6</issue><fpage>682</fpage><lpage>688</lpage><pub-id pub-id-type="doi">10.1016/j.cgh.2009.02.016</pub-id><pub-id pub-id-type="medline">19245853</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Farmer</surname><given-names>T</given-names> </name><name name-style="western"><surname>Garland-Kledzik</surname><given-names>M</given-names> </name><name name-style="western"><surname>Magge</surname><given-names>DR</given-names> </name></person-group><article-title>Disparities in advanced stage colorectal cancer outcomes in Appalachia: a comprehensive review</article-title><source>Am Surg</source><year>2025</year><month>04</month><volume>91</volume><issue>4</issue><fpage>633</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1177/00031348241312124</pub-id><pub-id pub-id-type="medline">39749412</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="web"><article-title>Colorectal cancer screening (PDQ&#x00AE;)</article-title><source>National Cancer Institute</source><year>2024</year><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.cancer.gov/types/colorectal/hp/colorectal-screening-pdq">https://www.cancer.gov/types/colorectal/hp/colorectal-screening-pdq</ext-link></comment></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="web"><article-title>Colorectal cancer prevention (PDQ&#x00AE;)</article-title><source>National Cancer Institute</source><year>2024</year><access-date>2025-02-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://www.cancer.gov/cancertopics/pdq/prevention/colorectal/HealthProfessional/page2#Section_995">http://www.cancer.gov/cancertopics/pdq/prevention/colorectal/HealthProfessional/page2#Section_995</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Parameter estimates from both the accelerated failure time model and the Cox model, shown for the overall cohort and for each subgroup.</p><media xlink:href="cancer_v11i1e72665_app1.docx" xlink:title="DOCX File, 49 KB"/></supplementary-material></app-group></back></article>