<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e71937</article-id><article-id pub-id-type="doi">10.2196/71937</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Association Between Comorbidity Clusters and Mortality in Patients With Cancer: Predictive Modeling Using Machine Learning Approaches of Data From the United States and Hong Kong</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Lam</surname><given-names>Chun Sing</given-names></name><degrees>BPharm, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hua</surname><given-names>Rong</given-names></name><degrees>MPhil</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Loong</surname><given-names>Herbert Ho-Fung</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Ngan</surname><given-names>Chun-Kit</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Cheung</surname><given-names>Yin Ting</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>School of Pharmacy, Faculty of Medicine, Chinese University of Hong Kong</institution><addr-line>8th Floor, Lo Kwee-Seong Integrated Biomedical Sciences Building, Area 39, The Chinese University of Hong Kong, Shatin, N.T</addr-line><addr-line>Hong Kong</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Clinical Oncology, Faculty of Medicine, Chinese University of Hong Kong</institution><addr-line>Hong Kong</addr-line><country>China</country></aff><aff id="aff3"><institution>Data Science Program, Worcester Polytechnic Institute</institution><addr-line>Worcester</addr-line><country>United States</country></aff><aff id="aff4"><institution>Hong Kong Hub of Pediatric Excellence, Chinese University of Hong Kong</institution><addr-line>Hong Kong</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Afolabi</surname><given-names>Boluwatife</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>David</surname><given-names>Temitope</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yin Ting Cheung, PhD, School of Pharmacy, Faculty of Medicine, Chinese University of Hong Kong, 8th Floor, Lo Kwee-Seong Integrated Biomedical Sciences Building, Area 39, The Chinese University of Hong Kong, 
Shatin, N.T, Hong Kong, China, 852 39436833; <email>yinting.cheung@cuhk.edu.hk</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>16</day><month>7</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e71937</elocation-id><history><date date-type="received"><day>30</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>25</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>26</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Chun Sing Lam, Rong Hua, Herbert Ho-Fung Loong, Chun-Kit Ngan, Yin Ting Cheung. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 16.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e71937"/><abstract><sec><title>Background</title><p>Patients with cancer and cancer survivors often experience multiple chronic health conditions, which can impact symptom burden and treatment outcomes. 
Despite the high prevalence of multimorbidity, research on cancer prognosis has predominantly focused on cancers in isolation. There is growing interest in machine learning techniques for cancer studies. However, these methods have not been applied in the context of supportive care for patients with cancer who have multimorbidity. Furthermore, few studies have investigated the associations between comorbidity clusters and mortality outcomes.</p></sec><sec><title>Objective</title><p>This study investigated comorbidity clusters among patients with cancer using machine learning and examined their associations with mortality outcomes in two large representative samples from the United States and Hong Kong.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study used data from the National Health and Nutrition Examination Survey (NHANES) and the Hospital Authority Data Collaboration Laboratory (HADCL). Participants aged &#x2265;20 years with a history of cancer were included. The study used a two-step framework to identify clusters of comorbidities in NHANES. In the first step, we used four machine learning techniques, including the Bernoulli mixture model and partition-based methods, to cluster the comorbidities. In the second step, domain experts reviewed and ranked the identified clusters to ensure clinical relevance. The clusters that had the highest average rank were selected for further analysis. The associations between comorbidity clusters and mortality outcomes were analyzed using Cox proportional hazards models. We conducted an external validation to evaluate the generalizability of the clusters identified in the NHANES cohort and their associations with mortality using HADCL. 
The same number of clusters was replicated based on the distinctive patterns and distribution of comorbidities observed within each cluster.</p></sec><sec sec-type="results"><title>Results</title><p>The study included 4390 participants in NHANES and 12,484 participants in HADCL. Four comorbidity clusters were identified: low comorbidity, metabolic, cardiovascular disease (CVD), and respiratory. In NHANES, participants in the respiratory cluster had the highest risk of all-cause mortality (adjusted hazard ratio [aHR] 1.62, 95% CI 1.26&#x2010;2.08; <italic>P</italic>&#x003C;.001), followed by the CVD cluster (aHR 1.50, 95% CI 1.26&#x2010;1.80; <italic>P</italic>&#x003C;.001) compared to the low comorbidity cluster. The metabolic, CVD, and respiratory clusters were all associated with higher risks of CVD-related mortality (aHR 1.48&#x2010;3.05, 95% CI 1.14&#x2010;4.07; <italic>P</italic>&#x003C;.003). The effects of comorbidity clusters on mortality were modified by income-to-poverty ratio (<italic>P</italic> for interaction=.04), diet quality (<italic>P</italic> for interaction=.02), and cancer prognosis (<italic>P</italic> for interaction=.005). In the HADCL (validation) cohort, participants in the respiratory and CVD clusters had a higher risk of all-cause mortality.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>High comorbidity burden clusters showed increased all-cause and CVD-related mortality in patients with cancer. These findings highlight the significance of considering comorbidity burden in cancer care. Machine learning approaches can provide valuable insights into complex multimorbidity profiles. 
Further research is needed to deepen understanding of the relationships between multimorbidity and cancer-specific outcomes.</p></sec></abstract><kwd-group><kwd>comorbidity</kwd><kwd>multimorbidity</kwd><kwd>machine learning</kwd><kwd>cluster</kwd><kwd>clustering</kwd><kwd>cancer</kwd><kwd>mortality</kwd><kwd>oncology</kwd><kwd>multiple chronic conditions</kwd><kwd>metabolic</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Advancements in cancer treatment have significantly increased survival rates and life expectancy for patients with cancer [<xref ref-type="bibr" rid="ref1">1</xref>]. However, survivors may also experience multiple chronic health conditions. The prevalence of multimorbidity, which refers to the presence of two or more medical conditions simultaneously, is steadily rising with improvements in life expectancy [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. In the United States, 40% of patients with cancer have at least one other chronic condition, and 15% have two or more comorbidities [<xref ref-type="bibr" rid="ref4">4</xref>]. Comorbidities are believed to influence cancer detection, treatment uptake, and treatment toxicity [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Hence, there is an urgent need to shift the focus of health care from individual diseases to a more comprehensive approach that considers clusters of medical conditions [<xref ref-type="bibr" rid="ref2">2</xref>]. Despite the high prevalence of multimorbidity, clinical and epidemiological research on cancer prognosis has largely focused on cancers in isolation. 
It is crucial to recognize that the co-occurrence of chronic health conditions can impact symptom burden and treatment outcomes in patients with cancer [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Recently, there has been growing interest in machine learning techniques, including unsupervised and supervised learning, for use in cancer detection, classification, staging, and treatment evaluation [<xref ref-type="bibr" rid="ref8">8</xref>]. However, these methods have not been extensively applied in the context of supportive care for patients with cancer who have multimorbidity. Several studies have used clustering methods or factor analysis to identify clusters of chronic conditions or symptoms among patients with cancer [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>], but few studies have investigated the associations between these clusters and survival outcomes. A rare example is the study of Hahn et al [<xref ref-type="bibr" rid="ref13">13</xref>], who used latent class analysis to identify 4 comorbidity classes and found that clusters characterized by cardiovascular diseases (CVDs), diabetes, and chronic obstructive pulmonary disease were associated with worse overall survival rates in patients with colorectal cancer. It is currently unclear whether similar associations hold for people with other cancers.</p><p>There is a clear need to identify comorbidity clusters to provide prognostic information regarding cancer and coexisting health conditions [<xref ref-type="bibr" rid="ref5">5</xref>]. Such information could greatly assist in making treatment decisions for patients with cancer who have comorbidities. Therefore, the objective of this predictive modeling study was to investigate clusters of comorbidities among patients with cancer in a nationally representative sample and examine their associations with survival outcomes. 
We also attempted to validate the association found between the comorbidity clusters identified through machine learning and mortality using another large representative cohort from a different geographical location.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>The reporting of the study adheres to the Guidelines for Developing and Reporting Machine Learning Predictive Models in Biomedical Research (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref14">14</xref>].</p><sec id="s2-1"><title>Part 1: Cluster Identification (National Health and Nutrition Examination Survey Data)</title><sec id="s2-1-1"><title>Study Population</title><p>This retrospective study used data from 10 cycles of the National Health and Nutrition Examination Survey (NHANES), a periodic cross-sectional survey, conducted from 1999 to 2018 [<xref ref-type="bibr" rid="ref15">15</xref>]. The NHANES assessed the health and nutritional status of a nationally representative sample of the civilian population in the United States. Detailed information about the sampling methodology has been reported elsewhere [<xref ref-type="bibr" rid="ref15">15</xref>]. The NHANES was approved by the National Center for Health Statistics Institutional Review Board, and informed consent was obtained from all participants.</p><p>We included participants aged &#x2265;20 years with a self-reported history of cancer in this study. They were asked, &#x201C;Have you ever been told by a doctor or other health professional that you had cancer or a malignancy of any kind?&#x201D; A positive response to this question indicated a cancer diagnosis. 
We excluded participants if they (1) were diagnosed solely with nonmelanoma skin cancer and had no other cancer types or (2) did not report the age at which they were diagnosed with cancer.</p></sec><sec id="s2-1-2"><title>Covariates</title><p>Information on sociodemographic and lifestyle characteristics was collected through at-home interviews. This included the participants&#x2019; sex, age, ethnicity, education level, income level, smoking status, alcohol consumption, physical activity, diet, and supplement use (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Body weight and height measurements were taken at a mobile examination center. The Healthy Eating Index (HEI) score, a validated measure of diet quality, was calculated using dietary recall data to evaluate conformance with federal dietary guidelines in the United States [<xref ref-type="bibr" rid="ref16">16</xref>]. The participants also provided information about their age at cancer diagnosis and the type of cancer diagnosed. We further classified the cancer diagnoses based on their prognosis according to US statistics [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p></sec><sec id="s2-1-3"><title>Ascertainment of Comorbidities</title><p>Fifteen specific conditions were consistently assessed in all waves of the survey. These conditions included CVDs (ie, congestive heart failure, coronary heart diseases, angina, heart attack [myocardial infarction], and stroke) [<xref ref-type="bibr" rid="ref19">19</xref>], metabolic syndromes (hypertension, diabetes, and hyperlipidemia) [<xref ref-type="bibr" rid="ref20">20</xref>], respiratory diseases (chronic bronchitis, asthma, and emphysema), arthritis, liver conditions, thyroid problems, and kidney disease. 
For most comorbidities, the participants were categorized as having a specific condition if they answered &#x201C;Yes&#x201D; to the question: &#x201C;Has a doctor or other health professional ever told you that you have [condition]?&#x201D; In addition to self-reported information, this study also defined diabetes (fasting glucose level or glycated hemoglobin A<sub>1c</sub> level) and hypertension (systolic or diastolic blood pressure) based on quantitative measurements.</p></sec><sec id="s2-1-4"><title>Clustering of Comorbidities</title><p>The study used a 2-step framework to identify clusters of comorbidities. In the first step, we used 4 machine learning techniques to cluster the comorbidities based on the binary (categorical) nature of the comorbidity data. One of these was the Bernoulli mixture model. Mixture models have previously been applied to clustering and dimensionality reduction problems [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. The Bernoulli variant was chosen here, as previous studies have shown its suitability for modeling binary data [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. The three other models were partition-based methods, which divide the data into a predefined number of partitions corresponding to the number of clusters [<xref ref-type="bibr" rid="ref24">24</xref>]. Because traditional K-means algorithms are highly sensitive to noise and outliers, they are not suitable for clustering categorical data. Instead, K-modes and K-medoids should be used. 
K-modes replaces means with modes to find the clusters and uses a simple matching dissimilarity measure for clustering data objects that are characterized by categorical attributes only [<xref ref-type="bibr" rid="ref25">25</xref>], while K-medoids selects, in each iteration, an actual and representative data object for each cluster, namely the most centrally located object within the cluster [<xref ref-type="bibr" rid="ref26">26</xref>]. The final approach was based on K-medoids and incorporated a bisecting methodology [<xref ref-type="bibr" rid="ref27">27</xref>]. Previous studies have successfully applied these methods to identify clusters of symptoms [<xref ref-type="bibr" rid="ref28">28</xref>], clinical prognostic features in oncology [<xref ref-type="bibr" rid="ref29">29</xref>], and comorbidities with other chronic diseases [<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>In the second step, the study incorporated domain knowledge into the interpretation of the clusters [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. As this step required knowledge in clinical oncology, survivorship, and data analytics, domain experts who were clinicians or clinician researchers with relevant experience were invited to review the results from the first step. One medical oncologist (HHL), one cancer epidemiologist (YTC), one pharmacist (CSL), and one data scientist (CN) participated in this step. First, they were provided with the clustering results from the four approaches without identification of the specific method used. Then, they were asked to examine the relative distribution of comorbidities across the clusters generated by the machine learning methods and to assess the distinguishability of patterns of comorbidities between the clusters. After that, they ranked the clusters by clinical relevance. 
The clusters with the highest average rank were selected for further analysis to investigate their associations with mortality outcomes. We also used performance metrics, including Silhouette analyses, Calinski-Harabasz index, and Davies-Bouldin index, to support the selection [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. This ensured that the clusters chosen for detailed investigation were those with the most potential to provide clinical insights into the relationship between comorbidities and mortality in patients with cancer. The clustering process was performed using Python (version 3.10; Python Software Foundation) and R (version 4.0.1; R Foundation).</p></sec><sec id="s2-1-5"><title>Mortality Outcomes</title><p>The NHANES was linked to the death certificate records from the National Death Index using a probabilistic match method [<xref ref-type="bibr" rid="ref35">35</xref>]. The participants were followed up from the date of interview to the date of death or December 31, 2019, whichever came first (the last date for available mortality data). Causes of death were coded using the <italic>ICD-10</italic> (<italic>International Statistical Classification of Diseases, 10th Revision</italic>). The primary outcomes comprised all-cause mortality and the top three cause-specific mortalities: cancer (<italic>ICD-10</italic> codes C00-C97), CVD (<italic>ICD-10</italic> codes I00-I09, I11, I13, I20-I51, and I60-I69), and respiratory diseases (<italic>ICD-10</italic> codes J40-J47).</p></sec><sec id="s2-1-6"><title>Statistical Analysis</title><p>We conducted data analyses in accordance with the NHANES guidelines [<xref ref-type="bibr" rid="ref15">15</xref>]. The survey design was taken into account by applying sample weights, clustering, and stratification in all analyses. 
Participants with missing data on death, comorbidities, or other covariates were excluded from the study.</p><p>Cox proportional hazards models were used to examine the associations between comorbidity clusters and mortality outcomes. We ran three models: model 1 (unadjusted), model 2 (adjusted for the age and sex of the participants), and model 3 (further adjusted for socioeconomic factors [educational level, ethnicity, and income-to-poverty ratio], lifestyle behaviors [BMI, HEI score, smoking status, alcohol drinking, physical activity, and supplement use], cancer prognosis, and time since the cancer diagnosis). Multiple imputation using the MICE (Multivariate Imputation by Chained Equations) package was conducted to address missing values [<xref ref-type="bibr" rid="ref36">36</xref>]. We conducted stratified analyses to assess potential effect modification by covariates on the associations between comorbidity clusters and mortality.</p><p>Statistical analyses were carried out using SAS 9.4 (SAS Institute Inc) and R 4.0.1 (R Foundation). A <italic>P</italic>&#x003C;.05 was considered statistically significant.</p></sec></sec><sec id="s2-2"><title>Part 2: Cluster Verification (Hospital Authority Data Collaboration Laboratory Data)</title><sec id="s2-2-1"><title>Study Population</title><p>In Hong Kong, the Hospital Authority is a statutory body that governs all public hospital services. These hospitals provide approximately 90% of all secondary and tertiary care services in Hong Kong [<xref ref-type="bibr" rid="ref37">37</xref>]. Hospital Authority Data Collaboration Laboratory (HADCL) provides comprehensive deidentified data, including sociodemographic details, clinical diagnoses, medications, and hospital admission data from the Hospital Authority, and has been used in large-scale epidemiological studies [<xref ref-type="bibr" rid="ref38">38</xref>]. 
A large subset of the data, including approximately 200,000 participants who used public health care services in 2007 and 2017, was accessed through a self-service data platform. To ensure the representativeness of the sample, HADCL used a proportionate random sampling approach [<xref ref-type="bibr" rid="ref39">39</xref>]. This HADCL cohort is a relatively representative sample of the general population in Hong Kong and has been used widely in epidemiological studies [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>For this study, we included participants diagnosed with malignant cancer (<italic>ICD-10</italic> codes C00-C97). Similar to the NHANES cohort, we excluded participants if they (1) were &#x003C;20 years of age or (2) were diagnosed solely with nonmelanoma skin cancer (<italic>ICD-10</italic> code C44) and had no other cancer types.</p></sec><sec id="s2-2-2"><title>Covariates</title><p>Information on sociodemographic and clinical characteristics was collected from the data repository. These included the individuals&#x2019; sex, residential area, age at cancer diagnosis, and cancer site. The income level of individuals was determined based on the income level of their residential areas (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Cancer diagnoses were further classified according to their prognosis, using classifications consistent with our previous NHANES cohort and local statistics [<xref ref-type="bibr" rid="ref42">42</xref>].</p></sec><sec id="s2-2-3"><title>Ascertainment and Clustering of Comorbidities</title><p>The HADCL data contained clinical diagnoses of patients documented using <italic>ICD-10</italic> codes. 
Based on the NHANES cohort, patient diagnoses of the same fifteen comorbidities (congestive heart failure, coronary heart diseases, angina, heart attack [myocardial infarction], stroke, hypertension, diabetes, hyperlipidemia, chronic bronchitis, asthma, emphysema, arthritis, liver conditions, thyroid problems, and kidney disease) were collected.</p><p>This part is an external validation study to evaluate the generalizability of the comorbidity clusters identified in the NHANES cohort and their associations with mortality within a different group of patients [<xref ref-type="bibr" rid="ref43">43</xref>]. As this was an external validation, instead of a de novo clustering analysis, we replicated the same number of clusters based on the distinctive patterns and distribution of comorbidities observed within each cluster identified in the main cohort [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. We then examined whether the associations between these clusters and all-cause mortality remained consistent.</p></sec><sec id="s2-2-4"><title>Mortality Outcomes and Statistical Analysis</title><p>The date of death was documented in the HADCL dataset. Participants were followed up until the date of death or the end of follow-up (ie, June 1, 2021, which was the last day of captured data available in the database), whichever occurred first. The index date of the study was defined as the date when individuals first received a diagnosis of malignant cancer. Cox proportional hazards models were used to examine the associations between comorbidity clusters and all-cause mortality. Similarly, three models were used: the crude model (model 1), model adjusted for age at cancer diagnosis and sex (model 2), and model adjusted for age at cancer diagnosis, sex, income level of residential district, and cancer prognosis (model 3). 
Participants with missing data on death, comorbidities, or other covariates were excluded from the study.</p><p>Statistical analyses were carried out using R (version 4.0.1; R Foundation). A <italic>P</italic>&#x003C;.05 was considered statistically significant.</p></sec></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This study was approved by the Survey and Behavioural Research Ethics Committee of the Chinese University of Hong Kong (SBRE-23&#x2010;0014), which allowed secondary analysis of HADCL and NHANES data without additional consent. The NHANES study protocol was approved by the Research Ethics Review Board of the National Center for Health Statistics, and informed consent was obtained from all NHANES participants. The data from HADCL and NHANES were deidentified.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Characteristics of Participants in the NHANES Cohort</title><p>From the total of 101,316 individuals in the 1999&#x2010;2018 waves of the NHANES, those without a diagnosis of cancer (n=96,150), those with a sole diagnosis of nonmelanoma skin cancer (n=576), and those with missing data (n=200) were excluded. Ultimately, the analysis included 4390 individuals (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flowchart of individual inclusion and exclusion in the National Health and Nutrition Examination Survey cohort.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e71937_fig01.png"/></fig><p><xref ref-type="table" rid="table1">Table 1</xref> presents the characteristics of the included individuals. The mean age of the participants was 66 (SD 14.6) years, and 54% were female (n=2376). The majority of them were non-Hispanic White (n=2870, 65.4%). 
The median time since cancer diagnosis was 7 (IQR 3-15) years, and the mean age at cancer diagnosis was 55.4 (SD 17.6) years. Among the individuals, the most common cancer diagnosis was genitourinary cancer (n=1102, 25.1%). More than half of them self-reported a diagnosis of hypertension (n=2812, 64.1%), while approximately half each reported having hyperlipidemia (n=2234, 50.9%) and arthritis (n=2206, 50.3%).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Summary of characteristics of individuals diagnosed with cancer in the National Health and Nutrition Examination Survey cohort (N=4390)</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Characteristic</td><td align="left" valign="top">Value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sociodemographic</td></tr><tr><td align="left" valign="top" colspan="2">Sex, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">2014 (45.9)</td></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">2376 (54.1)</td></tr><tr><td align="left" valign="top">Age (years), mean (SD)</td><td align="left" valign="top">66.0 (14.6)</td></tr><tr><td align="left" valign="top" colspan="2">Education level, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Below college</td><td align="left" valign="top">2154 (49.1)</td></tr><tr><td align="left" valign="top">&#x2003;College or above</td><td align="left" valign="top">2236 (50.9)</td></tr><tr><td align="left" valign="top">Family income-to-poverty ratio, mean (SD)</td><td align="left" valign="top">2.65 (1.59)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2264;1.3</td><td align="left" valign="top">1184 (27.0)</td></tr><tr><td align="left" valign="top">&#x2003;1.3&#x2010;3.5</td><td align="left" valign="top">1803 (41.0)</td></tr><tr><td align="left" 
valign="top">&#x2003;&#x003E;3.5</td><td align="left" valign="top">1404 (32.0)</td></tr><tr><td align="left" valign="top" colspan="2">Ethnicities, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Mexican American</td><td align="left" valign="top">344 (7.8)</td></tr><tr><td align="left" valign="top">&#x2003;Non-Hispanic White</td><td align="left" valign="top">2870 (65.4)</td></tr><tr><td align="left" valign="top">&#x2003;Non-Hispanic Black</td><td align="left" valign="top">703 (16.0)</td></tr><tr><td align="left" valign="top">&#x2003;Others</td><td align="left" valign="top">473 (10.8)</td></tr><tr><td align="left" valign="top" colspan="2">Clinical</td></tr><tr><td align="left" valign="top">&#x2003;Age at cancer diagnosis, mean (SD)</td><td align="left" valign="top">55.4 (17.6)</td></tr><tr><td align="left" valign="top">&#x2003;Time since cancer diagnosis, median (IQR)</td><td align="left" valign="top">7 (3-15)</td></tr><tr><td align="left" valign="top" colspan="2">Type of cancer, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Breast cancer</td><td align="left" valign="top">815 (18.6)</td></tr><tr><td align="left" valign="top">&#x2003;Digestive or gastrointestinal cancer</td><td align="left" valign="top">536 (12.2)</td></tr><tr><td align="left" valign="top">&#x2003;Genitourinary cancer</td><td align="left" valign="top">1102 (25.1)</td></tr><tr><td align="left" valign="top">&#x2003;Gynecological cancer</td><td align="left" valign="top">694 (15.8)</td></tr><tr><td align="left" valign="top">&#x2003;Skin cancer</td><td align="left" valign="top">762 (17.4)</td></tr><tr><td align="left" valign="top">&#x2003;Head and neck cancer</td><td align="left" valign="top">173 (3.9)</td></tr><tr><td align="left" valign="top">&#x2003;Respiratory or thoracic cancer</td><td align="left" valign="top">152 (3.5)</td></tr><tr><td align="left" valign="top">&#x2003;Others</td><td align="left" valign="top">552 (12.6)</td></tr><tr><td align="left" valign="top" 
colspan="2">Cancer prognosis<sup><xref ref-type="table-fn" rid="table1fn1"><bold>a</bold></xref></sup>, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Highest</td><td align="left" valign="top">2512 (57.2)</td></tr><tr><td align="left" valign="top">&#x2003;Middle</td><td align="left" valign="top">1435 (32.7)</td></tr><tr><td align="left" valign="top">&#x2003;Lowest</td><td align="left" valign="top">443 (10.1)</td></tr><tr><td align="left" valign="top" colspan="2">Comorbidities, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Hyperlipidemia</td><td align="left" valign="top">2234 (50.9)</td></tr><tr><td align="left" valign="top">&#x2003;Hypertension</td><td align="left" valign="top">2812 (64.1)</td></tr><tr><td align="left" valign="top">&#x2003;Arthritis</td><td align="left" valign="top">2206 (50.3)</td></tr><tr><td align="left" valign="top">&#x2003;Heart failure</td><td align="left" valign="top">347 (7.9)</td></tr><tr><td align="left" valign="top">&#x2003;Coronary heart diseases</td><td align="left" valign="top">426 (9.7)</td></tr><tr><td align="left" valign="top">&#x2003;Angina</td><td align="left" valign="top">289 (6.6)</td></tr><tr><td align="left" valign="top">&#x2003;Heart attack</td><td align="left" valign="top">443 (10.1)</td></tr><tr><td align="left" valign="top">&#x2003;Stroke</td><td align="left" valign="top">404 (9.2)</td></tr><tr><td align="left" valign="top">&#x2003;Bronchitis</td><td align="left" valign="top">452 (10.3)</td></tr><tr><td align="left" valign="top">&#x2003;Liver condition</td><td align="left" valign="top">255 (5.8)</td></tr><tr><td align="left" valign="top">&#x2003;Kidney diseases</td><td align="left" valign="top">338 (7.7)</td></tr><tr><td align="left" valign="top">&#x2003;Diabetes</td><td align="left" valign="top">1074 (24.5)</td></tr><tr><td align="left" valign="top">&#x2003;Asthma</td><td align="left" valign="top">678 (15.4)</td></tr><tr><td align="left" valign="top">&#x2003;Thyroid diseases</td><td align="left" 
valign="top">320 (7.3)</td></tr><tr><td align="left" valign="top">&#x2003;Emphysema</td><td align="left" valign="top">263 (6.0)</td></tr><tr><td align="left" valign="top" colspan="2">Lifestyle</td></tr><tr><td align="left" valign="top">&#x2003;BMI, mean (SD)</td><td align="left" valign="top">28.9 (6.6)</td></tr><tr><td align="left" valign="top">&#x2003;&#x003C;25 kg/m<sup>2</sup> (normal)</td><td align="left" valign="top">1266 (28.8)</td></tr><tr><td align="left" valign="top">&#x2003;25&#x2010;30 kg/m<sup>2</sup> (overweight)</td><td align="left" valign="top">1549 (35.3)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;30 kg/m<sup>2</sup> (obese)</td><td align="left" valign="top">1575 (35.9)</td></tr><tr><td align="left" valign="top" colspan="2">Smoking status, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Never smokers</td><td align="left" valign="top">1923 (43.8)</td></tr><tr><td align="left" valign="top">&#x2003;Former smokers</td><td align="left" valign="top">1766 (40.2)</td></tr><tr><td align="left" valign="top">&#x2003;Current smokers</td><td align="left" valign="top">701 (16.0)</td></tr><tr><td align="left" valign="top" colspan="2">Drinking status, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Nondrinker</td><td align="left" valign="top">2202 (50.2)</td></tr><tr><td align="left" valign="top">&#x2003;Low-to-moderate drinker</td><td align="left" valign="top">1885 (42.9)</td></tr><tr><td align="left" valign="top">&#x2003;Heavy drinker</td><td align="left" valign="top">303 (6.9)</td></tr><tr><td align="left" valign="top" colspan="2">Physical activity, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Physically active (&#x2265;150 min/wk)</td><td align="left" valign="top">1158 (26.4)</td></tr><tr><td align="left" valign="top">&#x2003;Irregularly active (&#x003C;150 min/wk)</td><td align="left" valign="top">680 (15.5)</td></tr><tr><td align="left" valign="top">&#x2003;Inactive</td><td align="left" valign="top">2552 
(58.1)</td></tr><tr><td align="left" valign="top">Healthy Eating Index, median (IQR)</td><td align="left" valign="top">52 (42-63)</td></tr><tr><td align="left" valign="top">&#x2003;&#x003C;51.55</td><td align="left" valign="top">2194 (50.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;51.55</td><td align="left" valign="top">2196 (50.0)</td></tr><tr><td align="left" valign="top">Supplement use (&#x2265;90 days), n (%)</td><td align="left" valign="top">2646 (60.3)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Cancer prognosis was based on the US statistics: group 1 (highest: average 5-year survival rate &#x2265;90%), group 2 (middle: average 5-year survival rate &#x2265;60% and &#x003C;90%), and group 3 (lowest: average 5-year survival rate &#x003C;60%).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Characteristics of Participants in the HADCL Cohort</title><p>From the total of 198,289 individuals in the HADCL cohort, those without a diagnosis of cancer or younger than 20 years (n=184,931), those with a sole diagnosis of nonmelanoma skin cancer (n=274), and those with missing data (n=600) were excluded. Ultimately, 12,484 individuals were included in the analysis (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flowchart of individual inclusion and exclusion in the Hospital Authority Data Collaboration Laboratory cohort.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e71937_fig02.png"/></fig><p><xref ref-type="table" rid="table2">Table 2</xref> presents the characteristics of the included individuals. Their mean age was 60.9 (SD 14.4) years, and 52.7% were female (n=6584). The mean age at cancer diagnosis was 64.8 (SD 14.7) years. The most common cancer diagnosis was digestive cancer (n=3938, 31.5%). 
The most common comorbidity was hypertension (n=3464, 27.7%), followed by diabetes (n=2053, 16.4%).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Summary of characteristics of individuals diagnosed with cancer in the Hospital Authority Data Collaboration Laboratory cohort (N=12,484).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Characteristic</td><td align="left" valign="top">Value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Sociodemographic</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Male</td><td align="left" valign="top">5900 (47.3)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Female</td><td align="left" valign="top">6584 (52.7)</td></tr><tr><td align="left" valign="top">Age (years), mean (SD)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">60.9 (14.4)</td></tr><tr><td align="left" valign="top" colspan="2">Income level<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Lowest-income</td><td align="left" valign="top">5332 (42.7)</td></tr><tr><td align="left" valign="top">&#x2003;Middle-income</td><td align="left" valign="top">3642 (29.2)</td></tr><tr><td align="left" valign="top">&#x2003;Highest-income</td><td align="left" valign="top">3510 (28.1)</td></tr><tr><td align="left" valign="top" colspan="2">Clinical, mean (SD)</td></tr><tr><td align="left" valign="top">&#x2003;Age at cancer diagnosis</td><td align="left" valign="top">64.8 (14.7)</td></tr><tr><td align="left" valign="top" colspan="2">Type of cancer, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of lip, oral cavity, and pharynx (C00-C14)</td><td align="left" 
valign="top">863 (6.9)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of digestive organs (C15-C26)</td><td align="left" valign="top">3938 (31.5)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of respiratory and intrathoracic organs (C30-C39)</td><td align="left" valign="top">1741 (13.9)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of bone and articular cartilage (C40-C41)</td><td align="left" valign="top">50 (0.4)</td></tr><tr><td align="left" valign="top">&#x2003;Malignant melanoma of skin (C43)</td><td align="left" valign="top">49 (0.4)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of mesothelial and soft tissue (C45-C49)</td><td align="left" valign="top">162 (1.3)</td></tr><tr><td align="left" valign="top">&#x2003;Breast cancer (C50)</td><td align="left" valign="top">2242 (18.0)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of female genital organs (C51-C58)</td><td align="left" valign="top">998 (8.0)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of male genital organs (C60-C63)</td><td align="left" valign="top">898 (7.2)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of the urinary tract (C64-C68)</td><td align="left" valign="top">772 (6.2)</td></tr><tr><td align="left" valign="top">&#x2003;Cancer of the eye, brain, and other parts of the CNS (C69-C72)</td><td align="left" valign="top">96 (0.8)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of the thyroid and other endocrine glands (C73-C75)</td><td align="left" valign="top">430 (3.4)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of ill-defined, secondary, and unspecified sites (C76-C80)</td><td align="left" valign="top">3172 (25.4)</td></tr><tr><td align="left" valign="top">&#x2003;Cancers of primary, lymphoid, hematopoietic, and related tissue (C81-C96)</td><td align="left" valign="top">810 (6.5)</td></tr><tr><td align="left" valign="top" colspan="2">Cancer prognosis<sup><xref ref-type="table-fn" 
rid="table2fn3">c</xref></sup>, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Highest</td><td align="left" valign="top">2819 (22.6)</td></tr><tr><td align="left" valign="top">&#x2003;Middle</td><td align="left" valign="top">3839 (30.7)</td></tr><tr><td align="left" valign="top">&#x2003;Lowest</td><td align="left" valign="top">5826 (46.7)</td></tr><tr><td align="left" valign="top" colspan="2">Comorbidities, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Hyperlipidemia</td><td align="left" valign="top">1136 (9.1)</td></tr><tr><td align="left" valign="top">&#x2003;Hypertension</td><td align="left" valign="top">3464 (27.7)</td></tr><tr><td align="left" valign="top">&#x2003;Arthritis</td><td align="left" valign="top">513 (4.1)</td></tr><tr><td align="left" valign="top">&#x2003;Heart failure</td><td align="left" valign="top">801 (6.4)</td></tr><tr><td align="left" valign="top">&#x2003;Coronary heart disease</td><td align="left" valign="top">1047 (8.4)</td></tr><tr><td align="left" valign="top">&#x2003;Angina</td><td align="left" valign="top">397 (3.2)</td></tr><tr><td align="left" valign="top">&#x2003;Heart attack</td><td align="left" valign="top">484 (3.9)</td></tr><tr><td align="left" valign="top">&#x2003;Stroke</td><td align="left" valign="top">905 (7.2)</td></tr><tr><td align="left" valign="top">&#x2003;Bronchitis</td><td align="left" valign="top">137 (1.1)</td></tr><tr><td align="left" valign="top">&#x2003;Liver condition</td><td align="left" valign="top">1265 (10.1)</td></tr><tr><td align="left" valign="top">&#x2003;Kidney diseases</td><td align="left" valign="top">712 (5.7)</td></tr><tr><td align="left" valign="top">&#x2003;Diabetes</td><td align="left" valign="top">2053 (16.4)</td></tr><tr><td align="left" valign="top">&#x2003;Asthma</td><td align="left" valign="top">246 (2.0)</td></tr><tr><td align="left" valign="top">&#x2003;Thyroid diseases</td><td align="left" valign="top">623 (5.0)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Emphysema</td><td align="left" valign="top">14 (0.1)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>This refers to the age in 2007 (the first time point for patient sampling).</p></fn><fn id="table2fn2"><p><sup>b</sup>The income level is based on the residential areas of individuals, categorized into 3 groups based on median monthly household income.</p></fn><fn id="table2fn3"><p><sup>c</sup>Cancer diagnoses were further classified according to their prognosis, based on previous classification in the National Health and Nutrition Examination Survey cohort and local statistics.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Clustering of Comorbidities Using the NHANES Cohort</title><p>Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the clusters generated using the 4 selected approaches. According to the reviews of domain experts, the Bernoulli mixture model ranked the highest (unanimously ranked highest by all experts based on the distinguishability of clusters and clinical relevance), followed by bisecting K-medoids and K-medoids, while K-modes ranked the lowest due to its inability to identify distinguishable clusters. Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> presents the evaluation of clustering results from the 4 approaches using Silhouette analyses, Calinski-Harabasz index, and Davies-Bouldin index. It also shows that the quality of the clusters generated by the Bernoulli mixture model was the highest among the 4 approaches.</p><p>Based on the distribution patterns of comorbidities in the NHANES cohort, 4 clusters derived from the Bernoulli mixture model were chosen due to the notable differences in comorbidity patterns across the clusters. 
Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> presents the characteristics of the individuals in each of the 4 clusters, which comprised 2127 (48.5%), 1525 (34.7%), 421 (9.6%), and 317 (7.2%) individuals.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> and Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> illustrate the comorbidity patterns observed in the 4 clusters. The patients in cluster 1 (low comorbidity cluster) exhibited significantly lower percentages of all comorbidities than the patients in the other clusters. Cluster 2 (metabolic cluster) was characterized by the highest burden of metabolic syndrome (hypertension, hyperlipidemia, and diabetes) among all 4 clusters. Cluster 3 (CVD cluster) was characterized by the highest burden of CVD among the clusters and also displayed a relatively high burden of metabolic syndromes (although lower than that of cluster 2). Cluster 4 (respiratory cluster) exhibited a significantly higher burden of respiratory diseases, while also having a moderate burden of metabolic syndromes (although lower than those of clusters 2 and 3).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Distribution of comorbidities among the 4 comorbidity clusters in the National Health and Nutrition Examination Survey cohort. CVD: cardiovascular disease.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e71937_fig03.png"/></fig></sec><sec id="s3-4"><title>Clustering of Comorbidities Using the HADCL Cohort</title><p>Based on the comorbidity clusters identified in the NHANES cohort, 4 clusters were manually categorized according to observed patterns and the distribution of comorbidities within those clusters. 
Individuals with metabolic diseases were first grouped into cluster 2 (metabolic cluster), and then those with CVD or respiratory comorbidities were assigned to cluster 3 (CVD cluster) and cluster 4 (respiratory cluster), respectively. The remaining individuals were classified into cluster 1 (low comorbidity). Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the characteristics of the individuals in the 4 clusters, which comprised 7392 (59.2%), 2521 (20.2%), 2188 (17.5%), and 383 (3.1%) participants. Figure S2 and Table S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> illustrate the comorbidity patterns observed in the 4 clusters.</p></sec><sec id="s3-5"><title>Association Between Comorbidity Clusters and Mortality Outcomes in the NHANES Cohort</title><p>After a median follow-up of 6.6 (IQR 3.3&#x2010;11) years, 1700 deaths (38.7% of individuals) were recorded. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, all 3 models suggested that there was a significant association between the comorbidity clusters and all-cause mortality. 
After adjusting for confounders (model 3), compared with the low comorbidity cluster, the individuals in the respiratory cluster had the highest risk of mortality (adjusted hazard ratio [aHR] 1.62, 95% CI 1.26&#x2010;2.08; <italic>P&#x003C;</italic>.001), followed by the CVD cluster (aHR 1.50, 95% CI 1.26&#x2010;1.80; <italic>P&#x003C;</italic>.001) and the metabolic cluster (aHR 1.15, 95% CI 1.02&#x2010;1.29; <italic>P=</italic>.03).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Associations of comorbidity clusters with all-cause and cause-specific mortality in the National Health and Nutrition Examination Survey and Hospital Authority Data Collaboration Laboratory cohorts.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Cohort and cluster</td><td align="left" valign="bottom">Number of patients, n (%)</td><td align="left" valign="bottom">Death, n (%)</td><td align="left" valign="bottom">Model 1, hazard ratio (95% CI)</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Model 2<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, hazard ratio (95% CI)</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Model 3<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, hazard ratio (95% CI)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="9">NHANES<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> cohort</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All-cause mortality</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" 
valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 1 (low comorbidity)</td><td align="left" valign="top">2127 (48.5)</td><td align="left" valign="top">657 (30.9)</td><td align="left" valign="top">Ref<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 2 (metabolic)</td><td align="left" valign="top">1525 (34.7)</td><td align="left" valign="top">650 (42.7)</td><td align="left" valign="top">2.10 (1.84&#x2010;2.39)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.24 (1.10&#x2010;1.39)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.15 (1.02&#x2010;1.29)</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 3 (CVD<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup>)</td><td align="left" valign="top">421 (9.6)</td><td align="left" valign="top">261 (62.0)</td><td align="left" valign="top">3.75 (3.03&#x2010;4.64)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.78 (1.49&#x2010;2.14)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.50 (1.26&#x2010;1.80)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 4 (respiratory)</td><td align="left" valign="top">317 (7.2)</td><td align="left" valign="top">132 (41.6)</td><td align="left" valign="top">1.97 (1.52&#x2010;2.54)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.92 (1.51&#x2010;2.42)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.62 
(1.26&#x2010;2.08)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cancer mortality (C00-C97)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 1 (low comorbidity)</td><td align="left" valign="top">2127 (48.5)</td><td align="left" valign="top">253 (11.9)</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 2 (metabolic)</td><td align="left" valign="top">1525 (34.7)</td><td align="left" valign="top">213 (14.0)</td><td align="left" valign="top">1.48 (1.19&#x2010;1.84)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.00 (0.80&#x2010;1.25)</td><td align="left" valign="top">.98</td><td align="left" valign="top">0.92 (0.73&#x2010;1.17)</td><td align="left" valign="top">.50</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 3 (CVD)</td><td align="left" valign="top">421 (9.6)</td><td align="left" valign="top">61 (14.5)</td><td align="left" valign="top">1.92 (1.28&#x2010;2.88)</td><td align="left" valign="top">.002</td><td align="left" valign="top">1.07 (0.71&#x2010;1.60)</td><td align="left" valign="top">.75</td><td align="left" valign="top">0.89 (0.59&#x2010;1.33)</td><td align="left" valign="top">.57</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 4 (respiratory)</td><td align="left" valign="top">317 (7.2)</td><td align="left" valign="top">48 (15.1)</td><td 
align="left" valign="top">1.46 (0.94&#x2010;2.25)</td><td align="left" valign="top">.09</td><td align="left" valign="top">1.50 (0.96&#x2010;2.33)</td><td align="left" valign="top">.07</td><td align="left" valign="top">1.20 (0.77&#x2010;1.87)</td><td align="left" valign="top">.43</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CVD mortality (I00-I09, I11, I13, I20-I51, and I60-69)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 1 (low comorbidity)</td><td align="left" valign="top">2127 (48.5)</td><td align="left" valign="top">130 (6.1)</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 2 (metabolic)</td><td align="left" valign="top">1525 (34.7)</td><td align="left" valign="top">174 (11.4)</td><td align="left" valign="top">2.99 (2.24&#x2010;3.99)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.57 (1.21&#x2010;2.05)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.48 (1.14&#x2010;1.93)</td><td align="left" valign="top">.003</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 3 (CVD)</td><td align="left" valign="top">421 (9.6)</td><td align="left" valign="top">101 (24.0)</td><td align="left" valign="top">8.45 (6.20&#x2010;11.5)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">3.54 (2.68&#x2010;4.69)</td><td align="left" valign="top">&#x003C;.001</td><td 
align="left" valign="top">3.05 (2.29&#x2010;4.07)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 4 (respiratory)</td><td align="left" valign="top">317 (7.2)</td><td align="left" valign="top">32 (10.1)</td><td align="left" valign="top">2.58 (1.64&#x2010;4.08)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">2.45 (1.56&#x2010;3.86)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">2.19 (1.35&#x2010;3.54)</td><td align="left" valign="top">.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Respiratory mortality (J40-J47)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 1 (low comorbidity)</td><td align="left" valign="top">2127 (48.5)</td><td align="left" valign="top">29 (1.4)</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 2 (metabolic)</td><td align="left" valign="top">1525 (34.7)</td><td align="left" valign="top">26 (1.7)</td><td align="left" valign="top">2.79 (1.45&#x2010;5.38)</td><td align="left" valign="top">.002</td><td align="left" valign="top">1.53 (0.75&#x2010;3.12)</td><td align="left" valign="top">.24</td><td align="left" valign="top">1.29 (0.61&#x2010;2.72)</td><td align="left" valign="top">.51</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 3 (CVD)</td><td align="left" valign="top">421 (9.6)</td><td 
align="left" valign="top">18 (4.3)</td><td align="left" valign="top">7.39 (3.81&#x2010;14.3)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">3.38 (1.64&#x2010;6.98)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">2.09 (0.97&#x2010;4.47)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 4 (respiratory)</td><td align="left" valign="top">317 (7.2)</td><td align="left" valign="top">18 (5.7)</td><td align="left" valign="top">6.51 (3.29&#x2010;12.9)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">5.94 (2.99&#x2010;11.8)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">3.99 (2.03&#x2010;7.83)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="9">HADCL<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> cohort</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All-cause mortality</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 1 (low comorbidity)</td><td align="left" valign="top">7392 (59.2)</td><td align="left" valign="top">1747 (23.6)</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">Ref</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 2 (metabolic)</td><td align="left" valign="top">2521 (20.2)</td><td align="left" valign="top">905 (35.9)</td><td align="left" valign="top">1.65 
(1.52&#x2010;1.79)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.11 (1.02&#x2010;1.21)</td><td align="left" valign="top">.01</td><td align="left" valign="top">1.08 (0.99&#x2010;1.17)</td><td align="left" valign="top">.08</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 3 (CVD)</td><td align="left" valign="top">2188 (17.5)</td><td align="left" valign="top">1086 (49.6)</td><td align="left" valign="top">2.37 (2.19&#x2010;2.55)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.37 (1.26&#x2010;1.48)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.33 (1.23&#x2010;1.45)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Cluster 4 (respiratory)</td><td align="left" valign="top">383 (3.1)</td><td align="left" valign="top">184 (48.0)</td><td align="left" valign="top">2.22 (1.91&#x2010;2.58)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.32 (1.13&#x2010;1.54)</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">1.32 (1.13&#x2010;1.54)</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>In the NHANES cohort, model 2 was adjusted for age and sex, model 3 was adjusted for age at assessment, sex, socioeconomic status (education level, ethnicities, and income-to-poverty ratio), lifestyle behaviors (BMI, Healthy Eating Index, smoking and alcohol status, physical activity, and supplement use), years since cancer diagnosis, and cancer prognosis. 
In the HADCL cohort, model 2 was adjusted for age at cancer diagnosis and sex, and model 3 was adjusted for age at cancer diagnosis, sex, income level (based on the median household income of the districts), and cancer prognosis.</p></fn><fn id="table3fn2"><p><sup>b</sup>NHANES: National Health and Nutrition Examination Survey.</p></fn><fn id="table3fn3"><p><sup>c</sup>Ref: reference group.</p></fn><fn id="table3fn4"><p><sup>d</sup>Not applicable.</p></fn><fn id="table3fn5"><p><sup>e</sup>CVD: cardiovascular disease.</p></fn><fn id="table3fn6"><p><sup>f</sup>HADCL: Hospital Authority Data Collaboration Laboratory.</p></fn></table-wrap-foot></table-wrap><p>Regarding cause-specific mortality, the metabolic, CVD, and respiratory clusters were associated with higher risks of CVD-related mortality than the low comorbidity cluster, with the CVD cluster (aHR 3.05, 95% CI 2.29&#x2010;4.07; <italic>P&#x003C;</italic>.001) showing the highest risk. Only individuals in the respiratory cluster had a higher risk of respiratory disease mortality than those in the low comorbidity cluster (aHR 3.99, 95% CI 2.03&#x2010;7.83; <italic>P&#x003C;</italic>.001) after adjusting for all confounders, although the metabolic and CVD clusters were also significantly associated with higher respiratory disease mortality in the crude models. However, no significant differences in the risk of cancer mortality were observed among the clusters (all <italic>P&#x003E;</italic>.05).</p><p>The subgroup analysis (<xref ref-type="fig" rid="figure4">Figure 4</xref>) indicated that the effects of the comorbidity clusters on mortality were modified by the income-to-poverty ratio (<italic>P</italic> for interaction=.04), HEI score (<italic>P</italic> for interaction=.02), time since cancer diagnosis (<italic>P</italic> for interaction=.009), and cancer prognosis (<italic>P</italic> for interaction=.005) after adjusting for all confounders. 
The individuals in the respiratory cluster had a significantly higher risk of mortality (<italic>P</italic>=.003) if they had a lower income-to-poverty ratio, while those in the metabolic cluster (<italic>P</italic>=.02) had a higher risk of mortality if they reported a higher income-to-poverty ratio. Individuals with low HEI scores in all 3 other clusters had a higher risk of mortality than those with low HEI scores in the low comorbidity cluster (all <italic>P</italic>&#x003C;.01). However, for individuals with high HEI scores, only those in the CVD cluster had a higher risk of mortality than those with high HEI scores in the low comorbidity cluster (<italic>P</italic>=.006).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Effect modification by factors on the association of clusters with mortality in the National Health and Nutrition Examination Survey cohort (a more detailed figure is presented in Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e71937_fig04.png"/></fig></sec><sec id="s3-6"><title>Verification of the Association Between Comorbidity Clusters and All-Cause Mortality in the HADCL Cohort</title><p>After a median follow-up of 8.9 (IQR 3.8&#x2010;13.5) years, 3922 deaths (31.4% of individuals) were recorded. As shown in <xref ref-type="table" rid="table3">Table 3</xref>, the crude model and the age- and sex-adjusted model suggested that there was a significant association between the comorbidity clusters and all-cause mortality. After adjusting for confounders (model 3), compared with the low comorbidity cluster, the individuals in the CVD cluster (aHR 1.33, 95% CI 1.23&#x2010;1.45; <italic>P</italic>&#x003C;.001) and respiratory cluster (aHR 1.32, 95% CI 1.13&#x2010;1.54; <italic>P</italic>&#x003C;.001) had a higher risk of mortality. 
The metabolic cluster also tended to have a higher risk of mortality compared with the low comorbidity cluster; however, the association was not significant (aHR 1.08, 95% CI 0.99&#x2010;1.17; <italic>P</italic>=.08).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This is one of the largest studies to use machine learning to identify multimorbidity clusters and to evaluate them as prognostic markers of mortality in patients with different types of cancer. This study focused on clustering comorbidities in 2 distinct patient cohorts from different geographical locations. The following four clusters were identified: low comorbidity (cluster 1), metabolic (cluster 2), cardiovascular (cluster 3), and respiratory (cluster 4) clusters. Compared with the low comorbidity cluster, the CVD and respiratory clusters were consistently associated with higher all-cause mortality in both cohorts, and all 3 other clusters were associated with higher CVD mortality rates in the NHANES cohort. No associations were identified between specific clusters of comorbidities and cancer mortality. These observed associations may inform the development of cluster-specific care management and clinical guidelines for common comorbidities for people living with and beyond cancer.</p></sec><sec id="s4-2"><title>Machine Learning Approaches in the Study of Multimorbidity</title><p>The use of machine learning is especially relevant in the study of comorbidities due to the complex interactions between cancer and other coexisting health conditions. Compared with traditional methods, unsupervised learning is more effective and flexible at identifying unknown patterns among patient subgroups, without the need for prior human knowledge and intervention (eg, determining cutoff values). 
In this study, mixture models outperformed the partition-based methods in distinguishing patterns across the identified clusters, which may be due to their advantages in clustering data with different shapes and sizes [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. Also, Bernoulli mixture models are specifically designed for binary data, where each feature follows a Bernoulli distribution [<xref ref-type="bibr" rid="ref23">23</xref>]. In contrast, methods such as K-modes, K-medoids, and bisecting K-medoids are more general and suitable for categorical data but may not capture the nuances of binary data as Bernoulli mixture models do. Mixture models also offer other advantages over K-modes, K-medoids, and bisecting K-medoids; for instance, they can provide soft clustering assignments to allow for flexibility and handle missing data more effectively [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>].</p><p>Consistent with emerging evidence supporting the role of artificial intelligence in health care [<xref ref-type="bibr" rid="ref50">50</xref>], our findings suggest that, in the near future, such algorithms may be incorporated into health care systems as risk stratification tools to assist clinicians in identifying patients at risk of adverse outcomes. Clustering and other machine learning approaches have been used in the development of risk prediction models for various diseases. Notably, several of these models have included comorbidities as one of the components, such as in predicting the risk of complications in patients with diabetes [<xref ref-type="bibr" rid="ref51">51</xref>] and mortality risk in patients with chronic obstructive pulmonary disease [<xref ref-type="bibr" rid="ref52">52</xref>]. 
Hence, the clustering approach used in this study may be applied to assist in predicting mortality risk upon the initial cancer diagnosis.</p></sec><sec id="s4-3"><title>Comorbidity Clusters in Cancer</title><p>There are similarities between the comorbidity clusters identified in previous studies [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>] and those in this study, despite some variations in the scopes of health conditions. For instance, clusters associated with cardiovascular and respiratory conditions have been observed in patients with breast, colorectal, and lung cancers [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. This suggests that the multimorbidity profile may be similar across different types of cancer. Our study revealed that the increased risk of mortality across all three comorbidity clusters (compared with the low comorbidity cluster) remained in cancers with good prognoses. This finding is reasonable, as patients with cancers of poor prognosis are more likely to die from their cancer regardless of their comorbidity status [<xref ref-type="bibr" rid="ref55">55</xref>]. Overall, the individuals in the respiratory and CVD clusters had a higher risk of all-cause mortality than individuals from other clusters. Recent data from the United States indicate that the majority of patients with cancer die from noncancer-related causes, most commonly heart disease [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. In comparison, respiratory diseases account for a smaller proportion of noncancer-related deaths [<xref ref-type="bibr" rid="ref56">56</xref>]. 
The fact that the patients in the respiratory cluster also experienced poor prognosis in our study suggests the need for improved screening and management of comorbidities in this group of patients. Many cancer treatment modalities, including radiotherapy, chemotherapy, and immunotherapy, can potentially lead to pulmonary toxicities [<xref ref-type="bibr" rid="ref58">58</xref>-<xref ref-type="bibr" rid="ref60">60</xref>]. Future epidemiological studies should explore how cancer and its treatment impact comorbidity outcomes, particularly regarding pulmonary conditions and their subsequent effects on survival outcomes. Conversely, this study did not find significant differences in cancer mortality across the four comorbidity clusters. One possible reason may be competing risks from mortality due to other causes, such as mortality from CVD. The high cardiovascular mortality risk in patients with a high comorbidity burden could overshadow any differences in cancer mortality that might be observed in later stages of life. Given the improving management and tighter control of cardiovascular risk factors in patients with cancer in recent years, it may be worthwhile to examine these associations again in future waves of data collection within NHANES.</p><p>Notably, our study uncovered a unique cluster that was prevalent among patients with cancer in the NHANES and HADCL cohorts. This cluster was characterized by metabolic syndromes, which are known risk factors for CVDs [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref61">61</xref>], but a low burden of CVD. Previous research has indicated that the prevalence of metabolic syndromes may be higher in patients with cancer or survivors of cancer than in individuals without cancer [<xref ref-type="bibr" rid="ref61">61</xref>]. Cancer and metabolic syndromes share common risk factors, such as age, obesity, and lifestyle factors [<xref ref-type="bibr" rid="ref62">62</xref>]. 
In terms of mortality outcomes, the CVD cluster exhibited an all-cause mortality rate 1.3 times higher and a CVD mortality rate more than 2 times higher than the rates observed for the metabolic cluster in the NHANES cohort. Considering the important connections between metabolic syndromes and CVD, patients in this cluster are likely to develop CVD if these risk factors are not well controlled. This finding underscores the importance of early interventions to manage metabolic risk factors and reduce CVD risk in patients with cancer.</p></sec><sec id="s4-4"><title>The Modification Effect of Lifestyle and Socioeconomic Factors</title><p>We observed effect modification of some lifestyle factors in the NHANES cohort. One important modifiable factor that may impact the associations between comorbidity clusters and mortality is diet. Our findings suggest that patients with multimorbidity who follow an unhealthy diet may have higher all-cause mortality rates in all 3 clusters compared with the low comorbidity cluster, with a particularly significant difference in the respiratory cluster. Previous studies have demonstrated an inverse relationship between adherence to healthy dietary patterns and mortality in patients with cancer and survivors of cancer [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref64">64</xref>]. In addition, diet quality has been found to be associated with multimorbidity. For instance, a Western dietary pattern is associated with an increased risk of multimorbidity [<xref ref-type="bibr" rid="ref65">65</xref>]. Notably, metabolic syndrome, which was prevalent across the high comorbidity clusters, may serve as a surrogate marker for dietary risk factors in cancer [<xref ref-type="bibr" rid="ref62">62</xref>]. Taken together, adopting a healthier diet may help reduce mortality in patients with cancer by lowering the incidence of chronic health conditions. 
Therefore, it is crucial to include dietary interventions as part of a holistic care plan for patients with cancer, especially those with high comorbidity burdens.</p><p>Another effect modifier to consider is the income-to-poverty ratio, which has been found to modify the association between comorbidity clusters and mortality in varying ways. Previous studies have shown that a lower socioeconomic status is associated with higher mortality rates in patients with cancer [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>]. While it is known that comorbidities can contribute to inequities in survival, there is a lack of research exploring the impact of different comorbidities on the socioeconomic disparities in survival. Our study shows that a low socioeconomic status is associated with poorer survival in patients with cancer who have respiratory conditions, but the opposite is observed for patients with metabolic diseases. It has previously been demonstrated that modifiable risk factors, such as obesity and smoking, are important mediators of the associations between socioeconomic status and mortality [<xref ref-type="bibr" rid="ref68">68</xref>,<xref ref-type="bibr" rid="ref69">69</xref>]. Therefore, one possible explanation for the differences in the associations between socioeconomic status and mortality across comorbidity clusters may be variations in the mediating effects of these lifestyle factors. For example, mortality in patients with cancer who have respiratory comorbidities and a lower socioeconomic status may be more strongly influenced by smoking status and poor access to health care services, whereas the mortality risk in patients with metabolic diseases may be increased by other lifestyle factors associated with high socioeconomic status, such as a sedentary occupation, unhealthy dietary patterns, and excess alcohol consumption. 
Future studies should investigate the mediating effects of modifiable risk factors on the associations between socioeconomic status and mortality among patients with different comorbidities. This may assist the development of targeted interventions to reduce the inequities in mortality among patients with cancer.</p></sec><sec id="s4-5"><title>Strengths and Limitations</title><p>The strength of this study lies in its use of a nationally representative sample of patients with cancer to identify the clusters and the validation of the results using another large sample of patients from a different geographical location. However, there are several limitations to this study. First, the data for cancer and comorbidities were self-reported in the NHANES cohort, which may have introduced recall bias. However, previous studies have shown generally good agreement between health records and self-reports for conditions including diabetes, hypertension, and myocardial infarction [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. We further validated the associations between comorbidity clusters and mortality using the HADCL cohort, which includes documented disease diagnoses. Second, the study lacked information regarding the staging of cancer diagnosis. However, studies have shown that differences in survival at diagnosis between stage groups largely disappeared after having survived for 5-10 years [<xref ref-type="bibr" rid="ref72">72</xref>]. As the main cohort in this study consists of survivors (with a median survival of 7 years) rather than patients on active treatment, the comorbidity clusters identified in this study may be more relevant to cancer survivors than to newly diagnosed patients. Moreover, the study did not provide information on the severity and treatment of comorbidities. 
Previous studies have demonstrated that the impact of comorbidities increases with their severity, which may be due to differential effects on treatment toxicities and tolerance, direct impacts on cancer progression, or other factors [<xref ref-type="bibr" rid="ref5">5</xref>]. These gaps in the data may limit the mechanistic insights into mortality drivers. Future studies should use electronic health records to ascertain these clinical characteristics for verification of the findings. The manually assigned clusters in the validation cohort (ie, the HADCL data) may introduce a certain degree of confirmation bias. However, the purpose of the validation cohort is to confirm the association between comorbidity clusters identified from the NHANES data and mortality; hence, we considered a manual assignment approach to be reasonable. Future studies should use another external cohort to validate the identified clusters in our study.</p></sec><sec id="s4-6"><title>Conclusions</title><p>This study used machine learning techniques to investigate clusters of comorbidities and mortality outcomes in two large samples of patients with cancer in the United States and Hong Kong. Compared with individuals with low comorbidity burdens, those in the respiratory and CVD clusters showed higher all-cause mortality in both samples, and all 3 clusters showed higher CVD-related mortality rates in the NHANES cohort. However, no significant associations between these clusters and cancer-specific mortality were observed. Diet quality and socioeconomic status are effect modifiers of the associations between comorbidity clusters and mortality. Overall, the results demonstrate the potential of using machine learning approaches to gain valuable insights into the complex multimorbidity profiles of patients with cancer. 
Further studies using similar methodologies may provide deeper insights into the relationships between multimorbidity, mortality, and cancer-specific outcomes, ultimately enabling the incorporation of multimorbidity considerations to improve strategies for the personalized care of patients with cancer.</p></sec></sec></body><back><ack><p>The authors received no specific funding for this work. A portion of the results was presented at the American Society of Clinical Oncology Annual Meeting 2024. The abstract was awarded the Conquer Cancer&#x2013;New Rhein RWE Endowed Merit Award.</p></ack><notes><sec><title>Data Availability</title><p>Data are presented in the main manuscript. NHANES data are publicly available on the web [<xref ref-type="bibr" rid="ref15">15</xref>]. Owing to Hospital Authority Data Collaboration Laboratory policy, the raw data remain confidential and will not be shared.</p></sec></notes><fn-group><fn fn-type="con"><p>All authors contributed to the conceptualization and design of the study. Data analysis was performed by CSL and RH, while all authors were involved in the interpretation of the data. 
CSL drafted the original manuscript, and all authors participated in the review and editing process.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">aHR</term><def><p>adjusted hazard ratio</p></def></def-item><def-item><term id="abb2">CVD</term><def><p>cardiovascular disease</p></def></def-item><def-item><term id="abb3">HADCL</term><def><p>Hospital Authority Data Collaboration Laboratory</p></def></def-item><def-item><term id="abb4">HEI</term><def><p>Healthy Eating Index</p></def></def-item><def-item><term id="abb5"><italic>ICD-10</italic></term><def><p><italic>International Statistical Classification of Diseases, 10th Revision</italic></p></def></def-item><def-item><term id="abb6">MICE</term><def><p>Multivariate Imputation by Chained Equations</p></def></def-item><def-item><term id="abb7">NHANES</term><def><p>National Health and Nutrition Examination Survey</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sung</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ferlay</surname><given-names>J</given-names> </name><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><etal/></person-group><article-title>Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2021</year><month>05</month><volume>71</volume><issue>3</issue><fpage>209</fpage><lpage>249</lpage><pub-id pub-id-type="doi">10.3322/caac.21660</pub-id><pub-id pub-id-type="medline">33538338</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Whitty</surname><given-names>CJM</given-names> </name><name name-style="western"><surname>MacEwen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Goddard</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Rising to the challenge of multimorbidity</article-title><source>BMJ</source><year>2020</year><month>01</month><day>6</day><volume>368</volume><fpage>l6964</fpage><pub-id pub-id-type="doi">10.1136/bmj.l6964</pub-id><pub-id pub-id-type="medline">31907164</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barnett</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mercer</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Norbury</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Epidemiology of multimorbidity and implications for health care, research, and medical education: a cross-sectional study</article-title><source>Lancet</source><year>2012</year><month>07</month><day>7</day><volume>380</volume><issue>9836</issue><fpage>37</fpage><lpage>43</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(12)60240-2</pub-id><pub-id pub-id-type="medline">22579043</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Edwards</surname><given-names>BK</given-names> </name><name name-style="western"><surname>Noone</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Mariotto</surname><given-names>AB</given-names> </name><etal/></person-group><article-title>Annual report to the nation on the status of cancer, 1975-2010, featuring prevalence of comorbidity and impact on survival among persons with lung, colorectal, breast, or 
prostate cancer</article-title><source>Cancer</source><year>2014</year><month>05</month><day>1</day><volume>120</volume><issue>9</issue><fpage>1290</fpage><lpage>1314</lpage><pub-id pub-id-type="doi">10.1002/cncr.28509</pub-id><pub-id pub-id-type="medline">24343171</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarfati</surname><given-names>D</given-names> </name><name name-style="western"><surname>Koczwara</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>C</given-names> </name></person-group><article-title>The impact of comorbidity on cancer and its treatment</article-title><source>CA Cancer J Clin</source><year>2016</year><month>07</month><volume>66</volume><issue>4</issue><fpage>337</fpage><lpage>350</lpage><pub-id pub-id-type="doi">10.3322/caac.21342</pub-id><pub-id pub-id-type="medline">26891458</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>L</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>WY</given-names> </name><name name-style="western"><surname>Atkinson</surname><given-names>E</given-names> </name><name name-style="western"><surname>Krzyzanowska</surname><given-names>MK</given-names> </name></person-group><article-title>Impact of comorbidity on chemotherapy use and outcomes in solid tumors: a systematic review</article-title><source>J Clin Oncol</source><year>2011</year><month>01</month><day>1</day><volume>29</volume><issue>1</issue><fpage>106</fpage><lpage>117</lpage><pub-id pub-id-type="doi">10.1200/JCO.2010.31.3049</pub-id><pub-id pub-id-type="medline">21098314</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Ritchie</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Kvale</surname><given-names>E</given-names> </name><name name-style="western"><surname>Fisch</surname><given-names>MJ</given-names> </name></person-group><article-title>Multimorbidity: an issue of growing importance for oncologists</article-title><source>J Oncol Pract</source><year>2011</year><month>11</month><volume>7</volume><issue>6</issue><fpage>371</fpage><lpage>374</lpage><pub-id pub-id-type="doi">10.1200/JOP.2011.000460</pub-id><pub-id pub-id-type="medline">22379419</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bertsimas</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wiberg</surname><given-names>H</given-names> </name></person-group><article-title>Machine learning in oncology: methods, applications, and challenges</article-title><source>JCO Clin Cancer Inform</source><year>2020</year><month>10</month><volume>4</volume><fpage>885</fpage><lpage>894</lpage><pub-id pub-id-type="doi">10.1200/CCI.20.00072</pub-id><pub-id pub-id-type="medline">33058693</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kon&#x00E9;</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Scharf</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>A</given-names> </name></person-group><article-title>Multimorbidity and complexity among patients with cancer in Ontario: a retrospective cohort study exploring the clustering of 17 chronic conditions with cancer</article-title><source>Cancer 
Control</source><year>2023</year><volume>30</volume><fpage>10732748221150393</fpage><pub-id pub-id-type="doi">10.1177/10732748221150393</pub-id><pub-id pub-id-type="medline">36631419</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kenzik</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Kent</surname><given-names>EE</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>MY</given-names> </name><etal/></person-group><article-title>Chronic condition clusters and functional impairment in older cancer survivors: a population-based study</article-title><source>J Cancer Surviv</source><year>2016</year><month>12</month><volume>10</volume><issue>6</issue><fpage>1096</fpage><lpage>1103</lpage><pub-id pub-id-type="doi">10.1007/s11764-016-0553-4</pub-id><pub-id pub-id-type="medline">27229869</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bender</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Engberg</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Donovan</surname><given-names>HS</given-names> </name><etal/></person-group><article-title>Symptom clusters in adults with chronic health problems and cancer as a comorbidity</article-title><source>Oncol Nurs Forum</source><year>2008</year><month>01</month><volume>35</volume><issue>1</issue><fpage>E1</fpage><lpage>E11</lpage><pub-id pub-id-type="doi">10.1188/08.ONF.E1-E11</pub-id><pub-id pub-id-type="medline">18192145</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Papachristou</surname><given-names>N</given-names> 
</name><name name-style="western"><surname>Miaskowski</surname><given-names>C</given-names> </name><name name-style="western"><surname>Barnaghi</surname><given-names>P</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Chon</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sacristan</surname><given-names>E</given-names> </name></person-group><article-title>Comparing machine learning clustering with latent class analysis on cancer symptoms&#x2019; data</article-title><conf-name>2016 IEEE Healthcare Innovation Point-Of-Care Technologies Conference (HI-POCT)</conf-name><conf-date>Nov 9-11, 2016</conf-date><conf-loc>Cancun, Mexico</conf-loc><fpage>162</fpage><lpage>166</lpage><pub-id pub-id-type="doi">10.1109/HIC.2016.7797722</pub-id><pub-id pub-id-type="medline">28560119</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hahn</surname><given-names>EE</given-names> </name><name name-style="western"><surname>Gould</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Munoz-Plaza</surname><given-names>CE</given-names> </name><etal/></person-group><article-title>Understanding comorbidity profiles and their effect on treatment and survival in patients with colorectal cancer</article-title><source>J Natl Compr Canc Netw</source><year>2018</year><month>01</month><volume>16</volume><issue>1</issue><fpage>23</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.6004/jnccn.2017.7026</pub-id><pub-id pub-id-type="medline">29295878</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Phung</surname><given-names>D</given-names> 
</name><name name-style="western"><surname>Tran</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Guidelines for developing and reporting machine learning predictive models in biomedical research: a multidisciplinary view</article-title><source>J Med Internet Res</source><year>2016</year><month>12</month><day>16</day><volume>18</volume><issue>12</issue><fpage>e323</fpage><pub-id pub-id-type="doi">10.2196/jmir.5870</pub-id><pub-id pub-id-type="medline">27986644</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><article-title>National health and nutrition examination survey</article-title><source>US Centers for Disease Control and Prevention</source><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/nchs/nhanes/index.htm">https://www.cdc.gov/nchs/nhanes/index.htm</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guenther</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Kirkpatrick</surname><given-names>SI</given-names> </name><name name-style="western"><surname>Reedy</surname><given-names>J</given-names> </name><etal/></person-group><article-title>The Healthy Eating Index-2010 is a valid and reliable measure of diet quality according to the 2010 Dietary Guidelines for Americans</article-title><source>J Nutr</source><year>2014</year><month>03</month><volume>144</volume><issue>3</issue><fpage>399</fpage><lpage>407</lpage><pub-id pub-id-type="doi">10.3945/jn.113.183079</pub-id><pub-id pub-id-type="medline">24453128</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>United States cancer cases and death statistics at a glance</article-title><source>U.S. 
Centers for Disease Control and Prevention</source><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://gis.cdc.gov/Cancer/USCS/#/Survival/">https://gis.cdc.gov/Cancer/USCS/#/Survival/</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>Cancer stat facts: common cancer sites</article-title><source>National Cancer Institute Surveillance, Epidemiology, and End Results Program</source><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://seer.cancer.gov/statfacts/html/common.html">https://seer.cancer.gov/statfacts/html/common.html</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><article-title>Cardiovascular diseases (CVDs)</article-title><source>World Health Organization</source><year>2021</year><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/news-room/fact-sheets/detail/cardiovascular-diseases-(cvds)">https://www.who.int/news-room/fact-sheets/detail/cardiovascular-diseases-(cvds)</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>PL</given-names> </name></person-group><article-title>A comprehensive definition for metabolic syndrome</article-title><source>Dis Model Mech</source><year>2009</year><volume>2</volume><issue>5-6</issue><fpage>231</fpage><lpage>237</lpage><pub-id pub-id-type="doi">10.1242/dmm.001180</pub-id><pub-id pub-id-type="medline">19407331</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saeed</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Javed</surname><given-names>K</given-names> </name><name name-style="western"><surname>Atique Babri</surname><given-names>H</given-names> </name></person-group><article-title>Machine learning using Bernoulli mixture models: clustering, rule extraction and dimensionality reduction</article-title><source>Neurocomputing</source><year>2013</year><month>11</month><volume>119</volume><fpage>366</fpage><lpage>374</lpage><pub-id pub-id-type="doi">10.1016/j.neucom.2013.03.021</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malsiner-Walli</surname><given-names>G</given-names> </name><name name-style="western"><surname>Fr&#x00FC;hwirth-Schnatter</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gr&#x00FC;n</surname><given-names>B</given-names> </name></person-group><article-title>Model-based clustering based on sparse finite Gaussian mixtures</article-title><source>Stat Comput</source><year>2016</year><volume>26</volume><issue>1</issue><fpage>303</fpage><lpage>324</lpage><pub-id pub-id-type="doi">10.1007/s11222-014-9500-2</pub-id><pub-id pub-id-type="medline">26900266</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nadif</surname><given-names>M</given-names> </name><name name-style="western"><surname>Govaert</surname><given-names>G</given-names> </name></person-group><article-title>Clustering for binary data and mixture models&#x2014;choice of the model</article-title><source>Appl Stoch Models Bus Ind</source><year>1997</year><volume>13</volume><issue>3-4</issue><fpage>269</fpage><lpage>278</lpage><pub-id pub-id-type="doi">10.1002/(SICI)1099-0747(199709/12)13:3/4&#x003C;269::AID-ASM321&#x003E;3.0.CO;2-7</pub-id></nlm-citation></ref><ref 
id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fahad</surname><given-names>A</given-names> </name><name name-style="western"><surname>Alshatri</surname><given-names>N</given-names> </name><name name-style="western"><surname>Tari</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>A survey of clustering algorithms for big data: taxonomy and empirical analysis</article-title><source>IEEE Trans Emerg Topics Comput</source><year>2014</year><volume>2</volume><issue>3</issue><fpage>267</fpage><lpage>279</lpage><pub-id pub-id-type="doi">10.1109/TETC.2014.2330519</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>Z</given-names> </name></person-group><article-title>Extensions to the k-means algorithm for clustering large data sets with categorical values</article-title><source>Data Min Knowl Discov</source><year>1998</year><volume>2</volume><issue>3</issue><fpage>283</fpage><lpage>304</lpage><pub-id pub-id-type="doi">10.1023/A:1009769707641</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Reynolds</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Richards</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rayward-Smith</surname><given-names>VJ</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Yang</surname><given-names>ZR</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Everson</surname><given-names>RM</given-names> </name></person-group><article-title>The 
application of k-medoids and PAM to the clustering of rules</article-title><source>Intelligent Data Engineering and Automated Learning &#x2013; IDEAL 2004</source><year>2004</year><publisher-name>Springer</publisher-name><fpage>173</fpage><lpage>178</lpage><pub-id pub-id-type="doi">10.1007/978-3-540-28651-6_25</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fei</surname><given-names>H</given-names> </name><name name-style="western"><surname>Meskens</surname><given-names>N</given-names> </name></person-group><article-title>Clustering of patients&#x2019; trajectories with an auto-stopped Bisecting K-Medoids algorithm</article-title><source>J Math Model Algor</source><year>2013</year><month>06</month><volume>12</volume><issue>2</issue><fpage>135</fpage><lpage>154</lpage><pub-id pub-id-type="doi">10.1007/s10852-012-9198-0</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papachristou</surname><given-names>N</given-names> </name><name name-style="western"><surname>Barnaghi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Cooper</surname><given-names>BA</given-names> </name><etal/></person-group><article-title>Congruence between latent class and K-modes analyses in the identification of oncology patients with distinct symptom experiences</article-title><source>J Pain Symptom Manage</source><year>2018</year><month>02</month><volume>55</volume><issue>2</issue><fpage>318</fpage><lpage>333</lpage><pub-id pub-id-type="doi">10.1016/j.jpainsymman.2017.08.020</pub-id><pub-id pub-id-type="medline">28859882</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Coombes</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Abrams</surname><given-names>ZB</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Unsupervised machine learning and prognostic factors of survival in chronic lymphocytic leukemia</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>07</month><day>1</day><volume>27</volume><issue>7</issue><fpage>1019</fpage><lpage>1027</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa060</pub-id><pub-id pub-id-type="medline">32483590</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Martens</surname><given-names>P</given-names> </name><name name-style="western"><surname>Augusto</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Finet</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Distinct impact of noncardiac comorbidities on exercise capacity and functional status in chronic heart failure</article-title><source>JACC Heart Fail</source><year>2023</year><month>10</month><volume>11</volume><issue>10</issue><fpage>1365</fpage><lpage>1376</lpage><pub-id pub-id-type="doi">10.1016/j.jchf.2023.05.018</pub-id><pub-id pub-id-type="medline">37389503</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jasinska-Piadlo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>R</given-names> </name><name name-style="western"><surname>Biglarbeigi</surname><given-names>P</given-names> </name></person-group><article-title>Data-driven versus a domain-led approach to k-means clustering on an open heart 
failure dataset</article-title><source>Int J Data Sci Anal</source><year>2023</year><month>01</month><volume>15</volume><issue>1</issue><fpage>49</fpage><lpage>66</lpage><pub-id pub-id-type="doi">10.1007/s41060-022-00346-9</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peleg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Asbeh</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kuflik</surname><given-names>T</given-names> </name><name name-style="western"><surname>Schertz</surname><given-names>M</given-names> </name></person-group><article-title>Onto-clust&#x2014;a methodology for combining clustering analysis and ontological methods for identifying groups of comorbidities for developmental disorders</article-title><source>J Biomed Inform</source><year>2009</year><month>02</month><volume>42</volume><issue>1</issue><fpage>165</fpage><lpage>175</lpage><pub-id pub-id-type="doi">10.1016/j.jbi.2008.05.010</pub-id><pub-id pub-id-type="medline">18590984</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tarekegn</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Michalak</surname><given-names>K</given-names> </name><name name-style="western"><surname>Giacobini</surname><given-names>M</given-names> </name></person-group><article-title>Cross-validation approach to evaluate clustering algorithms: an experimental study using multi-label datasets</article-title><source>SN Comput Sci</source><year>2020</year><month>09</month><volume>1</volume><issue>5</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1007/s42979-020-00283-z</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Hassan</surname><given-names>IH</given-names> </name><name name-style="western"><surname>Abdullahi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>Y</given-names> </name></person-group><article-title>Analysis of techniques for selecting appropriate number of clusters in k-means clustering algorithm</article-title><conf-name>International Conference on Computing and Advances in Information Technology (ICCAIT)</conf-name><conf-date>Nov 15-17, 2021</conf-date><conf-loc>Zaria, Nigeria</conf-loc><fpage>90</fpage><lpage>96</lpage></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="web"><article-title>2019 public-use linked mortality files</article-title><source>National Center for Health Statistics</source><year>2019</year><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/nchs/data-linkage/mortality-public.htm">https://www.cdc.gov/nchs/data-linkage/mortality-public.htm</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name></person-group><article-title>Multiple imputation with multivariate imputation by chained equation (MICE) package</article-title><source>Ann Transl Med</source><year>2016</year><volume>4</volume><issue>2</issue><pub-id pub-id-type="doi">10.3978/j.issn.2305-5839.2015.12.63</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><article-title>Health expenditure in Hong Kong</article-title><source>Hong Kong Legislative Council Secretariat Research Office</source><year>2016</year><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.legco.gov.hk/research-publications/english/1617issh03-health-expenditure-in-hong-kong-20161024-e.pdf">https://www.legco.gov.hk/research-publications/english/1617issh03-health-expenditure-in-hong-kong-20161024-e.pdf</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wong</surname><given-names>GLH</given-names> </name><name name-style="western"><surname>Hui</surname><given-names>VWK</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>Novel machine learning models outperform risk scores in predicting hepatocellular carcinoma in patients with chronic viral hepatitis</article-title><source>JHEP Rep</source><year>2022</year><month>03</month><volume>4</volume><issue>3</issue><fpage>100441</fpage><pub-id pub-id-type="doi">10.1016/j.jhepr.2022.100441</pub-id><pub-id pub-id-type="medline">35198928</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><article-title>Data Collaboration Lab</article-title><source>Hospital Authority</source><year>2021</year><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www3.ha.org.hk/data/DCL/SelfService">https://www3.ha.org.hk/data/DCL/SelfService</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wong</surname><given-names>MCS</given-names> </name><name name-style="western"><surname>Leung</surname><given-names>EYM</given-names> </name><name name-style="western"><surname>Chun</surname><given-names>SCC</given-names> </name><etal/></person-group><article-title>Risk of recurrent advanced colorectal neoplasia in individuals with baseline non-advanced neoplasia followed up at 
5 vs 7-10 years</article-title><source>J Gastroenterol Hepatol</source><year>2023</year><month>12</month><volume>38</volume><issue>12</issue><fpage>2122</fpage><lpage>2129</lpage><pub-id pub-id-type="doi">10.1111/jgh.16367</pub-id><pub-id pub-id-type="medline">37771047</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lam</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>JWY</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>YT</given-names> </name></person-group><article-title>Patterns and factors associated with the prescription of psychotropic medications after diagnosis of cancer in Chinese patients: a population-based cohort study</article-title><source>Pharmacoepidemiol Drug Saf</source><year>2024</year><month>02</month><volume>33</volume><issue>2</issue><fpage>e5754</fpage><pub-id pub-id-type="doi">10.1002/pds.5754</pub-id><pub-id pub-id-type="medline">38362653</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="web"><article-title>10 most common cancers in Hong Kong in 2022</article-title><source>Hong Kong Cancer Registry, Hospital Authority</source><access-date>2024-07-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www3.ha.org.hk/cancereg/default.asp">https://www3.ha.org.hk/cancereg/default.asp</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramspek</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Jager</surname><given-names>KJ</given-names> </name><name 
name-style="western"><surname>Dekker</surname><given-names>FW</given-names> </name><etal/></person-group><article-title>External validation of prognostic models: what, why, how, when and where?</article-title><source>Clin Kidney J</source><year>2021</year><month>01</month><volume>14</volume><issue>1</issue><fpage>49</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.1093/ckj/sfaa188</pub-id><pub-id pub-id-type="medline">33564405</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kahkoska</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Geybels</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>KR</given-names> </name><etal/></person-group><article-title>Validation of distinct type 2 diabetes clusters and their association with diabetes complications in the DEVOTE, LEADER and SUSTAIN-6 cardiovascular outcomes trials</article-title><source>Diabetes Obes Metab</source><year>2020</year><month>09</month><volume>22</volume><issue>9</issue><fpage>1537</fpage><lpage>1547</lpage><pub-id pub-id-type="doi">10.1111/dom.14063</pub-id><pub-id pub-id-type="medline">32314525</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ullmann</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hennig</surname><given-names>C</given-names> </name><name name-style="western"><surname>Boulesteix</surname><given-names>AL</given-names> </name></person-group><article-title>Validation of cluster analysis results on validation data: a systematic framework</article-title><source>WIREs Data Min Knowl</source><year>2022</year><month>05</month><volume>12</volume><issue>3</issue><fpage>e1444</fpage><pub-id 
pub-id-type="doi">10.1002/widm.1444</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vermunt</surname><given-names>JK</given-names> </name></person-group><article-title>K-means may perform as well as mixture model clustering but may also be much worse: comment on Steinley and Brusco (2011)</article-title><source>Psychol Methods</source><year>2011</year><month>03</month><volume>16</volume><issue>1</issue><fpage>82</fpage><lpage>88</lpage><pub-id pub-id-type="doi">10.1037/a0020144</pub-id><pub-id pub-id-type="medline">21381819</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patel</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kushwaha</surname><given-names>DS</given-names> </name></person-group><article-title>Clustering Cloud workloads: K-Means vs Gaussian Mixture Model</article-title><source>Procedia Comput Sci</source><year>2020</year><volume>171</volume><fpage>158</fpage><lpage>167</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2020.04.017</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reddy</surname><given-names>BGO</given-names> </name></person-group><article-title>Literature survey on clustering techniques</article-title><source>IOSRJCE</source><year>2012</year><volume>3</volume><issue>1</issue><fpage>01</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.9790/0661-0310112</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Najafi</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Motahari</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Rabiee</surname><given-names>HR</given-names> </name></person-group><article-title>Reliable clustering of Bernoulli mixture models</article-title><source>Bernoulli (Andover)</source><year>2020</year><volume>26</volume><issue>2</issue><fpage>1535</fpage><lpage>1559</lpage><pub-id pub-id-type="doi">10.3150/19-BEJ1173</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Beam</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Kohane</surname><given-names>IS</given-names> </name></person-group><article-title>Artificial intelligence in healthcare</article-title><source>Nat Biomed Eng</source><year>2018</year><month>10</month><volume>2</volume><issue>10</issue><fpage>719</fpage><lpage>731</lpage><pub-id pub-id-type="doi">10.1038/s41551-018-0305-z</pub-id><pub-id pub-id-type="medline">31015651</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oikonomou</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Khera</surname><given-names>R</given-names> </name></person-group><article-title>Machine learning in precision diabetes care and cardiovascular risk prediction</article-title><source>Cardiovasc Diabetol</source><year>2023</year><month>09</month><day>25</day><volume>22</volume><issue>1</issue><fpage>259</fpage><pub-id pub-id-type="doi">10.1186/s12933-023-01985-3</pub-id><pub-id pub-id-type="medline">37749579</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Aramburu</surname><given-names>A</given-names> </name><name name-style="western"><surname>Arostegui</surname><given-names>I</given-names> </name><name name-style="western"><surname>Moraza</surname><given-names>J</given-names> </name><etal/></person-group><article-title>COPD classification models and mortality prediction capacity</article-title><source>Int J Chron Obstruct Pulmon Dis</source><year>2019</year><volume>14</volume><fpage>605</fpage><lpage>613</lpage><pub-id pub-id-type="doi">10.2147/COPD.S184695</pub-id><pub-id pub-id-type="medline">30880950</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jansana</surname><given-names>A</given-names> </name><name name-style="western"><surname>Poblador-Plou</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gimeno-Miguel</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Multimorbidity clusters among long-term breast cancer survivors in Spain: results of the SURBCAN study</article-title><source>Int J Cancer</source><year>2021</year><month>11</month><day>15</day><volume>149</volume><issue>10</issue><fpage>1755</fpage><lpage>1767</lpage><pub-id pub-id-type="doi">10.1002/ijc.33736</pub-id><pub-id pub-id-type="medline">34255861</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gould</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Munoz-Plaza</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Hahn</surname><given-names>EE</given-names> </name><etal/></person-group><article-title>Comorbidity profiles and their effect on treatment selection and survival among patients with lung cancer</article-title><source>Ann Am Thorac 
Soc</source><year>2017</year><month>10</month><volume>14</volume><issue>10</issue><fpage>1571</fpage><lpage>1580</lpage><pub-id pub-id-type="doi">10.1513/AnnalsATS.201701-030OC</pub-id><pub-id pub-id-type="medline">28541748</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Read</surname><given-names>WL</given-names> </name><name name-style="western"><surname>Tierney</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Page</surname><given-names>NC</given-names> </name><etal/></person-group><article-title>Differential prognostic impact of comorbidity</article-title><source>J Clin Oncol</source><year>2004</year><month>08</month><day>1</day><volume>22</volume><issue>15</issue><fpage>3099</fpage><lpage>3103</lpage><pub-id pub-id-type="doi">10.1200/JCO.2004.08.040</pub-id><pub-id pub-id-type="medline">15284260</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kc</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hyslop</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Relative burden of cancer and noncancer mortality among long-term survivors of breast, prostate, and colorectal cancer in the US</article-title><source>JAMA Netw Open</source><year>2023</year><month>07</month><day>3</day><volume>6</volume><issue>7</issue><fpage>e2323115</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.23115</pub-id><pub-id pub-id-type="medline">37436746</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Zaorsky</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Churilla</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Egleston</surname><given-names>BL</given-names> </name><etal/></person-group><article-title>Causes of death among cancer patients</article-title><source>Ann Oncol</source><year>2017</year><month>02</month><day>1</day><volume>28</volume><issue>2</issue><fpage>400</fpage><lpage>407</lpage><pub-id pub-id-type="doi">10.1093/annonc/mdw604</pub-id><pub-id pub-id-type="medline">27831506</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leger</surname><given-names>P</given-names> </name><name name-style="western"><surname>Limper</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Maldonado</surname><given-names>F</given-names> </name></person-group><article-title>Pulmonary toxicities from conventional chemotherapy</article-title><source>Clin Chest Med</source><year>2017</year><month>06</month><volume>38</volume><issue>2</issue><fpage>209</fpage><lpage>222</lpage><pub-id pub-id-type="doi">10.1016/j.ccm.2017.01.002</pub-id><pub-id pub-id-type="medline">28477634</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meattini</surname><given-names>I</given-names> </name><name name-style="western"><surname>Guenzi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fozza</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Overview on cardiac, pulmonary and cutaneous toxicity in patients treated with adjuvant radiotherapy for breast cancer</article-title><source>Breast Cancer 
</source><year>2017</year><month>01</month><volume>24</volume><issue>1</issue><fpage>52</fpage><lpage>62</lpage><pub-id pub-id-type="doi">10.1007/s12282-016-0694-3</pub-id><pub-id pub-id-type="medline">27025498</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kennedy</surname><given-names>LB</given-names> </name><name name-style="western"><surname>Salama</surname><given-names>AKS</given-names> </name></person-group><article-title>A review of cancer immunotherapy toxicity</article-title><source>CA Cancer J Clin</source><year>2020</year><month>03</month><volume>70</volume><issue>2</issue><fpage>86</fpage><lpage>104</lpage><pub-id pub-id-type="doi">10.3322/caac.21596</pub-id><pub-id pub-id-type="medline">31944278</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Haas</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Oosting</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Lefrandt</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>The metabolic syndrome in cancer survivors</article-title><source>Lancet Oncol</source><year>2010</year><month>02</month><volume>11</volume><issue>2</issue><fpage>193</fpage><lpage>203</lpage><pub-id pub-id-type="doi">10.1016/S1470-2045(09)70287-6</pub-id><pub-id pub-id-type="medline">20152771</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bellastella</surname><given-names>G</given-names> </name><name name-style="western"><surname>Scappaticcio</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Esposito</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Metabolic syndrome and cancer: &#x201C;The common soil hypothesis&#x201D;</article-title><source>Diabetes Res Clin Pract</source><year>2018</year><month>09</month><volume>143</volume><fpage>389</fpage><lpage>397</lpage><pub-id pub-id-type="doi">10.1016/j.diabres.2018.05.024</pub-id><pub-id pub-id-type="medline">29807099</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schwedhelm</surname><given-names>C</given-names> </name><name name-style="western"><surname>Boeing</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hoffmann</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Effect of diet on mortality and cancer recurrence among cancer survivors: a systematic review and meta-analysis of cohort studies</article-title><source>Nutr Rev</source><year>2016</year><month>12</month><volume>74</volume><issue>12</issue><fpage>737</fpage><lpage>748</lpage><pub-id pub-id-type="doi">10.1093/nutrit/nuw045</pub-id><pub-id pub-id-type="medline">27864535</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ricci</surname><given-names>C</given-names> </name><name name-style="western"><surname>Freisling</surname><given-names>H</given-names> </name><name name-style="western"><surname>Leitzmann</surname><given-names>MF</given-names> </name><etal/></person-group><article-title>Diet and sedentary behaviour in relation to cancer survival. A report from the national health and nutrition examination survey linked to the U.S. 
mortality registry</article-title><source>Clin Nutr</source><year>2020</year><month>11</month><volume>39</volume><issue>11</issue><fpage>3489</fpage><lpage>3496</lpage><pub-id pub-id-type="doi">10.1016/j.clnu.2020.03.013</pub-id><pub-id pub-id-type="medline">32229168</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Carrillo-Larco</surname><given-names>RM</given-names> </name><etal/></person-group><article-title>Association of dietary patterns and food groups intake with multimorbidity: a prospective cohort study</article-title><source>Clin Nutr ESPEN</source><year>2022</year><month>10</month><volume>51</volume><fpage>359</fpage><lpage>366</lpage><pub-id pub-id-type="doi">10.1016/j.clnesp.2022.07.019</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singh</surname><given-names>GK</given-names> </name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names> </name></person-group><article-title>Socioeconomic and racial/ethnic disparities in cancer mortality, incidence, and survival in the United States, 1950-2014: over six decades of changing patterns and widening inequalities</article-title><source>J Environ Public Health</source><year>2017</year><volume>2017</volume><fpage>2819372</fpage><pub-id pub-id-type="doi">10.1155/2017/2819372</pub-id><pub-id pub-id-type="medline">28408935</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quaglia</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Lillini</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mamo</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Socio-economic inequalities: a review of methodological issues and the relationships with cancer survival</article-title><source>Crit Rev Oncol Hematol</source><year>2013</year><month>03</month><volume>85</volume><issue>3</issue><fpage>266</fpage><lpage>277</lpage><pub-id pub-id-type="doi">10.1016/j.critrevonc.2012.08.007</pub-id><pub-id pub-id-type="medline">22999326</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nejatinamini</surname><given-names>S</given-names> </name><name name-style="western"><surname>Godley</surname><given-names>J</given-names> </name><name name-style="western"><surname>Minaker</surname><given-names>LM</given-names> </name><etal/></person-group><article-title>Quantifying the contribution of modifiable risk factors to socio-economic inequities in cancer morbidity and mortality: a nationally representative population-based cohort study</article-title><source>Int J Epidemiol</source><year>2021</year><month>11</month><day>10</day><volume>50</volume><issue>5</issue><fpage>1498</fpage><lpage>1511</lpage><pub-id pub-id-type="doi">10.1093/ije/dyab067</pub-id><pub-id pub-id-type="medline">33846746</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Afshar</surname><given-names>N</given-names> </name><name name-style="western"><surname>English</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Milne</surname><given-names>RL</given-names> </name></person-group><article-title>Factors explaining socio-economic inequalities in cancer survival: a systematic 
review</article-title><source>Cancer Control</source><year>2021</year><volume>28</volume><fpage>10732748211011956</fpage><pub-id pub-id-type="doi">10.1177/10732748211011956</pub-id><pub-id pub-id-type="medline">33929888</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Okura</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Urban</surname><given-names>LH</given-names> </name><name name-style="western"><surname>Mahoney</surname><given-names>DW</given-names> </name><etal/></person-group><article-title>Agreement between self-report questionnaires and medical record data was substantial for diabetes, hypertension, myocardial infarction and stroke but not for heart failure</article-title><source>J Clin Epidemiol</source><year>2004</year><month>10</month><volume>57</volume><issue>10</issue><fpage>1096</fpage><lpage>1103</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2004.04.005</pub-id><pub-id pub-id-type="medline">15528061</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Muggah</surname><given-names>E</given-names> </name><name name-style="western"><surname>Graves</surname><given-names>E</given-names> </name><name name-style="western"><surname>Bennett</surname><given-names>C</given-names> </name><name name-style="western"><surname>Manuel</surname><given-names>DG</given-names> </name></person-group><article-title>Ascertainment of chronic diseases using population health data: a comparison of health administrative data and patient self-report</article-title><source>BMC Public Health</source><year>2013</year><month>01</month><day>9</day><volume>13</volume><fpage>16</fpage><pub-id pub-id-type="doi">10.1186/1471-2458-13-16</pub-id><pub-id 
pub-id-type="medline">23302258</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Janssen-Heijnen</surname><given-names>MLG</given-names> </name><name name-style="western"><surname>Houterman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lemmens</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Prognosis for long-term survivors of cancer</article-title><source>Ann Oncol</source><year>2007</year><month>08</month><volume>18</volume><issue>8</issue><fpage>1408</fpage><lpage>1413</lpage><pub-id pub-id-type="doi">10.1093/annonc/mdm127</pub-id><pub-id pub-id-type="medline">17693654</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Additional file.</p><media xlink:href="cancer_v11i1e71937_app1.docx" xlink:title="DOCX File, 615 KB"/></supplementary-material></app-group></back></article>