<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e64000</article-id><article-id pub-id-type="doi">10.2196/64000</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Leveraging Digital Twins for Stratification of Patients with Breast Cancer and Treatment Optimization in Geriatric Oncology: Multivariate Clustering Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Heudel</surname><given-names>Pierre</given-names></name><degrees>MSc, LLM, MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Ahmed</surname><given-names>Mashal</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Renard</surname><given-names>Felix</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Attye</surname><given-names>Arnaud</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Medical Oncology, Centre Leon B&#x00E9;rard</institution><addr-line>28 rue Laennec</addr-line><addr-line>Lyon</addr-line><country>France</country></aff><aff id="aff2"><institution>GEODAISICS</institution><addr-line>Grenoble</addr-line><country>France</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chow</surname><given-names>James C L</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ping</surname><given-names>Zhu</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Pierre Heudel, MSc, LLM, MD, Department of Medical Oncology, Centre Leon B&#x00E9;rard, 28 rue Laennec, Lyon, 69008, France, 33 478782952; <email>pierreetienne.heudel@lyon.unicancer.fr</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>all authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>23</day><month>5</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e64000</elocation-id><history><date date-type="received"><day>05</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>22</day><month>01</month><year>2025</year></date><date date-type="accepted"><day>24</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Pierre Heudel, Mashal 
Ahmed, Felix Renard, Arnaud Attye. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 23.5.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e64000"/><abstract><sec><title>Background</title><p>Defining optimal adjuvant therapeutic strategies for older adult patients with breast cancer remains a challenge, given that this population is often overlooked and underserved in clinical research and decision-making tools.</p></sec><sec><title>Objectives</title><p>This study aimed to develop a prognostic and treatment guidance tool tailored to older adult patients using artificial intelligence (AI) and a combination of clinical and biological features.</p></sec><sec sec-type="methods"><title>Methods</title><p>A retrospective analysis was conducted on data from women aged 70+ years with HER2-negative early-stage breast cancer treated at the French L&#x00E9;on B&#x00E9;rard Cancer Center between 1997 and 2016. Manifold learning and machine learning algorithms were applied to uncover complex data relationships and develop predictive models. 
Predictors included age, BMI, comorbidities, hemoglobin levels, lymphocyte counts, hormone receptor status, Scarff-Bloom-Richardson grade, tumor size, and lymph node involvement. The dimension reduction technique PaCMAP was used to map patient profiles into a 3D space, allowing comparison with similar cases to estimate prognoses and potential treatment benefits.</p></sec><sec sec-type="results"><title>Results</title><p>Out of 1229 initial patients, 793 were included after data refinement. The selected predictors demonstrated high predictive efficacy for 5-year mortality, with mean area under the curve scores of 0.81 for Random Forest Classification and 0.76 for Support Vector Classifier. The tool categorized patients into prognostic clusters and enabled the estimation of treatment outcomes, such as chemotherapy benefits. Unlike traditional models that focus on isolated factors, this AI-based approach integrates multiple clinical and biological features to generate a comprehensive biomedical profile.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study introduces a novel AI-driven prognostic tool for older adult patients with breast cancer, enhancing treatment guidance by leveraging advanced machine learning techniques. 
The model provides a more nuanced understanding of disease dynamics and therapeutic strategies, emphasizing the importance of personalized oncology care.</p></sec></abstract><kwd-group><kwd>digital twins</kwd><kwd>artificial intelligence</kwd><kwd>breast cancer</kwd><kwd>older adult patients with cancer</kwd><kwd>treatment</kwd><kwd>geriatric oncology</kwd><kwd>geriatric</kwd><kwd>oncology</kwd><kwd>cancer</kwd><kwd>clustering analysis</kwd><kwd>therapeutic</kwd><kwd>older adult</kwd><kwd>elder</kwd><kwd>old</kwd><kwd>patients with cancer</kwd><kwd>decision-making tools</kwd><kwd>decision-making</kwd><kwd>manifold learning model</kwd><kwd>chemotherapy</kwd><kwd>comorbidities</kwd><kwd>comorbidity</kwd><kwd>health care</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Breast cancer is more commonly diagnosed in older populations, particularly among women aged 65 years and older in wealthier countries. In the United States, the average age of breast cancer diagnosis is 62 years, and in 2020, women aged 70 years and older accounted for 30% of all new cases of the disease [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. In the European Union, women older than 65 years made up about 44% of all breast cancer cases [<xref ref-type="bibr" rid="ref3">3</xref>]. However, treatment approaches for early-stage breast cancer in these older age groups are often inadequate and unclear, largely due to a lack of solid evidence and the unreliability of web-based tools for making decisions about additional therapies, leading to less than ideal treatment outcomes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>The treatment plan for breast cancer is tailored based on the cancer&#x2019;s characteristics, the patients&#x2019; overall health status, and their personal preferences. 
Standard care for early-stage breast cancer usually involves surgery, and may also include radiation, as well as neoadjuvant or adjuvant systemic therapy, used alone or in various combinations. Crafting postsurgical treatment strategies for older patients with breast cancer is complex due to their typically compromised health and the lack of data from clinical trials, since older adults are seldom participants in such studies and are not well represented in meta-analyses that evaluate the effectiveness of adjuvant chemotherapy in reducing breast cancer mortality and improving survival rates [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Consequently, artificial intelligence (AI) has been investigated as a potential tool to support decision-making in the context of limited clinical trial evidence.</p><p>Early uses of AI in cancer treatment guidance involved knowledge-based systems [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Recently, a broader spectrum of machine learning methods has been examined to aid both clinicians and patients with breast cancer [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Nonetheless, most decision support tools are designed for patients aged between 18 and 65 years, reflecting the age group most studied, with limited research focusing on treatment outcomes for older patients with breast cancer [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. The prognostic tool PREDICT [<xref ref-type="bibr" rid="ref20">20</xref>], although popular, has shown limited effectiveness for older adult patients [<xref ref-type="bibr" rid="ref21">21</xref>]. Adjutorium [<xref ref-type="bibr" rid="ref22">22</xref>], which uses extensive datasets from the United Kingdom and the United States, provides more precise prognosis and treatment benefit predictions for breast cancer than PREDICT. 
Despite this, it primarily includes patients aged between 30 and 65 years, with fewer older patients in its datasets, and omits certain vital tumor information such as progesterone receptor (PR) status [<xref ref-type="bibr" rid="ref19">19</xref>]. Another established tool, Adjuvant! Online, predicts 10-year overall survival, breast cancer survival, and recurrence rates, commonly used to inform expected outcomes from endocrine therapy and chemotherapy [<xref ref-type="bibr" rid="ref23">23</xref>]. Its accuracy is questionable for older women with early-stage breast cancer, probably because it was trained on data with a maximum age limit of 69 years [<xref ref-type="bibr" rid="ref24">24</xref>]. In a review by Engelhardt et al [<xref ref-type="bibr" rid="ref25">25</xref>], various models could forecast breast cancer outcomes, typically based on genetic risk scores, but only Adjuvant! Online factored in comorbidity status. Yet, none had been thoroughly validated in older adult populations. The more recent PORTRET tool was designed to predict 5-year recurrence, overall mortality, and mortality from other causes in patients older than 65 years with early invasive breast cancer, as well as to estimate the benefits of adjuvant systemic treatment [<xref ref-type="bibr" rid="ref26">26</xref>]. The tool&#x2019;s authors observed that their treatment effect estimates were based on data from pooled randomized clinical trials, which might not be entirely applicable to older adults due to the typically selective nature of older participants in these trials.</p><p>This study aims to develop models that overcome the shortcomings of past research by using cohorts that accurately reflect the demographic of older patients with breast cancer and by leveraging a detailed dataset that includes administrative, biological, treatment, primary tumor, and survival information. 
Our latest research uses manifold learning, an advanced tool for nonlinear dimensionality reduction that excels in unraveling complex geometric relationships within high-dimensional data, revealing intricate connections between clinical factors.</p><p>We introduce a new prognostic and predictive tool tailored for older adult patients with breast cancer, providing postoperative treatment recommendations. This tool is distinctive in its consideration of the interdependencies among variables within a patient population. It acknowledges the relative importance of prognostic factors in a way that many existing models do not. Our findings are set to be extremely beneficial for oncologists when determining suitable adjuvant treatment approaches for older adult patients with breast cancer, taking into account the nuances of both tumor-related and patient-specific characteristics.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Recruitment</title><p>In this retrospective study, we examined pseudonymized data from women aged 70 years and older who received a diagnosis of early-stage breast cancer and underwent surgery with the intent to cure (either lumpectomy or mastectomy, with or without axillary lymph node dissection) at the French L&#x00E9;on B&#x00E9;rard Cancer Center from January 1997 to December 2016. The French L&#x00E9;on B&#x00E9;rard Cancer Center is a 300-bed comprehensive cancer center located in Lyon, France, serving more than 30,000 patients annually, with a multidisciplinary team of 2000 health care professionals and a catchment area covering southeast France.</p><p>The inclusion criteria were not limited by the breast cancer&#x2019;s histological or molecular characteristics, the size of the tumor, or the status of the lymph nodes. 
However, the study did exclude patients who had noninvasive in situ carcinoma without invasive carcinoma, HER2 (human epidermal growth factor receptor 2) positive breast carcinoma, or who presented with distant metastases at the time of surgery. HER2-positive breast cancer cases were excluded because these patients typically receive trastuzumab-based targeted therapies, which dramatically improved their prognosis following their widespread adoption for nonmetastatic breast cancer around 2005. In contrast, chemotherapy protocols for HER2-negative cases remained consistent during the treatment period of the patients included in this study, ensuring uniformity in therapeutic strategies and outcomes across the cohort. The research concentrated on the 5-year survival rates, selecting only those who had at least 5 years of follow-up and whose vital status information was available.</p><p>The database was constructed using ConSore, a data-mining application developed by UNICANCER [<xref ref-type="bibr" rid="ref27">27</xref>]. The ConSore platform extracts data from the electronic health records of the L&#x00E9;on B&#x00E9;rard Cancer Center, integrating patient demographics, clinical variables, and treatment details. To ensure accuracy, each record was also subject to a manual verification process. Data compiled included demographic details and clinical features of patients at diagnosis, alongside comprehensive biological and disease-specific information, and the treatments administered.</p><p>We included the following characteristics for patients diagnosed with early-stage breast cancer: age; Eastern Cooperative Oncology Group performance status; BMI; comorbidities such as diabetes, heart failure, coronary artery disease, chronic obstructive pulmonary disease, and cognitive impairments; history of hospitalizations; and polypharmacy. 
We also gathered biological indicators at the time of diagnosis, which included hemoglobin levels, lymphocyte counts, and creatinine clearance. We extracted data on disease attributes including histological subtype, hormone receptor status, HER2 status, Scarff-Bloom-Richardson (SBR) grade, tumor count, size of the largest tumor, and the extent of lymph node involvement as per the &#x201C;Tumor,&#x201D; &#x201C;Nodes,&#x201D; &#x201C;Metastases&#x201D; (TNM) classification [<xref ref-type="bibr" rid="ref28">28</xref>]. The statuses of estrogen receptors (ERs), PRs, and HER2 were determined from the histopathological analysis of pretreatment biopsies. Hormone receptor negativity was classified when fewer than 10% of cells were stained for ER and PR. HER2 negativity was assigned when immunohistochemistry staining was below 1+. For tumors scoring 2+, further in situ hybridization tests were conducted to assess HER2 amplification [<xref ref-type="bibr" rid="ref29">29</xref>]. Treatment data collected encompassed the type of surgery performed, lymph node dissection, and adjuvant treatments including radiotherapy, chemotherapy, and endocrine therapy.</p></sec><sec id="s2-2"><title>Outcome, Predictors, and Predictive Power</title><p>Outcome was overall survival in 5 years. Due to the high percentage of missing values for cause of death, cancer-specific survival was not considered. Nine predictors were selected: age, tumor size (mm), tumor grade (defined as either SBR low: 1&#x2010;2; or high: 3), number of affected ganglions, hormone-receptor status (positive if either estrogen or PRs were immunohistochemically present in &#x2265;10% of tumor cells; otherwise, patients were classified as triple negative), serum hemoglobin (g/dL) and lymphocyte count (G/L), BMI, and the presence of comorbidities.</p><p>The initial database, built using ConSore, compiled a range of clinical, biological, and disease-specific data, along with information on administered treatments. 
We aimed for a set of predictors representing a mixture of features typically tested before patients undergo treatment plans. Thus, we excluded features regarding treatments as (1) we wanted to gauge prediction accuracies based only on the initial testing of the patient, and (2) the efficacy of treatment strategies was also an outcome of interest in the study. We further excluded features with a significant number of missing values so as to limit the loss of usable data. Creatinine was excluded due to its high correlation with patients&#x2019; age and potential kidney disorders that are not uncommon in the study&#x2019;s demographic. The feature was found to correlate with negative patient outcome, but this was independent of cancer and introduced a bias. Following these steps, 9 predictors were isolated, a list comprising both continuous and categorical variables, as well as an acceptable mixture of relevant biological and clinical features. Random Forest Classification (RFC) and Support Vector Classifier (SVC) were used to evaluate the predictive power of the selected features. We used 5-fold cross-validation to mitigate overfitting and ensure the validity of our results.</p></sec><sec id="s2-3"><title>Model Development and Validation</title><p>Patients in the initial cohort with missing values for any of the 9 predictors were cut from the study. The remaining patients comprised the model development cohort. This was divided into reference and model data.</p><sec id="s2-3-1"><title>Reference Data and Digital Twins</title><p>The reference data inclusion criteria were positive outcome for survival in 5 years and remission without relapse by the last follow-up. The purpose of this group was to calibrate our patented algorithm, generating digital twins for future test subjects. Digital twins refer to synthetic patient data derived from the reference group specifically similar in profile to a new test subject. 
The model uses these synthetic profiles to recognize complex variations within the test profile. Thus, digital twins are generated and used in the model to provide recommendations on a new patient but do not themselves constitute the result that a physician would need to interpret.</p></sec><sec id="s2-3-2"><title>Model Data</title><p>The model data, distinct from the reference data to prevent data leaking, are the population that is run through the precalibrated model and scored against the reference group. The data are thus transformed from raw patient data to a numerical and standardized representation of their deviation from the reference group (their digital twins). The purpose of these transformed data is to populate the model with a range of patient profiles that will serve for future prognostic analysis.</p></sec></sec><sec id="s2-4"><title>PaCMAP, Mean-Shift Clustering, and Manifold Visualization</title><p>The transformed model data underwent dimensionality reduction using PaCMAP (Pairwise Controlled Manifold Approximation) [<xref ref-type="bibr" rid="ref30">30</xref>] to generate 3D data referred to as a manifold, permitting easy visualization. The data were then stratified using mean-shift clustering [<xref ref-type="bibr" rid="ref31">31</xref>], a nonparametric, density-based clustering algorithm that can be used to identify clusters in a dataset (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Each cluster represents a local group of similar patients in the 3D space. Clusters represent typical patient profiles in the overall population. The advantage of clustering is that it captures the variability of subjects of a subgroup for easy analysis. A better understanding of the cluster and its variability allows clinicians to assess whether a new test subject aligns well with the cluster and to identify potential differences. 
When considering a new patient, estimates of prognosis and expected benefits of adjuvant treatment are ascertained by the examination of cluster-specific treatment outcomes pertinent to the patient&#x2019;s clinical profile.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Graphical representation of the 6 clusters of patients in the 3D manifold space. Patients in the reduced 3D space, or manifold, were grouped into clusters by their spatial distribution and profile similarity. Clusters were then colored based on the overall mortality rate of included patients. A newly tested patient is localized on the manifold and represented by a blue sphere.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e64000_fig01.png"/></fig></sec><sec id="s2-5"><title>Prediction of Chemotherapy Benefit</title><p>To estimate the benefit of chemotherapy, the position of a new patient is identified within the 3D manifold. Using the K-nearest neighbors algorithm, the 15 closest chemotherapy-treated patients and the 15 nearest non&#x2013;chemotherapy-treated patients are pinpointed. Kaplan-Meier (KM) survival curves were plotted for each of these patient groups, providing a visual estimation of chemotherapy benefit for a clinical profile.</p></sec><sec id="s2-6"><title>Validation of Treatment Benefit Predictions With Kullback-Leibler Divergence</title><p>To validate that the distributions of the 2 treatment subgroups are comparable, we used the Probability Density Function, which describes the spread of the data points in the 3D space. To measure the difference between these distributions, we applied the symmetrized Kullback-Leibler (KL) divergence, a statistical method that quantifies how much one distribution differs from another. To assess whether the observed difference was meaningful or just due to random chance, we conducted a permutation analysis. 
This technique works by randomly shuffling the data multiple times to create many new random comparisons; comparing the real result with the random results allows us to determine whether the observed difference between the distributions was statistically significant. If distributions of 2 different treatment groups were found to be similar, they could be compared to provide a prediction of treatment benefit.</p></sec><sec id="s2-7"><title>Model Stability Validation</title><p>The original model data were split into 2 groups: 70% (327) of every cluster was pooled into the training group, and the remaining 30% (139) was pooled into the test group. A new manifold learning process was applied to the training group, and the test group was then projected onto this newly generated manifold. Patients in the test and model groups from the same cluster of origin were compared to evaluate whether data points would exhibit similar distributions (appear in proximity to each other) in the new manifold space across 10 different manifold initializations.</p></sec><sec id="s2-8"><title>Statistical Analysis</title><sec id="s2-8-1"><title>Kullback-Leibler Divergence and Permutation Test</title><p>The symmetrical KL divergence was used to measure the difference between 2 probability distributions. A permutation test was subsequently conducted to assess the significance of the observed KL divergence. This involved calculating the KL divergence for a large number of permutations of the combined datasets and comparing these values with the original KL divergence. The <italic>P</italic> value is calculated as the proportion of permutations where the KL divergence is as extreme as, or more extreme than, the original KL divergence calculated between the actual groups, thus providing a measure of how likely it was to observe a divergence as extreme as the original, under the null hypothesis of no difference between the distributions. 
Mathematically, this <italic>P</italic> value is the ratio of the number of permuted KL divergences that are equal to the original KL divergence or greater to the total number of permutations. A low <italic>P</italic> value suggests that the observed difference in distributions is unlikely to have occurred by chance, thus indicating a significant divergence between the 2 groups.</p></sec><sec id="s2-8-2"><title>Survival Analysis Using the KM Estimator and Log-Rank Test</title><p>The KM estimator was used to generate survival curves for different treatment subgroups. The log-rank test, a nonparametric test, was applied to compare the survival distributions and a <italic>P</italic> value was calculated to determine the statistical significance of the differences observed between the groups. A low <italic>P</italic> value suggests that the observed survival curves are significantly different. The statistical package used for the analysis is Lifelines 0.30.0 (Lifelines Developers) [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></sec><sec id="s2-9"><title>Ethical Considerations</title><p>This retrospective study involving human subjects was reviewed and approved by the French data protection authority, the Commission Nationale de l&#x2019;Informatique et des Libert&#x00E9;s, under authorization number 9191415, dated October 10, 2019. According to institutional and national guidelines, no additional approval from a research ethics board was required, as the data used were previously collected for clinical purposes. No new informed consent was required for this study. The analysis was conducted using data for which participants had provided general consent at the time of data collection. All data were pseudonymized prior to analysis to protect patient confidentiality. No identifiable personal information was retained in the research dataset. 
No compensation was provided to participants.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Cohort Characteristics</title><p>A total of 1229 patients comprised the initial cohort. Of these, 793 (65%) remained after entries with missing values were removed (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Eliminating the risk of introducing a bias, the initial cohort&#x2019;s demographic and clinical characteristics were found to be strictly similar to those of the final cohort and are summarized in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flowchart of data construction.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e64000_fig02.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Patient characteristics of the initial cohort (N=1229).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Participants</td></tr></thead><tbody><tr><td align="left" valign="top">Age at diagnosis (years), n (%)</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>70&#x2010;74</td><td align="left" valign="top">580 (47)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>75&#x2010;79</td><td align="left" valign="top">331 (27)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>80&#x2010;84</td><td align="left" valign="top">204 (17)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>85&#x2010;89</td><td align="left" valign="top">93 (8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003E;90</td><td align="left" valign="top">20 (2)</td></tr><tr><td align="left" valign="top">Performance status, n (%)</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0</td><td align="left" valign="top">339 (28)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">322 (26)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">48 (4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3-4</td><td align="left" valign="top">23 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing data</td><td align="left" valign="top">497 (40)</td></tr><tr><td align="left" valign="top">BMI, n (%)</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;18.5</td><td align="left" valign="top">32 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18.5&#x2010;25</td><td align="left" valign="top">446 (36)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>25&#x2010;30</td><td align="left" valign="top">409 (33)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003E;30</td><td align="left" valign="top">266 (22)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing data</td><td align="left" valign="top">76 (6)</td></tr><tr><td align="left" valign="top">Comorbidities, n (%)</td><td align="left" valign="top">&#x2003;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Creatinine clearance &#x003C;40 mL/minute</td><td align="left" valign="top">57 (5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart failure</td><td align="left" valign="top">105 (9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Coronary artery disease</td><td align="left" valign="top">123 (10)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chronic obstructive pulmonary disease</td><td align="left" valign="top">36 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diabetes</td><td align="left" valign="top">174 (14)</td></tr></tbody></table></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Cancer characteristics of the initial cohort (N=1229).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="6">Tumor size</td></tr></thead><tbody><tr><td align="left" valign="top">Status</td><td align="left" valign="top">T1</td><td align="left" valign="top">T2</td><td align="left" valign="top">T3</td><td align="left" valign="top">T4</td><td align="left" valign="top">Missing data</td></tr><tr><td align="left" 
valign="top">Participants, n (%)</td><td align="left" valign="top">567 (46)</td><td align="left" valign="top">286 (23)</td><td align="left" valign="top">36 (3)</td><td align="left" valign="top">250 (20)</td><td align="left" valign="top">90 (7)</td></tr><tr><td align="left" valign="top" colspan="6">Lymph nodes</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Status</td><td align="left" valign="top">N0</td><td align="left" valign="top">N1</td><td align="left" valign="top">N2</td><td align="left" valign="top">N3</td><td align="left" valign="top">Missing data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Participants, n (%)</td><td align="left" valign="top">614 (50)</td><td align="left" valign="top">243 (20)</td><td align="left" valign="top">55 (4)</td><td align="left" valign="top">55 (4)</td><td align="left" valign="top">262 (21)</td></tr><tr><td align="left" valign="top" colspan="6">Grade SBR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Status</td><td align="left" valign="top">I</td><td align="left" valign="top" colspan="2">II</td><td align="left" valign="top">III</td><td align="left" valign="top">Missing data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Participants, n (%)</td><td align="left" valign="top">188 (15)</td><td align="left" valign="top" colspan="2">648 (53)</td><td align="left" valign="top">281 (23)</td><td align="left" valign="top">112 (9)</td></tr><tr><td align="left" valign="top" colspan="6">Estrogen receptor</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Status</td><td align="left" 
valign="top" colspan="2">Positive</td><td align="left" valign="top" colspan="2">Negative</td><td align="left" valign="top">Missing data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Participants, n (%)</td><td align="left" valign="top" colspan="2">978 (80)</td><td align="left" valign="top" colspan="2">145 (12)</td><td align="left" valign="top">106 (9)</td></tr><tr><td align="left" valign="top" colspan="6">Progesterone receptor</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Status</td><td align="left" valign="top" colspan="2">Positive</td><td align="left" valign="top" colspan="2">Negative</td><td align="left" valign="top">Missing data</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Participants, n (%)</td><td align="left" valign="top" colspan="2">838 (68)</td><td align="left" valign="top" colspan="2">285 (23)</td><td align="left" valign="top">106 (9)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>SBR: Scarff-Bloom-Richardson.</p></fn></table-wrap-foot></table-wrap><p>Patient demographics and characteristics were evaluated on the date of breast cancer diagnosis (<xref ref-type="table" rid="table1">Table 1</xref>). Median age was 75 years (range: 70&#x2010;100 years), with 317/1229 (26%) patients aged 80 years or older. Performance status was generally good, as most patients were categorized as 0 or 1. The main comorbidities were diabetes (174/1229 patients, or 14%), followed by coronary artery disease (123/1229 patients, 10%) and cardiac insufficiency (105/1229 patients, 9%).</p><p>The majority presented with early-stage tumors (T1 in 567/1229 patients, with a prevalence of 46%), and lymph node involvement was mostly absent (N0 in 614/1229 patients, or 50%). 
The tumors were typically SBR grade II and 80% (978/1229 patients) were ER-positive. Progesterone receptor positivity was also high at 68% (838/1229 patients). Twelve percent of patients (149/1229) were reported to have received chemotherapy (<xref ref-type="table" rid="table2">Table 2</xref>).</p></sec><sec id="s3-2"><title>Development Cohort</title><p>The final cohort was divided into &#x201C;reference&#x201D; and &#x201C;model&#x201D; cohorts for model development (<xref ref-type="fig" rid="figure2">Figure 2</xref>). A total of 327 patients, that is, 50% of patients meeting the criteria for manifold-estimated derivation training were randomly selected. The purpose of this training group was to calibrate the manifold-estimated derivation&#x2013;scoring algorithm. The model data comprised all remaining patients (466, 59% of the model development cohort).</p></sec><sec id="s3-3"><title>Features Performance and Area Under the Curve Scores</title><p>In <xref ref-type="fig" rid="figure3">Figure 3</xref>, we ascertained the predictive efficacy of the selected variables using RFC and SVC. Analyzing the receiver operating characteristic curves, both models demonstrated commendable predictive capabilities. RFC yielded a mean area under the curve (AUC) of 0.81 (SD 0.06) and a mean accuracy of 0.82 (SD 0.02), while SVC followed closely with a mean AUC of 0.76 (SD 0.05) and a mean accuracy of 0.78 (SD 0.01). The overlapping SDs of these scores suggest that the differences in their performance are not statistically significant.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Receiver operating characteristic curves for 5-year mortality predictive models. The predictive efficacy of the selected features was ascertained using Random Forest Classification and Support Vector Classifier. Results are presented as the mean of ROC and AUC values derived from 5-fold cross-validation. 
AUC: area under the curve; ROC: receiver operating characteristic; SVC: Support Vector Classifier.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e64000_fig03.png"/></fig><p>The overall relative importance of variables for the prediction of the 5-year outcome was also determined by RFC (<xref ref-type="table" rid="table3">Table 3</xref>). Age, tumor size, and hemoglobin were the top predictors, closely followed by lymphocyte count and BMI. Curiously, the cancer grade, axillary lymph nodes involvement, and the presence of comorbidities ranked low in overall importance. This indicates that although typically taken as important factors from a clinical perspective, comorbidities and cancer grade alone are not the best prognostic features in a patient; rather, a patient&#x2019;s overall biological profile may be more valuable, underscoring the usefulness of manifold learning as a prognostic tool.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Overall importance of predictors according to Random Forest Classification.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Importance (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Age</td><td align="left" valign="top">18.33</td></tr><tr><td align="left" valign="top">Tumor size</td><td align="left" valign="top">17.26</td></tr><tr><td align="left" valign="top">Hemoglobin (g/dL)</td><td align="left" valign="top">16.41</td></tr><tr><td align="left" valign="top">Lymphocytes (g/L)</td><td align="left" valign="top">14.84</td></tr><tr><td align="left" valign="top">BMI</td><td align="left" valign="top">13.06</td></tr><tr><td align="left" valign="top">Lymph nodes involvement</td><td align="left" valign="top">10.39</td></tr><tr><td align="left" valign="top">SBR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> 
grade</td><td align="left" valign="top">4.06</td></tr><tr><td align="left" valign="top">ER<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> status</td><td align="left" valign="top">2.88</td></tr><tr><td align="left" valign="top">Comorbidities</td><td align="left" valign="top">2.78</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>SBR: Scarff-Bloom-Richardson.</p></fn><fn id="table3fn2"><p><sup>b</sup>ER: estrogen receptor.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Model Stability</title><p>Patients in the test and model groups from the same cluster of origin were compared to evaluate whether data points would exhibit similar distributions (appear in proximity to each other) in the new manifold space across 10 different manifold initializations. The distributions of the test group (n=140) consistently matched closely with those of the model group (n=326), with all <italic>P</italic> values being above the threshold of .05 indicating a lack of significant variation between groups (Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s3-5"><title>Prognostic Ability</title><p>The primary objective of our study is to evaluate the prognostic ability of the manifold learning model, as measured by the 5-year survival rate of our population. The 3D clusters in <xref ref-type="fig" rid="figure1">Figure 1</xref> illuminated the landscape of our dataset, representing local groups of patients characterized by distinct clinical and prognostic profiles. 
Clusters are colored based on the overall mortality rate of included patients: Groups 0, 1, and 4 in green have the best prognosis with a 5-year survival rate of more than 80% while group 3 has the worst prognosis with a 5-year mortality rate of at least 35%.</p><p><xref ref-type="table" rid="table4">Table 4</xref> further elucidates the variability in values across the patient clusters, especially in BMI, tumor size (in mm), and median age, underscoring the diversity in our cohort.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Characteristics of the 6 clusters defined by manifold learning.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Feature</td><td align="left" valign="bottom" colspan="6">Cluster</td></tr><tr><td align="left" valign="bottom">0</td><td align="left" valign="bottom">1</td><td align="left" valign="bottom">2</td><td align="left" valign="bottom">3</td><td align="left" valign="bottom">4</td><td align="left" valign="bottom">5</td></tr></thead><tbody><tr><td align="left" valign="top">Hemoglobin (g/dL)</td><td align="left" valign="top">13.4</td><td align="left" valign="top">13.3</td><td align="left" valign="top">13</td><td align="left" valign="top">13</td><td align="left" valign="top">13.3</td><td align="left" valign="top">11.9</td></tr><tr><td align="left" valign="top">BMI</td><td align="left" valign="top">25</td><td align="left" valign="top">28.4</td><td align="left" valign="top">24.9</td><td align="left" valign="top">28.9</td><td align="left" valign="top">25.6</td><td align="left" valign="top">23</td></tr><tr><td align="left" valign="top">Lymph nodes involved</td><td align="left" valign="top">0.6</td><td align="left" valign="top">0.8</td><td align="left" valign="top">1.6</td><td align="left" valign="top">6.2</td><td align="left" valign="top">1</td><td align="left" valign="top">0.8</td></tr><tr><td align="left" valign="top">Tumor size (mm)</td><td 
align="left" valign="top">19.6</td><td align="left" valign="top">19.3</td><td align="left" valign="top">26.1</td><td align="left" valign="top">65.8</td><td align="left" valign="top">23.1</td><td align="left" valign="top">30.6</td></tr><tr><td align="left" valign="top">Age (years)</td><td align="left" valign="top">75.8</td><td align="left" valign="top">76.3</td><td align="left" valign="top">77.8</td><td align="left" valign="top">77.4</td><td align="left" valign="top">79.2</td><td align="left" valign="top">80.5</td></tr><tr><td align="left" valign="top">Lymphocytes (g/L)</td><td align="left" valign="top">1.8</td><td align="left" valign="top">2.1</td><td align="left" valign="top">2.1</td><td align="left" valign="top">1.6</td><td align="left" valign="top">3.4</td><td align="left" valign="top">1.6</td></tr><tr><td align="left" valign="top">Comorbidities</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">0.4</td><td align="left" valign="top">0.3</td><td align="left" valign="top">0</td><td align="left" valign="top">0.9</td></tr><tr><td align="left" valign="top">Estrogen receptor status</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0.8</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">SBR<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (high/low)</td><td align="left" valign="top">0</td><td align="left" valign="top">0.1</td><td align="left" valign="top">0.7</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.8</td><td align="left" valign="top">0.6</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SBR: Scarff-Bloom-Richardson.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-6"><title>Predictive Ability</title><p>Next, we attempted to ascertain the individual benefit of performing adjuvant 
chemotherapy, demonstrated in <xref ref-type="fig" rid="figure4">Figure 4</xref> with 3 examples.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Three case examples assessing the individual benefit of adjuvant chemotherapy. (A) The closest chemotherapy-treated and non&#x2013;chemotherapy-treated patients to a new patient are identified in the 3D manifold and their survival curves are compared to show the treatment&#x2019;s potential benefit or lack thereof. (B) The new patient&#x2019;s position in the 3D manifold (black star) and the 15 closest patients of each treatment group are shown, displaying varying distributions of treatment subgroups. (C) To quantify distances between the subgroups, the real calculated KL divergence between the treatment groups&#x2019; distributions (red line) was compared with that of permuted data (blue histograms) to verify whether observed divergences between treatment subgroups are significant or not. KL: Kullback-Leibler.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e64000_fig04.png"/></fig><p>When a target patient is localized in the 3D manifold, the closest patient profiles are identified. This is done for 2 treatment groups based on whether the patients received chemotherapy (chemo and nonchemo groups), permitting the visualization of KM survival curves that would show the treatment&#x2019;s potential benefit or lack thereof (<xref ref-type="fig" rid="figure4">Figure 4A</xref>).</p><p><xref ref-type="fig" rid="figure4">Figure 4B</xref> shows the target patient&#x2019;s position in the 3D manifold (black star), with the 15 closest patients of each treatment group also marked. In examples 1 and 2, the 2 treatment groups are found to be well &#x201C;mixed&#x201D; in the local vicinity of the target, indicating that the target profile is well represented by similar chemotherapy-treated and non&#x2013;chemotherapy-treated patients. 
To quantify distances between the subgroups, we used permutation analysis (<xref ref-type="fig" rid="figure4">Figure 4C</xref>). The real calculated KL divergences between the treatment groups&#x2019; distributions (red line) for examples 1 and 2 fall well within the range of what could be expected by chance (blue histograms) (<italic>P</italic>&#x003E;.1), indicating that the observed divergences are not significant.</p><p>Example 3 showcases a situation where patients from the 2 treatment groups are not well mixed in the local vicinity of the target patient. In this case, the real KL divergence is far right of the histogram (<italic>P</italic>&#x003C;.01), suggesting a significant difference between the distributions. Thus, the KM survival curves and any conclusion drawn from them must be taken with consideration of the heterogeneity in the profiles of the treatment groups being compared.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>From an initial cohort of 1229 patients, we used 793 (65%) to develop a model that clustered patients by their clinical and biological features. These clusters represent a potential prognostic tool for physicians, attributing a risk of mortality in 5 years to patients with consideration to multivariate profiles. The model is further able to indicate the potential benefit or lack thereof of chemotherapy treatment in older adult patients. We found that the predictors used in our model gave a good overall result of 0.81 and 0.76 AUCs with RFC and SVC, respectively.</p><p>In summation, our multifaceted approach, blending manifold learning with classical machine learning paradigms and intuitive data visualizations, has unveiled profound insights into the prognosis determinants of early-stage breast cancer in older adults. These revelations bring a more nuanced understanding of the disease and hold promise for tailoring patient-specific therapeutic strategies. 
Our study&#x2019;s utilization of manifold learning and advanced machine learning algorithms represents a significant contribution to oncology. The accuracy of 81% in differentiating patient subgroups through manifold learning is impressive, showcasing an advancement beyond traditional linear models [<xref ref-type="bibr" rid="ref33">33</xref>]. This approach is in line with recent trends in personalized medicine [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], which discuss the potential of machine learning in cancer prognosis. The high AUC values achieved by RFC and SVC reflect the importance of our combined predictors in medical diagnostics, aligning with the findings of recent studies on the application of machine learning in cancer detection [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. The application of data visualization techniques such as heatmaps and 3D scatterplots in elucidating complex clinical relationships is noteworthy. This approach is supported by advancements in data visualization in medical research, as seen in the study by Borkin et al [<xref ref-type="bibr" rid="ref38">38</xref>] on how data visualization supports medical decision-making [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec><sec id="s4-2"><title>Limitations</title><p>The present results should be interpreted in the light of some limitations. First, the monocentric nature of the research may impact the representativeness of the cohort, potentially affecting the generalizability of our findings. Second, the exclusion of specific patient characteristics, such as the ONCODAGE score [<xref ref-type="bibr" rid="ref41">41</xref>], from our datasets may have limited the comprehensiveness of our prognostic tools. 
Third, the retrospective design of the study constrains our ability to establish causality between clinical characteristics and patient outcomes. A fourth limitation concerns the fact that patients may present with or have a history of multiple comorbidities. We chose to group together patients with any number of comorbidities for reasons related to (1) the reduction of the sample size for each category of comorbidity, and (2) the potential skewing of patient distribution in the 3D manifold due to multiple related qualitative variables. PaCMAP is prone to &#x201C;overseparating&#x201D; the population if provided with too many binary features. With these reasons in mind, we nonetheless acknowledge that omitting the consideration of multiple comorbidities is a limitation of the study. Other notable limitations include the absence of cancer-specific or treatment-specific survival metrics, a lack of detailed analysis on specific comorbidities, and the need for more data to enhance the less populated clusters. Furthermore, external validation of our model, which is crucial for assessing its generalizability, remains pending.</p></sec><sec id="s4-3"><title>Future Prospects</title><p>Looking forward, the promising application of manifold learning in oncology, as demonstrated in our study, aligns with the burgeoning field of personalized medicine. The integration of machine learning in personalized cancer therapy, as discussed by Danishuddin et al [<xref ref-type="bibr" rid="ref42">42</xref>], supports the potential of such approaches. The development of advanced AI-driven prognostic tools, particularly for older adult patients who are often underrepresented in clinical trials, could revolutionize treatment guidelines and care approaches. The rapid advancement of machine learning techniques poses a challenge in ensuring the longevity and relevance of models, necessitating continuous updates. 
This is echoed in the broader context of AI in health care, as discussed in Topol&#x2019;s [<xref ref-type="bibr" rid="ref43">43</xref>] comprehensive review of AI in medicine. Concerns about the adoption of AI tools due to accuracy, explainability, and ethical considerations are also prevalent, as reflected in the exploration of implementing AI in clinical practice by Char et al [<xref ref-type="bibr" rid="ref44">44</xref>]. Our findings may open up avenues for personalized treatment specifically tailored to neglected populations in oncology, starting with geriatric patients with breast cancer. We expect our software to provide rapid guidance to physicians in the process of charting treatment plans for their patients, going beyond simple univariate statistics and instead considering patients&#x2019; combined clinical and biological profiles.</p></sec><sec id="s4-4"><title>Conclusions</title><p>Our study aimed to further the management of early breast cancer in older adult patients by integrating cutting-edge AI techniques. We proposed a technique that uses patient data to create a visualizable 3D map of pathology profiles that allows rapid prognostic estimations for new patients. These prognostic predictions include the potential benefits of treatment strategies such as chemotherapy, aiding clinical decision-making. This work reflects the ongoing evolution in oncology, emphasizing the importance of tailored treatment strategies and highlighting both the potential and the challenges of AI applications in health care. This study also prompts considerations for future research directions and ethical implications in the rapidly evolving field of AI in medicine.</p></sec></sec></body><back><notes><sec><title>Data Availability</title><p>The data that support the findings of this study are not publicly available due to privacy and confidentiality agreements. 
However, pseudonymized data may be made available upon reasonable request to the corresponding author and following approval by the French Commission Nationale de l&#x2019;Informatique et des Libert&#x00E9;s under the data-sharing agreements of the L&#x00E9;on B&#x00E9;rard Cancer Center.</p></sec></notes><fn-group><fn fn-type="con"><p>FR and PH contributed to conceptualization, methodology, project administration, validation, and writing (review and editing). MA contributed to methodology, investigation, data curation, validation, and writing (original draft and editing). AA contributed to conceptualization, supervision, writing (review and editing), and resources.</p></fn><fn fn-type="conflict"><p>The model described in this study was developed by GeodAIsics, who are listed among the authors. PH reports grants, personal fees, and nonfinancial support from PFIZER, LILLY, DAIICHI, and ASTRAZENECA; grants and nonfinancial support from NOVARTIS and ROCHE; and personal fees and nonfinancial support from SEAGEN, GILEAD, and MSD. PH is also cofounder and chief medical officer of GEODAISICS. AA is the founder and CEO of GeodAIsics. 
The remaining authors have no conflicts of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb3">COPD</term><def><p>chronic obstructive pulmonary disease</p></def></def-item><def-item><term id="abb4">ER</term><def><p>estrogen receptor</p></def></def-item><def-item><term id="abb5">HER2</term><def><p>human epidermal growth factor receptor 2</p></def></def-item><def-item><term id="abb6">KL</term><def><p>Kullback-Leibler</p></def></def-item><def-item><term id="abb7">KM</term><def><p>Kaplan-Meier</p></def></def-item><def-item><term id="abb8">PaCMAP</term><def><p>Pairwise Controlled Manifold Approximation</p></def></def-item><def-item><term id="abb9">PR</term><def><p>progesterone receptor</p></def></def-item><def-item><term id="abb10">RFC</term><def><p>Random Forest Classification</p></def></def-item><def-item><term id="abb11">SBR</term><def><p>Scarff-Bloom-Richardson</p></def></def-item><def-item><term id="abb12">SVC</term><def><p>Support Vector Classification</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Howlader</surname><given-names>N</given-names> </name><name name-style="western"><surname>Noone</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Krapcho</surname><given-names>M</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Cronin</surname><given-names>KA</given-names> </name></person-group><article-title>SEER cancer statistics review, 1975-2017</article-title><source>Based on November 2019 SEER data submission, posted to the SEER web 
site</source><access-date>2025-05-04</access-date><publisher-name>National Cancer Institute</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://seer.cancer.gov/archive/csr/1975_2017/">https://seer.cancer.gov/archive/csr/1975_2017/</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>American Cancer Society</collab></person-group><article-title>Breast cancer facts &#x0026; figures 2019-2020</article-title><access-date>2025-05-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.org/content/dam/cancer-org/research/cancer-facts-and-statistics/breast-cancer-facts-and-figures/breast-cancer-facts-and-figures-2019-2020.pdf">https://www.cancer.org/content/dam/cancer-org/research/cancer-facts-and-statistics/breast-cancer-facts-and-figures/breast-cancer-facts-and-figures-2019-2020.pdf</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ferlay</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ervik</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lam</surname><given-names>F</given-names> </name><etal/></person-group><source>Global Cancer Observatory: Cancer Today</source><year>2020</year><access-date>2025-05-20</access-date><publisher-name>Lyon, France: International Agency for Research on Cancer</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://gco.iarc.fr/today">https://gco.iarc.fr/today</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tesarova</surname><given-names>P</given-names> </name></person-group><article-title>Specific aspects of breast cancer therapy of 
elderly women</article-title><source>Biomed Res Int</source><year>2016</year><volume>2016</volume><fpage>1381695</fpage><pub-id pub-id-type="doi">10.1155/2016/1381695</pub-id><pub-id pub-id-type="medline">27807536</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Glaser</surname><given-names>R</given-names> </name><name name-style="western"><surname>Marinopoulos</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dimitrakakis</surname><given-names>C</given-names> </name></person-group><article-title>Breast cancer treatment in women over the age of 80: a tailored approach</article-title><source>Maturitas</source><year>2018</year><month>04</month><volume>110</volume><fpage>29</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1016/j.maturitas.2018.01.014</pub-id><pub-id pub-id-type="medline">29563032</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clarke</surname><given-names>M</given-names> </name><name name-style="western"><surname>Coates</surname><given-names>AS</given-names> </name><etal/></person-group><article-title>Adjuvant chemotherapy in oestrogen-receptor-poor breast cancer: patient-level meta-analysis of randomised trials</article-title><source>Lancet</source><year>2008</year><month>01</month><day>5</day><volume>371</volume><issue>9606</issue><fpage>29</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(08)60069-0</pub-id><pub-id pub-id-type="medline">18177773</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peto</surname><given-names>R</given-names> </name><name name-style="western"><surname>Davies</surname><given-names>C</given-names> 
</name><collab>Early Breast Cancer Trialists&#x2019; Collaborative Group (EBCTCG)</collab><etal/></person-group><article-title>Comparisons between different polychemotherapy regimens for early breast cancer: meta-analyses of long-term outcome among 100,000 women in 123 randomised trials</article-title><source>Lancet</source><year>2012</year><month>02</month><day>4</day><volume>379</volume><issue>9814</issue><fpage>432</fpage><lpage>444</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(11)61625-5</pub-id><pub-id pub-id-type="medline">22152853</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="editor"><name name-style="western"><surname>Buchanan</surname><given-names>BG</given-names> </name><name name-style="western"><surname>Shortliffe</surname><given-names>EH</given-names> </name></person-group><article-title>An expert system for oncology protocol management</article-title><source>Rule-Based Expert Systems: The MYCIN Experiments of the Stanford Heuristic Programming Project</source><year>1984</year><publisher-name>Reading: Addison-Wesley</publisher-name><fpage>876</fpage><lpage>881</lpage></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lieber</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bresson</surname><given-names>B</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Blanzieri</surname><given-names>E</given-names> </name><name name-style="western"><surname>Portinale</surname><given-names>L</given-names> </name></person-group><article-title>Case-based reasoning for breast cancer treatment decision helping</article-title><source>Advances in Case-Based Reasoning EWCBR 2000 Lecture Notes in Computer Science (Lecture Notes in Artificial 
Intelligence)</source><year>2000</year><volume>1898</volume><publisher-name>Springer</publisher-name><fpage>173</fpage><lpage>185</lpage></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hawley</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>An</surname><given-names>LC</given-names> </name><etal/></person-group><article-title>Improving breast cancer surgical treatment decision making: the iCanDecide randomized clinical trial</article-title><source>J Clin Oncol</source><year>2018</year><month>03</month><day>1</day><volume>36</volume><issue>7</issue><fpage>659</fpage><lpage>666</lpage><pub-id pub-id-type="doi">10.1200/JCO.2017.74.8442</pub-id><pub-id pub-id-type="medline">29364772</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wyatt</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Jenkins</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Plevak</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Venegas Pont</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Pruthi</surname><given-names>S</given-names> </name></person-group><article-title>A personalized, web-based breast cancer decision making application: a pre-post survey</article-title><source>BMC Med Inform Decis Mak</source><year>2019</year><month>10</month><day>21</day><volume>19</volume><issue>1</issue><fpage>196</fpage><pub-id pub-id-type="doi">10.1186/s12911-019-0924-7</pub-id><pub-id pub-id-type="medline">31638964</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Sep&#x00FA;lveda</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Artificial intelligence treatment decision support for complex breast cancer among oncologists with varying expertise</article-title><source>JCO Clin Cancer Inform</source><year>2019</year><month>08</month><volume>3</volume><fpage>1</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1200/CCI.18.00159</pub-id><pub-id pub-id-type="medline">31419181</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mazo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kearns</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mooney</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gallagher</surname><given-names>WM</given-names> </name></person-group><article-title>Clinical decision support systems in breast cancer: a systematic review</article-title><source>Cancers (Basel)</source><year>2020</year><month>02</month><day>6</day><volume>12</volume><issue>2</issue><fpage>369</fpage><pub-id pub-id-type="doi">10.3390/cancers12020369</pub-id><pub-id pub-id-type="medline">32041094</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wishart</surname><given-names>GC</given-names> </name><name name-style="western"><surname>Azzato</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Greenberg</surname><given-names>DC</given-names> 
</name><etal/></person-group><article-title>PREDICT: a new UK prognostic model that predicts survival following surgery for invasive breast cancer</article-title><source>Breast Cancer Res</source><year>2010</year><volume>12</volume><issue>1</issue><fpage>R1</fpage><pub-id pub-id-type="doi">10.1186/bcr2464</pub-id><pub-id pub-id-type="medline">20053270</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wells</surname><given-names>A</given-names> </name><name name-style="western"><surname>Brufsky</surname><given-names>A</given-names> </name><name name-style="western"><surname>Neapolitan</surname><given-names>R</given-names> </name></person-group><article-title>A clinical decision support system learned from data to personalize treatment recommendations towards preventing breast cancer metastasis</article-title><source>PLoS ONE</source><year>2019</year><volume>14</volume><issue>3</issue><fpage>e0213292</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0213292</pub-id><pub-id pub-id-type="medline">30849111</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hughes</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Schnaper</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Berry</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Lumpectomy plus tamoxifen with or without irradiation in women 70 years of age or older with early breast cancer</article-title><source>N Engl J Med</source><year>2004</year><month>09</month><day>2</day><volume>351</volume><issue>10</issue><fpage>971</fpage><lpage>977</lpage><pub-id 
pub-id-type="doi">10.1056/NEJMoa040587</pub-id><pub-id pub-id-type="medline">15342805</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clough-Gorr</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Thwin</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Stuck</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Silliman</surname><given-names>RA</given-names> </name></person-group><article-title>Examining five- and ten-year survival in older women with breast cancer using cancer-specific geriatric assessment</article-title><source>Eur J Cancer</source><year>2012</year><month>04</month><volume>48</volume><issue>6</issue><fpage>805</fpage><lpage>812</lpage><pub-id pub-id-type="doi">10.1016/j.ejca.2011.06.016</pub-id><pub-id pub-id-type="medline">21741826</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stotter</surname><given-names>A</given-names> </name><name name-style="western"><surname>Reed</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Gray</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Moore</surname><given-names>N</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>TG</given-names> </name></person-group><article-title>Comprehensive Geriatric Assessment and predicted 3-year survival in treatment planning for frail patients with early breast cancer</article-title><source>Br J Surg</source><year>2015</year><month>04</month><volume>102</volume><issue>5</issue><fpage>525</fpage><lpage>533</lpage><pub-id pub-id-type="doi">10.1002/bjs.9755</pub-id><pub-id pub-id-type="medline">25708660</pub-id></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alaa</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Gurdasani</surname><given-names>D</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Rashbass</surname><given-names>J</given-names> </name><name name-style="western"><surname>van der Schaar</surname><given-names>M</given-names> </name></person-group><article-title>Machine learning to guide the use of adjuvant therapies for breast cancer</article-title><source>Nat Mach Intell</source><year>2021</year><volume>3</volume><issue>8</issue><fpage>716</fpage><lpage>726</lpage><pub-id pub-id-type="doi">10.1038/s42256-021-00353-8</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shachar</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Muss</surname><given-names>HB</given-names> </name></person-group><article-title>Internet tools to enhance breast cancer care</article-title><source>NPJ Breast Cancer</source><year>2016</year><volume>2</volume><fpage>16011</fpage><pub-id pub-id-type="doi">10.1038/npjbcancer.2016.11</pub-id><pub-id pub-id-type="medline">28721377</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Maaren</surname><given-names>MC</given-names> </name><name name-style="western"><surname>van Steenbeek</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Pharoah</surname><given-names>PDP</given-names> </name><etal/></person-group><article-title>Validation of the online prediction tool PREDICT v. 
2.0 in the Dutch breast cancer population</article-title><source>Eur J Cancer</source><year>2017</year><month>11</month><volume>86</volume><fpage>364</fpage><lpage>372</lpage><pub-id pub-id-type="doi">10.1016/j.ejca.2017.09.031</pub-id><pub-id pub-id-type="medline">29100191</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>Van der Schaar Lab</article-title><source>Adjutorium: a machine learning framework for personalized survival prediction</source><access-date>2025-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://vanderschaar-lab.com/adjutorium/">https://vanderschaar-lab.com/adjutorium/</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ravdin</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Siminoff</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>GJ</given-names> </name><etal/></person-group><article-title>Computer program to assist in making decisions about adjuvant therapy for women with early breast cancer</article-title><source>J Clin Oncol</source><year>2001</year><month>02</month><day>15</day><volume>19</volume><issue>4</issue><fpage>980</fpage><lpage>991</lpage><pub-id pub-id-type="doi">10.1200/JCO.2001.19.4.980</pub-id><pub-id pub-id-type="medline">11181660</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Glas</surname><given-names>NA</given-names> </name><name name-style="western"><surname>van de Water</surname><given-names>W</given-names> </name><name name-style="western"><surname>Engelhardt</surname><given-names>EG</given-names> </name><etal/></person-group><article-title>Validity of Adjuvant! 
Online program in older patients with breast cancer: a population-based study</article-title><source>Lancet Oncol</source><year>2014</year><month>06</month><volume>15</volume><issue>7</issue><fpage>722</fpage><lpage>729</lpage><pub-id pub-id-type="doi">10.1016/S1470-2045(14)70200-1</pub-id><pub-id pub-id-type="medline">24836274</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Engelhardt</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Garvelink</surname><given-names>MM</given-names> </name><name name-style="western"><surname>de Haes</surname><given-names>JHCJM</given-names> </name><etal/></person-group><article-title>Predicting and communicating the risk of recurrence and death in women with early-stage breast cancer: a systematic review of risk prediction models</article-title><source>J Clin Oncol</source><year>2014</year><month>01</month><day>20</day><volume>32</volume><issue>3</issue><fpage>238</fpage><lpage>250</lpage><pub-id pub-id-type="doi">10.1200/JCO.2013.50.3417</pub-id><pub-id pub-id-type="medline">24344212</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Plas-Krijgsman</surname><given-names>WG</given-names> </name><name name-style="western"><surname>Giardiello</surname><given-names>D</given-names> </name><name name-style="western"><surname>Putter</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Development and validation of the PORTRET tool to predict recurrence, overall survival, and other-cause mortality in older patients with breast cancer in the Netherlands: a population-based study</article-title><source>Lancet Healthy 
Longev</source><year>2021</year><month>11</month><volume>2</volume><issue>11</issue><fpage>e704</fpage><lpage>e711</lpage><pub-id pub-id-type="doi">10.1016/S2666-7568(21)00229-4</pub-id><pub-id pub-id-type="medline">36098027</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heudel</surname><given-names>P</given-names> </name><name name-style="western"><surname>Livartowski</surname><given-names>A</given-names> </name><name name-style="western"><surname>Arveux</surname><given-names>P</given-names> </name><name name-style="western"><surname>Willm</surname><given-names>E</given-names> </name><name name-style="western"><surname>Jamain</surname><given-names>C</given-names> </name></person-group><article-title>The ConSoRe project supports the implementation of big data in oncology</article-title><source>Bull Cancer</source><year>2016</year><month>11</month><volume>103</volume><issue>11</issue><fpage>949</fpage><lpage>950</lpage><pub-id pub-id-type="doi">10.1016/j.bulcan.2016.10.001</pub-id><pub-id pub-id-type="medline">27816168</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amin</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Greene</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Edge</surname><given-names>SB</given-names> </name><etal/></person-group><article-title>The Eighth Edition AJCC Cancer Staging Manual: continuing to build a bridge from a population-based to a more &#x201C;personalized&#x201D; approach to cancer staging</article-title><source>CA Cancer J Clin</source><year>2017</year><month>03</month><volume>67</volume><issue>2</issue><fpage>93</fpage><lpage>99</lpage><pub-id pub-id-type="doi">10.3322/caac.21388</pub-id><pub-id 
pub-id-type="medline">28094848</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wolff</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Hammond</surname><given-names>MEH</given-names> </name><name name-style="western"><surname>Hicks</surname><given-names>DG</given-names> </name><etal/></person-group><article-title>Recommendations for human epidermal growth factor receptor 2 testing in breast cancer: American Society of Clinical Oncology/College of American Pathologists clinical practice guideline update</article-title><source>J Clin Oncol</source><year>2013</year><month>11</month><day>1</day><volume>31</volume><issue>31</issue><fpage>3997</fpage><lpage>4013</lpage><pub-id pub-id-type="doi">10.1200/JCO.2013.50.9984</pub-id><pub-id pub-id-type="medline">24101045</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name></person-group><article-title>GitHub repository</article-title><source>PaCMAP: Pairwise Controlled Manifold Approximation</source><access-date>2025-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/YingfanWang/PaCMAP">https://github.com/YingfanWang/PaCMAP</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Scikit-learn developers</article-title><source>"sklearn.cluster.MeanShift," scikit-learn documentation</source><access-date>2025-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org/dev/modules/generated/sklearn.cluster.MeanShift.html">https://scikit-learn.org/dev/modules/generated/sklearn.cluster.MeanShift.html</ext-link></comment></nlm-citation></ref><ref 
id="ref32"><label>32</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Davidson-Pilon</surname><given-names>C</given-names> </name></person-group><source>"Lifelines: survival analysis in Python," Read the Docs</source><access-date>2025-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://lifelines.readthedocs.io/en/latest/">https://lifelines.readthedocs.io/en/latest/</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhavnani</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Visweswaran</surname><given-names>S</given-names> </name><name name-style="western"><surname>Raji</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>YF</given-names> </name></person-group><article-title>A framework for modeling and interpreting patient subgroups applied to hospital readmission: visual analytical approach</article-title><source>JMIR Med 
Inform</source><year>2022</year><month>12</month><day>7</day><volume>10</volume><issue>12</issue><fpage>e37239</fpage><pub-id pub-id-type="doi">10.2196/37239</pub-id><pub-id pub-id-type="medline">35537203</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kourou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Karamouzis</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Fotiadis</surname><given-names>DI</given-names> </name></person-group><article-title>Machine learning applications in cancer prognosis and prediction</article-title><source>Comput Struct Biotechnol J</source><year>2015</year><volume>13</volume><issue>13</issue><fpage>8</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2014.11.005</pub-id><pub-id pub-id-type="medline">25750696</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ozer</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Sarica</surname><given-names>PO</given-names> </name><name name-style="western"><surname>Arga</surname><given-names>KY</given-names> </name></person-group><article-title>New machine learning applications to accelerate personalized medicine in breast cancer: rise of the support vector machines</article-title><source>OMICS</source><year>2020</year><month>05</month><volume>24</volume><issue>5</issue><fpage>241</fpage><lpage>246</lpage><pub-id pub-id-type="doi">10.1089/omi.2020.0001</pub-id><pub-id pub-id-type="medline">32228365</pub-id></nlm-citation></ref><ref 
id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shafiq</surname><given-names>A</given-names> </name><name name-style="western"><surname>&#x00C7;olak</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Sindhu</surname><given-names>TN</given-names> </name><name name-style="western"><surname>Lone</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Abushal</surname><given-names>TA</given-names> </name></person-group><article-title>Modeling and survival exploration of breast carcinoma: A statistical, maximum likelihood estimation, and artificial neural network perspective</article-title><source>Artificial Intelligence in the Life Sciences</source><year>2023</year><month>12</month><volume>4</volume><fpage>100082</fpage><pub-id pub-id-type="doi">10.1016/j.ailsci.2023.100082</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>R</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wan</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Evaluation of machine learning algorithms for the prognosis of breast cancer from the Surveillance, Epidemiology, and End Results database</article-title><source>PLoS ONE</source><year>2023</year><volume>18</volume><issue>1</issue><fpage>e0280340</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0280340</pub-id><pub-id pub-id-type="medline">36701415</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borkin</surname><given-names>MA</given-names> </name><name 
name-style="western"><surname>Vo</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Bylinskii</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>What makes a visualization memorable?</article-title><source>IEEE Trans Vis Comput Graph</source><year>2013</year><month>12</month><volume>19</volume><issue>12</issue><fpage>2306</fpage><lpage>2315</lpage><pub-id pub-id-type="doi">10.1109/TVCG.2013.234</pub-id><pub-id pub-id-type="medline">24051797</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Palmas</surname><given-names>G</given-names> </name><name name-style="western"><surname>Bachynskyi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Oulasvirta</surname><given-names>A</given-names> </name><name name-style="western"><surname>Seidel</surname><given-names>HP</given-names> </name><name name-style="western"><surname>Weinkauf</surname><given-names>T</given-names> </name></person-group><article-title>MovExp: a versatile visualization tool for human-computer interaction studies with 3D performance and biomechanical data</article-title><source>IEEE Trans Vis Comput Graph</source><year>2014</year><month>12</month><volume>20</volume><issue>12</issue><fpage>2359</fpage><lpage>2368</lpage><pub-id pub-id-type="doi">10.1109/TVCG.2014.2346311</pub-id><pub-id pub-id-type="medline">26356950</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bekemeier</surname><given-names>B</given-names> </name><name name-style="western"><surname>Flaxman</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Schultz</surname><given-names>M</given-names> </name></person-group><article-title>Impact of data visualization on decision-making and its implications for public health practice: a systematic literature review</article-title><source>Inform Health Soc Care</source><year>2022</year><month>04</month><day>3</day><volume>47</volume><issue>2</issue><fpage>175</fpage><lpage>193</lpage><pub-id pub-id-type="doi">10.1080/17538157.2021.1982949</pub-id><pub-id pub-id-type="medline">34582297</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soubeyran</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bellera</surname><given-names>C</given-names> </name><name name-style="western"><surname>Goyard</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Screening for vulnerability in older cancer patients: the ONCODAGE Prospective Multicenter Cohort Study</article-title><source>PLoS ONE</source><year>2014</year><volume>9</volume><issue>12</issue><fpage>e115060</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0115060</pub-id><pub-id pub-id-type="medline">25503576</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Danishuddin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JJ</given-names> </name></person-group><article-title>From cancer big data to treatment: artificial intelligence in cancer research</article-title><source>J Gene Med</source><year>2024</year><month>01</month><volume>26</volume><issue>1</issue><fpage>e3629</fpage><pub-id pub-id-type="doi">10.1002/jgm.3629</pub-id><pub-id 
pub-id-type="medline">37940369</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title><source>Nat Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>44</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="medline">30617339</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Char</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Magnus</surname><given-names>D</given-names> </name></person-group><article-title>Implementing machine learning in health care&#x2014;addressing ethical challenges</article-title><source>N Engl J Med</source><year>2018</year><month>03</month><day>15</day><volume>378</volume><issue>11</issue><fpage>981</fpage><lpage>983</lpage><pub-id pub-id-type="doi">10.1056/NEJMp1714229</pub-id><pub-id pub-id-type="medline">29539284</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Stability analysis of manifold learning applied to clustered data. The original cohort data were divided into 2 groups; 70% of every cluster was pooled into the model group, and the remaining 30% was pooled into the test group. A fresh manifold learning process was applied to the model group, and the test group was then projected onto the newly generated manifold. 
Patients in the test and model groups from the same cluster of origin were compared to evaluate whether they would exhibit similar distributions (appear in proximity to each other) in the new manifold space. (A) Examples of permutation analysis of clusters 0 and 1. The permutation test determined whether the observed Kullback-Leibler (KL) divergence (red line) was significantly different from what can be expected from random shuffling of the 2 groups (blue histograms). (B) Table summarizing the median <italic>P</italic> values of the stability tests across 10 different manifold initializations. All <italic>P</italic> values above .05 indicated a lack of significant variation between groups.</p><media xlink:href="cancer_v11i1e64000_app1.png" xlink:title="PNG File, 214 KB"/></supplementary-material></app-group></back></article>