<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v12i1e84234</article-id><article-id pub-id-type="doi">10.2196/84234</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Quality, Empathy, and Readability of AI Chatbot Responses to the Survivorship Needs of Adolescents and Young Adults With Melanoma: Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Jafarnia</surname><given-names>Jordan Lily</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Haff</surname><given-names>Priscilla Lynne</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Moore</surname><given-names>Reece Philip</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Osheim</surname><given-names>Alyssa Leigh</given-names></name><degrees>BBA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Riley</surname><given-names>Katherine McKenna</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Sabrina</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Roth</surname><given-names>Michael</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Salge</surname><given-names>Madeleine Hines</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Nelson</surname><given-names>Kelly Carter</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>UTHealth Houston, McGovern Medical School</institution><addr-line>6431 Fannin St</addr-line><addr-line>Houston</addr-line><addr-line>TX</addr-line><country>United States</country></aff><aff id="aff2"><institution>Paul L. 
Foster School of Medicine at Texas Tech University Health Sciences Center El Paso</institution><addr-line>El Paso</addr-line><addr-line>TX</addr-line><country>United States</country></aff><aff id="aff3"><institution>Division of Pediatrics, University of Texas MD Anderson Cancer Center</institution><addr-line>Houston</addr-line><addr-line>TX</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Dermatology, Division of Internal Medicine, The University of Texas MD Anderson Cancer Center</institution><addr-line>1515 Holcombe Blvd</addr-line><addr-line>Houston</addr-line><addr-line>TX</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Balcarras</surname><given-names>Matthew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chrimes</surname><given-names>Dillon</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chatzimina</surname><given-names>Maria</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Fabrizio</surname><given-names>Tommaso</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Kelly Carter Nelson, MD, Department of Dermatology, Division of Internal Medicine, The University of Texas MD Anderson Cancer Center, 1515 Holcombe Blvd, Houston, TX, 77030, United States, 1 713-745-1113; <email>kcnelson1@mdanderson.org</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>26</day><month>3</month><year>2026</year></pub-date><volume>12</volume><elocation-id>e84234</elocation-id><history><date date-type="received"><day>21</day><month>09</month><year>2025</year></date><date 
date-type="rev-recd"><day>13</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>25</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Jordan Lily Jafarnia, Priscilla Lynne Haff, Reece Philip Moore, Alyssa Leigh Osheim, Katherine McKenna Riley, Sabrina Zheng, Michael Roth, Madeleine Hines Salge, Kelly Carter Nelson. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 26.3.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2026/1/e84234"/><abstract><sec><title>Background</title><p>Melanoma, a highly aggressive form of skin cancer, is the second most common type of cancer for adolescent and young adult (AYA, ages 15-39 years) patients. 
AYA patients with melanoma may turn to internet sources, especially artificial intelligence (AI) chatbots, to manage uncertainty about prognosis and treatment.</p></sec><sec><title>Objective</title><p>This study aims to evaluate the quality, empathy, and readability of responses generated by leading AI chatbots when addressing the top unmet needs of AYA patients with melanoma receiving treatment.</p></sec><sec sec-type="methods"><title>Methods</title><p>Our research team recently surveyed 152 AYA patients with melanoma using the Needs Assessment Service Bridge, a validated instrument that assesses psychosocial needs for AYA patients with cancer. The survey identified the top 5 needs for advanced AYA patients with melanoma receiving treatment. Each need was reframed into a question and brief clinical history, then entered into each chatbot 5 times by 4 individuals who cleared their prequestion and postquestion history. Chatbot responses were evaluated to assess information quality (Global Quality Score [GQS] and DISCERN), accessibility and readability (GQS, Flesch Kincaid Grade Level, Flesch Reading Ease [FRE]), and perceived empathy (Perceived Empathy of Technology Scale [PETS], including domains of Emotional Responsiveness [PETS-ER], Understanding and Trust [PETS-UT]).</p></sec><sec sec-type="results"><title>Results</title><p>Across 75 chatbot responses, ChatGPT achieved the highest average quality (mean GQS 4.42, SD 0.32; mean DISCERN 3.24, SD 0.31) and empathy (mean PETS-ER 5.35, SD 1.85; mean PETS-UT 6.36, SD 1.83), though with greater variability. Copilot produced the lowest quality and empathy scores, while Gemini responses were consistently midrange. PETS-UT exceeded PETS-ER across all models, suggesting stronger cognitive empathy than emotional responsiveness. Readability analysis showed outputs exceeded the average US reading level (mean Flesch Kincaid Grade Level 11.82, SD 1.44; mean FRE 38.60, SD 9.00), limiting accessibility. 
The most readable responses were found in question 2, which also scored higher in quality and empathy, whereas questions 4 and 5 produced the most complex, difficult-to-read responses corresponding with lower quality and empathy ratings.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>AI chatbots can provide moderately accurate and supportive responses to needs of AYA patients with melanoma, but outputs are inconsistent, written above the recommended reading level for health information, and limited in empathy. Question framing strongly influenced chatbot performance, with more emotional prompts drawing greater empathy, and readability aligning with both quality and empathy. Chatbot use in this population should remain adjunctive, with further research needed to standardize quality, improve readability, and enhance empathetic communication.</p></sec></abstract><kwd-group><kwd>melanoma</kwd><kwd>young adult</kwd><kwd>artificial intelligence</kwd><kwd>natural language processing</kwd><kwd>empathy</kwd><kwd>readability</kwd><kwd>adolescent</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Melanoma, a malignant tumor of melanocytes, is an aggressive form of skin cancer due to its high metastatic potential and associated mortality. Despite being most frequently diagnosed in older adults, cutaneous melanoma accounts for approximately 7% of newly diagnosed cancers among adolescents and young adults (AYAs, age 15&#x2010;39 years) [<xref ref-type="bibr" rid="ref1">1</xref>]. A melanoma diagnosis and subsequent treatment can be highly disruptive for AYA patients, with implications for physical health, emotional well-being, and social function. The severity of the disease, coupled with the uncertainty of treatment outcomes, often introduces feelings of fear and isolation [<xref ref-type="bibr" rid="ref2">2</xref>]. 
In seeking clarity in this uncertainty, patients may turn to the internet and social media for answers, education, and support. When seeking answers through internet interfaces, patients are likely to leverage artificial intelligence (AI)&#x2013;driven chatbots, such as Chat GPT, Microsoft Copilot, and Google Gemini [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>AI chatbots use natural language processing to generate conversational responses to user queries and have been shown to provide health information in an accessible and interactive manner [<xref ref-type="bibr" rid="ref4">4</xref>]. Their availability, objectivity, and capacity to adapt personalized responses to diverse communication styles make them appealing to patients navigating difficult diagnoses [<xref ref-type="bibr" rid="ref5">5</xref>]. While some studies have evaluated the most frequently asked chatbot questions regarding specific forms of cancer (ie, breast, leukemia, prostate), no published research to our knowledge has systematically examined their ability to address AYAs&#x2019; specific concerns regarding melanoma treatment and survivorship [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>A recent survey study of AYAs with melanoma identified several unmet needs for patients in different phases of cancer care using the validated Needs Assessment Service Bridge (NA-SB) questionnaire [<xref ref-type="bibr" rid="ref7">7</xref>]. This survey represents a separate study and included 152 AYA respondents, 20 of whom were actively receiving systemic treatment at the time of participation; detailed methods and results are reported elsewhere [<xref ref-type="bibr" rid="ref7">7</xref>]. 
For AYA patients with melanoma currently undergoing systemic treatment, the top 5 unmet needs included the following: (1) what happens after treatment, (2) fear of cancer recurrence, (3) long-term hormonal side effects, (4) long-term systemic side effects, and (5) genetic implications on diagnosis and treatment [<xref ref-type="bibr" rid="ref7">7</xref>]. In this study, survey findings were used solely to identify patient-prioritized unmet needs for chatbot evaluation.</p><p>Building on these empirically derived patient priorities, this study anchors chatbot prompts in validated unmet needs rather than search-derived queries; evaluates perceived empathy using the Perceived Empathy of Technology Scale (PETS), a recently validated instrument not previously applied in oncology chatbot research; and examines how readability, empathy, and information quality interact. Together, this novel approach enables a patient-centered and multidimensional evaluation of chatbot suitability for AYA melanoma care, further building on previous chatbot-oncology research. With these established unmet needs and the increasing use of AI chatbots for AYA patient support, we evaluated the unique response of 3 chatbots to patient-centered questions to determine if these resources are safe and valid for use by AYA patients with melanoma.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Need Selection and Question Creation</title><p>The top 5 most frequently reported needs for AYA patients with melanoma receiving systemic treatment were identified through the NA-SB survey. Once identified, each NA-SB prompt was slightly modified into a question format with a standardized and appropriate brief clinical history (<xref ref-type="other" rid="box1">Textbox 1</xref>). 
All questions shared a common stem that identified the patient as an AYA with metastatic melanoma, with tailored follow-up questions added to reflect the specific need category.</p><boxed-text id="box1"><title> Questions input into each chatbot, based on the 5 most frequently reported needs.</title><p>Questions from the perspective of patients currently on treatment:</p><list list-type="order"><list-item><p>I am an adolescent/young adult patient with metastatic melanoma and am currently receiving treatment. I want more information about what will happen when treatment finishes.</p></list-item><list-item><p>I am an adolescent/young adult patient with metastatic melanoma and am currently receiving treatment. In the last month, I have become more worried about my cancer spreading. What do I need to know and what steps can I take to cope with this fear?</p></list-item><list-item><p>I am an adolescent/young adult patient with metastatic melanoma and am currently receiving treatment. How will my treatment affect long-term hormone changes?</p></list-item><list-item><p>I am an adolescent/young adult patient with metastatic melanoma and am currently receiving treatment. I want more information about how my genetics may or may not have impacted my diagnosis and treatment.</p></list-item><list-item><p>I am an adolescent/young adult patient with metastatic melanoma and am currently receiving treatment. I want more information about the long-term side effects of treatment.</p></list-item></list></boxed-text></sec><sec id="s2-2"><title>Study Design</title><p>This study evaluated the quality of responses generated by AI chatbots to needs frequently reported by AYA patients with melanoma receiving treatment. 
Our methodologic approach included six components: (1) need selection, (2) question creation, (3) chatbot input, (4) data extraction, (5) data coding, and (6) evaluation (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study design workflow. AI: artificial intelligence; AYA: adolescent and young adult.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig01.png"/></fig></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This study relied exclusively on publicly available chatbot outputs and did not involve interaction with human participants, collection of identifiable private information, or intervention. As such, it did not meet the definition of human participant research and did not require institutional review board approval.</p></sec><sec id="s2-4"><title>Chatbot Input and Data Extraction</title><p>ChatGPT 4.0 (OpenAI), Copilot (Microsoft), and Gemini (Google) were selected for evaluation based on their increasing usage among AYAs and advancements in their technology. The free, publicly accessible versions of all chatbots were used to reflect the most accessible chatbot platforms available to patients. The resulting need&#x2013;related questions, outlined in <xref ref-type="other" rid="box1">Textbox 1</xref>, were entered into each chatbot 5 times by 4 individuals (RPM, ALO, KMR, and SZ) during a single study period (April 8, 2025, to August 8, 2025). To minimize bias, these team members did not participate in question selection, and AI personalization was avoided through memory clearing between each input, ensuring that responses did not draw on prior prompts or stored contextual data, thus replicating a true AYA patient experience. 
Responses were copied and exported into an Excel spreadsheet where a separate team completed data coding and evaluation.</p></sec><sec id="s2-5"><title>Data Coding and Evaluation</title><p>Responses were evaluated and coded to assess information quality (Global Quality Score [GQS] and DISCERN), perceived empathy (Perceived Empathy of Technology Scale [PETS]), and readability (Flesch Kincaid Grade Level [FKGL], Flesch Reading Ease [FRE]; <xref ref-type="table" rid="table1">Table 1</xref>). Four team members, 2 medical students (JLJ and PLH) and 2 physicians (KCN and MHS), completed evaluations independently, with results hidden until completion to minimize bias. Quality metrics (GQS and DISCERN) were evaluated by physician reviewers due to their clinical expertise, while empathy (PETS) was evaluated by medical student reviewers to approximate a patient-facing perspective. Score interpretation for all tools is provided in <xref ref-type="table" rid="table1">Table 1</xref>. Reviewers met prior to scoring to align on scale interpretation. Any discrepancy in scores between the raters is a product of the qualitative nature of the rating and is representative of different physicians&#x2019; interpretation. All analyses are exploratory and descriptive. Duplicate independent scoring with averaging was used to mitigate individual subjectivity, which was considered sufficient for this evaluation using previously validated instruments. Formal interrater reliability statistics (eg, intraclass correlation coefficient or Cohen &#x03BA;) were not calculated, as estimates based on only 2 raters per domain may be unstable and potentially misleading, particularly for perceptual constructs such as empathy. The results are presented as means, standard deviations (SD), and visual comparisons. 
No inferential statistical testing was performed, and no claims of statistical significance or superiority were made.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Evaluation metrics for chatbot responses, detailing assessed domains and score interpretation.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Assessment tool</td><td align="left" valign="bottom">Evaluated domains</td><td align="left" valign="bottom">Scale</td><td align="left" valign="bottom">Score interpretation</td></tr></thead><tbody><tr><td align="left" valign="top">Global Quality Score [<xref ref-type="bibr" rid="ref8">8</xref>]</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accessibility</p></list-item><list-item><p>Quality</p></list-item><list-item><p>Flow</p></list-item><list-item><p>Usefulness</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>5-point Likert scale</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1=Poor quality, flow, and missing information</p></list-item><list-item><p>2=Fair</p></list-item><list-item><p>3=Moderate</p></list-item><list-item><p>4=Good</p></list-item><list-item><p>5=Excellent quality, flow, and information</p></list-item></list></td></tr><tr><td align="left" valign="top">DISCERN [<xref ref-type="bibr" rid="ref9">9</xref>]</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Quality</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Items 1&#x2010;8 (5-point Likert scale)</p></list-item><list-item><p>Item 16 (summary 5-point Likert scale)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Low quality information &#x2264;2</p></list-item><list-item><p>Medium-quality 
information=2&#x2010;4</p></list-item><list-item><p>High-quality information &#x2265;4</p></list-item></list></td></tr><tr><td align="left" valign="top">Perceived Empathy of Technology Scale<break/>[<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ER<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, UT<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>10 items, 2 domains. Each item is assessed with a 10-point scale and then averaged within each domain to give the domain score (ER: 6 items; UT: 4 items)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1&#x2010;3=Low empathy</p></list-item><list-item><p>4&#x2010;6=Moderate empathy</p></list-item><list-item><p>7&#x2010;10=High empathy</p></list-item></list></td></tr><tr><td align="left" valign="top">Flesch Kincaid Grade Level [<xref ref-type="bibr" rid="ref11">11</xref>] (FKGL)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Comprehension</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Automated formula estimating US school grade level required to understand text</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Numeric grade level (eg, 8=8th-grade reading level)</p></list-item></list></td></tr><tr><td align="left" valign="top">Flesch Reading Ease [<xref ref-type="bibr" rid="ref11">11</xref>] (FRE)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item><list-item><p>Flow</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Automated formula producing a score from 0 to 100, with higher value indicating easier reading</p></list-item></list></td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>0&#x2010;30=Very difficult to read</p></list-item><list-item><p>31&#x2010;50=Difficult to read</p></list-item><list-item><p>51&#x2010;60=Fairly difficult to read</p></list-item><list-item><p>61&#x2010;70=Plain English</p></list-item><list-item><p>71&#x2010;100=Easy to read</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>ER: Emotional Responsiveness.</p></fn><fn id="table1fn2"><p><sup>b</sup>UT: Understanding and Trust.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-6"><title>Quality (GQS and DISCERN)</title><p>Chatbot answer quality was evaluated using the GQS and DISCERN scales. The GQS scale evaluates quality, accessibility, flow, and perceived usefulness of information for patients, as judged by a physician (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The DISCERN scale assesses the quality of written health information, focusing on reliability, clarity, and explanations of risks and benefits (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Two physicians (KCN and MHS) evaluated each response with the GQS and DISCERN scales. The results were then averaged into a final score.</p></sec><sec id="s2-7"><title>Empathy (PETS)</title><p>The PETS, a 10-item instrument designed to assess emotional responsiveness and understanding in human-technology interactions, was used to evaluate perceived empathy in chatbot responses. The PETS was applied without modification. The PETS tool is divided into 2 sections: PETS-ER (6 items) and PETS-UT (4 items; Table S3, <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The PETS-ER (Emotional Responsiveness) focuses on emotional engagement and support. PETS-UT (Understanding and Trust) measures &#x201C;cognitive empathy&#x201D; or the ability of the chatbot to understand the user&#x2019;s perspective. 
Each item was individually scored, then averaged to give domain-specific scores for each input. Two medical student reviewers (JLJ and PLH) performed scoring and highlighted key examples for qualitative evaluation.</p></sec><sec id="s2-8"><title>Readability (FKGL, FRE, and Word Count)</title><p>To evaluate each output&#x2019;s readability, the FKGL and FRE were utilized. FKGL estimates the US grade level required to comprehend a response, while FRE scores readability from 0 to 100 (higher=easier to read). Scores for both instruments were calculated in Microsoft Word&#x2019;s spelling and grammar tool.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>Descriptive metrics of chatbot performance are reported in <xref ref-type="table" rid="table2">Table 2</xref>, stratified by question with overall summary statistics for each chatbot. Individual input scoring is available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Descriptive quality, empathy, and readability metrics for chatbot responses by question and overall.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Question and chatbot</td><td align="left" valign="bottom">GQS<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, mean (SD)</td><td align="left" valign="bottom">DISCERN, mean (SD)</td><td align="left" valign="bottom">PETS-ER<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>, mean (SD)</td><td align="left" valign="bottom">PETS-UT<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>, mean (SD)</td><td align="left" valign="bottom">FRE<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup>, mean (SD)</td><td align="left" valign="bottom">FKGL<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup>, mean (SD)</td><td align="left" valign="bottom">Word count, mean 
(SD)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8">Question and chatbot</td></tr><tr><td align="char" char="." valign="top" colspan="8"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.50 (0.35)</td><td align="left" valign="top">3.34 (0.30)</td><td align="left" valign="top">6.18 (1.02)</td><td align="left" valign="top">7.80 (0.73)</td><td align="left" valign="top">39.20 (4.77)</td><td align="left" valign="top">12.62 (1.71)</td><td align="left" valign="top">538.80 (66.43)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">4.00 (0.00)</td><td align="left" valign="top">2.75 (0.09)</td><td align="left" valign="top">5.70 (0.79)</td><td align="left" valign="top">6.45 (0.47)</td><td align="left" valign="top">37.32 (5.68)</td><td align="left" valign="top">11.82 (0.82)</td><td align="left" valign="top">344.80 (54.70)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.00 (0.00)</td><td align="left" valign="top">3.09 (0.14)</td><td align="left" valign="top">5.87 (0.68)</td><td align="left" valign="top">6.50 (1.01)</td><td align="left" valign="top">46.42 (1.20)</td><td align="left" valign="top">11.10 (0.22)</td><td align="left" valign="top">779.80 (69.88)</td></tr><tr><td align="char" char="." 
valign="top" colspan="8"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.60 (0.22)</td><td align="left" valign="top">3.35 (0.12)</td><td align="left" valign="top">7.53 (0.53)</td><td align="left" valign="top">7.60 (0.80)</td><td align="left" valign="top">54.84 (3.43)</td><td align="left" valign="top">9.10 (0.64)</td><td align="left" valign="top">522.40 (78.60)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">4.50 (0.35)</td><td align="left" valign="top">3.15 (0.15)</td><td align="left" valign="top">6.67 (0.57)</td><td align="left" valign="top">6.65 (0.38)</td><td align="left" valign="top">46.42 (4.37)</td><td align="left" valign="top">10.72 (0.72)</td><td align="left" valign="top">431.80 (30.91)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.20 (0.27)</td><td align="left" valign="top">3.09 (0.21)</td><td align="left" valign="top">6.67 (0.57)</td><td align="left" valign="top">6.65 (0.38)</td><td align="left" valign="top">52.68 (4.91)</td><td align="left" valign="top">9.80 (0.99)</td><td align="left" valign="top">728.00 (58.94)</td></tr><tr><td align="char" char="." 
valign="top" colspan="8"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.00 (0.00)</td><td align="left" valign="top">3.00 (0.00)</td><td align="left" valign="top">3.70 (1.31)</td><td align="left" valign="top">4.18 (0.85)</td><td align="left" valign="top">28.58 (4.78)</td><td align="left" valign="top">11.98 (0.47)</td><td align="left" valign="top">389.20 (48.84)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">4.10 (0.42)</td><td align="left" valign="top">2.75 (0.10)</td><td align="left" valign="top">4.13 (0.40)</td><td align="left" valign="top">4.10 (0.42)</td><td align="left" valign="top">31.82 (7.22)</td><td align="left" valign="top">12.38 (1.35)</td><td align="left" valign="top">315.20 (47.10)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.50 (0.00)</td><td align="left" valign="top">3.05 (0.11)</td><td align="left" valign="top">4.13 (0.40)</td><td align="left" valign="top">4.10 (0.42)</td><td align="left" valign="top">34.38 (2.51)</td><td align="left" valign="top">13.12 (0.62)</td><td align="left" valign="top">717.80 (71.51)</td></tr><tr><td align="char" char="." 
valign="top" colspan="8"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.50 (0.00)</td><td align="left" valign="top">2.94 (0.19)</td><td align="left" valign="top">4.33 (0.41)</td><td align="left" valign="top">6.97 (0.55)</td><td align="left" valign="top">37.62 (1.24)</td><td align="left" valign="top">10.84 (0.30)</td><td align="left" valign="top">545.20 (40.76)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">3.40 (0.65)</td><td align="left" valign="top">2.55 (0.14)</td><td align="left" valign="top">3.10 (0.37)</td><td align="left" valign="top">3.15 (0.34)</td><td align="left" valign="top">31.72 (4.47)</td><td align="left" valign="top">13.40 (0.65)</td><td align="left" valign="top">360.20 (22.22)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.00 (0.00)</td><td align="left" valign="top">2.81 (0.14)</td><td align="left" valign="top">3.10 (0.37)</td><td align="left" valign="top">3.15 (0.34)</td><td align="left" valign="top">40.60 (4.59</td><td align="left" valign="top">12.16 (0.70)</td><td align="left" valign="top">703.00 (45.08)</td></tr><tr><td align="char" char="." 
valign="top" colspan="8"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.40 (0.42)</td><td align="left" valign="top">3.26 (0.50)</td><td align="left" valign="top">5.03 (2.38)</td><td align="left" valign="top">5.30 (2.35)</td><td align="left" valign="top">29.36 (6.90)</td><td align="left" valign="top">12.78 (0.97)</td><td align="left" valign="top">415.60 (178.75)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">3.70 (0.27)</td><td align="left" valign="top">2.74 (0.19)</td><td align="left" valign="top">2.20 (0.45)</td><td align="left" valign="top">2.70 (0.11)</td><td align="left" valign="top">33.38 (6.52)</td><td align="left" valign="top">12.74 (0.72)</td><td align="left" valign="top">330.80 (34.47)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.20 (0.27)</td><td align="left" valign="top">2.76 (0.03)</td><td align="left" valign="top">2.20 (0.45)</td><td align="left" valign="top">2.70 (0.11)</td><td align="left" valign="top">34.68 (3.20)</td><td align="left" valign="top">12.74 (0.57)</td><td align="left" valign="top">713.60 (50.51)</td></tr><tr><td align="left" valign="top" colspan="8">Overall ratings across all questions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ChatGPT</td><td align="left" valign="top">4.42 (0.32)</td><td align="left" valign="top">3.24 (0.31)</td><td align="left" valign="top">5.35 (1.85)</td><td align="left" valign="top">6.36 (1.83)</td><td align="left" valign="top">38.9 (10.5)</td><td align="left" valign="top">11.9 (1.64)</td><td align="left" valign="top">482 (111)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Copilot</td><td align="left" valign="top">3.94 (0.53)</td><td align="left" valign="top">2.79 (0.24)</td><td align="left" valign="top">4.36 (1.74)</td><td align="left" valign="top">4.61 (1.72)</td><td align="left" valign="top">36.1 (7.7)</td><td align="left" valign="top">12.3 (1.23)</td><td align="left" valign="top">356 (55)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gemini</td><td align="left" valign="top">4.18 (0.24)</td><td align="left" valign="top">2.96 (0.19)</td><td align="left" valign="top">4.40 (1.76)</td><td align="left" valign="top">4.62 (1.76)</td><td align="left" valign="top">41.8 (7.9)</td><td align="left" valign="top">11.8 (1.37)</td><td align="left" valign="top">728 (61)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>GQS: Global Quality Score.</p></fn><fn id="table2fn2"><p><sup>b</sup>PETS-ER: Perceived Empathy of Technology Scale-Emotional Responsiveness.</p></fn><fn id="table2fn3"><p><sup>c</sup>PETS-UT: Perceived Empathy of Technology Scale-Understanding and Trust. 
</p></fn><fn id="table2fn4"><p><sup>d</sup>FRE: Flesch Reading Ease.</p></fn><fn id="table2fn5"><p><sup>e</sup>FKGL: Flesch-Kincaid Grade Level.</p></fn></table-wrap-foot></table-wrap><sec id="s3-1"><title>Quality (GQS and DISCERN)</title><p>ChatGPT demonstrated the highest average information quality scores, achieving the highest GQS and DISCERN ratings for 4 out of the 5 questions (<xref ref-type="fig" rid="figure2">Figures 2</xref> and <xref ref-type="fig" rid="figure3">3</xref>). Copilot responses showed the lowest average quality scores, while Gemini responses generally fell between the 2 other models.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Global Quality Score (GQS) scores comparing each chatbot by question input.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>DISCERN scores comparing each chatbot by question input.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig03.png"/></fig><p>Quality variability differed across chatbots (<xref ref-type="table" rid="table2">Table 2</xref>). Copilot responses demonstrated greater variability in quality scores across question, while Gemini responses showed more consistent quality patterns. ChatGPT exhibited moderate variability for quality metrics. At the question level, Questions 4 and 5 were associated with greater variability in quality scores across chatbots, while Questions 2 and 3 yielded more similar scores across models (see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> for all SD). 
Overall, descriptive rank ordering based on average scores and variability suggests differences in consistency and distribution of information quality across chatbots.</p></sec><sec id="s3-2"><title>Empathy (PETS-ER and PETS-UT)</title><p>PETS scores varied at the question and chatbot level (<xref ref-type="table" rid="table2">Table 2</xref>). Questions 1 and 2 are associated with the highest perceived empathy, and Question 5 demonstrates the lowest empathy scores (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Average Perceived Empathy of Technology Scale (PETS) across chatbot by question. Avg: average; ER: Emotional Responsiveness; UT: Understanding and Trust.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig04.png"/></fig><p>When empathy scores were summarized by chatbot, descriptive differences in average perceived empathy were observed across models (<xref ref-type="fig" rid="figure5">Figure 5</xref>). ChatGPT responses demonstrated higher average empathy scores compared to Copilot and Gemini, which demonstrated lower averages. 
Across all models, PETS-UT scores exceeded PETS-ER scores, suggesting stronger cognitive empathy and perspective taking rather than emotional expression.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Box and whisker plots showing Perceived Empathy of Technology Scale-Emotional Responsiveness (PETS-ER) and Perceived Empathy of Technology Scale-Understanding and Trust (PETS-UT) scores across chatbots, illustrating limited overall perceived empathy compared to understandability.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig05.png"/></fig><p>ChatGPT showed the largest UT-ER gap (mean 6.36, SD 1.83 vs 5.35, SD 1.85), indicating moderate-to-high levels of understanding, but only moderate emotional connection. Copilot and Gemini had similar low-moderate ER and UT averages, with limited perceived empathy overall. Variability patterns further distinguish AI models (<xref ref-type="fig" rid="figure5">Figure 5</xref>). ChatGPT produced the widest variation in empathy scores. In contrast, Gemini and Copilot showed slightly more predictable patterns of empathy.</p></sec><sec id="s3-3"><title>Readability (FRE, FKGL, and Word Count)</title><p>All outputs were written at an eighth-grade reading level or higher (range 8.2&#x2010;14.6, mean 11.82, SD 1.44, equivalent to high school senior level). The average FRE score across all AI chatbot responses was 38.60, SD 9.00, indicating difficult readability overall. Gemini tended to produce the longest but most readable responses. Copilot consistently produced shorter responses associated with lower average readability. ChatGPT produced responses with the most variability in readability and word count. 
Question 5 responses were the least readable (mean grade level 12.76, SD 0.71), while question 2 responses were the most readable (mean grade level 9.9, SD 1.01).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Quality</title><p>ChatGPT responses were associated with higher average quality scores and moderate variability across multiple questions. Gemini responses demonstrated moderate quality scores with greater consistency, while Copilot responses exhibited greater variability and the lowest quality scores, even producing a factually incorrect answer regarding genetic mutations and the associated risk of developing melanoma (see Copilot 4_1 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). In health communication, information quality standardization is critical because it allows for less confusion and increases patient-provider trust [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Quality is not only model dependent but also question reliant. Questions 1 and 2 received the highest quality evaluations, whereas Questions 4 and 5 produced lower quality scores. This pattern suggests that chatbots tend to provide more consistent responses to straightforward or well-established topics. In contrast, questions regarding more complex science with sparse and/or unclear literature, such as recurrence risk, long-term treatment effects, and genetic implications, produced less reliable responses, often without communicating their limitations. These complex topics often represent areas of greatest concern for AYA patients with melanoma, which underscores the importance of cautious interpretation of chatbot-generated information.</p><p>The review of lower-quality outputs revealed common pitfalls. Responses with low-quality scores were often oversimplified, incomplete, or presented in bullet-point style responses, with insufficient detail to address the question. 
The lack of actionable information was most apparent in discussions of treatment, side effects, and hormone changes. For example, when a patient asked how treatment would affect hormones, responses accurately identified potential consequences, such as adrenal insufficiency, but failed to describe symptoms. From a patient perspective, clinically beneficial communication translates these answers into recognizable manifestations at home (ie, fatigue, dizziness, or other changes). Accuracy was further undermined by the lack of transparent source citation and inconsistent use of safeguards, such as disclaimers. Instead, statements were presented as definitive, risking patient confusion when conflicting with other resources or provider guidance.</p><p>Conversely, higher-quality responses demonstrated more complete and organized content. They cited credible resources, gave disclaimers, and emphasized the importance of deferring to the health care team for individualized guidance. These responses were detailed and well-tailored to the questions, without an overwhelming word count, which improved readability and digestion of information. They had clear directions to access specific resources for further support and actionable suggestions for the patient. These qualities serve as a reference standard for future versions of AI-generated health information.</p><p>Although the overall quality scores across all chatbot responses were moderate to high, persistent gaps in sourcing and uncertainty disclosures raise serious patient safety concerns. For AYA patients with melanoma undergoing treatment, inconsistent quality can undermine trust and lead to confusion at a vulnerable stage of care. 
To be reliable in clinical contexts, AI systems must deliver outputs that are not only accurate and detailed but also verifiable, transparent, and responsibly framed.</p></sec><sec id="s4-2"><title>Empathy</title><p>Across the 3 chatbot platforms, a central pattern emerged: systems struggled to balance clinical accuracy with emotional support. ChatGPT comparatively showed the highest empathetic capacity, while Gemini and Copilot tended to be neutral and lacking emotion. The qualitative review of responses identified a set of best practices that distinguished qualities of higher-empathy outputs from weak ones (<xref ref-type="other" rid="box2">Textbox 2</xref>). Detailed examples supporting these best practices are provided in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p><boxed-text id="box2"><title> Key best practices for artificial intelligence (AI) chatbot responses to express empathy.</title><p><bold>Best practices (Perceived Empathy of Technology Scale [PETS] scores)</bold></p><list list-type="bullet"><list-item><p>Open with empathy and understanding</p></list-item><list-item><p>Asks follow-up and clarification questions</p></list-item><list-item><p>Symptoms and side effects in plain language</p></list-item><list-item><p>Medical disclaimers and boundaries</p></list-item><list-item><p>Cites credible resources</p></list-item><list-item><p>Specific actionable coping strategies</p></list-item><list-item><p>Adolescents and young adults&#x2013;specific</p></list-item><list-item><p>Demonstrates interest in the patient</p></list-item><list-item><p>Provides empathy in all contexts</p></list-item></list><p><bold>Common pitfalls (&#x2193; PETS scores)</bold></p><list list-type="bullet"><list-item><p>Robotic or purely clinical tone</p></list-item><list-item><p>One-way information delivery</p></list-item><list-item><p>Lists of side effects or clinical jargon</p></list-item><list-item><p>Overstep scope</p></list-item><list-item><p>Lack 
citations or specific resources</p></list-item><list-item><p>Generic or overwhelming advice</p></list-item><list-item><p>Adult-centric or generic framing</p></list-item><list-item><p>Lack of interest or personalization</p></list-item><list-item><p>Overlooks unspoken emotional needs</p></list-item></list></boxed-text><p>AYA patients with melanoma have unique needs compared to adults; however, chatbots often default to general or even pediatric framing [<xref ref-type="bibr" rid="ref13">13</xref>]. When AYA patient-specific needs were addressed, it was often with a surface-level list rather than a meaningful explanation. Systems were better equipped to name emotions rather than express them, as demonstrated by higher UT than ER scores, and empathy was also prompt-dependent. Chatbot responses generally failed to infer patient distress if not explicitly stated (as seen in questions 3-5), unlike a human clinician who can use nonverbal cues and proactively address unspoken emotional needs even in the face of an analytical question. The chatbot lacks the emotional intelligence to deliver difficult news and interpret patient reactions. Responses receiving low PETS scores risk coming across as dismissive or cold, which may invalidate patient concerns and discourage them from seeking support or adhering to care recommendations in the future when they most need it.</p><p>Human providers naturally contextualize information based on patient age, life stage, current treatments, and psychosocial needs, a skill beyond a chatbot&#x2019;s reach. Since our 5 questions represent domains of greatest emotional and informational needs of AYAs with melanoma, the irregularity of empathy suggests that current AI systems may not adequately support this unique patient population.</p></sec><sec id="s4-3"><title>Readability</title><p>Response length and readability varied across chatbots. 
Gemini responses were longer on average with higher readability scores, whereas ChatGPT responses demonstrated wider variability, and Copilot responses were shorter with lower average readability scores. Responses that were easier to read tended to be perceived as higher quality (GQS/DISCERN) and more empathetic (PETS) as seen in <xref ref-type="fig" rid="figure6">Figure 6</xref>.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>The relationship between Flesch-Kincaid readability and total Perceived Empathy of Technology (PETS) (empathy) scores across all chatbot responses. Each bubble represents a chatbot, with size proportional to overall word count. While word count shows little association with either readability or empathy, responses with higher readability tend to receive higher empathy scores.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v12i1e84234_fig06.png"/></fig><p>Readability analysis revealed that all chatbot responses exceeded the US average reading level (7th-8th grade), with outputs averaging at the 11th-12th grade level. This creates a barrier for accessibility. These findings echo prior research that health care documents are often written at a level too high for general populations, reducing inclusivity and comprehension [<xref ref-type="bibr" rid="ref14">14</xref>]. Unlike clinicians, who are trained to use strategies such as follow-up questions and the &#x201C;teach-back&#x201D; method, chatbots do not check understanding unless prompted. This may highlight a key limitation of AI chatbots for medical communication. They draw primarily from online sources, which are often already written at inflated reading levels, and thus lead to reproduced complexity by AI outputs [<xref ref-type="bibr" rid="ref15">15</xref>]. 
Clinicians, by contrast, are trained to adapt explanations to patient literacy, developmental stage, and clinical context.</p><p>Readability is particularly important for AYAs, whose health literacy may be affected by education disruption, treatment-related cognitive impairment, and emotional distress. Cancer-related cognitive impairment affects approximately 25% to 28% of AYA survivors and may affect attention, memory, and processing speed, increasing the cognitive burden of complex medical information [<xref ref-type="bibr" rid="ref16">16</xref>]. The National Comprehensive Cancer Network Guidelines for AYA Oncology emphasize that information should be delivered in an age-appropriate and developmentally appropriate manner, particularly in the context of disrupted education and psychosocial stressors [<xref ref-type="bibr" rid="ref17">17</xref>]. While no evidence-based guidelines specify an optimal reading level for AYA oncology populations, the commonly cited 8th-grade target is extrapolated from general health literacy recommendations and represents a pragmatic benchmark rather than an AYA-specific threshold.</p><p>AI may therefore generate content that is technically accurate but inaccessible, potentially leading to confusion or disengagement. For proper implementation, chatbots should aim to target their output responses of health care information to an 8th-grade level and incorporate interactive strategies that mimic provider feedback loops. Without these adjustments, the use of AI to answer the needs of AYA patients with melanoma remains limited.</p></sec><sec id="s4-4"><title>Patient Safety and Clinical Implications</title><p>Although AI chatbots may provide AYA patients with adequate education, support, and resources regarding their diagnosis, they cannot replace clinician input and should be used cautiously. 
The potential dangers associated with patients using AI in health care are multifaceted, with key risks including misinformation, unsafe recommendations, lack of understanding of the questions being asked, and insensitivity. Given that AYAs are the primary users of AI compared to other age groups, they may be particularly vulnerable to these risks due to their vulnerability to algorithmic bias and variable access to clinical guidance [<xref ref-type="bibr" rid="ref18">18</xref>]. These risks are inferred from content characteristics of chatbot responses and were not measured as patient outcomes in this study.</p><p>AI use in health care has been shown to spread misinformation [<xref ref-type="bibr" rid="ref19">19</xref>]. In our study, we found that chatbot explanations regarding the causes of metastatic melanoma and recommendations for next steps may include inaccurate or misleading information. Responses across all 3 platforms often focused on sun avoidance and using sun protection factor to prevent melanoma recurrence or secondary malignancy. Although this advice may be relevant for certain patient populations, it is not applicable to all patients, particularly for individuals with skin of color. As a result, responses lack inclusivity for diverse patient communities. Additionally, some chatbot responses encouraged AYA patients to explore clinical trials available for their conditions. While well-intended, this response is particularly unsafe to AYA patients, as it may increase the risk of false hope or confusion if patients are encouraged to consider trials that are not medically appropriate. Some responses even offered to interpret lab and genetic testing results. While AI chatbots have the potential to explain pathology reports and simplify medical jargon to patients, their interpretations are not flawless and should be used carefully with clinician oversight [<xref ref-type="bibr" rid="ref20">20</xref>]. 
Encouraging vulnerable patients to input their private health information to AI systems may expose them to breaches of data privacy and security, potentially leading to unauthorized access or misuse of sensitive medical information [<xref ref-type="bibr" rid="ref21">21</xref>].</p></sec><sec id="s4-5"><title>Limitations</title><p>This study evaluated a limited number of single-turn responses (5 question and 75 single-turn responses), which may not fully capture chatbot performance. AI chatbots can refine responses through follow-up prompts, so the single-turn design does not reflect extended, multiturn conversations. This limitation may influence the perceived quality, empathy, and readability of chatbot responses. Studies testing multiturn chats may be beneficial in the future. In addition, chatbot responses are sensitive to prompt phrasing and contextual detail, and the use of subjective scoring instruments introduces potential variability in interpretation. Each response was independently evaluated by 2 reviewers per domain, with scores averaged to reduce individual rater bias. For transparency, we have included all outputs in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendices 4</xref>, <xref ref-type="supplementary-material" rid="app6">6</xref> and <xref ref-type="supplementary-material" rid="app7">7</xref> as well as an example of the grading process with score justifications in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>. Formal interrater reliability statistics were not calculated, which limits quantitative assessment of scoring consistency. Although duplicate independent scoring with averaging was used to reduce subjectivity, future studies with larger and cross-disciplinary reviewer pools should incorporate formal reliability testing to further strengthen methodological rigor. 
Additionally, the separation of reviewer roles by domain may introduce systematic differences in scoring interpretation.</p></sec><sec id="s4-6"><title>Future Directions</title><p>Moving forward, we aim to use responses from this dataset to develop resources within melanoma oncology clinics that answer these questions and provide higher-quality and safer responses for AYA patients with melanoma. By providing age-appropriate education and support tools, we hope to improve the treatment experience and the overall well-being for AYAs navigating melanoma care. This will help expand the currently limited resources available for AYAs currently receiving treatment and hopefully bring more attention to the unaddressed survivorship needs of patients. Future clinic-integrated resources would be developed with clinician oversight and human evaluation to ensure accuracy, safety, and appropriateness for patient use, rather than relying on autonomous AI fine-tuning. If this study were to be replicated or expanded upon, it would be helpful to incorporate intraclass correlation coefficient or Cohen &#x03BA; into the data analysis to correct for discrepancies in the grading system. Additionally, this study establishes a patient-centered framework that can be applied to future research evaluating AI chatbot communication around emerging and experimental melanoma therapies, including novel immunotherapies and mRNA-based cancer vaccines. Our findings suggest that chatbot performance declines in areas with limited or evolving scientific literature, indicating that AI responses on new treatments or clinical trials may be particularly vulnerable to misinformation or oversimplification. 
Future studies should specifically assess this phenomenon to generate realistic expectations for AYAs navigating the rapidly evolving melanoma therapeutic landscape.</p></sec><sec id="s4-7"><title>Conclusion</title><p>This study provides an evaluation of the utility of AI to answer the most pressing needs of AYA patients with melanoma currently receiving treatment. Overall, chatbot responses were often informative but demonstrated persistent gaps in quality, empathy, and readability. Patient safety may be at risk due to the inability of AI systems to integrate patient context, emotional nuance, and conversational reciprocity into answers. Question wording, emotional phrasing, and clarity were predictive of the responses received. When questions addressed topics covered by multiple publications, AI responses were more consistent, with higher quality. However, when questions addressed controversial or rapidly changing topics, AI responses were often vague. Quality, empathy, and readability were all interconnected, as higher-rated responses incorporated clear and concise wording, showed interest in the patient&#x2019;s medical status, provided tangible resources and references, and recognized the limits of AI, encouraging further conversation with medical providers. This research should be replicated and can provide a reference standard for future AI chatbot responses for AYA patients with melanoma.</p></sec></sec></body><back><ack><p>The authors declare the use of generative artificial intelligence (GAI) in the research process. According to the GAIDeT taxonomy (2025), data collection was delegated to GAI tools under full human supervision. The GAI tools used were ChatGPT (GPT-4o), Gemini, and Copilot. Responsibility for the final manuscript lies entirely with the authors. 
GAI tools are not listed as authors and do not bear responsibility for the final outcomes.</p></ack><notes><sec><title>Funding</title><p>No external financial support or grants were received from any public, commercial, or not-for-profit entities for the research, authorship, or publication of this article.</p></sec><sec><title>Data Availability</title><p>The dataset generated during this study, including all chatbot outputs and scoring, is available in the Multimedia Appendices. No human participant data were collected.</p></sec></notes><fn-group><fn fn-type="con"><p>JLJ and PLH are co-first authors and contributed equally to this work. JLJ, PLH, and KCN contributed to conceptualization and methodology; JLJ and PLH contributed to question development, empathy, readability scoring, and manuscript writing. RPM, SZ, ALO, and KMR performed question input and data collection. MR provided project oversight and expert clinical opinion. MHS contributed to manuscript writing and editing, quality scoring, and response review. KCN served as principal investigator; provided project oversight, conceptualization, methodology, contributed to data collection; and participated in manuscript writing and editing. 
All authors reviewed and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AYA</term><def><p>adolescents and young adults</p></def></def-item><def-item><term id="abb3">ER</term><def><p>Emotional Responsiveness</p></def></def-item><def-item><term id="abb4">FKGL</term><def><p>Flesch-Kincaid Grade Level</p></def></def-item><def-item><term id="abb5">FRE</term><def><p>Flesch Reading Ease</p></def></def-item><def-item><term id="abb6">GQS</term><def><p>Global Quality Score</p></def></def-item><def-item><term id="abb7">NA-SB</term><def><p>Needs Assessment and Service Bridge</p></def></def-item><def-item><term id="abb8">PETS</term><def><p>Perceived Empathy of Technology Scale</p></def></def-item><def-item><term id="abb9">UT</term><def><p>Understanding and Trust</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>National Cancer Institute</collab></person-group><article-title>Cancer stat facts: cancer among adolescents and young adults (AYAs)</article-title><source>SEER Program</source><access-date>2025-08-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://seer.cancer.gov/statfacts/html/aya.html">https://seer.cancer.gov/statfacts/html/aya.html</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McInally</surname><given-names>W</given-names> </name><name name-style="western"><surname>Gray-Brunton</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chouliara</surname><given-names>Z</given-names> </name><name 
name-style="western"><surname>Kyle</surname><given-names>RG</given-names> </name></person-group><article-title>Life Interrupted: experiences of adolescents, young adults and their family living with malignant melanoma</article-title><source>J Adv Nurs</source><year>2021</year><month>09</month><volume>77</volume><issue>9</issue><fpage>3867</fpage><lpage>3879</lpage><pub-id pub-id-type="doi">10.1111/jan.14959</pub-id><pub-id pub-id-type="medline">34245464</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Sidoti</surname><given-names>O</given-names></name><name name-style="western"><surname>McClain</surname><given-names>C</given-names> </name></person-group><article-title>34% of U.S. adults have used ChatGPT, about double the share in 2023</article-title><source>Pew Research Center</source><access-date>2025-08-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.pewresearch.org/short-reads/2025/06/25/34-of-us-adults-have-used-chatgpt-about-double-the-share-in-2023/">https://www.pewresearch.org/short-reads/2025/06/25/34-of-us-adults-have-used-chatgpt-about-double-the-share-in-2023/</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clark</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>S</given-names> </name></person-group><article-title>Chatbots in health care: connecting patients to information</article-title><source>Can J Health Technol</source><year>2024</year><volume>4</volume><issue>1</issue><pub-id pub-id-type="doi">10.51731/cjht.2024.818</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Kim</surname><given-names>HK</given-names> </name></person-group><article-title>The effects of artificial intelligence chatbots on women's health: a systematic review and meta-analysis</article-title><source>Healthcare (Basel)</source><year>2024</year><month>02</month><day>23</day><volume>12</volume><issue>5</issue><fpage>534</fpage><pub-id pub-id-type="doi">10.3390/healthcare12050534</pub-id><pub-id pub-id-type="medline">38470645</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Musheyev</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bockelman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Loeb</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kabarriti</surname><given-names>AE</given-names> </name></person-group><article-title>Assessment of artificial intelligence chatbot responses to top searched queries about cancer</article-title><source>JAMA Oncol</source><year>2023</year><month>10</month><day>1</day><volume>9</volume><issue>10</issue><fpage>1437</fpage><lpage>1440</lpage><pub-id pub-id-type="doi">10.1001/jamaoncol.2023.2947</pub-id><pub-id pub-id-type="medline">37615960</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moore</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ahmad</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kadosh</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Identifying gaps and needs in care for adolescents and young adults with melanoma</article-title><source>Support Care 
Cancer</source><year>2025</year><month>10</month><day>7</day><volume>33</volume><issue>10</issue><fpage>913</fpage><pub-id pub-id-type="doi">10.1007/s00520-025-10001-1</pub-id><pub-id pub-id-type="medline">41057735</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernard</surname><given-names>A</given-names> </name><name name-style="western"><surname>Langille</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hughes</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rose</surname><given-names>C</given-names> </name><name name-style="western"><surname>Leddin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Veldhuyzen van Zanten</surname><given-names>S</given-names> </name></person-group><article-title>A systematic review of patient inflammatory bowel disease information resources on the World Wide Web</article-title><source>Am J Gastroenterol</source><year>2007</year><month>09</month><volume>102</volume><issue>9</issue><fpage>2070</fpage><lpage>2077</lpage><pub-id pub-id-type="doi">10.1111/j.1572-0241.2007.01325.x</pub-id><pub-id pub-id-type="medline">17511753</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charnock</surname><given-names>D</given-names> </name><name name-style="western"><surname>Shepperd</surname><given-names>S</given-names> </name><name name-style="western"><surname>Needham</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gann</surname><given-names>R</given-names> </name></person-group><article-title>DISCERN: an instrument for judging the quality of written consumer health information on treatment choices</article-title><source>J Epidemiol Community 
Health</source><year>1999</year><month>02</month><volume>53</volume><issue>2</issue><fpage>105</fpage><lpage>111</lpage><pub-id pub-id-type="doi">10.1136/jech.53.2.105</pub-id><pub-id pub-id-type="medline">10396471</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Schmidmaier</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rupp</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cvetanova</surname><given-names>D</given-names> </name><name name-style="western"><surname>Mayer</surname><given-names>S</given-names> </name></person-group><article-title>Perceived empathy of technology scale (PETS): measuring empathy of systems toward the user</article-title><conf-name>CHI &#x2019;24: Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems</conf-name><conf-date>May 11, 2024</conf-date><pub-id pub-id-type="doi">10.1145/3613904.3642035</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ley</surname><given-names>P</given-names> </name><name name-style="western"><surname>Florio</surname><given-names>T</given-names> </name></person-group><article-title>The use of readability formulas in health care</article-title><source>Psychol Health Med</source><year>1996</year><month>02</month><volume>1</volume><issue>1</issue><fpage>7</fpage><lpage>28</lpage><pub-id pub-id-type="doi">10.1080/13548509608400003</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chauhan</surname><given-names>V</given-names> </name><name name-style="western"><surname>Sagar</surname><given-names>M</given-names> 
</name></person-group><article-title>Healthcare decision making and choice: an empirical model of patient confusion</article-title><source>Manag Decis</source><year>2023</year><month>11</month><day>7</day><volume>61</volume><issue>11</issue><fpage>3454</fpage><lpage>3474</lpage><pub-id pub-id-type="doi">10.1108/MD-11-2022-1488</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahmad</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kashyap</surname><given-names>A</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Quality of life among adolescents and young adults with melanoma: a systematic review</article-title><source>J Adolesc Young Adult Oncol</source><year>2026</year><month>02</month><volume>15</volume><issue>1</issue><fpage>1</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1089/jayao.2025.0029</pub-id><pub-id pub-id-type="medline">40470545</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Espinosa</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lucerna</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schuitema</surname><given-names>H</given-names> </name></person-group><article-title>Healthcare documents: how readable? 
A study of simple strategies to decrease reading grade level and increase reading ease of healthcare documents</article-title><source>Mathews J Emergency Med</source><year>2022</year><volume>7</volume><issue>1</issue><fpage>44</fpage><pub-id pub-id-type="doi">10.30654/MJEM.10044</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><article-title>How ChatGPT and our foundation models are developed</article-title><source>OpenAI Help Center</source><access-date>2025-09-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://help.openai.com/en/articles/7842364-how-chatgpt-and-our-foundation-models-are-developed">https://help.openai.com/en/articles/7842364-how-chatgpt-and-our-foundation-models-are-developed</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="report"><article-title>NCCN clinical practice guidelines in oncology: adolescent and young adult (AYA) oncology</article-title><year>2025</year><access-date>2026-01-06</access-date><publisher-name>National Comprehensive Cancer Network (NCCN)</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.nccn.org/professionals/physician_gls/pdf/aya.pdf">https://www.nccn.org/professionals/physician_gls/pdf/aya.pdf</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vizer</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Mikles</surname><given-names>SP</given-names> </name><name name-style="western"><surname>Piepmeier</surname><given-names>AT</given-names> </name></person-group><article-title>Cancer-related cognitive impairment in survivors of adolescent and young adult non-central nervous system cancer: a scoping 
review</article-title><source>Psychooncology</source><year>2022</year><month>08</month><volume>31</volume><issue>8</issue><fpage>1275</fpage><lpage>1285</lpage><pub-id pub-id-type="doi">10.1002/pon.5980</pub-id><pub-id pub-id-type="medline">35726379</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Carolan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Gong</surname><given-names>GC</given-names> </name><name name-style="western"><surname>Borja</surname><given-names>S</given-names> </name></person-group><article-title>The state of consumer AI</article-title><year>2025</year><access-date>2026-03-06</access-date><publisher-name>MENLO Ventures</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://menlovc.com/wp-content/uploads/2025/11/menlo_ventures_consumer_ai_report-2025.pdf">https://menlovc.com/wp-content/uploads/2025/11/menlo_ventures_consumer_ai_report-2025.pdf</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Denecke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lopez-Campos</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rivera-Romero</surname><given-names>O</given-names> </name><name name-style="western"><surname>Gabarron</surname><given-names>E</given-names> </name></person-group><article-title>The unexpected harms of artificial intelligence in healthcare: reflections on four real-world cases</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>05</month><day>2</day><volume>325</volume><fpage>55</fpage><lpage>60</lpage><pub-id 
pub-id-type="doi">10.3233/SHTI250219</pub-id><pub-id pub-id-type="medline">40326654</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Steimetz</surname><given-names>E</given-names> </name><name name-style="western"><surname>Minkowitz</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gabutan</surname><given-names>EC</given-names> </name><etal/></person-group><article-title>Use of artificial intelligence chatbots in interpretation of pathology reports</article-title><source>JAMA Netw Open</source><year>2024</year><month>05</month><day>1</day><volume>7</volume><issue>5</issue><fpage>e2412767</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.12767</pub-id><pub-id pub-id-type="medline">38776080</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name></person-group><article-title>Security implications of AI chatbots in health care</article-title><source>J Med Internet Res</source><year>2023</year><month>11</month><day>28</day><volume>25</volume><fpage>e47551</fpage><pub-id pub-id-type="doi">10.2196/47551</pub-id><pub-id pub-id-type="medline">38015597</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Global Quality Score (GQS), DISCERN, and Perceived Empathy of Technology Scale (PETS) scoring scales utilized for this study.</p><media xlink:href="cancer_v12i1e84234_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Chatbot response scores for each input.</p><media xlink:href="cancer_v12i1e84234_app2.docx" xlink:title="DOCX File, 31 
KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Calculated standard deviations of response scores.</p><media xlink:href="cancer_v12i1e84234_app3.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>All Microsoft Copilot outputs.</p><media xlink:href="cancer_v12i1e84234_app4.docx" xlink:title="DOCX File, 75 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Expanded best practices table.</p><media xlink:href="cancer_v12i1e84234_app5.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>All Google Gemini outputs.</p><media xlink:href="cancer_v12i1e84234_app6.docx" xlink:title="DOCX File, 74 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>All ChatGPT outputs.</p><media xlink:href="cancer_v12i1e84234_app7.docx" xlink:title="DOCX File, 77 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>Example of response score breakdown.</p><media xlink:href="cancer_v12i1e84234_app8.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material></app-group></back></article>