<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e68426</article-id><article-id pub-id-type="doi">10.2196/68426</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Evaluating a Chatbot as a Companion for Patients With Breast Cancer: Collaborative Pilot Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Boie</surname><given-names>Sebastian Daniel</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Glastetter</surname><given-names>Esther</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lux</surname><given-names>Michael Patrick</given-names></name><degrees>MD, MBA</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Balzer</surname><given-names>Felix</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>von Kalle</surname><given-names>Christof</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lenz</surname><given-names>Christian</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>Ulrike</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Pfizer Pharma GmbH</institution><addr-line>Friedrichstr. 110</addr-line><addr-line>Berlin</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Department for Gynecology and Obstetrics, St. Louise Women's Hospital, Paderborn, St. Josefs Women's Hospital, Salzkotten, St. Vincenz Clinics</institution><addr-line>Salzkotten + Paderborn</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Institute of Medical Informatics, Charit&#x00E9; - Universit&#x00E4;tsmedizin Berlin, Corporate Member of Freie Universit&#x00E4;t Berlin and Humboldt-Universit&#x00E4;t zu Berlin</institution><addr-line>Berlin</addr-line><country>Germany</country></aff><aff id="aff4"><institution>Clinical Study Center, Berlin Institute of Health at Charit&#x00E9; &#x2013; Universit&#x00E4;tsmedizin Berlin</institution><addr-line>Berlin</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Chenxu</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Kim</surname><given-names>Minjin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Rizzo</surname><given-names>Veronica</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Sebastian Daniel Boie, PhD, Pfizer Pharma GmbH, Friedrichstr. 110, Berlin, 10117, Germany, 49 15152377580; <email>sebastian.boie@pfizer.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>13</day><month>8</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e68426</elocation-id><history><date date-type="received"><day>06</day><month>11</month><year>2024</year></date><date date-type="rev-recd"><day>13</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>17</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sebastian Daniel Boie, Esther Glastetter, Michael Patrick Lux, Felix Balzer, Christof von Kalle, Christian Lenz, Ulrike M&#x00FC;ller. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 13.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e68426"/><abstract><sec><title>Background</title><p>Patients with breast cancer frequently experience significant uncertainty, prompting them to seek detailed, personalized, and reliable medical information to enhance adherence to prescribed treatments, medications, and recommended lifestyle adjustments. Although high-quality information exists within oncology guidelines and patient-oriented resources, the provision of tailored responses to individual patient queries remains challenging, especially for non&#x2013;English-speaking populations.</p></sec><sec><title>Objective</title><p>This study aims to evaluate the potential of an artificial intelligence&#x2013;driven chatbot, specifically leveraging ChatGPT (GPT-4; OpenAI) combined with retrieval-augmented generation, to deliver personalized answers to complex breast cancer-related patient questions in German.</p></sec><sec sec-type="methods"><title>Methods</title><p>We collaborated with one of Germany&#x2019;s largest breast cancer Patient Representation Groups to collect authentic patient inquiries, receiving a total of 118 questions. After initial screening, we selected 104 medical questions, organized into 7 distinct categories: aftercare, bone health, ductal carcinoma in situ, diagnostics, nutrition and supplements, complementary medicine, and therapy. A customized version of GPT-4 was configured with specific system prompts emphasizing empathetic, evidence-based responses and integrated with a comprehensive database comprising guidelines, recommendations, and patient information materials published by recognized German medical societies. 
To assess chatbot responses, we used 4 evaluation criteria: comprehensibility (clarity from a patient perspective), correctness (accuracy per current medical guidelines), completeness (inclusion of all relevant aspects), and potential harm (risk of undue patient harm or misinformation). Ratings were conducted using a 5-point Likert scale by a breast cancer expert (correctness, completeness, and potential harm) and patient representatives (comprehensibility).</p></sec><sec sec-type="results"><title>Results</title><p>The chatbot provided high-quality responses across multiple dimensions. Of the 499 responses evaluated for comprehensibility, 427 (85.6%) were rated as comprehensible. Among the 104 responses assessed for the remaining dimensions, 91 (87.5%) were rated as correct, 72 (69.2%) as complete, and 93 (89.4%) as nonharmful. Reasons for incomplete answers included omission of reimbursement details, updates from recent therapeutic guidelines, or nuanced recommendations regarding endocrine therapy and aftercare schedules. In addition, 6 (5.8%) of the answers were rated as potentially harmful due to outdated or contextually inappropriate recommendations. The chatbot also performed well in the nutrition and bone health categories despite occasionally incomplete document retrieval.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our findings demonstrate that an artificial intelligence&#x2013;powered chatbot with GPT-4 and retrieval augmentation can effectively provide personalized, linguistically accessible, and largely accurate information to German-speaking patients with breast cancer. This approach holds considerable promise for improving patient-centered communication, empowering patients to make informed decisions. Nonetheless, observed limitations regarding response completeness and potential harm underscore the critical need for ongoing human oversight. 
Future research and development should prioritize regularly updated databases, advanced retrieval methods to handle complex document structures, multimodal capabilities, and clearly articulated disclaimers emphasizing the necessity of professional medical consultation. Our evaluation, along with the provided set of realistic patient questions, establishes a benchmark for future development and validation of German-language oncology chatbots.</p></sec></abstract><kwd-group><kwd>breast cancer</kwd><kwd>chatbot</kwd><kwd>patient information</kwd><kwd>Generative artificial intelligence</kwd><kwd>Gen AI</kwd><kwd>Retrieval-augmented generation</kwd><kwd>RAG</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Breast cancer (BC) is the most frequent form of cancer in women [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>] and a global health concern. Patients with BC have a substantial need for information on their disease at all stages of the patient journey, prefer information tailored to their individual circumstances [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>], and it is well-known that information on disease and treatments is an important factor for medication adherence and persistence [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. While health care professionals (HCPs) aim to provide comprehensive answers, time constraints and resource limitations can lead to a mismatch in information provision.</p><p>Many patients routinely use the internet as a source for information about their condition [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref10">10</xref>], which increases their risk of exposure to misinformation [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. 
It is expected that patients increasingly turn to artificial intelligence (AI)&#x2013;based solutions, such as chatbots, which can help to assess the credibility of information. Chatbots can play a significant role in informing patients about their situation and treatment options [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. A substantial benefit of digital information sources is the easy 24-hour access to information and that these sources may answer questions that were left out during consultation by the HCPs. This may help patients have a more informed consultation with their HCP and supports shared decision-making.</p><p>Chatbot apps based on large language models (LLMs) are promising as interactive assistants to tailor medical information for patients&#x2019; specific needs [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Most existing chatbots are primarily trained on English language data, creating a language barrier for non&#x2013;English-speaking patients [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. LLM-based chatbots can generate conversational and personalized answers that include context. In general, medical practice differs significantly between different countries due to differences in reimbursements, regulatory frameworks, and cultural attitudes, among other factors [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. While English-speaking LLM-based chatbots are developed for other cancer entities [<xref ref-type="bibr" rid="ref25">25</xref>], there are, to the best of our knowledge, no existing publications about LLM-powered chatbot solutions for German-speaking patients with BC.</p><p>The major disadvantage of LLMs is that they can confidently generate various types of false answers (eg, hallucinations, confabulations, misrepresentations, and omissions among others). 
A taxonomy of false output and mitigation strategies is a topic of current research [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. A mitigation strategy is to map user questions by classifying their intent to predefined answers. Solutions with predefined answers exist for lung cancer in Japanese [<xref ref-type="bibr" rid="ref28">28</xref>] and prostate cancer in German [<xref ref-type="bibr" rid="ref29">29</xref>]. While this approach is generally more reliable, since answers are preselected and quality controlled, it lacks flexibility and personalization of the answers.</p><p>Another emerging strategy to mitigate the risk of false information is retrieval-augmented generation (RAG) [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>] where a trained LLM, such as a generative pretrained transformer (GPT), has access to additional information sources (eg, a database with oncology guidelines and other quality-controlled documents). This additional information can provide up-to-date and quality-controlled context for an LLM to a given query [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>Ultimately, as AI technologies continue to evolve, health care institutions and organizations may increasingly explore the development of their own LLM-based apps to support more inclusive and patient-centered care. 
For such efforts to be effective and responsible, they must be grounded in language- and context-specific considerations that reflect patients&#x2019; real-world concerns.</p><p>The aim of this paper is to explore the potential of a retrieval-augmented German-language chatbot based on ChatGPT to address typical information needs of breast cancer patients using real patient questions.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This study was conducted in Germany as a collaborative project between researchers from industry and academic experts in digital health, oncology, and AI. An essential partner in this initiative was one of the country&#x2019;s largest breast cancer Patient Representation Groups, comprising individuals with lived experience of breast cancer and deep engagement in patient advocacy. The Patient Representation Group contributed real-world patient questions and helped shape the evaluation criteria, enabling the assessment of the chatbot in a way that reflects the practical needs and concerns of breast cancer patients in the German health care context. This study was performed in accordance with the TRIPOD+LLM (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis+Large Language Model) guidelines (the TRIPOD+LLM checklist is provided in <xref ref-type="supplementary-material" rid="app3">Checklist 1</xref>).</p><p>The initial phase of the work, including chatbot refinement, question selection, and response evaluation, was completed in April 2024 (see <xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overview of the document selection process, chatbot preparation, and evaluation. 
BC: Breast cancer; HCP: health care professional; PRG: Patient Representation Group.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e68426_fig01.png"/></fig></sec><sec id="s2-2"><title>Document Selection</title><p>We selected documents with the purpose of providing the chatbot with up-to-date, evidence-based guidelines and recommendations, ensuring that its responses are grounded in the current standard of care for BC in Germany.</p><p>Oncology guidelines and recommendations provided by professional societies are reliable, high-quality sources of information on the diagnosis and treatment of BC. The guidelines are based on expert consensus and compile evidence-based information with a comprehensive coverage and regular updates [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Guidelines are designed for clinicians to develop tailored diagnosis and treatment plans to the patients&#x2019; individual tumor biology [<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>Some societies (the German Cancer Society [DKG e.V.], the Arbeitsgemeinschaft Gyn&#x00E4;kologische Onkologie e.V. [AGO e.V.], and the Commission Breast of the German Society of Gynecology and Obstetrics) additionally provide information sources directly for patients.</p><p>The authors screened and reviewed guidelines and information documents from prominent German medical societies, BC working groups, and nonprofit organizations providing material for HCPs and patients. The entities were selected based on their established role and recognition within the German BC clinical and patient support landscape. 
Consensus among all authors was reached through discussion, and feedback from the Patient Representation Group was solicited.</p><p>We considered documents from well-known entities (see <xref ref-type="other" rid="box1">Textbox 1</xref>).</p><boxed-text id="box1"><title>Medical entities considered for document selection.</title><p>We considered documents from the following well-known entities:</p><list list-type="bullet"><list-item><p>Arbeitsgemeinschaft der Wissenschaftlichen Medizinischen Fachgesellschaften e.V. (AWMF).</p></list-item><list-item><p>Deutsche Krebsgesellschaft e.V. (DKG).</p></list-item><list-item><p>Deutsche Krebshilfe e.V. (DKH).</p></list-item><list-item><p>Deutsche Gesellschaft f&#x00FC;r Gyn&#x00E4;kologie und Geburtshilfe e.V. (DGGG).</p></list-item><list-item><p>PRIO (Pr&#x00E4;vention und Integrative Onkologie, eine Arbeitsgemeinschaft der DKG).</p></list-item><list-item><p>Deutsche Gesellschaft f&#x00FC;r H&#x00E4;matologie und Medizinische Onkologie e.V. (DGHO).</p></list-item><list-item><p>Deutsche Gesellschaft f&#x00FC;r Ern&#x00E4;hrungsmedizin e.V. (DGEM).</p></list-item><list-item><p>Arbeitsgemeinschaft Supportive Ma&#x00DF;nahmen in der Onkologie, Rehabilitation und Sozialmedizin der Deutschen Krebsgesellschaft (ASORS).</p></list-item><list-item><p>&#x00D6;sterreichische Arbeitsgemeinschaft f&#x00FC;r klinische Ern&#x00E4;hrung (AKE).</p></list-item><list-item><p>Dachverband der Deutschsprachigen Wissenschaftlichen Osteologischen Gesellschaften e.V. (DVO).</p></list-item><list-item><p>Arbeitsgemeinschaft Gyn&#x00E4;kologische Onkologie e.V. 
(AGO).</p></list-item><list-item><p>Arbeitsgemeinschaft f&#x00FC;r Psychoonkologie in der Deutschen Krebsgesellschaft (PSO).</p></list-item></list></boxed-text><p>From these entities, we selected all documents addressing HCPs or patients that cover diagnosis, treatment, and aftercare of breast cancer; complementary medicine for oncological patients; nutrition in clinical oncology; osteoporosis; and psycho-oncological diagnosis, consultation, and treatment.</p><p>Each document was manually reviewed, and we excluded documents that were either expired or not yet approved by consensus (ie, draft versions and versions for discussion only) from our selection. Draft, discussion, or expired marks (as assigned by the Arbeitsgemeinschaft der Wissenschaftlichen Medizinischen Fachgesellschaften e.V.) were verified by 2 authors (SDB and UM). The selection was internally reviewed and discussed with representatives from the BC Patient Representation Group as well as the senior BC expert. All documents suggested by any of the authors or by the Patient Representation Group were included in the final selection. In total, we included 13 documents comprising 3110 pages (<xref ref-type="fig" rid="figure1">Figure 1A</xref>). 
The documents are available through the respective entities&#x2019; website, and the compendium can be used by other researchers.</p><p>The included documents are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p><p>A list of excluded documents can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Included documents.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Document title</td><td align="left" valign="bottom">Type of document</td><td align="left" valign="bottom">Primary target audience</td><td align="left" valign="bottom">Publisher or leading Medical Societies</td></tr></thead><tbody><tr><td align="left" valign="top">Interdisziplin&#x00E4;re S3-Leitlinie f&#x00FC;r die Fr&#x00FC;herkennung, Diagnostik, Therapie und Nachsorge des Mammakarzinoms. Langversion 4.4, June 2021, Register 032-045OL</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">AWMF, DKG, DKH, DGGG, and DKG.</td></tr><tr><td align="left" valign="top">S3-Leitlinie Komplement&#x00E4;rmedizin in der Behandlung von onkologischen PatientInnen Langversion 1.1, September 2021, Register 032/055OL</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">AWMF, DKG, DKH, DKG, PRIO, DGGG, and DGHO.</td></tr><tr><td align="left" valign="top">Klinische Ern&#x00E4;hrung in der Onkologie. 2015, Register 073/006. (DOI 10.1055/s-0035-1552741).</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">DGEM with DGHO, ASORS, and AKE.</td></tr><tr><td align="left" valign="top">Prophylaxe, Diagnostik und Therapie der OSTEOPOROSE. 
Langfassung, September 2023, Register 183/001</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">DVO.</td></tr><tr><td align="left" valign="top">Diagnostik und Therapie fr&#x00FC;her und fortgeschrittener Mammakarzinome 2023.1</td><td align="left" valign="top">Recommendations</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">AGO Breast Commission (of DGGG) and DKG.</td></tr><tr><td align="left" valign="top">Mammakarzinom der Frau. January 2018</td><td align="left" valign="top">Recommendations</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">Onkopedia, DGHO.</td></tr><tr><td align="left" valign="top">Psychoonkologische Diagnostik, Beratung und Behandlung von erwachsenen Krebspatient*innen. Version 2.1 &#x2013; August 2023</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">AWMF, DKG, DKH, DKG, and PSO.</td></tr><tr><td align="left" valign="top">Peri- und Postmenopause &#x2013; Diagnostik und Interventionen. Register 015-062, Version 1.1, January 2020</td><td align="left" valign="top">S3 Guideline</td><td align="left" valign="top">HCPs</td><td align="left" valign="top">DGGG.</td></tr><tr><td align="left" valign="top">Patientinnenleitlinie. Brustkrebs im fr&#x00FC;hen Stadium. December 2018</td><td align="left" valign="top">Patient Guideline based on S3 Guideline</td><td align="left" valign="top">Patients</td><td align="left" valign="top">AWMF, DKG, and Stiftung Deutsche Krebshilfe.</td></tr><tr><td align="left" valign="top">BRUSTKREBS Patientenratgeber zu den AGO-Empfehlungen 2023</td><td align="left" valign="top">Patient companion based on AGO recommendations</td><td align="left" valign="top">Patients</td><td align="left" valign="top">AGO Breast Commission with AGO Patient Forum.</td></tr><tr><td align="left" valign="top">Voi&#x00DF; P. 
(2018) M&#x00F6;glichkeiten und Grenzen der Komplement&#x00E4;rmedizin.</td><td align="left" valign="top">Information leaflet</td><td align="left" valign="top">Patients</td><td align="left" valign="top">Brustkrebs Deutschland e.V.</td></tr></tbody></table></table-wrap></sec><sec id="s2-3"><title>Model Preparation</title><p>We used OpenAI&#x2019;s feature to build custom GPTs, based on GPT-4, with user-defined instructions and access to a document database for RAG [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. The retrieval mechanism can find relevant information from the uploaded documents and pass it along with the original question to the GPT. For our experiments, it was sufficient to upload the documents one by one. All technical details (eg, splitting documents into chunks, embedding chunks to obtain a vector index, question embedding, and similarity-based matching) are handled by OpenAI. The RAG mechanism is used as is, since parameters that affect retrieval (eg, threshold values for similarity measures or number of retrieved documents) are not exposed. A detailed explanation of the RAG technology can be found elsewhere [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>We experimented with different instructions in the system prompt using a set of 5 short questions (included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) that the authors formulated before receiving test questions from the Patient Representation Group. 
Based on the initial experiments, we agreed on using the following 5 instructions: (1) search for relevant information in the documents uploaded; (2) clearly advise against therapies that are not evidence-based; (3) ask clarifying questions, if necessary; (4) formulate empathetic answers; and (5) do not mention severe complications unless they are clearly indicated by the patient. These instructions were implemented through the system prompt (<xref ref-type="fig" rid="figure1">Figure 1B</xref>). Since our goal is to evaluate the answering capability in the German language, we used the instructions in German (also included in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). All evaluations took place on or before April 19, 2024.</p></sec><sec id="s2-4"><title>Test Questions</title><p>We asked one of Germany&#x2019;s largest BC Patient Representation Groups to share a set of commonly asked questions. We indicated that our focus was on medical questions only and made no further recommendations as to topic, number, difficulty, or length of the question. Questions were submitted to the Patient Representation Group in person at regional or national meetings, via phone, email, or various online social network platforms that the BC Patient Representation Group is active on. The Patient Representation Group selected the questions based on their judgment of their importance and frequency of occurrence in real-world patient interactions. They grouped the questions into 7 categories (Aftercare, Bone health, DCIS [ductal carcinoma in situ], Diagnostics, Diet and nutritional supplements, Complementary medicine, and Therapy). Answers were not provided.</p><p>We performed an initial screening of each received question to determine whether it was medical in nature and excluded legal and reimbursement questions. Exclusion decisions were based on consensus between 2 reviewers (SDB and UM). 
All questions (included and excluded) are included in the <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>Most remaining questions are used in the evaluation &#x201C;as-is,&#x201D; even if the question is complex or with a high potential of misinterpretation. We edited some questions by writing out abbreviations or adding the category at the start of the question. The edits are detailed in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Edits on the original questions before using them to evaluate our chatbot.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Original</td><td align="left" valign="bottom">Modification</td><td align="left" valign="bottom">Translation</td><td align="left" valign="bottom">Affected questions</td></tr></thead><tbody><tr><td align="left" valign="top">AI<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">Aromatase-inhibitor</td><td align="left" valign="top">Aromatase inhibitor</td><td align="left" valign="top">17, 34, 37, 38, 41, 78, and 104.</td></tr><tr><td align="left" valign="top">Empty</td><td align="left" valign="top">Category placed in front of the question for context (eg, Brustkrebs, hormoneller Brustkrebs, Ern&#x00E4;hrung und Nahrungserg&#x00E4;nzungsmittel, Hitzewallungen und Schwei&#x00DF;ausbr&#x00FC;che (vasomotorische Symptome))</td><td align="left" valign="top">Breast cancer, hormonal breast cancer, nutrition and dietary supplements, excessive sweating (vasomotor symptoms)</td><td align="left" valign="top">74, 75, 76, 77, 89, 96, and 98.</td></tr><tr><td align="left" valign="top">NEM</td><td align="left" valign="top">Nahrungserg&#x00E4;nzungsmittel</td><td align="left" valign="top">Dietary supplements</td><td align="left" valign="top">76, 80, 82, 88, and 102.</td></tr></tbody></table><table-wrap-foot><fn 
id="table2fn1"><p><sup>a</sup>AI: aromatase inhibitor.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-5"><title>Evaluation</title><p>Evaluating the output of LLMs in medical question-answering systems is a topic of current debate, with consortia developing standards [<xref ref-type="bibr" rid="ref41">41</xref>]. To date, a standardized evaluation framework is missing. Typically, evaluation criteria are defined at the outset of the study and evaluated on a Likert scale [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref45">45</xref>].</p><p>We agreed on 4 criteria on which to evaluate the chatbot after feedback from the Patient Representation Group. It was agreed that the Patient Representation Group would rate the comprehensibility of the answers (&#x201C;The answer is clear to me.&#x201D;) and that a senior BC expert (ML), who is represented in several German guideline commissions and on the board of the German Society for Senology, would rate correctness (&#x201C;The answer presents scientifically correct information.&#x201D;), completeness (&#x201C;The answer includes all important aspects.&#x201D;), and whether the answer has potential to cause undue harm (&#x201C;The answer does not cause undue harm.&#x201D;; <xref ref-type="fig" rid="figure1">Figure 1C</xref>). For the Patient Representation Group, 5 raters were recruited by the spokesperson during an in-person event of regional leaders. Each rater independently completed the evaluation using the 5-point Likert scale and a &#x201C;don&#x2019;t know / can&#x2019;t answer&#x201D; option. Only aggregate response counts per item were collected; no individual-level data were recorded. Direct interaction with individual patient raters was not pursued due to legal and ethical considerations.</p><p>The 104 questions were posed to the LLM once. To rate an answer, the raters evaluated each statement on a 5-point Likert scale (5=Strongly agree, 4=Agree, 3=Neutral, 2=Disagree, and 1=Strongly disagree). 
For each of the given criteria, we present individual ratings per category divided by the total number of ratings for the given category.</p></sec><sec id="s2-6"><title>Ethics Statement</title><p>According to &#x00A7;15 of the Professional Code of the Berlin Medical Association, research based solely on anonymized data is exempt from the requirement for formal ethical approval. In line with this provision, we did not obtain ethics committee approval for this study, as only aggregated, deidentified data were used. However, we recognize that the Ethics Committee&#x2019;s guidance encourages researchers to seek consultation even when data are deidentified or aggregated. Prospective consultation with the ethics committee was not sought.</p><p>Participants were invited to complete a paper-based form, and there was no direct interaction between the research team and patients. All communication and data collection were managed by a spokesperson from the Patient Representation Group, who aggregated and anonymized the responses before sharing them with the research team. All data were handled in accordance with applicable data protection regulations and shared in anonymized, aggregated form only.</p><p>No direct compensation was provided to individual participants. A modest compensation was provided to the Patient Representation Group for their coordination efforts and data aggregation in accordance with the FSA Code for Collaboration with Patient Organizations specified in the EFPIA Code of Practice (2008). All financial contributions and contracts with Patient Representation Groups are publicly disclosed in the &#x201C;Transparenzkodex&#x201D; annually.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Questions</title><p>We received a total of 118 questions and a corresponding category for each question. 
A total of 14 questions are excluded because they were related to nonmedical aspects (eg, insurance coverage, reimbursements, and legal aspects). The questions are grouped into 7 categories (Aftercare, Bone health, DCIS, Diagnostics, Diet and nutritional supplements, Complementary medicine, and Therapy). <xref ref-type="fig" rid="figure2">Figure 2</xref> shows the breakdown of the number of questions per category. Many questions were asked on managing side effects, particularly for endocrine and endocrine-based therapy, with a specific emphasis on complementary and alternative medicine, as well as dietary supplements.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>In total, we received 118 questions (black). Human review determined that 14 questions are out of scope (red). The remaining questions are grouped into different categories (blue). Color codes green and yellow indicate whether the model retrieves relevant information from the documents. DCIS: Ductal carcinoma in situ.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e68426_fig02.png"/></fig><p>For some questions, the LLM reported that no relevant information was found in the documents included. This means that the retriever mechanism was not able to match a query to information stored in the adjacent database. In several cases, the RAG-based model returned no relevant documents from the retrieval component. As internet search was not enabled in our experimental setup, the LLM proceeded to generate responses based solely on its pretrained and fine-tuned knowledge. These instances are reported as part of the results, reflecting the system&#x2019;s behavior when retrieval fails. 
The model reported for 24 questions that no relevant information was found (see <xref ref-type="fig" rid="figure2">Figure 2</xref>).</p></sec><sec id="s3-2"><title>Chatbot Answers and Evaluation</title><p>The answers were evaluated using the four criteria (1) correctness, (2) completeness, (3) no harm, and (4) comprehensibility, either by a senior BC expert (criteria 1&#x2010;3) or the Patient Representation Group (criterion 4). For each criterion, we formulated a statement (see the Methods section) and evaluated the statement along a 5-point Likert scale. By combining &#x201C;Strongly Agree&#x201D; with &#x201C;Agree&#x201D; and &#x201C;Disagree&#x201D; with &#x201C;Strongly Disagree,&#x201D; we see that:</p><list list-type="bullet"><list-item><p>In total, 427 out of 499 (85.6%) ratings of the answers are rated as comprehensible and 42 (8.4%) as incomprehensible.</p></list-item><list-item><p>A total of 91 out of 104 (87.5%) answers are rated as correct and 7 (6.7%) incorrect.</p></list-item><list-item><p>In addition, 72 out of 104 (69.2%) answers are rated as including all relevant information, and 20 (19.2%) have some missing information.</p></list-item><list-item><p>Furthermore, 93 out of 104 (89.4%) answers are rated as not harmful, and 6 (5.8%) may cause potential harm.</p></list-item></list><p>The &#x201C;Neutral&#x201D; ratings account for the remaining percentages up to 100. Note that comprehensibility was rated by multiple raters, hence the number of ratings exceeds the number of questions.</p><p>A total of 6 answers are considered potentially harmful. One answer related to gene expression testing was classified as potentially harmful due to an incorrect statement on reimbursement. In addition, 2 answers on the use of abemaciclib did not provide information on its use in premenopausal women or failed to mention the absence of data regarding the initiation of abemaciclib therapy 2&#x2010;3 years after starting endocrine therapy. 
Furthermore, 2 answers were deemed potentially harmful because they provided individualized recommendations for the duration of aftercare, including breast sonography. Finally, the chatbot mentioned hormone replacement therapy as a treatment for vasomotor symptoms but failed to mention nondrug therapies.</p><p>A total of 2 answers were deemed incomplete due to missing reimbursement information. In addition, some answers failed to mention new therapeutic treatment regimens, lacked comprehensive details on aftercare or testing, or omitted aspects related to endocrine therapy, osteo-oncology, or hormonal testing for menopause status.</p><p>We analyze the answers for each category separately (see <xref ref-type="fig" rid="figure3">Figure 3</xref> and <xref ref-type="table" rid="table3">Table 3</xref>). We observe that the chatbot performs well across all criteria for Bone health (14 questions) and diet and nutritional supplements (15 questions). For the latter category, the chatbot reported no relevant information found in the documents for 7 out of 15 questions (see <xref ref-type="fig" rid="figure2">Figure 2</xref>). The worst-performing categories (Aftercare and Diagnostics) also have the fewest number of questions (6 and 4, respectively). Some answers (20 out of 104) omit important information, except for the Diet and nutritional supplement category. A total of 6 responses were identified as potentially harmful by expert judgment, typically for omitting important caveats about medication use or not mentioning alternative (nondrug-based) therapies. One answer on abemaciclib therapy did not include information on its use in premenopausal women. Another answer failed to mention the absence of data regarding the initiation of abemaciclib therapy 2&#x2010;3 years after starting endocrine therapy. 
Furthermore, 2 answers were deemed potentially harmful because they provided individualized recommendations for the duration and type of aftercare.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Evaluation of the 4 criteria (Comprehensibility, Correctness, Completeness, and No harm) using a 5-point Likert scale for each question category. Compl: Complementary; DCIS: Ductal carcinoma in situ; Nutr suppl: nutrition supplement.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e68426_fig03.png"/></fig><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Individual ratings per category along the 4 dimensions.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom">Questions</td><td align="left" valign="bottom">No information found</td><td align="left" valign="bottom">Dimension</td><td align="left" valign="bottom" colspan="5">Ratings, n</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">5</td><td align="left" valign="bottom">4</td><td align="left" valign="bottom">3</td><td align="left" valign="bottom">2</td><td align="left" valign="bottom">1</td></tr></thead><tbody><tr><td align="left" valign="top">Aftercare</td><td align="left" valign="top">6</td><td align="left" valign="top">0</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">2</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">2</td><td align="left" 
valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td align="left" valign="top">3</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">19</td><td align="left" valign="top">4</td><td align="left" valign="top">1</td><td align="left" valign="top">6</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Bone health</td><td align="left" valign="top">14</td><td align="left" valign="top">2</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">12</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">9</td><td align="left" valign="top">3</td><td align="left" valign="top">0</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td align="left" valign="top">10</td><td align="left" valign="top">4</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" 
valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">38</td><td align="left" valign="top">18</td><td align="left" valign="top">2</td><td align="left" valign="top">2</td><td align="left" valign="top">3</td></tr><tr><td align="left" valign="top">Complementary medicine</td><td align="left" valign="top">9</td><td align="left" valign="top">5</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">8</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">6</td><td align="left" valign="top">0</td><td align="left" valign="top">3</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No Harm</td><td align="left" valign="top">8</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">24</td><td align="left" valign="top">11</td><td align="left" valign="top">4</td><td align="left" valign="top">2</td><td align="left" valign="top">3</td></tr><tr><td align="left" valign="top">Ductal carcinoma in situ</td><td align="left" valign="top">10</td><td align="left" valign="top">2</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">5</td><td align="left" valign="top">4</td><td align="left" valign="top">1</td><td align="left" 
valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">3</td><td align="left" valign="top">4</td><td align="left" valign="top">1</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td align="left" valign="top">6</td><td align="left" valign="top">3</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">32</td><td align="left" valign="top">15</td><td align="left" valign="top">2</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Diagnostic</td><td align="left" valign="top">4</td><td align="left" valign="top">0</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td 
align="left" valign="top">1</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">13</td><td align="left" valign="top">5</td><td align="left" valign="top">0</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Diet and nutritional supplements</td><td align="left" valign="top">15</td><td align="left" valign="top">7</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">15</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">14</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td align="left" valign="top">12</td><td align="left" valign="top">3</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">52</td><td align="left" valign="top">12</td><td align="left" valign="top">6</td><td align="left" valign="top">3</td><td align="left" valign="top">0</td></tr><tr><td align="left" 
valign="top">Therapy</td><td align="left" valign="top">46</td><td align="left" valign="top">8</td><td align="left" valign="top">Correctness</td><td align="left" valign="top">26</td><td align="left" valign="top">14</td><td align="left" valign="top">3</td><td align="left" valign="top">2</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Completeness</td><td align="left" valign="top">19</td><td align="left" valign="top">8</td><td align="left" valign="top">7</td><td align="left" valign="top">12</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">No harm</td><td align="left" valign="top">33</td><td align="left" valign="top">8</td><td align="left" valign="top">3</td><td align="left" valign="top">2</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Comprehensibility</td><td align="left" valign="top">136</td><td align="left" valign="top">48</td><td align="left" valign="top">15</td><td align="left" valign="top">20</td><td align="left" valign="top">0</td></tr></tbody></table></table-wrap><p>Finally, the chatbot mentioned hormone replacement therapy as a treatment for vasomotor symptoms (as side effects of endocrine therapy). 
However, hormone replacement therapy is contraindicated for patients with breast cancer in this specific therapeutic situation (<xref ref-type="table" rid="table3">Table 3</xref>).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>One of the largest German BC Patient Representation Groups shared a set of typical questions (without identifiable information) that commonly arise. These questions highlight significant and specific information needs of patients&#x2014;especially regarding endocrine or endocrine-based therapy and the prevention or management of side effects&#x2014;that go beyond the scope of general health literacy support. Unlike other AI or chatbot or patient vignette studies, this BC Patient Representation Group provided relatively complex medical questions in German, not easily answered through a simple Google search or by referring to patient versions of guidelines.</p><p>In response, we evaluated whether a RAG and LLM-based chatbot solution could provide high-quality, tailored information in German. The database included authoritative German-language guidelines and patient information materials that are valid for Germany. Overall, the answers to 104 BC questions were evaluated on 4 criteria (comprehensibility, correctness, completeness, and potential harm) using a 5-point Likert scale. Despite an overall high quality of responses, 20 out of 104 answers were incomplete, and 6 were potentially harmful.</p><p>In summary, the chatbot was tested on 104 frequently asked breast cancer&#x2013;related questions and produced answers that were mostly rated positively across 4 criteria (comprehensibility, correctness, completeness, and harm potential). Specifically, 85.6% (427 out of 499 ratings) of responses were deemed comprehensible, 87.5% (91/104) correct, and 89.4% (93/104) free of undue harm; however, only 69.2% (72/104) of answers were judged complete. 
Shortcomings primarily involved incomplete details on reimbursement or newer treatment regimens and omissions regarding aftercare or endocrine therapy.</p><p>Potentially harmful guidance stemmed mainly from two issues: (1) outdated or ambiguous source material (eg, reimbursement rules that had changed, guidance on hormone replacement therapy lacking caveats about medication use, or not mentioning alternatives) and (2) retrieval gaps that prevented the model from grounding its answer in the most relevant passages. When the necessary nuance was scattered across tables, figures, or inconsistent terminology (&#x201C;aftercare,&#x201D; &#x201C;follow-up,&#x201D; and &#x201C;screening&#x201D;), the retrieval component sometimes surfaced a partial context, prompting the LLM to fill the vacuum with general knowledge that did not fit the oncological edge cases.</p><p>For example, the answer related to gene expression testing was classified as potentially harmful due to an outdated statement on reimbursement found in the source document [<xref ref-type="bibr" rid="ref46">46</xref>]. In addition, no context was provided for the use of abemaciclib in premenopausal women, likely because the 2018 and 2021 guidelines [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>] did not provide data on its use in early breast cancer.</p><p>The chatbot may have been misled into giving individualized recommendations for the duration and type of aftercare by the varying terminologies used in the documents (&#x201C;aftercare,&#x201D; &#x201C;follow-up,&#x201D; and &#x201C;screening&#x201D;), different design formats, tables, and inconsistent time formats (months, years; in numbers or text). 
In addition, considerations for specific situations such as DCIS, breast scarring after surgery, or the statement that aftercare can be adapted to the symptoms may have contributed to the &#x201C;confusion.&#x201D; While potentially harmful answers were identified through expert review, actual harm to patients remains unlikely due to clinical safeguards.</p><p>Notably, the chatbot performed better than expected in the areas of bone health and nutrition, even when relevant documents were not retrieved, suggesting that its base training data sufficed to answer many diet-related questions. Overall, the system demonstrated promising accuracy and clarity but showed room for improvement in providing comprehensive and fully risk-aware medical guidance.</p><p>Several questions encompassed multiple aspects and included subquestions that required broad answers to cover all points (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Incomplete answers were primarily due to the absence of reimbursement details, newer therapeutic regimens not yet documented in guidelines, and retrieval limitations affecting tabular and graphical data representation. In 3 cases, there was a lack of clinical data to address the specific question. For 16 other cases, there was an absence of information in the source documents. Specifically, in 2 of these cases, information on new therapeutic treatment regimens was not yet included in any guidelines. In another case, outdated guideline information led to an inaccurate and incomplete answer. In addition, in 2 cases, the topic of reimbursement was inappropriately covered, despite it typically being out of scope for the source documents. 
Furthermore, in 8 instances, the available information was not retrieved, probably because it was presented in tables, figures, or listings with ratings (such as on level of evidence and on the grade of recommendation) rather than text, making it accessible only to medical experts who could interpret it, a task beyond the capability of the current retrieval mechanism. The inconsistent terminologies and varying design and time formats across and within the source documents might have also contributed to the nonretrieval of information.</p><p>The curation of up-to-date and machine-accessible information in the database improves results. We hypothesize that a textual representation of information from tables, figures, or listings improves results with the given RAG technology. Alternatively, digitized and annotated versions of relevant documents [<xref ref-type="bibr" rid="ref49">49</xref>] or multimodal RAGs [<xref ref-type="bibr" rid="ref50">50</xref>] likely improve retrieval.</p><p>To mitigate the risk of potentially harmful or incomplete answers, it is important to provide a disclaimer that the chatbot is not able to replace consultation with medical professionals and encourage patients to seek consultation. We strongly suggest that continuous monitoring in the form of transcript reviews and human oversight by medical professionals is implemented, as others have pointed out as well [<xref ref-type="bibr" rid="ref44">44</xref>]. 
Furthermore, it is clear that chatbots are classified as medical devices [<xref ref-type="bibr" rid="ref31">31</xref>] and need substantial safety measures before being used at scale.</p></sec><sec id="s4-2"><title>Comparison With Previous Work</title><p>In agreement with our results, it has been observed that ChatGPT scores high on correctness and often significantly lower on completeness for cancer-related question-answer pairs in an English-language setup [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. A growing body of literature demonstrates that the RAG mechanism helps to ground LLMs in factual information for medical use cases [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. To the best of our knowledge, there is no similar study evaluating LLM-based chatbots on German question-answer pairs for any cancer entity. The training on English-language data may introduce subtle biases from the respective health care systems. Our results show that ChatGPT, together with authoritative documents, can answer BC questions in German with similar performance.</p><p>Our realistic set of real-world patient questions can be used by other researchers to develop German BC chatbots. Many questions raised by the BC Patient Representation Group pertain to endocrine and endocrine-based therapy, particularly related to the prevention or management of side effects. Providing active patient support for this therapy is crucial to ensure adherence and prevent early termination, as discontinuation of endocrine or endocrine-based therapy is associated with worse outcomes, including reduced overall survival [<xref ref-type="bibr" rid="ref55">55</xref>].</p><p>A further topic of interest among patients is food supplements and alternative medicine. 
Evidence-based recommendations on complementary and alternative medicine can help patients avoid negative interactions with cancer treatment, prevent harmful or ineffective therapies, and potentially contribute to treatment success [<xref ref-type="bibr" rid="ref56">56</xref>]. The chatbot might provide substantial support for patients, especially when based on evidence-based information such as from the German S3 guidelines on complementary medicine in oncology.</p><p>This evaluation of a chatbot prototype in the German language provides a strong baseline for evaluating German BC chatbot apps. We observe that ChatGPT, together with appropriate documents in the vector store, provides a strong performance even for a German medical question answering task.</p></sec><sec id="s4-3"><title>Limitations</title><p>First, we pose each question to the chatbot only once. Thus, we do not assess whether the retrieval mechanism consistently retrieves the same result or if the LLM&#x2019;s answers are consistent across repeated queries.</p><p>Second, the evaluation criteria&#x2014;correctness, completeness, and potential for undue harm&#x2014;are evaluated by only a single expert, which introduces subjective judgment and potential bias.</p><p>Third, it is possible that the Patient Representation Group introduces a bias in the question selection.</p><p>Fourth, our study does not simulate real-world interactions, where patients typically ask follow-up questions and discuss outcomes directly with their treating physicians. 
A comprehensive real-world evaluation would require extended and interactive dialogues between patients and the chatbot.</p><p>Fifth, although we share test questions, source documents and prompts, reproducibility is only partially achievable because our study relies on closed-source algorithms for information retrieval and output generation by the LLM, whose parameters and implementations may change over time.</p></sec><sec id="s4-4"><title>Future Directions</title><p>Since we observe that some questions are not directly addressed in the included documents, a future model can be improved by including relevant journal articles or guidelines and recommendations from other countries. It is well-known that the prompting strategy can influence model performance [<xref ref-type="bibr" rid="ref57">57</xref>]; therefore, improved prompting (eg, chain-of-thought [<xref ref-type="bibr" rid="ref58">58</xref>], multiround iterative questioning [<xref ref-type="bibr" rid="ref59">59</xref>], or self-reasoning [<xref ref-type="bibr" rid="ref60">60</xref>]) may significantly improve results. In addition, specific fine-tuning on a BC question answering task can be expected to improve performance [<xref ref-type="bibr" rid="ref61">61</xref>].</p><p>A total of 14 questions revolved around insurance coverage, reimbursement, and legal aspects of social law, which underscores the need for specific support in these areas. For our study, we consider these topics out of scope, since addressing them would require an interdisciplinary collaboration between legal and social service experts and a different set of authoritative documents. However, it is crucial to address these issues, as a cancer diagnosis poses a high risk of financial problems for patients [<xref ref-type="bibr" rid="ref55">55</xref>]. 
An enhanced chatbot or a chatbot specifically designed to address patient questions in these areas would increase the support available to patients.</p></sec><sec id="s4-5"><title>Conclusion</title><p>A cancer diagnosis is a major turning point in life for most, as it is potentially life-threatening and life-changing. We evaluate an AI-based chatbot in answering realistic and challenging patient questions in the German language. The BC chatbot prototype provides largely accurate, comprehensible, and safe answers for German-speaking patients with BC, but incomplete information remains a limitation, particularly concerning reimbursement and newer treatments. The technology may provide real value today, as it is easy to use and available at any time and might help to meet patients&#x2019; needs for information. Further development, testing, and evaluation of chatbots for patients is a multidisciplinary endeavor that should actively involve patients in this process. In addition, the inclusion of guardrails and prominent disclaimers that technology cannot replace professional consultation and human oversight is important for apps.</p></sec></sec></body><back><ack><p>We would like to thank mamazone&#x2013;Frauen und Forschung gegen Brustkrebs e. V. for their invaluable support that made this project possible. In addition, we would like to thank Christina Clau&#x00DF;en and her Patient Advocacy Team of Pfizer Germany for their many contributions.</p></ack><notes><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published article and its supplementary information files.</p></sec></notes><fn-group><fn fn-type="con"><p>SB, EG, CL, and UM conceptualized the study. SB and UM curated the data and wrote the original draft. ML, FB, and CvK advised on methodology, investigation, and formal analysis. All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>MPL was a paid consultant to Pfizer in connection with the development of this paper. 
FB was a paid consultant to Pfizer in connection with the development of this paper. CvK was a paid consultant to Pfizer in connection with the development of this paper. SDB, EG, CL, and UM are salaried employees at Pfizer Pharma GmbH and shareholders of Pfizer.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AGO</term><def><p>Arbeitsgemeinschaft Gyn&#x00E4;kologische Onkologie e.V.</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">BC</term><def><p>breast cancer</p></def></def-item><def-item><term id="abb4">DCIS</term><def><p>ductal carcinoma in situ</p></def></def-item><def-item><term id="abb5">GPT</term><def><p>generative pretrained transformer</p></def></def-item><def-item><term id="abb6">HCP</term><def><p>health care professional</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">RAG</term><def><p>retrieval-augmented generation</p></def></def-item><def-item><term id="abb9">TRIPOD+LLM</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis+Large Language Model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arnold</surname><given-names>M</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rumgay</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Current and future burden of breast cancer: global statistics for 2020 and 2040</article-title><source>Breast</source><year>2022</year><month>12</month><volume>66</volume><fpage>15</fpage><lpage>23</lpage><pub-id 
pub-id-type="doi">10.1016/j.breast.2022.08.010</pub-id><pub-id pub-id-type="medline">36084384</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x0141;ukasiewicz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Czeczelewski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Forma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Baj</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sitarz</surname><given-names>R</given-names> </name><name name-style="western"><surname>Stanis&#x0142;awek</surname><given-names>A</given-names> </name></person-group><article-title>Breast cancer-epidemiology, risk factors, classification, prognostic markers, and current treatment strategies-an updated review</article-title><source>Cancers (Basel)</source><year>2021</year><month>08</month><day>25</day><volume>13</volume><issue>17</issue><fpage>4287</fpage><pub-id pub-id-type="doi">10.3390/cancers13174287</pub-id><pub-id pub-id-type="medline">34503097</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mistry</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>S</given-names> </name><name name-style="western"><surname>Priestman</surname><given-names>T</given-names> </name><name name-style="western"><surname>Damery</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haque</surname><given-names>M</given-names> </name></person-group><article-title>How do the information needs of cancer patients differ at different stages of the cancer journey? 
A cross-sectional survey</article-title><source>JRSM Short Rep</source><year>2010</year><month>09</month><day>15</day><volume>1</volume><issue>4</issue><fpage>30</fpage><pub-id pub-id-type="doi">10.1258/shorts.2010.010032</pub-id><pub-id pub-id-type="medline">21103122</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tran</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lamprell</surname><given-names>K</given-names> </name><name name-style="western"><surname>Nic Giolla Easpaig</surname><given-names>B</given-names> </name><name name-style="western"><surname>Arnolda</surname><given-names>G</given-names> </name><name name-style="western"><surname>Braithwaite</surname><given-names>J</given-names> </name></person-group><article-title>What information do patients want across their cancer journeys? A network analysis of cancer patients&#x2019; information needs</article-title><source>Cancer Med</source><year>2019</year><month>01</month><volume>8</volume><issue>1</issue><fpage>155</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.1002/cam4.1915</pub-id><pub-id pub-id-type="medline">30525298</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Eenbergen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Vromans</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Boll</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Changes in internet use and wishes of cancer survivors: A comparison between 2005 and 2017</article-title><source>Cancer</source><year>2020</year><month>01</month><day>15</day><volume>126</volume><issue>2</issue><fpage>408</fpage><lpage>415</lpage><pub-id 
pub-id-type="doi">10.1002/cncr.32524</pub-id><pub-id pub-id-type="medline">31580497</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Buyens</surname><given-names>G</given-names> </name><name name-style="western"><surname>van Balken</surname><given-names>M</given-names> </name><name name-style="western"><surname>Oliver</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Cancer literacy - Informing patients and implementing shared decision making</article-title><source>J Cancer Policy</source><year>2023</year><month>03</month><volume>35</volume><fpage>100375</fpage><pub-id pub-id-type="doi">10.1016/j.jcpo.2022.100375</pub-id><pub-id pub-id-type="medline">36462750</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heisig</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Shedden-Mora</surname><given-names>MC</given-names> </name><name name-style="western"><surname>von Blanckenburg</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Informing women with breast cancer about endocrine therapy: effects on knowledge and adherence</article-title><source>Psychooncology</source><year>2015</year><month>02</month><volume>24</volume><issue>2</issue><fpage>130</fpage><lpage>137</lpage><pub-id pub-id-type="doi">10.1002/pon.3611</pub-id><pub-id pub-id-type="medline">24953538</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kvarnstr&#x00F6;m</surname><given-names>K</given-names> </name><name name-style="western"><surname>Westerholm</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Airaksinen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liira</surname><given-names>H</given-names> </name></person-group><article-title>Factors contributing to medication adherence in patients with a chronic condition: a scoping review of qualitative research</article-title><source>Pharmaceutics</source><year>2021</year><month>07</month><day>20</day><volume>13</volume><issue>7</issue><fpage>1100</fpage><pub-id pub-id-type="doi">10.3390/pharmaceutics13071100</pub-id><pub-id pub-id-type="medline">34371791</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Verma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Madarnas</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sehdev</surname><given-names>S</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>G</given-names> </name><name name-style="western"><surname>Bajcar</surname><given-names>J</given-names> </name></person-group><article-title>Patient adherence to aromatase inhibitor treatment in the adjuvant setting</article-title><source>Curr Oncol</source><year>2011</year><month>05</month><volume>18 Suppl 1</volume><issue>Suppl 1</issue><fpage>S3</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.3747/co.v18i0.899</pub-id><pub-id pub-id-type="medline">21698059</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Shang</surname><given-names>X</given-names> 
</name><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name></person-group><article-title>Relationship between internet health information and patient compliance based on trust: empirical study</article-title><source>J Med Internet Res</source><year>2018</year><month>08</month><day>17</day><volume>20</volume><issue>8</issue><fpage>e253</fpage><pub-id pub-id-type="doi">10.2196/jmir.9364</pub-id><pub-id pub-id-type="medline">30120087</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>TQ</given-names> </name></person-group><article-title>Nature and diffusion of gynecologic cancer&#x2013;related misinformation on social media: analysis of tweets</article-title><source>J Med Internet Res</source><year>2018</year><month>10</month><day>16</day><volume>20</volume><issue>10</issue><fpage>e11515</fpage><pub-id pub-id-type="doi">10.2196/11515</pub-id><pub-id pub-id-type="medline">30327289</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lazard</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Nicolla</surname><given-names>S</given-names> </name><name name-style="western"><surname>Vereen</surname><given-names>RN</given-names> </name><etal/></person-group><article-title>Exposure and reactions to cancer treatment misinformation and advice: survey study</article-title><source>JMIR Cancer</source><year>2023</year><month>07</month><day>28</day><volume>9</volume><fpage>e43749</fpage><pub-id pub-id-type="doi">10.2196/43749</pub-id><pub-id 
pub-id-type="medline">37505790</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fritsch</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Blankenheim</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wahl</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Attitudes and perception of artificial intelligence in healthcare: a cross-sectional survey among patients</article-title><source>Digit Health</source><year>2022</year><volume>8</volume><fpage>20552076221116772</fpage><pub-id pub-id-type="doi">10.1177/20552076221116772</pub-id><pub-id pub-id-type="medline">35983102</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hopkins</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Logan</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Kichenadasse</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sorich</surname><given-names>MJ</given-names> </name></person-group><article-title>Artificial intelligence chatbots will revolutionize how cancer patients access information: ChatGPT represents a paradigm-shift</article-title><source>JNCI Cancer Spectr</source><year>2023</year><month>03</month><day>1</day><volume>7</volume><issue>2</issue><fpage>pkad010</fpage><pub-id pub-id-type="doi">10.1093/jncics/pkad010</pub-id><pub-id pub-id-type="medline">36808255</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hudecek</surname><given-names>MFC</given-names> </name><name 
name-style="western"><surname>Lermer</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gaube</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cecil</surname><given-names>J</given-names> </name><name name-style="western"><surname>Heiss</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Batz</surname><given-names>F</given-names> </name></person-group><article-title>Fine for others but not for me: the role of perspective in patients&#x2019; perception of artificial intelligence in online medical platforms</article-title><source>Computers in Human Behavior: Artificial Humans</source><year>2024</year><month>01</month><volume>2</volume><issue>1</issue><fpage>100046</fpage><pub-id pub-id-type="doi">10.1016/j.chbah.2024.100046</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Amara</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bhattacharya</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>ML</given-names> </name></person-group><article-title>Patient and general public attitudes towards clinical artificial intelligence: a mixed methods systematic review</article-title><source>Lancet Digit Health</source><year>2021</year><month>09</month><volume>3</volume><issue>9</issue><fpage>e599</fpage><lpage>e611</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(21)00132-1</pub-id><pub-id pub-id-type="medline">34446266</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bitterman</surname><given-names>DS</given-names> </name><name 
name-style="western"><surname>Downing</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mau&#x00E9;s</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lustberg</surname><given-names>M</given-names> </name></person-group><article-title>Promise and perils of large language models for cancer survivorship and supportive care</article-title><source>J Clin Oncol</source><year>2024</year><month>05</month><day>10</day><volume>42</volume><issue>14</issue><fpage>1607</fpage><lpage>1611</lpage><pub-id pub-id-type="doi">10.1200/JCO.23.02439</pub-id><pub-id pub-id-type="medline">38452323</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haase</surname><given-names>I</given-names> </name><name name-style="western"><surname>Xiong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rissmann</surname><given-names>A</given-names> </name><name name-style="western"><surname>Knitza</surname><given-names>J</given-names> </name><name name-style="western"><surname>Greenfield</surname><given-names>J</given-names> </name><name name-style="western"><surname>Krusche</surname><given-names>M</given-names> </name></person-group><article-title>ChatSLE: consulting ChatGPT-4 for 100 frequently asked lupus questions</article-title><source>Lancet Rheumatol</source><year>2024</year><month>04</month><volume>6</volume><issue>4</issue><fpage>e196</fpage><lpage>e199</lpage><pub-id pub-id-type="doi">10.1016/S2665-9913(24)00056-0</pub-id><pub-id pub-id-type="medline">38508817</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siglen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Vetti</surname><given-names>HH</given-names> </name><name 
name-style="western"><surname>Lunde</surname><given-names>ABF</given-names> </name><etal/></person-group><article-title>Ask Rosa - The making of a digital genetic conversation tool, a chatbot, about hereditary breast and ovarian cancer</article-title><source>Patient Educ Couns</source><year>2022</year><month>06</month><volume>105</volume><issue>6</issue><fpage>1488</fpage><lpage>1494</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2021.09.027</pub-id><pub-id pub-id-type="medline">34649750</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lan</surname><given-names>W</given-names> </name><etal/></person-group><article-title>An AI dietitian for type 2 diabetes mellitus management based on large language and image recognition models: preclinical concept validation study</article-title><source>J Med Internet Res</source><year>2023</year><month>11</month><day>9</day><volume>25</volume><fpage>e51300</fpage><pub-id pub-id-type="doi">10.2196/51300</pub-id><pub-id pub-id-type="medline">37943581</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al Shamsi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Almutairi</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Al Mashrafi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Al Kalbani</surname><given-names>T</given-names> </name></person-group><article-title>Implications of language barriers for healthcare: a systematic review</article-title><source>Oman Med 
J</source><year>2020</year><month>03</month><volume>35</volume><issue>2</issue><fpage>e122</fpage><pub-id pub-id-type="doi">10.5001/omj.2020.40</pub-id><pub-id pub-id-type="medline">32411417</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bressem</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Papaioannou</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Grundmann</surname><given-names>P</given-names> </name><etal/></person-group><article-title>medBERT.de: a comprehensive German BERT model for the medical domain</article-title><source>Expert Syst Appl</source><year>2024</year><month>03</month><volume>237</volume><fpage>121598</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2023.121598</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Tikkanen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Osborn</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mossialos</surname><given-names>E</given-names> </name><name name-style="western"><surname>Djordjevic</surname><given-names>A</given-names> </name></person-group><source>International Profiles of Health Care Systems</source><year>2020</year><publisher-name>Commonwealth Fund</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.commonwealthfund.org/sites/default/files/2020-12/International_Profiles_of_Health_Care_Systems_Dec2020.pdf">https://www.commonwealthfund.org/sites/default/files/2020-12/International_Profiles_of_Health_Care_Systems_Dec2020.pdf</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>von dem Knesebeck</surname><given-names>O</given-names> </name><name name-style="western"><surname>B&#x00F6;nte</surname><given-names>M</given-names> </name><name name-style="western"><surname>Siegrist</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Country differences in the diagnosis and management of coronary heart disease - a comparison between the US, the UK and Germany</article-title><source>BMC Health Serv Res</source><year>2008</year><month>09</month><day>29</day><volume>8</volume><issue>1</issue><fpage>198</fpage><pub-id pub-id-type="doi">10.1186/1472-6963-8-198</pub-id><pub-id pub-id-type="medline">18823556</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khene</surname><given-names>ZE</given-names> </name><name name-style="western"><surname>Bigot</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mathieu</surname><given-names>R</given-names> </name><name name-style="western"><surname>Roupr&#x00EA;t</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bensalah</surname><given-names>K</given-names> </name><collab>French Committee of Urologic Oncology</collab></person-group><article-title>Development of a personalized chat model based on the European Association of Urology oncology guidelines: harnessing the power of generative artificial intelligence in clinical practice</article-title><source>Eur Urol Oncol</source><year>2024</year><month>02</month><volume>7</volume><issue>1</issue><fpage>160</fpage><lpage>162</lpage><pub-id pub-id-type="doi">10.1016/j.euo.2023.06.009</pub-id><pub-id pub-id-type="medline">37474402</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>W</given-names> </name><etal/></person-group><article-title>A survey on hallucination in large language models: principles, taxonomy, challenges, and open questions</article-title><source>arXiv</source><comment>Preprint posted online on  Nov 9, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2311.05232</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Rawte</surname><given-names>V</given-names> </name><name name-style="western"><surname>Chakraborty</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pathak</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The troubling emergence of hallucination in large language models -- an extensive definition, quantification, and prescriptive remediations</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 8, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2310.04988</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kataoka</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Takemura</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sasajima</surname><given-names>M</given-names> </name><name name-style="western"><surname>Katoh</surname><given-names>N</given-names> </name></person-group><article-title>Development and early feasibility of chatbots for educating patients with lung cancer and their caregivers in Japan: mixed methods study</article-title><source>JMIR 
Cancer</source><year>2021</year><month>03</month><day>10</day><volume>7</volume><issue>1</issue><fpage>e26911</fpage><pub-id pub-id-type="doi">10.2196/26911</pub-id><pub-id pub-id-type="medline">33688839</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00F6;rtz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Baumg&#x00E4;rtner</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schmid</surname><given-names>T</given-names> </name><etal/></person-group><article-title>An artificial intelligence-based chatbot for prostate cancer education: Design and patient evaluation study</article-title><source>Digit Health</source><year>2023</year><volume>9</volume><fpage>20552076231173304</fpage><pub-id pub-id-type="doi">10.1177/20552076231173304</pub-id><pub-id pub-id-type="medline">37152238</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ge</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Owens</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Development of a liver disease-specific large language model chat interface using retrieval-augmented generation</article-title><source>Hepatology</source><year>2024</year><month>11</month><day>1</day><volume>80</volume><issue>5</issue><fpage>1158</fpage><lpage>1168</lpage><pub-id pub-id-type="doi">10.1097/HEP.0000000000000834</pub-id><pub-id pub-id-type="medline">38451962</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Gilbert</surname><given-names>S</given-names> </name><name name-style="western"><surname>Harvey</surname><given-names>H</given-names> </name><name name-style="western"><surname>Melvin</surname><given-names>T</given-names> </name><name name-style="western"><surname>Vollebregt</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wicks</surname><given-names>P</given-names> </name></person-group><article-title>Large language model AI chatbots require approval as medical devices</article-title><source>Nat Med</source><year>2023</year><month>10</month><volume>29</volume><issue>10</issue><fpage>2396</fpage><lpage>2398</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02412-6</pub-id><pub-id pub-id-type="medline">37391665</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Tonmoy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zaman</surname><given-names>SMM</given-names> </name><name name-style="western"><surname>Jain</surname><given-names>V</given-names> </name><etal/></person-group><article-title>A comprehensive survey of hallucination mitigation techniques in large language models</article-title><year>2024</year><month>01</month><day>2</day><pub-id pub-id-type="doi">10.48550/arXiv.2401.01313</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Lewis</surname><given-names>P</given-names> </name><name name-style="western"><surname>Perez</surname><given-names>E</given-names> </name><name name-style="western"><surname>Piktus</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Retrieval-augmented generation for knowledge-intensive NLP tasks</article-title><year>2020</year><month>05</month><day>22</day><pub-id 
pub-id-type="doi">10.48550/arXiv.2005.11401</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reames</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Krell</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Ponto</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>SL</given-names> </name></person-group><article-title>Critical evaluation of oncology clinical practice guidelines</article-title><source>J Clin Oncol</source><year>2013</year><month>07</month><day>10</day><volume>31</volume><issue>20</issue><fpage>2563</fpage><lpage>2568</lpage><pub-id pub-id-type="doi">10.1200/JCO.2012.46.8371</pub-id><pub-id pub-id-type="medline">23752105</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Untch</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fasching</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Brucker</surname><given-names>SY</given-names> </name><etal/></person-group><article-title>Behandlung von Patientinnen mit fr&#x00FC;hem Mammakarzinom: Evidenz, Kontroversen, Konsens &#x2013; Meinungsbild deutscher Expert*innen zur 17. 
Internationalen St.-Gallen-Konsensuskonferenz [Article in German]</article-title><source>Senologie - Zeitschrift F&#x00FC;r Mammadiagnostik und -therapie</source><year>2021</year><month>06</month><volume>18</volume><issue>2</issue><fpage>163</fpage><lpage>181</lpage><pub-id pub-id-type="doi">10.1055/a-1463-8544</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gradishar</surname><given-names>WJ</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>BO</given-names> </name><name name-style="western"><surname>Abraham</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Breast cancer, version 3.2020, NCCN clinical practice guidelines in oncology</article-title><source>J Natl Compr Canc Netw</source><year>2020</year><month>04</month><volume>18</volume><issue>4</issue><fpage>452</fpage><lpage>478</lpage><pub-id pub-id-type="doi">10.6004/jnccn.2020.0016</pub-id><pub-id pub-id-type="medline">32259783</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>OpenAI</collab><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 15, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><article-title>Introducing GPTs</article-title><source>OpenAI</source><year>2023</year><comment><ext-link ext-link-type="uri" 
xlink:href="https://openai.com/index/introducing-gpts/">https://openai.com/index/introducing-gpts/</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Finardi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Avila</surname><given-names>L</given-names> </name><name name-style="western"><surname>Castaldoni</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The chronicles of RAG: the retriever, the chunk and the generator</article-title><year>2024</year><comment><ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2401.07883">http://arxiv.org/abs/2401.07883</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Popat</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Deshmukh</surname><given-names>PB</given-names> </name><name name-style="western"><surname>Metre</surname><given-names>VA</given-names> </name></person-group><article-title>Hierarchical document clustering based on cosine similarity measure</article-title><conf-name>2017 1st International Conference on Intelligent Systems and Information Management (ICISIM)</conf-name><conf-date>Oct 5-6, 2017</conf-date><conf-loc>Aurangabad, India</conf-loc><publisher-name>IEEE</publisher-name><fpage>153</fpage><lpage>159</lpage><pub-id pub-id-type="doi">10.1109/ICISIM.2017.8122166</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>The CHART Collaborative</collab></person-group><article-title>Protocol for the development of the Chatbot Assessment Reporting Tool (CHART) for clinical advice</article-title><source>BMJ 
Open</source><year>2024</year><month>05</month><volume>14</volume><issue>5</issue><fpage>e081155</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2023-081155</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernstein</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>YV</given-names> </name><name name-style="western"><surname>Govil</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Comparison of ophthalmologist and large language model chatbot responses to online patient eye care questions</article-title><source>JAMA Netw Open</source><year>2023</year><month>08</month><day>1</day><volume>6</volume><issue>8</issue><fpage>e2330320</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.30320</pub-id><pub-id pub-id-type="medline">37606922</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jeblick</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schachtner</surname><given-names>B</given-names> </name><name name-style="western"><surname>Dexl</surname><given-names>J</given-names> </name><etal/></person-group><article-title>ChatGPT makes medicine easy to swallow: an exploratory case study on simplified radiology reports</article-title><source>Eur Radiol</source><year>2024</year><month>05</month><volume>34</volume><issue>5</issue><fpage>2817</fpage><lpage>2825</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-10213-1</pub-id><pub-id pub-id-type="medline">37794249</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maroncelli</surname><given-names>R</given-names> 
</name><name name-style="western"><surname>Rizzo</surname><given-names>V</given-names> </name><name name-style="western"><surname>Pasculli</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Probing clarity: AI-generated simplified breast imaging reports for enhanced patient comprehension powered by ChatGPT-4o</article-title><source>Eur Radiol Exp</source><year>2024</year><month>10</month><day>30</day><volume>8</volume><issue>1</issue><fpage>124</fpage><pub-id pub-id-type="doi">10.1186/s41747-024-00526-1</pub-id><pub-id pub-id-type="medline">39477904</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>WH</given-names> </name><etal/></person-group><article-title>Assessing the performance of ChatGPT in answering questions regarding cirrhosis and hepatocellular carcinoma</article-title><source>Clin Mol Hepatol</source><year>2023</year><month>07</month><volume>29</volume><issue>3</issue><fpage>721</fpage><lpage>732</lpage><pub-id pub-id-type="doi">10.3350/cmh.2023.0089</pub-id><pub-id pub-id-type="medline">36946005</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Janni</surname><given-names>W</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>V</given-names> </name></person-group><source>Patientenratgeber Zu Den AGO-Empfehlungen 2023 [Book in German]</source><year>2023</year><publisher-name>Zuckschwerdt Verlag M&#x00FC;nchen</publisher-name></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="book"><person-group 
person-group-type="author"><name name-style="western"><surname>W&#x00F6;ckel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kreienberg</surname><given-names>R</given-names> </name><name name-style="western"><surname>Janni</surname><given-names>W</given-names> </name></person-group><source>Interdisziplin&#x00E4;re S3-Leitlinie f&#x00FC;r die Fr&#x00FC;herkennung, Diagnostik, Therapie und Nachsorge des Mammakarzinoms [Book in German]</source><year>2021</year></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>W&#x00F6;rmann</surname><given-names>B</given-names> </name><name name-style="western"><surname>Aebi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Balic</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Mammakarzinom der Frau</article-title><source>DGHO Deutsche Gesellschaft F&#x00FC;r H&#x00E4;matologie Und Medizinische Onkologie eV [Book in German]</source><year>2018</year></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Borchert</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lohr</surname><given-names>C</given-names> </name><name name-style="western"><surname>Modersohn</surname><given-names>L</given-names> </name><etal/></person-group><article-title>GGPONC: a corpus of German medical text with rich metadata based on clinical practice guidelines</article-title><source>arXiv</source><comment>Preprint posted online on Jul 13, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2007.06400</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Lahiri</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>QV</given-names> </name></person-group><article-title>AlzheimerRAG: multimodal retrieval augmented generation for PubMed articles</article-title><year>2024</year><month>12</month><day>21</day><pub-id pub-id-type="doi">10.48550/arXiv.2412.16701</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goodman</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Patrinely</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>CA</given-names>  <suffix>Jr</suffix></name><etal/></person-group><article-title>Accuracy and reliability of chatbot responses to physician questions</article-title><source>JAMA Netw Open</source><year>2023</year><month>10</month><day>2</day><volume>6</volume><issue>10</issue><fpage>e2336483</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36483</pub-id><pub-id pub-id-type="medline">37782499</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iannantuono</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Bracken-Clarke</surname><given-names>D</given-names> </name><name name-style="western"><surname>Floudas</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Roselli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gulley</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Karzai</surname><given-names>F</given-names> </name></person-group><article-title>Applications of large language models in cancer care: current evidence and future 
perspectives</article-title><source>Front Oncol</source><year>2023</year><volume>13</volume><fpage>1268915</fpage><pub-id pub-id-type="doi">10.3389/fonc.2023.1268915</pub-id><pub-id pub-id-type="medline">37731643</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ng</surname><given-names>KKY</given-names> </name><name name-style="western"><surname>Matsuba</surname><given-names>I</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>PC</given-names> </name></person-group><article-title>RAG in health care: a novel framework for improving communication and decision-making by addressing LLM limitations</article-title><source>NEJM AI</source><year>2025</year><month>01</month><volume>2</volume><issue>1</issue><pub-id pub-id-type="doi">10.1056/AIra2400380</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Quidwai</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Lagana</surname><given-names>A</given-names> </name></person-group><article-title>A RAG chatbot for precision medicine of multiple myeloma</article-title><source>Genetic and Genomic Medicine</source><comment>Preprint posted online in 2024</comment><pub-id pub-id-type="doi">10.1101/2024.03.14.24304293</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eliassen</surname><given-names>FM</given-names> </name><name name-style="western"><surname>Bl&#x00E5;fjelldal</surname><given-names>V</given-names> </name><name name-style="western"><surname>Helland</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Importance of endocrine treatment adherence and 
persistence in breast cancer survivorship: a systematic review</article-title><source>BMC Cancer</source><year>2023</year><month>07</month><day>4</day><volume>23</volume><issue>1</issue><fpage>625</fpage><pub-id pub-id-type="doi">10.1186/s12885-023-11122-8</pub-id><pub-id pub-id-type="medline">37403065</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Park</surname><given-names>HS</given-names> </name><name name-style="western"><surname>Gross</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>JB</given-names> </name></person-group><article-title>Complementary medicine, refusal of conventional cancer therapy, and survival among patients with curable cancers</article-title><source>JAMA Oncol</source><year>2018</year><month>10</month><day>1</day><volume>4</volume><issue>10</issue><fpage>1375</fpage><lpage>1381</lpage><pub-id pub-id-type="doi">10.1001/jamaoncol.2018.2487</pub-id><pub-id pub-id-type="medline">30027204</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sivarajkumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kelley</surname><given-names>M</given-names> </name><name name-style="western"><surname>Samolyk-Mazzanti</surname><given-names>A</given-names> </name><name name-style="western"><surname>Visweswaran</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name></person-group><article-title>An empirical evaluation of prompting strategies for large language models in zero-shot clinical natural language processing: algorithm development and validation 
study</article-title><source>JMIR Med Inform</source><year>2024</year><month>04</month><day>8</day><volume>12</volume><fpage>e55318</fpage><pub-id pub-id-type="doi">10.2196/55318</pub-id><pub-id pub-id-type="medline">38587879</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>GG</given-names> </name><name name-style="western"><surname>Latif</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>N</given-names> </name><name name-style="western"><surname>Zhai</surname><given-names>X</given-names> </name></person-group><article-title>Applying large language models and chain-of-thought for automatic scoring</article-title><source>Computers and Education: Artificial Intelligence</source><year>2024</year><month>06</month><volume>6</volume><fpage>100213</fpage><pub-id pub-id-type="doi">10.1016/j.caeai.2024.100213</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yuan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bao</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Advanced prompting as a catalyst: empowering large language models in the management of gastrointestinal cancers</article-title></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Xia</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Shi</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name></person-group><article-title>Improving retrieval augmented language model with self-reasoning</article-title><year>2024</year><month>07</month><day>29</day><pub-id pub-id-type="doi">10.48550/arXiv.2407.19813</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Annunziata</surname><given-names>A</given-names> </name><name name-style="western"><surname>Luong</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Enhancing Q&#x0026;A with domain-specific fine-tuning and iterative reasoning: a comparative study</article-title><source>arXiv</source><year>2024</year><month>04</month><day>17</day><pub-id pub-id-type="doi">10.48550/arXiv.2404.11792</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Questions for initial experimentation, prompts, and incomplete answers.</p><media xlink:href="cancer_v11i1e68426_app1.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Questions of the Patient Representation Group along with answers by the chatbot.</p><media xlink:href="cancer_v11i1e68426_app2.xlsx" xlink:title="XLSX File, 76 KB"/></supplementary-material><supplementary-material id="app3"><label>Checklist 1</label><p>TRIPOD+LLM (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis+Large Language Model) checklist.</p><media xlink:href="cancer_v11i1e68426_app3.pdf" xlink:title="PDF File, 176 
KB"/></supplementary-material></app-group></back></article>