<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e67914</article-id><article-id pub-id-type="doi">10.2196/67914</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Evaluation of Large Language Models in Tailoring Educational Content for Cancer Survivors and Their Caregivers: Quality Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Darren</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hu</surname><given-names>Xiao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xiao</surname><given-names>Canhua</given-names></name><degrees>RN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Bai</surname><given-names>Jinbing</given-names></name><degrees>RN, MSN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Barandouzi</surname><given-names>Zahra A</given-names></name><degrees>RN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Stephanie</given-names></name><degrees>OCN, RN, MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Webster</surname><given-names>Caitlin</given-names></name><degrees>CPHON, RN, MSN</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Brock</surname><given-names>La-Urshalar</given-names></name><degrees>CNM, FNP-BC, MSN</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Lindsay</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bold</surname><given-names>Delgersuren</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Lin</surname><given-names>Yufen</given-names></name><degrees>RN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Nell Hodgson Woodruff School of Nursing, Emory University</institution><addr-line>1520 Clifton Rd 
NE</addr-line><addr-line>Atlanta</addr-line><addr-line>GA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Center for Data Science, Emory University</institution><addr-line>Atlanta</addr-line><addr-line>GA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Winship Cancer Institute, Emory University</institution><addr-line>Atlanta</addr-line><addr-line>GA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Medicine, University of Florida</institution><addr-line>Gainesville</addr-line><addr-line>FL</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Deroy</surname><given-names>Aniket</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Zhiyuan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yufen Lin, RN, PhD, Nell Hodgson Woodruff School of Nursing, Emory University, 1520 Clifton Rd NE, Atlanta, GA, 30322, United States, 1 4042514072; <email>yufen.lin@emory.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>7</day><month>4</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e67914</elocation-id><history><date date-type="received"><day>24</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>27</day><month>02</month><year>2025</year></date><date date-type="accepted"><day>28</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Darren Liu, Xiao Hu, Canhua Xiao, Jinbing Bai, Zahra A Barandouzi, Stephanie Lee, Caitlin Webster, La-Urshalar Brock, Lindsay Lee, Delgersuren Bold, Yufen 
Lin. Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 7.4.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e67914"/><abstract><sec><title>Background</title><p>Cancer survivors and their caregivers, particularly those from disadvantaged backgrounds with limited health literacy or racial and ethnic minorities facing language barriers, are at a disproportionately higher risk of experiencing symptom burdens from cancer and its treatments. Large language models (LLMs) offer a promising avenue for generating concise, linguistically appropriate, and accessible educational materials tailored to these populations. 
However, there is limited research evaluating how effectively LLMs perform in creating targeted content for individuals with diverse literacy and language needs.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the overall performance of LLMs in generating tailored educational content for cancer survivors and their caregivers with limited health literacy or language barriers, compare the performances of 3 Generative Pretrained Transformer (GPT) models (ie, GPT-3.5 Turbo, GPT-4, and GPT-4 Turbo; OpenAI), and examine how different prompting approaches influence the quality of the generated content.</p></sec><sec sec-type="methods"><title>Methods</title><p>We selected 30 topics from national guidelines on cancer care and education. GPT-3.5 Turbo, GPT-4, and GPT-4 Turbo were used to generate tailored content of up to 250 words at a 6th-grade reading level, with translations into Spanish and Chinese for each topic. Two distinct prompting approaches (textual and bulleted) were applied and evaluated. Nine oncology experts evaluated 360 generated responses based on predetermined criteria: word limit, reading level, and quality assessment (ie, clarity, accuracy, relevance, completeness, and comprehensibility). ANOVA (analysis of variance) or chi-square analyses were used to compare differences among the various GPT models and prompts.</p></sec><sec sec-type="results"><title>Results</title><p>Overall, LLMs showed excellent performance in tailoring educational content, with 74.2% (267/360) adhering to the specified word limit and achieving an average quality assessment score of 8.933 out of 10. However, LLMs showed moderate performance in reading level, with 41.1% (148/360) of content failing to meet the sixth-grade reading level. LLMs demonstrated strong translation capabilities, achieving an accuracy of 96.7% (87/90) for Spanish and 81.1% (73/90) for Chinese translations. 
Common errors included imprecise scopes, inaccuracies in definitions, and content that lacked actionable recommendations. The more advanced GPT-4 family models showed better overall performance compared to GPT-3.5 Turbo. Prompting GPTs to produce bulleted-format content was likely to result in better educational content compared with textual-format content.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>All 3 LLMs demonstrated high potential for delivering multilingual, concise, and low health literacy educational content for cancer survivors and caregivers who face limited literacy or language barriers. GPT-4 family models were notably more robust. While further refinement is required to ensure simpler reading levels and fully comprehensive information, these findings highlight LLMs as an emerging tool for bridging gaps in cancer education and advancing health equity. Future research should integrate expert feedback, additional prompt engineering strategies, and specialized training data to optimize content accuracy and accessibility.</p></sec><sec sec-type="registered-report"><title>International Registered Report Identifier (IRRID)</title><p>RR2-10.2196/48499</p></sec></abstract><kwd-group><kwd>large language models</kwd><kwd>GPT-4</kwd><kwd>cancer survivors</kwd><kwd>caregivers</kwd><kwd>education</kwd><kwd>health equity</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>More than 18.1 million individuals with a history of cancer were alive in the United States in 2022, and that number is projected to reach 26 million by 2040 [<xref ref-type="bibr" rid="ref1">1</xref>]. Cancer survivors receive a wide range of treatments, often experiencing severe symptoms or side effects, including fatigue, depression, anxiety, sleep disturbance, pain, cognitive impairment, nausea, vomiting, and neuropathy [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. 
These symptoms negatively impact survivors&#x2019; functional status, quality of life, and overall survival rates [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Cancer caregivers, typically family members or significant others offering primary emotional and physical support for cancer survivors, experience an array of similar distressing symptoms [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. These symptoms are linked to high caregiving burden, emotional distress, and communication barriers with cancer survivors and providers [<xref ref-type="bibr" rid="ref15">15</xref>]. In addition, disparities in health care access further exacerbate the challenges faced by cancer survivors and their caregivers, especially those from disadvantaged communities that have limited health literacy or language barriers [<xref ref-type="bibr" rid="ref16">16</xref>]. Those with limited health literacy and racial and ethnic minorities facing language barriers are at greater risk for poorer access to care [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Consequently, they tend to experience a heavier symptom burden and poorer health outcomes during and after cancer treatments [<xref ref-type="bibr" rid="ref20">20</xref>].</p><p>With over three-quarters of the disadvantaged population owning smartphones or computers [<xref ref-type="bibr" rid="ref21">21</xref>], technology-based intervention programs can bridge the accessibility gap and promote health equity [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. The advent and growth of artificial intelligence have enabled researchers to design tailored and personalized interventions and educational content to meet individual unmet needs [<xref ref-type="bibr" rid="ref24">24</xref>]. 
Large language models (LLMs) are advanced artificial intelligence systems that can understand and generate human-like text by training on vast amounts of data [<xref ref-type="bibr" rid="ref25">25</xref>]. LLMs perform various language tasks, such as answering questions and translating languages. How questions are asked can significantly affect the performance of LLMs. The process of crafting these questions, known as prompt engineering, is crucial for obtaining accurate and relevant responses from LLMs [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. While LLMs have demonstrated remarkable potential in cancer research [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref31">31</xref>], their efficacy in real-world scenarios, such as cancer care and education, which often require advanced levels of comprehension, has yet to be thoroughly assessed.</p><p>Recent advancements in LLMs, such as GPT-4 and GPT-4 Turbo (OpenAI) [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], have demonstrated their exceptional proficiency in completing various tasks, including coding, design, and content summarization. Previous research [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] indicates that LLMs can capture large volumes of text effectively, even without specialized domain knowledge. This ability highlights their sophistication in processing and understanding information across a broad spectrum of topics, and their potential to significantly aid in analyzing unstructured data in clinical environments (eg, clinical notes) [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. However, there are several notable gaps in the current knowledge. 
First, while LLMs have demonstrated high levels of accuracy in understanding extensive texts [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], even minor inaccuracies can have detrimental effects on patient outcomes [<xref ref-type="bibr" rid="ref38">38</xref>], particularly regarding actionable advice. Therefore, the content they generate still necessitates additional expert verification to ensure it is error-free and ready to be presented to patients and their caregivers. Second, although previous research [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] has demonstrated promising results in content summarization, these LLMs are often not applied in clinical environments, nor do they specifically address cancer care and education among disadvantaged groups that have limited health literacy or language barriers [<xref ref-type="bibr" rid="ref39">39</xref>]. Finally, most educational resources for cancer care are available exclusively in English, which can create comprehension challenges for non-English speakers (eg, Hispanic individuals and immigrants). Also, cancer survivors and their caregivers, already overwhelmed by treatment, often lack the time to read lengthy content. Therefore, it is essential to provide educational content in multiple languages and in a concise format to ensure effective communication and education [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>To address these gaps, our team aimed to evaluate how LLMs perform in tailoring educational content to enhance accessibility and comprehension for cancer survivors and their caregivers. In this study, our primary task was to evaluate and compare the capabilities of multiple GPT-based LLMs in generating concise, low-literacy-level, and multilingual educational content tailored for cancer survivors and their caregivers with limited health literacy or language barriers. 
Specifically, we aimed to evaluate the overall performance of LLMs in generating tailored educational content that adheres to a strict word limit, a sixth-grade reading level, and high-quality criteria (clarity, accuracy, relevance, completeness, and comprehensibility), compare the performances of 3 GPT models (GPT-3.5 Turbo, GPT-4, and GPT-4 Turbo), and explore how different prompt structures (textual vs bulleted format) influence the quality of the generated content. This approach helps them manage their symptoms more effectively, thereby reducing health disparities and promoting health equity.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Design</title><p>This study involved a multistep methodology that included: (1) specifying the exact task requirements for the LLMs, to produce educational content on 30 selected cancer care topics written at a sixth-grade reading level, limited to 250 words, and translated into Spanish and Chinese; (2) generating tailored educational content using 3 GPT models (GPT-3.5 Turbo, GPT-4, and GPT-4 Turbo) with 2 prompt styles (textual and bulleted); (3) expert evaluation of the generated content&#x2019;s adherence to word count, reading level, and 5 quality criteria; and (4) statistical analyses (ANOVA [analysis of variance] and chi-square test) to compare performance across models and prompt formats.</p></sec><sec id="s2-2"><title>Prompt Engineering</title><p>To promote the accessibility and comprehension of educational content for cancer survivors and their caregivers with limited health literacy and language barriers, we structured prompts to have LLMs produce content at a low reading level, maintain a word limit of 250, and provide Spanish and Chinese translations for each topic, as described below [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>The Flesch-Kincaid Grade Level (FKG) system [<xref ref-type="bibr" rid="ref42">42</xref>] was used to assess the readability of content 
produced by the LLMs. The FKG level is a readability test designed to indicate how difficult a text is to understand. It calculates the grade level required for someone to comprehend the text. The FKG is based on word length and sentence length, providing a numerical score that corresponds to US grade levels [<xref ref-type="bibr" rid="ref42">42</xref>]. The National Institutes of Health (NIH) and the American Medical Association (AMA) suggest that patient education materials should be written at a reading level no higher than the sixth grade [<xref ref-type="bibr" rid="ref43">43</xref>]. This recommendation is in place to ensure that the information is accessible to a broad spectrum of individuals, including those with limited health literacy. Therefore, our research targets an FKG level of 6 to align with this guidance.</p><p>We set a 250-word limit for our educational content, recognizing that cancer survivors and their caregivers are frequently preoccupied with treatment schedules and daily responsibilities, leaving them with limited time for reading [<xref ref-type="bibr" rid="ref44">44</xref>]. This word limit is designed to ensure that participants can complete the reading within 5 minutes, making the task both manageable and feasible within their schedules.</p><p>Furthermore, it has been shown that prompts exert a considerable impact on the responses generated by LLMs [<xref ref-type="bibr" rid="ref45">45</xref>]. 
Therefore, we compared different prompts, including both textual and bulleted formats, to determine which approach yields better results.</p><p>The prompts we used are as follows:</p><list list-type="order"><list-item><p>Textual format: &#x201C;Please summarize the following content in Flesch-Kincaid Grade level of 6 and under 250 words: [original text]&#x201D;</p></list-item><list-item><p>Bulleted format: &#x201C;Please summarize the following content into bullet points in Flesch-Kincaid Grade level of 6 and under 250 words: [original text]&#x201D;</p></list-item><list-item><p>Spanish translation: &#x201C;Please translate the following content into Spanish: [tailored text]&#x201D;</p></list-item><list-item><p>Chinese translation: &#x201C;Please translate the following content into Chinese: [tailored text]&#x201D;</p></list-item></list></sec><sec id="s2-3"><title>Expert Evaluation</title><p>We assembled a panel of 9 oncology experts, comprising 4 oncology professors, 4 doctoral students, and 1 medical resident. Among them, all are fluent in English, with 4 experts proficient in Chinese and 1 proficient in Spanish. Each response generated by the LLMs was evaluated by at least 2 experts to ensure a comprehensive assessment, except for the Spanish translation task, which was evaluated by a single expert. The panel conducted several Zoom meetings: the initial meeting provided training on content evaluation, and 3 additional meetings were held to discuss the results and feedback. Each expert was assigned 10 topics to evaluate and was required to provide feedback on the errors committed by the LLMs. 
These experts critically reviewed and annotated the LLM-generated content using a web-based Cohort Adjudication and Data Annotation (CADA) application [<xref ref-type="bibr" rid="ref34">34</xref>] (<xref ref-type="fig" rid="figure1">Figure 1</xref>) developed by our team.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>A screenshot of Cohort Adjudication and Data Annotation application.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e67914_fig01.png"/></fig></sec><sec id="s2-4"><title>Data Sources</title><p>Our primary sources for content generation were cancer survivors and caregiver education materials from the National Cancer Institute and the National Comprehensive Cancer Network guidelines [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. We selected 30 distinct topics covering a range of content such as fatigue, depression, anxiety, pain, cognitive impairment, nutrition, physical activity, healthy lifestyle, family communication, coping skills, and more. The selection of topics was informed by insights from our previous qualitative interviews with cancer survivors and their caregivers [<xref ref-type="bibr" rid="ref48">48</xref>] and an extensive review of the literature [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>]. 
We identified the key areas of interest and specific needs of cancer survivors and their caregivers with limited health literacy or language barriers, resulting in these 30 topics.</p></sec><sec id="s2-5"><title>Appraisal Criteria</title><p>Based on a previous study evaluating responses from LLMs [<xref ref-type="bibr" rid="ref34">34</xref>], we formulated a set of multidimensional criteria to thoroughly assess the performance of LLMs, which include adherence to a word limit of 250 words, achieving an FKG reading level of 6 or below, and quality assessment: (1) clarity (ie, ease of understanding in the response); (2) accuracy (ie, the response does not contain errors, like medical or language errors, that could negatively impact patients and their caregivers); (3) relevance (ie, the response is fully grounded in the materials we provided); (4) completeness (ie, the response encompasses all critical points from the materials); (5) comprehensibility (ie, the response is understandable enough that readers can apply it to their daily routine).</p><p>In terms of word limit, &#x201C;yes&#x201D; refers to a word count within 250 words, and &#x201C;no&#x201D; refers to a word count of more than 250 words. The reading level was evaluated using &#x201C;yes&#x201D; for an FKG level &#x2264;6; &#x201C;partial&#x201D; for an FKG level of &#x003E;6 to &#x2264;8; and &#x201C;no&#x201D; for an FKG level &#x003E;8. The FKG level was calculated by the Python package Textstat (version 0.7.3, Azu). For the quality assessment criteria, we implemented a scoring system in which evaluations were quantified based on their alignment with the expected outcomes. A score of 2 was assigned for &#x201C;yes&#x201D; evaluations, indicating full compliance; a score of 1 was given for &#x201C;partial&#x201D; evaluations, reflecting partial compliance; and a score of 0 was allocated for &#x201C;no&#x201D; evaluations, indicating noncompliance. 
The quality assessment included 5 criteria (1-5), each contributing a maximum of 2 points, for a total possible score of 10. The overall quality assessment ranged from 0 to 10, with 0 representing absent or lowest quality and 10 indicating the highest quality. For translation tasks, &#x201C;yes&#x201D; indicates a completely accurate translation, &#x201C;partial&#x201D; refers to a generally correct and understandable translation with minor errors, and &#x201C;no&#x201D; refers to a completely inaccurate translation containing incorrect or misleading information. Accuracy scores are calculated as the proportion of evaluations labeled as &#x201C;yes.&#x201D;</p></sec><sec id="s2-6"><title>Data Analysis</title><p>Descriptive analyses were conducted to determine the frequencies and percentages (for word limit, reading levels, and translations) and the means and SDs (for quality scores) of major variables. Quality scores were determined by calculating the mean scores for each criterion and then obtaining the overall scores through their summation. To compare differences among models and prompts, we used ANOVA or chi-square tests, as applicable. Values of <italic>P</italic>&#x003C;.05 were considered to indicate statistical significance. All analyses were conducted using Python statistical packages.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>The study protocol (STUDY00004750) was approved with exemptions by the institutional review board at Emory University. Oral consent was obtained from the 9 oncology experts, as no protected health information was collected. All participants were informed of the voluntary nature of their participation and their right to withdraw at any time without consequence. No protected health information or personally identifiable information was collected, and all research data were anonymized to maintain confidentiality. Study materials were securely stored and accessible only to authorized research team members. 
Participants did not receive any monetary or nonmonetary compensation for their involvement. The study was conducted in accordance with the US Common Rule (45 CFR 46) [<xref ref-type="bibr" rid="ref52">52</xref>].</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overall Performance of Large Language Models</title><p>In this study, 360 annotation values were collected from 9 experts. Overall, LLMs have shown excellent performance in tailoring content based on our criteria. For word limit, 267/360 responses (74.2%) were within the word limit (less than 250 words) set for the task. The result indicates the excellent ability of LLMs to produce responses that adhere to specified word limit requirements. Regarding reading levels, LLMs demonstrated moderate performance, with 105/360 responses (29.2%) fully meeting the specified FKG level (FKG level &#x2264;6), 107/360 (29.7%) being partially satisfactory (FKG level of 6&#x2010;8), and 148/360 (41.1%) not aligning with the provided FKG level (FKG level &#x003E;8).</p><p>LLMs demonstrated consistently high average scores across all quality criteria (total score: 8.933 out of 10). The highest average score achieved was 1.91 on relevance, highlighting the LLMs&#x2019; ability to generate content that was highly pertinent to the given prompts. The lowest average score observed was 1.58 out of 2 in the category of completeness, indicating a moderate adherence to providing responses that capture all key points. 
In the translation tasks, the LLMs demonstrated high performance, with 87/90 accurate translations (96.7%) for Spanish and 73/90 (81.1%) for Chinese.</p></sec><sec id="s3-2"><title>Comparison of Three Generative Pretrained Transformer Models: GPT-3.5 Turbo, GPT-4, and GPT-4 Turbo</title><p>GPT-4 demonstrated a superior capability in adhering to the specified word limit, with 101/120 responses (84.2%) falling within 250 words (<xref ref-type="table" rid="table1">Table 1</xref>). In contrast, GPT-3.5 Turbo and GPT-4 Turbo exhibited a relatively lower proficiency, with 86/120 (71.7%) and 80/120 (66.7%) responses meeting the word limit, respectively. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, when comparing the models based on word limit, the chi-square test demonstrated a significant difference among the three models (<italic>P</italic>=.006).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance of all models and prompts on the summarization task.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2"/><td align="left" valign="bottom" colspan="3">GPT<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>-3.5 Turbo<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> <sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="bottom" colspan="3">GPT-4<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> <sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="bottom" colspan="3">GPT-4 Turbo<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> <sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="bottom">Total</td><td align="left" valign="bottom">Textual format</td><td align="left" valign="bottom">Bullet points</td><td align="left" valign="bottom">Total</td><td align="left" valign="bottom">Textual 
format</td><td align="left" valign="bottom">Bullet points</td><td align="left" valign="bottom">Total</td><td align="left" valign="bottom">Textual format</td><td align="left" valign="bottom">Bullet points</td></tr></thead><tbody><tr><td align="left" valign="top">Word limit, %</td><td align="left" valign="top">71.7 (86/120)</td><td align="left" valign="top">46.7 (28/60)</td><td align="left" valign="top">96.7 (58/60)</td><td align="left" valign="top">84.2 (101/120)</td><td align="left" valign="top">91.7 (55/60)</td><td align="left" valign="top">76.7 (46/60)</td><td align="left" valign="top">66.7 (80/120)</td><td align="left" valign="top">51.7 (31/60)</td><td align="left" valign="top">81.7 (49/60)</td></tr><tr><td align="left" valign="top">Reading level, %</td><td align="left" valign="top">23.3 (28/120)</td><td align="left" valign="top">18.3 (11/60)</td><td align="left" valign="top">28.3 (17/60)</td><td align="left" valign="top">21.7 (26/120)</td><td align="left" valign="top">21.7 (13/60)</td><td align="left" valign="top">21.7 (13/60)</td><td align="left" valign="top">42.5 (51/120)</td><td align="left" valign="top">53.3 (32/60)</td><td align="left" valign="top">31.7 (19/60)</td></tr><tr><td align="left" valign="top">Accuracy, mean (SD)</td><td align="left" valign="top">1.775 (0.493)</td><td align="left" valign="top">1.767 (0.5)</td><td align="left" valign="top">1.783 (0.49)</td><td align="left" valign="top">1.767 (0.561)</td><td align="left" valign="top">1.8 (0.48)</td><td align="left" valign="top">1.733 (0.634)</td><td align="left" valign="top">1.783 (0.522)</td><td align="left" valign="top">1.8 (0.48)</td><td align="left" valign="top">1.767 (0.563)</td></tr><tr><td align="left" valign="top">Clarity, mean (SD)</td><td align="left" valign="top">1.792 (0.447)</td><td align="left" valign="top">1.833 (0.418)</td><td align="left" valign="top">1.75 (0.474)</td><td align="left" valign="top">1.833 (0.396)</td><td align="left" valign="top">1.867 (0.389)</td><td align="left" 
valign="top">1.8 (0.403)</td><td align="left" valign="top">1.8 (0.422)</td><td align="left" valign="top">1.883 (0.324)</td><td align="left" valign="top">1.717 (0.49)</td></tr><tr><td align="left" valign="top">Relevance, mean (SD)</td><td align="left" valign="top">1.892 (0.362)</td><td align="left" valign="top">1.883 (0.415)</td><td align="left" valign="top">1.9 (0.303)</td><td align="left" valign="top">1.925 (0.295)</td><td align="left" valign="top">1.883 (0.372)</td><td align="left" valign="top">1.967 (0.181)</td><td align="left" valign="top">1.925 (0.264)</td><td align="left" valign="top">1.9 (0.303)</td><td align="left" valign="top">1.95 (0.22)</td></tr><tr><td align="left" valign="top">Completeness, mean (SD)</td><td align="left" valign="top">1.558 (0.632)</td><td align="left" valign="top">1.533 (0.623)</td><td align="left" valign="top">1.583 (0.645)</td><td align="left" valign="top">1.575 (0.617)</td><td align="left" valign="top">1.483 (0.624)</td><td align="left" valign="top">1.667 (0.601)</td><td align="left" valign="top">1.617 (0.582)</td><td align="left" valign="top">1.583 (0.619)</td><td align="left" valign="top">1.65 (0.547)</td></tr><tr><td align="left" valign="top">Comprehensibility, mean (SD)</td><td align="left" valign="top">1.808 (0.436)</td><td align="left" valign="top">1.817 (0.469)</td><td align="left" valign="top">1.8 (0.403)</td><td align="left" valign="top">1.892 (0.312)</td><td align="left" valign="top">1.883 (0.324)</td><td align="left" valign="top">1.9 ( 0.303)</td><td align="left" valign="top">1.858 (0.35)</td><td align="left" valign="top">1.9 (0.303)</td><td align="left" valign="top">1.817 (0.39)</td></tr><tr><td align="left" valign="top">Total score, mean (SD)</td><td align="left" valign="top">8.825 (1.643)</td><td align="left" valign="top">8.833 (1.748)</td><td align="left" valign="top">8.817 (1.546)</td><td align="left" valign="top">8.992 (1.247)</td><td align="left" valign="top">8.917 (1.239)</td><td align="left" valign="top">9.067 
(1.26)</td><td align="left" valign="top">8.983 (1.195)</td><td align="left" valign="top">9.067 (1.087)</td><td align="left" valign="top">8.9 (1.298)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup> GPT: Generative Pretrained Transformer.</p></fn><fn id="table1fn2"><p><sup>b</sup> The performance (%) of GPT-3.5 Turbo was 93.3% (28/30), GPT-4 was 96.7% (29/30), and GPT-4 Turbo was 100% (30/30) for the Spanish translation. The overall performance (%) of the three GPT models in Spanish translation was 96.7% (87/90).</p></fn><fn id="table1fn3"><p><sup>c</sup> The performance (%) of GPT-3.5 Turbo was 76.7% (23/30), GPT-4 was 86.7% (26/30), and GPT-4 Turbo was 80% (24/30) for the Chinese translation. The overall performance (%) of the three GPT models in Chinese translation was 81.1% (73/90).</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Statistical analysis results from analysis of variance and chi-square tests.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group and criterion</td><td align="left" valign="bottom">PR(&#x003E;F)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">Chi-square (<italic>df</italic>)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><bold>Models</bold></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.97</td><td align="left" valign="top">&#x2003;&#x2014;<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clarity</td><td align="left" valign="top">0.721</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Relevance</td><td align="left" valign="top">0.63</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness</td><td align="left" valign="top">0.748</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Comprehensibility</td><td align="left" valign="top">0.215</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total score</td><td align="left" valign="top">0.572</td><td align="left" valign="top">&#x2003;&#x2014; (16)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Word limit</td><td align="left" valign="top">0.006</td><td align="left" valign="top">10.178 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reading level</td><td align="left" valign="top">&#x003C;0.001</td><td align="left" valign="top">35.468 (4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Spanish translation</td><td align="left" valign="top">0.355</td><td align="left" valign="top">2.069 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chinese translation</td><td align="left" valign="top">0.602</td><td align="left" valign="top">1.015 (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Translation</td><td align="left" valign="top">0.481</td><td 
align="left" valign="top">1.463 (2)</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Prompts</bold></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.213</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clarity</td><td align="left" valign="top">0.028</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Relevance</td><td align="left" valign="top">0.177</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness</td><td align="left" valign="top">0.154</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Comprehensibility</td><td align="left" valign="top">0.149</td><td align="left" valign="top">&#x2003;&#x2014; (2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total score</td><td align="left" valign="top">0.939</td><td align="left" valign="top">&#x2003;&#x2014; (8)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup> PR(&#x003E;F): probability that the F-statistic is greater than the observed value under the null hypothesis.</p></fn><fn id="table2fn2"><p><sup>b</sup> &#x2014;: not applicable.</p></fn></table-wrap-foot></table-wrap><p>Regarding the assessment of reading level, GPT-4 Turbo met the required FKG level of 6 in 51/120 (42.5%) cases, nearly doubling the performance of the other 2 models: 26/120 (21.7%) 
for GPT-4 and 28/120 (23.3%) for GPT-3.5 Turbo. The result indicated significant discrepancies among the models in adherence to the specified reading level (<italic>P</italic>&#x003C;.001), with GPT-4 Turbo performing better compared with the other 2 models.</p><p>In terms of quality assessment, each of the LLMs attained a high score exceeding 8.8 out of 10, with GPT-4 and GPT-4 Turbo achieving 8.992 and 8.983, respectively, and GPT-3.5 Turbo trailing slightly at 8.825. Upon evaluation of each criterion, the performance of all models was found to be similar (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The application of ANOVA tests revealed no significant differences among the 3 models on any individual criterion or on the total score (total score: <italic>P</italic>=.57).</p><p>In the translation tasks, GPT-4 Turbo exhibited perfect accuracy with 30/30 (100%) successful cases in Spanish translation, whereas GPT-4 and GPT-3.5 Turbo exhibited slightly lower, yet commendable success rates of 29/30 (97%) and 28/30 (93%), respectively. For the Chinese translation task, GPT-4 outperformed the other models with an accuracy of 26/30 (87%). In contrast, GPT-3.5 Turbo and GPT-4 Turbo achieved 23/30 (77%) and 24/30 (80%), respectively. The 3 models did not show a significant difference in the translation task (<italic>P</italic>=.48).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Assessment scores on each criterion between different models. GPT: Generative Pretrained Transformer.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e67914_fig02.png"/></fig></sec><sec id="s3-3"><title>Two Different Prompt Comparisons: Textual and Bulleted Formats</title><p>We compared 2 prompting methods in terms of word limits, reading level, and quality assessment. The major difference noted in the comparison of the 2 prompts was that responses generated from prompt 2 (bulleted format) were superior in adhering to the target word limit. 
Specifically, 153/180 responses (85%) from prompt 2 successfully achieved the word limit, in contrast to 114/180 responses (63.3%) from prompt 1 (textual format) that fully satisfied the word limit. Using prompt 1 resulted in only 56/180 responses (31.1%) meeting our desired reading level, with a slight decrease to 49/180 (27.2%) for prompt 2. For the 5 quality criteria, both prompts achieved high scores (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Upon performing an ANOVA test to assess the differences in performance between the 2 prompts (<xref ref-type="table" rid="table2">Table 2</xref>), it was found that the variations between them were not significant (<italic>P</italic>=.939). However, the 2 prompt formats demonstrated a significant difference in the clarity criterion (<italic>P</italic>=.03).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Assessment scores on each criterion between different prompts.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e67914_fig03.png"/></fig></sec><sec id="s3-4"><title>Error Analysis</title><p>The errors that LLMs committed were categorized into inaccurate scope, inaccurate definition, inaccurate expression, meaningless points, and inaccurate word. 
Some examples are shown in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Error cases and analysis.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Topic</td><td align="left" valign="bottom">Output</td><td align="left" valign="bottom">Error type</td><td align="left" valign="bottom">Reason</td></tr></thead><tbody><tr><td align="left" valign="top">GPT-3.5 Turbo</td><td align="left" valign="top">Nutrition</td><td align="left" valign="top">&#x201C;It advises limiting animal-based food, processed food, and alcohol consumption.&#x201D;</td><td align="left" valign="top">Inaccurate scope</td><td align="left" valign="top">The chapter only mentions limiting red meat, not all animal-based foods (says it can make up half or less of diet).</td></tr><tr><td align="left" valign="top">GPT-3.5 Turbo</td><td align="left" valign="top">Sexual Health Issues in Men with Cancer</td><td align="left" valign="top">&#x201C;It is still important to maintain intimacy with a partner.&#x201D;</td><td align="left" valign="top">Inaccurate expression</td><td align="left" valign="top">The tailored content sounds a little judgmental whereas the original document says, &#x201C;probably still important&#x201D; and is less assuming.</td></tr><tr><td align="left" valign="top">GPT-3.5 Turbo</td><td align="left" valign="top">Relaxation</td><td align="left" valign="top">&#x201C;&#x591A;&#x559D;&#x6DB2;&#x4F53;&#x201D;</td><td align="left" valign="top">Inaccurate word</td><td align="left" valign="top">Based on the English sentence: &#x201C;Drinking plenty of liquids,&#x201D; &#x201C;liquids&#x201D; can be better translated into &#x201C;&#x6C34;.&#x201D;</td></tr><tr><td align="left" valign="top">GPT-4</td><td align="left" valign="top">Mindfulness</td><td align="left" valign="top">&#x201C;These practices involve focusing the mind on 
present sensations, such as breathing, a sound, or an image.&#x201D;</td><td align="left" valign="top">Inaccurate definition</td><td align="left" valign="top">It seems to define meditation and mindfulness in one overarching definition, which only defines meditation.<break/>The model merged definitions of MBSR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> and MBCT<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> together and did not include the differences between the types.</td></tr><tr><td align="left" valign="top">GPT-4</td><td align="left" valign="top">Family Communication</td><td align="left" valign="top">&#x201C;El apoyo de la comunidad podr&#x00ED;a ser beneficioso durante este dif&#x00ED;cil per&#x00ED;odo.&#x201D;</td><td align="left" valign="top">Inaccurate word</td><td align="left" valign="top">Based on the English sentence: &#x201C;Support from the community might be beneficial during this difficult period,&#x201D; &#x201C;dif&#x00ED;cil per&#x00ED;odo&#x201D; should be &#x201C;per&#x00ED;odo dif&#x00ED;cil.&#x201D;</td></tr><tr><td align="left" valign="top">GPT-4 Turbo</td><td align="left" valign="top">Making a Difference</td><td align="left" valign="top">&#x201C;Learning: Educating yourself about cancer can empower you to assist others. Resources are available online, by phone, and in print.&#x201D;</td><td align="left" valign="top">Meaningless point</td><td align="left" valign="top">The customized content falls short in terms of actionability. 
The purpose of tailoring content is to educate patients and caregivers, rather than expecting them to educate themselves.</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MBSR: Mindfulness-based stress reduction.</p></fn><fn id="table3fn2"><p><sup>b</sup>MBCT: Mindfulness-based cognitive therapy.</p></fn></table-wrap-foot></table-wrap><p>A common error observed with LLMs is their tendency to integrate their own knowledge and interpretation rather than adhering strictly to the provided materials, such as an inaccurate scope. For instance, when the text specified &#x201C;to limit red meat.<italic>&#x201D;</italic> in the Nutrition topic, GPT-3.5 Turbo inaccurately generalized this advice to &#x201C;limiting animal-based food.&#x201D; This interpretation is not entirely correct, as animal-based food encompasses more than just red meat, including white meat such as chicken, which the original material did not intend to restrict.</p><p>Other observed errors involve inaccurate expressions. For instance, in the Sexual health issues in men with cancer topic, the original content suggested, <italic>&#x201C;</italic>It is probably still important to maintain intimacy with a partner.<italic>&#x201D;</italic> However, GPT-3.5 Turbo revised this to &#x201C;it is still important to maintain intimacy with a partner.&#x201D; This alteration results in a tone that may seem judgmental, deviating from the original&#x2019;s more tentative stance.</p><p>An example of inaccurate definition was identified within the Mindfulness topic, where GPT-4 defined meditation and mindfulness in one overarching definition for meditation. It also merged definitions of mindfulness-based stress reduction and mindfulness-based cognitive therapy without highlighting differences between the mindfulness interventions.</p><p>LLMs may also include information that, while accurate, might not be actionable for patients. 
For instance, in the Making a difference topic, GPT-4 Turbo correctly sourced from the material that &#x201C;Learning: Educating yourself about cancer can empower you to assist others. Resources are available online, by phone, and in print.&#x201D; However, this information becomes less useful in the absence of specific links or directions that could guide patients on where to start their education.</p><p>Finally, with respect to translation quality, the primary error observed related to inaccurate word choice. In particular, when an English term offers multiple potential translations, LLMs often encounter difficulty in selecting the most contextually appropriate option. For example, in the Relaxation topic, GPT-3.5 Turbo translated &#x201C;drinking plenty of liquids&#x201D; as &#x201C;&#x591A;&#x559D;&#x6DB2;&#x4F53;.&#x201D; Although &#x201C;&#x6DB2;&#x4F53;&#x201D; does literally translate to &#x201C;liquids,&#x201D; the more natural and contextually appropriate term would be &#x201C;&#x6C34;.&#x201D;</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>To our knowledge, this is the first study to evaluate the capability of LLMs in tailoring educational content for cancer survivors and their caregivers with limited health literacy or language barriers. In our study, all 3 LLMs have demonstrated overall excellent performance in most criteria. The more advanced GPT-4 family models showed better overall performance compared with GPT-3.5 Turbo. GPT-4&#x2019;s high adherence to word limits and GPT-4 Turbo&#x2019;s better compliance with the required reading level proved their ability to meet our requirements when tailoring content. Prompting GPTs to produce bulleted-format content is likely to result in better educational content compared with textual-format content. All models exhibit strong capability in generating highly relevant content. However, they fall short in terms of completeness. 
Overall, our results suggest that LLMs are highly effective in tailoring, condensing, and translating educational content for cancer survivors and their caregivers with limited health literacy or language barriers. These findings inform future versions of LLMs to focus more on the reading level and completeness of their output and the development of tailored intervention materials for cancer survivors and their caregivers. These promising results also indicate that LLMs can be a valuable tool in making educational content more accessible and comprehensible to diverse patient populations.</p><p>The capabilities of LLMs in text analysis have been well studied. For example, our previous study [<xref ref-type="bibr" rid="ref34">34</xref>] examined the potential of LLMs to categorize clinical concepts from patient notes. Yet, that study focused solely on the LLMs&#x2019; comprehension of patients&#x2019; conditions from clinical notes rather than educational content. A study by Veen et al [<xref ref-type="bibr" rid="ref53">53</xref>] assessed approaches for LLMs to summarize clinical texts. Although the summaries generated by LLMs, especially GPT-4, were overall preferred over those of human experts, the study was limited to the summarization of radiology report findings and confined to 3 attributes: completeness, correctness, and conciseness, whereas our study expanded on this topic by evaluating LLMs against 7 distinct criteria. Furthermore, none of the existing studies focus on education regarding supportive care in cancer, whereas our innovative findings make a significant contribution to the literature in this field.</p><p>Despite the excellence of LLMs in adhering to specified word limits and generating high-quality content, several challenges remain. One notable area where LLMs struggle is in adjusting the reading level of the content to accommodate patients from various educational levels. The content tailored by LLMs often does not meet the intended FKG level. 
This oversight implies that some individuals might find the content overly complex, potentially hindering their understanding of health information and educational content [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. Addressing this challenge is essential for maximizing the applicability of LLMs and ensuring that all cancer survivors receive the support they need to manage their cancer effectively. In future work, in-context learning could be used to offer more detailed guidance to LLMs, focusing on the vocabulary that frequently appears in content exceeding the specified FKG level of 6. In addition, retrieval-augmented generation could be implemented to embed vocabularies aligned with an FKG level of 6, thereby enhancing the model&#x2019;s performance.</p><p>It is also observed that the accuracy of Spanish translations is significantly higher than that of Chinese translations. This finding is expected, given the abundance of Spanish content available on the internet compared with Chinese content that can serve as training materials. Previous studies [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>] have shown that LLMs&#x2019; performance in different languages has a clear correlation with the proportion of each language in the pretraining corpus. Without fine-tuning, LLMs have a much higher performance in high-resource languages like German, French, and Spanish, and a significantly lower performance in low-resource languages like Kannada, Occitan, and Western Frisian [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. In future work, integrating high-quality bilingual medical corpora that include parallel texts of patient education materials, clinical guidelines, and culturally tailored health information could be a promising approach. 
Fine-tuning LLMs on such specialized corpora may provide them with domain-specific vocabulary and context, thereby increasing their ability to produce accurate, culturally sensitive translations.</p><p>Errors in educational content could be detrimental to cancer survivors and their caregivers by providing false physical activity, diet, or medication suggestions. Therefore, content produced by LLMs should undergo thorough evaluation and validation before the content is used in a clinical setting [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. Our analysis has identified multiple errors in the outputs from LLMs, including inaccuracies in scope, expression, and definition. These types of errors can lead to the dissemination of misinformation, potentially causing harm to patients [<xref ref-type="bibr" rid="ref60">60</xref>]. Therefore, such inaccuracies must be identified, analyzed, and rectified to prevent any negative impacts on patient care. Our study also detected some meaningless points that were not actionable in LLMs&#x2019; outputs, which could increase the reading burden on patients and their caregivers. Recommendations should highlight actionable information for cancer survivors and their caregivers to reduce the burden of reading educational content, emphasizing the need for LLMs to prioritize the use and applicability of the information they present. In addition, educational content should be evaluated and validated by content experts before it is made available to cancer survivors and their caregivers.</p><p>In addition, both Xiao et al&#x2019;s and Asthana et al&#x2019;s studies [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] evaluated the performance of fine-tuned LLMs in nonclinical environments. 
Their results highlighted the significant potential of LLMs in summarizing general text through the adoption of advanced fine-tuning techniques. It is possible that fine-tuning could further improve LLMs&#x2019; capacity to analyze educational content specifically tailored for groups such as cancer survivors and their caregivers with limited health literacy or language barriers. With this additional data, more advanced fine-tuning techniques such as instruction tuning [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>] and parameter-efficient fine-tuning [<xref ref-type="bibr" rid="ref63">63</xref>] can be implemented, and are likely to further enhance the performance.</p></sec><sec id="s4-2"><title>Limitations</title><p>While the study has shown promising results, it has several limitations. First, the dataset size remains relatively small, which could restrict the generalizability of the findings to broader topics. Second, we lacked participant assessment. Relying solely on oncology experts to evaluate the outputs from LLMs might create obstacles when applying these findings to actual cancer patients and their caregivers. While our oncology experts deeply value caring for disadvantaged populations with limited health literacy or language barriers, it&#x2019;s important to note that they are highly educated and might have unintentional biases. This could make it challenging for them to view educational content from the perspective of individuals with low health education and literacy. Therefore, future studies can be broadened to include a wider range of educational topics and additional annotations from cancer patients and their caregivers. Third, this study was limited to zero-shot learning because of the lack of training data. 
It could be expanded by collecting tailored content from human experts to serve as training data to incorporate few-shot learning and fine-tuning techniques. In addition, chain-of-thought reasoning and in-context learning also present promising avenues for future exploration, particularly because they do not rely on additional training data. Finally, due to a limited number of annotators from diverse backgrounds, our study was only able to evaluate translations in 2 languages. Our analysis suggests that translation performance can vary between languages, influenced by the availability of content in each language. It is important to note that these findings may not be generalizable to languages spoken by smaller populations, where content availability and linguistic nuances could further affect translation accuracy. In future research, more extensive evaluations of translation tasks involving other languages, especially low-resource languages, should be conducted to expand the applicability.</p></sec><sec id="s4-3"><title>Conclusions</title><p>The study highlights the application of LLMs in cancer care while being cognizant of their potential limitations. All 3 LLMs have demonstrated overall high capability in tailoring educational content for cancer survivors and their caregivers with limited health literacy or language barriers. GPT-4 family models showed better overall performance compared with GPT-3.5 Turbo. Prompting GPTs to produce bulleted-format content can generate better educational content. The findings from this study inform the intervention development and implementation in cancer symptom management and health equity. Additional studies are warranted to expedite the integration of AI-driven solutions into clinical settings.</p></sec></sec></body><back><ack><p>The study was supported by the Oncology Nursing Foundation Research Grant (2022 RE03). The authors also appreciate the support from Emory University School of Nursing and Winship Cancer Institute. 
During the preparation of this work, the authors used ChatGPT solely to improve language and readability. After using this tool, the authors reviewed and edited the content as needed and take full responsibility for the content of the publication.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during the current study are available upon reasonable request. The corresponding author (YL) will coordinate requests for data and maintain documentation for requests and distributions. Emory University has an established Institutional Data Use Agreement that can easily be adapted and deployed.</p></sec></notes><fn-group><fn fn-type="con"><p>DL developed scripts to collect responses from LLMs, built the annotation platform, and wrote the manuscript. XH provided technical support and helped with manuscript writing. CX, JB, ZB, LL, CW, LB, and SL performed annotations and supported manuscript writing. DB supported the development of the annotation platform and manuscript writing. YL supervised the entire project, provided valuable insights, and contributed to manuscript writing and revisions. 
All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AMA</term><def><p>American Medical Association</p></def></def-item><def-item><term id="abb2">ANOVA</term><def><p>analysis of variance</p></def></def-item><def-item><term id="abb3">CADA</term><def><p>Cohort Adjudication and Data Annotation</p></def></def-item><def-item><term id="abb4">FKG</term><def><p>Flesch-Kincaid Grade Level</p></def></def-item><def-item><term id="abb5">GPT</term><def><p>Generative Pretrained Transformer</p></def></def-item><def-item><term id="abb6">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb7">NIH</term><def><p>National Institutes of Health</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Statistics and graphs</article-title><source>National Cancer Institute at the National Institutes of Health</source><access-date>2025-03-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cancercontrol.cancer.gov/ocs/statistics">https://cancercontrol.cancer.gov/ocs/statistics</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names> </name><name name-style="western"><surname>Docherty</surname><given-names>SL</given-names> </name><etal/></person-group><article-title>Distinct morning and evening fatigue profiles in gastrointestinal cancer during chemotherapy</article-title><source>BMJ Support Palliat Care</source><year>2023</year><month>11</month><volume>13</volume><issue>e2</issue><fpage>e373</fpage><lpage>e381</lpage><pub-id 
pub-id-type="doi">10.1136/bmjspcare-2021-002914</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Docherty</surname><given-names>SL</given-names> </name><etal/></person-group><article-title>Distinct sleep disturbance profiles in patients with gastrointestinal cancers receiving chemotherapy</article-title><source>Cancer Nurs</source><year>2022</year><volume>45</volume><issue>2</issue><fpage>E417</fpage><lpage>E427</lpage><pub-id pub-id-type="doi">10.1097/NCC.0000000000000975</pub-id><pub-id pub-id-type="medline">34183521</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Docherty</surname><given-names>SL</given-names> </name><etal/></person-group><article-title>Distinct profiles of multiple co-occurring symptoms in patients with gastrointestinal cancers receiving chemotherapy</article-title><source>Support Care Cancer</source><year>2021</year><month>08</month><volume>29</volume><issue>8</issue><fpage>4461</fpage><lpage>4471</lpage><pub-id pub-id-type="doi">10.1007/s00520-020-05946-4</pub-id><pub-id pub-id-type="medline">33454824</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names> </name><name 
name-style="western"><surname>Xiao</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Distinct co-occurring morning and evening fatigue profiles in patients with gastrointestinal cancers receiving chemotherapy</article-title><source>Cancer Nurs</source><year>2024</year><volume>47</volume><issue>1</issue><fpage>E28</fpage><lpage>E37</lpage><pub-id pub-id-type="doi">10.1097/NCC.0000000000001148</pub-id><pub-id pub-id-type="medline">36076314</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bruner</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>S</given-names> </name><etal/></person-group><article-title>A network analysis of self-reported psychoneurological symptoms in patients with head and neck cancer undergoing intensity-modulated radiotherapy</article-title><source>Cancer</source><year>2022</year><month>10</month><volume>128</volume><issue>20</issue><fpage>3734</fpage><lpage>3743</lpage><pub-id pub-id-type="doi">10.1002/cncr.34424</pub-id><pub-id pub-id-type="medline">35969226</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>G</given-names> </name><name name-style="western"><surname>Bruner</surname><given-names>DW</given-names> </name><etal/></person-group><article-title>Associations of differentially expressed genes with psychoneurological symptoms in patients with head and neck cancer: A longitudinal study</article-title><source>J Psychosom 
Res</source><year>2023</year><month>12</month><volume>175</volume><fpage>111518</fpage><pub-id pub-id-type="doi">10.1016/j.jpsychores.2023.111518</pub-id><pub-id pub-id-type="medline">37832274</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dodd</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Miaskowski</surname><given-names>C</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>SM</given-names> </name></person-group><article-title>Symptom clusters and their effect on the functional status of patients with cancer</article-title><source>Oncol Nurs Forum</source><year>2001</year><month>04</month><volume>28</volume><issue>3</issue><fpage>465</fpage><lpage>470</lpage><pub-id pub-id-type="medline">11338755</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tantoy</surname><given-names>IY</given-names> </name><name name-style="western"><surname>Cooper</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Dhruva</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Quality of life of patients with gastrointestinal cancers undergoing chemotherapy</article-title><source>Qual Life Res</source><year>2018</year><month>07</month><volume>27</volume><issue>7</issue><fpage>1865</fpage><lpage>1876</lpage><pub-id pub-id-type="doi">10.1007/s11136-018-1860-1</pub-id><pub-id pub-id-type="medline">29679368</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Docherty</surname><given-names>SL</given-names> 
</name><name name-style="western"><surname>Porter</surname><given-names>LS</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names>  <suffix>Jr</suffix></name></person-group><article-title>Symptom experience and self-management for multiple co-occurring symptoms in patients with gastric cancer: A qualitative study</article-title><source>Eur J Oncol Nurs</source><year>2020</year><month>12</month><volume>49</volume><fpage>101860</fpage><pub-id pub-id-type="doi">10.1016/j.ejon.2020.101860</pub-id><pub-id pub-id-type="medline">33126154</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Docherty</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Porter</surname><given-names>LS</given-names> </name><name name-style="western"><surname>Bailey</surname><given-names>DE</given-names> </name></person-group><article-title>Common and co-occurring symptoms experienced by patients with gastric cancer</article-title><source>Oncol Nurs Forum</source><year>2020</year><month>03</month><day>1</day><volume>47</volume><issue>2</issue><fpage>187</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1188/20.ONF.187-202</pub-id><pub-id pub-id-type="medline">32078617</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Avci</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Cavusoglu</surname><given-names>F</given-names> </name></person-group><article-title>Experiences of caregivers with spouses receiving chemotherapy for colorectal cancer and their expectations from 
nursing services</article-title><source>Asia Pac J Oncol Nurs</source><year>2017</year><volume>4</volume><issue>2</issue><fpage>173</fpage><lpage>179</lpage><pub-id pub-id-type="doi">10.4103/apjon.apjon_11_17</pub-id><pub-id pub-id-type="medline">28503652</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Law</surname><given-names>E</given-names> </name><name name-style="western"><surname>Levesque</surname><given-names>JV</given-names> </name><name name-style="western"><surname>Lambert</surname><given-names>S</given-names> </name><name name-style="western"><surname>Girgis</surname><given-names>A</given-names> </name></person-group><article-title>The &#x201C;sphere of care&#x201D;: a qualitative study of colorectal cancer patient and caregiver experiences of support within the cancer treatment setting</article-title><source>PLoS ONE</source><year>2018</year><volume>13</volume><issue>12</issue><fpage>e0209436</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0209436</pub-id><pub-id pub-id-type="medline">30586391</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Porter</surname><given-names>LS</given-names> </name><name name-style="western"><surname>Chee</surname><given-names>W</given-names> </name><etal/></person-group><article-title>A web-based dyadic intervention to manage psychoneurological symptoms for patients with colorectal cancer and their caregivers: protocol for a mixed methods study</article-title><source>JMIR Res Protoc</source><year>2023</year><month>06</month><day>28</day><volume>12</volume><fpage>e48499</fpage><pub-id pub-id-type="doi">10.2196/48499</pub-id><pub-id 
pub-id-type="medline">37379055</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Howard</surname><given-names>AF</given-names> </name><name name-style="western"><surname>Lynch</surname><given-names>K</given-names> </name><name name-style="western"><surname>Beck</surname><given-names>S</given-names> </name><etal/></person-group><article-title>At the heart of it all: emotions of consequence for the conceptualization of caregiver-reported outcomes in the context of colorectal cancer</article-title><source>Curr Oncol</source><year>2021</year><month>10</month><day>16</day><volume>28</volume><issue>5</issue><fpage>4184</fpage><lpage>4202</lpage><pub-id pub-id-type="doi">10.3390/curroncol28050355</pub-id><pub-id pub-id-type="medline">34677273</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hollis</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>DI</given-names> </name></person-group><article-title>Healthcare disparities and colorectal cancer</article-title><source>Surg Oncol Clin N Am</source><year>2022</year><month>04</month><volume>31</volume><issue>2</issue><fpage>157</fpage><lpage>169</lpage><pub-id pub-id-type="doi">10.1016/j.soc.2021.11.002</pub-id><pub-id pub-id-type="medline">35351271</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Levy</surname><given-names>H</given-names> </name><name name-style="western"><surname>Janke</surname><given-names>A</given-names> </name></person-group><article-title>Health literacy and access to care</article-title><source>J Health Commun</source><year>2016</year><volume>21 Suppl 
1</volume><issue>Suppl</issue><fpage>43</fpage><lpage>50</lpage><pub-id pub-id-type="doi">10.1080/10810730.2015.1131776</pub-id><pub-id pub-id-type="medline">27043757</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Murphy-Ende</surname><given-names>K</given-names> </name></person-group><article-title>Barriers to palliative and supportive care</article-title><source>Nurs Clin North Am</source><year>2001</year><month>12</month><volume>36</volume><issue>4</issue><fpage>843</fpage><lpage>853</lpage><pub-id pub-id-type="doi">10.1016/s0029-6465(22)02675-5</pub-id><pub-id pub-id-type="medline">11726357</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sherman</surname><given-names>ADF</given-names> </name><name name-style="western"><surname>Febres-Cordero</surname><given-names>S</given-names> </name><name name-style="western"><surname>Johnson</surname><given-names>KB</given-names> </name><etal/></person-group><article-title>Intersectionality in nursing research: a scoping review</article-title><source>Int J Nurs Stud Adv</source><year>2023</year><month>12</month><volume>5</volume><fpage>100155</fpage><pub-id pub-id-type="doi">10.1016/j.ijnsa.2023.100155</pub-id><pub-id pub-id-type="medline">37982092</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><article-title>Toolkit for patient focused therapy development</article-title><source>National Center for Advancing Translational Sciences</source><access-date>2025-03-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://toolkit.ncats.nih.gov/glossary/underserved-group/">https://toolkit.ncats.nih.gov/glossary/underserved-group/</ext-link></comment></nlm-citation></ref><ref 
id="ref21"><label>21</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Vogels</surname><given-names>EA</given-names> </name></person-group><article-title>Digital divide persists even as Americans with lower incomes make gains in tech adoption</article-title><source>Pew Research Center</source><year>2021</year><access-date>2025-03-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.pewresearch.org/short-reads/2021/06/22/digital-divide-persists-even-as-americans-with-lower-incomes-make-gains-in-tech-adoption">https://www.pewresearch.org/short-reads/2021/06/22/digital-divide-persists-even-as-americans-with-lower-incomes-make-gains-in-tech-adoption</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qan&#x2019;ir</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Song</surname><given-names>L</given-names> </name></person-group><article-title>Systematic review of technology-based interventions to improve anxiety, depression, and health-related quality of life among patients with prostate cancer</article-title><source>Psychooncology</source><year>2019</year><month>08</month><volume>28</volume><issue>8</issue><fpage>1601</fpage><lpage>1613</lpage><pub-id pub-id-type="doi">10.1002/pon.5158</pub-id><pub-id pub-id-type="medline">31222956</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>L</given-names> </name><name name-style="western"><surname>Rini</surname><given-names>C</given-names> </name><name name-style="western"><surname>Deal</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Improving couples&#x2019; quality of life through a 
Web-based prostate cancer education intervention</article-title><source>Oncol Nurs Forum</source><year>2015</year><month>03</month><volume>42</volume><issue>2</issue><fpage>183</fpage><lpage>192</lpage><pub-id pub-id-type="doi">10.1188/15.ONF.183-192</pub-id><pub-id pub-id-type="medline">25806885</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xue</surname><given-names>VW</given-names> </name><name name-style="western"><surname>Lei</surname><given-names>P</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>WC</given-names> </name></person-group><article-title>The potential impact of ChatGPT in clinical and translational medicine</article-title><source>Clin Transl Med</source><year>2023</year><month>03</month><volume>13</volume><issue>3</issue><fpage>e1216</fpage><pub-id pub-id-type="doi">10.1002/ctm2.1216</pub-id><pub-id pub-id-type="medline">36856370</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Strachan</surname><given-names>JWA</given-names> </name><name name-style="western"><surname>Albergo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Borghini</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Testing theory of mind in large language models and humans</article-title><source>Nat Hum Behav</source><year>2024</year><month>07</month><volume>8</volume><issue>7</issue><fpage>1285</fpage><lpage>1295</lpage><pub-id pub-id-type="doi">10.1038/s41562-024-01882-z</pub-id><pub-id pub-id-type="medline">38769463</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Review of large vision models and visual prompt engineering</article-title><source>Meta-Radiology</source><year>2023</year><month>11</month><volume>1</volume><issue>3</issue><fpage>100047</fpage><pub-id pub-id-type="doi">10.1016/j.metrad.2023.100047</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Prompt engineering for healthcare: methodologies and applications</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2304.14670</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cascella</surname><given-names>M</given-names> </name><name name-style="western"><surname>Montomoli</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bellini</surname><given-names>V</given-names> </name><name name-style="western"><surname>Bignami</surname><given-names>E</given-names> </name></person-group><article-title>Evaluating the feasibility of ChatGPT in healthcare: an analysis of multiple clinical and research scenarios</article-title><source>J Med Syst</source><year>2023</year><month>03</month><day>4</day><volume>47</volume><issue>1</issue><fpage>33</fpage><pub-id 
pub-id-type="doi">10.1007/s10916-023-01925-4</pub-id><pub-id pub-id-type="medline">36869927</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fink</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Bischoff</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fink</surname><given-names>CA</given-names> </name><etal/></person-group><article-title>Potential of ChatGPT and GPT-4 for data mining of free-text CT reports on lung cancer</article-title><source>Radiology</source><year>2023</year><month>09</month><volume>308</volume><issue>3</issue><fpage>e231362</fpage><pub-id pub-id-type="doi">10.1148/radiol.231362</pub-id><pub-id pub-id-type="medline">37724963</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haver</surname><given-names>HL</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Sirajuddin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yi</surname><given-names>PH</given-names> </name><name name-style="western"><surname>Jeudy</surname><given-names>J</given-names> </name></person-group><article-title>Use of ChatGPT, GPT-4, and Bard to improve readability of ChatGPT&#x2019;s answers to common questions about lung cancer and lung cancer screening</article-title><source>AJR Am J Roentgenol</source><year>2023</year><month>11</month><volume>221</volume><issue>5</issue><fpage>701</fpage><lpage>704</lpage><pub-id pub-id-type="doi">10.2214/AJR.23.29622</pub-id><pub-id pub-id-type="medline">37341179</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Dai</surname><given-names>W</given-names> </name><name name-style="western"><surname>Mo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Han</surname><given-names>D</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name></person-group><article-title>Systematic analysis of glutamine metabolism family genes and exploration of the biological role of GPT in colorectal cancer</article-title><source>Aging (Albany NY)</source><year>2023</year><month>11</month><day>15</day><volume>15</volume><issue>21</issue><fpage>11811</fpage><lpage>11830</lpage><pub-id pub-id-type="doi">10.18632/aging.205079</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhattarai</surname><given-names>K</given-names> </name><name name-style="western"><surname>Oh</surname><given-names>IY</given-names> </name><name name-style="western"><surname>Sierra</surname><given-names>JM</given-names> </name><etal/></person-group><article-title>Leveraging GPT-4 for identifying cancer phenotypes in electronic health records: a performance comparison between GPT-4, 
GPT-3.5-turbo, Flan-T5 and spaCy&#x2019;s rule-based &#x0026; machine learning-based methods</article-title><source>bioRxiv</source><year>2024</year><month>04</month><day>6</day><volume>2023</volume><issue>2023</issue><fpage>27</fpage><pub-id pub-id-type="doi">10.1101/2023.09.27.559788</pub-id><pub-id pub-id-type="medline">37808763</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bold</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Evaluation of general large language models in understanding clinical concepts extracted from adult critical care electronic health record notes</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><comment>arXiv:2401.13588</comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Agrawal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hegselmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sontag</surname><given-names>D</given-names> </name></person-group><article-title>Large language models are zero-shot clinical information extractors</article-title><source>arXiv</source><comment>Preprint posted online on 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2205.12689</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name 
name-style="western"><surname>Xiao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Shan</surname><given-names>X</given-names> </name></person-group><article-title>Enhancing LLM with evolutionary fine tuning for news summary generation</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2307.02839</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Asthana</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hilleli</surname><given-names>S</given-names> </name><name name-style="western"><surname>He</surname><given-names>P</given-names> </name><name name-style="western"><surname>Halfaker</surname><given-names>A</given-names> </name></person-group><article-title>Summaries, highlights, and action items: design, implementation and evaluation of an LLM-powered meeting recap system</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2307.15793</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garrouste-Orgeas</surname><given-names>M</given-names> </name><name name-style="western"><surname>Philippart</surname><given-names>F</given-names> </name><name name-style="western"><surname>Bruel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Max</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>N</given-names> </name><name name-style="western"><surname>Misset</surname><given-names>B</given-names> </name></person-group><article-title>Overview of medical 
errors and adverse events</article-title><source>Ann Intensive Care</source><year>2012</year><month>02</month><day>16</day><volume>2</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1186/2110-5820-2-2</pub-id><pub-id pub-id-type="medline">22339769</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Gero</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>C</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Attribute structuring improves LLM-based evaluation of clinical text summaries</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2403.01002</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Espenshade</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>H</given-names> </name></person-group><article-title>An analysis of English-language proficiency among U.S. 
immigrants</article-title><source>Am Sociol Rev</source><year>1997</year><month>04</month><volume>62</volume><issue>2</issue><fpage>288</fpage><pub-id pub-id-type="doi">10.2307/2657305</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chesser</surname><given-names>A</given-names> </name><name name-style="western"><surname>Burke</surname><given-names>A</given-names> </name><name name-style="western"><surname>Reyes</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rohrberg</surname><given-names>T</given-names> </name></person-group><article-title>Navigating the digital divide: a systematic review of eHealth literacy in underserved populations in the United States</article-title><source>Inform Health Soc Care</source><year>2016</year><volume>41</volume><issue>1</issue><fpage>1</fpage><lpage>19</lpage><pub-id pub-id-type="doi">10.3109/17538157.2014.948171</pub-id><pub-id pub-id-type="medline">25710808</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Solnyshkina</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zamaletdinov</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gorodetskaya</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gabitov</surname><given-names>A</given-names> </name></person-group><article-title>Evaluating text complexity and Flesch-Kincaid grade level</article-title><source>J Soc Stud Educ Res</source><year>2017</year><volume>8</volume><issue>3</issue><fpage>238</fpage><lpage>248</lpage></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Walsh</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Volsko</surname><given-names>TA</given-names> </name></person-group><article-title>Readability assessment of internet-based consumer health information</article-title><source>Respir Care</source><year>2008</year><month>10</month><volume>53</volume><issue>10</issue><fpage>1310</fpage><lpage>1315</lpage><pub-id pub-id-type="medline">18811992</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abazari</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chatterjee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Moniruzzaman</surname><given-names>M</given-names> </name></person-group><article-title>Understanding cancer caregiving and predicting burden: an analytics and machine learning approach</article-title><source>AMIA Annu Symp Proc</source><year>2023</year><volume>2023</volume><fpage>243</fpage><lpage>252</lpage><pub-id pub-id-type="medline">38222371</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Prompt engineering in consistency and reliability with the evidence-based guideline for LLMs</article-title><source>NPJ Digit Med</source><year>2024</year><month>02</month><day>20</day><volume>7</volume><issue>1</issue><fpage>41</fpage><pub-id pub-id-type="doi">10.1038/s41746-024-01029-4</pub-id><pub-id pub-id-type="medline">38378899</pub-id></nlm-citation></ref><ref 
id="ref46"><label>46</label><nlm-citation citation-type="web"><article-title>Patient education publications</article-title><source>National Cancer Institute at the National Institutes of Health</source><access-date>2025-03-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.gov/publications/patient-education">https://www.cancer.gov/publications/patient-education</ext-link></comment></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="web"><article-title>Guidelines for patients</article-title><source>National Comprehensive Cancer Network</source><access-date>2025-03-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nccn.org/patientresources/patient-resources/guidelines-for-patients">https://www.nccn.org/patientresources/patient-resources/guidelines-for-patients</ext-link></comment></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Epari</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>K</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Perceptions and needs for a technology-based dyadic intervention on symptom management among patients with colorectal cancer and their caregivers: a qualitative study</article-title><source>Cancer Nurs</source><year>2024</year><month>11</month><day>29</day><pub-id pub-id-type="doi">10.1097/NCC.0000000000001429</pub-id><pub-id pub-id-type="medline">39625810</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Northouse</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kershaw</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Mood</surname><given-names>D</given-names> </name><name name-style="western"><surname>Schafenacker</surname><given-names>A</given-names> </name></person-group><article-title>Effects of a family intervention on the quality of life of women with recurrent breast cancer and their family caregivers</article-title><source>Psychooncology</source><year>2005</year><month>06</month><volume>14</volume><issue>6</issue><fpage>478</fpage><lpage>491</lpage><pub-id pub-id-type="doi">10.1002/pon.871</pub-id><pub-id pub-id-type="medline">15599947</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Northouse</surname><given-names>LL</given-names> </name><name name-style="western"><surname>Mood</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Schafenacker</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Randomized clinical trial of a brief and extensive dyadic intervention for advanced cancer patients and their family caregivers</article-title><source>Psychooncology</source><year>2013</year><month>03</month><volume>22</volume><issue>3</issue><fpage>555</fpage><lpage>563</lpage><pub-id pub-id-type="doi">10.1002/pon.3036</pub-id><pub-id pub-id-type="medline">22290823</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Northouse</surname><given-names>LL</given-names> </name><name name-style="western"><surname>Mood</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Schafenacker</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Randomized clinical trial of a family intervention for prostate cancer patients and their 
spouses</article-title><source>Cancer</source><year>2007</year><month>12</month><day>15</day><volume>110</volume><issue>12</issue><fpage>2809</fpage><lpage>2818</lpage><pub-id pub-id-type="doi">10.1002/cncr.23114</pub-id><pub-id pub-id-type="medline">17999405</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="web"><article-title>Part 46 - protection of human subjects</article-title><source>Code of Federal Regulations</source><access-date>2025-03-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46">https://www.ecfr.gov/current/title-45/subtitle-A/subchapter-A/part-46</ext-link></comment></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Veen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Van Uden</surname><given-names>C</given-names> </name><name name-style="western"><surname>Blankemeier</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Clinical text summarization: adapting large language models can outperform human experts</article-title><source>Res Sq</source><year>2023</year><month>10</month><day>30</day><fpage>rs.3.rs-3483777</fpage><pub-id pub-id-type="doi">10.21203/rs.3.rs-3483777/v1</pub-id><pub-id pub-id-type="medline">37961377</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delaney</surname><given-names>FT</given-names> </name><name name-style="western"><surname>Doinn</surname><given-names>T&#x00D3;</given-names> </name><name name-style="western"><surname>Broderick</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Stanley</surname><given-names>E</given-names> 
</name></person-group><article-title>Readability of patient education materials related to radiation safety: What are the implications for patient-centred radiology care?</article-title><source>Insights Imaging</source><year>2021</year><month>10</month><day>21</day><volume>12</volume><issue>1</issue><fpage>148</fpage><pub-id pub-id-type="doi">10.1186/s13244-021-01094-3</pub-id><pub-id pub-id-type="medline">34674063</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Davis</surname><given-names>TC</given-names> </name><name name-style="western"><surname>Mayeaux</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Fredrickson</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bocchini</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>RH</given-names> </name><name name-style="western"><surname>Murphy</surname><given-names>PW</given-names> </name></person-group><article-title>Reading ability of parents compared with reading level of pediatric patient education materials</article-title><source>Pediatrics</source><year>1994</year><month>03</month><volume>93</volume><issue>3</issue><fpage>460</fpage><lpage>468</lpage><pub-id pub-id-type="doi">10.1542/peds.93.3.460</pub-id><pub-id pub-id-type="medline">8115206</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>F</given-names> </name><name 
name-style="western"><surname>Liu</surname><given-names>N</given-names> </name><name name-style="western"><surname>Du</surname><given-names>M</given-names> </name></person-group><article-title>Quantifying multilingual performance of large language models across languages</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2404.11553</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Zan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>L</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Tao</surname><given-names>D</given-names> </name></person-group><article-title>Building accurate translation-tailored LLMs with language aware instruction tuning</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2403.14399</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kalra</surname><given-names>J</given-names> </name></person-group><article-title>Medical errors: impact on clinical laboratories and other critical areas</article-title><source>Clin Biochem</source><year>2004</year><month>12</month><volume>37</volume><issue>12</issue><fpage>1052</fpage><lpage>1062</lpage><pub-id pub-id-type="doi">10.1016/j.clinbiochem.2004.08.009</pub-id><pub-id pub-id-type="medline">15589810</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robertson</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Long</surname><given-names>B</given-names> </name></person-group><article-title>Suffering in silence: medical error and its impact on health care providers</article-title><source>J Emerg Med</source><year>2018</year><month>04</month><volume>54</volume><issue>4</issue><fpage>402</fpage><lpage>409</lpage><pub-id pub-id-type="doi">10.1016/j.jemermed.2017.12.001</pub-id><pub-id pub-id-type="medline">29366616</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Burton-Wood</surname><given-names>C</given-names> </name><name name-style="western"><surname>Burnell</surname><given-names>R</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>A</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>D</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>B</given-names> </name><name name-style="western"><surname>Garry</surname><given-names>M</given-names> </name></person-group><article-title>Medical professionals&#x2019; (mis)remembering of a simulated interaction with a patient: a medical misinformation effect</article-title><source>PsyArXiv</source><comment>Preprint posted online on 2019</comment><pub-id pub-id-type="doi">10.31234/osf.io/wt8ma</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> 
</name><etal/></person-group><article-title>Instruction tuning for large language models: A survey</article-title><source>arXiv</source><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2308.10792</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Bai</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Du</surname><given-names>X</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>COIG-CQIA: quality is all you need for Chinese instruction fine-tuning</article-title><source>arXiv</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2403.18058</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>N</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Parameter-efficient fine-tuning of large-scale pre-trained language models</article-title><source>Nat Mach Intell</source><year>2023</year><volume>5</volume><issue>3</issue><fpage>220</fpage><lpage>235</lpage><pub-id pub-id-type="doi">10.1038/s42256-023-00626-4</pub-id></nlm-citation></ref></ref-list></back></article>