<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cancer</journal-id><journal-id journal-id-type="publisher-id">cancer</journal-id><journal-id journal-id-type="index">21</journal-id><journal-title>JMIR Cancer</journal-title><abbrev-journal-title>JMIR Cancer</abbrev-journal-title><issn pub-type="epub">2369-1999</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e57275</article-id><article-id pub-id-type="doi">10.2196/57275</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Large Language Model Approach for Zero-Shot Information Extraction and Clustering of Japanese Radiology Reports: Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Yamagishi</surname><given-names>Yosuke</given-names></name><degrees>MD, MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nakamura</surname><given-names>Yuta</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hanaoka</surname><given-names>Shouhei</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Abe</surname><given-names>Osamu</given-names></name><degrees>MD, 
PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Division of Radiology and Biomedical Engineering, Graduate School of Medicine, The University of Tokyo</institution><addr-line>Tokyo</addr-line><country>Japan</country></aff><aff id="aff2"><institution>Department of Computational Diagnostic Radiology and Preventive Medicine, The University of Tokyo Hospital</institution><addr-line>Tokyo</addr-line><country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Hu</surname><given-names>Danqing</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Jani</surname><given-names>Mehul</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shraga</surname><given-names>Roee</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yosuke Yamagishi, MD, MSc, Division of Radiology and Biomedical Engineering, Graduate School of Medicine, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo, 113-8655, Japan, 81 3-3815-5411; <email>yamagishi-yosuke0115@g.ecc.u-tokyo.ac.jp</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>23</day><month>1</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e57275</elocation-id><history><date date-type="received"><day>11</day><month>02</month><year>2024</year></date><date date-type="rev-recd"><day>16</day><month>12</month><year>2024</year></date><date date-type="accepted"><day>18</day><month>12</month><year>2024</year></date></history><copyright-statement>&#x00A9; Yosuke Yamagishi, Yuta Nakamura, Shouhei Hanaoka, Osamu Abe. 
Originally published in JMIR Cancer (<ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org">https://cancer.jmir.org</ext-link>), 23.1.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cancer, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cancer.jmir.org/">https://cancer.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cancer.jmir.org/2025/1/e57275"/><abstract><sec><title>Background</title><p>The application of natural language processing in medicine has increased significantly, including tasks such as information extraction and classification. Natural language processing plays a crucial role in structuring free-form radiology reports, facilitating the interpretation of textual content, and enhancing data utility through clustering techniques. Clustering allows for the identification of similar lesions and disease patterns across a broad dataset, making it useful for aggregating information and discovering new insights in medical imaging. However, most publicly available medical datasets are in English, with limited resources in other languages. 
This scarcity poses a challenge for the development of models geared toward non-English downstream tasks.</p></sec><sec><title>Objective</title><p>This study aimed to develop and evaluate an algorithm that uses large language models (LLMs) to extract information from Japanese lung cancer radiology reports and perform clustering analysis. The effectiveness of this approach was assessed and compared with previous supervised methods.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study employed the MedTxt-RR dataset, comprising 135 Japanese radiology reports from 9 radiologists who interpreted the computed tomography images of 15 lung cancer patients obtained from Radiopaedia. Previously used in the NTCIR-16 (NII Testbeds and Community for Information Access Research) shared task for clustering performance competition, this dataset was ideal for comparing the clustering ability of our algorithm with those of previous methods. The dataset was split into 8 cases for development and 7 for testing, respectively. The study&#x2019;s approach involved using the LLM to extract information pertinent to lung cancer findings and transforming it into numeric features for clustering, using the K-means method. Performance was evaluated using 135 reports for information extraction accuracy and 63 test reports for clustering performance. This study focused on the accuracy of automated systems for extracting tumor size, location, and laterality from clinical reports. The clustering performance was evaluated using normalized mutual information, adjusted mutual information, and the Fowlkes-Mallows index for both the development and test data.</p></sec><sec sec-type="results"><title>Results</title><p>The tumor size was accurately identified in 99 out of 135 reports (73.3%), with errors in 36 reports (26.7%), primarily due to missing or incorrect size information.
Tumor location and laterality were identified with greater accuracy in 112 out of 135 reports (83%); however, 23 reports (17%) contained errors mainly due to empty values or incorrect data. Clustering performance of the test data yielded a normalized mutual information of 0.6414, an adjusted mutual information of 0.5598, and a Fowlkes-Mallows index of 0.5354. The proposed method demonstrated superior performance across all evaluation metrics compared to previous methods.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The unsupervised LLM approach surpassed the existing supervised methods in clustering Japanese radiology reports. These findings suggest that LLMs hold promise for extracting information from radiology reports and integrating it into disease-specific knowledge structures.</p></sec></abstract><kwd-group><kwd>radiology reports</kwd><kwd>clustering</kwd><kwd>large language model</kwd><kwd>natural language processing</kwd><kwd>information extraction</kwd><kwd>lung cancer</kwd><kwd>machine learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Natural language processing (NLP) is vital in medicine as it allows the interpretation of textual content in medical documents. Radiology reports, written as free text by experienced radiologists, contain detailed information about medical imaging findings. While medical images are valuable, text-based analysis offers unique advantages in terms of computational efficiency and the ability to capture expert interpretations and observations of radiologists that may not be immediately apparent from images. Natural language processing can effectively extract this information, enhance its utilization, and provide new insights into medical imaging.</p><p>Advances in radiological NLP applications are driven by the availability of large datasets [<xref ref-type="bibr" rid="ref1">1</xref>].
For example, the MIMIC Chest X-ray (MIMIC-CXR) includes more than 200,000 images, English-language reports, and structured data [<xref ref-type="bibr" rid="ref2">2</xref>]. Numerous NLP models have been developed to summarize and extract clinical entities [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. However, the availability of these datasets in languages other than English is limited.</p><p>To address this challenge, the NTCIR-16 Real-MedNLP shared task focused on clustering Japanese radiology reports by case basis. It is a set of Japanese radiology reports authored by different radiologists for the same case series of lung cancer, and the task was to cluster reports that describe the same medical case together [<xref ref-type="bibr" rid="ref5">5</xref>]. This benchmark evaluates the detailed understanding of radiology reports, as NLP systems must extract sufficient information to recognize reports by diagnosing the same image without being affected by different writing styles.</p><p>Clustering is a powerful analytical tool in medicine and has been successfully applied in various clinical domains. Studies have demonstrated its effectiveness in clustering patients based on their clinical characteristics to guide medical decisions, ranging from cancer aftercare planning to pulmonary embolism risk assessment [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Semantic grouping has enabled efficient insight discovery in medical documents [<xref ref-type="bibr" rid="ref8">8</xref>] and revealed specialty-specific sublanguages in clinical narratives [<xref ref-type="bibr" rid="ref9">9</xref>]. 
Radiology reports are particularly suited for such analyses, as they provide high-quality annotated data despite their free-form nature, offering a more tractable alternative to direct image analysis.</p><p>While the participants in the NTCIR-16 (NII Testbeds and Community for Information Access Research) shared task used deep-learning models, their clustering performance was constrained by limited training data. Since then, large language models (LLMs) trained on extensive text corpora, such as ChatGPT and LLaMA [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], have emerged. These LLMs, which are adaptable to new tasks with minimal instructions or examples, have demonstrated high performance in extracting information from medical documents, even under zero-shot conditions [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>This study aimed to evaluate the ability of an LLM to understand real radiological reports through an information extraction task and to apply this information to clustering, which is a clinically meaningful task.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title><italic>Study Design and Reporting Guidelines</italic></title><p>This retrospective observational study followed the relevant items of the Checklist for Artificial Intelligence in Medical Imaging (CLAIM) guidelines for methodology reporting [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Although this study analyzed text rather than images, CLAIM was followed because it is an established guideline for AI-based research in radiology and is deemed appropriate for NLP [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>].</p></sec><sec id="s2-2"><title><italic>Algorithm Overview</italic></title><p>The proposed algorithm is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.
Using the LLM, key lung cancer findings were extracted from radiology reports and quantified to obtain structured data. The structured data were subsequently used for clustering.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flowchart of radiology report clustering using an LLM. LLM: large language model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e57275_fig01.png"/></fig></sec><sec id="s2-3"><title><italic>Dataset</italic></title><p>The MedTxt-RR dataset was used in this study [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], comprising 135 Japanese radiology reports generated by 9 radiologists who interpreted computed tomography (CT) images of 15 lung cancer cases sourced from Radiopaedia [<xref ref-type="bibr" rid="ref19">19</xref>]. This dataset was used in an NTCIR-16 shared task [<xref ref-type="bibr" rid="ref5">5</xref>], where participants competed to achieve optimal clustering performance. With each case comprising reports from 9 radiologists, the dataset was suitable for evaluating the clustering performance on a per-case basis (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Eight cases and seven cases were assigned to the development and test sets, respectively.
While no model training was conducted using the development set in this study, performance was evaluated on the same data split to facilitate comparison with the shared task results.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Overview diagram of the radiology report clustering task.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e57275_fig02.png"/></fig></sec><sec id="s2-4"><title><italic>LLM Approach</italic></title><p>Radiology reports contain confidential patient information; processing them using a cloud-based LLM, such as ChatGPT, could expose sensitive data externally, raising significant medical safety concerns. Therefore, a publicly available offline model was selected as an alternative approach.</p><p>The ELYZA-Japanese-Llama-2-7b-fast-instruct model was employed as the LLM [<xref ref-type="bibr" rid="ref20">20</xref>]. Adapted from Llama2 and pre-trained using Japanese datasets, this model demonstrated a performance comparable to that of GPT-3.5 on Japanese datasets [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>].</p></sec><sec id="s2-5"><title><italic>Information Extraction</italic></title><p>The LLM extracted multiple lung cancer staging parameters from radiology reports, including tumor size, tumor location, and the presence or absence of lymph node enlargement suggesting metastasis and of distant metastasis. To determine the optimal combination of features, the clustering performance on the development set was repeatedly measured using different feature combinations.
Consequently, sufficient clustering performance was confirmed achievable using only 3 parameters: tumor size, laterality (left or right), and lung location (upper, hilum, or lower region).</p><p>The prompt input into the LLM comprised system instructions and output format guidelines using JSON (JavaScript Object Notation), a standardized text-based format for structured information exchange, where data is organized in key-value pairs, such as {&#x201C;size&#x201D;: &#x201C;45 mm&#x201D;, &#x201C;location&#x201D;: &#x201C;right upper lobe&#x201D;}. These system instructions guided the LLM in extracting features from the radiology reports. The details of these prompts are shown in <xref ref-type="fig" rid="figure3">Figure 3</xref> (English version) and Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> (original Japanese version).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Example of a prompt used as input for the LLM (English translated version). AAH: Atypical Adenomatous Hyperplasia; AIS: Adenocarcinoma in situ; GGN: Ground Glass Nodule; LLM: large language model; SSN: Subsolid Nodule.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cancer_v11i1e57275_fig03.png"/></fig><p>The extracted data were converted into integer vectors comprising the tumor size and other categorical values. Unspecified tumor sizes only described as <italic>large</italic> were replaced with 71 mm, corresponding to the highest category in T classification, where T represents tumor categories in cancer staging. The details of this pipeline can be found in the GitHub Repository [<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>Moreover, a rule-based method was employed as the baseline approach and its performance was compared with that of the proposed method.
The rule-based method performs context-sensitive word-based information extraction; the detailed algorithm is shown in Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-6"><title><italic>Clustering</italic></title><p>The resulting numerical matrices were clustered using the K-means algorithm in the scikit-learn library (version 1.3.1). The number of clusters was set to 8, aligning with expected classifications such as disease type or staging, since it was close to the number of test data cases. Centroid initialization used the k-means++ method, with default values for the centroid seed and iteration count, because hyperparameter tuning was not conducted in this zero-shot study.</p></sec><sec id="s2-7"><title><italic>Information Extraction Evaluation</italic></title><p>Two independent radiologists, a radiology resident with 1 year of experience and a board-certified radiologist with 7 years of experience, evaluated the accuracy of the extracted information. In cases of discrepancy, the final assessment was determined by consensus. Evaluation focused on three key elements: tumor size, location (upper, hilum, or lower), and laterality (left or right). The performance of the LLM-based approach was compared to that of the rule-based method for information extraction.
A detailed error analysis was conducted for cases with errors, categorizing them into missing information, false information generation, and extraction of multiple values.</p><p>McNemar&#x2019;s test was performed using Statsmodels (version 0.14.2) [<xref ref-type="bibr" rid="ref25">25</xref>] to compare performance differences between the LLM-based and rule-based approaches for extracting tumor size and location.</p></sec><sec id="s2-8"><title><italic>Clustering Performance Evaluation</italic></title><p>We assessed clustering performance using three metrics similar to those used in the shared task [<xref ref-type="bibr" rid="ref5">5</xref>]: (1) normalized mutual information (NMI) that quantifies the mutual dependence between two clusters, normalized to a 0&#x2010;1 scale, with 1 indicating perfect clustering; (2) adjusted mutual information (AMI) which is an adjustment that corrects for NMI, accounting for its tendency to increase with the number of clusters; (3) Fowlkes-Mallows index (FM) that measures the similarity between two clusters by calculating the geometric mean of precision and recall, providing a balanced assessment of clustering accuracy.</p></sec><sec id="s2-9"><title><italic>Ethics Consideration</italic></title><p>This study involved analysis of human subject data from publicly available radiology reports. All data were completely de-identified and accessible through MedTxt-RR [<xref ref-type="bibr" rid="ref26">26</xref>]. In accordance with our institution&#x2019;s policy on research ethics, studies using exclusively de-identified, public datasets are exempt from institutional review board approval [<xref ref-type="bibr" rid="ref27">27</xref>]. 
No additional privacy or confidentiality measures were required as the dataset contains no personally identifiable information, with all protected health information having been removed prior to public release.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title><italic>Information Extraction Performance</italic></title><p>The details of the findings targeted at information extraction are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. The tumor size was correctly identified in 99 (73.3%) of 135 reports. Among the 36 outputs (26.7%) with errors, 23 (17%) came from reports that lacked size information, 22 (16.3%) of which contained false size information. The remaining errors were attributed to size inaccuracies or empty values despite size information being mentioned in the reports. Tumor location and laterality were accurately identified in 112 (83%) reports. All 23 (17%) reports with errors contained the necessary information but had empty values for laterality, location, or both, with one output indicating an incorrect location. The detailed error analysis is presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Summary of lung cancer cases.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Case no.</td><td align="left" valign="bottom">Side</td><td align="left" valign="bottom">Lobe</td><td align="left" valign="bottom">Size (mm)</td><td align="left" valign="bottom">Lymph node metastasis</td><td align="left" valign="bottom">Distant metastasis</td><td align="left" valign="bottom">Data split</td></tr></thead><tbody><tr><td align="left" valign="top">Case 1</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="char" char="."
valign="top">18</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 2</td><td align="left" valign="top">Right</td><td align="left" valign="top">Lower</td><td align="char" char="." valign="top">12</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 3</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="char" char="." valign="top">28</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 4</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="char" char="." valign="top">40</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 5</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="char" char="." valign="top">48</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 6</td><td align="left" valign="top">Right</td><td align="left" valign="top">Hilum</td><td align="left" valign="top">Not measurable (due to invasion)</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 7</td><td align="left" valign="top">Right</td><td align="left" valign="top">Lower</td><td align="char" char="." 
valign="top">55</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 8</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="left" valign="top">Not measurable (due to invasion)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 9</td><td align="left" valign="top">Right</td><td align="left" valign="top">Hilum</td><td align="char" char="." valign="top">43</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 10</td><td align="left" valign="top">Right</td><td align="left" valign="top">Upper</td><td align="left" valign="top">Not measurable (due to invasion)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 11</td><td align="left" valign="top">Right</td><td align="left" valign="top">Upper</td><td align="left" valign="top">Not measurable (due to invasion)</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 12</td><td align="left" valign="top">Right</td><td align="left" valign="top">Lower</td><td align="left" valign="top">Not measurable (due to lung metastasis)</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 13</td><td align="left" valign="top">Left</td><td align="left" valign="top">Lower</td><td align="char" char="." 
valign="top">78</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Development</td></tr><tr><td align="left" valign="top">Case 14</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="char" char="." valign="top">85</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td><td align="left" valign="top">Test</td></tr><tr><td align="left" valign="top">Case 15</td><td align="left" valign="top">Left</td><td align="left" valign="top">Upper</td><td align="left" valign="top">Not measurable (due to invasion)</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Test</td></tr></tbody></table></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Detailed error analysis of tumor size, location, and laterality extraction from radiology reports using large language model (LLM) and rule-based methods.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom" colspan="2">Extraction methods</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, n (%)</td><td align="left" valign="bottom">Rule-based, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">Tumor size (details)</td></tr><tr><td align="left" valign="top">&#x2003;Correctly identified</td><td align="char" char="." valign="top">99 (73.3)</td><td align="left" valign="top">93 (68.9)</td></tr><tr><td align="left" valign="top">&#x2003;Errors (total)</td><td align="char" char="." valign="top">36 (26.7)</td><td align="char" char="." valign="top">42 (31.1)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Errors (no size information in reports)</td><td align="char" char="." 
valign="top">23 (17)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;&#x2003;False size information generated</td><td align="char" char="." valign="top">22 (16.3)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;&#x2003;T classification extracted instead of size</td><td align="char" char="." valign="top">1 (0.7)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Errors (size mentioned in reports)</td><td align="char" char="." valign="top">13 (9.6)</td><td align="left" valign="top">42 (31.1)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;&#x2003;Size inaccuracies</td><td align="char" char="." valign="top">8 (5.9)</td><td align="left" valign="top">3 (2.2)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;&#x2003;Empty values</td><td align="char" char="." valign="top">5 (3.7)</td><td align="left" valign="top">39 (28.9)</td></tr><tr><td align="left" valign="top" colspan="3">Tumor location and laterality (details)</td></tr><tr><td align="left" valign="top">&#x2003;Accurately reported</td><td align="char" char="." valign="top">112 (83)</td><td align="char" char="." valign="top">46 (34.1)</td></tr><tr><td align="left" valign="top">&#x2003;Errors (total)</td><td align="char" char="." valign="top">23 (17)</td><td align="left" valign="top">89 (65.9)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Empty values for laterality</td><td align="char" char="." valign="top">9 (6.7)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Empty values for location</td><td align="char" char="." valign="top">5 (3.7)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Empty values for both</td><td align="char" char="." 
valign="top">8 (5.9)</td><td align="left" valign="top">80 (59.3)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Incorrect location</td><td align="char" char="." valign="top">1 (0.7)</td><td align="left" valign="top">9 (6.7)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>LLM: large language model</p></fn></table-wrap-foot></table-wrap><p>The rule-based method correctly identified tumor size in 93 (68.9%) reports, whereas tumor location and laterality were accurately identified in only 46 (34.1%) reports. Among the errors in this method, only 1 case (0.7%) failed to accurately extract size information due to the extraction of multiple sizes. In contrast, for location, the number of errors reached 47 (34.8%) (Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Unlike the LLM approach, due to the algorithmic nature of rule-based extraction, there were no cases of false-size information generation. Additionally, as the algorithm extracted laterality and location simultaneously as a single unit, there were no cases where only one of these values was empty; both were either extracted together or left empty.</p><p>McNemar&#x2019;s test showed that the LLM approach was significantly superior to the rule-based method in determining location (<italic>P</italic>&#x003C;.001) but not size (<italic>P</italic>=.539).</p></sec><sec id="s3-2"><title><italic>Clustering Performance</italic></title><p>The development data yielded an NMI score of 0.7152, an AMI score of 0.6516, and an FM index of 0.5959, whereas the test data yielded scores of 0.6414 (NMI), 0.5598 (AMI), and 0.5354 (FM).</p><p>The proposed method outperformed all previous methods in shared tasks across all evaluation metrics. The detailed results and methods are listed in <xref ref-type="table" rid="table3">Table 3</xref>. 
Further details of each method are available in the system papers from this shared task [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref31">31</xref>].</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Clustering scores on the test data.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method Description<break/>(System ID<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>)</td><td align="left" valign="bottom">NMI<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">AMI<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">FM<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="bottom">Supervised model</td><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Developed a matrix from word count in radiology reports and applied user-based collaborative filtering for case similarity and clustering, (D1) [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="char" char="." valign="top">0.3569</td><td align="char" char="." valign="top">0.1988</td><td align="char" char="." valign="top">0.2674</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Used paired radiology reports for BERT<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> input, fine-tuned for same-case identification and clustered based on predictions, (E1) [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="char" char="." valign="top">0.5415</td><td align="char" char="." valign="top">0.1489</td><td align="char" char="." 
valign="top">0.1814</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Generated embeddings from text via multilingual BERT trained on Wikipedia, followed by dimensionality reduction, and K-means clustering, (F1) [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="char" char="." valign="top">0.1744</td><td align="char" char="." valign="top">&#x2013;0.0117</td><td align="char" char="." valign="top">0.1170</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Labels simplified from the TNM<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> classification of lung cancer were assigned to each document using BERT-based model for training, and in the test data, these predicted labels were used as groups for clustering, (J1) [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="char" char="." valign="top">0.4622</td><td align="char" char="." valign="top">0.3409</td><td align="char" char="." valign="top">0.3622</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">This study</td><td align="char" char="." valign="top">0.6414</td><td align="char" char="." valign="top">0.5598</td><td align="char" char="." 
valign="top">0.5354</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>The System IDs are those used in the previous shared task with the same dataset [<xref ref-type="bibr" rid="ref5">5</xref>].</p></fn><fn id="table3fn2"><p><sup>b</sup>NMI: normalized mutual information</p></fn><fn id="table3fn3"><p><sup>c</sup>AMI: adjusted mutual information</p></fn><fn id="table3fn4"><p><sup>d</sup>FM: Fowlkes-Mallows index</p></fn><fn id="table3fn5"><p><sup>e</sup>LLM: large language model</p></fn><fn id="table3fn6"><p><sup>f</sup>BERT: Bidirectional Encoder Representations from Transformers</p></fn><fn id="table3fn7"><p><sup>g</sup>TNM: tumor, node, metastasis</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title><italic>Principal Findings</italic></title><p>The LLM-based extraction of lung tumor size showed minimal differences compared to the rule-based method, likely because size information is typically accompanied by standardized units (eg, mm or cm). However, the LLM method significantly outperformed the rule-based method in terms of location extraction, achieving over 80% accuracy and reducing the error rate by more than half. As demonstrated in Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, the rule-based method frequently generated multiple incorrect location extractions when reports mentioned various anatomical sites, whereas the LLM method successfully identified the correct tumor location. This finding empirically demonstrates the LLM&#x2019;s ability to understand and extract information based on context rather than predefined rules. 
This capability highlights its value for complex information extraction tasks in medical text analysis, where contextual understanding is crucial.</p></sec><sec id="s4-2"><title><italic>Comparison to Prior Work</italic></title><p>This paper introduces a Japanese LLM algorithm for zero-shot information extraction and clustering that outperforms all previous methods [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. The previous methods (E1, F1, and J1) relied on indirect features extracted by language models, whereas the current approach leverages accurate information extraction through unsupervised learning. The success of this method is particularly notable, given the historically low accuracy of unsupervised methods. By leveraging the LLM&#x2019;s contextual understanding of information extraction, this study demonstrated the potential for effective clustering of medical reports based on various attributes, including disease severity and lesion localization.</p></sec><sec id="s4-3"><title><italic>Strengths and Limitations</italic></title><p>This study has several notable strengths including its methodology and implementation. Accurate information extraction and clustering without supervised learning requirements represent a significant advancement in the field. The flexibility of this method through prompt and algorithmic adjustments suggests broad potential applicability, with potential for further performance improvements through prompt optimization [<xref ref-type="bibr" rid="ref32">32</xref>]. Furthermore, this method shows particular promise for languages with limited training data compared to English, by converting unstructured reports into language-independent structured data, thereby addressing a crucial gap in current medical text analysis.</p><p>However, the limitations must be acknowledged. First, validation was limited to small-scale Japanese datasets. 
While attempts were made to ensure the representativeness of the dataset by including diverse types of lung cancer cases, this limitation constrained the generalizability of the study findings and should be addressed in future studies through multi-institutional validation. Second, the evaluation focused primarily on clustering; although clustering is a fundamental task in medical text analysis, the method&#x2019;s performance in other analytical tasks remains unexplored, suggesting the need for a comprehensive evaluation across various applications. Third, while this method shows promise for languages with limited training data, its generalizability to other languages and medical domains requires further investigation.</p></sec><sec id="s4-4"><title><italic>Conclusions</italic></title><p>The LLM was used to successfully extract important findings from publicly available Japanese radiology reports as highly accurate structured data. By leveraging these structured data, superior results were achieved compared to existing supervised methods for clustering radiology reports. This indicates that employing existing LLMs is effective for solving specific tasks, particularly in languages with a significant shortage of training data compared to English.</p></sec></sec></body><back><ack><p>Prior to submission, the initial draft was edited using ChatGPT by OpenAI for English language improvement, followed by professional editing by Editage. This study did not receive any specific grants from funding agencies in the public, commercial, or non-profit sectors.</p></ack><notes><sec><title>Data Availability</title><p>The MedTxt-RR training dataset is openly accessible and downloadable via an official website [<xref ref-type="bibr" rid="ref26">26</xref>]. 
Access to the test dataset requires approval and can be obtained by directly contacting the data providers.</p></sec></notes><fn-group><fn fn-type="con"><p>Research design: YY, YN, SH</p><p>Conceptualization: YY, YN, SH</p><p>Algorithm development and implementation: YY</p><p>Formal analysis: YY, YN</p><p>Data curation: YY, YN</p><p>Writing &#x2013; original draft: YY, YN, SH</p><p>Writing &#x2013; review and editing: YN, SH, OA</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AMI</term><def><p>adjusted mutual information</p></def></def-item><def-item><term id="abb2">CLAIM</term><def><p>Checklist for Artificial Intelligence in Medical Imaging</p></def></def-item><def-item><term id="abb3">FM</term><def><p>Fowlkes-Mallows index</p></def></def-item><def-item><term id="abb4">JSON</term><def><p>JavaScript Object Notation</p></def></def-item><def-item><term id="abb5">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb6">MIMIC-CXR</term><def><p>MIMIC Chest X-ray</p></def></def-item><def-item><term id="abb7">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb8">NMI</term><def><p>normalized mutual information</p></def></def-item><def-item><term id="abb9">NTCIR-16</term><def><p>NII Testbeds and Community for Information Access Research</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Irvin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rajpurkar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ko</surname><given-names>M</given-names> </name><etal/></person-group><article-title>CheXpert: a large chest radiograph dataset with uncertainty labels and expert 
comparison</article-title><source>AAAI</source><year>2019</year><volume>33</volume><issue>1</issue><fpage>590</fpage><lpage>597</lpage><pub-id pub-id-type="doi">10.1609/aaai.v33i01.3301590</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>AEW</given-names> </name><name name-style="western"><surname>Pollard</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Berkowitz</surname><given-names>SJ</given-names> </name><etal/></person-group><article-title>MIMIC-CXR, a de-identified publicly available database of chest radiographs with free-text reports</article-title><source>Sci Data</source><year>2019</year><month>12</month><day>12</day><volume>6</volume><issue>1</issue><fpage>317</fpage><pub-id pub-id-type="doi">10.1038/s41597-019-0322-0</pub-id><pub-id pub-id-type="medline">31831740</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Delbrouck</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Varma</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chambon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Langlotz</surname><given-names>C</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Demner-fushman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ananiadou</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>K</given-names> </name></person-group><article-title>Overview of the Radsum23 shared task on multi-modal and multi-anatomical radiology report summarization</article-title><source>The 22nd Workshop on Biomedical Natural 
Language Processing and BioNLP Shared Tasks</source><year>2023</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>478</fpage><lpage>482</lpage><pub-id pub-id-type="doi">10.18653/v1/2023.bionlp-1.45</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Jain</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agrawal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Saporta</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Radgraph: extracting clinical entities and relations from radiology reports</article-title><source>arXiv</source><access-date>2024-02-10</access-date><comment>Preprint posted online on 2021</comment><pub-id pub-id-type="doi">10.48550/arXiv.2106.14463</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Yada</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nakamura</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wakamiya</surname><given-names>S</given-names> </name><name name-style="western"><surname>Aramaki</surname><given-names>E</given-names> </name></person-group><article-title>Real-MedNLP: overview of real document-based medical natural language processing task</article-title><access-date>2024-02-10</access-date><conf-name>Proceedings of the 16th NTCIR Conference on Evaluation of Information Access Technologies</conf-name><conf-date>Jun 14-17, 2022</conf-date><conf-loc>Tokyo, Japan</conf-loc><fpage>285</fpage><lpage>296</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/01-NTCIR16-OV-MEDNLP-YadaS.pdf">https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/01-NTCIR16-OV-MEDNLP-YadaS.pdf</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beuken</surname><given-names>MJM</given-names> </name><name name-style="western"><surname>Kanera</surname><given-names>IM</given-names> </name><name name-style="western"><surname>Ezendam</surname><given-names>NPM</given-names> </name><name name-style="western"><surname>Braun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zoet</surname><given-names>M</given-names> </name></person-group><article-title>Identification and potential use of clusters of patients with colorectal cancer and patients with prostate cancer in clinical practice: explorative mixed methods study</article-title><source>JMIR Cancer</source><year>2022</year><month>12</month><day>27</day><volume>8</volume><issue>4</issue><fpage>e42908</fpage><pub-id pub-id-type="doi">10.2196/42908</pub-id><pub-id pub-id-type="medline">36574281</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ben Yehuda</surname><given-names>O</given-names> </name><name name-style="western"><surname>Itelman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Vaisman</surname><given-names>A</given-names> </name><name name-style="western"><surname>Segal</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lerner</surname><given-names>B</given-names> </name></person-group><article-title>Early detection of pulmonary embolism in a general patient population immediately upon hospital admission using machine learning to identify 
new, unidentified risk factors: model development study</article-title><source>J Med Internet Res</source><year>2024</year><month>07</month><day>30</day><volume>26</volume><fpage>e48595</fpage><pub-id pub-id-type="doi">10.2196/48595</pub-id><pub-id pub-id-type="medline">39079116</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>W</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name></person-group><article-title>A document clustering and ranking system for exploring MEDLINE citations</article-title><source>J Am Med Inform Assoc</source><year>2007</year><volume>14</volume><issue>5</issue><fpage>651</fpage><lpage>661</lpage><pub-id pub-id-type="doi">10.1197/jamia.M2215</pub-id><pub-id pub-id-type="medline">17600104</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patterson</surname><given-names>O</given-names> </name><name name-style="western"><surname>Hurdle</surname><given-names>JF</given-names> </name></person-group><article-title>Document clustering of clinical narratives: a systematic study of clinical sublanguages</article-title><source>AMIA Annu Symp Proc</source><year>2011</year><volume>2011</volume><fpage>1099</fpage><lpage>1107</lpage><pub-id pub-id-type="medline">22195171</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Agarwal</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Gpt-4 technical report</article-title><source>arXiv</source><access-date>2024-02-10</access-date><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lavril</surname><given-names>T</given-names> </name><name name-style="western"><surname>Izacard</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Llama: open and efficient foundation language models</article-title><source>arXiv</source><access-date>2024-02-10</access-date><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Agrawal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hegselmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sontag</surname><given-names>D</given-names> </name></person-group><article-title>Large language models are few-shot clinical information extractors</article-title><conf-name>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Dec 7-11, 2022</conf-date><conf-loc>Abu Dhabi, United Arab 
Emirates</conf-loc><fpage>1998</fpage><lpage>2022</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2022.emnlp-main">https://aclanthology.org/2022.emnlp-main</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2022.emnlp-main.130</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mongan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Moy</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kahn</surname><given-names>CE</given-names> </name></person-group><article-title>Checklist for Artificial Intelligence in Medical Imaging (CLAIM): a guide for authors and reviewers</article-title><source>Radiol Artif Intell</source><year>2020</year><month>03</month><volume>2</volume><issue>2</issue><fpage>e200029</fpage><pub-id pub-id-type="doi">10.1148/ryai.2020200029</pub-id><pub-id pub-id-type="medline">33937821</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tejani</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Klontzas</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Gatti</surname><given-names>AA</given-names> </name><etal/></person-group><article-title>Checklist for Artificial Intelligence in Medical Imaging (CLAIM): 2024 update</article-title><source>Radiol Artif Intell</source><year>2024</year><month>07</month><volume>6</volume><issue>4</issue><fpage>e240300</fpage><pub-id pub-id-type="doi">10.1148/ryai.240300</pub-id><pub-id pub-id-type="medline">38809149</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Tejani</surname><given-names>AS</given-names> </name></person-group><article-title>To BERT or not to BERT: advancing non-invasive prediction of tumor biomarkers using transformer-based natural language processing (NLP)</article-title><source>Eur Radiol</source><year>2023</year><month>11</month><volume>33</volume><issue>11</issue><fpage>8014</fpage><lpage>8016</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-10224-y</pub-id><pub-id pub-id-type="medline">37740083</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Olthof</surname><given-names>AW</given-names> </name><name name-style="western"><surname>van Ooijen</surname><given-names>PMA</given-names> </name><name name-style="western"><surname>Cornelissen</surname><given-names>LJ</given-names> </name></person-group><article-title>Deep learning-based natural language processing in radiology: the impact of report complexity, disease prevalence, dataset size, and algorithm type on model performance</article-title><source>J Med Syst</source><year>2021</year><month>09</month><day>4</day><volume>45</volume><issue>10</issue><fpage>91</fpage><pub-id pub-id-type="doi">10.1007/s10916-021-01761-4</pub-id><pub-id pub-id-type="medline">34480231</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schachner</surname><given-names>T</given-names> </name><name name-style="western"><surname>Keller</surname><given-names>R</given-names> </name><name name-style="western"><surname>V Wangenheim</surname><given-names>F</given-names> </name></person-group><article-title>Artificial Intelligence-Based conversational agents for chronic conditions: systematic literature review</article-title><source>J Med Internet 
Res</source><year>2020</year><month>09</month><day>14</day><volume>22</volume><issue>9</issue><fpage>e20701</fpage><pub-id pub-id-type="doi">10.2196/20701</pub-id><pub-id pub-id-type="medline">32924957</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nakamura</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hanaoka</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nomura</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Clinical comparable corpus describing the same subjects with different expressions</article-title><conf-name>MEDINFO 2021</conf-name><fpage>253</fpage><lpage>257</lpage><pub-id pub-id-type="doi">10.3233/SHTI220073</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><article-title>Radiopaedia.org, the peer-reviewed collaborative radiology resource</article-title><source>Radiopaedia</source><access-date>2024-02-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://radiopaedia.org/">https://radiopaedia.org/</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Sasaki</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hirakawa</surname><given-names>M</given-names> </name><name name-style="western"><surname>Horie</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nakamura</surname><given-names>T</given-names> </name></person-group><article-title>ELYZA-japanese-llama-2-7b</article-title><source>Hugging face</source><access-date>2024-02-10</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b">https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Llama 2: open foundation and fine-tuned chat models</article-title><source>arXiv</source><access-date>2024-02-10</access-date><comment>Preprint posted online on 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2307.09288</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ortiz Su&#x00E1;rez</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Romary</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sagot</surname><given-names>B</given-names> </name></person-group><article-title>A monolingual approach to contextualized word embeddings for mid-resource languages</article-title><year>2020</year><access-date>2024-02-10</access-date><conf-name>In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics Association for Computational Linguistics</conf-name><fpage>1703</fpage><lpage>1714</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.aclweb.org/anthology/2020.acl-main.156">https://www.aclweb.org/anthology/2020.acl-main.156</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>Openai GPT-3 API</article-title><source>OpenAI</source><access-date>2024-02-10</access-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://openai.com/blog/openai-api">https://openai.com/blog/openai-api</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><source>yamagishi0824/rad-report-clustering GitHub</source><access-date>2024-02-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/yamagishi0824/rad-report-clustering">https://github.com/yamagishi0824/rad-report-clustering</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Seabold</surname><given-names>S</given-names> </name><name name-style="western"><surname>Perktold</surname><given-names>J</given-names> </name></person-group><article-title>Statsmodels: econometric and statistical modeling with python</article-title><year>2010</year><conf-name>Python in Science Conference</conf-name><conf-loc>Austin, Texas</conf-loc><pub-id pub-id-type="doi">10.25080/Majora-92bf1922-011</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>MedTxt-RR: radiology reports corpus &#x2013; NAIST social computing lab</article-title><source>Sociocom</source><access-date>2024-11-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://sociocom.naist.jp/medtxt-en/rr/">https://sociocom.naist.jp/medtxt-en/rr/</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><source>Research Ethics Committee of the Faculty of Medicine, The University of Tokyo</source><access-date>2024-12-25</access-date><publisher-name>The University of Tokyo, Graduate School of Medicine and Faculty of Medicine</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.m.u-tokyo.ac.jp/ethics/ethcom/gakugai2/index.html">https://www.m.u-tokyo.ac.jp/ethics/ethcom/gakugai2/index.html</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Noguchi</surname><given-names>R</given-names> </name></person-group><article-title>GunNLP at the NTCIR-16 real-MEDNLP task: collaborative filtering-based similar case identification method via structured data &#x201C;case matrix&#x201D;</article-title><year>2022</year><access-date>2024-02-10</access-date><conf-name>In: Proceedings of the 16th NTCIR Conference on Evaluation of Information Access Technologies</conf-name><fpage>349</fpage><lpage>352</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/11-NTCIR16-MEDNLP-NoguchiR.pdf">https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/11-NTCIR16-MEDNLP-NoguchiR.pdf</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nishiyama</surname><given-names>T</given-names> </name><name name-style="western"><surname>Nishidani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ando</surname><given-names>A</given-names> </name><etal/></person-group><article-title>NAISTSOC at the NTCIR-16 real-MedNLP task</article-title><year>2022</year><access-date>2024-02-10</access-date><conf-name>In: Proceedings of the 16th NTCIR Conference on Evaluation of Information Access Technologies</conf-name><fpage>330</fpage><lpage>333</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/07-NTCIR16-MEDNLP-NishiyamaT.pdf">https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/07-NTCIR16-MEDNLP-NishiyamaT.pdf</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ideuchi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tsuchiya</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Utiyama</surname><given-names>M</given-names> </name><etal/></person-group><article-title>NICTmed at the NTCIR-16 real-mednlp task</article-title><year>2022</year><access-date>2024-02-10</access-date><conf-name>In: Proceedings of the 16th NTCIR conference on evaluation of information access technologies</conf-name><fpage>339</fpage><lpage>344</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/09-NTCIR16-MEDNLP-IdeuchiM.pdf">https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/09-NTCIR16-MEDNLP-IdeuchiM.pdf</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Fujimoto</surname><given-names>K</given-names> </name><name name-style="western"><surname>Nishio</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sugiyama</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Approach for named entity recognition and case identification implemented by Zukyo-JA sub-team at the NTCIR-16 real-medNLP task</article-title><year>2022</year><access-date>2024-02-10</access-date><conf-name>In: Proceedings of the 16th NTCIR conference 
on evaluation of information access technologies</conf-name><fpage>322</fpage><lpage>329</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/06-NTCIR16-MEDNLP-FujimotoK.pdf">https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/06-NTCIR16-MEDNLP-FujimotoK.pdf</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name></person-group><article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title><source>J Med Internet Res</source><year>2023</year><month>10</month><day>4</day><volume>25</volume><fpage>e50638</fpage><pub-id pub-id-type="doi">10.2196/50638</pub-id><pub-id pub-id-type="medline">37792434</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Example of a prompt used as input for the LLM (Japanese original version), pseudo code illustrating the procedure for the rule-based processing, and data representation of extracted information based on the rule-based method.</p><media xlink:href="cancer_v11i1e57275_app1.docx" xlink:title="DOCX File, 731 KB"/></supplementary-material></app-group></back></article>