@comment{
  Bibliography of ten JMIR Publications journal articles (2024-2025).

  Conventions used in this file:
  - One field per line, brace-delimited values, month given as a BibTeX
    month macro (jan ... dec) so the style decides how it is printed.
  - All keywords of an entry live in ONE comma-separated keywords field.
    Classic BibTeX keeps only the first occurrence of a repeated field and
    warns about the rest, so one-keyword-per-field loses data.
  - For the same reason each entry has a single url field; a PubMed link is
    stored as pmid (the numeric id of http://www.ncbi.nlm.nih.gov/pubmed/<id>).
  - Characters that had degraded to a literal question mark inside abstracts
    were restored (greater-or-equal sign, minus signs, an en dash, spaces).
  - Acronyms and proper nouns in titles are brace-protected against
    sentence-casing styles.

  NOTE(review): several given names look rotated by the exporter (an initial
  placed before the given name, a lowercase given-name part). Author fields
  are kept exactly as exported -- verify against the DOI landing page before
  relying on rendered names.
}

% Zhong et al. (2025), J Med Internet Res 27:e56774
@article{info:doi/10.2196/56774,
  author   = {Zhong, Jinjia and Zhu, Ting and Huang, Yafang},
  title    = {Reporting Quality of {AI} Intervention in Randomized Controlled Trials in Primary Care: Systematic Review and Meta-Epidemiological Study},
  journal  = {J Med Internet Res},
  year     = {2025},
  month    = feb,
  day      = {25},
  volume   = {27},
  pages    = {e56774},
  keywords = {artificial intelligence, randomized controlled trial, reporting quality, primary care, meta-epidemiological study},
  abstract = {Background: The surge in artificial intelligence (AI) interventions in primary care trials lacks a study on reporting quality. Objective: This study aimed to systematically evaluate the reporting quality of both published randomized controlled trials (RCTs) and protocols for RCTs that investigated AI interventions in primary care. Methods: PubMed, Embase, Cochrane Library, MEDLINE, Web of Science, and CINAHL databases were searched for RCTs and protocols on AI interventions in primary care until November 2024. Eligible studies were published RCTs or full protocols for RCTs exploring AI interventions in primary care. The reporting quality was assessed using CONSORT-AI (Consolidated Standards of Reporting Trials--Artificial Intelligence) and SPIRIT-AI (Standard Protocol Items: Recommendations for Interventional Trials--Artificial Intelligence) checklists, focusing on AI intervention--related items. Results: A total of 11,711 records were identified. In total, 19 published RCTs and 21 RCT protocols for 35 trials were included. The overall proportion of adequately reported items was 65\% (172/266; 95\% CI 59\%-70\%) and 68\% (214/315; 95\% CI 62\%-73\%) for RCTs and protocols, respectively. The percentage of RCTs and protocols that reported a specific item ranged from 11\% (2/19) to 100\% (19/19) and from 10\% (2/21) to 100\% (21/21), respectively. The reporting of both RCTs and protocols exhibited similar characteristics and trends. They both lack transparency and completeness, which can be summarized in three aspects: without providing adequate information regarding the input data, without mentioning the methods for identifying and analyzing performance errors, and without stating whether and how the AI intervention and its code can be accessed. Conclusions: The reporting quality could be improved in both RCTs and protocols. This study helps promote the transparent and complete reporting of trials with AI interventions in primary care.},
  doi      = {10.2196/56774},
  url      = {https://www.jmir.org/2025/1/e56774},
  pmid     = {39998876},
}

% King et al. (2025), JMIR Public Health Surveill 11:e65699
@article{info:doi/10.2196/65699,
  author   = {King, C. Abby and Doueiri, N. Zakaria and Kaulberg, Ankita and Goldman Rosas, Lisa},
  title    = {The Promise and Perils of Artificial Intelligence in Advancing Participatory Science and Health Equity in Public Health},
  journal  = {JMIR Public Health Surveill},
  year     = {2025},
  month    = feb,
  day      = {14},
  volume   = {11},
  pages    = {e65699},
  keywords = {digital health, artificial intelligence, community-based participatory research, citizen science, health equity, societal trends, public health, viewpoint, policy makers, public participation, information technology, micro-level data, macro-level data, LLM, natural language processing, machine learning, language model, Our Voice},
  doi      = {10.2196/65699},
  url      = {https://publichealth.jmir.org/2025/1/e65699},
}

% Ahn et al. (2024), J Med Internet Res 26:e63476
@article{info:doi/10.2196/63476,
  author   = {Ahn, Seong-Ho and Yim, Kwangil and Won, Hyun-Sik and Kim, Kang-Min and Jeong, Dong-Hwa},
  title    = {Discovering Time-Varying Public Interest for {COVID-19} Case Prediction in {South Korea} Using Search Engine Queries: Infodemiology Study},
  journal  = {J Med Internet Res},
  year     = {2024},
  month    = dec,
  day      = {16},
  volume   = {26},
  pages    = {e63476},
  keywords = {COVID-19, confirmed case prediction, search engine queries, query expansion, word embedding, public health, case prediction, South Korea, search engine, infodemiology, infodemiology study, policy, lifestyle, machine learning, machine learning techniques, utilization, temporal variation, novel framework, temporal, web-based search, temporal semantics, prediction model, model},
  abstract = {Background: The number of confirmed COVID-19 cases is a crucial indicator of policies and lifestyles. Previous studies have attempted to forecast cases using machine learning techniques that use a previous number of case counts and search engine queries predetermined by experts. However, they have limitations in reflecting temporal variations in queries associated with pandemic dynamics. Objective: This study aims to propose a novel framework to extract keywords highly associated with COVID-19, considering their temporal occurrence. We aim to extract relevant keywords based on pandemic variations using query expansion. Additionally, we examine time-delayed web-based search behavior related to public interest in COVID-19 and adjust for better prediction performance. Methods: To capture temporal semantics regarding COVID-19, word embedding models were trained on a news corpus, and the top 100 words related to ``Corona'' were extracted over 4-month windows. Time-lagged cross-correlation was applied to select optimal time lags correlated to confirmed cases from the expanded queries. Subsequently, ElasticNet regression models were trained after reducing the feature dimensions using principal component analysis of the time-lagged features to predict future daily case counts. Results: Our approach successfully extracted relevant keywords depending on the pandemic phase, encompassing keywords directly related to COVID-19, such as its symptoms, and its societal impact. Specifically, during the first outbreak, keywords directly linked to COVID-19 and past infectious disease outbreaks similar to those of COVID-19 exhibited a high positive correlation. In the second phase of the pandemic, as community infections emerged, keywords related to the government's pandemic control policies were frequently observed with a high positive correlation. In the third phase of the pandemic, during the delta variant outbreak, keywords such as ``economic crisis'' and ``anxiety'' appeared, reflecting public fatigue. Consequently, prediction models trained by the extracted queries over 4-month windows outperformed previous methods for most predictions 1-14 days ahead. Notably, our approach showed significantly higher Pearson correlation coefficients than models based solely on the number of past cases for predictions 9-11 days ahead (P=.02, P<.01, and P<.01), in contrast to heuristic- and symptom-based query sets. Conclusions: This study proposes a novel COVID-19 case-prediction model that automatically extracts relevant queries over time using word embedding. The model outperformed previous methods that relied on static symptom-based or heuristic queries, even without prior expert knowledge. The results demonstrate the capability of our approach to track temporal shifts in public interest regarding changes in the pandemic.},
  doi      = {10.2196/63476},
  url      = {https://www.jmir.org/2024/1/e63476},
}

% Georgescu et al. (2024), JMIR Form Res 8:e55856
@article{info:doi/10.2196/55856,
  author   = {Georgescu, Livia Alexandra and Cummins, Nicholas and Molimpakis, Emilia and Giacomazzi, Eduardo and Rodrigues Marczyk, Joana and Goria, Stefano},
  title    = {Screening for Depression and Anxiety Using a Nonverbal Working Memory Task in a Sample of Older {Brazilians}: Observational Study of Preliminary Artificial Intelligence Model Transferability},
  journal  = {JMIR Form Res},
  year     = {2024},
  month    = dec,
  day      = {12},
  volume   = {8},
  pages    = {e55856},
  keywords = {depression, anxiety, Brazil, machine learning, n-back, working memory, artificial intelligence, gerontology, older adults, mental health, AI, transferability, detection, screening, questionnaire, longitudinal study},
  abstract = {Background: Anxiety and depression represent prevalent yet frequently undetected mental health concerns within the older population. The challenge of identifying these conditions presents an opportunity for artificial intelligence (AI)--driven, remotely available, tools capable of screening and monitoring mental health. A critical criterion for such tools is their cultural adaptability to ensure effectiveness across diverse populations. Objective: This study aims to illustrate the preliminary transferability of two established AI models designed to detect high depression and anxiety symptom scores. The models were initially trained on data from a nonverbal working memory game (1- and 2-back tasks) in a dataset by thymia, a company that develops AI solutions for mental health and well-being assessments, encompassing over 6000 participants from the United Kingdom, United States, Mexico, Spain, and Indonesia. We seek to validate the models' performance by applying it to a new dataset comprising older Brazilian adults, thereby exploring its transferability and generalizability across different demographics and cultures. Methods: A total of 69 Brazilian participants aged 51-92 years old were recruited with the help of La{\c{c}}os Sa{\'u}de, a company specializing in nurse-led, holistic home care. Participants received a link to the thymia dashboard every Monday and Thursday for 6 months. The dashboard had a set of activities assigned to them that would take 10-15 minutes to complete, which included a 5-minute game with two levels of the n-back tasks. Two Random Forest models trained on thymia data to classify depression and anxiety based on thresholds defined by scores of the Patient Health Questionnaire (8 items) (PHQ-8) $\geq$10 and those of the Generalized Anxiety Disorder Assessment (7 items) (GAD-7) $\geq$10, respectively, were subsequently tested on the La{\c{c}}os Sa{\'u}de patient cohort. Results: The depression classification model exhibited robust performance, achieving an area under the receiver operating characteristic curve (AUC) of 0.78, a specificity of 0.69, and a sensitivity of 0.72. The anxiety classification model showed an initial AUC of 0.63, with a specificity of 0.58 and a sensitivity of 0.64. This performance surpassed a benchmark model using only age and gender, which had AUCs of 0.47 for PHQ-8 and 0.53 for GAD-7. After recomputing the AUC scores on a cross-sectional subset of the data (the first n-back game session), we found AUCs of 0.79 for PHQ-8 and 0.76 for GAD-7. Conclusions: This study successfully demonstrates the preliminary transferability of two AI models trained on a nonverbal working memory task, one for depression and the other for anxiety classification, to a novel sample of older Brazilian adults. Future research could seek to replicate these findings in larger samples and other cultural contexts. Trial Registration: ISRCTN Registry ISRCTN90727704; https://www.isrctn.com/ISRCTN90727704},
  doi      = {10.2196/55856},
  url      = {https://formative.jmir.org/2024/1/e55856},
}

% Chung et al. (2024), J Med Internet Res 26:e58413
@article{info:doi/10.2196/58413,
  author   = {Chung, young Wou and Yoon, Jinsik and Yoon, Dukyong and Kim, Songsoo and Kim, Yujeong and Park, Eun Ji and Kang, Ae Young},
  title    = {Development and Validation of Deep Learning--Based Infectivity Prediction in Pulmonary Tuberculosis Through Chest Radiography: Retrospective Study},
  journal  = {J Med Internet Res},
  year     = {2024},
  month    = nov,
  day      = {7},
  volume   = {26},
  pages    = {e58413},
  keywords = {pulmonary tuberculosis, chest radiography, artificial intelligence, tuberculosis, TB, smear, smear test, culture test, diagnosis, treatment, deep learning, CXR, PTB, management, cost effective, asymptomatic infection, diagnostic tools, infectivity, AI tool, cohort},
  abstract = {Background: Pulmonary tuberculosis (PTB) poses a global health challenge owing to the time-intensive nature of traditional diagnostic tests such as smear and culture tests, which can require hours to weeks to yield results. Objective: This study aimed to use artificial intelligence (AI)--based chest radiography (CXR) to evaluate the infectivity of patients with PTB more quickly and accurately compared with traditional methods such as smear and culture tests. Methods: We used DenseNet121 and visualization techniques such as gradient-weighted class activation mapping and local interpretable model-agnostic explanations to demonstrate the decision-making process of the model. We analyzed 36,142 CXR images of 4492 patients with PTB obtained from Severance Hospital, focusing specifically on the lung region through segmentation and cropping with TransUNet. We used data from 2004 to 2020 to train the model, data from 2021 for testing, and data from 2022 to 2023 for internal validation. In addition, we used 1978 CXR images of 299 patients with PTB obtained from Yongin Severance Hospital for external validation. Results: In the internal validation, the model achieved an accuracy of 73.27\%, an area under the receiver operating characteristic curve of 0.79, and an area under the precision-recall curve of 0.77. In the external validation, it exhibited an accuracy of 70.29\%, an area under the receiver operating characteristic curve of 0.77, and an area under the precision-recall curve of 0.8. In addition, gradient-weighted class activation mapping and local interpretable model-agnostic explanations provided insights into the decision-making process of the AI model. Conclusions: This proposed AI tool offers a rapid and accurate alternative for evaluating PTB infectivity through CXR, with significant implications for enhancing screening efficiency by evaluating infectivity before sputum test results in clinical settings, compared with traditional smear and culture tests.},
  doi      = {10.2196/58413},
  url      = {https://www.jmir.org/2024/1/e58413},
}

% Paiva et al. (2024), JMIR Med Inform 12:e54246
@article{info:doi/10.2196/54246,
  author   = {Paiva, Bruno and
              Gon{\c{c}}alves, Andr{\'e} Marcos and
              da Rocha, Dutra Leonardo Chaves and
              Marcolino, Soriano Milena and
              Lana, Barbosa Fernanda Cristina and
              Souza-Silva, Rego Maira Viana and
              Almeida, M. Jussara and
              Pereira, Delfino Polianna and
              de Andrade, Valiense Claudio Mois{\'e}s and
              Gomes, Reis Ang{\'e}lica Gomides dos and
              Ferreira, Pires Maria Ang{\'e}lica and
              Bartolazzi, Frederico and
              Sacioto, Furtado Manuela and
              Boscato, Paula Ana and
              Guimar{\~a}es-J{\'u}nior, Henriques Milton and
              dos Reis, Pereira Priscilla and
              Costa, Roberto Fel{\'i}cio and
              Jorge, Oliveira Alzira de and
              Coelho, Reis Laryssa and
              Carneiro, Marcelo and
              Sales, Souza Tha{\'i}s Lorenna and
              Ara{\'u}jo, Ferreira Silvia and
              Silveira, Vit{\'o}rio Daniel and
              Ruschel, Brasil Karen and
              Santos, Veloso Fernanda Caldeira and
              Cenci, Almeida Evelin Paola de and
              Menezes, Monteiro Luanna Silva and
              Anschau, Fernando and
              Bicalho, Camargos Maria Aparecida and
              Manenti, Fernandes Euler Roberto and
              Finger, Goulart Renan and
              Ponce, Daniela and
              de Aguiar, Carrilho Filipe and
              Marques, Margoto Luiza and
              de Castro, C{\'e}sar Lu{\'i}s and
              Vietta, Gr{\"u}newald Giovanna and
              Godoy, de Mariana Frizzo and
              Vila{\c{c}}a, Nascimento Mariana do and
              Morais, Costa Vivian},
  title    = {A New Natural Language Processing--Inspired Methodology (Detection, Initial Characterization, and Semantic Characterization) to Investigate Temporal Shifts (Drifts) in Health Care Data: Quantitative Study},
  journal  = {JMIR Med Inform},
  year     = {2024},
  month    = oct,
  day      = {28},
  volume   = {12},
  pages    = {e54246},
  keywords = {health care, machine learning, data drifts, temporal drifts},
  abstract = {Background: Proper analysis and interpretation of health care data can significantly improve patient outcomes by enhancing services and revealing the impacts of new technologies and treatments. Understanding the substantial impact of temporal shifts in these data is crucial. For example, COVID-19 vaccination initially lowered the mean age of at-risk patients and later changed the characteristics of those who died. This highlights the importance of understanding these shifts for assessing factors that affect patient outcomes. Objective: This study aims to propose detection, initial characterization, and semantic characterization (DIS), a new methodology for analyzing changes in health outcomes and variables over time while discovering contextual changes for outcomes in large volumes of data. Methods: The DIS methodology involves 3 steps: detection, initial characterization, and semantic characterization. Detection uses metrics such as Jensen-Shannon divergence to identify significant data drifts. Initial characterization offers a global analysis of changes in data distribution and predictive feature significance over time. Semantic characterization uses natural language processing--inspired techniques to understand the local context of these changes, helping identify factors driving changes in patient outcomes. By integrating the outcomes from these 3 steps, our results can identify specific factors (eg, interventions and modifications in health care practices) that drive changes in patient outcomes. DIS was applied to the Brazilian COVID-19 Registry and the Medical Information Mart for Intensive Care, version IV (MIMIC-IV) data sets. Results: Our approach allowed us to (1) identify drifts effectively, especially using metrics such as the Jensen-Shannon divergence, and (2) uncover reasons for the decline in overall mortality in both the COVID-19 and MIMIC-IV data sets, as well as changes in the cooccurrence between different diseases and this particular outcome. Factors such as vaccination during the COVID-19 pandemic and reduced iatrogenic events and cancer-related deaths in MIMIC-IV were highlighted. The methodology also pinpointed shifts in patient demographics and disease patterns, providing insights into the evolving health care landscape during the study period. Conclusions: We developed a novel methodology combining machine learning and natural language processing techniques to detect, characterize, and understand temporal shifts in health care data. This understanding can enhance predictive algorithms, improve patient outcomes, and optimize health care resource allocation, ultimately improving the effectiveness of machine learning predictive algorithms applied to health care data. Our methodology can be applied to a variety of scenarios beyond those discussed in this paper.},
  doi      = {10.2196/54246},
  url      = {https://medinform.jmir.org/2024/1/e54246},
}

% Wagner et al. (2024), JMIR Public Health Surveill 10:e58358
@article{info:doi/10.2196/58358,
  author   = {Wagner, K. Jennifer and Doerr, Megan and Schmit, D. Cason},
  title    = {{AI} Governance: A Challenge for Public Health},
  journal  = {JMIR Public Health Surveill},
  year     = {2024},
  month    = sep,
  day      = {30},
  volume   = {10},
  pages    = {e58358},
  keywords = {artificial intelligence, legislation and jurisprudence, harm reduction, social determinants of health, one health, AI, invisible algorithms, modern life, public health, engagement, AI governance, traditional regulation, soft law},
  doi      = {10.2196/58358},
  url      = {https://publichealth.jmir.org/2024/1/e58358},
}

% Dong et al. (2024), JMIR Public Health Surveill 10:e57437
@article{info:doi/10.2196/57437,
  author   = {Dong, Xing-Xuan and Huang, Yueqing and Miao, Yi-Fan and Hu, Hui-Hui and Pan, Chen-Wei and Zhang, Tianyang and Wu, Yibo},
  title    = {Personality and Health-Related Quality of Life of Older {Chinese} Adults: Cross-Sectional Study and Moderated Mediation Model Analysis},
  journal  = {JMIR Public Health Surveill},
  year     = {2024},
  month    = sep,
  day      = {12},
  volume   = {10},
  pages    = {e57437},
  keywords = {personality, health-related quality of life, older adults, sleep quality, quality of life, old, older, Chinese, China, mechanisms, psychology, behavior, analysis, hypothesis, neuroticism, mediation analysis, health care providers, aging},
  abstract = {Background: Personality has an impact on the health-related quality of life (HRQoL) of older adults. However, the relationship and mechanisms of the 2 variables are controversial, and few studies have been conducted on older adults. Objective: The aim of this study was to explore the relationship between personality and HRQoL and the mediating and moderating roles of sleep quality and place of residence in this relationship. Methods: A total of 4123 adults 60 years and older were from the Psychology and Behavior Investigation of Chinese Residents survey. Participants were asked to complete the Big Five Inventory, the Brief version of the Pittsburgh Sleep Quality Index, and EQ-5D-5L. A backpropagation neural network was used to explore the order of factors contributing to HRQoL. Path analysis was performed to evaluate the mediation hypothesis. Results: As of August 31, 2022, we enrolled 4123 older adults 60 years and older. Neuroticism and extraversion were strong influencing factors of HRQoL (normalized importance >50\%). The results of the mediation analysis suggested that neuroticism and extraversion may enhance and diminish, respectively, HRQoL (index: $\beta$=$-$.262, P<.001; visual analog scale: $\beta$=$-$.193, P<.001) by increasing and decreasing brief version of the Pittsburgh Sleep Quality Index scores (neuroticism: $\beta$=.17, P<.001; extraversion: $\beta$=$-$.069, P<.001). The multigroup analysis suggested a significant moderating effect of the place of residence (EQ-5D-5L index: P<.001; EQ-5D-5L visual analog scale: P<.001). No significant direct effect was observed between extraversion and EQ-5D-5L index in urban older residents ($\beta$=.037, P=.73). Conclusions: This study sheds light on the potential mechanisms of personality and HRQoL among older Chinese adults and can help health care providers and relevant departments take reasonable measures to promote healthy aging.},
  doi      = {10.2196/57437},
  url      = {https://publichealth.jmir.org/2024/1/e57437},
}

% Oyebola et al. (2024), JMIRx Med 5:e56993
@article{info:doi/10.2196/56993,
  author   = {Oyebola, Kolapo and Ligali, Funmilayo and Owoloye, Afolabi and Erinwusi, Blessing and Alo, Yetunde and Musa, Z. Adesola and Aina, Oluwagbemiga and Salako, Babatunde},
  title    = {Machine Learning--Based Hyperglycemia Prediction: Enhancing Risk Assessment in a Cohort of Undiagnosed Individuals},
  journal  = {JMIRx Med},
  year     = {2024},
  month    = sep,
  day      = {11},
  volume   = {5},
  pages    = {e56993},
  keywords = {hyperglycemia, diabetes, machine learning, hypertension, random forest},
  abstract = {Background: Noncommunicable diseases continue to pose a substantial health challenge globally, with hyperglycemia serving as a prominent indicator of diabetes. Objective: This study employed machine learning algorithms to predict hyperglycemia in a cohort of individuals who were asymptomatic and unraveled crucial predictors contributing to early risk identification. Methods: This dataset included an extensive array of clinical and demographic data obtained from 195 adults who were asymptomatic and residing in a suburban community in Nigeria. The study conducted a thorough comparison of multiple machine learning algorithms to ascertain the most effective model for predicting hyperglycemia. Moreover, we explored feature importance to pinpoint correlates of high blood glucose levels within the cohort. Results: Elevated blood pressure and prehypertension were recorded in 8 (4.1\%) and 18 (9.2\%) of the 195 participants, respectively. A total of 41 (21\%) participants presented with hypertension, of which 34 (83\%) were female. However, sex adjustment showed that 34 of 118 (28.8\%) female participants and 7 of 77 (9\%) male participants had hypertension. Age-based analysis revealed an inverse relationship between normotension and age (r=$-$0.88; P=.02). Conversely, hypertension increased with age (r=0.53; P=.27), peaking between 50--59 years. Of the 195 participants, isolated systolic hypertension and isolated diastolic hypertension were recorded in 16 (8.2\%) and 15 (7.7\%) participants, respectively, with female participants recording a higher prevalence of isolated systolic hypertension (11/16, 69\%) and male participants reporting a higher prevalence of isolated diastolic hypertension (11/15, 73\%). Following class rebalancing, the random forest classifier gave the best performance (accuracy score 0.89; receiver operating characteristic--area under the curve score 0.89; F1-score 0.89) of the 26 model classifiers. The feature selection model identified uric acid and age as important variables associated with hyperglycemia. Conclusions: The random forest classifier identified significant clinical correlates associated with hyperglycemia, offering valuable insights for the early detection of diabetes and informing the design and deployment of therapeutic interventions. However, to achieve a more comprehensive understanding of each feature's contribution to blood glucose levels, modeling additional relevant clinical features in larger datasets could be beneficial.},
  doi      = {10.2196/56993},
  url      = {https://xmed.jmir.org/2024/1/e56993},
}

% Chen et al. (2024), J Med Internet Res 26:e58158
@article{info:doi/10.2196/58158,
  author   = {Chen, Xi and Wang, Li and You, MingKe and Liu, WeiZhi and Fu, Yu and Xu, Jie and Zhang, Shaoting and Chen, Gang and Li, Kang and Li, Jian},
  title    = {Evaluating and Enhancing Large Language Models' Performance in Domain-Specific Medicine: Development and Usability Study With {DocOA}},
  journal  = {J Med Internet Res},
  year     = {2024},
  month    = jul,
  day      = {22},
  volume   = {26},
  pages    = {e58158},
  keywords = {large language model, retrieval-augmented generation, domain-specific benchmark framework, osteoarthritis management},
  abstract = {Background: The efficacy of large language models (LLMs) in domain-specific medicine, particularly for managing complex diseases such as osteoarthritis (OA), remains largely unexplored. Objective: This study focused on evaluating and enhancing the clinical capabilities and explainability of LLMs in specific domains, using OA management as a case study. Methods: A domain-specific benchmark framework was developed to evaluate LLMs across a spectrum from domain-specific knowledge to clinical applications in real-world clinical scenarios. DocOA, a specialized LLM designed for OA management integrating retrieval-augmented generation and instructional prompts, was developed. It can identify the clinical evidence upon which its answers are based through retrieval-augmented generation, thereby demonstrating the explainability of those answers. The study compared the performance of GPT-3.5, GPT-4, and a specialized assistant, DocOA, using objective and human evaluations. Results: Results showed that general LLMs such as GPT-3.5 and GPT-4 were less effective in the specialized domain of OA management, particularly in providing personalized treatment recommendations. However, DocOA showed significant improvements. Conclusions: This study introduces a novel benchmark framework that assesses the domain-specific abilities of LLMs in multiple aspects, highlights the limitations of generalized LLMs in clinical contexts, and demonstrates the potential of tailored approaches for developing domain-specific medical LLMs.},
  doi      = {10.2196/58158},
  url      = {https://www.jmir.org/2024/1/e58158},
  pmid     = {38833165},
}