% Moll et al., Chest 158(3), 2020: machine-learning mortality prediction (MLMP-COPD) model.
% Math relations in the abstract are written in math mode so they typeset correctly.
@article{1675061,
	author = {Moll, Matthew and Qiao, Dandi and Regan, Elizabeth A and Hunninghake, Gary M and Make, Barry J and Tal-Singer, Ruth and McGeachie, Michael J and Castaldi, Peter J and San Jose Estepar, Raul and Washko, George R and Wells, James M and LaFon, David and Strand, Matthew and Bowler, Russell P and Han, MeiLan K and Vestbo, Jorgen and Celli, Bartolome and Calverley, Peter and Crapo, James and Silverman, Edwin K and Hobbs, Brian D and Cho, Michael H},
	title = {Machine Learning and Prediction of All-Cause Mortality in {COPD}},
	journal = {Chest},
	volume = {158},
	number = {3},
	pages = {952--964},
	year = {2020},
	month = sep,
	issn = {1931-3543},
	doi = {10.1016/j.chest.2020.02.079},
	keywords = {Cause of Death, Female, Humans, machine learning, Male, Middle Aged, Predictive Value of Tests, Pulmonary Disease, Chronic Obstructive, Respiratory Function Tests},
	abstract = {BACKGROUND: COPD is a leading cause of mortality.

RESEARCH QUESTION: We hypothesized that applying machine learning to clinical and quantitative CT imaging features would improve mortality prediction in COPD.

STUDY DESIGN AND METHODS: We selected 30 clinical, spirometric, and imaging features as inputs for a random survival forest. We used top features in a Cox regression to create a machine learning mortality prediction (MLMP) in COPD model and also assessed the performance of other statistical and machine learning models. We trained the models in subjects with moderate to severe COPD from a subset of subjects in Genetic Epidemiology of COPD (COPDGene) and tested prediction performance in the remainder of individuals with moderate to severe COPD in COPDGene and Evaluation of COPD Longitudinally to Identify Predictive Surrogate Endpoints (ECLIPSE). We compared our model with the BMI, airflow obstruction, dyspnea, exercise capacity (BODE) index; BODE modifications; and the age, dyspnea, and airflow obstruction index.

RESULTS: We included 2,632 participants from COPDGene and 1,268 participants from ECLIPSE. The top predictors of mortality were 6-min walk distance, FEV1 \%~predicted, and age. The top imaging predictor was pulmonary artery-to-aorta ratio. The MLMP-COPD model resulted in a C index~$\geq$ 0.7 in both COPDGene and ECLIPSE (6.4- and 7.2-year median follow-ups, respectively), significantly better than all tested mortality indexes (P~$<$ .05). The MLMP-COPD model had fewer predictors but similar performance to that of other models. The group with the highest BODE scores (7-10) had 64\%~mortality, whereas the highest mortality group defined by the MLMP-COPD model had 77\%~mortality (P~= .012).

INTERPRETATION: An MLMP-COPD model outperformed four existing models for predicting all-cause mortality across two COPD cohorts. Performance of machine learning was similar to that of traditional statistical methods. The model is available online at: https://cdnm.shinyapps.io/cgmortalityapp/.},
}