Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 47,606 characters · extracted from preprint-html · click to expand
Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children View ORCID Profile Patrick H. Cahill , Aleisha Anderson , Daniel Yeoh , Matthew O’Brien , Timothy Robertson , Matthew Clifford , Cazz Finnucane , Andrew Martin , Katherine Stannage , View ORCID Profile Christopher Blyth , Julie Marsh , Asha C. Bowen , Tom Snelling , Charlie McLeod , View ORCID Profile Yue Wu doi: https://doi.org/10.1101/2025.06.26.25330329 Patrick H. 
Cahill 1 Sydney School of Public Health, Faculty of Medicine and Health, Edward Ford Building (A27), University of Sydney NSW 2006 , Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Patrick H. Cahill Aleisha Anderson 2 Infectious Diseases Department, Perth Children’s Hospital , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel Yeoh 2 Infectious Diseases Department, Perth Children’s Hospital , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matthew O’Brien 3 Monash Children’s Hospital , Melbourne, VIC, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Timothy Robertson Find this author on Google Scholar Find this author on PubMed Search for this author on this site Matthew Clifford 4 Department of Radiology, Perth Children’s Hospital , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Cazz Finnucane 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Andrew Martin 6 Department of General Paediatrics, Perth Children’s Hospital , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Katherine Stannage 7 Department of Orthopaedics, Perth Children’s Hospital , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Christopher Blyth 2 Infectious Diseases Department, Perth Children’s Hospital , Nedlands, Australia 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia 8 Centre for 
Child Health Research, School of Medicine, University of Western Australia , Nedlands, Australia 9 Department of Microbiology, QEII Pathwest Laboratory , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Christopher Blyth Julie Marsh 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Asha C. Bowen 2 Infectious Diseases Department, Perth Children’s Hospital , Nedlands, Australia 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia 8 Centre for Child Health Research, School of Medicine, University of Western Australia , Nedlands, Australia 10 Menzies School of Health Research, Charles Darwin University , Tiwi, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tom Snelling 1 Sydney School of Public Health, Faculty of Medicine and Health, Edward Ford Building (A27), University of Sydney NSW 2006 , Australia 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia 10 Menzies School of Health Research, Charles Darwin University , Tiwi, Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site Charlie McLeod 1 Sydney School of Public Health, Faculty of Medicine and Health, Edward Ford Building (A27), University of Sydney NSW 2006 , Australia 2 Infectious Diseases Department, Perth Children’s Hospital , Nedlands, Australia 5 Wesfarmers Centre of Vaccines and Infectious Diseases, The Kid Research Institute of Australia , Nedlands, Australia 8 Centre for Child Health Research, School of Medicine, University of Western Australia , Nedlands, Australia Find this author on Google Scholar Find this 
author on PubMed Search for this author on this site Yue Wu 1 Sydney School of Public Health, Faculty of Medicine and Health, Edward Ford Building (A27), University of Sydney NSW 2006 , Australia Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yue Wu For correspondence: yue.wu1{at}sydney.edu.au Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background While osteoarticular infections of the bones and joints in children usually resolve completely with adequate treatment, some children will develop complications. Predicting which children are at highest risk of complications might enable prevention through more aggressive treatment. Methods We used mutual information and Kullback-Leibler divergence methods to compare the characteristics of osteoarticular infections at the same tertiary institution in Australia in two time periods, 2002-2007 (N=295) and 2016-2018 (N=192). We used expert knowledge to develop a causal directed acyclic graph (DAG) that depicts the mechanistic pathways of osteoarticular infections and their progression to complications. Guided by the DAG, we developed three logistic regression (LR) models for predicting complications and evaluated their area under curve (AUC) to assess their performance for predicting complications. Results We observed a shift in the approach to diagnostic testing over the two time periods, with an increase in the number of blood cultures performed and a decrease in the rate of wound cultures. Fourteen of 43 PCR tests (33%) for K. kingae recorded positive results. The established causal DAG clarified how the underlying, latent and dynamic biological processes become manifest as data. Utilising only data available at the initial point of care, the best LR model identified an optimal feature set that achieved an AUC of 0.85 for predicting complications. 
Conclusions Supported by domain expert knowledge and data, causal and statistical approaches were combined to offer valuable insights for predicting progression to complicated disease for children with osteoarticular infections. Key messages This study facilitates the understanding of change in epidemiology and clinical management of paediatric osteomyelitis and septic arthritis by examining a prospective cohort of 192 cases (2016-2018) in comparison to a similar study collected at an earlier date (2002-2007) from the same facility. We present a clarified understanding of the mechanistic pathways of osteoarticular infections and their progression to complications through the development of a causal directed acyclic graph (DAG) based on a previously published DAG, recently collected data and domain expert knowledge. We demonstrate an approach to combine causal DAG, BN-based predictions of the causative pathogen, logistic regression modelling, and recursive feature elimination to achieve optimal performance in predicting complicated disease. BACKGROUND Infections of the bone (osteomyelitis) and joints (septic arthritis) are collectively termed osteoarticular infection and occur in 4.3 per 100,000 Australian children per annum ( 1 ). The incidence among children from regional areas is estimated to be higher ( 2 ). While most children respond well to antimicrobial therapy, approximately 20% of osteoarticular infections require surgery ( 3 ) or extended hospitalisation ( 4 , 5 , 6 ). Some children develop chronic pain and permanent disability ( 5 ). Osteoarticular infections are typically bacterial. Certain bacterial pathogen(s) like methicillin-resistant Staphylococcus aureus (MRSA) have been implicated in complicated disease ( 7 ), however a pathogen is only identified in approximately half of cases ( 6 ). Staphylococcus aureus accounts for ∼60% of culture positive cases across all ages ( 5 ). 
Kingella kingae is now recognised as an important pathogen in pre-school-aged children ( 8 ). The epidemiology of osteoarticular infections has changed over time, with antimicrobial resistant strains becoming more prevalent ( 9 ). Early pathogen identification helps to support appropriate antibiotic selection in a timely manner ( 10 , 3 , 11 , 8 , 12 ) in order to avoid poor clinical outcomes. The pathogen causing osteoarticular infection is traditionally identified by culturing blood, wounds, bone or joint tissue and/or joint fluid; more recently molecular methods have become available using PCR. Blood cultures have low sensitivity due to the low frequency of detectable bacteraemia in osteoarticular infection ( 11 , 13 , 14 ). Bone, tissue and fluid cultures are more sensitive but require invasive sampling procedures ( 15 ). Superficial wound samples yield a high frequency of false positive culture results owing to contamination of the biological specimen with normal skin flora ( 16 , 17 ). Molecular methods can be more useful for identifying difficult-to-culture (fastidious) bacterial pathogens but are not uniformly available across different healthcare settings ( 18 , 19 ). Attempts to identify the causative pathogen(s) can be challenging and time-consuming, and are often ultimately unsuccessful. Antimicrobial therapy is usually commenced empirically prior to knowing the outcome of diagnostic tests. Causal Bayesian Networks (BN) depict the assumed causal mechanisms which underlie the clinical problem with a DAG, quantifying the strength of each causal relationship with conditional probabilities informed by expert knowledge and/or data-driven algorithms ( 14 ). BNs are a potentially valuable method for aggregating available clinical and laboratory patient data to provide an optimal estimate of the causative pathogen of infection ( 20 , 21 , 22 , 23 , 24 , 25 ) at the point of care. 
This could potentially improve empiric antibiotic selection and therefore patient outcomes. A simple BN can be used to explicitly model the mechanism underlying infection ( 26 , 27 , 28 ): namely, the relationships between demographic variables, a latent (unobserved) pathogen, various signs and symptoms, and test results. We have developed a BN model previously that reliably predicted the causative pathogen(s) for children with osteoarticular infections ( 11 ). This study aims to inform clinical management of osteoarticular infections in children by predicting who’s more likely to develop complications based on data available to clinicians at the time of initial hospital presentations. By comparing data collected at one hospital in two time-periods (2002-2007) ( 6 ) and (2016-2018), we describe the changing epidemiology and clinical management of osteoarticular infections. Guided by domain knowledge and data, we developed a causal DAG to depict the mechanism of osteoarticular infection, its management and complications. Upon clarification of the disease mechanisms and data generation process, we implemented a method to detect a robust set of features for predicting progression to complicated disease utilising both recently collected data and causative pathogens predicted by our previously published BN ( 11 ) in a regression model. METHODS In this section, we describe the collection and utilisation of data in understanding the epidemiology and clinical management of paediatric osteoarticular infections presenting to the tertiary paediatric referral hospital in Perth, Western Australia, over two time-periods. Data was collected between 2002-2007 (denoted hereafter as OM2002) ( 6 ) and 2016-2018 as part of the Western Australian Register of Septic Arthritis and Bone Infections prospective cohort study (WARSABI). 
OM2002 dataset OM2002 comprised children aged 3 months to <16 years old hospitalised at Princess Margaret Hospital (PMH) with a clinical diagnosis of acute haematogenous osteomyelitis between 1st of September 2002 and 31st of August 2007. Further details regarding the epidemiologic features of this cohort are described by Martin et al ( 6 ). WARSABI dataset The WARSABI cohort comprised children 0 to <18 years old hospitalised with osteoarticular infection at PMH or Perth Children’s Hospital (PCH) between the 18th of July 2016 and the 17th of July 2018. Eligibility criteria included children with confirmed or presumed osteoarticular infection based on consistent symptoms and signs with or without supporting laboratory and radiological evidence. Prosthetic infections were included. Children who were found to have an alternative diagnosis (such as malignancy) were excluded. A study nurse prospectively screened the emergency department admission log every weekday using the following terms: ‘osteomyelitis’, ‘arthritis’, ‘limping’, ‘pain’, ‘infection’, ‘effusion’, ‘ataxia’, and ‘bursitis’. Cases were also identified by notification of culture confirmed cases directly from laboratory or healthcare staff to a study investigator. Potentially eligible participants were provided with a hardcopy participant information sheet and written informed consent was obtained from guardians and participants ≥7 years old. Clinical data were collected on standardized case report forms and entered into a secure electronic database (REDCap) hosted at PMH/PCH ( 29 , 30 ). Participants provided demographic, presentation, prior treatment and comorbidity details at study entry. Subsequently, clinical and treatment data including surgical procedures were obtained from the medical records, and microbiological data and radiological results were obtained from the hospital’s clinical information system. Children were followed up until 12 months to ascertain clinical and functional outcomes. 
Cases were classified as community-acquired or healthcare-associated and as surgical site infections according to previously defined criteria ( 31 ). Infections were categorised according to the duration of symptoms before presentation: acute ( < 2 weeks), sub-acute (2 weeks to < 3 months), or chronic (≥3 months). The Bayesian network model The BN tested in this paper was trained using the data from OM2002, and its development has been detailed previously ( 11 ). This BN models the relationship between patient demographics, culture results and the sensitivity/specificity of those culture results with respect to the underlying latent causative pathogen of the infection. The structure of the model is shown in Figure 1 . Download figure Open in new tab Figure 1: BN for osteoarticular infections. Data variables collected from WARSABI participants were categorised into two broad categories: BN variables and Other variables , i.e. , variables referenced by the BN versus those which were not, respectively. For a given participant, the computed probability of each causative pathogen was conditioned on the observed values of demographic and laboratory BN variables for that participant. These probabilities were denoted BN pathogen prediction . Table 1 describes each BN variable and the prediction in detail, including any updates from ( 11 ), from which the model was adapted. View this table: View inline View popup Table 1: Complicated disease definition and BN variable definitions including any differences between WARSABI and OM2002. Changes in disease profile In the absence of a consensus definition for ‘complicated’ osteoarticular infection, we adopted a pragmatic definition defined in Table 1 that is similar to previous published definitions ( 6 , 32 ) and which captures clinically important outcomes. 
With a robust network structure, small departures in the underlying epidemiology of osteoarticular infection from the training set should not significantly affect the performance of BNs ( 33 ), but large departures could do so especially if the BN conditional probability tables (CPTs) are not re-learned ( 23 ). Therefore, we compared the distribution of the baseline variables between OM2002 and WARSABI by investigating their Kullback-Leibler (KL) divergence and by assessing the changes in the mutual information between those variables in each cohort. We considered the mutual information matrix of the OM2002, WARSABI and synthetic data drawn from the BN distribution as well as a random distribution for comparison. The synthetic dataset was produced from the BN described in ( 11 ) using the Netica ( https://norsys.com/ ) application to generate cases that matched the probability distribution. The random dataset was produced similarly with randomised CPTs. Each entry in the matrix is the mutual information between the variable in the column and the variable in the row. We present the ℓ 2 norm distance between each mutual information matrix. Development of a causal DAG for osteoarticular infection Adapting the structure of the BN presented in Figure 1 , we developed a causal DAG to depict the mechanistic processes and clinical pathway for children with osteoarticular infection from initial hospital presentation to final follow-up. We identified 5 relevant time steps: a child with osteoarticular infection presents to the hospital at time t −1 , is admitted to hospital at t 0 and the results of their cultures and other tests become available at t 1. The child is discharged from hospital at t 2 and is followed until t 3 , say one year after discharge. Each variable was sorted into these time steps based on when data is likely to become available to the clinician. 
From this we constructed the model through expert consultation, including the following latent variables: "causative pathogen(s)" which corresponds to the true infecting organism (i.e. the aetiology), and "severity" of the inflammatory process at the different time steps. We also included "perceived pathogen" to represent the clinician’s evolving ‘working diagnosis’ of the causative pathogen. This informs their decisions regarding testing and treatment over time, possibly giving rise to a "change in antibiotic therapy". Although not captured in the datasets, this variable was considered important for understanding how beliefs and observed evidence inform management decisions. Supplementary Table S1 shows the definitions and corresponding WARSABI variables in detail. The DAGs described in this paper were created using the GeNIe Modeler, https://www.bayesfusion.com/ . To investigate whether the BN pathogen prediction had value for predicting complications, we established three logistic regression models for predicting complications based on WARSABI data, each one only using data available at or before the time of admission, t 1 . The first model, denoted L BN , included only the BN variables , without access to the BN pathogen prediction . The second model, denoted L probs , included only the BN pathogen prediction which represents the BN variables data processed by the BN, i.e. the estimated probabilities of each causative pathogen. Thirdly, we developed a model using all variables up to t 1 , i.e. both the BN variables and the Other variables . To avoid overfitting of this model, we employed recursive feature elimination (RFE) to identify a small subset of the most predictive variables ( 34 , 35 ). The mean AUC score across 10 iterations was calculated for 5-fold cross validated models trained on every available variable using the ‘lbfgs’ solver with an ℓ 2 penalty. We then recorded the mean score again with one variable removed and repeated this across every variable. 
We created a subset containing all the variables except the one that least reduced (or most improved) the performance. We repeated this process until no variables remained. We selected the feature set that had the optimal mean AUC-score across this elimination procedure and denoted this the optimal set RFE and its corresponding logistic regression model as L RFE . We report the receiver operating characteristic (ROC) curves and mean area under curve (AUC) results with respect to predicting complicated disease for the three models, as well as the mean model parameters, mean standard errors and corresponding feature odds ratios over each iteration. All categorical variables were dummy-encoded, and the data was scaled so each variable possessed a zero mean and unit variance. Reported coefficients and errors corresponded to the scaled values. RESULTS WARSABI epidemiology Of the 192 children in the WARSABI cohort, 35% (N=67) were categorised as having complicated disease, of which 76% (N=51) were categorised as complicated because the patient required a single non-diagnostic surgical debridement. WARSABI and OM2002 data comparison At least one risk factor was present in 93% (N=178) of cases in the WARSABI dataset but risk factors were not reported for OM2002 ( 6 ). In OM2002, comorbidities were recorded as free text rather than coded. Table 2 shows a comparison of demographics, inflammatory markers, risk factors and comorbidities. The proportion of children with a blood culture result was 66% (N=195) in OM2002 and 75% (N=144) in WARSABI; the proportion with a wound culture result was 23% (N=67) and 15% (N=29), respectively, and the proportion with culture of specimens other than blood was 24% (N=72) and 31% (N=59), respectively. The proportion of children with no culture results was 23% (N=69) and 16% (N=31), respectively. In WARSABI, a PCR test for K. kingae was conducted in 22% (N=43) of cases; of these, 33% (N=14) were positive. 
No PCR results were reported for OM2002 ( 6 , 11 ). View this table: View inline View popup Table 2: Comparison of the WARSABI data and the OM2002 data Figure 2 shows the mutual information and Kullback-Leibler divergence between the OM2002, WARSABI, synthetic and randomised datasets. Download figure Open in new tab Figure 2: Comparison of the WARSABI data with the BN and the OM2002 dataset. Top : ℓ2 distance between MI matrices for BN, datasets and random BN. Bottom : Row KL divergence between WARSABI data and BN, using a log scale. Each column represents a different row of the CPT and is coloured according to which variable the CPT row corresponds. Causal DAG of osteoarticular infection Figure 3 depicts the causal pathways underlying osteoarticular infection in children who present to hospital. The DAG consists of 96 variables in total, with 35 relating to background factors (blue), 11 relating to the mechanistic disease pathway (purple), 6 relating to management (pink), 33 relating to an overlap between disease and management (green), and 11 relating to disease outcomes (red). Complicated disease is defined by the status of the red outcome nodes. Download figure Open in new tab Figure 3: Causal DAG of osteoarticular infections, its management and complication. Data-driven predictors of complicated disease We identified a set of 64 variables collected at (or before) t 1 which are therefore likely to be available to the clinician at admission ( Figure 3 ). See Table S1 for a full list and explanation of each variable included in this set. We performed recursive feature elimination as depicted in Figure 4 to produce an optimal feature set of variables for prediction of progression to complicated disease corresponding to the outcome sub-models occurring after t 1 in Figure 3 . 
We found that the optimal feature set, RFE , was: Others evidence, admitting team, up-to-date immunisations, bone fractures, erythema of the overlying skin , joint immobility at admission and creatinine at admission . Figure 4 shows the AUC score for each feature set by the feature set size. Similar results were observed when using an alternative (log-loss) scoring function to the AUC score (Figure S1). Table 3 presents the odds ratio and 95% confidence interval for each variable across the three models. View this table: View inline View popup Table 3: Odds ratio (OR) with 95% confidence interval of L RFE , L probs and L BN for each variable in each model. Variables are indicated in bold. For categorical variables, each value is indicated with italics. Download figure Open in new tab Figure 4: Recursive feature elimination results on WARSABI dataset. Left: Mean AUC over 10 iterations of 5-fold cross-validation, plotted against the size of the remaining feature set. Each scatter point gives the performance of the model after a specific variable was removed, while the black line shows the best performing feature set for each feature set length. The scatter points that lie on the line reflect the variable that was chosen for removal by the recursive feature elimination algorithm. Right: ROC curves comparing the performance of the model trained on the optimal feature set, L RFE ; performance of the model trained on the BN variables, L BN ; and the performance of the model trained on the causative pathogen probabilities, L probs . CONCLUSIONS We presented differences in the characteristics of children admitted with osteoarticular infections to the same paediatric hospital in Australia over two time periods, approximately one decade apart. 
We have developed a causal DAG modelling the clinical pathway of an osteoarticular infection, which enabled the implementation of a new model for investigating the performance of Bayesian networks, using them to predict progression to complicated disease. We also implemented a method to detect a robust set of features for predicting progression to complicated disease. The dynamic nature of this bacterial infection requires an understanding of the potential shifts in its epidemiology or clinical features over time. The demographic characteristics of patients in the WARSABI cohort were similar to those in OM2002, although the proportion of cases from remote areas increased. Figure 2 indicates that the differences between the BN and both the OM2002 dataset and the WARSABI dataset were less than the differences between the datasets themselves. This suggests that our BN, developed using expert-solicited opinion in combination with the OM2002 cohort data, effectively captured the relationships between the variables of interest, aiding the prediction of children at risk of disease complication at the time of admission. Our causal DAG ( Figure 3 ) proved useful in several ways. First, it helped clarify our definition of complicated disease ( Table 1 ). For example, earlier draft definitions included the presence of bacteraemia, however analysis of the DAG illustrates how this could only be realised if blood cultures were taken. Inclusion of blood culture result could overestimate the predictive value of our model. Secondly, it also helped to establish the set of variables used in the recursive feature elimination process by clarifying which variables (evidence) would be available to clinicians at the time of admission and therefore usable in a clinical context. We found that incorporating the BN pathogen prediction into a logistic regression model for predicting complicated disease improved the model beyond use of the BN variables alone. 
This provides support to the validity of our causal DAG ( Figure 3 ) which assumes a key role for the mostly latent (unobserved) causative pathogen in driving patient outcomes, and suggests the information encoded in the BN remains informative even after later observations become available and despite small departures from the training set relationships which may have occurred. We implemented recursive feature elimination to create an optimal feature set of variables for predicting progression to complicated disease. We found that the optimal set improved upon a regression model comprising the BN pathogen prediction alone with an AUC=0.84 under 5-fold cross-validation compared to an AUC=0.79. Moreover, we suggest that the identified variables are not only predictive, they may be causally important. Immunosuppression, bone fracture, postoperative infection, preceding URTI and preceding antibiotics lie in the disease-related risk factors node of the causal DAG. Others evidence lies in the culture results node. Concurrent SSTI lies in the disease-related comorbidities node. Joint immobility lies in preceding history. Respiratory rate lies in inflammation markers. Each node is a parent or a child of the latent severity at admission and the causative pathogen nodes. Both these unobserved nodes are hypothesised to drive patient outcomes and therefore, progression (or not) to complicated disease. It is plausible that some of the variables in the optimal set have predictive value because they provide evidence on these latent factors. While immunisation lies on the causal pathway, failure to document the immunisation status predicted complications rather than under-immunisation per se ; the mechanisms which might explain the relationship between completeness of documentation and the risk of complications should be explored. 
A limitation of the work presented here is that while we have provided evidence that the BN encodes relevant information, we have not quantified how much the epidemiology and management of osteoarticular infection have changed. The strength of this work is that it provides interpretable measurements of the performance of a BN for modelling new data. Moreover, we have developed an effective method for producing an optimal feature set for the prediction of future outcomes. It has the advantage that it can be applied to any classification task that requires one to predict an outcome that occurs after admission, i.e. after $t_1$ in the causal DAG in Figure 3. Future work will include treating the variables identified by the RFE process as candidates for inclusion in developing model-based clinical decision support tools for managing osteoarticular infections, for example, extending the structure of the simple BN, as well as updating the existing BN parameters using the WARSABI data. Ethics Ethics approval for the data collection in this study was obtained from the Child and Adolescent Health Service Human Research Ethics Committee (2016032EP). Ethics approval for the development of relevant causal and statistical models was obtained from the Sydney Children’s Hospitals Network Human Research Ethics Committee (2020/ETH02510). Data availability The data underlying this article cannot be shared publicly due to the privacy of individuals who participated in the study. Supplementary data Supplementary data may be available upon request, subject to ethics approval. Author contributions PC, YW, TS and CM contributed to the design of this project. CM, JM, AM, CB, AB, AM, KS and TS contributed to the design of the WARSABI study and CM led this study. TR, MO, AA, CF, MC, KS, CB, AB, TS and CM were involved in the implementation of the WARSABI study. PC and YW performed the analyses for this project. PC drafted the initial manuscript. 
All authors contributed and have approved the final version of the manuscript. Use of artificial intelligence tools No AI tools were used to produce this article. Funding The WARSABI study was supported by the Perth Children’s Hospital Foundation New Investigator Grant (2016, CIA McLeod) and a Wesfarmers Centre of Vaccines and Infectious Diseases Seed Grant (2016). Conflict of interest None declared. Footnotes ↵ + Joint senior Author This was revised in order to fix the rendering of Figure 3. References 1. ↵ Hunter S , Chan H , Baker JF . Global epidemiology of childhood bone and joint infection: a systematic review . Infection . 2022 ; 50 : 329 – 341 . OpenUrl PubMed 2. ↵ Brischetto A , Leung G , Marshall CS , Bowen AC . A retrospective case-series of children with bone and joint infection from Northern Australia . Medicine . 2016 ; 95 : e2885 . OpenUrl PubMed 3. ↵ Le Saux N . Diagnosis and management of acute osteoarticular infections in children . Paediatrics & child health . 2018 ; 23 : 336 – 343 . OpenUrl PubMed 4. ↵ Grammatico-Guillon L , Maakaroun Vermesse Z , Baron S , Gettner S , Rusch E , Bernard L . Paediatric bone and joint infections are more common in boys and toddlers: a national epidemiology study . Acta Paediatrica . 2013 ; 102 : e120 – e125 . OpenUrl CrossRef PubMed 5. ↵ Colston J , Atkins B . Bone and joint infection . Clinical Medicine . 2018 ; 18 : 150 – 154 . OpenUrl Abstract / FREE Full Text 6. ↵ Martin AC , Anderson D , Lucey J , Guttinger R , Jacoby PA , Mok TJ , et al. Predictors of outcome in pediatric osteomyelitis: five years experience in a single tertiary center . The Pediatric infectious disease journal . 2016 ; 35 : 387 – 391 . OpenUrl PubMed 7. ↵ Hawkshead III JJ , Patel NB , Steele RW , Heinrich SD . Comparative severity of pediatric osteomyelitis attributable to methicillin-resistant versus methicillin-sensitive Staphylococcus aureus . Journal of Pediatric Orthopaedics . 2009 Jan ; 29 ( 1 ): 85 – 90 . OpenUrl CrossRef PubMed 8. 
↵ Borg MA , Camilleri L . What is driving the epidemiology of methicillin-resistant Staphylococcus aureus infections in Europe? Microbial Drug Resistance . 2021 . 9. ↵ Cameron JK , Hall L , Tong SYC , Paterson DL , Halton K . Incidence of community onset MRSA in Australia: least reported where it is most prevalent . Antimicrobial Resistance & Infection Control . 2019 ; 8 : 1 – 9 . OpenUrl PubMed 10. ↵ Arnold JC , Bradley JS . Osteoarticular infections in children . Infectious Disease Clinics . 2015 ; 29 : 557 – 574 . OpenUrl PubMed 11. ↵ Wu Y , McLeod C , Blyth C , Bowen A , Martin A , Nicholson A , et al. Predicting the causative pathogen among children with osteomyelitis using Bayesian networks–improving antibiotic selection in clinical practice . Artificial Intelligence in Medicine . 2020 ; 107 : 101895 . 12. ↵ Gentry LO . Antibiotic therapy for osteomyelitis . Infectious Disease Clinics of North America . 1990 ; 4 : 485 – 499 . OpenUrl PubMed 13. ↵ Peltola H , Pääkkönen M , Kallio P , Kallio MJT , Osteomyelitis-Septic Arthritis (OM-SA) Study Group . Prospective, randomized trial of 10 days versus 30 days of antimicrobial treatment, including a short-term course of parenteral therapy, for childhood septic arthritis . Clinical Infectious Diseases . 2009 ; 48 : 1201 – 1210 . OpenUrl CrossRef PubMed Web of Science 14. ↵ Pollino CA , Woodberry O , Nicholson A , Korb K , Hart BT . Parameterisation and evaluation of a Bayesian network for use in an ecological risk assessment . Environmental Modelling & Software . 2007 ; 22 : 1140 – 1152 . OpenUrl 15. ↵ Howard CB , Einhorn M , Dagan R , Yagupski P , Porat S . Fine-needle bone biopsy to diagnose osteomyelitis . The Journal of Bone & Joint Surgery British Volume . 1994 ; 76 : 311 – 314 . OpenUrl PubMed 16. ↵ Vemu L , Sudhaharan S , Mamidi N , Chavali P . Need for appropriate specimen for microbiology diagnosis of chronic osteomyelitis . Journal of Laboratory Physicians . 2018 ; 10 : 021 – 025 . OpenUrl 17. 
↵ Senneville E , Melliez H , Beltrand E , Legout L , Valette M , Cazaubie M , et al. Culture of percutaneous bone biopsy specimens for diagnosis of diabetic foot osteomyelitis: concordance with ulcer swab cultures . Clinical infectious diseases . 2006 ; 42 : 57 – 62 . OpenUrl CrossRef PubMed Web of Science 18. ↵ Ceroni D , Cherkaoui A , Ferey S , Kaelin A , Schrenzel J . Kingella kingae osteoarticular infections in young children: clinical features and contribution of a new specific real-time PCR assay to the diagnosis . Journal of Pediatric Orthopaedics . 2010 ; 30 : 301 – 304 . OpenUrl CrossRef PubMed Web of Science 19. ↵ Yagupsky P , Porsch E , St Geme III JW. Kingella kingae: an emerging pathogen in young children . Pediatrics . 2011 ; 127 : 557 – 565 . OpenUrl CrossRef PubMed Web of Science 20. ↵ Lucas PJF , Van der Gaag LC , Abu-Hanna A. Bayesian networks in biomedicine and health-care . 2004 .. 21. ↵ Pearl J . Embracing causality in default reasoning . Artificial Intelligence . 1988 ; 35 : 259 – 271 . OpenUrl CrossRef Web of Science 22. ↵ Pearl J. Graphical Models for Probabilistic and Causal Reasoning . In Quantified Representation of Uncertainty and Imprecision . Dordrecht : Springer Netherlands ; 1998 . p. 367 – 389 . 23. ↵ Andreassen S , Riekehr C , Kristensen B , Schønheyder HC , Leibovici L . Using probabilistic and decision– theoretic methods in treatment and prognosis modeling . Artificial Intelligence in Medicine . 1999 ; 15 : 121 – 134 . OpenUrl CrossRef PubMed Web of Science 24. ↵ Lucas PJF , de Bruijn NC , Schurink K , Hoepelman A . A probabilistic and decision-theoretic approach to the management of infectious disease at the ICU . Artificial Intelligence in Medicine . 2000 ; 19 : 251 – 279 . OpenUrl CrossRef PubMed Web of Science 25. ↵ Korb KB , Nicholson AE. Bayesian artificial intelligence : CRC press ; 2010 . 26. ↵ Wu Y , Mascaro S , Bhuiyan M , Fathima P , Mace AO , Nicol MP , et al. 
Predicting the causative pathogen among children with pneumonia using a causal Bayesian network . PLoS Computational Biology . 2023 ; 19 : e1010967 . OpenUrl 27. ↵ Visscher S , Kruisheer EM , Schurink CAM , Lucas PJF , Bonten MJM . Predicting pathogens causing ventilator-associated pneumonia using a Bayesian network model . Journal of antimicrobial chemotherapy . 2008 ; 62 : 184 – 188 . OpenUrl CrossRef PubMed 28. ↵ Nikovski D . Constructing Bayesian networks for medical diagnosis from incomplete and partially correct statistics . IEEE Transactions on Knowledge and Data Engineering . 2000 ; 12 : 509 – 516 . OpenUrl 29. ↵ Harris PA , Taylor R , Thielke R , Payne J , Gonzalez N , Conde JG . Research electronic data capture (REDCap)—A metadata-driven methodology and workflow process for providing translational research informatics support . Journal of Biomedical Informatics . 2009 ; 42 : 377 – 381 . OpenUrl CrossRef PubMed Web of Science 30. ↵ Harris PA , Taylor R , Minor BL , Elliott V , Fernandez M , O’Neal L , et al. The REDCap consortium: Building an international community of software platform partners . Journal of Biomedical Informatics . 2019 ; 95 : 103208 . OpenUrl CrossRef PubMed 31. ↵ McCann R , Stirling M , Parker C , editors. Healthcare infection surveillance of Western Australia (HISWA) - Surveillance Manual . 7th ed .: Healthcare Associated Infection Unit, Communicable Disease Control Directorate , Department of Health Western Australia ; 2020 . 32. ↵ Arnold JC , Cannavino CR , Ross MK , Westley B , Miller TC , Riffenburgh RH , et al. Acute bacterial osteoarticular infections: eight-year analysis of C-reactive protein for oral step-down therapy . Pediatrics . 2012 ; 130 : e821 – e828 . OpenUrl CrossRef PubMed Web of Science 33. ↵ Chan H , Darwiche A. Sensitivity analysis in Bayesian networks: From single to multiple parameters . arXiv preprint arXiv : 1207.4124 . 2012 . 34. ↵ You W , Yang Z , Ji G. 
Feature selection for high-dimensional multi-category data using PLS-based local recursive feature elimination . Expert Systems with Applications . 2014 ; 41 : 1463 – 1475 . OpenUrl 35. ↵ Misra P , Yadav AS. Improving the classification accuracy using recursive feature elimination with cross-validation . Int. J. Emerg. Technol . 2020 ; 11 : 659 – 665 . OpenUrl 36. Pedregosa F , Varoquaux G , Gramfort A , Michel V , Thirion B , Grisel O , et al. Scikit-learn: Machine Learning in Python . Journal of Machine Learning Research . 2011 ; 12 : 2825 – 2830 . OpenUrl 37. Coulin B , Demarco G , Spyropoulou V , Juchler C , Vendeuvre T , Habre C , et al. Osteoarticular infection in children . The Bone & Joint Journal . 2021 ; 103-B : 578 – 583 . OpenUrl PubMed 38. Peltola H , Pääkkönen M. Acute osteomyelitis in children . New England Journal of Medicine . 2014 ; 370 : 352 – 360 . OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted July 02, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children Patrick H. 
Cahill , Aleisha Anderson , Daniel Yeoh , Matthew O’Brien , Timothy Robertson , Matthew Clifford , Cazz Finnucane , Andrew Martin , Katherine Stannage , Christopher Blyth , Julie Marsh , Asha C. Bowen , Tom Snelling , Charlie McLeod , Yue Wu medRxiv 2025.06.26.25330329; doi: https://doi.org/10.1101/2025.06.26.25330329 Share This Article: Copy Citation Tools Use of causal DAG and regression analysis to understand and predict complicated osteoarticular infection in children Patrick H. Cahill , Aleisha Anderson , Daniel Yeoh , Matthew O’Brien , Timothy Robertson , Matthew Clifford , Cazz Finnucane , Andrew Martin , Katherine Stannage , Christopher Blyth , Julie Marsh , Asha C. Bowen , Tom Snelling , Charlie McLeod , Yue Wu medRxiv 2025.06.26.25330329; doi: https://doi.org/10.1101/2025.06.26.25330329 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Epidemiology Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4440) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1510) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6605) Geriatric Medicine (668) Health Economics (998) Health Informatics (4541) Health Policy (1369) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1265) Infectious Diseases (except HIV/AIDS) (15923) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6604) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1145) Occupational and Environmental Health (957) Oncology (3334) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) 
Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (711) Psychiatry and Clinical Psychology (5448) Public and Global Health (9235) Radiology and Imaging (2199) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (594) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a0176caf7fecc13d',t:'MTc3OTc0MjM1Mw=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00