Full text
44,693 characters
· extracted from
preprint-html
· click to expand
Full BLOOD count TRends for colorectal cAnCer deteCtion (BLOODTRACC): external validation of colorectal cancer prediction models in English primary care | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Full BLOOD count TRends for colorectal cAnCer deteCtion (BLOODTRACC): external validation of colorectal cancer prediction models in English primary care View ORCID Profile Pradeep S. Virdee , Jacqueline Birks , Tim Holt , Kym I.E. Snell , Gary Abel , View ORCID Profile Brian D. Nicholson doi: https://doi.org/10.1101/2025.11.26.25341055 Pradeep S. 
Virdee 1 Nuffield Department of Primary Care Health Sciences, University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Pradeep S. Virdee For correspondence: pradeep.virdee{at}phc.ox.ac.uk Jacqueline Birks 2 Oxford University Hospital NHS Trust , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tim Holt 1 Nuffield Department of Primary Care Health Sciences, University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Kym I.E. Snell 3 Department of Applied Health Sciences, School of Health Sciences, College of Medicine and Health, University of Birmingham , Birmingham, UK 4 National Institute for Health and Care Research (NIHR) Birmingham Biomedical Research Centre , UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Gary Abel 5 University of Exeter Medical School, University of Exeter , Exeter, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Brian D. Nicholson 1 Nuffield Department of Primary Care Health Sciences, University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Brian D. Nicholson Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Introduction Colorectal cancer has low survival rates when diagnosed late-stage. We previously developed sex-specific dynamic risk prediction models utilising trends in the full blood count (FBC), a blood test commonly performed in primary care, to support early detection. We aimed to externally validate these prediction models. Methods We performed a hybrid case-control and cohort study of patients with at least one FBC test. 
We first excluded FBCs within two years before diagnosis (cases) or study exit (controls) and selected the most recent FBC as the baseline test per patient from the resulting data. Patients were aged at least 40 years at baseline and had no history of colorectal cancer. The models included age (years) at baseline and simultaneous trends over historical haemoglobin, mean corpuscular volume (MCV), and platelet measurements measured over five years before baseline to inform two-year risk of colorectal cancer diagnosis. Performance measures included the c-statistic and calibration slope. Results We included 2,956,977 males and 3,561,349 females, with 0.4% (n=12,578) and 0.3% (n=11,939) diagnosed with colorectal cancer, respectively. The c-statistic (95% CI) was 0.73 (0.72-0.73) for males and 0.74 (0.74-0.75) for females. The calibration slope (95% CI) was 0.92 (0.89-0.94) for males and 0.95 (0.93-0.98) for females. Calibration was good in subgroups of patient data, except under-predicted risk in those aged 70+ years, White individuals, and those with higher IMD. The c-statistic (95% CI) was similar regardless of the number of FBCs used to define trend and increased as the longitudinal trend window increased until around 2.5-3.0 years for men (0.73 (0.71-0.74)) and 3.0-3.5 years for women (0.73 (0.72-0.75)) and decreased with increasing longitudinal windows thereafter. Conclusion Utilising temporal changes in the FBC test could enhance risk stratification for colorectal cancer. Further research may highlight approaches for improving predictive performance further. Introduction Colorectal cancer is the fourth most common type of cancer[ 1 ] and second most common cause of cancer-related death[ 2 ] in the UK. Survival is associated with tumour stage at diagnosis: five-year survival is 90% at Stage I and 10% at Stage IV[ 3 ]. Around 55% of staged cases are diagnosed late-stage[ 4 ]. 
Earlier detection would improve the likelihood of successful treatment and reduce mortality[ 5 ]. Most diagnoses (55%) are made following urgent general practitioner (GP) referral for symptoms, as recommended by the National Institute for Health and Care Excellence (NICE)[ 6 ]. Many symptoms are common and non-specific, so many more referrals are made than cancers diagnosed, placing significant pressure on healthcare resources, particularly on colonoscopy capacity. The full blood count (FBC) blood test measures 20 components of the blood and is commonly performed in primary care[ 7 ]. Abnormalities in many individual components, such as raised platelets/thrombocytosis, are associated with increased risk of colorectal cancer[ 8 ]. We previously reported statistically significant association between trends over repeat FBCs for most components and colorectal cancer diagnosis, with cases having on average a different trend within four years prior to diagnosis compared to cancer-free patients[ 9 ]. We found that relevant trends may appear before abnormal FBC thresholds[ 10 – 12 ] that prompt further cancer investigation[ 13 – 15 ] are reached, showing potential for earlier diagnosis. We developed the BLOODTRACC models, sex-stratified diagnostic prediction models derived using joint modelling and English Clinical Practice Research Datalink (CPRD) GOLD primary care data[ 16 ]. Each model utilises age and trends in haemoglobin, mean corpuscular volume (MCV), and platelets to determine two-year risk of colorectal cancer. Internal validation found a c-statistic of 0.76 for females and 0.75 for males, using data measured earlier than two years before diagnosis. We aimed to externally validate the BLOODTRACC models to assess performance in external primary care data. Methods Study reporting follows the TRIPOD guidelines[ 17 ]. Ethical approval was provided by the CPRD Research Data Governance on behalf of the Health Research Authority (protocol number: 22_001798). 
Data preparation and analysis were performed in RStudio (R V4.1.3). Study population Patient data was from the English CPRD AURUM primary care electronic health record database, available for patients registered between 1st January 2000 and 31st December 2018. The data was linked to the National Cancer Registration and Analysis Service (NCRAS) Cancer Registration, Office for National Statistics (ONS) Death Registration, Hospital Episode Statistics (HES) Admitted Patient Care, Outpatient, and Accident and Emergency, and Small Area Level Data. SNOMED-CT, Medcodes, and ICD10 codes for each data item are available at https://github.com/PradeepVirdee/BLOODTRACC_ModelValidation_CPRDAurum [ 18 ]. Patients were aged at least 40 years with at least one haemoglobin, MCV, and platelet measurement available. Patients registered with their primary care practice for less than one year, with a history of colorectal cancer before baseline (defined below), or ineligible for linkage to NCRAS, HES, and ONS were excluded. Study design We used the same design as the model derivation study. We performed a hybrid case-control and cohort study. First, we identified the date of diagnosis in cancer cases and study exit date in cancer-free controls and excluded haemoglobin, MCV, and platelet measurements within the two years prior, ensuring we used data measured at a sufficiently earlier phase that increases the likelihood of successful clinical intervention to improve prognosis. From the resulting dataset, the baseline test was the most recent test available. Tests performed before baseline were considered historic. Trends were identified using all historical tests available up to five years prior to the baseline test. Risk predictions are therefore made from the baseline time-point, incorporating information from historical tests. 
A five-year longitudinal period was chosen based on our previous work showing differences in trends between patients with and without a diagnosis confined to five years pre-diagnosis[ 9 ]. A graphical depiction of this study design has previously been reported[ 16 ]. Outcome Due to the two-year exclusion window described above, there were no diagnoses or censored patients within the two years following the baseline test. Therefore, the outcome was a diagnosis of colorectal cancer at two years (+/-three months to allow for a time-to-event distribution to form) after the baseline test. Patients without a diagnosis at two years (+/-three months) post-baseline were censored at the earliest of date of leaving the practice, death, 31st December 2018, or two years after their baseline test. Diagnoses were identified from the NCRAS database using ICD10 codes C18-C20. Covariates The models rely on age (years) at baseline test, sex, and trends in historic haemoglobin, MCV, and platelet measurements up to baseline test. Date of birth and sex were provided for all patients by the CPRD. We cleaned blood test results to remove values outside biologically plausible ranges, which have previously been reported[ 9 ], and standardised to the same unit of measurement following guidance from previous work[ 19 ]. There was little (<~5%) missing haemoglobin, MCV, and platelet data among all FBCs so no data imputation was performed. Instead, the derived models employed joint modelling, which includes a mixed-effects modelling component that can account for sporadically measured and unbalanced data, so every blood test result, as available, was included in this validation study. Model performance Overall performance was assessed using the Brier score[ 20 ]. Discrimination was assessed using the c-statistic and Royston and Sauerbrei’s D-statistic[ 21 ]. Calibration was assessed using the calibration slope and calibration plots. 
Calibration plots were derived by first categorising patients into 20 equally sized groups of predicted two-year risk and then comparing the mean predicted two-year risk with the observed two-year risk for each risk group separately, with an overlaid LOWESS smoother. The observed two-year risk for each group was estimated using the Kaplan-Meier survival function to account for censored observations. The c-statistic and calibration plots were assessed overall and in subgroups of patient data. Subgroups included age (10-year age bands from 40 to 90+ years), co-occurring symptoms (recorded in the three months before baseline FBC), and time span of repeat FBCs (6-monthly time bands from zero to five years). We also assessed the c-statistic and calibration by the number of repeat tests available per patient that were used to define their trend (from two to 10), but as older patients are likely to have more repeat FBCs than younger patients, c-statistics from this analysis may be influenced by increasing age. We therefore stratified this analysis by age group. We compared the c-statistic of the models with that of blood test abnormality (yes/no) on the most recent test in the longitudinal window: low haemoglobin if <13g/dL for men and <11.5g/dL for women, low MCV if below its lower reference limit [numeric threshold lost in text extraction], and raised platelet count if >400 × 10^9/L[ 22 ]. The c-statistic for blood test abnormality was derived using a Cox model that included all three tests as binary variables (abnormal=yes/no), with adjustment for age at the test date. We further derived the c-statistic for early-stage (stage I-II) and late-stage (stage III-IV) cancer diagnoses. Patients were categorised into low- vs high-risk groups using thresholds for two-year risk corresponding to the 1st percentile, every 5th percentile from 5th to 95th, and 99th percentile of predicted risks derived in this dataset. Sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) were derived at each threshold. 
We derived the two-year cancer incidence using patient data until 2014 to offer a direct comparison with the cancer incidence in the derivation study, which used data over 2000-2014. Sensitivity analysis We performed two sensitivity analyses. First, including cancer diagnoses from CPRD, HES, and ONS databases, in addition to NCRAS. Second, using a traditional cohort study design. Details for both analyses are described in the Supplementary Methods. Results Summary of patient data We included 2,956,977 males and 3,561,349 females ( Figure 1 ), with 12,578 (0.4%) and 11,939 (0.3%) diagnosed two years after their baseline FBC, respectively, comparable to patient data over 2000-2014 alone: 0.5% males and 0.4% females. Patients with colorectal cancer were on average around 10 years older than patients without ( Table 1 ). White patients were most likely to get diagnosed (men: 93.0% cancer, 81.8% cancer-free; women: 91.8% cancer, 81.9% cancer-free). Median time (years) from first to last blood test used to derive trends was higher in cases than cancer-free patients: male cases 3.2 and cancer-free 2.7; female cases 3.0 and cancer-free 2.1 (Table S1). View this table: View inline View popup Download powerpoint Table 1: Summary of patient data Download figure Open in new tab Figure 1: Patient flow diagram Abbreviations: NCRAS=National Cancer Registration and Analysis Service; HES=Hospital Episode Statistics; ONS=Office for National Statistics; Hb=haemoglobin; MCV=mean corpuscular volume. Model performance The median (range) of predicted risk for men was 0.004 (0.001-0.057) among cases and 0.002 (0.001-0.077) among cancer-free patients. For women, this was 0.003 (0.001-0.049) and 0.001 (0.001-0.099), respectively. The c-statistic (95% CI) was 0.73 (0.72-0.73) for males and 0.74 (0.74-0.75) for females ( Table 2 ). The calibration slope (95% CI) was 0.92 (0.89-0.94) for males and 0.95 (0.93-0.98) for females. 
Calibration plots showed increasing under-prediction with increasing predicted risk, which was largest for the highest risk group ( Figure 2 ): 0.14% predicted vs 0.16% observed risk in males and 0.10% predicted vs 0.12% observed risk in females. In males, the c-statistic (95% CI) was 0.72 (0.72-0.73) for early-stage diagnoses and 0.71 (0.71-0.72) for late-stage. In females, it was 0.74 (0.73-0.74) for early-stage diagnoses and 0.72 (0.71-0.72) for late-stage. View this table: View inline View popup Download powerpoint Table 2: Performance measures (95% CI) of the BLOODTRACC models Download figure Open in new tab Figure 2: Calibration plots for the BLOODTRACC models Abbreviations: KM=Kaplan-Meier Model performance in patient subgroups The c-statistic generally decreased as males and females grew older, ranging 0.54-0.63 and 0.58-0.63, respectively (Figure S1). Calibration plots showed good calibration in patients aged <50 years, with increasing under-prediction with increasing age, with most under-prediction observed for patients aged 80-89 years and greater for men than women (Figure S2). The models slightly over-predicted risk in each ethnic group, but under-estimated it in White patients and in higher IMD quintiles. The c-statistic (95% CI) across ethnic groups ranged 0.72-0.79 for men and 0.68-0.81 for women (Figure S3), lowest for White men (0.72 (0.72-0.72)) and South Asian women (0.68 (0.64-0.72)), and similar in IMD quintile groups (Figure S4). The c-statistic (95% CI) was higher in men without constipation (with 0.66 (0.62-0.70); without 0.75 (0.74-0.75)) or with rectal bleeding (with 0.79 (0.76-0.83); without 0.75 (0.74-0.75)) and women without appetite loss (with 0.62 (0.52-0.73); without 0.74 (0.74-0.74)), constipation (with 0.66 (0.62-0.69); without 0.74 (0.74-0.74)), or diarrhoea (with 0.69 (0.66-0.72); without 0.74 (0.74-0.74)) (Figure S5). The symptom subgroups that had the highest c-statistics were rectal bleeding and abdominal pain for both men and women. 
For each age group, the c-statistic in subgroups formed by the number of repeat tests available per patient that were used to derive trends ranged 0.56-0.72 if aged 40-49 years and 0.50-0.70 if aged 90+ years in men and 0.53-0.73 if aged 40-49 years and 0.56-0.65 if aged 90+ years in women (Figure S6 and Figure S7). When we assessed the c-statistic by the time span encompassing all repeat tests, the c-statistic was similar for all time spans up to 2.0-2.5 years (range 0.74-0.76) for men and 2.5-3.0 years (range 0.72-0.77) for women, but decreased as the time span of repeat tests grew larger thereafter: 0.73 (95% CI=0.71-0.74) for repeat tests over 2.5-3.0 years to 0.68 (95% CI=0.67-0.69) over 4.5-5.0 years in men and 0.73 (95% CI=0.72-0.75) for repeat tests over 3.0-3.5 years to 0.70 (95% CI=0.69-0.71) over 4.5-5.0 years in women (Figure S8). The models were well calibrated among patients with up to three repeat tests, with increasing under-prediction as the number of repeat tests increased, and among patients with all their repeat tests confined within a six-month period, with under-prediction observed for longer repeat testing periods (Figure S9). Comparison to abnormal test results For each age group, the c-statistic was comparable between the BLOODTRACC models, which included blood test trends, and Cox models, which included the abnormality counterparts (Figure S7). However, the c-statistic for the BLOODTRACC models appeared to become greater than for Cox models including abnormality in women aged at least 70 years. Diagnostic accuracy The highest risk percentile, 99%, corresponded to a risk threshold of 0.7810% for males and 0.6872% for females, with 3.56% and 5.03% sensitivity and 99.01% and 99.01% specificity, respectively (Table S2 and Table S3). The PPV increased as the threshold used to define low- vs high-risk increased, ranging 0.43-1.52% for men and 0.34-1.68% for women. 
The NPV ranged 99.59-99.98% for men and 99.68-99.96% for women, indicating a high proportion of patients with low predicted risk without an observed diagnosis regardless of the threshold used to define low- vs high-risk, likely due to the low event rate overall. Sensitivity analysis In the first sensitivity analysis, including cancer diagnoses from NCRAS, CPRD, HES, and ONS increased the c-statistic (95% CI) to 0.747 (0.744-0.750) for males, while it remained similar at 0.740 (0.736-0.744) for females, with an increased event rate and therefore increased under-prediction (Table S4, Figure S10). In the second sensitivity analysis, a traditional cohort study design increased the c-statistic (95% CI) to 0.84 (0.83-0.85) in men and 0.91 (0.89-0.93) in women, with comparable event rates (Table S5). Further results are in the Supplementary Results. Discussion Summary of main findings Our prediction models have good discriminative ability for two-year risk of colorectal cancer, based on only age, sex, and trends in haemoglobin, MCV, and platelet count earlier than two years before diagnosis, and performed slightly better for early-stage diagnoses than late-stage. The models however under-predicted two-year risk in the patients with higher predicted risks, likely reflecting older patients, as calibration in younger patients was good. The two-year event rate remained the same when considering only patient data over 2000-2014, coinciding with the prior derivation study period, and was comparable to the derivation study (0.4% in men, 0.3% in women)[ 16 ]. This under-prediction may be explained by practice-level differences between practices contributing to the CPRD GOLD and AURUM databases. Strengths and limitations A key strength of this study was the large sample size and follow-up duration used to validate the prediction models. This allowed us to explore the impact of patient age and varying time periods and number of tests capturing a trend on model performance. 
One limitation is that the reason for the FBC being ordered in primary care is unknown. The FBC is a non-specific test, so it is ordered in primary care for many reasons and not specifically for colorectal cancer. Knowing the reason for testing could help identify other conditions that could influence blood test trends. Additionally, it is possible that patients without colorectal cancer who have many FBCs in the five-year period have another disease or condition that influences blood levels over time. Therefore, some false positives (patients determined to be high risk who are not diagnosed with colorectal cancer) may have another illness. Data on comorbidities, including other cancers, will be accounted for as future work. Comparison with existing literature Our systematic review identified 13 prediction models that use some FBC data to inform risk of colorectal cancer[ 23 ]. All of these models are static, using a single test from one time point per patient, except the ColonFlag model, a machine-learning algorithm, derived using data from an Israeli population, designed to predict three-to-six month colorectal cancer risk based on changes in all FBC components measured at 36 and 18 months before a patient’s current FBC[ 24 ]. The use of repeated measures data can provide more individualised risk predictions, so our models may offer improved risk stratification compared with existing static models (using covariates measured at one time point). However, predictive performance of our dynamic BLOODTRACC models is yet to be compared to that of static models in the same patient cohort using similar study designs to reduce heterogeneity. We previously reported an external validation of the ColonFlag, performed in the same split sample internal validation cohort used in the BLOODTRACC model derivation study to offer a direct comparison of model performance[ 16 ]. The c-statistic for two-year risk was comparable between our models and the ColonFlag. 
In men, it was 0.75 for BLOODTRACC in this external validation, 0.75 for BLOODTRACC in its derivation study, and 0.76 for ColonFlag in its previous external validation. For women, it was 0.74 for BLOODTRACC in this external validation, 0.76 for BLOODTRACC in its derivation study, and 0.76 for ColonFlag in its previous external validation. Performance of each model was also similar in subgroups of age, number of FBCs used to derive trend, and length of the longitudinal window. Comparable performance was expected, as the models use the same data (age, sex, changes over time in FBC tests) to identify risk, although the ColonFlag includes trends in all 20 FBC parameters to identify risk[ 24 ] and our models are simpler, using only haemoglobin, MCV, and platelets. Discrimination remained comparable regardless, suggesting these additional FBC parameters may not enhance risk estimation. The ColonFlag model is commercially developed and not publicly available. The BLOODTRACC models may increase the likelihood of adoption and embedding within electronic health record systems, facilitating flagging of cancer risk in practice. Our second systematic review, which focused on clinical prediction models incorporating trends over repeated blood tests (liver function, renal function, and FBC) to inform cancer diagnosis, did not identify any additional existing trend-based prediction models for colorectal cancer, other than our current BLOODTRACC models[ 16 ] and the ColonFlag algorithm[ 24 ]. Implications for practice Our dynamic prediction models are designed to provide an up-to-date risk prediction each time a new haemoglobin, MCV, or platelet measurement is added to a patient’s record. Using combinations of trends over repeated tests could identify subtle simultaneous cancer-relevant changes in tests that could otherwise be missed in practice, including changes within the normal reference range, which are unlikely to be noticed by a clinician. 
The models are designed to use routinely available data, accounting for the sporadic and irregular nature of blood testing in primary care, and we plan for them to be programmed into practice software to run automatically when a new FBC becomes available. Therefore, there will likely be minimal additional work for patients or GP staff to identify a patient’s risk of undiagnosed colorectal cancer. FIT testing, which examines stool samples for traces of blood, has proved a useful test outside the screening programme for ruling out colorectal cancer in patients with symptoms attending primary care, based on a 98% NPV in those with a negative FIT in a recent primary care study[ 25 , 26 ]. Patients identified as high-risk from our models could be offered a FIT test, which is much more practical, cheaper and less invasive than colonoscopy. As the use of FIT increases in clinical practice and FIT results are recorded in the primary care electronic health record, research should investigate the additional diagnostic value of adding historical blood test trend to the FIT value at the time of clinical presentation. This would complement past and ongoing efforts to increase the predictive value of FIT by combining it in a model with patient characteristics and blood tests taken at the time of the FIT[ 27 – 30 ]. Further work could also aim to modify the FIT threshold considered ‘positive’ in high-risk patients identified from the BLOODTRACC models. Prediction models should be updated regularly to incorporate changes in clinical practice[ 31 ]. The models have good discriminative ability using only data measured earlier than two years before diagnosis, but discrimination could be improved by updating the model to include data closer to diagnosis, enhancing risk stratification. For example, our sensitivity analysis using a cohort study design in essence extended the trend to capture changes measured closer to diagnosis, which improved the c-statistic. 
This approach would introduce bias in that cancer cases have more frequent testing than cancer-free patients in routinely collected data. However, the updated model could include the number of repeat tests used to derive each patient’s trend as a covariate to adjust for this imbalance. As model discrimination was slightly better for younger individuals, BLOODTRACC may have an important role in enhancing the detection of early-onset colorectal cancer, which is rising in incidence[ 32 ]. Accounting for comorbidity in the model may improve discrimination in older patients, where blood test abnormalities are less likely to be caused by cancer than in younger less comorbid patients. These updates could also reduce the degree of under-prediction identified in this study. Conclusion Our dynamic clinical risk prediction models perform well in external primary care data, relying only on data measured earlier than two years before diagnosis, but did not outperform blood test abnormality, recommended in referral guidelines to inform referral. Extending the trend closer to diagnosis may improve predictive performance. Additional information Funding This work was funded by a National Institute for Health and Care Research (NIHR) School for Primary Care Research (SPCR) Post-doctoral fellowship for this work (award number: C092) and the NIHR Policy Research Programme (Policy Research Unit on Cancer Awareness, Screening and Early Diagnosis, reference PR-PRU-NIHR206132). This report presents independent research and the views expressed are those of the authors and not necessarily those of the NIHR, SPCR, or Department of Health and Social Care. Competing interests The authors declare no competing interests. Data availability The dataset used is available from the authors but is subject to access approval by the CPRD[ 33 ]. 
Acknowledgements The authors would also like to thank Patient and Public Involvement contributors Sue Dutton, Alton Sutton, Bernard Gudgin, Clara Martins de Barros, Emily Lam, Ian Blelloch, Julian Ashton, Margaret Ogden, Shannon Draisey, and Susan Lynne for applying a patient perspective on the use of blood tests to improve cancer detection and the acceptability of repeat testing in primary care. Footnotes Pradeep S. Virdee (PSV): pradeep.virdee{at}phc.ox.ac.uk Jacqueline Birks (JB): jacqueline.birks{at}ouh.nhs.uk Tim Holt (TH): timholt{at}nhs.net Kym I.E. Snell (KS): k.snell{at}bham.ac.uk Gary Abel (GA): g.a.abel{at}exeter.ac.uk Brian D. Nicholson (BDN): brian.nicholson{at}phc.ox.ac.uk References [1]. ↵ Cancer Research UK . Bowel cancer incidence statistics . 2024 [Accessed 8 November 2025 ]; Available from: https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/bowel-cancer#BowelCS0 . [2]. ↵ Cancer Research UK . Bowel cancer mortality statistics . 2024 [Accessed 8 November 2025 ]; Available from: https://www.cancerresearchuk.org/health-professional/cancer-statistics/statistics-by-cancer-type/bowel-cancer#BowelCS1 . [3]. ↵ Cancer Research UK . Bowel cancer survival by stage at diagnosis . 2024 [Accessed 8 November 2025 ]; Available from: https://www.cancerresearchuk.org/about-cancer/bowel-cancer/survival#:~:text=Around%2090%20out%20of%20100,more%20after%20they’re%20diagnosed. [4]. ↵ Cancer Research UK Cancer Intelligence . Incidence by Stage - Rapid Cancer Registration Data (RCRD) . 2025 . [Accessed 8 November 2025 ]; Available from: https://crukcancerintelligence.shinyapps.io/EarlyDiagnosis/_w_da89763e0f3a46bb9f2a06b264efffd1/?Tab=incByStageRCRD . [5]. ↵ Cancer Research UK Cancer Intelligence . Survival by Stage at Diagnosis . 2025 . [Accessed 8 November 2025 ]; Available from: https://crukcancerintelligence.shinyapps.io/EarlyDiagnosis/_w_da89763e0f3a46bb9f2a06b264efffd1/?Tab=survByStage . [6]. ↵ NICE . 
Suspected cancer recognition and referral: site or type of cancer ( 2020) [Accessed 8 November 2025 ]; Available from: https://www.nice.org.uk/guidance/ng12 . [7]. ↵ MayoClinic . Complete blood count (CBC) . 2023 [Accessed 8 November 2025 ]; Available from: https://www.mayoclinic.org/tests-procedures/complete-blood-count/about/pac-20384919 . [8]. ↵ Virdee PS , Marian IR , Mansouri A , Elhussein L , Kirtley S , Holt T , et al. The Full Blood Count Blood Test for Colorectal Cancer Detection: A Systematic Review, Meta-Analysis, and Critical Appraisal . Cancers , 2020 . 12 , 2348: p. 1 – 37 . DOI: 10.3390/cancers12092348 . OpenUrl CrossRef [9]. ↵ Virdee PS , Patnick P , Watkinson P , Birks J , Holt T. Trends in the full blood count blood test and colorectal cancer detection: a longitudinal, case-control study of UK primary care patient data . NIHR Open Research , 2022 , 2 , 32 :1-53. DOI: 10.3310/nihropenres.13266.1 . OpenUrl CrossRef PubMed [10]. ↵ York Teaching Hospital NHS Foundation Trust . Full Blood Count (FBC) Reference Ranges . 2024 [Accessed 8 November 2025 ]; Available from: https://www.yorkhospitals.nhs.uk/our-services/a-z-of-services/laboratory-medicine1/haematology-/ . [11]. Maidstone and Tunbridge Wells NHS Trust . Haematology Reference Ranges . 2021 [Accessed 8 November 2025 ]; Available from: https://www.mtw.nhs.uk/wp-content/uploads/2025/09/Haematology-reference-ranges.pdf . [12]. ↵ Gloucestershire Hospitals NHS Foundation Trust . Full Blood Count (FBC) . 2025 [Accessed 8 November 2025 ]; Available from: https://www.gloshospitals.nhs.uk/our-services/services-we-offer/pathology/tests-and-investigations/full-blood-count-fbc/ . [13]. ↵ National Institute for Health and Care Excellence . Suspected cancer recognition and referral . 2025 [Accessed 8 November 2025 ]; Available from: https://www.nice.org.uk/guidance/ng12/resources/suspected-cancer-recognition-and-referral-pdf-1837268071621 . [14]. World Health Organisation . 
Haemoglobin concentrations for the diagnosis of anaemia and assessment of severity . 2011 [Accessed 8 November 2025 ]; Available from: https://apps.who.int/iris/bitstream/handle/10665/85839/WHO_NMH_NHD_MNM_11.1_eng.pdf?ua=1 . [15]. ↵ National Institute for Health and Care Excellence . Anaemia - iron deficiency . 2025 [Accessed 8 November 2025 ]; Available from: https://cks.nice.org.uk/topics/anaemia-iron-deficiency/ . [16]. ↵ Virdee PS , Patnick J , Watkinson P , Holt T , Birks J. Full Blood Count Trends for Colorectal Cancer Detection in Primary Care: Development and Validation of a Dynamic Prediction Model . Cancers 2022 , 14 , 4779 . DOI: 10.3390/cancers14194779 . OpenUrl CrossRef PubMed [17]. ↵ Collins GS , Reitsma JB , Altman DG , Moons KG . Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD statement . BMJ , 2015 . 350 : p. g7594 . DOI: 10.1136/bmj.g7594 . OpenUrl CrossRef PubMed [18]. ↵ GitHub . PradeepVirdee / BLOODTRACC_ModelValidation_CPRDAurum . 2025 [Accessed 8 November 2025 ]; Available from: https://github.com/PradeepVirdee/BLOODTRACC_ModelValidation_CPRDAurum . [19]. ↵ Virdee PS , Fuller A , Jacobs M , Holt T , Birks J. Assessing data quality from the Clinical Practice Research Datalink: a methodological approach applied to the full blood count blood test . J big Data , 2020 . 7 , 95 :1-18. DOI: 10.1186/s40537-020-00375-w . OpenUrl CrossRef [20]. ↵ StatisticsHowTo. Brier Score: Definition, Examples . 2016 [Accessed 8 November 2025 ]; Available from: https://www.statisticshowto.com/brier-score/ . [21]. ↵ Royston P , Sauerbrei W. A new measure of prognostic separation in survival data . Stat Med , 2004 . 23 ( 5 ): p. 723 – 48 . DOI: 10.1002/sim.1621 . OpenUrl CrossRef PubMed Web of Science [22]. ↵ Oxford Academic . Reference intervals, etc. 2017 [ Accessed 8 November 2025 ]. Available from: https://academic.oup.com/book/37194/chapter/326364279?login=true . [23]. 
↵ Virdee PS , Collins KK , Friedemann Smith C , Yang X , Zhu S , Roberts N , Oke JL , Bankhead C , Perera R , Hobbs FDR , Nicholson BD . Clinical Prediction Models Incorporating Blood Test Trend for Cancer Detection: Systematic Review, Meta-Analysis, and Critical Appraisal . JMIR Cancer , 2025 . DOI: 10.2196/70275 . OpenUrl CrossRef [24]. ↵ Kinar Y , Kalkstein N , Akiva P , Levin B , Half EE , Goldshtein I , et al. Development and validation of a predictive model for detection of colorectal cancer in primary care by analysis of complete blood counts: a binational retrospective study . J Am Med Inform Assoc , 2016 . 23 ( 5 ): p. 879 – 90 . DOI: 10.1093/jamia/ocv195 . OpenUrl CrossRef PubMed [25]. ↵ Monahan KJ , Davies MM , Abulafi M , Banerjea A , Nicholson BD , Arasaradnam R , Barker N , Benton S , Booth R , Burling D , Carten RV , D’Souza N , East JE , Kleijnen J , Machesney M , Pettman M , Pipe J , Saker L , Sharp L , Stephenson J , Steele RJC . Faecal immunochemical testing (FIT) in patients with signs or symptoms of suspected colorectal cancer (CRC): a joint guideline from the Association of Coloproctology of Great Britain and Ireland (ACPGBI) and the British Society of Gastroenterology (BSG) . Gut 2022 . 71 : 1939 – 1962 . DOI: 10.1136/gutjnl-2022-327985 . OpenUrl Abstract / FREE Full Text [26]. ↵ NICE . Quantitative faecal immunochemical testing to guide colorectal cancer pathway referral in primary care . 2023 [Accessed 8 November 2025 ]. Available from: https://www.nice.org.uk/guidance/dg56 . [27]. ↵ Crooks CJ , West J , Jones J , Hamilton W , Bailey SER , Abel G , Banerjea A , Rees CJ , Tamm A , Nicholson BD , Benton SC , Hunt N , COLOFIT Research Group , Humes DJ . COLOFIT: Development and Internal-External Validation of Models Using Age, Sex, Faecal Immunochemical and Blood Tests to Optimise Diagnosis of Colorectal Cancer in Symptomatic Patients . Aliment Pharmacol Ther , 2025 . 61 ( 5 ): 852 – 864 . DOI: 10.1111/apt.18459 . OpenUrl CrossRef PubMed [28]. 
Withrow DR , Shine B , Oke J , Tamm A , James T , Morris E , Davies J , Harris S , East JE , Nicholson BD . Combining faecal immunochemical testing with blood test results for colorectal cancer risk stratification: a consecutive cohort of 16,604 patients presenting to primary care . BMC Med , 2022 . 15; 20 ( 1 ): 116 . DOI: 10.1186/s12916-022-02272-w . OpenUrl CrossRef PubMed [29]. Digby J , Strachan JA , Mowat C , Steele RJC , Fraser CG . Appraisal of the faecal haemoglobin, age and sex test (FAST) score in assessment of patients with lower bowel symptoms: an observational study . BMC Gastroenterol , 2019 . 11; 19 ( 1 ): 213 . DOI: 10.1186/s12876-019-1135-5 . OpenUrl CrossRef PubMed [30]. ↵ Digby J , Fraser CG , Clark G , Mowat C , Strachan JA , Steele RJC . Do risk scores improve use of faecal immunochemical testing for haemoglobin in symptomatic patients in primary care? Colorectal Dis , 2024 . 26 ( 4 ): 675 – 683 . DOI: 10.1111/codi.16925 . OpenUrl CrossRef [31]. ↵ Jenkins DA , Martin GP , Sperrin M , Riley RD , Debray TPA , Collins GS , et al. Continual updating and monitoring of clinical prediction models: time for dynamic prediction systems? Diagn Progn Res , 2021 ; 5 : 1 . DOI: 10.1186/s41512-020-00090-3 . OpenUrl CrossRef PubMed [32]. ↵ Sung H , Siegel RL , Laversanne M , Jiang C , Morgan E , Zahwe M , Cao Y , Bray F , Jemal A. Colorectal cancer incidence trends in younger versus older adults: an analysis of population-based cancer registry data . The Lancet Oncology , 2025 . 26 : 1 : 51 – 63 . DOI: 10.1016/S1470-2045(24)00600-4 . OpenUrl CrossRef PubMed [33]. ↵ Clinical Practice Research Datalink (CPRD) . 2022 [Accessed 8 November 2025 ]; Available from https://www.cprd.com/ . View the discussion thread. Back to top Previous Next Posted November 27, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. 
NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Full BLOOD count TRends for colorectal cAnCer deteCtion (BLOODTRACC): external validation of colorectal cancer prediction models in English primary care Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Full BLOOD count TRends for colorectal cAnCer deteCtion (BLOODTRACC): external validation of colorectal cancer prediction models in English primary care Pradeep S. Virdee , Jacqueline Birks , Tim Holt , Kym I.E. Snell , Gary Abel , Brian D. Nicholson medRxiv 2025.11.26.25341055; doi: https://doi.org/10.1101/2025.11.26.25341055 Share This Article: Copy Citation Tools Full BLOOD count TRends for colorectal cAnCer deteCtion (BLOODTRACC): external validation of colorectal cancer prediction models in English primary care Pradeep S. Virdee , Jacqueline Birks , Tim Holt , Kym I.E. Snell , Gary Abel , Brian D. 
Nicholson medRxiv 2025.11.26.25341055; doi: https://doi.org/10.1101/2025.11.26.25341055 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Oncology Subject Areas All Articles Addiction Medicine (569) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4442) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1511) Epidemiology (15230) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6610) Geriatric Medicine (668) Health Economics (998) Health Informatics (4542) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15923) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6607) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1146) Occupational and Environmental Health (957) Oncology (3337) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (664) Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (712) Psychiatry and Clinical Psychology (5448) Public and Global Health (9238) Radiology and Imaging (2202) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (596) Sexual and Reproductive Health (714) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a01be55d78f48650',t:'MTc3OTc4OTIzOQ=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.