Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 48,565 characters · extracted from preprint-html · click to expand
Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis Aminu Aliyar , Deepa Dash , Alfonso Fasano , Manya Prasad , Aparna Wagle Shukla , Ashish Dutt Upadhyay , Soaham Desai , Sagar Poudel , Pramod Kumar Pal , View ORCID Profile Arunmozhimaran Elavarasi doi: https://doi.org/10.1101/2025.11.09.25339846 Aminu Aliyar 1 Department of Neurology, All India Institute of Medical Sciences , 
New Delhi, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Deepa Dash 2 Department of Clinical Neurologic Sciences, University of Western Ontario , London, Canada Find this author on Google Scholar Find this author on PubMed Search for this author on this site Alfonso Fasano 3 Department of Biomedical Sciences, Humanitas University , Via Rita Levi Montalcini 4, 20090 Pieve Emanuele, Milan, Italy 4 IRCCS Humanitas Research Hospital , via Manzoni 56, 20089 Rozzano, Milan, Italy Find this author on Google Scholar Find this author on PubMed Search for this author on this site Manya Prasad 5 Center for Community Medicine, All India Institute of Medical Sciences , New Delhi, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Aparna Wagle Shukla 6 Department of Neurology, Fixel Institute for Neurological Diseases, University of Florida , Gainesville, Florida, United States of America Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ashish Dutt Upadhyay 7 Clinical Research Unit, All India Institute of Medical Sciences , New Delhi, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Soaham Desai 8 Department of Neurology, Pramukhswami Medical College , Karamsad, Anand, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sagar Poudel 9 Department of Medicine, All India Institute of Medical Sciences , New Delhi, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Pramod Kumar Pal 10 Department of Neurology, National Institute of Mental Health and Neurosciences , Bengaluru, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site Arunmozhimaran Elavarasi 1 Department of Neurology, All India Institute of Medical Sciences , 
New Delhi, India Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Arunmozhimaran Elavarasi For correspondence: arun_ela{at}yahoo.com Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Background Idiopathic normal pressure hydrocephalus (iNPH) presents with gait disturbance, cognitive impairment, and urinary incontinence. The cerebrospinal fluid tap test (CSF-TT) and lumbar infusion test (LIT) are commonly used to predict postoperative improvement after shunt surgery; however, their validity remains debated. Methods We performed a systematic review and meta-analysis to assess the sensitivity and specificity of CSF-TT and LIT. The protocol was registered in PROSPERO (CRD42023454502). Reporting followed the PRISMA guidelines, and certainty of evidence was assessed using the GRADE approach. Results From 1762 studies, 14 were included, reporting 697 CSF-TT and 393 LIT patients with shunt surgery as the reference standard. Considerable heterogeneity existed in test protocols, timing of assessment, and outcome measures. Several studies were retrospective with a high risk of bias. Using a bivariate random-effects model, pooled sensitivity and specificity of CSF-TT were 67.5% (95% CI 52.2–79.8, I 2 82.3%) and 53.3% (40.7-65.4, I 2 49.4%), respectively. The certainty of evidence was very low for sensitivity due to bias, inconsistency, and imprecision, and low for poor specificity due to bias and inconsistency. LIT showed a pooled sensitivity of 81% (70.3–88.3, I 2 = 27.6%) and specificity of 42.8% (20.8–68.1, I 2 = 60.8%), with moderate certainty for sensitivity and poor specificity. Conclusion Both CSF-TT and LIT demonstrate only modest accuracy in predicting shunt outcomes. The pooled specificity of the CSF-TT is similar to a coin toss, limiting its standalone clinical utility. 
The moderate certainty regarding the poor specificity of the LIT highlights the need for improved prognostic models. These tests should be interpreted in conjunction with other clinical and imaging findings, rather than in isolation. We need standardized, high-quality studies to better define their diagnostic value and support shared decision-making. Background Normal pressure hydrocephalus (NPH) was first described by Hakim and Adams in 1965. It is characterized by the triad of gait disorder, cognitive impairment, and urinary incontinence. The prevalence of possible idiopathic NPH (iNPH) ranges from 10/100,000 to 29/100,000, with age-specific rates varying from 3.3/100,000 in individuals aged 50-59 to 5.9% in those aged 80 years or older. 1 In the right clinical setting, neuroimaging is done to demonstrate non-obstructive hydrocephalus. Several imaging features, such as the callosal angle, Evan’s index, and disproportionately enlarged subarachnoid spaces (DESH), have been described, which help identify patients for ancillary testing. Suspected patients undergo a CSF tap test (CSF-TT) and/or lumbar infusion test (LIT), with serial monitoring of gait scores and cognitive function. Improvement in symptoms is considered a harbinger of successful symptom control following CSF shunting, such as ventriculoperitoneal (VP) or lumbo-peritoneal shunting. External lumbar drainage (ELD) is another minimally invasive method for CSF diversion; however, since it requires hospital admission and carries some risks (e.g., infection), it is not commonly performed in the preoperative phase. Guidelines suggest that these tests have higher specificity and lower sensitivity in predicting outcomes, with conflicting opinions regarding their predictive power and validity. 
2 While some guidelines acknowledge that the CSF-TT has poor sensitivity, 3 , 4 they claim that its specificity is quite high, suggesting that in those with a positive tap test, the likelihood of clinical improvement following the surgery is high. 4 The Japanese guidelines advise clinicians that even if ancillary tests are negative, patients may still be referred for shunt surgery if other criteria, such as clinical and radiological features, are consistent with the diagnosis. Despite these recommendations, there is a wide variation in clinical practice, and several institutions, such as the UK-NHS 5 and the Canadian Health System 6 , use the CSF-TT results to decide who undergoes a definitive surgical procedure. The optimal cutoffs and timing of assessment following the CSF-TT to predict improvement after shunting are not known. 7 , 8 Despite variable endorsement in international guidelines, evidence supporting standardized protocols for these tests remains limited. Since there is clinical equipoise regarding the utility of these tests, this systematic review aims to synthesize the diagnostic accuracy of CSF-TT and LIT for predicting shunt response in iNPH, with a focus on methodological heterogeneity and risk of bias across studies. Methods We conducted a systematic review and meta-analysis to investigate the sensitivity and specificity of CSF-TT and LIT in predicting outcomes following shunt surgery in patients with iNPH. The protocol for this review was registered in PROSPERO, with the registration number CRD42023454502. We adhered to the PRISMA guidelines in reporting this systematic review. We included studies of patients with probable or possible NPH, defined according to clearly described clinico-radiologic criteria in the individual studies, who underwent diagnostic testing with a large-volume CSF-TT or LIT, followed by VP or lumboperitoneal shunt surgery, regardless of the results of these preoperative ancillary tests. 
These studies had reported clinico-radiological details and/or functional outcome of patients after surgery and were published in the English language. To avoid circular reasoning, we excluded studies that reported results of surgery in patients who were operated on based on the results of predictive tests (i.e., CSF-TT or LIT) and those that had incomplete clinico-radiological details or lacked objectively quantified outcomes following surgery. We developed a search strategy that included keywords and various terms to cover all aspects of iNPH. The search strategy was approved by all authors, following several iterations to finalize the approach (Appendix 1). We searched MEDLINE (Ovid), Embase, Cochrane CENTRAL, and conducted a PubMed search for studies not yet indexed in MEDLINE on October 22, 2025. We also searched the reference lists of all included studies and relevant systematic reviews for additional references, as well as Scopus databases for studies that fulfilled the inclusion criteria listed above. All studies obtained through electronic search were stored in Zotero reference management software (version 6.0.36, Corporation for Digital Scholarship, Virginia, USA) and then imported into Covidence systematic review software (Veritas Health Innovation, Melbourne, Australia), where the references were de-duplicated and screened, and where full-text review, data extraction, and quality assessment were performed. A pair of reviewers (AA and AE) independently screened titles and abstracts, reviewed the full texts of potentially eligible studies to determine the final eligible studies, and abstracted data. We abstracted the last name of the first author, year of publication, country, and hospital, as well as population, interventions, and outcomes, into prespecified abstraction forms. Disagreements between the reviewers were resolved by discussion or by a third reviewer (SD). 
Risk of bias assessment QUADAS-2 (Quality Assessment of Diagnostic Accuracy Studies 2) framework 9 was employed to assess the risk of bias. It was explored across four domains: patient selection (3 questions), index test (2 questions), reference standard (2 questions), and flow and timing (4 questions), which were answered as “yes,” “no,” or “unclear” based on the data reported in the individual studies. The pair of reviewers (AA and AE) assessed the risk of bias based on the answers to the questions as follows: low risk if all answers were ‘yes’; unclear risk of bias if at least one question was answered as ‘unclear’; and high risk of bias if at least one question was answered as ‘no.’ For the first three domains, the applicability concerns were assessed as low risk, unknown risk, or high risk if the population, index test, or reference standard did not align with the review question. Disagreements between the reviewers were resolved through discussion and, in some cases, by the third reviewer (SD). Thresholds for index test positivity and Definitions for the reference standard The thresholds and definitions used in the individual studies were used for the purpose of analyzing the diagnostic accuracy. Statistical analysis Sensitivity and specificity analyses were performed by extracting the number of patients with and without a response to the CSF-TT or LIT, as well as the number of patients who improved and those who did not improve following surgery. We conducted a meta-analysis of diagnostic test accuracy using the metadta command in Stata version 17.0 (StataCorp LLC, College Station, TX, USA). This command implements a bivariate random-effects model, which jointly analyzes sensitivity and specificity while accounting for their correlation and between-study variability. 
10 True positives (TP), false positives (FP), false negatives (FN), and true negatives (TN) were extracted from each study to estimate pooled sensitivity, specificity, diagnostic odds ratio, and construct the summary receiver operating characteristic (SROC) curve. The τ 2 statistic and I 2 were used to assess heterogeneity. No prespecified sensitivity analysis was planned regarding risk of bias assessment. A post hoc sensitivity analysis was performed by excluding studies that used different cutoffs for test positivity. Certainty of Evidence We used the GRADE (Grading of Recommendations Assessment, Development and Evaluation) approach to assess the certainty of evidence for each primary outcome, considering factors such as risk of bias, inconsistency, indirectness, imprecision, and publication bias. To address grading inconsistency and imprecision, we employed a diagnostic threshold of 70% for both sensitivity and specificity, considering the clinical usefulness of the test. Results The electronic search identified 1762 studies, and 452 duplicates were removed. One thousand three hundred and ten articles were screened for eligibility as per our inclusion and exclusion criteria, and 84 studies were selected for full-text review. We extracted data from 14 studies that reported the results of the index tests and reference standards as per the review question ( Figure 1 ). Nine studies reported the diagnostic accuracy of the CSF-TT, and six studies reported the diagnostic accuracy of LITs in NPH. Download figure Open in new tab Figure 1. PRISMA flow diagram of study selection process CSF tap test and post-shunt outcomes We identified nine cohort studies, 11 – 19 , comprising three prospective, four retrospective, and two with an unknown direction of data collection. A total of 697 patients were included. The characteristics of these studies have been described in detail in Supplementary Table 1. The mean ages of the patients ranged from 67 to 78.8 years. 
Gait dysfunction varied between 84 and 100%, urinary dysfunction between 67 and 82%, and cognitive dysfunction between 72 and 84%. The duration of symptoms ranged from one month to 11 years in five studies; however, in four studies, the duration of symptoms was not reported. The radiologic findings, selection criteria for shunting, and the duration from evaluation to the therapeutic procedure varied significantly among the different studies, as shown in Table 1 . View this table: View inline View popup Table 1: CSF tap test: Technique, Assessment, tap responsiveness, shunt responsiveness, and diagnostic accuracy The clinical protocols applied for the tap test varied widely, and the clinical tests used were different and have been described in detail in Table 1 . The volume of CSF tapped during the tap test ranged from 30 mL (Ishikawa et al. 12 , Yamada et al. 14 , Liu et al. 15 and Kameda et al. 17 ) to 50 mL (Wikkelsö et al. 11 , Wikkelsö et al. 13 , and van Bilsen et al. 18 ). Improvements were evaluated as early as 2 hours after the tap (Wikkelsö et al. 11 ) to as late as 1 week for cognitive and urinary symptoms (Ishikawa et al. 12 ; Yamada et al. 14 ). Liu et al. 15 performed serial assessments at 8, 24, and 72 hours. Several different clinical assessment tools, including the 10-meter walking test, Bingley’s memory test, reaction time test, MMSE, 3-m timed up-and-go test (TUG) and 3-m reciprocating walking test (RWT), iNPH grading system (iNPHGS), Grooved Pegboard (Lafayette Instrument Co., Lafayette, IN), Stroop Test, Symbol-Digit Modalities test, and Trail Making Test A, were used across various studies. Van Bilsen et al. 18 did not report the criteria for a positive tap test, though they reported the number of patients with a positive tap test. The timing of shunt surgery after tap tests was either not mentioned or variable between the included studies. 
Similar heterogeneities existed in the criteria used to define improvement and the timing of assessing post-surgical outcomes ( Table 1 ). The CSF-TT was not employed in the decision-making process in any of these studies. We found the pooled sensitivity of the CSF-TT to be 67.5% (95% CI 52.2-79.8) with a substantial between-study variance and heterogeneity (τ² = 0.82, I² = 82.2%) and the pooled specificity to be 53.3% (40.7-65.5) with a moderate between-study variance and heterogeneity (τ² = 0.39, I² = 49.4%) ( Figure 2 ), as also depicted by the summary receiver operating characteristic (sROC) curve (Supplementary Figure 1). The correlation between the logit-transformed sensitivity and specificity was -1, implying a perfect negative correlation, which is quite extreme and may indicate a strong threshold effect. This suggests that studies with higher sensitivity tend to have lower specificity, and vice versa. Download figure Open in new tab Figure 2. Forest plot showing pooled sensitivity and specificity of the CSF tap test (CSF-TT) Lumbar infusion tests and post-shunt outcomes Six studies with 393 patients were included. 13 , 20 – 24 Three studies were prospective cohort studies, two were retrospective cohort studies, and one had an unknown direction of data collection. The details of these studies are described in Table 2 and Supplementary Table 2. The mean ages ranged from 55 to 87 years. Only one study (Sorteberg et al. 22 ) reported the prevalence of clinical symptoms, with gait dysfunction observed in 94% and urinary dysfunction in 88.2% of patients. The remaining studies did not provide data on the proportion of individuals with gait, urinary, or cognitive impairment. Similar to the studies reporting CSF-TT, the protocols varied widely among studies. View this table: View inline View popup Download powerpoint Table 2: Lumbar Infusion Test: Technique, Assessment, Shunt responsiveness, and diagnostic accuracy Takeuchi et al. 
21 employed epidural pressure monitoring with a 10-15 mL bolus saline injection to calculate Resistance to Outflow (R out ). Sorteberg et al. 22 used prolonged (24-hour) continuous intracranial pressure (ICP) monitoring, followed by a constant-rate infusion test. Eide et al. used a standard infusion rate of 1.5 mL/min of Ringer’s solution, calculating the R out from the opening pressure and plateau pressure. Hasselbalch et al. utilized an automated CELDA System (Likvor AB, Umeå, Sweden) with two needles for infusion and pressure recording, focusing on R out , Pulse Amplitude (PA), and PA/ICP ratio. Wikkelsö et al. 13 performed a baseline pressure measurement, CSF drainage, and saline infusion to return to baseline, followed by a constant infusion or constant pressure method for R out calculation. R out cutoffs selected also differed significantly. Boon et al. 20 reported the diagnostic accuracy measures at various cutoffs. Takeuchi et al. used >20 mmHg/mL/min 21 ; Sorteberg et al. 22 , Eide et al. 23 , and Wikkelsö et al. 13 used a cutoff of >12 mmHg/mL/min. Wikkelsö et al. 13 also explored R out >18 and >8. Hasselbalch et al. 24 used R out percentiles (11, 15.7, 18.9) for predictive power testing without a single fixed threshold for positivity. The presence of pathological pressure waves (Takeuchi et al. 21 ) and other parameters, such as PA and the PA/ICP ratio (Hasselbalch et al. 24 ), was also considered. Eide et al. 23 performed the shunt surgery 2-3 weeks after the LIT, and the other five studies did not report the time interval between the index test and the reference standard. Similar to the studies on the CSF-TT, the criteria for improvement following shunt surgery varied in five studies. Sorteberg et al. reported the number of patients who improved following surgery according to some criteria, which were not explicitly mentioned. 22 The timing of outcome assessments had similar shortcomings. 
The pooled sensitivity of LIT in predicting clinical improvement following shunt surgery was 80.9% (70.3-88.3) with a low between-study variance and heterogeneity (τ² = 0.13, I² = 27.6%), and specificity was 42.8% (20.8-68.1) with substantial between-study variance and heterogeneity (τ² = 1.21, I² = 60.8%) ( Figure 3 ), as also depicted in the sROC curve (Supplementary Figure 2). The correlation between sensitivity and specificity was 0.69, implying a positive correlation between sensitivity and specificity across studies. This could reflect low between-study variance. Download figure Open in new tab Figure 3. Forest plot showing pooled sensitivity and specificity of the lumbar infusion test (LIT) Sensitivity analysis Since Takeuchi et al. used a cutoff of Rout ≥20, we performed a sensitivity analysis by excluding this study, as the rest of the studies used a cutoff of ≥12, and Hasselbalch et al. used a cutoff of ≥11. The pooled sensitivity was 77.5% (95% CI 70.2-83.4) with very low heterogeneity (τ² = 0.05 and I² = 14.7%) and specificity was 28.3% (95% CI 19-39.9) with very low heterogeneity (τ² = 0.03 and I² = 4.6%). The sensitivity analysis also showed a correlation of -1, suggesting threshold effects. Risk of bias The studies demonstrated various sources of bias, particularly in patient selection, the conduct and reporting of the index tests and reference standards, and in patient flow, as detailed in Table 3 and Supplementary Table 3. Although none of the studies employed a case-control methodology, and most used consecutive series of patients, seven studies had a high risk of bias, and two studies had an unclear risk of bias due to inappropriate patient exclusion, raising concerns about the applicability of the results. Applicability concerns were also identified in several studies, notably Takeuchi et al. 21 , which included only patients with atypical iNPH, Wikkelsö et al. 11 included patients with secondary NPH (post-subarachnoid hemorrhage, head trauma), and Hasselbalch et al. 
24 had unclear applicability concerns because it was unclear if all clinico-radiologically diagnosed patients were selected for study enrollment. View this table: View inline View popup Download powerpoint Table 3: Risk of bias summary: QUADAS 2 With the index tests, namely CSF-TT and LIT, most studies were reported without knowledge of the reference standard; however, some studies did not report or have pre-specified thresholds, leading to an unknown or high risk of bias. For the reference standard, some studies did not report the criteria for assessing improvement following shunt surgery, thereby making it difficult to determine if the target condition, in this case, iNPH, was correctly identified. Most studies did not report whether the reference standard was interpreted without knowledge of the index test, leading to an unclear risk of bias. Under the domain flow and timing, only Eide et al. 23 , Ishikawa et al. 12 , and van Bilsen et al. 18 met the criteria for a low risk of bias. Eight studies had a high risk of bias as they did not include all the patients in the analysis ( Table 3 ). Summary of Certainty of Evidence CSF Tap Test The certainty of evidence was very low for sensitivity, downgraded for risk of bias, inconsistency, and imprecision. Based on low-certainty evidence, downgraded for bias and inconsistency, the CSF tap test shows a pooled specificity below 70% (95% CI entirely below 70%) for predicting response to shunt surgery, suggesting that a considerable proportion of non-responders may be incorrectly classified as likely responders. (Supplementary Table 4) Lumbar Infusion Test The certainty of evidence was moderate for the sensitivity of the LIT to predict response to shunt surgery, downgraded due to serious risk of bias. 
Additionally, there is moderate-certainty evidence, downgraded for bias, that the LIT shows poor specificity (95% CI entirely below 70%) for predicting response to shunt surgery, suggesting that a considerable proportion of non-responders may be incorrectly classified as likely responders. (Supplementary Table 5) Discussion Both the CSF-TT and LIT are widely used diagnostic tools in the evaluation of iNPH. However, there is significant heterogeneity in the protocol for performing these tests, as identified by our systematic review. We also found significant clinical heterogeneity in the timing of assessment following the CSF-TT, type of gait, urinary, or cognitive assessment tools used, cutoffs, and criteria used to assess CSF-TT responsiveness or LIT positivity. Similarly, we found significant differences in the timing of shunt surgery after the index test, the timing of postoperative outcome following the shunt surgery, and the criteria used to define shunt responsiveness. Several studies made no explicit mention of these case definitions, making it difficult, if not impossible, to determine how the diagnosis was established. The diagnosis of ‘definite’ NPH is made based on response to shunt surgery. 3 This is somewhat counterintuitive, as the diagnosis, according to this definition, would be made after the treatment. In medicine, we prefer to arrive at a diagnosis prior to offering an invasive surgical procedure as therapy. However, the lack of standardization of diagnostic tests currently impedes the assessment of diagnostic accuracy in NPH. CSF-TT is generally considered less invasive and simpler to perform. Because it mimics CSF drainage by shunt surgery, it also seems intuitive to clinicians that only patients with a positive tap test should undergo definitive surgery, thereby making it a common initial screening tool for referring patients for surgical treatment. 
However, the currently available literature does not support this notion. The guidelines, therefore, state that, regardless of the tap test results, patients may be referred for surgery, as is the practice in Japan. In line with these recommendations, we found that the sensitivity of the CSF-TT was 67.5% (95% CI 52-80%), meaning that several patients who were CSF-TT non-responders still responded to VP shunting. In contrast, although the LIT is a more invasive procedure, it is considered to provide quantitative physiological data on CSF dynamics, such as R out and ICP PA. While some studies report exceptionally high accuracy (e.g., Takeuchi et al, 21 reported 91.6% sensitivity and 92.3% specificity using a R out cutoff ≥20 mmHg/ml/min), others show concerningly low specificity (e.g., 22% specificity with a cutoff ≥11 in Hasselbalch et al, 24 ), indicating a potential risk of false positives. The lack of consensus on pathological R out values and the influence of methodological variations on measured parameters remain significant limitations. These variations in infusion methods, pressure monitoring techniques, and the specific parameters measured make it difficult to compare these studies uniformly. Both the CSF-TT and LIT were evaluated using the GRADE approach to assess the certainty of evidence for diagnostic accuracy in predicting response to shunt surgery in patients with probable iNPH. The CSF-TT demonstrated a sensitivity of 67.5% (95% CI: 52–80); however, the certainty of evidence was rated very low. This was due to a serious risk of bias in the included studies, a very serious inconsistency arising from statistical heterogeneity (evidenced by a high τ 2 and I 2 ), and serious imprecision, as the lower bound of the confidence interval crossed the threshold of diagnostic utility of 70%. In contrast, there is low-certainty evidence that the specificity of CSF-TT was poor at 53% (95% CI: 40.7–65.5), due to a serious risk of bias and serious inconsistency. 
Similarly, the LIT showed a high pooled sensitivity of 80.9% (95% CI: 70.3-88.3). The certainty of evidence was rated as moderate, downgraded due to a serious risk of bias resulting from methodological limitations across the studies. The specificity of LIT was poor at 42.8% (95% CI: 20.8-68.1), with moderate certainty of evidence, downgraded due to a serious risk of bias. We did not downgrade for inconsistency because Takeuchi et al. used a different cutoff, and the sensitivity analysis excluding this study revealed no significant heterogeneity. These findings underscore that while CSF-TT and LIT may help identify patients who could benefit from shunt surgery, their limited specificity and very low certainty of evidence caution against their use as standalone diagnostic tools. There remains a critical need for higher-quality studies with standardized protocols and blinded assessments to better define their diagnostic value. The inconsistent performance of both tests, particularly when relying on clinical outcome measures or fixed cutoffs, strongly suggests that NPH shunt responsiveness is a complex, multifactorial phenomenon that is unlikely to be captured by a single, simple test. If multiple underlying pathophysiological mechanisms influence a complex medical condition, it is highly improbable that a single diagnostic marker will be perfectly predictive of treatment response. This reinforces the need for a standardized multi-modal assessment approach that integrates various clinical, radiological, and physiological parameters. Our review has a few limitations that warrant consideration. We did not study ELD, which is used in several settings, albeit infrequently. Many of the included studies were retrospective in design and involved heterogeneous patient populations. 
In some cases, outcome reporting was incomplete, and patients with surgical complications — such as postoperative bleeding — were excluded from final analyses, which may influence the interpretation of results. Additionally, there was variation in the protocols used for index tests and outcome assessment, reflecting the lack of standardized approaches at the time. Several of these studies were conducted decades ago when reporting standards were still evolving, and this is reflected in the overall methodological quality. As a result, concerns regarding the risk of bias and applicability were raised in several studies. Therefore, the findings on diagnostic accuracy should be interpreted with these limitations in mind. We did not perform a sensitivity analysis restricted to studies at low risk of bias, because very few studies were without a significant risk of bias, so excluding those at high risk of bias would not have been meaningful. Heterogeneity of studies is another major issue. Pooled estimates are derived from highly heterogeneous protocols. Thresholds for test positivity, timing of outcome assessment, and definition of ‘shunt response’ varied significantly, limiting the external validity and generalizability of the results. We did not assess for publication bias, and funnel plots were not generated due to the limited number of studies per test; therefore, small-study effects could not be ruled out. Conclusion The role of the CSF-TT and LIT in predicting outcomes following shunt surgery for NPH appears to be modest at best. In our analysis, both tests demonstrated low pooled specificity — 53.3% for the CSF-TT and 42.8% for the LIT — indicating that, as standalone tools, they have a limited ability to accurately distinguish between true non-responders and those who may benefit from surgery. In practical terms, this level of diagnostic performance of the CSF-TT is comparable to the randomness of a coin toss, raising concerns about the reliability of the test when used in isolation to guide clinical decisions. 
On the other hand, there is moderate-quality evidence to suggest that the LIT has poor specificity, although the included studies suffered from a significant risk of bias. Given these findings, clinicians should exercise caution when using the results of these tests to prognosticate surgical outcomes. Relying solely on the CSF-TT may lead to both over- and under-treatment, especially since only half of patients with positive test results have good outcomes following surgery, and up to 32% of those with negative test results have favorable outcomes. The results of the tap test should not be interpreted in isolation, and management paradigms should involve looking holistically at the clinical picture, imaging, and also the patients’ values and preferences, given that current tests are not sufficiently valid to predict postoperative outcomes. Prospective studies using standardized protocols with clearly defined outcome parameters following shunt surgery are needed. Moving forward, there is a clear need to develop and validate more accurate and comprehensive predictive models that integrate clinical, radiological, and perhaps biomarker data. Machine learning models incorporating clinical, imaging, and biomarker data should be explored, as they may yield better predictive models for post-shunt recovery in iNPH. Such models could not only improve patient selection for shunt surgery but also enhance the process of shared decision-making by providing patients and families with clearer, evidence-based expectations regarding treatment outcomes. Data Availability: None generated. Legends Supplementary Figure 1. Summary receiver operating characteristic (SROC) curve for CSF-TT Supplementary Figure 2. Summary receiver operating characteristic (SROC) curve for LIT Supplementary Table 1. Characteristics of included studies reporting the diagnostic accuracy of the CSF-TT Supplementary Table 2. 
Characteristics of included studies reporting the diagnostic accuracy of the LIT Supplementary Table 3. Methodological quality assessment using QUADAS-2 Supplementary Table 4. GRADE summary of findings table for the CSF-TT Supplementary Table 5. GRADE summary of findings table for the LIT Appendix 1: Search strategy Conflicts of interest None Funding No funding received Patients or the public WERE NOT involved in the design, conduct, reporting, or dissemination plans of our research. References 1. ↵ Zaccaria V , Bacigalupo I , Gervasi G , et al. A systematic review on the epidemiology of normal pressure hydrocephalus . Acta Neurol Scand . 2020 ; 141 ( 2 ): 101 – 114 . doi: 10.1111/ane.13182 OpenUrl CrossRef PubMed 2. ↵ Mihalj M , Dolić K , Kolić K , Ledenko V. CSF tap test - Obsolete or appropriate test for predicting shunt responsiveness? A systematic review . J Neurol Sci . 2016 ; 362 : 78 – 84 . doi: 10.1016/j.jns.2016.01.028 OpenUrl CrossRef PubMed 3. ↵ Nakajima M , Yamada S , Miyajima M , et al. Guidelines for Management of Idiopathic Normal Pressure Hydrocephalus (Third Edition): Endorsed by the Japanese Society of Normal Pressure Hydrocephalus . Neurol Med Chir (Tokyo) . 2021 ; 61 ( 2 ): 63 – 97 . doi: 10.2176/nmc.st.2020-0292 OpenUrl CrossRef PubMed 4. ↵ Halperin JJ , Kurlan R , Schwalb JM , Cusimano MD , Gronseth G , Gloss D. Practice guideline: Idiopathic normal pressure hydrocephalus: Response to shunting and predictors of response . Neurology . 2015 ; 85 ( 23 ): 2063 – 2071 . doi: 10.1212/WNL.0000000000002193 OpenUrl CrossRef PubMed 5. ↵ Carswell C. Idiopathic normal pressure hydrocephalus: historical context and a contemporary guide . Pract Neurol . 2023 ; 23 ( 1 ): 15 – 22 . doi: 10.1136/pn-2021-003291 OpenUrl Abstract / FREE Full Text 6. ↵ Hu T , Lee Y. Idiopathic normal-pressure hydrocephalus . CMAJ . 2019 ; 191 ( 1 ): E15 – E15 . doi: 10.1503/cmaj.180877 OpenUrl FREE Full Text 7. ↵ Fasano A , Espay AJ , Tang-Wai DF , Wikkelsö C , Krauss JK . 
Gaps, Controversies, and Proposed Roadmap for Research in Normal Pressure Hydrocephalus . Mov Disord Off J Mov Disord Soc . 2020 ; 35 ( 11 ): 1945 – 1954 . doi: 10.1002/mds.28251 OpenUrl CrossRef PubMed 8. ↵ Bluett B , Acosta LM , Ash E , et al. Standardizing the large-volume “tap test” for evaluating idiopathic normal pressure hydrocephalus: a systematic review . J Neurosurg Sci . 2025 ; 69 ( 1 ): 46 – 63 . doi: 10.23736/S0390-5616.24.06368-9 OpenUrl CrossRef PubMed 9. ↵ Whiting PF , Rutjes AWS , Westwood ME , et al. QUADAS-2: A Revised Tool for the Quality Assessment of Diagnostic Accuracy Studies . Ann Intern Med . 2011 ; 155 ( 8 ): 529 – 536 . doi: 10.7326/0003-4819-155-8-201110180-00009 OpenUrl CrossRef PubMed Web of Science 10. ↵ Reitsma JB , Glas AS , Rutjes AWS , Scholten RJPM , Bossuyt PM , Zwinderman AH . Bivariate analysis of sensitivity and specificity produces informative summary measures in diagnostic reviews . J Clin Epidemiol . 2005 ; 58 ( 10 ): 982 – 990 . doi: 10.1016/j.jclinepi.2005.02.022 OpenUrl CrossRef PubMed Web of Science 11. ↵ Wikkelsö C , Andersson H , Blomstrand C. Normal pressure hydrocephalus. Predictive value of the cerebrospinal fluid tap-test . Acta Neurol Scand . 1986 ; 73 ( 6 ): 566 – 573 . OpenUrl PubMed Web of Science 12. ↵ Ishikawa M , Hashimoto M , Mori E , Kuwana N , Kazui H. The value of the cerebrospinal fluid tap test for predicting shunt effectiveness in idiopathic normal pressure hydrocephalus . Fluids Barriers CNS . 2012 ; 9 ( 1 ). doi: 10.1186/2045-8118-9-1 OpenUrl CrossRef PubMed 13. ↵ Wikkelsø C , Hellström P , Klinge PM , Tans JTJ . The European iNPH Multicentre Study on the predictive values of resistance to CSF outflow and the CSF Tap Test in patients with idiopathic normal pressure hydrocephalus . J Neurol Neurosurg Psychiatry . 2013 ; 84 ( 5 ): 562 – 568 . doi: 10.1136/jnnp-2012-303314 OpenUrl Abstract / FREE Full Text 14. ↵ Yamada S , Ishikawa M , Miyajima M , et al. 
Disease duration: the key to accurate CSF tap test in iNPH . Acta Neurol Scand . 2017 ; 135 ( 2 ): 189 – 196 . doi: 10.1111/ane.12580 OpenUrl CrossRef PubMed 15. ↵ Liu C , Dong L , Li J , et al. A pilot study of multiple time points and multidomain assessment in cerebrospinal fluid tap test for patients with idiopathic normal pressure hydrocephalus . Clin Neurol Neurosurg . 2021 ; 210 : 107012 . doi: 10.1016/j.clineuro.2021.107012 OpenUrl CrossRef 16. Rydja J , Eleftheriou A , Lundin F. Evaluating the cerebrospinal fluid tap test with the Hellström iNPH scale for patients with idiopathic normal pressure hydrocephalus . Fluids Barriers CNS . 2021 ; 18 ( 1 ). doi: 10.1186/s12987-021-00252-5 OpenUrl CrossRef PubMed 17. ↵ Kameda M , Kajimoto Y , Kambara A , et al. Evaluation of the Effectiveness of the Tap Test by Combining the Use of Functional Gait Assessment and Global Rating of Change . Front Neurol . 2022 ; 13 : 846429 . doi: 10.3389/fneur.2022.846429 OpenUrl CrossRef 18. ↵ van Bilsen MWT , van den Abbeele L , Volovici V , Boogaarts HD , Bartels RHMA , van Lindert EJ . The diagnostic value of the pulsatility curve to predict shunt responsiveness in patients with idiopathic normal pressure hydrocephalus . Acta Neurochir (Wien) . 2022 ; 164 ( 7 ): 1747 – 1754 . doi: 10.1007/s00701-022-05233-7 OpenUrl CrossRef PubMed 19. ↵ Gao W , Liu W , Ying Y , et al. 
Preoperative imaging biomarkers combined with tap test for predicting shunt surgery outcome in idiopathic normal pressure hydrocephalus: a multicenter retrospective study . Front Aging Neurosci . 2025 ; 17 . doi: 10.3389/fnagi.2025.1509493 OpenUrl CrossRef 20. ↵ Boon AJW , Tans JTJ , Delwel EJ , et al. Dutch Normal-Pressure Hydrocephalus Study: prediction of outcome after shunting by resistance to outflow of cerebrospinal fluid . J Neurosurg . 1997 ; 87 ( 5 ): 687 – 693 . doi: 10.3171/jns.1997.87.5.0687 OpenUrl CrossRef PubMed Web of Science 21. ↵ Takeuchi T , Kasahara E , Iwasaki M , Mima T , Mori K. Indications for Shunting in Patients with Idiopathic Normal Pressure Hydrocephalus Presenting with Dementia and Brain Atrophy (Atypical Idiopathic Normal Pressure Hydrocephalus) . Neurol Med Chir (Tokyo) . 2000 ; 40 ( 1 ): 38 – 47 . doi: 10.2176/nmc.40.38 OpenUrl CrossRef PubMed 22. ↵ Sorteberg A , Eide PK , Fremming AD . A prospective study on the clinical effect of surgical treatment of normal pressure hydrocephalus: The value of hydrodynamic evaluation . Br J Neurosurg . 2004 ; 18 ( 2 ): 149 – 157 . doi: 10.1080/02688690410001681000 OpenUrl CrossRef PubMed 23. ↵ Eide PK , Sorteberg W. Preoperative spinal hydrodynamics versus clinical change 1 year after shunt treatment in idiopathic normal pressure hydrocephalus patients . Br J Neurosurg . 2005 ; 19 ( 6 ): 475 – 483 . doi: 10.1080/02688690500495125 OpenUrl CrossRef PubMed 24. ↵ Hasselbalch SG , Carlsen JF , Alaouie MM , et al. Prediction of shunt response in idiopathic normal pressure hydrocephalus by combined lumbar infusion test and preoperative imaging scoring . Eur J Neurol . 2023 . 
doi: 10.1111/ene.15981 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted November 10, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis Aminu Aliyar , Deepa Dash , Alfonso Fasano , Manya Prasad , Aparna Wagle Shukla , Ashish Dutt Upadhyay , Soaham Desai , Sagar Poudel , Pramod Kumar Pal , Arunmozhimaran Elavarasi medRxiv 2025.11.09.25339846; doi: https://doi.org/10.1101/2025.11.09.25339846 Share This Article: Copy Citation Tools Accuracy of CSF Tap Test and Lumbar Infusion Test in Predicting Shunt Response in Idiopathic Normal Pressure Hydrocephalus: A Systematic Review and Meta-Analysis Aminu Aliyar , Deepa Dash , Alfonso Fasano , Manya Prasad , Aparna Wagle Shukla , Ashish Dutt Upadhyay , Soaham Desai , Sagar Poudel , Pramod Kumar Pal , Arunmozhimaran Elavarasi medRxiv 2025.11.09.25339846; doi: https://doi.org/10.1101/2025.11.09.25339846 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google 
Plus One Subject Area Neurology Subject Areas All Articles Addiction Medicine (569) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4442) Dentistry and Oral Medicine (444) Dermatology (383) Emergency Medicine (609) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1511) Epidemiology (15230) Forensic Medicine (30) Gastroenterology (1126) Genetic and Genomic Medicine (6610) Geriatric Medicine (668) Health Economics (998) Health Informatics (4542) Health Policy (1370) Health Systems and Quality Improvement (1613) Hematology (543) HIV/AIDS (1266) Infectious Diseases (except HIV/AIDS) (15923) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (147) Nephrology (668) Neurology (6607) Nursing (346) Nutrition (999) Obstetrics and Gynecology (1146) Occupational and Environmental Health (957) Oncology (3337) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (664) Pediatrics (1693) Pharmacology and Therapeutics (692) Primary Care Research (712) Psychiatry and Clinical Psychology (5448) Public and Global Health (9238) Radiology and Imaging (2202) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (596) Sexual and Reproductive Health (714) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a01c90b4b95af01f',t:'MTc3OTc5NjI1OA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var 
a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00