CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts

doi:10.1101/2025.07.07.25331040

CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts

2025 · doi:10.1101/2025.07.07.25331040

preprint OA: closed CC-BY-4.0

📄 Open PDF Full text JSON View at publisher

Full text 57,390 characters · extracted from preprint-html · click to expand

CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts Sierra Orsinelli-Rivers , Daniel Beaglehole , Tal Einav doi: https://doi.org/10.1101/2025.07.07.25331040 Sierra Orsinelli-Rivers 1 Center for Vaccine Innovation, La Jolla Institute for Immunology , La Jolla, CA 92037, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Daniel Beaglehole 2 Computer Science and Engineering, University of 
California San Diego , La Jolla, CA 92093, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tal Einav 1 Center for Vaccine Innovation, La Jolla Institute for Immunology , La Jolla, CA 92037, USA 3 Department of Medicine, University of California San Diego , La Jolla, CA 92037, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: tal.einav{at}lji.org Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The rapid growth of biological datasets presents an opportunity to leverage past studies to inform and predict outcomes in new experiments. A central challenge is to distinguish which serological patterns are universally conserved and which are specific to individual datasets. In the context of human serology studies, where antibody-virus interactions assess the strength and breadth of the antibody response and inform vaccine design, differences in cohort demographics or experimental design can markedly impact responses, yet few methods can translate these differences into the value±uncertainty of future measurements. Here, we introduce CAPYBARA, a data-driven framework that quantifies how serological relations map across datasets. As a case study, we applied CAPYBARA to 25 influenza datasets from 1997-2023 that measured vaccine or infection responses against multiple H3N2 variants using hemagglutination inhibition (HAI). To demonstrate how a subset of measurements in each study can infer the remaining data, we withheld all HAI measurements for each variant and accurately predicted them with a 2.0-fold mean absolute error—on par with experimental assay variability. Although studies with similar designs showed the best predictive power ( e . 
g ., children data are better predicted by children than adult data), predictions across age groups, between vaccination and infection studies, and across studies conducted <5 years apart showed comparable 2−3-fold accuracy. By analyzing feature importance in this interpretable model, we identified global cross-reactivity trends, enabling future serology or vaccine studies to infer broad serological responses from a small subset of measurements. Introduction As biological datasets continue to expand in size and complexity, it is becoming increasingly more challenging to integrate information from prior datasets to inform and predict the outcomes of future experiments. Patterns found in one group of individuals may not apply to another group where factors such as age, exposure history, or immune state differ. 1 - 6 More subtle, and often unknowable, differences in experimental methodology or batch effects may further affect which datasets can predict one another. While many studies have identified that cohorts differ in some way ( e . g ., children and adults show significantly different immune responses 7 - 11 ), we lack methods that estimate how these differences translate into future measurements. Such quantitative predictions are not only the hallmark of deeply understanding a system, but they also facilitate head-to-head comparisons across studies measuring different features. This work tackles this problem in the context of the antibody response against the rapidly evolving influenza virus, which underpins the annual vaccine selection process. 12 , 13 Specifically, we consider serum hemagglutination inhibition (HAI) against multiple influenza variants, where higher HAI titers correlate with greater protection. 14 - 16 While thousands of new variants (or strains) emerge each year, only a small fraction can be functionally characterized using HAI, and the variants measured often differ between studies. 
Critically, we still lack methods that take a person’s HAI titers against a few variants and infer their titers for other variants, which would quantify the holes in a population’s immunity that should be closed when the vaccine is next updated. Currently available HAI datasets have several direct clinical applications. Prior work has shown that a person’s HAI against multiple strains can infer their influenza exposure history 17 , 18 or help predict their response to future vaccines. 19 Serum-virus HAI titers have been shown to be inherently low dimensional, 20 , 21 where titers against some variants can infer the titers of other strains. 22 , 23 As such, a new study seeking to measure HAI against numerous variants could theoretically extract these cross-reactivity relations from existing datasets, measure a minimal number of variants, and then predict the HAI of the remaining strains. One key hurdle is that cross-reactivity relations may differ with age, influenza exposure, and other immune variables. As the number of prior studies continues to increase, it is unclear a priori which datasets will best predict the cross-reactivity relations in another study, nor what form those relations will take. To that end, we introduce the Cross-study Adaptive Predictions, Yielding Bayesian Aggregation with Recursive Analysis (CAPYBARA), a generalizable framework that efficiently selects the most predictive features within each dataset, determines their cross-reactivity relations, estimates prediction error, and then combines predictions in a new study by heavily weighing the most confident predictions. Figure 1 provides an overview of the CAPYBARA workflow, including the feature learning process, model training, error calibration, and Bayesian combination of predictions across datasets. Download figure Open in new tab Figure 1. Combining datasets using CAPYBARA to predict new antibody-virus interactions. 
(A) Given studies (… S j -1 , S j , S j +1 …) measuring serum HAI against a subset of influenza variants V 0 - V n , and study-of-interest S 0 measuring HAI against V 1 - V n , CAPYBARA predicts V 0 ’s measurements in S 0 . (B) CAPYBARA first identifies the most predictive variant subsets using Recursive Feature Machines (purple). Ridge regression is applied using those features, training on a subset of data in S j and cross-validating on the rest (error σ Internal ). This model predicts titer values μ j from S j → S 0 without uncertainty. (C) To estimate cross-study prediction error, every other variant is withheld and predicted from S j → S 0 to determine the internal ( σ Internal ) and cross-study ( σ External ) error. Combining the errors from every overlapping variant yields the transferability function f j that is applied to V 0 ’s σ Internal from Panel B to estimate the uncertainty σ j in S j . (D) Predictions from all studies are combined through a Bayesian approach to yield a consensus prediction for the study-of-interest ( S 0 ). A key innovation from previous methods is that this model combines state-of-the-art feature selection 24 and error estimation techniques 25 while leveraging ridge regression for greater interpretability. CAPYBARA identifies the most predictive virus variants using a Recursive Feature Machine (RFM) that equips general machine learning models with the ability to learn features from data. 24 The resulting features are used to train an additional ridge regression algorithm, with error estimation performed by predicting all overlapping data between each pair of studies. 25 The final model predictions are combined using Bayesian weighing. We demonstrate that this approach is robust and generalizable by applying CAPYBARA to 25 influenza HAI datasets, which to our knowledge comprises one of the largest compilations of influenza serology studies measuring multiple variants. 
In the context of influenza immunity, CAPYBARA addresses two essential questions: First, how accurately can we leverage prior studies to predict future antibody inhibition data? Second, how few measurements are needed in order to extrapolate all antibody-virus interactions for any set of variants? By predicting these interactions, CAPYBARA can not only expedite future experiments but also explicitly and unbiasedly quantify how differences in study populations, experimental conditions, and virus panels impact the magnitude, breadth, and resolution of the immune response. Results Overview of the Algorithm The CAPYBARA algorithm predicts the HAI titers of multiple sera against a withheld or unmeasured variant-of-interest V 0 in study-of-interest S 0 . As input, we assume that HAI titers from other variants V 1 , V 2 , V 3 … were measured for these same sera, and that other studies S 1 , S 2 … also measured HAI for V 0 and a subset of other variants ( Fig 1A ). The algorithm proceeds as follows: 1) In every other study S j ∈{ S 1 , S 2 …}, identify the most predictive subset of variants (features) that predict HAI titers for V 0 ( Fig 1B ). 2) Train a model in S j to predict each subject’s titer ( μ j ) for V 0 ( Fig 1B ). 3) Repeat step 2 on all other variants V 1 , V 2 , V 3 … whose values are known, so that within-study and cross-study error can be computed. This determines the error relationship when predicting from S j to S 0 ( Fig 1C ), which is then applied to determine the uncertainty σ j for V 0 predictions in S j . 4) Combine predictions from all studies to estimate the HAI titer±error for each subject ( Fig 1D , Methods ). Prediction error can only decrease as more datasets are included, although adding a very noisy dataset ( σ j →∞) will negligibly change predictions. 
To validate how well CAPYBARA predicted unmeasured serum-virus interactions across a compendium of influenza studies, we entirely withheld antibody responses from each variant within 20 vaccine studies and 5 longitudinal infection studies conducted between 1997-2023 ( Table 1 ). These studies covered a variety of vaccine types (inactivated, live attenuated), age groups (children and adults), and geographic regions, containing ∼200,000 HAI titers from 3,855 unique subjects ( Table 1 , Fig S1 ). Given this diversity, it was unclear a priori which datasets would be most informative to impute the HAI titers in any other study. View this table: View inline View popup Table 1. List of large-scale influenza studies used in this analysis. 25 influenza datasets comprising vaccine [Vac, white background] or infection studies [Inf, gray background] used to assess cross-study predictions. The year represents when each study was conducted ( e . g ., 2010-2014 implies that samples were collected annually across these 5 years). Sera collected at different time points from the same subject were considered independently. The total number of measurements in each study equals (# of sera)×(# of viruses)×(1−% Missing), where % Missing denotes the fraction of missing antibody-virus HAI measurements. Antibody responses are predicted between infection and vaccination studies within experimental noise To test how well the HAI of new variants could be inferred across diverse biological contexts, we first examined how a longitudinal 6-year infection study (2007-2011 Fonv Inf ) predicted the overlapping variants in a vaccine study conducted six years later (2017 UGA Vac ), by which point subsequent infections or vaccinations could have dramatically altered HAI cross-reactivity. 
In total, N =4,336 titers were imputed in the vaccine study with root-mean-squared error (RMSE) σ Actual =3.1x (where “x” denotes fold-error) between the predicted and measured titers ( Fig 2A , blue), implying that a measured HAI=20 will typically be predicted as a titer between 20/3.1=6.5 and 20·3.1=62. The model’s estimated error σ Predict =7.7x represents an upper bound (worst case) error, and although this bound was not tight, it satisfied σ Actual ≲ σ Predict as expected. In contrast, when we predicted this same 2017 UGA Vac dataset using a vaccine study from one year earlier (2016 UGA Vac ), we found a smaller prediction error σ Actual =2.0x and a tighter estimated error σ Predict =2.3x when predicting these same N =4,336 titers ( Fig 2A , red). Download figure Open in new tab Figure 2. HAI titers across vaccination and infection studies are consistently predicted within experimental noise by combining predictions from all other studies. (A,B) Example predictions trained on an individual dataset (left and middle columns) and the combination of both datasets (right column). Labels above each plot identify the training → testing datasets. (C-E) Predicting three datasets using all other studies in Table 1 . The estimated fold-error ( σ Predict ), measured fold-error ( σ Actual ), and the number ( N ) of predicted titers are shown, with the gray diagonal bands representing σ Predict . For each serum-virus HAI, the estimated titer and error ( $\mu_1 \pm \sigma_1$ from study 1, $\mu_2 \pm \sigma_2$ from study 2) were combined using Bayesian statistics, $\frac{\mu_1/\sigma_1^2 + \mu_2/\sigma_2^2}{1/\sigma_1^2 + 1/\sigma_2^2} \pm \left(1/\sigma_1^2 + 1/\sigma_2^2\right)^{-1/2}$, which places more weight on the more confident prediction with a smaller σ Predict ( Methods ). In this case, 2016 UGA Vac was weighed ∼20x more heavily ( $1/\sigma_1^2 = 0.19$ vs $1/\sigma_2^2 = 0.01$ ), as may be expected from its similar study design. 
The combined predictions remained as good as the predictions from the 2016 UGA Vac study alone ( σ Actual =2.0x, σ Predict =2.1x), demonstrating that the model is not hampered by adding the poorly predicted infection study ( Fig 2A , purple). As another example, we used an infection study (2010-2014 Ert Inf ) and a vaccine study (1997 Fonv Vac ) to both individually and jointly predict another infection study (2007-2011 Fonv Inf ). Interestingly, predictions between the infection→infection study ( σ Actual =3.1x) were very slightly worse than vaccine→infection predictions ( σ Actual =2.9x), even though the infection studies overlapped in time while the vaccine study occurred ten years earlier ( Fig 2B , blue/green). Combining both studies led to more accurate predictions than either dataset alone ( σ Actual =2.4x) with similarly tight estimated error ( σ Predict =2.5x, Fig 2B , purple). More generally, the predictions from any number of datasets can be combined using error estimation ( Methods ). As representative examples, we used every dataset in Table 1 to predict HAI titers in an adult health care workers vaccine study (2016 Fox HCW,Vac , Fig 2C ), vaccinated children (2014 Hin V,Vac , Fig 2D ), and an adult infection study (2012-2015 Hay Inf , Fig 2E ; all studies in Fig S2 ). The 10 3 -10 4 predicted HAIs in each study had low RMSE ( σ Actual =1.8-2.1x, similar to the error of the HAI assay) and comparably low estimated error ( σ Predict =1.4-1.7x), demonstrating that combining datasets can precisely and confidently extrapolate HAI titers for completely unmeasured variants. These results corroborate that studies do not need to be pre-screened, since the framework will identify the most predictive datasets and ignore the poorly predictive ones. More training datasets lead to better prediction accuracy To test generalizability, we compared the prediction error when training on any single dataset vs the combined predictions from all studies ( Fig 3A ). 
As expected, the individual datasets showed far larger variability ( σ Actual =1.6-10.7x) than the combined predictions (1.7-2.5x) that are always comparable to the most accurate pairwise predictions in each column. Interestingly, prediction accuracy does not need to be symmetric. For example, multiple studies had poor prediction with >6x when trained on 1997 Fonv Vac , yet using any training dataset to predict values in 1997 Fonv Vac always led to accuracy <4x. Download figure Open in new tab Figure 3. Predicting HAI responses across all studies. (A) Heatmap of the average RMSE ( σ Actual ) across all subjects and overlapping variants in a study-of-interest (column). Training is either done using all studies (top row) or using a single study (all other rows). (B-C) All predicted versus measured HAIs when training on (B) a single study or (C) all other studies. The number N of predictions is larger for pairwise predictions since the same serum-virus pair is predicted multiple times using different training datasets. The diagonal line y = x represents perfect predictions. At the individual-person level, there is a noticeably greater spread in pairwise predictions ( σ Actual =2.6x across all subjects, Fig 3B ) than in the combined predictions (2.0x, Fig 3C ), with 14.3% of the former predictions having an error >4x while only 5.3% of the latter predictions have such error ( Fig S3 ). Indeed, CAPYBARA does better than averaging the individual predictions from each study by heavily weighing the more reliable, and hence more accurate, predictions ( Fig S4 ). A few general trends can be seen from pairs of studies that poorly predict one another ( Fig 3A ). The two oldest studies (1997/1998 Fonv Vac ) tend to poorly predict studies from 2010 and beyond. The LAIV studies (2012/2013 Ert LAIV,Vac ) were sometimes poorly predicted by the more common IIV studies. 
Beyond these few rules, it was often unclear which studies would poorly predict one another, emphasizing the utility of CAPYBARA to infer such relationships directly from the data. Subsetting datasets helps explain prediction dynamics Since age is well known to affect the antibody response, we assessed how well children (age≤18) can predict adult responses (age>18) and vice versa. Datasets were categorized as containing children only, adults only, or a combination of both ( Fig S1 ). HAI titers from studies in each category were exclusively predicted using models from either the same or a different category ( Fig 4A ). As expected, the best predictions came from models trained within the same category. For example, children’s titers were better predicted by children data ( σ Actual =1.7x) than by adult data ( σ Actual =2.4x; p <0.05, two-sided permutation test). Studies containing both children and adults represented an intermediate phenotype, which was itself best predicted by studies containing titers from children and adults. Despite most of these age effects being significant, the overall effect of age was small, where even purposefully mismatching datasets (predicting children→adults or adults→children) led to median error <2.5x. Download figure Open in new tab Figure 4. Training on similar datasets marginally improves prediction accuracy. Cross-study RMSE ( σ Actual ) when training and predicting between datasets based on (A) the age groups adult-only, children-only, or mixed (child + adult); (B) vaccination or infection studies; (C) datasets grouped in 5-year intervals based on their median year; or (D) pre-vaccination (Day 0) vs post-vaccination (∼1 month) data. Each box plot shows the distribution of errors for all possible withheld variants. The horizontal line denotes the median, boxes show the interquartile range, and whiskers extend to 1.5 times the interquartile range. Circles denote outliers. 
Statistical significance was assessed using two-sided permutation tests with Benjamini–Hochberg correction for multiple testing. Asterisks denote adjusted p -values: **** = p <0.0001, *** = p <0.001, ** = p <0.01, * = p <0.05. We next split datasets by study type (vaccine versus infection). As before, there was a small but significant improvement in prediction accuracy when the same type of dataset was used for training ( Fig 4B ). For example, predicting from infection→infection studies ( σ Actual =1.7x) was more accurate than vaccination→infection ( σ Actual =2.0x; p <0.05, two-sided permutation test), although predictions in either case were surprisingly accurate, with similar results when predicting vaccination responses. The worst prediction accuracy was seen when splitting datasets by their year of study and using old datasets to predict responses >10 years into the future ( Fig 4C ). Studies were binned in five year increments, with studies conducted over multiple years represented by their median year. Training on studies from the same bin either led to the best predictions or to comparable predictions with the best bin (median σ Actual 1.6–2.1x). Accuracy dropped, often significantly, when using older datasets to predict more recent ones. In particular, training on the oldest 1996–2000 datasets led to poor predictions and large variation on 2011–2015 ( σ Actual = 3.2x; p <0.05, two-sided permutation test) or 2016–2020 data ( σ Actual = 2.6x; p < [… text lost during HTML extraction …] >10 years tended to be more accurate. Lastly, we examined how accurately pre-vaccination titers predicted the peak post-vaccination titers (21-43 days post-vaccination) across vaccine studies. Surprisingly, we observed nearly identical prediction accuracies (median within ∼0.02x of each other; p=1.0, two-sided permutation test), suggesting that the HAI cross-reactivity across variants holds over time, with most variants increasing in tandem post-vaccination. 
Identifying universal relations between influenza variants To demonstrate how future studies can leverage CAPYBARA to measure a few variants and infer the response from others, we sought universal relations that could be applied to a new study without requiring dataset reweighing through CAPYBARA. To that end, we used RFM to denote which variant features were the most important when predicting each of the 112 variants across these studies ( Fig 5A , Fig S5 ; red represents greater importance). Download figure Open in new tab Figure 5. A global dictionary of influenza variant importance. (A) Rainbow diagram of feature importance between any pair of variants (connections are bidirectional). ( B ) Examples of universal HAI titer equations for multiple influenza vaccine strains, using titers from one variant (when possible) or two variants. Each virus name stands for the log 2 (HAI/5) of its titer. See the Supporting Information for all relations using ≤5 variants. ( C ) Measured versus predicted HAI titers for all vaccine strains in each study. Predictions were averaged from all other studies that measured the necessary variants. ( D ) Example using a small subset of five variants to predict ten other vaccine strains. While distant strains circulating more than 10 years apart could be important (average feature importance=0.3), the most important variant pairs tended to circulate less than a decade apart (average feature importance=0.5, Fig S5 ). However, feature importance could only be determined when two viruses were measured in at least two studies, so that the more frequently selected vaccine strains tend to have far better coverage than non-vaccine strains. For example, the 1968 pandemic strain Hong Kong 1968 was often measured, and it exhibited strong feature importance of ≈1 against viruses circulating as late as Hong Kong 2014. 
Each variant-of-interest V 0 in study S 0 is predicted by every other study with at least three overlapping strains, leading to multiple potential distinct HAI relations. While 53.1% of relations required 1-2 variants, 20.3% of equations required 4 or more variants (all relations shown in Supporting Information ). Since vaccine strains were frequently measured, many relations exclusively use these strains (examples in Fig 5B ). To evaluate the accuracy of these relations, each vaccine strain’s HAI titers were individually withheld from a study-of-interest and derived by averaging the relations from all other studies. To make these results as readily generalizable as possible, predictions were not weighted by their estimated accuracy as in the sections above, but instead averaged equally across all studies. The resulting predictions showed an RMSE of 2.1x, demonstrating that directly averaging these relations leads to remarkably accurate predictions with error comparable to the ≈2-fold error of the HAI assay, provided that each study measures all of the necessary variants to apply these relationships ( Fig 5C ). To further expedite future studies, we assessed whether measuring a smaller set of only five influenza variants (comprising four vaccine strains and one non-vaccine strain) could predict ten other vaccine strains as well as two non-vaccine strains ( Fig 5D , Supporting Information ). This reduced set of variants only had a slightly larger RMSE of 2.7x, demonstrating that cross-study relationships can increase the amount of data generated by a few experiments, and that prediction accuracy should increase as more datasets are measured, or by applying CAPYBARA to heavily weigh the most accurate studies. Discussion Here, we developed CAPYBARA, a general algorithm that combines feature learning, model generation, and error estimation to predict unmeasured interactions based on existing datasets. 
As a case study, CAPYBARA was applied to identify universal patterns in serum-virus cross-reactivity and predict each serum’s HAI against variants that were entirely withheld from a study. While factors such as age 7 - 10 , 31 or exposure history 1 , 3 , 5 , 6 are known to affect the antibody response, it is unclear how these impact serum cross-reactivity across influenza variants. To that end, CAPYBARA quantifies how accurately the local relationships in one dataset translate into another dataset using all non-withheld data, testing this approach across 25 different influenza studies. A key piece of this approach was to use error estimation to quantify the transferability between datasets, since some studies poorly predicted a dataset-of-interest with error>4x, while others were accurate within the ≈2x intrinsic error of the HAI assay. The combined predictions based on Bayesian weighing consistently favored the most informative datasets, leading to 1.7-2.5x prediction error across all studies. The algorithm did not require any prior information about subject demographics, study design (vaccination vs infection, time points measured), or exposure history. Instead, CAPYBARA unbiasedly used overlapping variant HAI titers to determine how well one dataset can predict another. With these results, we retrospectively examined how these various factors affected the antibody response. Subject age had a small but significant effect, suggesting that cross-reactivity changes from childhood (age≤18) into adulthood (age>18). Children predicted other children’s responses better than adults, while adults predicted other adult responses better than children, with mixed datasets containing both children and adults falling in the middle. The year a study was conducted also had a significant effect, with studies within a 10 year window exhibiting 1.6x-2.4x error while studies done further apart in time had 2.0x-3.2x error. 
Vaccination and infection studies similarly predicted their own category better than the other category. Surprisingly, within vaccine studies, the pre-vaccination (day 0) and peak response (day 21-40) time points predicted one another with comparable accuracy, suggesting that pre- and post-vaccination cross-reactivity resemble one another. This could arise if all variant HAIs increase by a similar amount post-vaccination, or if post-vaccination responses are relatively weak, both of which held true across these datasets and were previously reported. 32 One limitation of this approach is that a variant’s HAI titers can only be predicted in a dataset-of-interest if that variant has been measured in at least one other study. Thus, this method is not equipped to predict the HAI of new variants, although a variant measured in one dataset can be predicted in all other studies. As datasets measuring more variants are added, the number of predictions in each study grows combinatorially. As such, CAPYBARA lays the foundation to design more efficient experiments that leverage existing studies. It further provides a quantitative foundation to determine the minimum number of variants that should be measured to infer the HAIs from multiple variants of interest. To facilitate such use, we also provide the average cross-reactivity relations between all H3N2 influenza variants examined in this work ( Supplementary Information ). These relations can be immediately applied to a new study, or they can be further augmented with CAPYBARA that will derive new dataset-specific relations weighed by dataset similarity. Methods Overview of the datasets We analyzed a collection of 25 influenza vaccine and infection studies spanning 1997–2023 ( Table 1 ). If one participant had multiple sera ( e . g ., pre-vaccination and post-vaccination), the two were analyzed independently. 
Predictions were carried out between two datasets if they measured HAI against at least three of the same H3N2 variants, since this ensures that there are enough features for cross-study prediction. Analyzing HAI Titers All studies used hemagglutination inhibition, which measures the highest dilution of serum at which hemagglutination is inhibited. A larger HAI titer will reflect a more potent serum, but it may also reflect differences in virus passaging (egg- vs. cell-grown) or study design (incubation conditions, type or batch of red blood cells). Missing HAI data, comprising 2.1% of all measurements, were imputed using the row–column mean, and these imputed values were both predicted and also used to predict other titers. As in prior analyses, titers were transformed to $\log_2(\mathrm{HAI}/5)$, which reduces the bias toward large titers. 14 , 25 All prediction errors are shown in unlogged units so that they can be compared to the measured HAI titers. More precisely, the root-mean-squared error ( σ Actual ) of the logged titers is exponentiated by 2 to get the unlogged error ( i . e ., σ Actual =1.0 for $\log_2$ titers corresponds to an error of σ Predict = $2^{1.0}$ = 2-fold, with “fold” or “x” indicating an un-logged number). Prior work has shown that the HAI has an inherent 2-fold error on average, 19 and hence predictions with ≈2-fold error are as accurate as possible. Overview of CAPYBARA We first outline the four main steps of the algorithm and then describe each in detail: Step 1: Feature Learning ( Fig 1B ): For each external study S j that measured the target virus V 0 , a Recursive Feature Machine 24 identifies a small subset of variants that best predict V 0 . Step 2: Model Training ( Fig 1B ): Ridge regression is applied to a subset of sera within S j , using the selected variants as inputs and V 0 as the output. The internal root-mean-square error σ Internal ( V 0 ) is computed on the withheld sera. 
Step 3: Cross-Study Error Calibration ( Fig 1C ): To extrapolate the regression relation from S j (where σ Internal is known) to the new dataset S 0 , CAPYBARA withholds each variant V k ≠ V 0 and measures how its internal error in S j maps to its external error σ External ( V k ) in S 0 . A piecewise linear function is then fit to the ( σ Internal , σ External ) pairs for all V k , and this function is applied to σ Internal ( V 0 ) to estimate the error in S 0 , denoted by σ Predict ( V 0 ). Step 4: Combined Predictions ( Fig 1D ): When multiple studies can predict a virus V 0 in S 0 , their predictions are combined using Bayesian weighting, i . e ., weighting each prediction inversely by its squared predicted error, (1/ σ Predict ) 2 . This yields a single predicted HAI titer and a calibrated uncertainty estimate for that titer. Step 1: Using Recursive Feature Machines to identify the most predictive features A Recursive Feature Machine (RFM) is a supervised machine learning model that incorporates feature learning into general non-parametric models through the Average Gradient Outer Product (AGOP). 24 Unlike prior methods that used brute force (randomly selecting five variants V 1 - V 5 to predict a target virus V 0 , assessing that selection using cross-validation), RFM gives the feature importance of all variants so that the top candidates can be used to predict V 0 . This leads to more efficiently identifying the predictive features, is not restricted to a pre-imposed number of features ( e . g ., always requiring five), and yields better predictions than a random search through a subset of possibilities ( Fig S6A ). 
Given any differentiable predictor, f : ℝ d →ℝ trained on n data points x (1) , …, x ( n ) ∈ ℝ d , the AGOP operator G ( f ) is the covariance matrix of the input-output gradients of the predictor over the training data, $$G(f) = \frac{1}{n} \sum_{i=1}^{n} \nabla f\big(x^{(i)}\big)\, \nabla f\big(x^{(i)}\big)^{\top}.$$ This covariance captures the most predictive directions in its top eigenvectors, and the most important coordinates on its diagonal. RFMs with kernel machines proceeded by obtaining an initial estimate of the target function using a standard kernel machine without feature learning. Given this initial estimate of the predictor, the AGOP of the predictor was computed on the training data, after which the inner product function was updated using the AGOP. RFM then recursed this procedure beginning with the transformed data. Formally, the algorithm proceeded as follows. Algorithm 1: Recursive Feature Machine (RFM) Download figure Open in new tab Step 2: Model training and internal error Within each external dataset, ridge regression models were trained on selected features identified by RFM. Note that RFM importance was not considered during ridge regression, since in the case of multiple degenerate but highly important features, only a single feature should be selected. Hyperparameters (ridge regularization strength, kernel bandwidth, diagonal thresholds) were optimized via internal cross-validation (80% training, 20% validation splits), but were found to minimally vary ( Fig S6C ). Following ridge regression, each variant feature with ridge coefficient >0.2 (in absolute value) was retained. When deriving universal cross-reactivity relations, if two studies predicted a target virus V 0 using the same variants as features, the ridge coefficients were averaged for each of the viruses in their equation. 
Step 3: Cross-study error calibration Following prior work, 25 to calibrate how accurately the model trained on study S j applied to study S 0 , every possible virus V k ≠ V 0 was withheld one-by-one (in addition to excluding V 0 ) from both the training and testing datasets. 80% of sera in the training set were used to fit a ridge regression model in the training dataset, with the remaining sera used to compute the internal error σ Internal ( V k ). All sera in the testing dataset were used to compute σ External ( V k ). Performing this for all V k resulted in multiple points ( σ Internal , σ External ) that mapped the transferability of error between the two studies. These paired internal–external errors were fit using a total-least-squares (orthogonal-distance) line, σ External = α σ Internal + β . To account for the uncertainty of this fit ( i . e ., highly scattered points with a poor best-fit line are more uncertain), we added to σ External the root-mean-square vertical distance of each point from the fitted line, $\delta = \left[ \frac{1}{m} \sum_{k=1}^{m} \left( \alpha\, \sigma_{\text{Internal}}(V_k) + \beta - \sigma_{\text{External}}(V_k) \right)^{2} \right]^{1/2}$, where m is the number of withheld viruses V k . Lastly, the external error was forced to always be at least as large as the internal error. Altogether, the estimated error when predicting variant V 0 in S 0 is given by $$\sigma_{\text{Predict}}(V_0) = \max\!\left( \sigma_{\text{Internal}}(V_0),\; \alpha\, \sigma_{\text{Internal}}(V_0) + \beta + \delta \right).$$ Step 4: Combining predictions from multiple datasets When multiple studies S 1 , S 2 … predicted the HAI titers of virus V 0 in S 0 , each subject had predictions μ 1 ± σ 1 , μ 2 ± σ 2 … (where σ is a shorthand for σ Predict ). Predictions were combined using Bayesian weighting that is inversely proportional to predicted error squared, namely, $$\mu_{\text{Combined}} = \frac{\sum_j \mu_j / \sigma_j^{2}}{\sum_j 1 / \sigma_j^{2}}, \qquad \sigma_{\text{Combined}} = \left( \sum_j \frac{1}{\sigma_j^{2}} \right)^{-1/2}.$$ More confident predictions (smaller σ j ) are weighted more heavily, while highly inaccurate predictions ( σ j →∞) have little-to-no influence. As a result, all datasets can be included, and the algorithm will unbiasedly determine the most accurate predictions and use their values more heavily. 
Software and computational resources Analyses were implemented in Python using standard scientific libraries (NumPy, SciPy, scikit-learn). Code is available through the accompanying GitHub repository ( https://github.com/TalEinav/CAPYBARA ). Data Availability Code and data are available through the accompanying GitHub repository https://github.com/TalEinav/CAPYBARA Supplementary Figures Download figure Open in new tab Figure S1. Subject age distributions across datasets. Datasets are ordered chronologically and by study group. Download figure Open in new tab Figure S2. Prediction accuracy using all studies is consistently comparable to experimental noise. Every other study in Table 1 is used to predict HAI titers for all variants in the study-of-interest (shown by the plot label). Download figure Open in new tab Figure S3. Distribution of errors for individual and combined predictions. Fold-error ( σ Actual ) of predictions for every subject and virus using (A) each dataset to make a separate prediction and (B) all datasets to make combined predictions. Red shading marks the region of ≤4x error, and the annotated percentages indicate the fraction of predictions that fall within this threshold. Download figure Open in new tab Figure S4. Combined predictions outperform averaged predictions from individual studies. Prediction errors ( σ Actual ) for all viruses in all infection studies were computed using two other datasets for training. These two datasets either independently predicted each virus, and their resulting predictions were averaged [ x -axis] or CAPYBARA was used to combine these predictions by more heavily weighing the dataset that was more similar to the target infection study [ y -axis]. Points below the diagonal indicate improved performance with the combined model. Download figure Open in new tab Figure S5. Feature importance via RFM. The importance of each virus feature (column) when predicting a target virus ( V 0 , row). 
Feature importance is quantified within a single study. Only viruses with feature importance≥0.1 are shown, as these viruses are subsequently used in ridge regression when predicting the target virus. Any virus not picked is shown in white. Download figure Open in new tab Figure S6. CAPYBARA achieves lower prediction error than brute force approaches and allows for predictive uncertainty estimation. (A) Comparison of fold-error for pairwise models generated by brute-force selection (running ridge regression on five randomly selected viruses, repeating 50 times to find the best five viruses) versus CAPYBARA (runs RFM a single time to identify the most predictive features and then ridge regression). Each point represents an overlapping virus between each dataset pair. More points lie above the diagonal and the average error is slightly smaller along the x -axis, with both traits indicating better performance with CAPYBARA. (B) Predicted versus actual error across all datasets using CAPYBARA, with each point representing all measurements for one virus in one study. We expect the predicted error to represent an upper bound, worst case error ( σ Actual ≲ σ Predict ), which is satisfied in the vast majority of cases. (C) Heatmap of mean σ Actual across all dataset pairs for different hyperparameter settings for the diagonal threshold and bandwidth in RFM, showing nearly comparable prediction accuracy across all parameter choices. Acknowledgements We especially thank the experimental groups who shared their data, and we hope this paper will inspire other groups to integrate their datasets for everyone’s benefit. We always welcome pointers to new datasets. We further acknowledge Adit Radha and Mikhail Belkin for useful discussions. This research was supported by LJI & Kyowa Kirin, Inc. (KKNA - Kyowa Kirin North America), and the Bodman family (TE). References 1. ↵ Andrews , S.F. , Huang , Y. , Kaur , K. , Popova , L.I. , Ho , I.Y. , Pauli , N.T. , Dunand , C.J.H. 
, Taylor , W.M. , Lim , S. , Huang , M. , et al. ( 2015 ). Immune history profoundly affects broadly protective B cell responses to influenza . Science Translational Medicine 7 , 316ra192 – 316ra311 . doi: 10.1126/scitranslmed.aad0522 . OpenUrl Abstract / FREE Full Text 2. ↵ Gostic , K.M. , Ambrose , M. , Worobey , M. , and Lloyd-Smith , J.O. ( 2016 ). Potent protection against H5N1 and H7N9 influenza via childhood hemagglutinin imprinting . Science 354 , 722 – 726 . doi: 10.1126/science.aag1322 . OpenUrl Abstract / FREE Full Text 3. ↵ Hopping , A.M. , McElhaney , J. , Fonville , J.M. , Powers , D.C. , Beyer , W.E.P. , and Smith , D.J. ( 2016 ). The confounded effects of age and exposure history in response to influenza vaccination . Vaccine 34 , 540 – 546 . doi: 10.1016/j.vaccine.2015.11.058 . OpenUrl CrossRef PubMed 4. Vinh , D.N. , Nhat , N.T.D. , De Bruin , E. , Vy , N.H.T. , Thao , T.T.N. , Phuong , H.T. , Anh , P.H. , Todd , S. , Quan , T.M. , Thanh , N.T.L. , et al. ( 2021 ). Age-seroprevalence curves for the multi-strain structure of influenza A virus . Nature Communications 12 , 1 – 9 . doi: 10.1038/s41467-021-26948-8 . OpenUrl CrossRef 5. ↵ Fox , A. , Carolan , L. , Leung , V. , Phuong , H.V.M. , Khvorov , A. , Auladell , M. , Tseng , Y.Y. , Thai , P.Q. , Barr , I. , Subbarao , K. , et al. ( 2022 ). Opposing Effects of Prior Infection versus Prior Vaccination on Vaccine Immunogenicity against Influenza A(H3N2) Viruses . Viruses 14 , 1 – 15 . doi: 10.3390/v14030470 . OpenUrl CrossRef 6. ↵ Loes , A.N. , Tarabi , R.A.L. , Huddleston , J. , Touyon , L. , Wong , S.S. , Cheng , S.M.S. , Leung , N.H.L. , Hannon , W.W. , Bedford , T. , Cobey , S. , et al. ( 2024 ). High-throughput sequencing-based neutralization assay reveals how repeated vaccinations impact titers to recent human H1N1 influenza strains . Journal of Virology 98 , 1 – 28 . doi: 10.1128/jvi.00689-24 . OpenUrl CrossRef 7. ↵ Lessler , J. , Riley , S. , Read , J.M. , Wang , S. , Zhu , H. 
, Smith , G.J. , Guan , Y. , Jiang , C.Q. , and Cummings , D.A. ( 2012 ). Evidence for antigenic seniority in influenza A (H3N2) antibody responses in southern China . PLoS Pathog 8 , e1002802 . doi: 10.1371/journal.ppat.1002802 . OpenUrl CrossRef PubMed 8. Henry , C. , Zheng , N.Y. , Huang , M. , Cabanov , A. , Rojas , K.T. , Kaur , K. , Andrews , S.F. , Palm , A.E. , Chen , Y.Q. , Li , Y. , et al. ( 2019 ). Influenza Virus Vaccination Elicits Poorly Adapted B Cell Responses in Elderly Individuals . Cell Host Microbe 25 , 357 – 366 e356 . doi: 10.1016/j.chom.2019.01.002 . OpenUrl CrossRef PubMed 9. Gouma , S. , Kim , K. , Weirick , M.E. , Gumina , M.E. , Branche , A. , Topham , D.J. , Martin , E.T. , Monto , A.S. , Cobey , S. , and Hensley , S.E. ( 2020 ). Middle-aged individuals may be in a perpetual state of H3N2 influenza virus susceptibility . Nat Commun 11 , 4566 . doi: 10.1038/s41467-020-18465-x . OpenUrl CrossRef PubMed 10. ↵ Brouwer , A.F. , Balmaseda , A. , Gresh , L. , Patel , M. , Ojeda , S. , Schiller , A.J. , Lopez , R. , Webby , R.J. , Nelson , M.I. , Kuan , G. , and Gordon , A. ( 2022 ). Birth cohort relative to an influenza A virus’s antigenic cluster introduction drives patterns of children’s antibody titers . PLoS Pathog 18 , e1010317 . doi: 10.1371/journal.ppat.1010317 . OpenUrl CrossRef PubMed 11. ↵ Kim , K. , Marcos , Gouma , S., Madison , Scott , and Cobey , S. ( 2024 ). Measures of Population Immunity Can Predict the Dominant Clade of Influenza A (H3N2) in the 2017–2018 Season and Reveal Age-Associated Differences in Susceptibility and Antibody-Binding Specificity . Influenza and Other Respiratory Viruses 18 , 1 – 13 . doi: 10.1111/irv.70033 . OpenUrl CrossRef 12. ↵ Xie , H. , Wan , X.-F. , Ye , Z. , Plant , E.P. , Zhao , Y. , Xu , Y. , Li , X. , Finch , C. , Zhao , N. , Kawano , T. , et al. ( 2015 ). 
H3N2 Mismatch of 2014–15 Northern Hemisphere Influenza Vaccines and Head-to-head Comparison between Human and Ferret Antisera derived Antigenic Maps . Scientific Reports 5 , 15279 . doi: 10.1038/srep15279 . OpenUrl CrossRef PubMed 13. ↵ Morris , D.H. , Gostic , K.M. , Pompei , S. , Bedford , T. , Luksza , M. , Neher , R.A. , Grenfell , B.T. , Lassig , M. , and McCauley , J.W. ( 2018 ). Predictive Modeling of Influenza Shows the Promise of Applied Evolutionary Biology . Trends Microbiol 26 , 102 – 118 . doi: 10.1016/j.tim.2017.09.004 . OpenUrl CrossRef PubMed 14. ↵ Zhao , X. , Fang , V.J. , Ohmit , S.E. , Monto , A.S. , Cook , A.R. , and Cowling , B.J. ( 2016 ). Quantifying Protection Against Influenza Virus Infection Measured by Hemagglutination-inhibition Assays in Vaccine Trials . Epidemiology 27 , 143 – 151 . doi: 10.1097/EDE.0000000000000402 . OpenUrl CrossRef PubMed 15. Cowling , B.J. , Lim , W.W. , Perera , R. , Fang , V.J. , Leung , G.M. , Peiris , J.S.M. , and Tchetgen Tchetgen , E.J. ( 2019 ). Influenza Hemagglutination-inhibition Antibody Titer as a Mediator of Vaccine-induced Protection for Influenza B . Clin Infect Dis 68 , 1713 – 1717 . doi: 10.1093/cid/ciy759 . OpenUrl CrossRef 16. ↵ Krammer , F. ( 2019 ). The human antibody response to influenza A virus infection and vaccination . Nat Rev Immunol 19 , 383 – 397 . doi: 10.1038/s41577-019-0143-6 . OpenUrl CrossRef PubMed 17. ↵ Kucharski , A.J. , Lessler , J. , Read , J.M. , Zhu , H. , Jiang , C.Q. , Guan , Y. , Cummings , D.A.T. , and Riley , S. ( 2015 ). Estimating the Life Course of Influenza A(H3N2) Antibody Responses from Cross-Sectional Data . PLOS Biology 13 , e1002082 . doi: 10.1371/journal.pbio.1002082 . OpenUrl CrossRef PubMed 18. ↵ Kucharski , A.J. , Lessler , J. , Cummings , D.A.T. , and Riley , S. ( 2018 ). Timescales of influenza A/H3N2 antibody dynamics . PLoS Biol 16 , e2004974 . doi: 10.1371/journal.pbio.2004974 . OpenUrl CrossRef PubMed 19. ↵ Stacey , H. , Carlock , M.A. , Allen , J.D. 
, Hanley , H.B. , Crotty , S. , Ross , T.M. , and Einav , T. ( 2025 ). Leveraging pre-vaccination antibody titres across multiple influenza H3N2 variants to forecast the post-vaccination response . eBioMedicine 116 , 105744 . doi: 10.1016/j.ebiom.2025.105744 . OpenUrl CrossRef PubMed 20. ↵ Lapedes , A. , and Farber , R. ( 2001 ). The geometry of shape space: application to influenza . J Theor Biol 212 , 57 – 69 . doi: 10.1006/jtbi.2001.2347 . OpenUrl CrossRef PubMed Web of Science 21. ↵ Smith , D.J. , Lapedes , A.S. , de Jong , J.C. , Bestebroer , T.M. , Rimmelzwaan , G.F. , Osterhaus , A.D. , and Fouchier , R.A. ( 2004 ). Mapping the antigenic and genetic evolution of influenza virus . Science 305 , 371 – 376 . doi: 10.1126/science.1097211 . OpenUrl Abstract / FREE Full Text 22. ↵ Anderson , C.S. , McCall , P.R. , Stern , H.A. , Yang , H. , and Topham , D.J. ( 2018 ). Antigenic cartography of H1N1 influenza viruses using sequence-based antigenic distance calculation . BMC Bioinformatics 19 , 51 . doi: 10.1186/s12859-018-2042-4 . OpenUrl CrossRef PubMed 23. ↵ Einav , T. , and Cleary , B. ( 2022 ). Extrapolating missing antibody-virus measurements across serological studies . Cell Syst 13 , 561 – 573 e565 . doi: 10.1016/j.cels.2022.06.001 . OpenUrl CrossRef PubMed 24. ↵ Radhakrishnan , A. , Beaglehole , D. , Pandit , P. , and Belkin , M. ( 2024 ). Mechanism for feature learning in neural networks and backpropagation-free machine learning models . Science 383 , 1461 – 1467 . doi: 10.1126/science.adi5639 . OpenUrl CrossRef PubMed 25. ↵ Einav , T. , and Ma , R. ( 2023 ). Using interpretable machine learning to extend heterogeneous antibodyvirus datasets . Cell Rep Methods 3 , 100540 . doi: 10.1016/j.crmeth.2023.100540 . OpenUrl CrossRef 26. Fonville , J.M. , Wilks , S.H. , James , S.L. , Fox , A. , Ventresca , M. , Aban , M. , Xue , L. , Jones , T.C. , Le , N.M.H. , Pham , Q.T. , et al. ( 2014 ). Antibody landscapes after influenza virus infection or vaccination . 
Science 346 , 996 – 1000 . doi: 10.1126/science.1256427 . OpenUrl Abstract / FREE Full Text 27. Ertesvag , N.U. , Cox , R.J. , Lartey , S.L. , Mohn , K.G. , Brokstad , K.A. , and Trieu , M.C. ( 2022 ). Seasonal influenza vaccination expands hemagglutinin-specific antibody breadth to older and future A/H3N2 viruses . NPJ Vaccines 7 , 67 . doi: 10.1038/s41541-022-00490-0 . OpenUrl CrossRef PubMed 28. Hinojosa , M. , Shepard , S.S. , Chung , J.R. , King , J.P. , McLean , H.Q. , Flannery , B. , Belongia , E.A. , and Levine , M.Z. ( 2021 ). Impact of Immune Priming, Vaccination, and Infection on Influenza A(H3N2) Antibody Landscapes in Children . J Infect Dis 224 , 469 – 480 . doi: 10.1093/infdis/jiaa665 . OpenUrl CrossRef PubMed 29. Carlock , M.A. , Allen , J.D. , Hanley , H.B. , and Ross , T.M. ( 2024 ). Longitudinal assessment of human antibody binding to hemagglutinin elicited by split-inactivated influenza vaccination over six consecutive seasons . PLOS ONE 19 , e0301157 . doi: 10.1371/journal.pone.0301157 . OpenUrl CrossRef PubMed 30. Hay , J.A. , Zhu , H. , Jiang , C.Q. , Kwok , K.O. , Shen , R. , Kucharski , A. , Yang , B. , Read , J.M. , Lessler , J. , Cummings , D.A.T. , and Riley , S. ( 2024 ). Reconstructed influenza A/H3N2 infection histories reveal variation in incidence and antibody dynamics over the life course . PLOS Biology 22 , e3002864 . doi: 10.1371/journal.pbio.3002864 . OpenUrl CrossRef PubMed 31. ↵ Welsh , F.C. , Eguia , R.T. , Lee , J.M. , Haddox , H.K. , Galloway , J. , Van Vinh Chau , N. , Loes , A.N. , Huddleston , J. , Yu , T.C. , Quynh Le , M. , et al. ( 2024 ). Age-dependent heterogeneity in the antigenic effects of mutations to influenza hemagglutinin . Cell Host & Microbe 32 , 1 – 15 . doi: 10.1016/j.chom.2024.06.015 . OpenUrl CrossRef 32. ↵ Lane , A. , Quach , H.Q. , Ovsyannikova , I.G. , Kennedy , R.B. , Ross , T.M. , and Einav , T. ( 2025 ). 
Characterizing the Short- and Long-Term Temporal Dynamics of Antibody Responses to Influenza Vaccination . medRxiv preprint . doi: 10.1101/2025.02.26.25322965 . OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted July 08, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts Sierra Orsinelli-Rivers , Daniel Beaglehole , Tal Einav medRxiv 2025.07.07.25331040; doi: https://doi.org/10.1101/2025.07.07.25331040 Share This Article: Copy Citation Tools CAPYBARA: A Generalizable Framework for Predicting Serological Measurements Across Human Cohorts Sierra Orsinelli-Rivers , Daniel Beaglehole , Tal Einav medRxiv 2025.07.07.25331040; doi: https://doi.org/10.1101/2025.07.07.25331040 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Infectious Diseases (except HIV/AIDS) Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (299) Cardiovascular Medicine (4425) Dentistry and Oral Medicine (443) Dermatology (382) Emergency Medicine (607) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15221) Forensic Medicine (30) Gastroenterology (1123) Genetic and Genomic Medicine (6588) Geriatric Medicine (667) Health Economics (997) Health Informatics (4524) Health Policy (1368) Health Systems and Quality Improvement (1612) Hematology (540) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15910) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (145) Nephrology (667) Neurology (6588) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1143) Occupational and Environmental Health (956) Oncology (3331) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1690) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical Psychology (5440) Public and Global Health (9220) Radiology and Imaging (2195) Rehabilitation Medicine and Physical Therapy (1369) Respiratory Medicine (1196) Rheumatology 
(593) Sexual and Reproductive Health (710) Sports Medicine (529) Surgery (710) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9ffd8e260ab14193',t:'MTc3OTQ3MTA5NA=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-23T02:00:01.238055+00:00

License: CC-BY-4.0