Full text
22,658 characters
· extracted from
preprint-html
· click to expand
Reversing Eroom’s Law: Finitude and Dilution in Small-Molecule Discovery | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Reversing Eroom’s Law: Finitude and Dilution in Small-Molecule Discovery View ORCID Profile Ross Youngs doi: https://doi.org/10.1101/2025.10.29.685434 Ross Youngs 1 Biosortia, Inc. 
, 2545 Farmers Dr., Suite 370, Columbus, OH 43235 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ross Youngs For correspondence: ryoungs{at}biosortia.com Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Drug discovery productivity—measured as clinically viable new molecules per research dollar—has declined for decades, a trend known as Eroom’s Law. This paradox persists despite advances in screening, computing, and artificial intelligence. Here we show that it stems from two forces: the finite diversity of nature’s biosynthetic rules, leading to saturation of novel natural scaffolds, and the dilution of viable candidates in expanding synthetic chemical spaces, where success rates fall without improved specificity. We propose an optimized hybrid strategy that allocates effort to mining natural products and using their validated patterns to guide synthetic exploration, maximizing novelty per cost. This approach yields testable rules, such as optimal allocation thresholds and benchmarks for artificial intelligence tools. By embedding natural principles in modern discovery, this framework offers a path to reverse Eroom’s Law and boost pharmaceutical efficiency. Introduction: The paradox and the thesis Across the pharmaceutical industry, approvals per inflation-adjusted research and development dollar have fallen even as screening, synthesis and computing have expanded, 1 – 4 a pattern termed Eroom’s law. Here we advance a mechanistic explanation with three coupled results and a practical remedy. The framework rests on three ideas: nature’s finite biosynthetic playbook explains diminishing returns in natural-product discovery; the dilution of synthetic design space explains analogous declines in human-designed exploration; and a finite-prior hybrid that leverages natural priors to steer synthetic search provides a route to recover productivity. 
Empirical anchors include enumerations of chemical space, 5 – 7 clinical success rates and analyses of research-and-development efficiency. 1 – 4 Throughout, we use “discovery/returns” to mean clinically viable molecular novelty per unit of cost (proxied by approvals or late-stage successes per inflation-adjusted research and development dollar) rather than the number of molecules screened. Formal notation and parameter definitions are provided in Box 1 and the Methods to keep the Introduction accessible to a broad readership. Nature’s finite playbook and diminishing molecular returns The saturation of N discoverable classes over cumulative effort Σ can be modeled as a finite-class arrival process: $E[N(\Sigma)] = \sum_{i=1}^{M} \left(1 - e^{-H_i(\Sigma)}\right)$, where $H_i(\Sigma) = \int_0^{\Sigma} \lambda_i(s)\,ds$ is the cumulative hazard for class $i$. Across geological time, biochemistry has used a finite element palette with stability/complexity windows and only a finite number of biosynthetic grammar expansions. Upon broad exploration, the accessible set $A^{*}$ is effectively finite and class-level novelty saturates as effort increases. First-arrival hazards over $M$ classes yield $1 - e^{-H_i}$ with $H_i \to \infty$ and bounded hazards, and dominated convergence gives $E[N] \to M$ and $dE[N]/d\Sigma \to 0$. Standard arguments imply almost-sure saturation. Punctuations—rare grammar additions—add finite increments to $M$, producing transient spikes before decline resumes. 8 , 9 Modeled as a non-homogeneous Poisson process with decreasing intensity μ(Σ), punctuations add Poisson(ν) class increments at times $T_k$, causing dN/dΣ spikes ($\approx \nu\,\lambda_{\mathrm{avg}}$) with lengthening intervals $\tau_k$, testable via taxonomic databases. While total biospheric variants are vast, most new finds in unexplored taxa and cryptic clusters are variants on the existing scaffolds generated by a finite grammar; hence, class discovery faces diminishing returns even as variants abound. Large resources (e.g., COCONUT) show redundancies consistent with this view. 
10 – 12 Moreover, many plant-derived molecules are endophyte-encoded or governed by epigenetic on/off states—evidence of a shared grammar being leveraged across different organisms rather than open-ended class creation. 13 Figure 1 : Saturation N ( Σ ) and rate dN / dΣ with dashed lines at punctuation events Download figure Open in new tab Download figure Open in new tab Figure 1. The core problem. (a) Natural saturation: N(Σ) saturates with rare grammar expansions (vertical dashed lines). (b) Rate decay: dN/dΣ exhibits transient spikes at expansions and decays toward zero. (c) PPV collapse: ε(Σ) falls as π(Σ) declines; modest β(Σ) gains are insufficient. Infinite-horizon dilution in synthetic space Let $U(\Sigma)$ be the design universe with $|U| \to \infty$, 5 – 7 including synthesizable vendor libraries (e.g., Enamine REAL Space). 14 Let $V$ be truly viable classes with prevalence $\pi(\Sigma) = |V \cap U| / |U|$. Viability requires satisfying multiple orthogonal constraints (efficacy, selectivity, safety, and ADME), so $\pi(\Sigma) \to 0$ as the universe expands. Pipelines with sensitivity $\alpha(\Sigma)$ and false-positive rate $\beta(\Sigma)$ yield positive predictive value (PPV) $\varepsilon(\Sigma) = \alpha / [\alpha + \beta(1-\pi)/\pi]$. As $\pi \to 0$, $\varepsilon$ collapses unless specificity improves commensurately; with finite enrichment and throughput, the viable-discovery rate $\Lambda\varepsilon$ falls toward zero. 15 – 16 Figure 1c : PPV collapse ε ( Σ ) vs. π ( Σ ) Figure 2 : Sublinear growth of N ( Σ ) and decline of dN / dΣ under polynomial case Download figure Open in new tab Figure 2. Synthetic approach fails without super-linear enrichment. (a) Sublinear growth: Cumulative viable classes N(Σ) grow sublinearly when $\pi(\Sigma) \propto \Sigma^{-(k+1+\eta)}$. (b) Rate decline: dN/dΣ declines with Σ under the same parameters. Polynomial sufficiency: If $\Lambda(\Sigma) \propto \Sigma^{k}$ but $\pi(\Sigma) \propto \Sigma^{-(k+1+\eta)}$ (for some $\eta > 0$), then $\Lambda\varepsilon \to 0$. 
Exponential counter-case (AI): If $\pi \approx e^{-r\Sigma}$ and $\Lambda \approx e^{c\Sigma}$, then $\Lambda\varepsilon \approx (\alpha/\beta)\,e^{(c-r)\Sigma}$, and dilution is mitigated only if $c > r$ while $\beta$ falls—which provides a testable AI benchmark for current models. 16 Public repositories continue to document extreme scale growth (e.g., PubChem 2025 update), 17 and recent AI-agent pipelines illustrate how throughput may scale operationally. 18 Hybrid strategy with data dependency The hybrid model’s viable-discovery rate and its return-per-cost, ℛ, are defined by the following relationships: Allocate a share $a \in [0,1]$ to the finite/natural channel ($N$) and $1-a$ to the expanding/synthetic channel ($S$). The hybrid’s viable-discovery rate and return-per-cost are governed by data dependency (δ): The transfer of natural priors into $S$ is captured as a multiplicative PPV lift $\varepsilon_S(\Sigma, a) = \varepsilon_{S0}(\Sigma)\,[1 + \delta a]$. When $\delta > 0$, investing in $N$ improves the PPV of $S$; when $\delta = 0$, channels are decoupled. Equal-marginal policy gate (breakeven): shift marginal effort from $N$ to $S$ only if $\varepsilon_{S0}(\Sigma)\,[1 + \delta a] \ge (c_S/c_N)\,\varepsilon_N$. With data dependency, this yields a closed-form lower bound for interior optima: $a^{*}(\Sigma) \ge [(c_S/c_N)\,\varepsilon_N/\varepsilon_{S0}(\Sigma) - 1]/\delta$ (clipped to [0,1]). As prevalence in $S$ collapses ($\varepsilon_{S0} \downarrow$), $a^{*}$ rises; larger δ lowers the breakeven PPV that $S$ must achieve. Figure 3 : Shifting optimum a *( Σ ) and bar chart showing hybrid dominating pure strategies Download figure Open in new tab Figure 3. The hybrid solution and payoff. (a) Shifting optimum: Return-per-cost ℛ(a,Σ) exhibits interior optima that shift toward larger a as Σ grows and $\varepsilon_{S0}$ falls; larger δ lowers breakeven PPV. (b) Dominance: Cumulative return-per-cost—hybrid (adaptive a* ) vs. pure finite (a = 1) vs. pure infinite (a = 0). Bars were plotted on a scientific y-axis; labels were omitted to avoid misleading zeros. 
Falsifiable predictions P1 (allocation gate): At a decision horizon $\Sigma_{\mathrm{ref}}$, if $\varepsilon_{S0}(\Sigma_{\mathrm{ref}}) < (c_S/c_N)\,\varepsilon_N / (1 + \delta a)$, programs do not shift to $S$; they invest in $N$ or reduce β until above the gate. P2 (AI benchmark): Exponential counter-case is passed only if measured c ≥ r and β(Σ) declines—falsifiable in prospective pipelines. 16 , 18 P3 (punctuations): Natural-channel dN/dΣ exhibits transient spikes post-punctuation (modeled on PAGE 3), with lengthening intervals $\tau_k$, falsifiable via taxonomic cluster analysis. 8 , 9 P4 (reverse-Eroom regime): If $(1 + \delta a) > (c_S/c_N)\,(\varepsilon_N/\varepsilon_{S0})$ (with defaults $\delta \approx 1$, $a \approx 0.5$, $c_S/c_N \approx 1$, $\varepsilon_N/\varepsilon_{S0} \approx 1.5$), a hybrid increases novelty per cost versus pure $S$ and can raise approvals per dollar; this is testable in portfolio trials. Discussion and validation paths This framework explains finite closure in nature, base-rate dilution in synthetic space and why a non-zero allocation to natural priors is necessary for sustainable productivity. It reframes Eroom’s law as a predictable consequence of saturation and dilution, rather than a failure of technology, and indicates that the long-standing emphasis on synthetic discovery and the resultant dilution becomes unsustainable at scale. Reversing this trend requires a portfolio shift toward a hybrid model, in which finite-prior discovery improves the efficiency and specificity of synthetic and artificial intelligence (AI) pipelines. Validation can proceed along three fronts. First, redundancy analyses in large natural-product corpora to quantify late-stage saturation at the class level even as variants continue to grow. 10 – 12 Second, prospective measurements in modern screening to track the base rate (prevalence), sensitivity and false-positive rate, and to test both the allocation gate and an AI benchmark in which throughput growth outpaces prevalence decay while the false-positive rate falls. 
5 – 7 , 13 – 18 Third, portfolio-level trials of the equal-marginal decision rule—allocating effort until marginal return per cost equalizes across channels—with preregistered endpoints of novelty per cost and late-stage success. This perspective also reorients goals for AI and synthetic biology: not only to enumerate molecules in unexplored niches, but to learn and strategically expand nature’s finite biosynthetic grammar, including endophyte and lichen consortia, through targeted epigenetic and pathway engineering under finite-prior constraints. 14 We account for problem depth by using a difficulty-adjusted positive predictive value (see Box 1), which makes explicit that deeper problems require stronger finite-prior lift, higher specificity, greater throughput or lower unit cost to sustain returns. Box 1 | Formal framework This box defines symbols and conventions used in the framework and appendix. Nonstandard symbols are defined here. Indices are shown as subscripts and exponents as superscripts. Sets and spaces U — design universe (set of all classes/constructs considered) V — viable subset of U (those meeting the criteria below) Functions and quantities Σ — cumulative discovery/effort variable used as the independent axis π(Σ) — prevalence of viable classes (fraction of U that is viable). As | U | → ∞, π(Σ) → 0 λ i (Σ) — instantaneous hazard/intensity for class i at cumulative effort Σ H i (Σ) — cumulative hazard for class i . 
For example, the survival term uses $e^{-H_i(\Sigma)}$. Λ(Σ) — throughput/attempt rate as a function of Σ ε(Σ) — PPV under the screening pipeline Indices, strategies, and constants i, j, k — class indices (subscripts) N, S — strategy tags (narrow N ; sweep S ), used as subscripts (e.g., $\Lambda_N$, $\varepsilon_S$) ref — reference point tag, used as a subscript (e.g., $\Sigma_{\mathrm{ref}}$) δ, α, β, r, c — constants/parameters as defined in context (screening sensitivity α , false-positive rate β ; exponents/rates r, c ; response slope δ ) $\varepsilon_{S0}$ — baseline PPV for the sweep strategy ( S ), used in response models Operators and notation $\sum_{i=1}^{M}$ — summation over classes i = 1, …, M $\int_0^{\Sigma} (\dots)\,ds$ — integral from 0 to Σ with respect to s → , ≤, ≥, ≈ — limit, inequality, and approximation symbols as standard Conventions Indices appear as subscripts (e.g., $\lambda_i$, $H_i$, $\varepsilon_S$), and exponents appear as superscripts (e.g., $\Sigma^{k}$). Exponentials are written as $e^{n}$ with the entire exponent in a single superscript run (e.g., $e^{-H_i(\Sigma)}$). Greek and special symbols ( Σ, Λ, π , →, ≤, ≥) are preserved; prose and punctuation follow journal style. Default parameters for predictions (e.g., P4): δ ≈ 1 (moderate prior lift), a ≈ 0.5 (balanced allocation), $c_S/c_N \approx 1$ (equal costs), $\varepsilon_N/\varepsilon_{S0} \approx 1.5$ (natural PPV advantage), and γ ∈ [0,1], default γ = 1, for sublinear to linear complexity scaling in $\varepsilon_{\mathrm{eff}} = \varepsilon/\rho^{\gamma}$. Methods (summary) Natural Finite-class first-arrival process; dominated/monotone convergence; rare grammar shocks add finite increments 8 – 9 Synthetic PPV/base-rate calculus; π(Σ) decays with Λ(Σ) growth; sufficient polynomial and exponential conditions for Λε → 0 5 – 7 , 14 – 16 Hybrid Maximize ℛ(a,Σ) ; equal-marginal rule; data dependency δ as PPV lift; sensitivity: ∂a*/∂δ > 0, ∂a*/∂($c_N/c_S$) < 0 Hybrid (difficulty extension) We model task difficulty with ρ ≥ 1 and set $\varepsilon_{\mathrm{eff}} = \varepsilon/\rho^{\gamma}$ (γ ∈ [0,1], default γ = 1); the equal-marginal gate and $a^{*}(\Sigma)$ follow by substitution. 
Data and code availability No new data were generated in this study. The code for reproducing figures and the spreadsheet model are available upon reasonable request. Author contributions Ross Youngs conceived the theorems and wrote the manuscript. Competing interests The author declares there are no competing interests. Acknowledgements The author thanks colleagues for feedback that strengthened the clarity, empirical grounding, and policy relevance of this work, with special thanks to Dr. Jack Scannell; Dr. David Newman, retired from NIH; and Professor Jonathan Eisen for their formative discussions. Additional thanks go to Professor Matt Bertin, Dr. Guy Carter, Professor Bill Gerwick, Professor Pieter Dorrestein, Dr. Colin Kruse, Dr. Martin Latterich, Dr. Ron Moss, Eugene Francis, and Rosemarie Trueman for a decade of engagement and feedback that helped bring this work to fruition. Main References 1. ↵ BIO / Informa / QLS . Clinical development success rates 2011 – 2020 ( 2021 ). https://go.bio.org/rs/490-EHZ-999/images/ClinicalDevelopmentSuccessRates2011_2020.pdf 2. Scannell , J. W. et al. Diagnosing the decline in pharmaceutical R&D efficiency . Nat. Rev. Drug Discov . 11 , 191 – 200 ( 2012 ). doi: 10.1038/nrd3681 OpenUrl CrossRef PubMed 3. Wouters , O. J. , McKee , M. & Luyten , J. Estimated research and development investment needed to bring a new medicine to the market, 2009–2018 . JAMA 323 , 844 – 853 ( 2020 ). doi: 10.1001/jama.2020.1166 OpenUrl CrossRef PubMed 4. ↵ Sertkaya , A. et al. Costs of drug development and research and development returns . JAMA Netw. Open 7 , e2415445 ( 2024 ). doi: 10.1001/jamanetworkopen.2024.15445 OpenUrl CrossRef 5. ↵ Ruddigkeit , L. et al. Enumeration of 166 billion organic small molecules in GDB-17 . J. Chem. Inf. Model . 52 , 2864 – 2875 ( 2012 ). doi: 10.1021/ci300415d OpenUrl CrossRef PubMed 6. Reymond , J.-L. The chemical space project . Acc. Chem. Res . 48 , 722 – 730 ( 2015 ). 
doi: 10.1021/ar500432k OpenUrl CrossRef PubMed 7. ↵ Polishchuk , P. G. et al. Estimation of the size of drug-like chemical space based on GDB-17 data . J. Comput.-Aided Mol. Des . 27 , 675 – 679 ( 2013 ). doi: 10.1007/s10822-013-9672-4 OpenUrl CrossRef 8. ↵ O’Hagan , D. et al. Biosynthesis of an organofluorine molecule . Nature 416 , 279 ( 2002 ). doi: 10.1038/416279a OpenUrl CrossRef PubMed 9. ↵ Pearson , A. et al. Sterol synthesis in the last eukaryotic common ancestor . Proc. Natl Acad. Sci. USA 100 , 3707 – 3712 ( 2003 ). doi: 10.1073/pnas.2536559100 OpenUrl Abstract / FREE Full Text 10. ↵ Pye , C. R. et al. Retrospective analysis of natural products provides insights for future discovery trends . Proc. Natl Acad. Sci. USA 114 , 5601 – 5606 ( 2017 ). doi: 10.1073/pnas.1614680114 OpenUrl Abstract / FREE Full Text 11. Navarro-Muñoz , J. C. et al. A computational framework to explore large-scale biosynthetic diversity . Nat. Chem. Biol . 16 , 60 – 68 ( 2020 ). doi: 10.1038/s41589-019-0400-9 OpenUrl CrossRef PubMed 12. ↵ Sorokina , M. & Steinbeck , C. COCONUT online . J. Cheminform . 13 , 2 ( 2021 ). doi: 10.1186/s13321-020-00478-9 OpenUrl CrossRef PubMed 13. ↵ Liu , R. et al. Unlocking the metabolic potential of endophytic fungi through epigenetics: a paradigm shift for natural product discovery and plant–microbe interactions . Nat. Prod. Rep . 42 , 1690 ( 2025 ). doi: 10.1039/d5np00028a OpenUrl CrossRef PubMed 14. ↵ Enamine . REAL Space Navigator (accessed 14 Sept 2025 ). https://enamine.net/compound-collections/real-compounds/real-space-navigator 15. ↵ Ioannidis , J. P. A. Why most published research findings are false . PLoS Med . 2 , e124 ( 2005 ). doi: 10.1371/journal.pmed.0020124 OpenUrl CrossRef PubMed 16. ↵ Abramson , J. et al. Accurate structure prediction of biomolecular interactions with AlphaFold 3 . Nature 630 , 493 – 500 ( 2024 ). doi: 10.1038/s41586-024-07487-w OpenUrl CrossRef PubMed 17. ↵ Kim , S. et al. PubChem 2025 update . Nucleic Acids Res . 
53 , D1516 – D1525 ( 2025 ). doi: 10.1093/nar/gkae1059 OpenUrl CrossRef PubMed 18. ↵ Gao , S. et al. Empowering biomedical discovery with AI agents . Cell 187 , 4881 – 4899 ( 2024 ). doi: 10.1016/j.cell.2024.08.045 OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted October 31, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Reversing Eroom’s Law: Finitude and Dilution in Small-Molecule Discovery Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Reversing Eroom’s Law: Finitude and Dilution in Small-Molecule Discovery Ross Youngs bioRxiv 2025.10.29.685434; doi: https://doi.org/10.1101/2025.10.29.685434 Share This Article: Copy Citation Tools Reversing Eroom’s Law: Finitude and Dilution in Small-Molecule Discovery Ross Youngs bioRxiv 2025.10.29.685434; doi: https://doi.org/10.1101/2025.10.29.685434 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Pharmacology and Toxicology Subject Areas All Articles Animal Behavior and Cognition (7635) Biochemistry (17691) Bioengineering (13892) Bioinformatics (41937) Biophysics (21452) Cancer Biology (18588) Cell Biology (25504) Clinical Trials (138) Developmental Biology (13378) Ecology (19899) Epidemiology (2067) Evolutionary Biology (24320) Genetics (15609) Genomics (22506) Immunology (17736) Microbiology (40394) Molecular Biology (17181) Neuroscience (88605) Paleontology (666) Pathology (2832) Pharmacology and Toxicology (4824) Physiology (7641) Plant Biology (15156) Scientific Communication and Education (2045) Synthetic Biology (4294) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.