Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 45,040 characters · extracted from preprint-html · click to expand
Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing Rebecca E Lane , Eleanor Calcutt , Anandagopal Srinivasan , Vicki Gamble , Udo Oppermann , View ORCID Profile Jianfeng Sun , View ORCID Profile Adam P Cribbs doi: https://doi.org/10.1101/2025.10.13.681987 Rebecca E Lane 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health 
Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK 2 Oxford Genetics Laboratories, Oxford University Hospitals NHS Foundation Trust , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Eleanor Calcutt 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Anandagopal Srinivasan 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Vicki Gamble 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Udo Oppermann 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK 3 Oxford Centre for Translational Myeloma Research University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jianfeng Sun 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this 
author on this site ORCID record for Jianfeng Sun Adam P Cribbs 1 Botnar Research Centre, Nuffield Department of Orthopaedics, Rheumatology and Musculoskeletal Sciences, National Institute of Health Research Oxford Biomedical, Research Unit (BRU), University of Oxford , Oxford, UK 3 Oxford Centre for Translational Myeloma Research University of Oxford , Oxford, UK Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Adam P Cribbs For correspondence: adam.cribbs{at}ndorms.ox.ac.uk Abstract Full Text Info/History Metrics Preview PDF Abstract Long-read RNA sequencing enables isoform-resolved transcriptomics, but library preparation introduces systematic biases that shape biological interpretation. We benchmarked Oxford Nanopore’s two protocols—PCR-cDNA and direct RNA—using SKMM2 myeloma cells stimulated with interleukin-6 (IL-6) and ERCC synthetic spike-ins. Direct RNA produced longer, higher-quality reads and more high-confidence isoforms, but showed pronounced 5′ coverage loss. PCR-cDNA yielded shorter fragments with 3′ underrepresentation, detecting more low-abundance transcripts at reduced confidence. Protocol-specific biases had major consequences: differential expression analysis revealed limited overlap in IL-6–responsive genes, and pathway enrichment was broader in direct RNA. At the isoform level, differential transcript usage was almost entirely protocol-specific, with case studies (e.g. RPL22L1, GRB2, RNF220) illustrating concordance and divergence. ERCC controls confirmed these biases as technical rather than biological. Together, our results show that while both methods provide accurate gene-level quantification, transcript-level conclusions depend critically on protocol choice, highlighting the need for careful selection in long-read transcriptomics. 
Introduction Long-read sequencing technologies have transformed transcriptomic research by enabling direct measurement of full-length RNA molecules and their isoforms( 1 ). Among these, Oxford Nanopore Technologies (ONT) has developed protocols capable of sequencing either cDNA molecules generated from reverse-transcribed RNA( 2 , 3 ), or native RNA molecules themselves( 4 - 8 ). These two approaches—PCR-cDNA and direct RNA sequencing—differ markedly in library preparation, sequencing chemistry, throughput, and bias profiles. Understanding these differences is critical for researchers seeking accurate transcriptome characterisation. The ONT PCR-cDNA protocol benefits from relatively low input requirements, high throughput, and robust detection of low-abundance transcripts. However, the multiple enzymatic steps required—reverse transcription, strand-switching, ligation and PCR amplification—introduce artefacts( 9 ). These include preferential amplification of shorter molecules, underrepresentation of GC-rich regions, and incomplete reverse transcription leading to truncated reads. Such biases are well documented in short-read cDNA sequencing and remain a challenge for nanopore-based approaches ( 10 , 11 ). Direct RNA sequencing eliminates the need for PCR amplification, allowing direct sequencing of native RNA molecules. This provides several advantages: removal of PCR-induced artefacts, improved isoform resolution through long contiguous reads, and the unique ability to detect base modifications such as N6-methyladenosine( 12 , 13 ). However, these benefits come with trade-offs, including higher input requirements, lower throughput, multiplexing restrictions and reduced read accuracy compared to PCR-cDNA. Benchmarking studies have demonstrated that both protocols provide accurate gene-level quantification ( 14 ), but their biases diverge at the transcript and isoform levels ( 15 ). 
Consequently, biological inferences—particularly regarding alternative splicing, isoform switching, or differential transcript usage—may depend heavily on the chosen protocol. In this study, we perform a systematic comparison of Oxford Nanopore Technologies (ONT) PCR-cDNA and direct RNA sequencing protocols. We first benchmark both methods in the SKMM2 multiple myeloma cell line stimulated with interleukin-6 (IL-6), providing a controlled model to evaluate how protocol-specific biases impact gene expression, isoform detection, and pathway-level interpretation. This analysis reveals areas of concordance—such as gene-level quantification—and key points of divergence, particularly in transcript isoform usage and differential expression outcomes. To validate that these biases arise from technical rather than biological sources, we extended our evaluation using ERCC synthetic spike-in controls. These confirmed differences in sensitivity, coverage uniformity, and quantitative accuracy between the two protocols. Together, our findings establish a technical framework for understanding the strengths and limitations of ONT RNA sequencing strategies and for guiding protocol choice in long-read transcriptomic studies. Results Protocol-dependent read characteristics and transcript detection in SKMM2 myeloma cells To assess protocol-specific differences in nanopore transcriptomics, we first compared PCR-cDNA and direct RNA sequencing in SKMM2 cells with or without IL-6 stimulation ( Figure 1 ). Direct RNA sequencing consistently generated longer reads, with median read lengths of ∼800 bp compared to ∼500 bp for PCR-cDNA ( Fig. 1a ). Read length distributions revealed distinct profiles: PCR-cDNA was enriched for shorter fragments (∼500 bp peak), whereas direct RNA yielded a broader distribution with greater representation of longer molecules ( Fig. 1b ). 
Read quality scores were slightly higher for direct RNA (median 14–15) compared to PCR-cDNA (median 13–14), with no marked influence of IL-6 treatment ( Fig 1c ). Download figure Open in new tab Figure 1. Read characteristics and transcriptome coverage differ between PCR-cDNA and direct RNA sequencing. (a) Median read length across biological replicates of SKMM2 myeloma cells cultured with or without IL-6 stimulation. Direct RNA (dRNA, red) yielded consistently longer reads than PCR-cDNA (blue). (b) Distribution of read lengths, showing enrichment of short fragments in PCR-cDNA and broader length representation in direct RNA. (c) Median read quality scores by protocol, with direct RNA achieving slightly higher values. (d) Transcript coverage profiles, highlighting 5′ coverage loss in direct RNA and 3′ underrepresentation in PCR-cDNA. (e) Venn diagrams of gene- and transcript-level detection, showing ∼16,500 genes shared, with PCR-cDNA enriched for unique low-abundance genes and direct RNA identifying more isoforms. (f) Normalised coverage along transcript bodies, demonstrating protocol-specific biases: PCR-cDNA underestimates coverage towards the 3′ end, while direct RNA shows loss of coverage at the 5′ end. Coverage profiling across transcripts highlighted characteristic biases of each protocol. PCR-cDNA showed progressive loss of coverage towards the 3′ end of coding sequence, consistent with incomplete reverse transcription and amplification inefficiency. In contrast, direct RNA exhibited reduced coverage at the 5′ end, reflecting the 3′–5′ sequencing orientation and RNA degradation ( Fig. 1d and Fig. 1e ). These technical biases translated into differences in transcript detection, despite normalising for read depth across both protocols. At the gene level, both methods recovered a comparable number of genes, with ∼16,500 shared and smaller sets uniquely identified by PCR-cDNA (8364) or direct RNA (4351) ( Fig 1f ). 
At the transcript level, however, differences were more pronounced: direct RNA identified over 63,000 isoforms, whereas PCR-cDNA identified ∼51,000, with ∼37,700 shared between protocols. This suggests that direct RNA sequencing may be better able to support isoform-level inference, although differences between the sequencing methods suggest biases within each sequencing approach. Transcript detection is strongly protocol-dependent We next compared the breadth and confidence of transcript detection between protocols. Direct RNA sequencing (dRNA) identified a greater number of transcripts than PCR-cDNA, with ∼63,425 versus ∼51,951 detected, and ∼37,771 shared between methods ( Fig. 2a–b ). This broad recovery by dRNA was particularly evident at higher confidence levels. At high confidence, dRNA uniquely detected 12,645 transcripts compared to only 753 for PCR-cDNA. At medium confidence, both protocols captured ∼10,743 shared isoforms, but dRNA contributed more unique transcripts (25,174 vs 14,663). In contrast, at low confidence PCR-cDNA recovered a much larger fraction of unique transcripts (16,728 vs 5,799), reflecting increased sensitivity to low-abundance fragments but at reduced assignment certainty. Download figure Open in new tab Figure 2. Transcript detection differs by protocol, confidence, and length. (a) Number of transcripts detected by PCR-cDNA (blue) and direct RNA (dRNA, red) across high-, medium-, and low-confidence categories, stratified by transcript length (<1000 bp, 1000–3500 bp, >3500 bp). PCR-cDNA is enriched for shorter, low-confidence transcripts, whereas direct RNA recovers a greater proportion of longer isoforms. (b) Venn diagrams of transcript detection across protocols. While 37,771 transcripts were shared, direct RNA uniquely identified >25,000 isoforms, particularly at high and medium confidence. PCR-cDNA contributed more unique transcripts at low confidence. (c) Transcript length distributions for high-, medium-, and low-confidence sets. 
PCR-cDNA libraries are biased towards short isoforms, while direct RNA captures a broader size distribution. (d) Intersection analysis of transcripts across confidence categories and protocols. Direct RNA shows enhanced recovery of high- and medium-confidence isoforms, while PCR-cDNA contributes disproportionately to low-confidence calls. Transcript length distributions further highlighted protocol-specific biases ( Fig. 2c ). PCR-cDNA libraries were strongly skewed toward short isoforms (<1000 bp), particularly at low confidence, consistent with amplification-driven overrepresentation of short molecules. Direct RNA sequencing, in contrast, recovered a more balanced spectrum of isoform sizes, including robust detection of long transcripts (>3500 bp). Intersection analysis confirmed these trends ( Fig. 2d ). dRNA preferentially contributed high- and medium-confidence isoforms, while PCR-cDNA disproportionately inflated the low-confidence category. Together, these results demonstrate complementary detection properties: PCR-cDNA maximises sensitivity at the expense of confidence and length balance, whereas direct RNA sequencing provides broader and more reliable identification of high-confidence, long isoforms. Divergent IL-6–induced transcriptional responses across protocols To assess how protocol choice influences biological interpretation, we compared IL-6–stimulated SKMM2 multiple myeloma cells using both PCR-cDNA and direct RNA sequencing. Although both approaches detected IL-6–responsive changes, the breadth and concordance of results differed markedly ( Fig. 3 ). Download figure Open in new tab Figure 3. Divergent IL-6–induced transcriptional responses detected by PCR-cDNA and direct RNA sequencing. (a) Differential expression analysis of SKMM2 cells stimulated with IL-6. Volcano plots show significantly upregulated (red) and downregulated (blue) genes for PCR-cDNA (left, 115 up, 38 down) and direct RNA (right, 97 up, 138 down). 
(b) Venn diagrams at the gene (left) and transcript (right) levels. Overlap between protocols is limited, with only 194 genes and 54 transcripts identified by both, while the majority of differentially expressed features are protocol-specific. (c) Correlation of fold-change estimates between protocols. While overall correlation was moderate (R = 0.62), PCR-cDNA compressed fold-change dynamic range relative to direct RNA. (d) Pathway-level analysis of differentially expressed genes. Heatmap of hallmark gene set enrichment reveals broader pathway activation with direct RNA, including IL-6/JAK–STAT signalling, MYC targets, and inflammatory programmes, whereas PCR-cDNA highlights fewer significantly perturbed pathways. At the differential expression level, PCR-cDNA sequencing identified 301 genes, whereas direct RNA sequencing detected substantially more, with 899 genes ( Fig. 3a ). Overlap between the two approaches was modest: only 194 differentially expressed genes were shared, with the majority uniquely detected by one protocol. This divergence was even more pronounced at the transcript level, where only 54 isoforms were shared, and most differentially expressed transcripts were protocol-specific ( Fig. 3b ). Direct comparison of log₂ fold-change values showed only moderate correlation between protocols (R = 0.62; Fig. 3c ), with PCR-cDNA compressing the dynamic range relative to direct RNA. This compression likely contributes to under-detection of isoform-specific responses. Downstream pathway analysis further highlighted protocol-specific biases ( Fig. 3d ). PCR-cDNA sequencing reported a restricted set of enriched pathways, whereas direct RNA sequencing revealed broader IL-6–responsive signatures, including robust activation of JAK/STAT3 signalling, MYC target genes, and inflammatory programmes, alongside repression of metabolic and epithelial pathways. 
Together, these results demonstrate that while both ONT protocols detect IL-6–induced transcriptional responses, the scope, sensitivity, and inferred biology differ substantially, with direct RNA sequencing providing broader and more nuanced coverage of isoform- and pathway-level responses. Protocol-specific detection of differential transcript usage To further dissect protocol-specific effects, we compared differential transcript usage (DTU) across selected genes ( Fig. 4 ). These examples illustrate both concordance and divergence in isoform-level interpretation between PCR-cDNA and direct RNA sequencing. Download figure Open in new tab Figure 4. Isoform-specific responses reveal protocol-dependent concordance and divergence. (a) RPL22L1: Two isoforms were detected across conditions. Both PCR-cDNA and direct RNA identified shifts in isoform usage upon IL-6 stimulation, with broadly concordant patterns. (b) GRB2: Three isoforms were detected. While both protocols captured IL-6–induced changes, the relative contribution of minor isoforms differed between PCR-cDNA and direct RNA. (c) RNF220: Five isoforms were identified. Direct RNA resolved multiple isoforms with distinct usage patterns, whereas PCR-cDNA predominantly supported two transcripts, underestimating isoform diversity. For RPL22L1, both protocols consistently identified a shift in isoform usage upon IL-6 stimulation, supporting concordant detection of DTU ( Fig 4a ). This indicates that for certain highly expressed genes with relatively simple isoform structures, both methods provide reproducible isoform-level inferences. By contrast, GRB2 showed a striking divergence: PCR-cDNA suggested a strong switch from ENST00000392563.5 to ENST00000316804.10, whereas direct RNA sequencing supported the continued dominance of ENST00000316804.10 with limited evidence for switching ( Fig. 4b ). This discrepancy may arise from PCR amplification bias against longer isoforms. 
A different pattern was observed for RNF220, where direct RNA sequencing uniquely identified IL-6–induced changes in isoform proportions, whereas PCR-cDNA showed little evidence of DTU ( Fig. 4c ). This reflects direct RNA’s greater capacity to capture long and complex isoforms with higher confidence. Taken together, these case studies demonstrate that while gene- and pathway-level signals are broadly robust, transcript-level inferences are strongly shaped by the choice of sequencing protocol. The minimal overlap in DTU calls underscores the need for caution when drawing isoform-specific biological conclusions from single-protocol datasets. Novel transcript discovery reveals further protocol-specific divergence We next examined the ability of PCR-cDNA and direct RNA sequencing to detect and quantify novel isoforms ( Fig. 5 ). Principal component analysis (PCA) showed clear separation by both protocol and treatment, indicating that technical biases and IL-6 stimulation both contribute to the observed variance ( Fig. 5a ). Differential expression analysis of novel transcripts identified by bambu revealed 665 candidates, with several highly significant isoforms upregulated in IL-6–treated cells ( Fig. 5b ). Heatmap clustering highlighted distinct expression patterns, with direct RNA capturing a broader repertoire of novel transcripts compared to PCR-cDNA ( Fig. 5c ). Intersection analysis confirmed this trend: direct RNA identified the largest set of unique novel isoforms, while overlap between protocols was limited ( Fig. 5d ). These results demonstrate that protocol choice not only influences detection of annotated isoforms but also strongly affects the discovery and quantification of novel transcripts. Download figure Open in new tab Figure 5. Principal component analysis, differential expression, and detection of novel transcripts across protocols. 
(a) Principal component analysis (PCA) of SKMM2 cells sequenced by PCR-cDNA or direct RNA (dRNA) protocols, with or without IL-6 stimulation. Clear separation by protocol and treatment indicates protocol-specific bias as well as biological response. (b) Differential expression analysis of novel transcripts detected by bambu. Volcano plot shows significantly upregulated (red) and downregulated (green) transcripts after IL-6 treatment, with labelled examples of highly significant candidates. (c) Heatmap of the top differentially expressed novel transcripts, illustrating distinct expression patterns across protocols and treatment conditions. (d) Intersection analysis of novel transcript detection across conditions. Bar plots indicate the number of novel transcripts detected per protocol and treatment, while the Upset plot shows shared and unique subsets. Direct RNA sequencing identified a larger set of novel isoforms compared with PCR-cDNA, with limited overlap between protocols. Synthetic ERCC benchmarking validates protocol-specific biases To disentangle biological variability from protocol artefacts, we sequenced ERCC synthetic spike-in controls using both PCR-cDNA and direct RNA protocols ( Fig. 6 ). At the gene level, both protocols produced highly accurate quantification, with transcript abundances strongly correlated with expected concentrations (R = 0.97 for PCR-cDNA; R = 0.98 for direct RNA). Root mean square error (RMSE) and mean absolute error (MAE) values were lower for direct RNA (RMSE = 0.68, MAE = 0.47) compared with PCR-cDNA (RMSE = 0.86, MAE = 0.66), confirming the higher quantitative accuracy of direct RNA ( Fig. 6a ). Download figure Open in new tab Figure 6. ERCC spike-in controls validate protocol-specific biases. (a) Correlation of measured expression (log₂ CPM) with known ERCC transcript concentrations for PCR-cDNA (left) and direct RNA (dRNA, right). 
Both protocols achieved strong correlations (R = 0.97 for PCR-cDNA, R = 0.98 for direct RNA), but direct RNA showed lower root mean square error (RMSE = 0.68) and mean absolute error (MAE = 0.47) compared with PCR-cDNA (RMSE = 0.86, MAE = 0.66), indicating higher quantitative accuracy. (b) Normalised coverage profiles of ERCC transcripts reveal protocol-specific biases consistent with cellular transcriptomes. PCR-cDNA libraries show underrepresentation towards the 3′ end, whereas direct RNA exhibits reduced coverage at the 5′ end. These effects are observed across all spike-ins and are more pronounced in longer transcripts (>1000 bp). Coverage profiles, however, recapitulated the characteristic biases of each protocol ( Fig. 6b ). PCR-cDNA showed progressive coverage loss at the 3′ ends of spike-ins, consistent with incomplete reverse transcription and PCR artefacts. Conversely, direct RNA sequencing displayed reduced coverage at the 5′ ends, reflecting sequencing orientation and RNA degradation. These biases were consistent with those observed in SKMM2 myeloma cells, validating them as intrinsic to the protocols rather than driven by biological features. Together, the ERCC benchmarking confirms that while both methods provide accurate abundance estimates at the gene level, protocol-specific biases in coverage and isoform detection systematically shape transcriptome interpretation. Discussion This study highlights both the complementary strengths and the limitations of ONT PCR-cDNA and direct RNA sequencing when applied to isoform-resolved transcriptomics. Using SKMM2 multiple myeloma cells as a controlled model, we demonstrate that while both protocols capture IL-6–induced transcriptional responses, they diverge substantially at the isoform and transcript level. 
Direct RNA sequencing consistently yielded longer reads, more balanced transcript coverage, and higher-confidence isoform assignments, whereas PCR-cDNA provided greater sensitivity for low-abundance transcripts but at the cost of amplification bias and reduced length representation. These protocol-specific biases were independently validated using ERCC synthetic spike-ins, which confirmed that PCR-cDNA libraries tend to enrich for shorter fragments and favour 5′ coverage, while direct RNA libraries underrepresent 5′ regions, consistent with RNA degradation. A further source of technical bias arises during the reverse transcription step of PCR-cDNA library preparation. Incomplete reverse transcription can lead to 3′ truncation of cDNA molecules, which in turn may partially or completely remove the unique molecular identifier (UMI) sequence. As we and others have shown previously ( 9 , 16 , 17 ), this truncation introduces a significant bioinformatic challenge, impeding the accurate identification and collapsing of reads derived from the same RNA molecule. Reads with truncated or unrecognisable UMIs are frequently discarded, resulting in the under-representation of valid long molecules in the final dataset. This not only reduces the accuracy of transcript quantification but also diminishes statistical power for downstream analyses. Such effects are a plausible mechanistic explanation for the weaker identification of differentially expressed genes by the PCR-cDNA method compared to direct RNA sequencing. A notable consequence of these biases is their impact on biological interpretation. At the gene and pathway levels, both protocols recovered canonical IL-6–responsive signatures such as JAK/STAT3 and MYC targets, though direct RNA consistently detected broader sets of differentially expressed genes and pathways. 
At the isoform level, however, overlap was minimal: differential transcript usage (DTU) calls were largely protocol-specific, and novel transcript discovery diverged between approaches. These findings emphasise that transcript-level biology inferred from long-read sequencing may reflect protocol-specific technical artefacts as much as genuine biology. Our findings align with and extend two recent benchmarking studies. Pardo-Palacios et al. ( 15 ) compared direct RNA and cDNA approaches and highlighted challenges in transcript isoform detection, quantification and de novo transcript detection with direct RNA sequencing, but also emphasised its lower sensitivity. Similarly, Chen et al. underscored protocol-specific biases in coverage, with direct RNA limited by throughput and input requirements( 14 ). Both studies, like ours, stress that while gene-level quantification is broadly concordant between protocols, isoform-level interpretation is highly protocol-dependent. Importantly, our work adds a direct biological case study—IL-6 signalling in myeloma—showing how these technical biases directly propagate into differential expression, pathway enrichment, and isoform usage analyses. Pragmatically, protocol choice should be guided by study aims. For isoform discovery and differential transcript usage, direct RNA sequencing offers higher confidence despite greater RNA input and low throughput. For experiments prioritising sensitivity to low-abundance transcripts or when input is limiting, PCR-cDNA remains valuable. Taken together with prior benchmarking efforts, our analysis provides a technical framework for selecting ONT sequencing protocols and for interpreting their results with appropriate caution, particularly when drawing biological conclusions from isoform-level data. 
Methods SKMM2 Cell Culture and RNA Extraction SKMM2 cells (DSMZ: ACC 430) were cultured in Gibco™ RPMI 1640 medium (11530586, Fisher Scientific, UK), supplemented with 10% FBS (SV30160.03, Cytiva Life Sciences, UK), and maintained at 37°C in a 5% CO₂ atmosphere. Cells were seeded into 12-well plates in triplicate and treated with either 1 ng/mL interleukin-6 (IL-6) in 0.2% BSA or with 0.2% BSA alone for 18 hours. Following treatment, cells were harvested and cell pellets resuspended in 350 µL Trizol reagent. RNA extraction was performed using the Zymo Direct-zol RNA Miniprep kit (R2051, Zymo Research, USA) according to the manufacturer’s protocol, with RNA eluted in 25 µL DNase/RNase-free water. RNA concentration was determined using a NanoPhotometer NP80 UV/Vis Spectrophotometer (Implen, Germany), while RNA integrity and size profile were assessed using the Agilent 4150 TapeStation System (Agilent Technologies, UK) with an RNA ScreenTape (Agilent Technologies, UK). All RNA samples had an RNA Integrity Number (RIN) between 9.7 and 9.8. PCR-cDNA Library Preparation and Sequencing Library preparation was performed using the PCR-cDNA Barcoding Kit (SQK-PCB111.24, Oxford Nanopore Technologies, UK) following the manufacturer’s instructions. A total of 200 ng of RNA (IL-6 treated, n=3; untreated, n=3) or 16 ng of ERCC RNA Spike-In Mix 1 (Thermo Fisher, UK) was used for cDNA synthesis. For SKMM2 samples, 14 cycles of PCR amplification were conducted, while 16 cycles were used for the ERCC mix. Library concentrations were quantified using a Qubit 2.0 Fluorometer (Thermo Fisher, UK) with the Qubit dsDNA HS Assay kit (Q32851, Thermo Fisher, UK). Library quality was assessed on a 4150 TapeStation System (Agilent Technologies, UK) using a high sensitivity D5000 ScreenTape. For sequencing, 2 fmol of each barcoded library were pooled, and 12 libraries (24 fmol) were loaded onto a PromethION R9.4.1 flow cell (FLO-PRO002, Oxford Nanopore Technologies, UK). 
Sequencing was performed on a PromethION 24 instrument (Oxford Nanopore Technologies, UK) using MinKNOW software (v24.06.15), with live base-calling using the Dorado high accuracy model (v7.4.14). The sequencing run lasted for 72 hours, with a minimum Q score of 9. For the ERCC mix, 5 fmol of the prepared library was loaded onto an R10.4.2 Flongle flow cell (FLO-FLG114, Oxford Nanopore Technologies, UK), and sequencing was conducted on a MinION Mk1B instrument with the Flongle adapter (Oxford Nanopore Technologies, UK). Live base-calling was performed using the Dorado high accuracy model (v7.3.11) with a 24-hour run time, a minimum read length of 20 bp, and a minimum Q score of 9. Direct RNA Library Preparation and Sequencing Library preparation was performed using the Direct RNA Sequencing Kit (SQK-RNA004, Oxford Nanopore Technologies, UK) following the manufacturer’s instructions. A total of 1 µg of RNA (IL-6 treated, n=3; untreated, n=3) or ∼150 ng of ERCC RNA Spike-In Mix 1 (n=1) was used for direct RNA sequencing. The concentration of the prepared libraries was measured using a Qubit 2.0 Fluorometer (Thermo Fisher, UK) and the Qubit dsDNA HS Assay kit (Q32851, Thermo Fisher, UK). All libraries exceeded the recommended 30 ng yield. Library size profiles were assessed using the Agilent 4150 TapeStation System (Agilent Technologies, UK) with a high sensitivity D5000 ScreenTape. Libraries were loaded in their entirety onto separate PromethION RNA Flow Cells (FLO-PRO004RA, Oxford Nanopore Technologies, UK). Sequencing was performed on a PromethION 24 instrument (Oxford Nanopore Technologies, UK) with MinKNOW software (v24.06.15). Live base-calling was performed using the Dorado RNA high accuracy model (v7.4.14). Sequencing lasted for 72 hours, with a minimum read length of 200 bp and a minimum Q score of 9. 
Data processing and analysis Reads passing quality filters were demultiplexed by MinKNOW software, where applicable, and concatenated into a single FASTQ file for each sample. For PCR-cDNA data, adapter trimming and read orientation were performed using pychopper (v2.7.10, Oxford Nanopore Technologies, UK). Reads shorter than 50 bp were discarded. Trimmed reads were aligned to the hg38 human genome assembly (GRCh38.p14, GENCODE v46) using minimap2 (v2.28) with spliced alignment mode, a k-mer size of 14, a minimum primary-to-secondary score ratio of 0.9, and canonical splice-site searching on the forward strand only. Direct RNA sequencing reads were aligned similarly, without adapter trimming. For ERCC reference transcripts, transcript quantification was performed using featureCounts within the Subread package (v2.0.6). Gene and transcript count matrices for SKMM2 cell line samples were generated using bambu (v3.6.0). Transcript coverage depth was calculated using the gene body coverage module of RSeQC (v5.0.4). All subsequent analyses and data visualisation were performed in R (version 4.4.1). Differential gene expression was analysed using DESeq2 (v1.44.0) with a false discovery rate (FDR) of 5%. Count normalization was performed using variance stabilizing transformation (VST), and VST-normalized data were used for visualisation and exploratory analyses. Gene Set Variation Analysis (GSVA) was performed using the GSVA package (v2.0.0) with hallmark gene sets from the Molecular Signatures Database (MSigDB). Differential expression of gene sets was assessed using linear modelling with empirical Bayes moderation (FDR = 5%). Prior to testing for differential transcript usage (DTU), genes and transcripts were filtered using DRIMSeq (v1.52.0). Genes were required to be expressed in all samples at >5 counts, and transcripts had to be expressed in >3 samples, with >5 counts and >10% transcript usage for the gene. 
DTU analysis was conducted using DEXSeq (v1.52.0), with FDR set to 5% for both gene-wise and transcript-wise analyses. Stage-wise adjustment for DTU was performed using StageR (v1.28.0) with FDR set to 5%, as described by van den Berge et al. Graphics were produced using ggplot2 (v3.5.1), ggpubr (v0.6.0), ggvenn (v0.1.10), ggtranscript (v1.0.0), ggalluvial (v0.12.5), and pheatmap (v1.0.12). Data availability Sequencing data have been deposited to the Gene Expression Omnibus under the accession number GSE306225. All analysis was performed using hg38 ensembl 101 version. Code availability All custom code used within this analysis is available on GitHub ( https://github.com/cribbslab/ONT_comparison ). Funding Research support was obtained from the National Institute for Health Research Oxford Biomedical Research Unit (U.O), Cancer Research UK (CRUK, U.O and A.P.C), the Bone Cancer Research Trust (BCRT) (A.P.C and U.O), the Chan Zuckerberg Initiative (A.P.C) and the Myeloma Single Cell Consortium (U.O). A.P.C. is a recipient of a Medical Research Council (MRC) career development fellowship (MR/V010182/1). Author contributions R.E.L performed analysis, data curation and drafting of the manuscript. E.C performed sequencing, quality control and manuscript editing. A.S. performed analysis and manuscript editing. V.G supported experimental work including cell culture and RNA preparation. U.O. and J.S. contributed study supervision and manuscript review. A.P.C conceived and supervised the project, acquired funding, administered the study, and led manuscript revision as corresponding author. All authors approved the final manuscript. Conflict of interests A.P.C and U.O are co-founders of Caeruleus Genomics Ltd and are inventors on several patents related to sequencing technologies filed by Oxford University Innovations. Funder Information Declared Medical Research Council, https://ror.org/03x94j517 , MR/V010182/1 References 1. ↵ Marx , V. 
( 2023 ) Method of the year: long-read sequencing Nat Methods 20 , 6 – 11 doi: 10.1038/s41592-022-01730-w OpenUrl CrossRef PubMed 2. ↵ Bayega , A. , Oikonomopoulos , S. , Wang , Y. C. , and Ragoussis , J. ( 2022 ) Improved Nanopore full-length cDNA sequencing by PCR-suppression Front Genet 13 , 1031355 doi: 10.3389/fgene.2022.1031355 OpenUrl CrossRef PubMed 3. ↵ Stark , R. , Grzelak , M. , and Hadfield , J. ( 2019 ) RNA sequencing: the teenage years Nat Rev Genet 20 , 631 – 656 doi: 10.1038/s41576-019-0150-2 OpenUrl CrossRef PubMed 4. ↵ Kim , Y. , Saville , L. , O’Neill , K. , Garant , J. M. , Liu , Y. , Haile-Merhu , S. et al. ( 2025 ) Nanopore direct RNA sequencing of human transcriptomes reveals the complexity of mRNA modifications and crosstalk between regulatory features Cell Genom 5 , 100872 doi: 10.1016/j.xgen.2025.100872 OpenUrl CrossRef 5. Wang , Y. , Zhao , Y. , Bollas , A. , Wang , Y. , and Au , K. F. ( 2021 ) Nanopore sequencing technology, bioinformatics and applications Nat Biotechnol 39 , 1348 – 1365 doi: 10.1038/s41587-021-01108-x OpenUrl CrossRef PubMed 6. ↵ Leger , A. , Amaral , P. P. , Pandolfini , L. , Capitanchik , C. , Capraro , F. , Miano , V. et al. ( 2021 ) RNA modifications detection by comparative Nanopore direct RNA sequencing Nat Commun 12 , 7198 doi: 10.1038/s41467-021-27393-3 OpenUrl CrossRef PubMed 7. Garalde , D. R. , Snell , E. A. , Jachimowicz , D. , Sipos , B. , Lloyd , J. H. , Bruce , M. et al. ( 2018 ) Highly parallel direct RNA sequencing on an array of nanopores Nat Methods 15 , 201 – 206 doi: 10.1038/nmeth.4577 OpenUrl CrossRef PubMed 8. ↵ Workman , R. E. , Tang , A. D. , Tang , P. S. , Jain , M. , Tyson , J. R. , Razaghi , R. et al. ( 2019 ) Nanopore native RNA sequencing of a human poly(A) transcriptome Nat Methods 16 , 1297 – 1305 doi: 10.1038/s41592-019-0617-2 OpenUrl CrossRef PubMed 9. ↵ Sun , J. , Philpott , M. , Loi , D. , Li , S. , Monteagudo-Mesas , P. , Hoffman , G. et al. 
( 2024 ) Correcting PCR amplification errors in unique molecular identifiers to generate accurate numbers of sequencing molecules Nat Methods 21 , 401 – 405 doi: 10.1038/s41592-024-02168-y OpenUrl CrossRef PubMed 10. ↵ Aird , D. , Ross , M. G. , Chen , W. S. , Danielsson , M. , Fennell , T. , Russ , C. et al. ( 2011 ) Analyzing and minimizing PCR amplification bias in Illumina sequencing libraries Genome Biol 12 , R18 doi: 10.1186/gb-2011-12-2-r18 OpenUrl CrossRef PubMed 11. ↵ Chen , H. , Zhang , Y. , Wang , B. , Liao , R. , Duan , X. , Yang , C. et al. ( 2024 ) Characterization and mitigation of artifacts derived from NGS library preparation due to structure-specific sequences in the human genome BMC Genomics 25 , 227 doi: 10.1186/s12864-024-10157-w OpenUrl CrossRef 12. ↵ Arzumanian , V. A. , Kurbatov , I. Y. , Ptitsyn , K. G. , Khmeleva , S. A. , Kurbatov , L. K. , Radko , S. P. et al. ( 2023 ) Identifying N6-Methyladenosine Sites in HepG2 Cell Lines Using Oxford Nanopore Technology Int J Mol Sci 24 , doi: 10.3390/ijms242216477 OpenUrl CrossRef PubMed 13. ↵ Zhong , Z. D. , Xie , Y. Y. , Chen , H. X. , Lan , Y. L. , Liu , X. H. , Ji , J. Y. et al. ( 2023 ) Systematic comparison of tools used for m6A mapping from nanopore direct RNA sequencing Nat Commun 14 , 1906 doi: 10.1038/s41467-023-37596-5 OpenUrl CrossRef PubMed 14. ↵ Chen , Y. , Davidson , N. M. , Wan , Y. K. , Yao , F. , Su , Y. , Gamaarachchi , H. et al. ( 2025 ) A systematic benchmark of Nanopore long-read RNA sequencing for transcript-level analysis in human cell lines Nat Methods 22 , 801 – 812 doi: 10.1038/s41592-025-02623-4 OpenUrl CrossRef PubMed 15. ↵ Pardo-Palacios , F. J. , Wang , D. , Reese , F. , Diekhans , M. , Carbonell-Sala , S. , Williams , B. et al. ( 2024 ) Systematic assessment of long-read RNA-seq methods for transcript identification and quantification Nat Methods 21 , 1349 – 1363 doi: 10.1038/s41592-024-02298-3 OpenUrl CrossRef PubMed 16. ↵ Huppertz , I. , Haberman , N. 
, and Ule , J. ( 2018 ) ‘Read-through marking’ reveals differential nucleotide composition of read-through and truncated cDNAs in iCLIP Wellcome Open Res 3 , 77 doi: 10.12688/wellcomeopenres.14663.1 OpenUrl CrossRef PubMed 17. ↵ Sugimoto , Y. , Konig , J. , Hussain , S. , Zupan , B. , Curk , T. , Frye , M. et al. ( 2012 ) Analysis of CLIP and iCLIP methods for nucleotide-resolution studies of protein-RNA interactions Genome Biol 13 , R67 doi: 10.1186/gb-2012-13-8-r67 OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted October 14, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing Rebecca E Lane , Eleanor Calcutt , Anandagopal Srinivasan , Vicki Gamble , Udo Oppermann , Jianfeng Sun , Adam P Cribbs bioRxiv 2025.10.13.681987; doi: https://doi.org/10.1101/2025.10.13.681987 Share This Article: Copy Citation Tools Distinct 5′ and 3′ Coverage Biases Shape Transcriptome Interpretation in Nanopore Direct RNA versus PCR-cDNA Sequencing Rebecca E Lane , Eleanor Calcutt , Anandagopal Srinivasan , Vicki Gamble , Udo Oppermann , Jianfeng Sun , Adam P Cribbs bioRxiv 2025.10.13.681987; doi: https://doi.org/10.1101/2025.10.13.681987 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Cell Biology Subject Areas All Articles Animal Behavior and Cognition (7633) Biochemistry (17680) Bioengineering (13889) Bioinformatics (41927) Biophysics (21445) Cancer Biology (18585) Cell Biology (25491) Clinical Trials (138) Developmental Biology (13373) Ecology (19897) Epidemiology (2067) Evolutionary Biology (24308) Genetics (15606) Genomics (22494) Immunology (17736) Microbiology (40385) Molecular Biology (17175) Neuroscience (88583) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4822) Physiology (7641) Plant Biology (15149) Scientific Communication and Education (2045) Synthetic Biology (4293) Systems Biology (9822) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00